├── .gitignore
├── README.md
├── exams
│   ├── exam2018.pdf
│   ├── exam2018solutions.pdf
│   ├── exam2019.pdf
│   ├── exam2019solutions.pdf
│   ├── exam2020.pdf
│   ├── exam2020solutions.pdf
│   ├── exam2021.pdf
│   ├── exam2021solutions.pdf
│   ├── exam2022.pdf
│   ├── exam2022solutions.pdf
│   ├── exam2023.pdf
│   ├── exam2023solutions.pdf
│   ├── exam2024.pdf
│   └── exam2024solutions.pdf
├── labs
│   ├── ex00
│   │   ├── exercise00.pdf
│   │   ├── npprimer.ipynb
│   │   └── python_setup_tutorial.md
│   ├── ex01
│   │   ├── exercise01.pdf
│   │   └── solutions01.pdf
│   ├── ex02
│   │   ├── exercise02.pdf
│   │   ├── solution
│   │   │   ├── Concrete_Data.csv
│   │   │   ├── Lab 2 - Gradient Descent.ipynb
│   │   │   ├── grid_search.py
│   │   │   ├── height_weight_genders.csv
│   │   │   ├── helpers.py
│   │   │   └── plots.py
│   │   ├── solutions02.pdf
│   │   └── template
│   │       ├── Concrete_Data.csv
│   │       ├── Lab 2 - Gradient Descent.ipynb
│   │       ├── grid_search.py
│   │       ├── height_weight_genders.csv
│   │       ├── helpers.py
│   │       └── plots.py
│   ├── ex03
│   │   ├── exercise03.pdf
│   │   ├── solution
│   │   │   └── notebook.ipynb
│   │   ├── solution03.pdf
│   │   └── template
│   │       └── notebook.ipynb
│   ├── ex04
│   │   ├── exercise04.pdf
│   │   ├── solution
│   │   │   ├── Lab 4 - Random Walks.ipynb
│   │   │   ├── helpers.py
│   │   │   ├── solution_lab04.ipynb
│   │   │   └── torus_topology.png
│   │   ├── solution04.pdf
│   │   └── template
│   │       ├── Lab 4 - Random Walks--alternative.ipynb
│   │       ├── helpers.py
│   │       ├── notebook_lab04.ipynb
│   │       └── torus_topology.png
│   ├── ex05
│   │   ├── exercise05.pdf
│   │   ├── solution
│   │   │   ├── Concrete_Data.csv
│   │   │   ├── Lab 5 - Stochastic Gradient Descent.ipynb
│   │   │   ├── gradient_descent.py
│   │   │   ├── grid_search.py
│   │   │   ├── height_weight_genders.csv
│   │   │   ├── helpers.py
│   │   │   └── plots.py
│   │   ├── solution05.pdf
│   │   └── template
│   │       ├── Concrete_Data.csv
│   │       ├── Lab 5 - Stochastic Gradient Descent.ipynb
│   │       ├── gradient_descent.py
│   │       ├── grid_search.py
│   │       ├── height_weight_genders.csv
│   │       ├── helpers.py
│   │       └── plots.py
│   ├── ex06
│   │   ├── exercise06.pdf
│   │   ├── solution
│   │   │   ├── Lab 6.ipynb
│   │   │   ├── accelerated_mixing.ipynb
│   │   │   ├── helper.py
│   │   │   └── mixing_helpers.py
│   │   ├── solution06.pdf
│   │   └── template
│   │       ├── Lab_6.ipynb
│   │       └── helper.py
│   ├── ex07
│   │   ├── exercise07.pdf
│   │   ├── solution
│   │   │   └── Lab 7 - Fixed Point with Newton.ipynb
│   │   ├── solution07.pdf
│   │   └── template
│   │       └── Lab 7 - Fixed Point with Newton.ipynb
│   ├── ex08
│   │   ├── exercise08.pdf
│   │   ├── solution
│   │   │   ├── Coordinate_descent.ipynb
│   │   │   ├── data
│   │   │   │   └── w1a
│   │   │   └── solution-svm-derivation.pdf
│   │   ├── solution08.pdf
│   │   └── template
│   │       ├── Lab_8.ipynb
│   │       └── data
│   │           └── w1a
│   ├── ex09
│   │   ├── exercise09.pdf
│   │   ├── solution
│   │   │   ├── dataset_generation_NOT_NECESSARY_FOR_STUDENTS
│   │   │   │   ├── generate_csv.py
│   │   │   │   └── movielens100k.mat
│   │   │   ├── ex09-MatrixCompletion.ipynb
│   │   │   ├── helpers.py
│   │   │   ├── movielens100k.csv
│   │   │   ├── optimizers.py
│   │   │   ├── plots.py
│   │   │   ├── stat_ratings.png
│   │   │   └── train_test.png
│   │   ├── solution09.pdf
│   │   └── template
│   │       ├── ex09-MatrixCompletion.ipynb
│   │       ├── helpers.py
│   │       ├── movielens100k.csv
│   │       ├── optimizers.py
│   │       └── plots.py
│   ├── ex10
│   │   ├── exercise10.pdf
│   │   └── solution10.pdf
│   └── mini-project
│       ├── latex-example-paper
│       │   ├── IEEEtran.cls
│       │   ├── denoised_signal_1d.png
│       │   ├── latex-template.pdf
│       │   ├── latex-template.tex
│       │   ├── literature.bib
│       │   └── local_wdenoised_1d.png
│       └── miniproject_description.pdf
├── lecture_notes
│   └── lecture-notes.pdf
└── slides
    ├── lecture01.pdf
    ├── lecture02.pdf
    ├── lecture03.pdf
    ├── lecture04.pdf
    ├── lecture05.pdf
    ├── lecture06.pdf
    ├── lecture07.pdf
    ├── lecture08.pdf
    ├── lecture09.pdf
    ├── lecture10.pdf
    ├── lecture11.pdf
    └── lecture12.pdf

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

.DS_Store

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# EPFL Course - Optimization for Machine Learning - CS-439

[Official coursebook information](http://edu.epfl.ch/coursebook/en/optimization-for-machine-learning-CS-439)

`Lectures:` Fri 13:15-15:00 in [CO2](https://plan.epfl.ch/?room==CO%202)

`Exercises:` Fri 15:15-17:00 in [BC01](https://plan.epfl.ch/?room==BC%2001)

This course teaches an overview of modern mathematical optimization methods, for applications in machine learning and data science. In particular, the scalability of algorithms to large datasets will be discussed in theory and in implementation.

### Team
- Instructors:
  - Nicolas Flammarion [nicolas.flammarion@epfl.ch](mailto:nicolas.flammarion@epfl.ch)
- Assistants:
  - Aditya Varre [aditya.varre@epfl.ch](mailto:aditya.varre@epfl.ch)
  - Oguz Kaan Yüksel [oguz.yuksel@epfl.ch](mailto:oguz.yuksel@epfl.ch)
  - Thomas Weinberger [thomas.weinberger@epfl.ch](mailto:thomas.weinberger@epfl.ch)
  - Yitao Xu [yitao.xu@epfl.ch](mailto:yitao.xu@epfl.ch)

`Contents:`

Convexity, Gradient Methods, Proximal algorithms, Subgradient Methods, Stochastic and Online Variants of the mentioned methods, Coordinate Descent, Frank-Wolfe, Accelerated Methods, Primal-Dual context and certificates, Lagrange and Fenchel Duality, Second-Order Methods including Quasi-Newton Methods, Derivative-Free Optimization.

*Advanced Contents:*

Parallel and Distributed Optimization Algorithms, Federated Learning

Computational Trade-Offs (Time vs Data vs Accuracy), Lower Bounds

Non-Convex Optimization: Convergence to Critical Points, Alternating minimization, Neural network training

### Program:
| Nr | Date  | Topic                                                  | Materials                                        | Exercises                              |
| -- | ----- | ------------------------------------------------------ | ------------------------------------------------ | -------------------------------------- |
| 1  | 21.2. | Introduction, Convexity                                | [slides](../../raw/master/slides/lecture01.pdf)  | [lab00](../../raw/master/labs/ex00/exercise00.pdf) |
| 2  | 28.2. | Gradient Descent                                       | [slides](../../raw/master/slides/lecture02.pdf)  | [lab01](../../raw/master/labs/ex01/exercise01.pdf) |
| 3  | 7.3.  | Projected Gradient Descent                             | [slides](../../raw/master/slides/lecture03.pdf)  | [lab02](../../raw/master/labs/ex02/exercise02.pdf) |
| 4  | 14.3. | Proximal and Subgradient Descent                       | [slides](../../raw/master/slides/lecture04.pdf)  | [lab03](../../raw/master/labs/ex03/exercise03.pdf) |
| 5  | 21.3. | Stochastic Gradient Descent, Non-Convex Optimization   | [slides](../../raw/master/slides/lecture05.pdf)  | [lab04](../../raw/master/labs/ex04/exercise04.pdf) |
| 6  | 28.3. | Non-Convex Optimization                                | [slides](../../raw/master/slides/lecture06.pdf)  | [lab05](../../raw/master/labs/ex05/exercise05.pdf) |
| 7  | 4.4.  | Newton's Method & Quasi-Newton                         | [slides](../../raw/master/slides/lecture07.pdf)  | [lab06](../../raw/master/labs/ex06/exercise06.pdf) |
| 8  | 11.4. | Coordinate Descent                                     | [slides](../../raw/master/slides/lecture08.pdf)  | lab07                                  |
| .  | 18.4. | `easter vacation`                                      |                                                  | -                                      |
| .  | 25.4. | `easter vacation`                                      |                                                  | -                                      |
| 9  | 2.5.  | Frank-Wolfe                                            | [slides](../../raw/master/slides/lecture09.pdf)  | [lab08](../../raw/master/labs/ex08/exercise08.pdf) |
| 10 | 9.5.  | Lower Bounds and Accelerated Gradient Descent          | [slides](../../raw/master/slides/lecture10.pdf)  | [lab09](../../raw/master/labs/ex09/exercise09.pdf) |
| 11 | 16.5. | Gradient-free and adaptive methods                     | [slides](../../raw/master/slides/lecture11.pdf)  | [lab10](../../raw/master/labs/ex10/exercise10.pdf) |
| 12 | 23.5. | Optimization for Large Language Models (LLMs) (guest lecture from the SwissAI LLMs team) | [slides](../../raw/master/slides/lecture12.pdf) | Q&A Projects |
| 13 | 30.5. | `Mini-Project week`                                    |                                                  | -                                      |

### Lecture Notes:
The course is based on the following [lecture notes](../../raw/master/lecture_notes/lecture-notes.pdf).

### Videos:
The [videos](https://mediaspace.epfl.ch/channel/CS-439+Optimization+for+machine+learning/31980) of the lectures for each week will be available.

### Exercises:
The [weekly exercises](../../tree/master/labs/) consist of a mix of theoretical and practical `Python` exercises for the corresponding topic each week (starting week 2). Solutions to exercises are available in the lab folder.

### Forum:
[Discussion forum](https://edstem.org/eu/courses/2015/discussion/) (EPFL internal)

### Project:
A `mini-project` will focus on the practical implementation: here we encourage students to investigate the real-world performance of one of the studied optimization algorithms or one of its variants, helping to provide solid empirical evidence for some aspects of its behaviour on a real machine-learning task.
The project is mandatory and done in groups of three students. It counts for 30% of the final grade. Project reports (3-page PDF) are due June 13th. Here is a [detailed project description](../../raw/master/labs/mini-project/miniproject_description.pdf).

### Assessment:
Session Exam. Format: Closed book. Theoretical questions similar to exercises. You are allowed to bring one cheat sheet (A4-size paper; both sides can be used).

For practice:
- exams [2024](../../raw/master/exams/exam2024.pdf), [2023](../../raw/master/exams/exam2023.pdf), [2022](../../raw/master/exams/exam2022.pdf), [2021](../../raw/master/exams/exam2021.pdf), [2020](../../raw/master/exams/exam2020.pdf), [2019](../../raw/master/exams/exam2019.pdf), [2018](../../raw/master/exams/exam2018.pdf)
- solutions [2024](../../raw/master/exams/exam2024solutions.pdf), [2023](../../raw/master/exams/exam2023solutions.pdf), [2022](../../raw/master/exams/exam2022solutions.pdf), [2021](../../raw/master/exams/exam2021solutions.pdf), [2020](../../raw/master/exams/exam2020solutions.pdf), [2019](../../raw/master/exams/exam2019solutions.pdf), [2018](../../raw/master/exams/exam2018solutions.pdf).

### Links to related courses and materials
- [CMU 10-725](https://www.stat.cmu.edu/~ryantibs/convexopt-F18/)
- [Berkeley EE-227C](https://ee227c.github.io/)

### Recommended Books
- [Convex Optimization: Algorithms and Complexity](https://arxiv.org/pdf/1405.4980.pdf), by Sébastien Bubeck (free online)
- [Convex Optimization](http://stanford.edu/~boyd/cvxbook/), Stephen Boyd and Lieven Vandenberghe (free online)
- [Introductory Lectures on Convex Optimization](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.693.855&rep=rep1&type=pdf), Yurii Nesterov (free online)

--------------------------------------------------------------------------------
/exams/exam2018.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2018.pdf
--------------------------------------------------------------------------------
/exams/exam2018solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2018solutions.pdf
--------------------------------------------------------------------------------
/exams/exam2019.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2019.pdf
--------------------------------------------------------------------------------
/exams/exam2019solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2019solutions.pdf
--------------------------------------------------------------------------------
/exams/exam2020.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2020.pdf
--------------------------------------------------------------------------------
/exams/exam2020solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2020solutions.pdf
-------------------------------------------------------------------------------- /exams/exam2021.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2021.pdf -------------------------------------------------------------------------------- /exams/exam2021solutions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2021solutions.pdf -------------------------------------------------------------------------------- /exams/exam2022.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2022.pdf -------------------------------------------------------------------------------- /exams/exam2022solutions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2022solutions.pdf -------------------------------------------------------------------------------- /exams/exam2023.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2023.pdf -------------------------------------------------------------------------------- /exams/exam2023solutions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2023solutions.pdf -------------------------------------------------------------------------------- /exams/exam2024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2024.pdf -------------------------------------------------------------------------------- /exams/exam2024solutions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2024solutions.pdf -------------------------------------------------------------------------------- /labs/ex00/exercise00.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex00/exercise00.pdf -------------------------------------------------------------------------------- /labs/ex00/python_setup_tutorial.md: -------------------------------------------------------------------------------- 1 | # Setup Guide for Coding Machine Learning and Optimization Methods - EPFL ML and OptML Courses 2 | 3 | In order to implement the algorithms seen in class and work on the projects, we'll be using Python notebooks. This first lab will serve as an introduction to the Python language, the environment we are going to be using, and how to do basic vector and matrix manipulations. 4 | 5 | ## The environment 6 | 7 | We recommend two easy-to-use online environments (EPFL Noto or Google Colab) or a local installation (Anaconda). 

### Online: EPFL Noto & Google Colab

#### EPFL Noto
EPFL's centralized JupyterLab platform, called [Noto](https://www.epfl.ch/education/educational-initiatives/cede/digitaltools/jupyter-notebooks-for-education/), allows teachers and students to use Jupyter (Python) notebooks without having to install anything on their computer: they can easily access, modify and run notebooks online with a simple web browser. EPFL Noto files are automatically saved on your EPFL account and remain available there.

[Click here for an automatic setup for CS-439 on your EPFL Noto and start with the introduction exercise `npprimer.ipynb`.](https://noto.epfl.ch/hub/user-redirect/git-pull?repo=https://github.com/epfml/OptML_course&urlpath=lab/tree/OptML_course/labs/ex00/npprimer.ipynb)

To get the latest content from GitHub (when it is updated):
1. open a terminal window in Noto (via the '+' sign)
2. `cd` to `OptML_course`
3. add and commit any changes you have made locally (alternatively, stash your changes using `git stash` if you don't want to keep them)
4. execute `git pull`

After opening the terminal (step 1):
```bash
cd
cd OptML_course
git add *
git commit -m "your custom message about these changes"
git pull
```

#### Google Colab

Google Colab provides a similar environment to Noto, with additional access to GPUs (not needed in the first few labs). Note that you need to take care of storing the files permanently yourself (storing on Google Drive, downloading to a local machine, ...).

You can open any exercise by replacing `XY` with the lab number and `PATH_TO_FILE` with the path of the notebook you wish to open:
`http://colab.research.google.com/github/epfml/OptML_course/blob/master/labs/exXY/PATH_TO_FILE`

E.g. for the numpy introduction `npprimer.ipynb`:
[`http://colab.research.google.com/github/epfml/OptML_course/blob/master/labs/ex00/npprimer.ipynb`](http://colab.research.google.com/github/epfml/OptML_course/blob/master/labs/ex00/npprimer.ipynb)

You can also create an empty notebook by following this [link](https://colab.research.google.com/) and clicking `"NEW NOTEBOOK"`, or you can open a pre-existing notebook (.ipynb extension) by selecting the `Upload` tab.

If for some reason you've opened a Python 2 notebook, you can switch to Python 3 by going to `Runtime > Change runtime type`. There you can also add a GPU to your notebook if necessary.

### Offline: Python distribution Anaconda

If you prefer to have an environment locally on your computer, you can use the [Anaconda](https://www.anaconda.com/) distribution to run Python 3, as it is easy to install and comes with most packages we will need. To install Anaconda, go to [the download page](https://www.anaconda.com/distribution/) and get the Python installer for your OS - make sure to use the newer version 3.x, not 2.x. Follow the instructions of the installer and you're done.
> **Warning!** The installer will ask you if you want to add Anaconda to your path. Your default answer should be yes, unless you have specific reasons not to want this.


### Development Environment

During the course, we will use [**Jupyter Notebooks**](http://jupyter.org/), which is a great tool for exploratory and interactive programming and in particular for data analysis.
Notebooks are browser-based, and you start a notebook on your localhost by typing `jupyter notebook` in the console. Jupyter is already installed by default with Anaconda. The interface is pretty intuitive, but there are a few tweaks and shortcuts that will make your life easier, which we'll detail in the next section. You can of course ask any of the TAs for help on using the Notebooks.

### The Notebook System

For additional resources on how the notebook system works, we recommend

* [The Jupyter notebook beginner's guide](https://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/index.html)
* [The official documentation](http://jupyter-notebook.readthedocs.io/en/latest/index.html)

#### Examples

We provide you with an example of a notebook for [this first lab](https://github.com/epfml/OptML_course/tree/master/labs/ex00), but if you want to see some more examples already, feel free to take a look at

* The introductory notebooks available at [Try Jupyter](https://try.jupyter.org/). It spawns an instance of the Jupyter Notebook, which won't save any of your changes.
  *Note: it might not be available if their server is under too much load.*
* [A gallery of interesting IPython Notebooks](https://github.com/jupyter/jupyter/wiki/A-gallery-of-interesting-Jupyter-Notebooks) by the IPython Notebook team

#### Tips & Tricks

There are a few handy commands that you should start every notebook with:

    # Plot figures in the notebook (instead of a new window)
    %matplotlib notebook

    # Automatically reload modules
    %load_ext autoreload
    %autoreload 2

    # The usual imports
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

#### Keyboard shortcuts

* Adding cells
  * `a` adds an empty cell above the selected one,
  * `b` adds it below.
* Running code
  * `Enter` enters the edit mode of the currently selected cell.
  * `Shift-Enter` runs the current cell and goes to the next one.
  * `Ctrl-Enter` runs the current cell and leaves it selected.
* Autocompletion (Jupyter notebook)
  * `Tab` pops up the autocompletion when you are in the middle of writing a function call/class name, and shows the arguments of the function being called when used after an opening parenthesis.
  * `Shift-Tab` pops up the help/documentation of the function it's used on.
* Autocompletion (Google Colab)
  * `Ctrl-Space` pops up the autocompletion when you are in the middle of writing a function call/class name, and shows the arguments of the function being called when used after an opening parenthesis.
  * Clicking on a function name and hovering over it will pop up the help/documentation for that function.

* For a complete list of shortcuts, go to `help > keyboard shortcuts`

## Python

We will be working in Python. If you have already been introduced to Python, feel free to skip this section. If you come from another background, you might want to take some tutorials in addition to this lab in the next week to feel comfortable with it. You do not need to become an expert in Python, but you should be comfortable with the general syntax, some of the idiosyncrasies of Python, and know how to do basic vector and matrix algebra. For the last part, we will be using NumPy, a library we will introduce later.
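As a quick taste of those points, here is a small, self-contained snippet; every name in it is made up purely for illustration. It shows the `foreach`-style loops, the default/named function arguments, and the basic data structures that the reading list below highlights:

```python
# for-loops iterate directly over elements ("foreach" style)
scores = [3, 1, 4, 1, 5]
for s in scores:
    print(s)

# use enumerate() if you also need the index
for i, s in enumerate(scores):
    print(i, s)

# functions with default and named arguments
def gd_step(x, gamma=0.1, verbose=False):
    if verbose:
        print("taking a step of size", gamma)
    return x - gamma

print(gd_step(1.0))                # uses the defaults
print(gd_step(1.0, verbose=True))  # arguments can be passed by name

# basic data structures: list (above), dict, tuple
params = {"gamma": 0.1, "max_iters": 50}  # dict
bounds = (0.0, 2.0)                       # tuple
print(params["gamma"], bounds[0])
```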

For a nice introduction to Python, you should take a look at [the Python tutorial](https://docs.python.org/3/tutorial/index.html). Here are some reading recommendations:

* Skim through Sections 1-3 to get an idea of the Python syntax if you have never used it.
* Pay a little more attention to Section 4, especially

  * Section 4.2 on for loops, as they behave like `foreach` by default, which may be disturbing if you are more accustomed to coding in lower-level languages.
  * Section 4.7 on functions, default argument values and named arguments, as they are a real pleasure to use (compared to traditional, order-based arguments) once you are used to them.
* Section 5 on Data Structures, especially how to use Lists, Dictionaries and Tuples if you have not used a language with those concepts before.
* You can keep Sections 6-9 on Modules, IO, Exceptions and Objects for later - when you know you will be needing them.
* Section 10 on the standard library and [the standard library index](https://docs.python.org/3/library/index.html) are worth a quick scroll to see what's available.
* Do not bother with Sections 11-16 for now.

Here are some additional resources on Python:

* [Python's standard library reference](https://docs.python.org/3/library/index.html)
* [Debugging and profiling](https://docs.python.org/3/library/debug.html)
* If you want to, some exercises are available at [learnpython.org](http://www.learnpython.org/)


## NumPy and Vector Calculations

Our `npprimer.ipynb` notebook, as part of the first lab, has some useful commands and exercises to help you get started with NumPy.

We recommend [this list of small exercises](https://www.machinelearningplus.com/101-numpy-exercises-python/) to get started with NumPy arrays etc.

If you are familiar with Matlab, a good starting point is [this guide](https://docs.scipy.org/doc/numpy/user/numpy-for-matlab-users.html). Note that we will use the `array` data structure much more than the `matrix` data structure.

A good and probably more complete reference is [this one](https://sites.engineering.ucsb.edu/~shell/che210d/numpy.pdf).


### Installation FAQ

> **Other shell.** If you are using another shell (e.g. zsh on Mac OS X), after installing Anaconda you still need to add the installed software to your path, that is, to add it to the correct profile of your shell. To do so, run the following commands in your terminal: `touch ~/.bash_profile; open ~/.bash_profile`. It will open your bash profile, where you'll see the line that was added by the Python installer. Copy it. Then `touch ~/.zshrc; open ~/.zshrc` will open the profile for zsh; you can paste the line at the bottom of the file.

> **Alternative Python IDEs.** While we recommend plain Jupyter Notebooks, if you are more comfortable using a more traditional IDE, you can give [**PyCharm**](https://www.jetbrains.com/pycharm/) a try. Your EPFL email gives you access to the free educational version. You should keep this option in mind if you need a full-fledged debugger to find a nasty bug.

And of course, as a third alternative, you can always use a [decent text editor](https://www.sublimetext.com/) and run your code from the console or any plugin. Keep in mind that the TAs might not be able to help you with your setup if you go down this path.
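Whichever setup you choose, a minimal sanity check like the following (the numbers are arbitrary) confirms that NumPy works and previews the kind of `array`-based vector and matrix operations the labs rely on:

```python
import numpy as np

# a small matrix A (2x3) and a vector x (3,), both plain arrays
A = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])
x = np.array([1.0, 0.0, -1.0])

print(A.shape, x.shape)   # (2, 3) (3,)
print(A.dot(x))           # matrix-vector product: [-2. -2.]
print(A @ x)              # the same product, via the @ operator
print(np.linalg.norm(x))  # Euclidean norm of x: 1.414...
```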

## Download the exercises content & basic Git tutorial

### Simplest: no git

You can click on the green `Code` button on the main [page](https://github.com/epfml/OptML_course) and select `Download ZIP`. We advise against this method, as you might have to re-download the repository every time new content is posted there.

### Still simple: using GitHub Desktop

GitHub Desktop simplifies the interaction with a GitHub repository by providing a simple GUI; check it out [here](https://desktop.github.com/). GitHub Desktop supports most 'real' `git` use cases such as the ones described below.

### More advanced: Git via command line

`Git` is the most widely used version control system. It's a tool to share and help you collaboratively develop and maintain code. GitHub is a Git repository hosting service; it allows you to create GitHub repositories you can interact with using `git`.

`Git` is typically used via the terminal. To install Git, follow this [link](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git).

**Download repository.** Once Git is installed, you can clone a GitHub repository using `git clone <repository url>`, e.g. `git clone https://github.com/epfml/OptML_course.git`.

**Collaborative coding.** A standard workflow when working as a group is to implement features through pull requests (PRs), as consolidated in the example after this list:
* You do not want to break the master branch by mistake, so you start by creating and moving to a new branch: `git checkout -b <new branch name>`
* Now you're safe on your new branch: the modifications you make won't affect the master branch. You can modify/create new files as if you were on the master branch, e.g.

```bash
# let's say we modify file.py here
git status # check the status of the files git is tracking
git add file.py
git commit -m "some message clearly explaining the modification"
```
* Once you are done with all the modifications you want, you can push to your new branch: `git push origin <new branch name>`.
* Finally, you can open a PR from the GitHub user interface. Typically you would ask your colleagues to review your PR and accept it or ask for modifications.
* Once your PR is accepted and merged, do not forget to switch back to master: `git checkout master`, and pull your approved changes: `git pull origin master`.
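Putting those steps together, one complete PR cycle looks like this; `my-feature` and `file.py` are placeholder names:

```bash
git clone https://github.com/epfml/OptML_course.git   # done once
git checkout -b my-feature         # create and switch to a new branch

# ... edit file.py ...
git status                         # see what changed
git add file.py
git commit -m "clearly explain the modification"
git push origin my-feature         # publish the branch, then open a PR on GitHub

# after the PR has been reviewed and merged:
git checkout master
git pull origin master             # get the merged changes locally
```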
177 | 178 | ## Additional References 179 | 180 | [A good Python and NumPy Tutorial from Stanford.](https://github.com/kuleshov/cs228-material/blob/master/tutorials/python/cs228-python-tutorial.ipynb) 181 | -------------------------------------------------------------------------------- /labs/ex01/exercise01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex01/exercise01.pdf -------------------------------------------------------------------------------- /labs/ex01/solutions01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex01/solutions01.pdf -------------------------------------------------------------------------------- /labs/ex02/exercise02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex02/exercise02.pdf -------------------------------------------------------------------------------- /labs/ex02/solution/grid_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Exercise 2. 3 | 4 | Grid Search 5 | """ 6 | 7 | import numpy as np 8 | 9 | 10 | def generate_w(num_intervals): 11 | """Generate a grid of values for w0 and w1.""" 12 | w0 = np.linspace(-100, 200, num_intervals) 13 | w1 = np.linspace(-150, 150, num_intervals) 14 | return w0, w1 15 | 16 | 17 | def grid_search(y, tx, w0, w1): 18 | """Algorithm for grid search.""" 19 | losses = np.zeros((len(w0), len(w1))) 20 | # compute loss for each combination of w0 and w1. 
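    # (the loss is the mean squared error: (1/(2n)) * sum of the squared residuals e = y - tx.dot(w))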
21 | for ind_row, row in enumerate(w0): 22 | for ind_col, col in enumerate(w1): 23 | w = np.array([row, col]) 24 | e = y - tx.dot(w) 25 | losses[ind_row, ind_col] = 1/2*np.mean(e**2) 26 | return losses 27 | 28 | 29 | def get_best_parameters(w0, w1, losses): 30 | """Get the best w from the result of grid search.""" 31 | min_row, min_col = np.unravel_index(np.argmin(losses), losses.shape) 32 | return losses[min_row, min_col], w0[min_row], w1[min_col] 33 | -------------------------------------------------------------------------------- /labs/ex02/solution/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some helper functions.""" 3 | import numpy as np 4 | 5 | 6 | def load_data(sub_sample=True, add_outlier=False): 7 | """Load data and convert it to the metric system.""" 8 | path_dataset = "height_weight_genders.csv" 9 | data = np.genfromtxt( 10 | path_dataset, delimiter=",", skip_header=1, usecols=[1, 2]) 11 | height = data[:, 0] 12 | weight = data[:, 1] 13 | gender = np.genfromtxt( 14 | path_dataset, delimiter=",", skip_header=1, usecols=[0], 15 | converters={0: lambda x: 0 if b"Male" in x else 1}) 16 | # Convert to metric system 17 | height *= 0.025 18 | weight *= 0.454 19 | 20 | # sub-sample 21 | if sub_sample: 22 | height = height[::50] 23 | weight = weight[::50] 24 | 25 | if add_outlier: 26 | # outlier experiment 27 | height = np.concatenate([height, [1.1, 1.2]]) 28 | weight = np.concatenate([weight, [51.5/0.454, 55.3/0.454]]) 29 | 30 | return height, weight, gender 31 | 32 | 33 | def standardize(x): 34 | """Standardize the original data set.""" 35 | mean_x = np.mean(x,axis = 0) 36 | x = x - mean_x 37 | std_x = np.std(x, axis = 0) 38 | x = x / std_x 39 | return x, mean_x, std_x 40 | 41 | 42 | def build_model_data(height, weight): 43 | """Form (y,tX) to get regression data in matrix form.""" 44 | y = weight 45 | x = height 46 | num_samples = len(y) 47 | tx = np.c_[np.ones(num_samples), x] 48 | return y, tx 49 | 50 | -------------------------------------------------------------------------------- /labs/ex02/solution/plots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """function for plot.""" 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from grid_search import get_best_parameters 6 | 7 | 8 | def prediction(w0, w1, mean_x, std_x): 9 | """Get the regression line from the model.""" 10 | x = np.arange(1.2, 2, 0.01) 11 | x_normalized = (x - mean_x) / std_x 12 | return x, w0 + w1 * x_normalized 13 | 14 | 15 | def base_visualization(grid_losses, w0_list, w1_list, 16 | mean_x, std_x, height, weight): 17 | """Base Visualization for both models.""" 18 | w0, w1 = np.meshgrid(w0_list, w1_list) 19 | 20 | fig = plt.figure() 21 | 22 | # plot contourf 23 | ax1 = fig.add_subplot(1, 2, 1) 24 | cp = ax1.contourf(w0, w1, grid_losses.T, cmap=plt.cm.jet) 25 | fig.colorbar(cp, ax=ax1) 26 | ax1.set_xlabel(r'$w_0$') 27 | ax1.set_ylabel(r'$w_1$') 28 | # put a marker at the minimum 29 | loss_star, w0_star, w1_star = get_best_parameters( 30 | w0_list, w1_list, grid_losses) 31 | ax1.plot(w0_star, w1_star, marker='*', color='r', markersize=20) 32 | 33 | 34 | 35 | return fig 36 | 37 | 38 | def grid_visualization(grid_losses, w0_list, w1_list, 39 | mean_x, std_x, height, weight): 40 | """Visualize how the trained model looks like under the grid search.""" 41 | fig = base_visualization( 42 | grid_losses, w0_list, w1_list, mean_x, std_x, height, weight) 43 | 
44 | loss_star, w0_star, w1_star = get_best_parameters( 45 | w0_list, w1_list, grid_losses) 46 | # plot prediciton 47 | x, f = prediction(w0_star, w1_star, mean_x, std_x) 48 | ax2 = fig.get_axes()[2] 49 | ax2.plot(x, f, 'r') 50 | 51 | return fig 52 | 53 | 54 | def gradient_descent_visualization( 55 | gradient_losses, gradient_ws, 56 | grid_losses, grid_w0, grid_w1, 57 | mean_x, std_x, height, weight, n_iter=None): 58 | """Visualize how the loss value changes until n_iter.""" 59 | fig = base_visualization( 60 | grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight) 61 | 62 | ws_to_be_plotted = np.stack(gradient_ws) 63 | if n_iter is not None: 64 | ws_to_be_plotted = ws_to_be_plotted[:n_iter] 65 | 66 | ax1 = fig.get_axes()[0] 67 | ax1.plot( 68 | ws_to_be_plotted[:, 0], ws_to_be_plotted[:, 1], 69 | marker='o', color='w', markersize=10) 70 | pred_x, pred_y = prediction( 71 | ws_to_be_plotted[-1, 0], ws_to_be_plotted[-1, 1], 72 | mean_x, std_x) 73 | 74 | return fig 75 | -------------------------------------------------------------------------------- /labs/ex02/solutions02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex02/solutions02.pdf -------------------------------------------------------------------------------- /labs/ex02/template/Lab 2 - Gradient Descent.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Useful starting lines\n", 10 | "%matplotlib inline\n", 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "%load_ext autoreload\n", 14 | "%autoreload 2" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Check the Python version\n", 24 | "import sys\n", 25 | "if sys.version.startswith(\"3.\"):\n", 26 | " print(\"You are running Python 3. 
Good job :)\")\n", 27 | "else:\n", 28 | " print(\"This notebook requires Python 3.\\nIf you are using Google Colab, go to Runtime > Change runtime type and choose Python 3.\")" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "try:\n", 38 | " import google.colab\n", 39 | " IN_COLAB = True\n", 40 | "except:\n", 41 | " IN_COLAB = False\n", 42 | "if IN_COLAB:\n", 43 | " # Clone the entire repo to access the files.\n", 44 | " !git clone -l -s https://github.com/epfml/OptML_course.git cloned-repo\n", 45 | " %cd cloned-repo/labs/ex02/template/" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "# Load the data" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "import datetime\n", 62 | "from helpers import *\n", 63 | "\n", 64 | "height, weight, gender = load_data(sub_sample=False, add_outlier=False)\n", 65 | "x, mean_x, std_x = standardize(height)\n", 66 | "b, A = build_model_data(x, weight)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "print('Number of samples n = ', b.shape[0])\n", 76 | "print('Dimension of each sample d = ', A.shape[1])" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# Least Squares Estimation\n", 84 | "Least squares estimation is one of the fundamental machine learning algorithms. Given an $ n \\times d $ matrix $A$ and a $ n \\times 1$ vector $b$, the goal is to find a vector $x \\in \\mathbb{R}^d$ which minimizes the objective function $$f(x) = \\frac{1}{2n} \\sum_{i=1}^{n} (a_i^\\top x - b_i)^2 = \\frac{1}{2n} \\|Ax - b\\|^2 $$\n", 85 | "\n", 86 | "In this exercise, we will try to fit $x$ using Least Squares Estimation. \n", 87 | "\n", 88 | "One can see the function is $L$ smooth with $L =\\frac1n\\|A^T A\\| = \\frac1n\\|A\\|^2$ (Lemma 2.3 for the first equality, and a few manipulations for the second)." 
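,
    "\n",
    "Concretely, $\|A^\top A\| = \sigma_{\max}(A)^2 = \|A\|^2$, where $\sigma_{\max}(A)$ is the largest singular value of $A$ and $\|A\|$ denotes its spectral norm; this gives the second equality."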
89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "# Computing the Objective Function\n", 96 | "Fill in the `calculate_objective` function below:" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "def calculate_objective(Axmb):\n", 106 | " \"\"\"Calculate the mean squared error for vector Axmb = Ax - b.\"\"\"\n", 107 | " # ***************************************************\n", 108 | " # INSERT YOUR CODE HERE\n", 109 | " # TODO: compute mean squared error\n", 110 | " # ***************************************************\n", 111 | " raise NotImplementedError" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "# Compute smoothness constant $L$" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "To compute the spectral norm of A you can use np.linalg.norm(A, 2)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "def calculate_L(b, A):\n", 135 | " \"\"\"Calculate the smoothness constant for f\"\"\"\n", 136 | " # ***************************************************\n", 137 | " # INSERT YOUR CODE HERE\n", 138 | " # TODO: compute ||A.T*A||\n", 139 | " # ***************************************************\n", 140 | " raise NotImplementedError\n", 141 | " # ***************************************************\n", 142 | " # INSERT YOUR CODE HERE\n", 143 | " # TODO: compute L = smoothness constant of f\n", 144 | " # ***************************************************\n", 145 | " raise NotImplementedError\n", 146 | " return L" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "# Gradient Descent" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "Please fill in the functions `compute_gradient` below:" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "def compute_gradient(b, A, x):\n", 170 | " \"\"\"Compute the gradient.\"\"\"\n", 171 | " # ***************************************************\n", 172 | " # INSERT YOUR CODE HERE\n", 173 | " # TODO: compute gradient and objective\n", 174 | " # ***************************************************\n", 175 | " raise NotImplementedError\n", 176 | " return grad, Axmb" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "Please fill in the functions `gradient_descent` below:" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "def gradient_descent(b, A, initial_x, max_iters, gamma):\n", 193 | " \"\"\"Gradient descent algorithm.\"\"\"\n", 194 | " # Define parameters to store x and objective func. 
values\n", 195 | " xs = [initial_x]\n", 196 | " objectives = []\n", 197 | " x = initial_x\n", 198 | " for n_iter in range(max_iters):\n", 199 | " # ***************************************************\n", 200 | " # INSERT YOUR CODE HERE\n", 201 | " # TODO: compute gradient and objective function\n", 202 | " # ***************************************************\n", 203 | " raise NotImplementedError\n", 204 | " # ***************************************************\n", 205 | " # INSERT YOUR CODE HERE\n", 206 | " # TODO: update x by a gradient descent step\n", 207 | " # ***************************************************\n", 208 | " raise NotImplementedError\n", 209 | " # store x and objective function value\n", 210 | " xs.append(x)\n", 211 | " objectives.append(obj)\n", 212 | " print(\"Gradient Descent({bi}/{ti}): objective={l}\".format(\n", 213 | " bi=n_iter, ti=max_iters - 1, l=obj))\n", 214 | "\n", 215 | " return objectives, xs" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "Test your gradient descent function with a naive step size through gradient descent demo shown below:" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "# from gradient_descent import *\n", 232 | "from plots import gradient_descent_visualization\n", 233 | "\n", 234 | "# Define the parameters of the algorithm.\n", 235 | "max_iters = 50\n", 236 | "\n", 237 | "gamma = 0.1\n", 238 | "\n", 239 | "# Initialization\n", 240 | "x_initial = np.zeros(A.shape[1])\n", 241 | "\n", 242 | "# Start gradient descent.\n", 243 | "start_time = datetime.datetime.now()\n", 244 | "gradient_objectives_naive, gradient_xs_naive = gradient_descent(b, A, x_initial, max_iters, gamma)\n", 245 | "end_time = datetime.datetime.now()\n", 246 | "\n", 247 | "# Print result\n", 248 | "exection_time = (end_time - start_time).total_seconds()\n", 249 | "print(\"Gradient Descent: execution time={t:.3f} seconds\".format(t=exection_time))" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "Time Visualization" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "from ipywidgets import IntSlider, interact\n", 266 | "from grid_search import *\n", 267 | "\n", 268 | "def plot_figure(n_iter):\n", 269 | " # Generate grid data for visualization (parameters to be swept and best combination)\n", 270 | " grid_x0, grid_x1 = generate_w(num_intervals=10)\n", 271 | " grid_objectives = grid_search(b, A, grid_x0, grid_x1)\n", 272 | " obj_star, x0_star, x1_star = get_best_parameters(grid_x0, grid_x1, grid_objectives)\n", 273 | " \n", 274 | " fig = gradient_descent_visualization(\n", 275 | " gradient_objectives_naive, gradient_xs_naive, grid_objectives, grid_x0, grid_x1, mean_x, std_x, height, weight, n_iter)\n", 276 | " fig.set_size_inches(10.0, 6.0)\n", 277 | "\n", 278 | "interact(plot_figure, n_iter=IntSlider(min=1, max=len(gradient_xs_naive)))" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "Try doing gradient descent with a better learning rate" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "# Define the parameters of the algorithm.\n", 295 | "max_iters = 50\n", 296 | "\n", 297 | "# 
***************************************************\n", 298 | "# INSERT YOUR CODE HERE\n", 299 | "# TODO: a better learning rate using the smoothness of f\n", 300 | "# ***************************************************\n", 301 | "gamma = \n", 302 | "raise NotImplementedError\n", 303 | "\n", 304 | "# Initialization\n", 305 | "x_initial = np.zeros(A.shape[1])\n", 306 | "\n", 307 | "# Start gradient descent.\n", 308 | "start_time = datetime.datetime.now()\n", 309 | "gradient_objectives, gradient_xs = gradient_descent(b, A, x_initial, max_iters, gamma)\n", 310 | "end_time = datetime.datetime.now()\n", 311 | "\n", 312 | "# Print result\n", 313 | "exection_time = (end_time - start_time).total_seconds()\n", 314 | "print(\"Gradient Descent: execution time={t:.3f} seconds\".format(t=exection_time))" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "Time visualization with a better learning rate" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "def plot_figure(n_iter):\n", 331 | " # Generate grid data for visualization (parameters to be swept and best combination)\n", 332 | " grid_x0, grid_x1 = generate_w(num_intervals=10)\n", 333 | " grid_objectives = grid_search(b, A, grid_x0, grid_x1)\n", 334 | " obj_star, x0_star, x1_star = get_best_parameters(grid_x0, grid_x1, grid_objectives)\n", 335 | " \n", 336 | " fig = gradient_descent_visualization(\n", 337 | " gradient_objectives, gradient_xs, grid_objectives, grid_x0, grid_x1, mean_x, std_x, height, weight, n_iter)\n", 338 | " fig.set_size_inches(10.0, 6.0)\n", 339 | "\n", 340 | "interact(plot_figure, n_iter=IntSlider(min=1, max=len(gradient_xs)))" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "# Loading more complex data\n", 348 | "The data is taken from https://archive.ics.uci.edu/ml/datasets/Concrete+Compressive+Strength " 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "data = np.loadtxt(\"Concrete_Data.csv\",delimiter=\",\")\n", 358 | "\"\"\"Note that running this part will change the A above, and it will cause error in the interaction plot!\"\"\"\n", 359 | "A = data[:,:-1]\n", 360 | "b = data[:,-1]\n", 361 | "A, mean_A, std_A = standardize(A)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "print('Number of samples n = ', b.shape[0])\n", 371 | "print('Dimension of each sample d = ', A.shape[1])" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "# Running gradient descent" 379 | ] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "metadata": {}, 384 | "source": [ 385 | "## Assuming bounded gradients\n", 386 | "Assume we are moving in a bounded region $\\|x\\| \\leq 25$ containing all iterates (and we assume $\\|x-x^\\star\\| \\leq 25$ as well, for simplicity). 
Then by $\\nabla f(x) = \\frac{1}{n}A^\\top (Ax - b)$, one can see that $f$ is Lipschitz over that bounded region, with Lipschitz constant $\\|\\nabla f(x)\\| \\leq \\frac{1}{n} (\\|A^\\top A\\|\\|x\\| + \\|A^\\top b\\|)$" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "# ***************************************************\n", 396 | "# INSERT YOUR CODE HERE\n", 397 | "# TODO: Compute the bound on the gradient norm\n", 398 | "# ***************************************************\n", 399 | "grad_norm_bound = \n", 400 | "raise NotImplementedError" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "Fill in the learning rate assuming bounded gradients" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "max_iters = 50\n", 417 | "\n", 418 | "# ***************************************************\n", 419 | "# INSERT YOUR CODE HERE\n", 420 | "# TODO: Compute learning rate based on bounded gradient\n", 421 | "# ***************************************************\n", 422 | "gamma = \n", 423 | "raise NotImplementedError\n", 424 | "\n", 425 | "# Initialization\n", 426 | "x_initial = np.zeros(A.shape[1])\n", 427 | "\n", 428 | "# Start gradient descent.\n", 429 | "start_time = datetime.datetime.now()\n", 430 | "bd_gradient_objectives, bd_gradient_xs = gradient_descent(b, A, x_initial, max_iters, gamma)\n", 431 | "end_time = datetime.datetime.now()\n", 432 | "\n", 433 | "\n", 434 | "# Print result\n", 435 | "exection_time = (end_time - start_time).total_seconds()\n", 436 | "print(\"Gradient Descent: execution time={t:.3f} seconds\".format(t=exection_time))\n", 437 | "\n", 438 | "# Averaging the iterates as is the case for bounded gradients case\n", 439 | "bd_gradient_objectives_averaged = []\n", 440 | "for i in range(len(bd_gradient_xs)):\n", 441 | " if i > 0:\n", 442 | " bd_gradient_xs[i] = (i * bd_gradient_xs[i-1] + bd_gradient_xs[i])/(i + 1)\n", 443 | " grad, err = compute_gradient(b, A, bd_gradient_xs[i])\n", 444 | " obj = calculate_objective(err)\n", 445 | " bd_gradient_objectives_averaged.append(obj)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "## Gradient descent using smoothness\n", 453 | "Fill in the learning rate using smoothness of the function" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [ 462 | "max_iters = 50\n", 463 | "\n", 464 | "\n", 465 | "# ***************************************************\n", 466 | "# INSERT YOUR CODE HERE\n", 467 | "# TODO: a better learning rate using the smoothness of f\n", 468 | "# ***************************************************\n", 469 | "gamma = \n", 470 | "raise NotImplementedError\n", 471 | "\n", 472 | "# Initialization\n", 473 | "x_initial = np.zeros(A.shape[1])\n", 474 | "\n", 475 | "# Start gradient descent.\n", 476 | "start_time = datetime.datetime.now()\n", 477 | "gradient_objectives, gradient_xs = gradient_descent(b, A, x_initial, max_iters, gamma)\n", 478 | "end_time = datetime.datetime.now()\n", 479 | "\n", 480 | "# Print result\n", 481 | "exection_time = (end_time - start_time).total_seconds()\n", 482 | "print(\"Gradient Descent: execution time={t:.3f} seconds\".format(t=exection_time))" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 
487 | "metadata": {}, 488 | "source": [ 489 | "## Plotting the Evolution of the Objective Function" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": {}, 496 | "outputs": [], 497 | "source": [ 498 | "plt.figure(figsize=(8, 8))\n", 499 | "plt.xlabel('Number of steps')\n", 500 | "plt.ylabel('Objective Function')\n", 501 | "#plt.yscale(\"log\")\n", 502 | "plt.plot(range(len(gradient_objectives)), gradient_objectives,'r', label='gradient descent with 1/L stepsize')\n", 503 | "plt.plot(range(len(bd_gradient_objectives)), bd_gradient_objectives,'b', label='gradient descent assuming bounded gradients')\n", 504 | "plt.plot(range(len(bd_gradient_objectives_averaged)), bd_gradient_objectives_averaged,'g', label='gradient descent assuming bounded gradients with averaged iterates')\n", 505 | "plt.legend(loc='upper right')\n", 506 | "plt.show()" 507 | ] 508 | } 509 | ], 510 | "metadata": { 511 | "anaconda-cloud": {}, 512 | "kernelspec": { 513 | "display_name": "Python 3", 514 | "language": "python", 515 | "name": "python3" 516 | }, 517 | "language_info": { 518 | "codemirror_mode": { 519 | "name": "ipython", 520 | "version": 3 521 | }, 522 | "file_extension": ".py", 523 | "mimetype": "text/x-python", 524 | "name": "python", 525 | "nbconvert_exporter": "python", 526 | "pygments_lexer": "ipython3", 527 | "version": "3.7.4" 528 | }, 529 | "widgets": { 530 | "state": { 531 | "d2b2c3aea192430e81437f33ba0b0e69": { 532 | "views": [ 533 | { 534 | "cell_index": 22 535 | } 536 | ] 537 | }, 538 | "e4a6a7a70ccd42ddb112989c04f2ed3f": { 539 | "views": [ 540 | { 541 | "cell_index": 18 542 | } 543 | ] 544 | } 545 | }, 546 | "version": "1.2.0" 547 | } 548 | }, 549 | "nbformat": 4, 550 | "nbformat_minor": 1 551 | } 552 | -------------------------------------------------------------------------------- /labs/ex02/template/grid_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Exercise 2. 3 | 4 | Grid Search 5 | """ 6 | 7 | import numpy as np 8 | 9 | 10 | def generate_w(num_intervals): 11 | """Generate a grid of values for w0 and w1.""" 12 | w0 = np.linspace(-100, 200, num_intervals) 13 | w1 = np.linspace(-150, 150, num_intervals) 14 | return w0, w1 15 | 16 | 17 | def grid_search(y, tx, w0, w1): 18 | """Algorithm for grid search.""" 19 | losses = np.zeros((len(w0), len(w1))) 20 | # compute loss for each combination of w0 and w1. 
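    # (the loss is the mean squared error: (1/(2n)) * sum of the squared residuals e = y - tx.dot(w))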
21 | for ind_row, row in enumerate(w0): 22 | for ind_col, col in enumerate(w1): 23 | w = np.array([row, col]) 24 | e = y - tx.dot(w) 25 | losses[ind_row, ind_col] = 1/2*np.mean(e**2) 26 | return losses 27 | 28 | 29 | def get_best_parameters(w0, w1, losses): 30 | """Get the best w from the result of grid search.""" 31 | min_row, min_col = np.unravel_index(np.argmin(losses), losses.shape) 32 | return losses[min_row, min_col], w0[min_row], w1[min_col] 33 | -------------------------------------------------------------------------------- /labs/ex02/template/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some helper functions.""" 3 | import numpy as np 4 | 5 | 6 | def load_data(sub_sample=True, add_outlier=False): 7 | """Load data and convert it to the metric system.""" 8 | path_dataset = "height_weight_genders.csv" 9 | data = np.genfromtxt( 10 | path_dataset, delimiter=",", skip_header=1, usecols=[1, 2]) 11 | height = data[:, 0] 12 | weight = data[:, 1] 13 | gender = np.genfromtxt( 14 | path_dataset, delimiter=",", skip_header=1, usecols=[0], 15 | converters={0: lambda x: 0 if b"Male" in x else 1}) 16 | # Convert to metric system 17 | height *= 0.025 18 | weight *= 0.454 19 | 20 | # sub-sample 21 | if sub_sample: 22 | height = height[::50] 23 | weight = weight[::50] 24 | 25 | if add_outlier: 26 | # outlier experiment 27 | height = np.concatenate([height, [1.1, 1.2]]) 28 | weight = np.concatenate([weight, [51.5/0.454, 55.3/0.454]]) 29 | 30 | return height, weight, gender 31 | 32 | 33 | def standardize(x): 34 | """Standardize the original data set.""" 35 | mean_x = np.mean(x,axis = 0) 36 | x = x - mean_x 37 | std_x = np.std(x, axis = 0) 38 | x = x / std_x 39 | return x, mean_x, std_x 40 | 41 | 42 | def build_model_data(height, weight): 43 | """Form (y,tX) to get regression data in matrix form.""" 44 | y = weight 45 | x = height 46 | num_samples = len(y) 47 | tx = np.c_[np.ones(num_samples), x] 48 | return y, tx 49 | 50 | -------------------------------------------------------------------------------- /labs/ex02/template/plots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """function for plot.""" 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from grid_search import get_best_parameters 6 | 7 | 8 | def prediction(w0, w1, mean_x, std_x): 9 | """Get the regression line from the model.""" 10 | x = np.arange(1.2, 2, 0.01) 11 | x_normalized = (x - mean_x) / std_x 12 | return x, w0 + w1 * x_normalized 13 | 14 | 15 | def base_visualization(grid_losses, w0_list, w1_list, 16 | mean_x, std_x, height, weight): 17 | """Base Visualization for both models.""" 18 | w0, w1 = np.meshgrid(w0_list, w1_list) 19 | 20 | fig = plt.figure() 21 | 22 | # plot contourf 23 | ax1 = fig.add_subplot(1, 2, 1) 24 | cp = ax1.contourf(w0, w1, grid_losses.T, cmap=plt.cm.jet) 25 | fig.colorbar(cp, ax=ax1) 26 | ax1.set_xlabel(r'$w_0$') 27 | ax1.set_ylabel(r'$w_1$') 28 | # put a marker at the minimum 29 | loss_star, w0_star, w1_star = get_best_parameters( 30 | w0_list, w1_list, grid_losses) 31 | ax1.plot(w0_star, w1_star, marker='*', color='r', markersize=20) 32 | 33 | 34 | 35 | return fig 36 | 37 | 38 | def grid_visualization(grid_losses, w0_list, w1_list, 39 | mean_x, std_x, height, weight): 40 | """Visualize how the trained model looks like under the grid search.""" 41 | fig = base_visualization( 42 | grid_losses, w0_list, w1_list, mean_x, std_x, height, weight) 43 | 
44 | loss_star, w0_star, w1_star = get_best_parameters( 45 | w0_list, w1_list, grid_losses) 46 | # plot prediciton 47 | x, f = prediction(w0_star, w1_star, mean_x, std_x) 48 | ax2 = fig.get_axes()[2] 49 | ax2.plot(x, f, 'r') 50 | 51 | return fig 52 | 53 | 54 | def gradient_descent_visualization( 55 | gradient_losses, gradient_ws, 56 | grid_losses, grid_w0, grid_w1, 57 | mean_x, std_x, height, weight, n_iter=None): 58 | """Visualize how the loss value changes until n_iter.""" 59 | fig = base_visualization( 60 | grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight) 61 | 62 | ws_to_be_plotted = np.stack(gradient_ws) 63 | if n_iter is not None: 64 | ws_to_be_plotted = ws_to_be_plotted[:n_iter] 65 | 66 | ax1 = fig.get_axes()[0] 67 | ax1.plot( 68 | ws_to_be_plotted[:, 0], ws_to_be_plotted[:, 1], 69 | marker='o', color='w', markersize=10) 70 | pred_x, pred_y = prediction( 71 | ws_to_be_plotted[-1, 0], ws_to_be_plotted[-1, 1], 72 | mean_x, std_x) 73 | 74 | return fig 75 | -------------------------------------------------------------------------------- /labs/ex03/exercise03.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex03/exercise03.pdf -------------------------------------------------------------------------------- /labs/ex03/solution03.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex03/solution03.pdf -------------------------------------------------------------------------------- /labs/ex03/template/notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Useful starting lines\n", 10 | "%matplotlib inline\n", 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "%load_ext autoreload\n", 14 | "%autoreload 2" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# Solving Fixed Point Problems\n", 22 | "\n", 23 | "In numerous applications, we encounter the task of solving equations of the form $$x = g(x)$$\n", 24 | "for a continuous function $g$. In this exercise we will see one simple method to solve such problems: $$x_{t+1} = g(x_t)\\,.$$\n", 25 | "We will solve two equations of this form: $$x = log(1+x)$$ and $$x = log(2+x)\\,.$$" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Plot $g$" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Let us see how the two functions look over an interval $[0,2]$." 
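,
    "\n",
    "As a sketch, the TODO in the next cell could be filled in as follows (one plausible completion, not necessarily the official solution):\n",
    "```python\n",
    "y1 = np.log(1 + x)   # values of g(x) for the problem x = log(1 + x)\n",
    "y2 = np.log(2 + x)   # values of g(x) for the problem x = log(2 + x)\n",
    "```"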
40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "x = np.arange(-0.1, 2, 0.0001)\n", 49 | "\"\"\"Compute the value of g(x).\"\"\"\n", 50 | "# ***************************************************\n", 51 | "# INSERT YOUR CODE HERE\n", 52 | "# TODO: compute the values of log(1+x) and log(2+x)\n", 53 | "# ***************************************************\n", 54 | "\n", 55 | "\"\"\"Plot the computed values\"\"\"\n", 56 | "fig = plt.figure()\n", 57 | "plt.plot(x, x, label='x')\n", 58 | "plt.plot(x, y1, label='$\\\log(1 + x)$')\n", 59 | "plt.plot(x, y2, label='$\\\log(2 + x)$')\n", 60 | "plt.grid(linestyle=':')\n", 61 | "plt.axhline(0, color='black')\n", 62 | "plt.axvline(0, color='black')\n", 63 | "plt.legend()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## Compute Lipschitz Constant of $g$\n", 71 | "\n", 72 | "Derive and plot the derivative of $\\log(1+x)$ and $\\log(2+x)$ over the interval $[0,2]$. What are the Lipschitz constants?" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "x = np.arange(-0.1, 2, 0.0001)\n", 82 | "\n", 83 | "\"\"\"Compute the derivative of g(x).\"\"\"\n", 84 | "# ***************************************************\n", 85 | "# INSERT YOUR CODE HERE\n", 86 | "# TODO: compute the derivatives of log(1+x) and log(2+x)\n", 87 | "# ***************************************************\n", 88 | "\n", 89 | "\"\"\"Plot the computed values\"\"\"\n", 90 | "fig = plt.figure()\n", 91 | "plt.plot(x, y1, label='$\\\log(1 + x)$')\n", 92 | "plt.plot(x, y2, label='$\\\log(2 + x)$')\n", 93 | "plt.grid(linestyle=':')\n", 94 | "plt.axhline(0, color='black')\n", 95 | "plt.axvline(0, color='black')\n", 96 | "plt.legend()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "## Computing Fixed Point" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Please fill in the function `fixed_point` below:" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "def fixed_point(initial_x, max_iters, objective):\n", 120 | " \"\"\"Compute the fixed point.\"\"\"\n", 121 | " # Define parameters to store x and objective func. values\n", 122 | " xs = []\n", 123 | " errors = []\n", 124 | " x = initial_x\n", 125 | " for n_iter in range(max_iters):\n", 126 | " # compute objective and error\n", 127 | " obj = objective(x)\n", 128 | " error = np.abs(x - obj)\n", 129 | " # store x and error\n", 130 | " xs.append(x)\n", 131 | " errors.append(error)\n", 132 | " \n", 133 | " # update x\n", 134 | " \n", 135 | " # ***************************************************\n", 136 | " # INSERT YOUR CODE HERE\n", 137 | " # TODO: compute the update\n", 138 | " # ***************************************************\n", 139 | " \n", 140 | " # print the current error\n", 141 | " if n_iter % 10 == 0: \n", 142 | " print(\"Fixed point: iteration = {i}, x = {x:.2e}, error = {err:.2e}\".format(i=n_iter, x=x, err=error))\n", 143 | " return errors, xs" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "Test your implementation on the first function. Does it converge to 0?"
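,
    "\n",
    "If you are stuck on the update step, one plausible completion of the loop body above (a sketch, not necessarily the official solution) is:\n",
    "```python\n",
    "x = obj   # one fixed-point step: x_{t+1} = g(x_t), reusing the already computed objective(x)\n",
    "```"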
151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# Define the parameters of the algorithm.\n", 160 | "max_iters = 100\n", 161 | "\n", 162 | "# Write the objective function\n", 163 | "func = lambda x: np.log(1 + x)\n", 164 | "\n", 165 | "# Initialization\n", 166 | "initial_x = 1\n", 167 | "\n", 168 | "# Run fixed point.\n", 169 | "errors_func1, xs_func1 = fixed_point(initial_x, max_iters, func)\n" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "Run your implementation on the second function." 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "# Define the parameters of the algorithm.\n", 186 | "max_iters = 100\n", 187 | "\n", 188 | "# Write the second objective function\n", 189 | "func = lambda x: np.log(2 + x)\n", 190 | "\n", 191 | "# Initialization\n", 192 | "initial_x = 1\n", 193 | "\n", 194 | "# Run fixed point.\n", 195 | "errors_func2, xs_func2 = fixed_point(initial_x, max_iters, func)\n" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "**Plotting error values**" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "scrolled": false 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "plt.semilogy()\n", 214 | "plt.xlabel('Number of steps')\n", 215 | "plt.ylabel('Value of Error')\n", 216 | "#plt.yscale(\"log\")\n", 217 | "plt.plot(range(len(errors_func1)), errors_func1, label='$\\\log(1 + x)$')\n", 218 | "plt.plot(range(len(errors_func2)), errors_func2, label='$\\\log(2 + x)$')\n", 219 | "\n", 220 | "plt.legend()\n", 221 | "plt.show()" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "What do you observe about the rates of convergence on the two problems? Can you explain this difference?\n", 229 | "\n", 230 | "Repeat the experiment with different starting points. Try also with $x=\\sin(x)$, $x = \\sin(x+1)$, and $x = \\sin(x+2)$. How about other functions?"
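,
    "\n",
    "One way to study the rates numerically (a sketch; the helper name and defaults below are my own, not part of the lab):\n",
    "```python\n",
    "def error_ratios(g, x0=1.0, n_iters=50):\n",
    "    # ratios error_{t+1} / error_t for the iteration x_{t+1} = g(x_t)\n",
    "    x, errs = x0, []\n",
    "    for _ in range(n_iters):\n",
    "        errs.append(np.abs(x - g(x)))\n",
    "        x = g(x)\n",
    "    return [errs[t + 1] / errs[t] for t in range(n_iters - 1)]\n",
    "\n",
    "# For log(2+x) the ratios approach |g'(x*)| = 1/(2 + x*) ~ 0.32 (a linear rate);\n",
    "# for log(1+x) they creep towards 1, since g'(0) = 1 (sublinear convergence).\n",
    "print(error_ratios(lambda x: np.log(2 + x))[-1])\n",
    "print(error_ratios(lambda x: np.log(1 + x))[-1])\n",
    "```"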
231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [] 239 | } 240 | ], 241 | "metadata": { 242 | "anaconda-cloud": {}, 243 | "kernelspec": { 244 | "display_name": "Python 3", 245 | "language": "python", 246 | "name": "python3" 247 | }, 248 | "language_info": { 249 | "codemirror_mode": { 250 | "name": "ipython", 251 | "version": 3 252 | }, 253 | "file_extension": ".py", 254 | "mimetype": "text/x-python", 255 | "name": "python", 256 | "nbconvert_exporter": "python", 257 | "pygments_lexer": "ipython3", 258 | "version": "3.7.4" 259 | }, 260 | "toc": { 261 | "base_numbering": 1, 262 | "nav_menu": {}, 263 | "number_sections": true, 264 | "sideBar": true, 265 | "skip_h1_title": false, 266 | "title_cell": "Table of Contents", 267 | "title_sidebar": "Contents", 268 | "toc_cell": false, 269 | "toc_position": {}, 270 | "toc_section_display": true, 271 | "toc_window_display": false 272 | }, 273 | "widgets": { 274 | "state": { 275 | "d2b2c3aea192430e81437f33ba0b0e69": { 276 | "views": [ 277 | { 278 | "cell_index": 22 279 | } 280 | ] 281 | }, 282 | "e4a6a7a70ccd42ddb112989c04f2ed3f": { 283 | "views": [ 284 | { 285 | "cell_index": 18 286 | } 287 | ] 288 | } 289 | }, 290 | "version": "1.2.0" 291 | } 292 | }, 293 | "nbformat": 4, 294 | "nbformat_minor": 1 295 | } 296 | -------------------------------------------------------------------------------- /labs/ex04/exercise04.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex04/exercise04.pdf -------------------------------------------------------------------------------- /labs/ex04/solution/helpers.py: -------------------------------------------------------------------------------- 1 | import networkx 2 | import numpy as np 3 | 4 | n_nodes = 16 5 | def generate_torus_adj_matrix(n_nodes): 6 | G = networkx.generators.lattice.grid_2d_graph(int(np.sqrt(n_nodes)), int(np.sqrt(n_nodes)), periodic=True) 7 | # Adjacency matrix 8 | A = networkx.adjacency_matrix(G).toarray() 9 | 10 | # Add self-loops 11 | for i in range(0, A.shape[0]): 12 | A[i][i] = 1 13 | return A -------------------------------------------------------------------------------- /labs/ex04/solution/torus_topology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex04/solution/torus_topology.png -------------------------------------------------------------------------------- /labs/ex04/solution04.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex04/solution04.pdf -------------------------------------------------------------------------------- /labs/ex04/template/Lab 4 - Random Walks--alternative.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# Useful starting lines\n", 12 | "%matplotlib inline\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from helpers import *\n", 16 | "%load_ext autoreload\n", 17 | "%autoreload 2" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | 
"metadata": {}, 23 | "source": [ 24 | "# Random Walks" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "In this exercise you will implement a simple random walk on a torus graph and will check its convergence to uniform distribution.\n", 32 | "\n", 33 | "Torus is a 2D-grid graph and looks like a 'doughnout', as shown in the picture below. \n", 34 | "\"Drawing\"" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "**Note:** We will use the networkx library to generate our graph. You can install this using\n", 42 | "\n", 43 | "```bash\n", 44 | " pip3 install --upgrade --user networkx\n", 45 | "```\n", 46 | "\n", 47 | "Let's generate the probability matrix $\\mathbf{G}$ of a torus graph of size $4\\times 4$, note that we include self-loops too. You can play around with the code in the helpers.py to generate different graphs." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "n_nodes = 25\n", 59 | "A = generate_torus_adj_matrix(n_nodes)\n", 60 | "G = A/5" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "Implement random walk on a torus." 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": true 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "def random_walk(G, num_iter):\n", 79 | " ''' Performs num_iter steps of a random walk on a graph with graph transition matrix G,\n", 80 | " starting from the initial position at node 1.\n", 81 | " Output:\n", 82 | " positions: an array of integer positions in a graph at each iteration\n", 83 | " '''\n", 84 | " pos = 0\n", 85 | " positions = [pos]\n", 86 | " for t in range(0, num_iter):\n", 87 | " pos = # TODO: fill in here code to jump to random neighbor\n", 88 | " positions.append(pos)\n", 89 | " return positions" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "Now we will estimate probability distribution at each iteration. For this we will perform random walk on a graph 100 times and calculate how frequently we visited each node at each iteration." 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "num_iter = 15\n", 108 | "num_repeat = 100 * 1000\n", 109 | "frequencies = np.zeros((num_iter + 1, n_nodes))\n", 110 | "for _ in range(num_repeat):\n", 111 | " positions = random_walk(G, num_iter)\n", 112 | " frequencies[np.arange(len(positions)), np.array(positions)] += 1\n", 113 | "frequencies /= num_repeat" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "Lets plot our distribution at the last step." 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": { 127 | "collapsed": true 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "plt.bar(np.arange(n_nodes), frequencies[-1])\n", 132 | "plt.xlabel(\"node\")\n", 133 | "plt.ylabel(\"probability\")" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "You can see that the final disctribution is indeed uniform. Lets now plot how fast did the algorithm converge. 
For this you need to compute the errors $||x_{t} - \\mu||_2^2$ at each iteration, where $x_{t}$ is the distribution at step $t$ and $\\mu$ is the uniform distribution." 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "errors = # TODO: fill in here" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "Let's now plot how fast the algorithm converged. We will use a logarithmic scale on the y-axis to be able to distinguish between sublinear and linear rates." 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": { 165 | "collapsed": true 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "plt.semilogy(errors)\n", 170 | "plt.xlabel(\"iteration\")\n", 171 | "plt.ylabel(\"$||x_{t} - \\\mu||_2^2$\")\n", 172 | "plt.show()" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "anaconda-cloud": {}, 178 | "kernelspec": { 179 | "display_name": "Python 3", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.5.2" 194 | }, 195 | "widgets": { 196 | "state": { 197 | "d2b2c3aea192430e81437f33ba0b0e69": { 198 | "views": [ 199 | { 200 | "cell_index": 22 201 | } 202 | ] 203 | }, 204 | "e4a6a7a70ccd42ddb112989c04f2ed3f": { 205 | "views": [ 206 | { 207 | "cell_index": 18 208 | } 209 | ] 210 | } 211 | }, 212 | "version": "1.2.0" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 1 217 | } 218 | -------------------------------------------------------------------------------- /labs/ex04/template/helpers.py: -------------------------------------------------------------------------------- 1 | import networkx 2 | import numpy as np 3 | 4 | n_nodes = 16 5 | def generate_torus_adj_matrix(n_nodes): 6 | G = networkx.generators.lattice.grid_2d_graph(int(np.sqrt(n_nodes)), int(np.sqrt(n_nodes)), periodic=True) 7 | # Adjacency matrix 8 | A = networkx.adjacency_matrix(G).toarray() 9 | 10 | # Add self-loops 11 | for i in range(0, A.shape[0]): 12 | A[i][i] = 1 13 | return A -------------------------------------------------------------------------------- /labs/ex04/template/notebook_lab04.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Useful starting lines\n", 10 | "%matplotlib inline\n", 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "try:\n", 14 | " import google.colab\n", 15 | " IN_COLAB = True\n", 16 | "except:\n", 17 | " IN_COLAB = False\n", 18 | "if IN_COLAB:\n", 19 | " # Clone the entire repo to access the files.\n", 20 | " !git clone -l -s https://github.com/epfml/OptML_course.git cloned-repo\n", 21 | " %cd cloned-repo/labs/ex04/template/\n", 22 | "\n", 23 | "from helpers import *\n", 24 | "%load_ext autoreload\n", 25 | "%autoreload 2" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Random Walks" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "In this exercise you will implement a simple random 
walk on a torus graph and check its convergence to the uniform distribution.\n", 40 | "\n", 41 | "A torus is a 2D grid graph that looks like a 'doughnut', as shown in the picture below. \n", 42 | "\"Drawing\"" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "**Note:** We will use the networkx library to generate our graph. You can install this using\n", 50 | "\n", 51 | "```bash\n", 52 | " pip3 install --upgrade --user networkx\n", 53 | "```\n", 54 | "\n", 55 | "Let's generate the probability matrix $\\mathbf{G}$ of a torus graph of size $4\\times 4$; note that we include self-loops too. You can play around with the code in helpers.py to generate different graphs." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "n_nodes = 16\n", 65 | "A = generate_torus_adj_matrix(n_nodes)\n", 66 | "degree = # fill in here the degree of a node in the graph\n", 67 | "G = A/degree" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "Let's generate the initial probability distribution. Recall that our walk always starts from node 1." 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "x_init = # fill in here" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "As you will prove in Q2, the probability distribution at each step evolves as $x_{t + 1} = G x_{t}$. " 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "def random_walk(G, x_init, num_iter):\n", 100 | " ''' Computes the probability distribution of the random walk after\n", 101 | " num_iter steps.\n", 102 | " Output: \n", 103 | " x: final estimate of the probability distribution after\n", 104 | " num_iter steps\n", 105 | " errors: array of differences ||x_{t} - mu||_2^2, where\n", 106 | " mu is the uniform distribution\n", 107 | " '''\n", 108 | " x = np.copy(x_init)\n", 109 | " errors = np.zeros(num_iter)\n", 110 | " mu = # fill in here\n", 111 | " for t in range(0, num_iter):\n", 112 | " # ***************************************************\n", 113 | " # INSERT YOUR CODE HERE\n", 114 | " # TODO: simulate probability distribution in random walk\n", 115 | " # ***************************************************\n", 116 | " return x, errors" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "Let's run our algorithm for 50 iterations and look at the final probability distribution." 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "x, errors = random_walk(G, x_init, num_iter=50)\n", 133 | "plt.bar(np.arange(len(x)), x)\n", 134 | "plt.xlabel(\"node\")\n", 135 | "plt.ylabel(\"probability\")" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "We see that the final distribution is indeed uniform. Let's now plot how fast the algorithm converged. We will use a logarithmic scale on the y-axis to be able to distinguish between sublinear and linear rates."
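,
    "\n",
    "In case the `random_walk` cell above is still incomplete, here is a minimal sketch of one valid completion (an assumption on my part, not the official solution):\n",
    "```python\n",
    "mu = np.ones(len(x_init)) / len(x_init)   # uniform target distribution\n",
    "for t in range(0, num_iter):\n",
    "    x = G @ x                              # one step of x_{t+1} = G x_t\n",
    "    errors[t] = np.sum((x - mu) ** 2)      # squared L2 distance to uniform\n",
    "```"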
143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "plt.semilogy(errors)\n", 152 | "plt.xlabel(\"iteration\")\n", 153 | "plt.ylabel(\"$||x_{t} - mu||_2^2$\")" 154 | ] 155 | } 156 | ], 157 | "metadata": { 158 | "anaconda-cloud": {}, 159 | "kernelspec": { 160 | "display_name": "Python 3", 161 | "language": "python", 162 | "name": "python3" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | "name": "ipython", 167 | "version": 3 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython3", 174 | "version": "3.7.4" 175 | }, 176 | "widgets": { 177 | "state": { 178 | "d2b2c3aea192430e81437f33ba0b0e69": { 179 | "views": [ 180 | { 181 | "cell_index": 22 182 | } 183 | ] 184 | }, 185 | "e4a6a7a70ccd42ddb112989c04f2ed3f": { 186 | "views": [ 187 | { 188 | "cell_index": 18 189 | } 190 | ] 191 | } 192 | }, 193 | "version": "1.2.0" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 1 198 | } 199 | -------------------------------------------------------------------------------- /labs/ex04/template/torus_topology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex04/template/torus_topology.png -------------------------------------------------------------------------------- /labs/ex05/exercise05.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex05/exercise05.pdf -------------------------------------------------------------------------------- /labs/ex05/solution/gradient_descent.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Lab 3. 3 | 4 | Gradient descent 5 | """ 6 | 7 | import numpy as np 8 | 9 | def calculate_mse(e): 10 | """Calculate the mean squared error for vector e.""" 11 | return 1/2*np.mean(e**2) 12 | 13 | def compute_gradient(b, A, x): 14 | """Compute the gradient.""" 15 | err = b - A.dot(x) 16 | grad = -A.T.dot(err) / len(err) 17 | return grad, err 18 | 19 | def gradient_descent(b, A, initial_x, max_iters, gamma): 20 | """Gradient descent algorithm.""" 21 | # Define parameters to store x and objective func. values 22 | xs = [initial_x] 23 | objectives = [] 24 | x = initial_x 25 | for n_iter in range(max_iters): 26 | # compute objective, gradient 27 | grad, err = compute_gradient(b, A, x) 28 | obj = calculate_mse(err) 29 | # update x by a gradient descent step 30 | x = x - gamma * grad 31 | # store x and objective function value 32 | xs.append(x) 33 | objectives.append(obj) 34 | print("Gradient Descent({bi}/{ti}): objective={l}".format( 35 | bi=n_iter, ti=max_iters - 1, l=obj)) 36 | 37 | return objectives, xs -------------------------------------------------------------------------------- /labs/ex05/solution/grid_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Exercise 2. 
3 | 4 | Grid Search 5 | """ 6 | 7 | import numpy as np 8 | 9 | 10 | def generate_w(num_intervals): 11 | """Generate a grid of values for w0 and w1.""" 12 | w0 = np.linspace(-100, 200, num_intervals) 13 | w1 = np.linspace(-150, 150, num_intervals) 14 | return w0, w1 15 | 16 | 17 | def grid_search(y, tx, w0, w1): 18 | """Algorithm for grid search.""" 19 | losses = np.zeros((len(w0), len(w1))) 20 | # compute loss for each combination of w0 and w1. 21 | for ind_row, row in enumerate(w0): 22 | for ind_col, col in enumerate(w1): 23 | w = np.array([row, col]) 24 | e = y - tx.dot(w) 25 | losses[ind_row, ind_col] = 1/2*np.mean(e**2) 26 | return losses 27 | 28 | 29 | def get_best_parameters(w0, w1, losses): 30 | """Get the best w from the result of grid search.""" 31 | min_row, min_col = np.unravel_index(np.argmin(losses), losses.shape) 32 | return losses[min_row, min_col], w0[min_row], w1[min_col] 33 | -------------------------------------------------------------------------------- /labs/ex05/solution/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some helper functions.""" 3 | import numpy as np 4 | 5 | 6 | def load_data(sub_sample=True, add_outlier=False): 7 | """Load data and convert it to the metric system.""" 8 | path_dataset = "height_weight_genders.csv" 9 | data = np.genfromtxt( 10 | path_dataset, delimiter=",", skip_header=1, usecols=[1, 2]) 11 | height = data[:, 0] 12 | weight = data[:, 1] 13 | gender = np.genfromtxt( 14 | path_dataset, delimiter=",", skip_header=1, usecols=[0], 15 | converters={0: lambda x: 0 if b"Male" in x else 1}) 16 | # Convert to metric system 17 | height *= 0.025 18 | weight *= 0.454 19 | 20 | # sub-sample 21 | if sub_sample: 22 | height = height[::50] 23 | weight = weight[::50] 24 | 25 | if add_outlier: 26 | # outlier experiment 27 | height = np.concatenate([height, [1.1, 1.2]]) 28 | weight = np.concatenate([weight, [51.5/0.454, 55.3/0.454]]) 29 | 30 | return height, weight, gender 31 | 32 | 33 | def standardize(x): 34 | """Standardize the original data set.""" 35 | mean_x = np.mean(x,axis = 0) 36 | x = x - mean_x 37 | std_x = np.std(x, axis = 0) 38 | x = x / std_x 39 | return x, mean_x, std_x 40 | 41 | 42 | def build_model_data(height, weight): 43 | """Form (y,tX) to get regression data in matrix form.""" 44 | y = weight 45 | x = height 46 | num_samples = len(y) 47 | tx = np.c_[np.ones(num_samples), x] 48 | return y, tx 49 | 50 | 51 | def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True): 52 | """ 53 | Generate a minibatch iterator for a dataset. 54 | Takes as input two iterables (here the output desired values 'y' and the input data 'tx') 55 | Outputs an iterator which gives mini-batches of `batch_size` matching elements from `y` and `tx`. 56 | Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches. 
57 | Example of use : 58 | for minibatch_y, minibatch_tx in batch_iter(y, tx, 32): 59 | 60 | """ 61 | data_size = len(y) 62 | 63 | if shuffle: 64 | shuffle_indices = np.random.permutation(np.arange(data_size)) 65 | shuffled_y = y[shuffle_indices] 66 | shuffled_tx = tx[shuffle_indices] 67 | else: 68 | shuffled_y = y 69 | shuffled_tx = tx 70 | for batch_num in range(num_batches): 71 | start_index = batch_num * batch_size 72 | end_index = min((batch_num + 1) * batch_size, data_size) 73 | if start_index != end_index: 74 | yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index] 75 | 76 | -------------------------------------------------------------------------------- /labs/ex05/solution/plots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """function for plot.""" 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from grid_search import get_best_parameters 6 | 7 | 8 | def prediction(w0, w1, mean_x, std_x): 9 | """Get the regression line from the model.""" 10 | x = np.arange(1.2, 2, 0.01) 11 | x_normalized = (x - mean_x) / std_x 12 | return x, w0 + w1 * x_normalized 13 | 14 | 15 | def base_visualization(grid_losses, w0_list, w1_list, 16 | mean_x, std_x, height, weight): 17 | """Base Visualization for both models.""" 18 | w0, w1 = np.meshgrid(w0_list, w1_list) 19 | 20 | fig = plt.figure() 21 | 22 | # plot contourf 23 | ax1 = fig.add_subplot(1, 2, 1) 24 | cp = ax1.contourf(w0, w1, grid_losses.T, cmap=plt.cm.jet) 25 | fig.colorbar(cp, ax=ax1) 26 | ax1.set_xlabel(r'$w_0$') 27 | ax1.set_ylabel(r'$w_1$') 28 | # put a marker at the minimum 29 | loss_star, w0_star, w1_star = get_best_parameters( 30 | w0_list, w1_list, grid_losses) 31 | ax1.plot(w0_star, w1_star, marker='*', color='r', markersize=20) 32 | 33 | 34 | 35 | return fig 36 | 37 | 38 | def grid_visualization(grid_losses, w0_list, w1_list, 39 | mean_x, std_x, height, weight): 40 | """Visualize how the trained model looks like under the grid search.""" 41 | fig = base_visualization( 42 | grid_losses, w0_list, w1_list, mean_x, std_x, height, weight) 43 | 44 | loss_star, w0_star, w1_star = get_best_parameters( 45 | w0_list, w1_list, grid_losses) 46 | # plot prediciton 47 | x, f = prediction(w0_star, w1_star, mean_x, std_x) 48 | ax2 = fig.get_axes()[2] 49 | ax2.plot(x, f, 'r') 50 | 51 | return fig 52 | 53 | 54 | def gradient_descent_visualization( 55 | gradient_losses, gradient_ws, 56 | grid_losses, grid_w0, grid_w1, 57 | mean_x, std_x, height, weight, n_iter=None): 58 | """Visualize how the loss value changes until n_iter.""" 59 | fig = base_visualization( 60 | grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight) 61 | 62 | ws_to_be_plotted = np.stack(gradient_ws) 63 | if n_iter is not None: 64 | ws_to_be_plotted = ws_to_be_plotted[:n_iter] 65 | 66 | ax1 = fig.get_axes()[0] 67 | ax1.plot( 68 | ws_to_be_plotted[:, 0], ws_to_be_plotted[:, 1], 69 | marker='o', color='w', markersize=10) 70 | pred_x, pred_y = prediction( 71 | ws_to_be_plotted[-1, 0], ws_to_be_plotted[-1, 1], 72 | mean_x, std_x) 73 | 74 | return fig 75 | -------------------------------------------------------------------------------- /labs/ex05/solution05.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex05/solution05.pdf -------------------------------------------------------------------------------- /labs/ex05/template/gradient_descent.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Lab 3. 3 | 4 | Gradient descent 5 | """ 6 | 7 | import numpy as np 8 | 9 | def calculate_mse(e): 10 | """Calculate the mean squared error for vector e.""" 11 | # *************************************************** 12 | # INSERT YOUR CODE HERE 13 | # TODO: compute mean squared error 14 | # *************************************************** 15 | raise NotImplementedError 16 | 17 | def compute_gradient(b, A, x): 18 | """Compute the gradient.""" 19 | # *************************************************** 20 | # INSERT YOUR CODE HERE 21 | # TODO: compute gradient and objective 22 | # *************************************************** 23 | 24 | return grad, err 25 | 26 | def gradient_descent(b, A, initial_x, max_iters, gamma): 27 | """Gradient descent algorithm.""" 28 | # Define parameters to store x and objective func. values 29 | xs = [initial_x] 30 | objectives = [] 31 | x = initial_x 32 | for n_iter in range(max_iters): 33 | # *************************************************** 34 | # INSERT YOUR CODE HERE 35 | # TODO: compute gradient and objective function 36 | # *************************************************** 37 | raise NotImplementedError 38 | # *************************************************** 39 | # INSERT YOUR CODE HERE 40 | # TODO: update x by a gradient descent step 41 | # *************************************************** 42 | raise NotImplementedError 43 | # store x and objective function value 44 | xs.append(x) 45 | objectives.append(obj) 46 | print("Gradient Descent({bi}/{ti}): objective={l}".format( 47 | bi=n_iter, ti=max_iters - 1, l=obj)) 48 | 49 | return objectives, xs -------------------------------------------------------------------------------- /labs/ex05/template/grid_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Exercise 2. 3 | 4 | Grid Search 5 | """ 6 | 7 | import numpy as np 8 | 9 | 10 | def generate_w(num_intervals): 11 | """Generate a grid of values for w0 and w1.""" 12 | w0 = np.linspace(-100, 200, num_intervals) 13 | w1 = np.linspace(-150, 150, num_intervals) 14 | return w0, w1 15 | 16 | 17 | def grid_search(y, tx, w0, w1): 18 | """Algorithm for grid search.""" 19 | losses = np.zeros((len(w0), len(w1))) 20 | # compute loss for each combination of w0 and w1. 
21 | for ind_row, row in enumerate(w0): 22 | for ind_col, col in enumerate(w1): 23 | w = np.array([row, col]) 24 | e = y - tx.dot(w) 25 | losses[ind_row, ind_col] = 1/2*np.mean(e**2) 26 | return losses 27 | 28 | 29 | def get_best_parameters(w0, w1, losses): 30 | """Get the best w from the result of grid search.""" 31 | min_row, min_col = np.unravel_index(np.argmin(losses), losses.shape) 32 | return losses[min_row, min_col], w0[min_row], w1[min_col] 33 | -------------------------------------------------------------------------------- /labs/ex05/template/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some helper functions.""" 3 | import numpy as np 4 | 5 | 6 | def load_data(sub_sample=True, add_outlier=False): 7 | """Load data and convert it to the metric system.""" 8 | path_dataset = "height_weight_genders.csv" 9 | data = np.genfromtxt( 10 | path_dataset, delimiter=",", skip_header=1, usecols=[1, 2]) 11 | height = data[:, 0] 12 | weight = data[:, 1] 13 | gender = np.genfromtxt( 14 | path_dataset, delimiter=",", skip_header=1, usecols=[0], 15 | converters={0: lambda x: 0 if b"Male" in x else 1}) 16 | # Convert to metric system 17 | height *= 0.025 18 | weight *= 0.454 19 | 20 | # sub-sample 21 | if sub_sample: 22 | height = height[::50] 23 | weight = weight[::50] 24 | 25 | if add_outlier: 26 | # outlier experiment 27 | height = np.concatenate([height, [1.1, 1.2]]) 28 | weight = np.concatenate([weight, [51.5/0.454, 55.3/0.454]]) 29 | 30 | return height, weight, gender 31 | 32 | 33 | def standardize(x): 34 | """Standardize the original data set.""" 35 | mean_x = np.mean(x,axis = 0) 36 | x = x - mean_x 37 | std_x = np.std(x, axis = 0) 38 | x = x / std_x 39 | return x, mean_x, std_x 40 | 41 | 42 | def build_model_data(height, weight): 43 | """Form (y,tX) to get regression data in matrix form.""" 44 | y = weight 45 | x = height 46 | num_samples = len(y) 47 | tx = np.c_[np.ones(num_samples), x] 48 | return y, tx 49 | 50 | 51 | def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True): 52 | """ 53 | Generate a minibatch iterator for a dataset. 54 | Takes as input two iterables (here the output desired values 'y' and the input data 'tx') 55 | Outputs an iterator which gives mini-batches of `batch_size` matching elements from `y` and `tx`. 56 | Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches. 
57 | Example of use : 58 | for minibatch_y, minibatch_tx in batch_iter(y, tx, 32): 59 | 60 | """ 61 | data_size = len(y) 62 | 63 | if shuffle: 64 | shuffle_indices = np.random.permutation(np.arange(data_size)) 65 | shuffled_y = y[shuffle_indices] 66 | shuffled_tx = tx[shuffle_indices] 67 | else: 68 | shuffled_y = y 69 | shuffled_tx = tx 70 | for batch_num in range(num_batches): 71 | start_index = batch_num * batch_size 72 | end_index = min((batch_num + 1) * batch_size, data_size) 73 | if start_index != end_index: 74 | yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index] 75 | 76 | -------------------------------------------------------------------------------- /labs/ex05/template/plots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """function for plot.""" 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from grid_search import get_best_parameters 6 | 7 | 8 | def prediction(w0, w1, mean_x, std_x): 9 | """Get the regression line from the model.""" 10 | x = np.arange(1.2, 2, 0.01) 11 | x_normalized = (x - mean_x) / std_x 12 | return x, w0 + w1 * x_normalized 13 | 14 | 15 | def base_visualization(grid_losses, w0_list, w1_list, 16 | mean_x, std_x, height, weight): 17 | """Base Visualization for both models.""" 18 | w0, w1 = np.meshgrid(w0_list, w1_list) 19 | 20 | fig = plt.figure() 21 | 22 | # plot contourf 23 | ax1 = fig.add_subplot(1, 2, 1) 24 | cp = ax1.contourf(w0, w1, grid_losses.T, cmap=plt.cm.jet) 25 | fig.colorbar(cp, ax=ax1) 26 | ax1.set_xlabel(r'$w_0$') 27 | ax1.set_ylabel(r'$w_1$') 28 | # put a marker at the minimum 29 | loss_star, w0_star, w1_star = get_best_parameters( 30 | w0_list, w1_list, grid_losses) 31 | ax1.plot(w0_star, w1_star, marker='*', color='r', markersize=20) 32 | 33 | 34 | 35 | return fig 36 | 37 | 38 | def grid_visualization(grid_losses, w0_list, w1_list, 39 | mean_x, std_x, height, weight): 40 | """Visualize how the trained model looks like under the grid search.""" 41 | fig = base_visualization( 42 | grid_losses, w0_list, w1_list, mean_x, std_x, height, weight) 43 | 44 | loss_star, w0_star, w1_star = get_best_parameters( 45 | w0_list, w1_list, grid_losses) 46 | # plot prediciton 47 | x, f = prediction(w0_star, w1_star, mean_x, std_x) 48 | ax2 = fig.get_axes()[2] 49 | ax2.plot(x, f, 'r') 50 | 51 | return fig 52 | 53 | 54 | def gradient_descent_visualization( 55 | gradient_losses, gradient_ws, 56 | grid_losses, grid_w0, grid_w1, 57 | mean_x, std_x, height, weight, n_iter=None): 58 | """Visualize how the loss value changes until n_iter.""" 59 | fig = base_visualization( 60 | grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight) 61 | 62 | ws_to_be_plotted = np.stack(gradient_ws) 63 | if n_iter is not None: 64 | ws_to_be_plotted = ws_to_be_plotted[:n_iter] 65 | 66 | ax1 = fig.get_axes()[0] 67 | ax1.plot( 68 | ws_to_be_plotted[:, 0], ws_to_be_plotted[:, 1], 69 | marker='o', color='w', markersize=10) 70 | pred_x, pred_y = prediction( 71 | ws_to_be_plotted[-1, 0], ws_to_be_plotted[-1, 1], 72 | mean_x, std_x) 73 | 74 | return fig 75 | -------------------------------------------------------------------------------- /labs/ex06/exercise06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex06/exercise06.pdf -------------------------------------------------------------------------------- /labs/ex06/solution/helper.py: 
-------------------------------------------------------------------------------- 1 | from math import inf 2 | 3 | import matplotlib 4 | import numpy as np 5 | import sklearn.datasets 6 | import torch 7 | from matplotlib import pyplot as plt 8 | from matplotlib.colors import LinearSegmentedColormap 9 | from torch.utils import data 10 | 11 | import sys, os 12 | SEED = 0 13 | 14 | # Disable printing while visualization 15 | def disable_print(): 16 | sys.stdout = open(os.devnull, 'w') 17 | 18 | # Restore printing 19 | def enable_print(): 20 | sys.stdout = sys.__stdout__ 21 | 22 | def generate_dataset(name, n_samples=200): 23 | """ 24 | Generate a random dataset with any of the predefined structures 25 | `blobs`, `moons`, `circles`, `bar`, or `xor` 26 | """ 27 | # Use Scikit-Learn's make_* functions to generate the samples 28 | if name == "blobs": 29 | coordinates, labels = sklearn.datasets.make_blobs(n_samples=n_samples, centers=2, random_state = SEED) 30 | elif name == "moons": 31 | coordinates, labels = sklearn.datasets.make_moons(n_samples=n_samples, random_state = SEED) 32 | coordinates[labels == 1] += 0.1 33 | coordinates[labels == 0] -= 0.1 34 | elif name == "circles": 35 | coordinates, labels = sklearn.datasets.make_circles(n_samples=n_samples, random_state = SEED) 36 | coordinates[labels == 1] *= 0.5 37 | elif name == "bar": 38 | # coordinates = np.random.rand(n_samples, 2) * 2 - 1 # range -1 to 1 39 | 40 | x_coordinate, y_coordinate = np.meshgrid( 41 | np.linspace(-1, 1, 12, dtype=np.float32), 42 | np.linspace(-1, 1, 6, dtype=np.float32), 43 | ) 44 | coordinates = np.stack([x_coordinate.reshape(-1), y_coordinate.reshape(-1)], axis=-1) 45 | n_samples = len(coordinates) 46 | 47 | l1norm = np.linalg.norm(coordinates, ord=inf, axis=1) 48 | labels = np.ones_like(l1norm).astype(np.int64) 49 | labels[np.abs(coordinates[:, 0]) < 0.1] = 0 50 | elif name == "xor": 51 | np.random.seed(SEED) 52 | coordinates = np.random.rand(n_samples, 2) 53 | 54 | # Create a small gap between the classes 55 | gap_size = 0 56 | coordinates[coordinates[:, 0] > 0.5, 0] += gap_size * 0.5 57 | coordinates[coordinates[:, 0] < 0.5, 0] -= gap_size * 0.5 58 | coordinates[coordinates[:, 1] > 0.5, 1] += gap_size * 0.5 59 | coordinates[coordinates[:, 1] < 0.5, 1] -= gap_size * 0.5 60 | 61 | labels = np.logical_xor(coordinates[:, 0] > 0.5, coordinates[:, 1] > 0.5).astype(np.int64) 62 | noisy_index = np.where(np.random.binomial(1, 0.1, size = len(coordinates)))[0] 63 | coordinates[noisy_index] += np.random.laplace(0, 0.1, [len(noisy_index), 2]) 64 | 65 | else: 66 | raise ValueError("Unknown dataset name {}".format(name)) 67 | 68 | # Convert to PyTorch 69 | coordinates = coordinates.astype(np.float32) 70 | coordinates = torch.from_numpy(coordinates) 71 | labels = torch.from_numpy(labels) 72 | 73 | # Normalize the range of coordinates to be 0 to 1 74 | coordinates -= torch.min(coordinates, 0, keepdim=True)[0] 75 | coordinates /= torch.max(coordinates, 0, keepdim=True)[0] 76 | 77 | # Create a PyTorch dataset 78 | dataset = data.TensorDataset(coordinates, labels) 79 | 80 | # Split it 50/50 into train and test 81 | train, test = torch.utils.data.random_split(dataset, [n_samples // 2, n_samples // 2]) 82 | return train, test 83 | 84 | def visualize_one_dataset(dataset: data.Dataset, ax: matplotlib.axes.Axes): 85 | for coordinate, label in dataset: 86 | x, y = coordinate 87 | color = {0: "#bada55", 1: "#55bada"}[label.item()] 88 | marker = {0: "+", 1: "."}[label.item()] 89 | ax.scatter(x, y, c=color, marker=marker) 90 | 91 | 92 | 
def visualize_datasets(datasets): 93 | f, axes = plt.subplots(2, len(datasets)) 94 | f.set_figheight(7) 95 | f.set_figwidth(14) 96 | axes[0][0].set_ylabel("Training") 97 | axes[1][0].set_ylabel("Test") 98 | for i, (name, train_set, test_set) in enumerate(datasets): 99 | visualize_one_dataset(train_set, ax=axes[0][i]) 100 | visualize_one_dataset(test_set, ax=axes[1][i]) 101 | axes[0][i].set_title(name) 102 | plt.show() 103 | 104 | #%% Visualize the predictions of a model on a grid 105 | def predict_grid(model, ax, xmin=-0.1, xmax=1.1, ymin=-0.1, ymax=1.1, num_grid_points=40): 106 | x_coordinate, y_coordinate = np.meshgrid( 107 | np.linspace(xmin, xmax, num_grid_points, dtype=np.float32), 108 | np.linspace(ymin, ymax, num_grid_points, dtype=np.float32), 109 | ) 110 | x_coordinate = torch.from_numpy(x_coordinate) 111 | y_coordinate = torch.from_numpy(y_coordinate) 112 | coordinates = torch.stack([x_coordinate.view(-1), y_coordinate.view(-1)], dim=-1) 113 | predictions = torch.nn.functional.softmax(model(coordinates), dim=1)[:, 1] 114 | 115 | predictions = predictions.view(*x_coordinate.shape).detach() 116 | cmap = LinearSegmentedColormap.from_list("bada55_dark", ["#4d5b23", "#234d5b"], N=100) 117 | ax.pcolormesh(x_coordinate, y_coordinate, predictions, cmap=cmap) 118 | 119 | def visualize_predictions(datasets, model, optimize): 120 | f, axes = plt.subplots(3, len(datasets)) 121 | f.set_figheight(10) 122 | f.set_figwidth(14) 123 | axes[0][0].set_ylabel("Training") 124 | axes[1][0].set_ylabel("Test") 125 | axes[2][0].set_ylabel("Test Loss") 126 | for i, (name, train_set, test_set) in enumerate(datasets): 127 | axes[0][i].set_title(name + ' (%s)'% model.name) 128 | # train model 129 | model.init_params(train_set) 130 | disable_print() 131 | losses = optimize(train_set, test_set, model) 132 | enable_print() 133 | #plot results 134 | predict_grid(model, ax=axes[1][i]) 135 | visualize_one_dataset(train_set, ax=axes[0][i]) 136 | visualize_one_dataset(test_set, ax=axes[1][i]) 137 | axes[2][i].plot(losses) 138 | axes[2][i].set_ylim([0,1]) 139 | plt.show() -------------------------------------------------------------------------------- /labs/ex06/solution/mixing_helpers.py: -------------------------------------------------------------------------------- 1 | import networkx 2 | import numpy as np 3 | 4 | n_nodes = 16 5 | def generate_torus_adj_matrix(n_nodes): 6 | G = networkx.generators.lattice.grid_2d_graph(int(np.sqrt(n_nodes)), int(np.sqrt(n_nodes)), periodic=True) 7 | # Adjacency matrix 8 | A = networkx.adjacency_matrix(G).toarray() 9 | 10 | # Add self-loops 11 | for i in range(0, A.shape[0]): 12 | A[i][i] = 1 13 | return A -------------------------------------------------------------------------------- /labs/ex06/solution06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex06/solution06.pdf -------------------------------------------------------------------------------- /labs/ex06/template/Lab_6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "try:\n", 10 | " import google.colab\n", 11 | " IN_COLAB = True\n", 12 | "except:\n", 13 | " IN_COLAB = False\n", 14 | "if IN_COLAB:\n", 15 | " # Clone the entire repo to access the files.\n", 16 | " !git clone -l -s 
https://github.com/epfml/OptML_course.git cloned-repo\n", 17 | " %cd cloned-repo/labs/ex06/template/" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "# Useful starting lines\n", 27 | "%matplotlib inline\n", 28 | "\n", 29 | "import numpy as np\n", 30 | "import torch\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "\n", 33 | "from helper import generate_dataset, visualize_one_dataset, visualize_datasets, predict_grid, visualize_predictions\n", 34 | "from torch.utils import data\n", 35 | "\n", 36 | "%load_ext autoreload\n", 37 | "%autoreload 2" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "# Generate data\n", 45 | "\n", 46 | "We provide a helper function that generates artificial datasets." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "NUM_SAMPLES = 100\n", 56 | "\n", 57 | "blobs_train, blobs_test = generate_dataset(\"blobs\", NUM_SAMPLES)\n", 58 | "moons_train, moons_test = generate_dataset(\"moons\", NUM_SAMPLES)\n", 59 | "xor_train, xor_test = generate_dataset(\"xor\", NUM_SAMPLES)\n", 60 | "squares_train, squares_test = generate_dataset(\"bar\", NUM_SAMPLES)\n", 61 | "\n", 62 | "# The generate_dataset function returns PyTorch dataset objects\n", 63 | "type(blobs_train), type(blobs_test)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Let us plot the train and the test data sets. Note the differences between the two." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "datasets = [\n", 80 | " (\"Blobs\", blobs_train, blobs_test),\n", 81 | " (\"Moons\", moons_train, moons_test),\n", 82 | " (\"Bar\", squares_train, squares_test),\n", 83 | " (\"XOR\", xor_train, xor_test)\n", 84 | "]\n", 85 | "\n", 86 | "visualize_datasets(datasets)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "# Optimizing using PyTorch\n", 94 | "\n", 95 | "Write an optimizer in PyTorch using its default SGD class." 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "def optimize(train_data, test_data, model, loss_fn = torch.nn.CrossEntropyLoss(), lr = 0.1):\n", 105 | " \"\"\"\n", 106 | " Stochastic Gradient Descent optimizer\n", 107 | " \n", 108 | " :param train_data: torch.utils.data.dataset.Subset\n", 109 | " :param test_data: torch.utils.data.dataset.Subset\n", 110 | " :param model: torch.nn.Module (see https://pytorch.org/docs/stable/nn.html)\n", 111 | " :param loss_fn: torch.nn.modules.loss (see https://pytorch.org/docs/stable/nn.html#id51)\n", 112 | " :param lr: float, learning rate\n", 113 | " \n", 114 | " :return:\n", 115 | " - objectives, a list of loss values on the test dataset, collected at the end of each pass over the dataset (epoch)\n", 116 | " \"\"\"\n", 117 | " # default PyTorch utilities for loading the training and test data\n", 118 | " train_loader = data.DataLoader(train_data, batch_size=10, shuffle=True)\n", 119 | " test_loader = data.DataLoader(test_data, batch_size=NUM_SAMPLES)\n", 120 | " losses = []\n", 121 | " \n", 122 | " # ***************************************************\n", 123 | " # INSERT YOUR CODE HERE\n", 124 | " # TODO: Define SGD optimizer 
with learning rate = lr\n", 125 | " # HINT: Use torch.optim.SGD and model.parameters()\n", 126 | " # ***************************************************\n", 127 | " optimizer = ?\n", 128 | " \n", 129 | " # Run SGD\n", 130 | " for epoch in range(1000):\n", 131 | " for minibatch, label in train_loader:\n", 132 | " \n", 133 | " optimizer.zero_grad() # Zeroes the previously computed gradients\n", 134 | " \n", 135 | " # ***************************************************\n", 136 | " # INSERT YOUR CODE HERE\n", 137 | " # TODO: prediction on minibatch\n", 138 | " # HINT: Use model.forward\n", 139 | " # ***************************************************\n", 140 | " prediction = ?\n", 141 | " \n", 142 | " # ***************************************************\n", 143 | " # INSERT YOUR CODE HERE\n", 144 | " # TODO: compute the loss on prediction\n", 145 | " # HINT: Use loss_fn\n", 146 | " # ***************************************************\n", 147 | " loss = ?\n", 148 | " \n", 149 | " # ***************************************************\n", 150 | " # INSERT YOUR CODE HERE\n", 151 | " # TODO: compute the minibatch gradient\n", 152 | " # HINT: Use loss.backward!\n", 153 | " # ***************************************************\n", 154 | " \n", 155 | " \n", 156 | " # ***************************************************\n", 157 | " # INSERT YOUR CODE HERE\n", 158 | " # TODO: perform an SGD step\n", 159 | " # HINT: Use optimizer.step!\n", 160 | " # ***************************************************\n", 161 | " \n", 162 | " \n", 163 | " # Compute the test loss\n", 164 | " for minibatch, label in test_loader:\n", 165 | " # we let torch know that we don't intend to call .backward\n", 166 | " with torch.no_grad():\n", 167 | " # ***************************************************\n", 168 | " # INSERT YOUR CODE HERE\n", 169 | " # TODO: compute the test prediction and test loss\n", 170 | " # ***************************************************\n", 171 | " loss = ?\n", 172 | " \n", 173 | " losses.append(loss.item())\n", 174 | " \n", 175 | " # Print the test loss to monitor progress\n", 176 | " if epoch % 100 == 0:\n", 177 | " print(epoch, loss.item())\n", 178 | " \n", 179 | " return losses" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "# RBF Kernel\n", 187 | "\n", 188 | "An RBF kernel is the most commonly used `out of the box` kernel method for non-linear data. Intuitively, an RBF kernel blurs the training data and uses this for classification, i.e. the individual green and blue points above get blurred into green and blue regions, which are used to make predictions. A critical parameter `sigma` defines the width of this blurring: a large `sigma` results in more blurring.\n", 189 | "\n", 190 | "See [here](https://github.com/epfml/ML_course/blob/master/lectures/07/lecture07b_kernelRidge.pdf) for more information on the `kernel trick` and [here](https://www.cs.huji.ac.il/~shais/Lectures2014/lecture8.pdf) for an in-depth mathematical treatment. Here, we will try to develop an intuition for the RBF kernel."
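,
    "\n",
    "As a small, self-contained illustration of the kernel itself (a sketch; the function name and the toy shapes are my own, not part of the lab):\n",
    "```python\n",
    "def rbf_kernel(X, Y, sigma=0.1):\n",
    "    # K[i, j] = exp(-||X[i] - Y[j]||^2 / (2 * sigma^2))\n",
    "    gamma = 1 / (2 * sigma ** 2)\n",
    "    sq_dists = torch.cdist(X, Y) ** 2   # pairwise squared Euclidean distances\n",
    "    return torch.exp(-gamma * sq_dists)\n",
    "\n",
    "K = rbf_kernel(torch.rand(5, 2), torch.rand(3, 2), sigma=0.5)\n",
    "print(K.shape)   # torch.Size([5, 3]); larger sigma pushes entries closer to 1\n",
    "```"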
191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "class RadialBasisFunction(torch.nn.Module):\n", 200 | " def __init__(self, sigma=0.1):\n", 201 | " super().__init__()\n", 202 | " self.gamma = 1 / (2 * sigma ** 2)\n", 203 | " self.num_classes = 2\n", 204 | " self.name = 'RBF'\n", 205 | " \n", 206 | " def init_params(self, train_data):\n", 207 | " # data reshaping to do torch broadcasting magic\n", 208 | " data_matrix = train_data.dataset.tensors[0][train_data.indices, :]\n", 209 | " self.data_matrix = data_matrix.t().view(1, *data_matrix.t().shape)\n", 210 | " \n", 211 | " # ***************************************************\n", 212 | " # INSERT YOUR CODE HERE\n", 213 | " # TODO: initialize parameters alpha to 0\n", 214 | " # HINT: use torch.zeros\n", 215 | " # ***************************************************\n", 216 | " zeros = ?\n", 217 | " \n", 218 | " self.alpha = torch.nn.Parameter(zeros)\n", 219 | "\n", 220 | " def forward(self, minibatch):\n", 221 | " minibatch = minibatch.view(*minibatch.shape, 1)\n", 222 | " K = torch.exp(\n", 223 | " -self.gamma * torch.sum((self.data_matrix - minibatch) ** 2, dim=1, keepdim=True)\n", 224 | " ).squeeze()\n", 225 | " return K @ self.alpha\n" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "Try your code on the blobs data set. Your test loss should be around 0.1 by the end." 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "rbf_model = RadialBasisFunction(0.5)\n", 242 | "rbf_model.init_params(blobs_train)\n", 243 | "rbf_blob_losses = optimize(blobs_train, blobs_test, rbf_model)\n", 244 | "\n", 245 | "plt.plot(rbf_blob_losses)\n", 246 | "plt.xlabel('Epoch')\n", 247 | "plt.ylabel('Test Loss')\n", 248 | "plt.show()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Plot the training data points and the predictions made." 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "ax = plt.gca()\n", 265 | "predict_grid(rbf_model, ax)\n", 266 | "visualize_one_dataset(blobs_train, ax)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "## Effect of sigma" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "Plot the results while varying the value of sigma (the kernel width passed to `RadialBasisFunction`) in [0.1, 0.5, 1]. What do you observe? 
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": null,
286 | "metadata": {},
287 | "outputs": [],
288 | "source": [
289 | "rbf_model = RadialBasisFunction(0.1)\n",
290 | "visualize_predictions(datasets, rbf_model, optimize)"
291 | ]
292 | },
293 | {
294 | "cell_type": "markdown",
295 | "metadata": {},
296 | "source": [
297 | "# Neural Networks\n",
298 | "\n",
299 | "We will create a simple two-layer neural network using the standard building blocks provided by PyTorch."
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": null,
305 | "metadata": {},
306 | "outputs": [],
307 | "source": [
308 | "class NeuralNetwork(torch.nn.Sequential):\n",
309 | "    def __init__(self, hidden_layer_size):\n",
310 | "        self.name = 'NN'\n",
311 | "        self.num_classes = 2\n",
312 | "        # ***************************************************\n",
313 | "        # INSERT YOUR CODE HERE\n",
314 | "        # TODO: Define your neural network model with ReLU\n",
315 | "        # HINT: Use torch.nn.Sequential and torch.nn.ReLU\n",
316 | "        # ***************************************************\n",
317 | "        super().__init__(\n",
318 | "            ?\n",
319 | "        )\n",
320 | "    \n",
321 | "    def init_params(self, train_data):\n",
322 | "        ''' No need to do anything since it is taken care of by torch.nn.Sequential'''"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {},
328 | "source": [
329 | "Plot the results while varying the size of the hidden layer in [20, 200, 1000].\n",
330 | "\n",
331 | "What do you observe?"
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "execution_count": null,
337 | "metadata": {},
338 | "outputs": [],
339 | "source": [
340 | "nn_model = NeuralNetwork(200)\n",
341 | "visualize_predictions(datasets, nn_model, optimize)"
342 | ]
343 | },
344 | {
345 | "cell_type": "markdown",
346 | "metadata": {},
347 | "source": [
348 | "Also try increasing the number of layers. How does this affect the learnt classifier?\n",
349 | "\n",
350 | "[This](https://playground.tensorflow.org/) is a cool website where you can play around more with training of neural networks on toy datasets."
351 | ]
352 | },
353 | {
354 | "cell_type": "markdown",
355 | "metadata": {
356 | "collapsed": true
357 | },
358 | "source": [
359 | "## Momentum in training neural networks\n",
360 | "\n",
361 | "In the lecture, we have seen how Nesterov momentum can accelerate gradient descent on convex functions. Let's now explore whether similar benefits can be achieved on non-convex functions (neural networks in this case) and for *stochastic* gradient descent. It is important to note that theory usually describes convergence on the training objective. In a typical machine learning setting, however, we care about the loss on _unseen_ data. For that reason, here, we will always look at the loss on the test set.\n",
362 | "\n",
363 | "__Exercise__ Add an argument `momentum` to the function `optimize` you completed earlier. Hint: `torch.optim.SGD` also has a `momentum` argument that you can use. This implements heavy-ball momentum, which is similar to, but [slightly different](https://dominikschmidt.xyz/nesterov-momentum/) from, Nesterov momentum. You can try either variant, but they should have a similar effect."
364 | ]
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "metadata": {},
369 | "source": [
370 | "### Hyper-parameter tuning\n",
371 | "\n",
372 | "You will be comparing the SGD optimizer without momentum and with a momentum of 0.9 (a common value). To do this fairly, you need to find a good learning rate for each variant."
373 | ]
374 | },
375 | {
376 | "cell_type": "markdown",
377 | "metadata": {},
378 | "source": [
379 | "__Exercise__
\n", 380 | "Find a good learning rate for SGD without momentum. You can try different learning rates on an exponential grid (i.e. 0.2, 0.4, 0.8, ...), and record the best test loss in each experiment with `np.min(losses)`. Use `NeuralNetwork(200)` as a model, and use this data:" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [ 389 | "dataset = datasets[1] # and use this dataset, you can try others as well\n", 390 | "dataset_name = dataset[0]\n", 391 | "train_data = dataset[1]\n", 392 | "test_data = dataset[2]\n", 393 | "dataset_name" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "plt.xlabel(\"Learning rate\")\n", 403 | "plt.ylabel(\"Best loss seen\")\n", 404 | "\n", 405 | "best_loss = None\n", 406 | "best_learning_rate = None\n", 407 | "\n", 408 | "grid = ### TODO, find a reasonable range, of values to try\n", 409 | "print(\"Learning rates to try:\", grid)\n", 410 | "\n", 411 | "for learning_rate in grid:\n", 412 | " model = NeuralNetwork(200)\n", 413 | " test_losses = optimize(train_data, test_data, model, lr=learning_rate)\n", 414 | " best_loss_achieved = ### TODO\n", 415 | " plt.scatter(learning_rate, best_loss_achieved)\n", 416 | " if ### TODO:\n", 417 | " best_loss = best_loss_achieved\n", 418 | " best_learning_rate = learning_rate" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": [ 427 | "f\"The learning rate {best_learning_rate} worked well for SGD without momentum.\"" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [ 436 | "plt.xlabel(\"Learning rate\")\n", 437 | "plt.ylabel(\"Best loss seen\")\n", 438 | "\n", 439 | "best_loss = None\n", 440 | "best_learning_rate = None\n", 441 | "\n", 442 | "# The grid is chosen by trial and error in this case, \n", 443 | "grid = ### TODO, find a reasonable range, of values to try\n", 444 | "print(\"Learning rates to try:\", grid)\n", 445 | "\n", 446 | "for learning_rate in grid:\n", 447 | " model = NeuralNetwork(200)\n", 448 | " test_losses = optimize(train_data, test_data, model, lr=learning_rate, momentum=0.9)\n", 449 | " best_loss_achieved = ### TODO\n", 450 | " plt.scatter(learning_rate, best_loss_achieved)\n", 451 | " if ### TODO:\n", 452 | " best_loss = best_loss_achieved\n", 453 | " best_learning_rate = learning_rate" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [ 462 | "f\"The learning rate {best_learning_rate} worked well for SGD with 0.9 momentum.\"" 463 | ] 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "metadata": {}, 468 | "source": [ 469 | "### Comparing the optimizers\n", 470 | "\n", 471 | "__Exercise__
\n", 472 | "Plot test loss curves for both optimizers with the best learning rates you found for each. Do you see a benefit of momentum? How stable are the improvements over different datasets or different initializations? Can you think of ways to make the hyperparameter search procedure more stable?" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "model = NeuralNetwork(200) # reinitialize the model\n", 482 | "plt.plot(optimize(train_data, test_data, model, lr=### TODO, momentum=0.9), label=\"SGD with 0.9 momentum\")\n", 483 | "\n", 484 | "model = NeuralNetwork(200) # reinitialize the model\n", 485 | "plt.plot(optimize(train_data, test_data, model, lr=### TODO, momentum=0.0), label=\"SGD\")\n", 486 | "\n", 487 | "plt.ylabel(\"Test loss\")\n", 488 | "plt.xlabel(\"Epochs\")\n", 489 | "plt.ylim([0, 1])\n", 490 | "plt.legend();" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [] 499 | } 500 | ], 501 | "metadata": { 502 | "kernelspec": { 503 | "display_name": "Python 3", 504 | "language": "python", 505 | "name": "python3" 506 | }, 507 | "language_info": { 508 | "codemirror_mode": { 509 | "name": "ipython", 510 | "version": 3 511 | }, 512 | "file_extension": ".py", 513 | "mimetype": "text/x-python", 514 | "name": "python", 515 | "nbconvert_exporter": "python", 516 | "pygments_lexer": "ipython3", 517 | "version": "3.7.4" 518 | }, 519 | "toc": { 520 | "base_numbering": 1, 521 | "nav_menu": {}, 522 | "number_sections": true, 523 | "sideBar": true, 524 | "skip_h1_title": false, 525 | "title_cell": "Table of Contents", 526 | "title_sidebar": "Contents", 527 | "toc_cell": false, 528 | "toc_position": {}, 529 | "toc_section_display": true, 530 | "toc_window_display": false 531 | } 532 | }, 533 | "nbformat": 4, 534 | "nbformat_minor": 2 535 | } 536 | -------------------------------------------------------------------------------- /labs/ex06/template/helper.py: -------------------------------------------------------------------------------- 1 | from math import inf 2 | 3 | import matplotlib 4 | import numpy as np 5 | import sklearn.datasets 6 | import torch 7 | from matplotlib import pyplot as plt 8 | from matplotlib.colors import LinearSegmentedColormap 9 | from torch.utils import data 10 | 11 | import sys, os 12 | SEED = 0 13 | 14 | # Disable printing while visualization 15 | def disable_print(): 16 | sys.stdout = open(os.devnull, 'w') 17 | 18 | # Restore printing 19 | def enable_print(): 20 | sys.stdout = sys.__stdout__ 21 | 22 | def generate_dataset(name, n_samples=200): 23 | """ 24 | Generate a random dataset with any of the predefined structures 25 | `blobs`, `moons`, `circles`, `bar`, or `xor` 26 | """ 27 | # Use Scikit-Learn's make_* functions to generate the samples 28 | if name == "blobs": 29 | coordinates, labels = sklearn.datasets.make_blobs(n_samples=n_samples, centers=2, random_state = SEED) 30 | elif name == "moons": 31 | coordinates, labels = sklearn.datasets.make_moons(n_samples=n_samples, random_state = SEED) 32 | coordinates[labels == 1] += 0.1 33 | coordinates[labels == 0] -= 0.1 34 | elif name == "circles": 35 | coordinates, labels = sklearn.datasets.make_circles(n_samples=n_samples, random_state = SEED) 36 | coordinates[labels == 1] *= 0.5 37 | elif name == "bar": 38 | # coordinates = np.random.rand(n_samples, 2) * 2 - 1 # range -1 to 1 39 | 40 | x_coordinate, y_coordinate = np.meshgrid( 
41 | np.linspace(-1, 1, 12, dtype=np.float32), 42 | np.linspace(-1, 1, 6, dtype=np.float32), 43 | ) 44 | coordinates = np.stack([x_coordinate.reshape(-1), y_coordinate.reshape(-1)], axis=-1) 45 | n_samples = len(coordinates) 46 | 47 | l1norm = np.linalg.norm(coordinates, ord=inf, axis=1) 48 | labels = np.ones_like(l1norm).astype(np.int64) 49 | labels[np.abs(coordinates[:, 0]) < 0.1] = 0 50 | elif name == "xor": 51 | np.random.seed(SEED) 52 | coordinates = np.random.rand(n_samples, 2) 53 | 54 | # Create a small gap between the classes 55 | gap_size = 0 56 | coordinates[coordinates[:, 0] > 0.5, 0] += gap_size * 0.5 57 | coordinates[coordinates[:, 0] < 0.5, 0] -= gap_size * 0.5 58 | coordinates[coordinates[:, 1] > 0.5, 1] += gap_size * 0.5 59 | coordinates[coordinates[:, 1] < 0.5, 1] -= gap_size * 0.5 60 | 61 | labels = np.logical_xor(coordinates[:, 0] > 0.5, coordinates[:, 1] > 0.5).astype(np.int64) 62 | noisy_index = np.where(np.random.binomial(1, 0.1, size = len(coordinates)))[0] 63 | coordinates[noisy_index] += np.random.laplace(0, 0.1, [len(noisy_index), 2]) 64 | 65 | else: 66 | raise ValueError("Unknown dataset name {}".format(name)) 67 | 68 | # Convert to PyTorch 69 | coordinates = coordinates.astype(np.float32) 70 | coordinates = torch.from_numpy(coordinates) 71 | labels = torch.from_numpy(labels) 72 | 73 | # Normalize the range of coordinates to be 0 to 1 74 | coordinates -= torch.min(coordinates, 0, keepdim=True)[0] 75 | coordinates /= torch.max(coordinates, 0, keepdim=True)[0] 76 | 77 | # Create a PyTorch dataset 78 | dataset = data.TensorDataset(coordinates, labels) 79 | 80 | # Split it 50/50 into train and test 81 | train, test = torch.utils.data.random_split(dataset, [n_samples // 2, n_samples // 2]) 82 | return train, test 83 | 84 | def visualize_one_dataset(dataset: data.Dataset, ax: matplotlib.axes.Axes): 85 | for coordinate, label in dataset: 86 | x, y = coordinate 87 | color = {0: "#bada55", 1: "#55bada"}[label.item()] 88 | marker = {0: "+", 1: "."}[label.item()] 89 | ax.scatter(x, y, c=color, marker=marker) 90 | 91 | 92 | def visualize_datasets(datasets): 93 | f, axes = plt.subplots(2, len(datasets)) 94 | f.set_figheight(7) 95 | f.set_figwidth(14) 96 | axes[0][0].set_ylabel("Training") 97 | axes[1][0].set_ylabel("Test") 98 | for i, (name, train_set, test_set) in enumerate(datasets): 99 | visualize_one_dataset(train_set, ax=axes[0][i]) 100 | visualize_one_dataset(test_set, ax=axes[1][i]) 101 | axes[0][i].set_title(name) 102 | plt.show() 103 | 104 | #%% Visualize the predictions of a model on a grid 105 | def predict_grid(model, ax, xmin=-0.1, xmax=1.1, ymin=-0.1, ymax=1.1, num_grid_points=40): 106 | x_coordinate, y_coordinate = np.meshgrid( 107 | np.linspace(xmin, xmax, num_grid_points, dtype=np.float32), 108 | np.linspace(ymin, ymax, num_grid_points, dtype=np.float32), 109 | ) 110 | x_coordinate = torch.from_numpy(x_coordinate) 111 | y_coordinate = torch.from_numpy(y_coordinate) 112 | coordinates = torch.stack([x_coordinate.view(-1), y_coordinate.view(-1)], dim=-1) 113 | predictions = torch.nn.functional.softmax(model(coordinates), dim=1)[:, 1] 114 | 115 | predictions = predictions.view(*x_coordinate.shape).detach() 116 | cmap = LinearSegmentedColormap.from_list("bada55_dark", ["#4d5b23", "#234d5b"], N=100) 117 | ax.pcolormesh(x_coordinate, y_coordinate, predictions, cmap=cmap) 118 | 119 | def visualize_predictions(datasets, model, optimize): 120 | f, axes = plt.subplots(3, len(datasets)) 121 | f.set_figheight(10) 122 | f.set_figwidth(14) 123 | 
    axes[0][0].set_ylabel("Training")
124 |     axes[1][0].set_ylabel("Test")
125 |     axes[2][0].set_ylabel("Test Loss")
126 |     for i, (name, train_set, test_set) in enumerate(datasets):
127 |         axes[0][i].set_title(name + ' (%s)' % model.name)
128 |         # train the model
129 |         model.init_params(train_set)
130 |         disable_print()
131 |         losses = optimize(train_set, test_set, model)
132 |         enable_print()
133 |         # plot the results
134 |         predict_grid(model, ax=axes[1][i])
135 |         visualize_one_dataset(train_set, ax=axes[0][i])
136 |         visualize_one_dataset(test_set, ax=axes[1][i])
137 |         axes[2][i].plot(losses)
138 |         axes[2][i].set_ylim([0, 1])
139 |     plt.show()
--------------------------------------------------------------------------------
/labs/ex07/exercise07.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex07/exercise07.pdf
--------------------------------------------------------------------------------
/labs/ex07/solution07.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex07/solution07.pdf
--------------------------------------------------------------------------------
/labs/ex07/template/Lab 7 - Fixed Point with Newton.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# Useful starting lines\n",
10 | "%matplotlib inline\n",
11 | "import numpy as np\n",
12 | "import matplotlib.pyplot as plt\n",
13 | "%load_ext autoreload\n",
14 | "%autoreload 2"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# Fixed point iteration\n",
22 | "\n",
23 | "In numerous applications, we encounter the task of solving equations of the form $$x = g(x)$$\n",
24 | "for a continuous function $g$. In lab 03 we saw one simple method to solve such problems: $$x_{t+1} = g(x_t)\\,.$$\n",
25 | "We solved two apparently similar equations, $x = \\log(1+x)$ and $x = \\log(2+x)$, which showed very different convergence behavior.\n",
26 | "\n",
27 | "## Newton steps\n",
28 | "\n",
29 | "This week's task is to adapt the iterative algorithm to use Newton-style steps. Like in lab 03, we can do this by expressing the update step as a gradient-descent update and computing its second derivative.\n",
30 | "\n"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "## Plot $g$"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "Let us see how the two functions look over the interval $[0,2]$."
45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "x = np.arange(0, 2, 0.0001)\n", 54 | "y1 = np.log(1 + x)\n", 55 | "y2 = np.log(2 + x)\n", 56 | "fig = plt.figure()\n", 57 | "plt.plot(x, x, label='x')\n", 58 | "plt.plot(x, y1, label='$\\log(1 + x)$')\n", 59 | "plt.plot(x, y2, label='$\\log(2 + x)$')\n", 60 | "plt.grid(linestyle=':')\n", 61 | "plt.axhline(0, color='black')\n", 62 | "plt.axvline(0, color='black')\n", 63 | "plt.legend()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Please fill in the functions `fixed_point_newton` below:" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "def fixed_point_newton(initial_x, max_iters, objective, objective_grad):\n", 80 | " \"\"\"Compute the fixed point.\"\"\"\n", 81 | " # Define parameters to store x and objective func. values\n", 82 | " xs = []\n", 83 | " errors = []\n", 84 | " x = initial_x\n", 85 | " for n_iter in range(max_iters):\n", 86 | " # compute objective and error\n", 87 | " obj = objective(x)\n", 88 | " error = np.abs(x - obj)\n", 89 | " # store x and error\n", 90 | " xs.append(x)\n", 91 | " errors.append(error)\n", 92 | " \n", 93 | " ########################\n", 94 | " # @TODO Insert your code here\n", 95 | " # UPDATE x with a Newton step\n", 96 | " ########################\n", 97 | " \n", 98 | " # print the current error\n", 99 | " if n_iter % 10 == 0: \n", 100 | " print(\"Fixed point: iteration ={i}, x = {x:.2e}, error={err:.2e}\".format(i=n_iter, x=x, err=error))\n", 101 | " return errors, xs\n", 102 | "\n", 103 | "def fixed_point(initial_x, max_iters, objective):\n", 104 | " \"\"\"Compute the fixed point.\"\"\"\n", 105 | " # Define parameters to store x and objective func. values\n", 106 | " xs = []\n", 107 | " errors = []\n", 108 | " x = initial_x\n", 109 | " for n_iter in range(max_iters):\n", 110 | " # compute objective and error\n", 111 | " obj = objective(x)\n", 112 | " error = np.abs(x - obj)\n", 113 | " # store x and error\n", 114 | " xs.append(x)\n", 115 | " errors.append(error)\n", 116 | " # update x \n", 117 | " x = obj\n", 118 | " # print the current error\n", 119 | " if n_iter % 10 == 0: \n", 120 | " print(\"Fixed point: iteration ={i}, x = {x:.2e}, error={err:.2e}\".format(i=n_iter, x=x, err=error))\n", 121 | " return errors, xs" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Let's test the implementations and compare it to the original algorithm from lab 03:" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "# Define the parameters of the algorithm.\n", 138 | "max_iters = 100\n", 139 | "\n", 140 | "# Initialization\n", 141 | "initial_x = 1\n", 142 | "\n", 143 | "# Run fixed point.\n", 144 | "errors_func1, xs_func1 = fixed_point(\n", 145 | " initial_x, \n", 146 | " max_iters, \n", 147 | " lambda x: np.log(1 + x)\n", 148 | ")\n", 149 | "\n", 150 | "errors_func1_newton, xs_func1_newton = fixed_point_newton(\n", 151 | " initial_x, \n", 152 | " max_iters, \n", 153 | " lambda x: np.log(1 + x), \n", 154 | " lambda x: 1./(1. 
+ x)\n", 155 | ")\n" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "Run your implementation on the second function" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "# Define the parameters of the algorithm.\n", 172 | "max_iters = 100\n", 173 | "\n", 174 | "# Initialization\n", 175 | "initial_x = 1\n", 176 | "\n", 177 | "# Run fixed point.\n", 178 | "errors_func2, xs_func2 = fixed_point(\n", 179 | " initial_x, \n", 180 | " max_iters, \n", 181 | " lambda x: np.log(2 + x)\n", 182 | ")\n", 183 | "\n", 184 | "errors_func2_newton, xs_func2_newton = fixed_point_newton(\n", 185 | " initial_x, \n", 186 | " max_iters, \n", 187 | " lambda x: np.log(2 + x), \n", 188 | " lambda x: 1./(2. + x)\n", 189 | ")\n" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "**Plotting error values**" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "plt.semilogy()\n", 206 | "plt.xlabel('Number of steps')\n", 207 | "plt.ylabel('Value of Error')\n", 208 | "#plt.yscale(\"log\")\n", 209 | "plt.plot(range(len(errors_func1)), errors_func1, label='$log(1 + x)$')\n", 210 | "plt.plot(range(len(errors_func2)), errors_func2, label='$log(2 + x)$')\n", 211 | "plt.plot(range(len(errors_func1_newton)), errors_func1_newton, label='$log(1 + x)$ (Newton)')\n", 212 | "plt.plot(range(len(errors_func2_newton)), errors_func2_newton, label='$log(2 + x)$ (Newton)')\n", 213 | "\n", 214 | "plt.legend()\n", 215 | "plt.show()" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "What do you observe about the rates of convergence of the two methods? Can you explain this difference?" 
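As an aside, here is one hedged sketch of the missing Newton update in `fixed_point_newton` above (an assumption about the intended solution, not the official one). Following the gradient-descent framing from lab 03, solving $x = g(x)$ means finding a root of $f'(x) = x - g(x)$, so $f''(x) = 1 - g'(x)$ and the Newton step becomes:

# Newton step for the fixed-point problem x = g(x), with f'(x) = x - g(x)
# and f''(x) = 1 - g'(x); objective_grad supplies g'.
x = x - (x - objective(x)) / (1.0 - objective_grad(x))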
223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [] 231 | } 232 | ], 233 | "metadata": { 234 | "anaconda-cloud": {}, 235 | "kernelspec": { 236 | "display_name": "Python 3 (ipykernel)", 237 | "language": "python", 238 | "name": "python3" 239 | }, 240 | "language_info": { 241 | "codemirror_mode": { 242 | "name": "ipython", 243 | "version": 3 244 | }, 245 | "file_extension": ".py", 246 | "mimetype": "text/x-python", 247 | "name": "python", 248 | "nbconvert_exporter": "python", 249 | "pygments_lexer": "ipython3", 250 | "version": "3.10.11" 251 | }, 252 | "widgets": { 253 | "state": { 254 | "d2b2c3aea192430e81437f33ba0b0e69": { 255 | "views": [ 256 | { 257 | "cell_index": 22 258 | } 259 | ] 260 | }, 261 | "e4a6a7a70ccd42ddb112989c04f2ed3f": { 262 | "views": [ 263 | { 264 | "cell_index": 18 265 | } 266 | ] 267 | } 268 | }, 269 | "version": "1.2.0" 270 | } 271 | }, 272 | "nbformat": 4, 273 | "nbformat_minor": 4 274 | } 275 | -------------------------------------------------------------------------------- /labs/ex08/exercise08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex08/exercise08.pdf -------------------------------------------------------------------------------- /labs/ex08/solution/solution-svm-derivation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex08/solution/solution-svm-derivation.pdf -------------------------------------------------------------------------------- /labs/ex08/solution08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex08/solution08.pdf -------------------------------------------------------------------------------- /labs/ex08/template/Lab_8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from collections import defaultdict\n", 10 | "import numpy as np\n", 11 | "import scipy\n", 12 | "import scipy.sparse as sps\n", 13 | "import math\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import time\n", 16 | "from sklearn.datasets import load_svmlight_file\n", 17 | "import random\n", 18 | "%matplotlib inline" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "source": [ 27 | "# Support Vector Machines\n", 28 | "## Classification Using SVM\n", 29 | "Load dataset. 
We will use the w1a dataset from the LibSVM collection: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "The original optimization problem for the Support Vector Machine (SVM) is given by\n",
37 | "\\begin{equation}\\label{eq:primal}\n",
38 | "  \\min_{w \\in R^d} \\ \\sum_{i=1}^n \\ell(y_i A_i^\\top w) + \\frac\\lambda2 \\|w\\|^2\n",
39 | "\\end{equation}\n",
40 | "where $\\ell : R\\rightarrow R$, $\\ell(z) := \\max\\{0,1-z\\}$ is the hinge loss function.\n",
41 | "Here for any $i$, $1\\le i\\le n$, the vector $A_i\\in R^d$ is the $i$-th data example, and $y_i\\in\\{\\pm1\\}$ is the corresponding label.\n",
42 | " \n",
43 | "The dual optimization problem for the SVM is given by \n",
44 | "\\begin{equation}\\label{eq:dual}\n",
45 | "  \\max_{\\boldsymbol{\\alpha} \\in R^n } \\ \\alpha^\\top\\boldsymbol{1} - \\tfrac1{2\\lambda} \\alpha^\\top Y A A^\\top Y\\alpha\n",
46 | "  \\text{ such that $0\\le \\alpha_i \\le 1 \\ \\forall i$}\n",
47 | "\\end{equation}\n",
48 | "where $Y := \\mathop{diag}(y)$, and $A\\in R^{n \\times d}$ again collects all $n$ data examples, this time as its rows.\n",
49 | "\n",
50 | "Note that $w$ can be derived from $\\alpha$ as\n",
51 | "\\begin{equation}\n",
52 | "  w(\\alpha) = \\frac{1}{\\lambda} A^\\top Y \\alpha.\n",
53 | "\\end{equation}"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "DATA_TRAIN_PATH = 'data/w1a'\n",
63 | "\n",
64 | "A, y = load_svmlight_file(DATA_TRAIN_PATH)\n",
65 | "A = A.toarray()\n",
66 | "print(y.shape, A.shape)"
67 | ]
68 | },
69 | {
70 | "cell_type": "markdown",
71 | "metadata": {},
72 | "source": [
73 | "## Prepare cost and prediction functions"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "def calculate_primal_objective(y, A, w, lambda_):\n",
83 | "    \"\"\"\n",
84 | "    Compute the full cost (the primal objective), that is loss plus regularizer.\n",
85 | "    y: +1 or -1 labels, shape = (num_examples)\n",
86 | "    A: Dataset matrix, shape = (num_examples, num_features)\n",
87 | "    w: Model weights, shape = (num_features)\n",
88 | "    return: scalar value\n",
89 | "    \"\"\"\n",
90 | "    # ***************************************************\n",
91 | "    # INSERT YOUR CODE HERE\n",
92 | "    # TODO\n",
93 | "    # ***************************************************\n",
94 | "    raise NotImplementedError"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": null,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "def calculate_accuracy(y, A, w):\n",
104 | "    \"\"\"\n",
105 | "    Compute the training accuracy on the training set (can be called for the test set as well).\n",
106 | "    y: +1 or -1 labels, shape = (num_examples)\n",
107 | "    A: Dataset matrix, shape = (num_examples, num_features)\n",
108 | "    w: Model weights, shape = (num_features)\n",
109 | "    return: scalar value\n",
110 | "    \"\"\"\n",
111 | "    # ***************************************************\n",
112 | "    # INSERT YOUR CODE HERE\n",
113 | "    # TODO\n",
114 | "    # ***************************************************\n",
115 | "    raise NotImplementedError"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "## Coordinate Descent (Ascent) for SVM"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "Compute the closed-form update for the i-th dual variable alpha[i], given the current alpha and the corresponding w.\n",
130 | "\n",
131 | "\n",
132 | "Hints: \n",
133 | "- Differentiate the dual objective with respect to one `alpha[i]`.\n",
134 | "- Set the derivative to zero to compute a new `alpha[i]`.\n",
135 | "- Make sure the values of alpha stay inside a `[0, 1]` box.\n",
136 | "- You can formulate the update as `alpha[i] = projection(alpha[i] + lambda_ * (some update))`.\n",
137 | "- You can test the correctness of your implementation by checking if the difference between the dual objective and the primal objective goes to zero. This difference, the duality gap, should get smaller than 10 within 700000 iterations."
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "def calculate_coordinate_update(y, A, lambda_, alpha, w, i):\n",
147 | "    \"\"\"\n",
148 | "    Compute a coordinate update (closed form) for coordinate i.\n",
149 | "    y: +1 or -1 labels, shape = (num_examples)\n",
150 | "    A: Dataset matrix, shape = (num_examples, num_features)\n",
151 | "    lambda_: Regularization parameter, scalar\n",
152 | "    alpha: Dual variables, shape = (num_examples)\n",
153 | "    w: Model weights, shape = (num_features)\n",
154 | "    i: Index of the entry of the dual variable 'alpha' that is to be updated\n",
155 | "    return: New weights w (shape (num_features)), new dual variables alpha (shape (num_examples))\n",
156 | "    \"\"\"\n",
157 | "    # ***************************************************\n",
158 | "    # INSERT YOUR CODE HERE\n",
159 | "    # TODO\n",
160 | "    # ***************************************************\n",
161 | "    # calculate the update of the coordinate at index i.\n",
162 | "    a_i, y_i = A[i], y[i]\n",
163 | "    old_alpha_i = np.copy(alpha[i])\n",
164 | "    \n",
165 | "    raise NotImplementedError\n",
166 | "    return w, alpha"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": null,
172 | "metadata": {},
173 | "outputs": [],
174 | "source": [
175 | "def calculate_dual_objective(y, A, w, alpha, lambda_):\n",
176 | "    \"\"\"\n",
177 | "    Calculate the objective for the dual problem.\n",
178 | "    Follow the formula given above.\n",
179 | "    y: +1 or -1 labels, shape = (num_examples)\n",
180 | "    A: Dataset matrix, shape = (num_examples, num_features)\n",
181 | "    alpha: Dual variables, shape = (num_examples)\n",
182 | "    lambda_: Regularization parameter, scalar\n",
183 | "    return: Scalar value\n",
184 | "    \"\"\"\n",
185 | "    # ***************************************************\n",
186 | "    # INSERT YOUR CODE HERE\n",
187 | "    # TODO\n",
188 | "    # ***************************************************\n",
189 | "    raise NotImplementedError"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {},
196 | "outputs": [],
197 | "source": [
198 | "def coordinate_descent_for_svm_demo(y, A, trace=False):\n",
199 | "    max_iter = 1000000\n",
200 | "    lambda_ = 0.01\n",
201 | "    history = defaultdict(list) if trace else None\n",
202 | "    \n",
203 | "    num_examples, num_features = A.shape\n",
204 | "    w = np.zeros(num_features)\n",
205 | "    alpha = np.zeros(num_examples)\n",
206 | "    \n",
207 | "    for it in range(max_iter):\n",
208 | "        # i = sample one data point uniformly at random from the rows of A\n",
209 | "        i = random.randint(0, num_examples - 1)\n",
210 | "        \n",
211 | "        w, alpha = calculate_coordinate_update(y, A, lambda_, alpha, w, i)\n",
212 | "        \n",
213 | "        
if it % 100000 == 0:\n", 214 | " # primal objective\n", 215 | " primal_value = calculate_primal_objective(y, A, w, lambda_)\n", 216 | " # dual objective\n", 217 | " dual_value = calculate_dual_objective(y, A, w, alpha, lambda_)\n", 218 | " # primal dual gap\n", 219 | " duality_gap = primal_value - dual_value\n", 220 | " \n", 221 | " print('iteration=%i, primal:%.5f, dual:%.5f, gap:%.5f'%(\n", 222 | " it, primal_value, dual_value, duality_gap))\n", 223 | " if it % 1000 == 0:\n", 224 | " primal_value = calculate_primal_objective(y, A, w, lambda_)\n", 225 | " if trace:\n", 226 | " history[\"objective_function\"] += [primal_value]\n", 227 | " history['iter'].append(it)\n", 228 | "\n", 229 | " \n", 230 | " print(\"training accuracy = {l}\".format(l=calculate_accuracy(y, A, w)))\n", 231 | " return history\n", 232 | "\n", 233 | "history_cd = coordinate_descent_for_svm_demo(y, A, trace=True)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "# Stochastic gradient descent for SVM" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "Let's now compare it with SGD on original problem for the SVM. In this part, you will implement stochastic gradient descent on the primal SVM objective. The stochasticity comes from sampling data points." 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "def compute_stoch_gradient_svm(A_sample, b_sample, lambda_, w_t, num_data_points):\n", 257 | " \"\"\"\n", 258 | " Calculate stochastic gradient over A_batch, b_batch.\n", 259 | " A_sample: A data sample, shape=(num_features)\n", 260 | " b_sample: Corresponding +1 or -1 label, scalar\n", 261 | " w_t: Model weights, shape=(num_features)\n", 262 | " num_data_points: Total size of the dataset, scalar integer\n", 263 | " \"\"\"\n", 264 | " # ***************************************************\n", 265 | " # INSERT YOUR CODE HERE\n", 266 | " # TODO\n", 267 | " # ***************************************************\n", 268 | " raise NotImplementedError" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "def stochastic_gradient_descent_svm_demo(A, b, gamma, batch_size=1, trace=False):\n", 278 | " history = defaultdict(list) if trace else None\n", 279 | " num_data_points, num_features = np.shape(A)\n", 280 | " max_iter = 1000000\n", 281 | " lambda_ = 0.01\n", 282 | " \n", 283 | " w_t = np.zeros(num_features)\n", 284 | " \n", 285 | " current_iter = 0\n", 286 | " while (current_iter < max_iter):\n", 287 | " i = random.randint(0,num_data_points - 1)\n", 288 | " b_batch, A_batch = b[i], A[i]\n", 289 | " gradient = compute_stoch_gradient_svm(A_batch, b_batch, lambda_, w_t, num_data_points)\n", 290 | " w_t = w_t - gamma * gradient\n", 291 | " if current_iter % 100000 == 0:\n", 292 | " primal_value = calculate_primal_objective(y, A, w_t, lambda_)\n", 293 | " print('iteration=%i, primal:%.5f'%(\n", 294 | " current_iter, primal_value))\n", 295 | " if current_iter % 1000 == 0:\n", 296 | " primal_value = calculate_primal_objective(y, A, w_t, lambda_)\n", 297 | " if trace:\n", 298 | " history['objective_function'].append(primal_value)\n", 299 | " history['iter'].append(current_iter)\n", 300 | " current_iter += 1\n", 301 | " print(\"training accuracy = {l}\".format(l=calculate_accuracy(y, A, w_t)))\n", 302 | " return history\n" 303 | ] 304 | }, 
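Before the step-size experiments below, here are hedged sketches of plausible completions for the templates above (assumptions, not the official solutions; they rely only on numpy, which is already imported):

def calculate_primal_objective(y, A, w, lambda_):
    # sum of hinge losses over all examples, plus the L2 regularizer
    hinge = np.clip(1 - y * (A @ w), 0, None)
    return np.sum(hinge) + lambda_ / 2 * np.sum(w ** 2)

def calculate_accuracy(y, A, w):
    # fraction of examples whose predicted sign matches the label
    predictions = np.where(A @ w >= 0, 1, -1)
    return np.mean(predictions == y)

def compute_stoch_gradient_svm(A_sample, b_sample, lambda_, w_t, num_data_points):
    # Subgradient of the sampled hinge term, rescaled by n so that it is an
    # unbiased estimate of the gradient of the sum, plus the regularizer term.
    if b_sample * (A_sample @ w_t) < 1:
        return -b_sample * A_sample * num_data_points + lambda_ * w_t
    return lambda_ * w_t

def calculate_coordinate_update(y, A, lambda_, alpha, w, i):
    # Exact maximization of the dual in coordinate i, projected onto [0, 1],
    # followed by the matching rank-1 update of w = (1/lambda) A^T Y alpha.
    a_i, y_i = A[i], y[i]
    old_alpha_i = np.copy(alpha[i])
    squared_norm = a_i @ a_i
    if squared_norm == 0.0:  # guard against all-zero data rows
        return w, alpha
    g = 1.0 - y_i * (a_i @ w)  # partial derivative of the dual objective
    alpha[i] = np.clip(old_alpha_i + lambda_ * g / squared_norm, 0.0, 1.0)
    w += (1.0 / lambda_) * (alpha[i] - old_alpha_i) * y_i * a_i
    return w, alpha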
305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "Try different stepsized and find the best one" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": { 316 | "scrolled": true 317 | }, 318 | "outputs": [], 319 | "source": [ 320 | "# ***************************************************\n", 321 | "# INSERT YOUR CODE HERE\n", 322 | "# TODO\n", 323 | "# ***************************************************" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "Plot learning curves" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": {}, 337 | "outputs": [], 338 | "source": [ 339 | "# ***************************************************\n", 340 | "# INSERT YOUR CODE HERE\n", 341 | "# TODO\n", 342 | "# ***************************************************" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "## Compare SGD with Coordinate Descent" 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": {}, 355 | "source": [ 356 | "Compare two algorithms in terms of convergence, time complexities per iteration. Which one is easier to use?" 357 | ] 358 | } 359 | ], 360 | "metadata": { 361 | "anaconda-cloud": {}, 362 | "kernelspec": { 363 | "display_name": "Python 3", 364 | "language": "python", 365 | "name": "python3" 366 | }, 367 | "language_info": { 368 | "codemirror_mode": { 369 | "name": "ipython", 370 | "version": 3 371 | }, 372 | "file_extension": ".py", 373 | "mimetype": "text/x-python", 374 | "name": "python", 375 | "nbconvert_exporter": "python", 376 | "pygments_lexer": "ipython3", 377 | "version": "3.7.4" 378 | }, 379 | "toc": { 380 | "base_numbering": 1, 381 | "nav_menu": {}, 382 | "number_sections": true, 383 | "sideBar": true, 384 | "skip_h1_title": false, 385 | "title_cell": "Table of Contents", 386 | "title_sidebar": "Contents", 387 | "toc_cell": false, 388 | "toc_position": {}, 389 | "toc_section_display": true, 390 | "toc_window_display": false 391 | } 392 | }, 393 | "nbformat": 4, 394 | "nbformat_minor": 1 395 | } 396 | -------------------------------------------------------------------------------- /labs/ex09/exercise09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex09/exercise09.pdf -------------------------------------------------------------------------------- /labs/ex09/solution/dataset_generation_NOT_NECESSARY_FOR_STUDENTS/generate_csv.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """convert movielens100k.mat to the same data type of project 2.""" 3 | import numpy as np 4 | from scipy.io import loadmat 5 | 6 | 7 | def load_data(): 8 | """load the mat data.""" 9 | data = loadmat('movielens100k.mat') 10 | ratings = data['ratings'] 11 | print("The data type of the 'ratings': {dt}".format(dt=type(ratings))) 12 | print("The shape of the 'ratings':{v}".format(v=ratings.shape)) 13 | return ratings 14 | 15 | 16 | def to_list(data): 17 | """save nz rating to list.""" 18 | nz = np.nonzero(data) 19 | return ["r{}_c{},{}".format(nz_row + 1, nz_col + 1, data[nz_row, nz_col]) 20 | for nz_row, nz_col in zip(*nz)] 21 | 22 | 23 | def to_csv(data, path): 24 | """write data to csv file.""" 25 | with open(path, "w") as f: 26 | 
f.write("\n".join(data)) 27 | 28 | 29 | if __name__ == '__main__': 30 | path = "movielens100k.csv" 31 | data = load_data() 32 | processed_data = to_list(data) 33 | to_csv(processed_data, path) 34 | -------------------------------------------------------------------------------- /labs/ex09/solution/dataset_generation_NOT_NECESSARY_FOR_STUDENTS/movielens100k.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex09/solution/dataset_generation_NOT_NECESSARY_FOR_STUDENTS/movielens100k.mat -------------------------------------------------------------------------------- /labs/ex09/solution/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some functions for help.""" 3 | 4 | from itertools import groupby 5 | 6 | import numpy as np 7 | import scipy.sparse as sp 8 | 9 | 10 | def read_txt(path): 11 | """read text file from path.""" 12 | with open(path, "r") as f: 13 | return f.read().splitlines() 14 | 15 | 16 | def load_data(path_dataset, n=500): 17 | """Load data in text format, one rating per line, as in the kaggle competition.""" 18 | data = read_txt(path_dataset)[1:] 19 | return preprocess_data(data,n) 20 | 21 | def slice(ratings,n=None): 22 | """take the first n rows and n columns only""" 23 | if n is not None: 24 | ratings = ratings[:n,:n] 25 | return ratings 26 | 27 | def preprocess_data(data, n = 500): 28 | """preprocessing the text data, conversion to numerical array format.""" 29 | def deal_line(line): 30 | pos, rating = line.split(',') 31 | row, col = pos.split("_") 32 | row = row.replace("r", "") 33 | col = col.replace("c", "") 34 | return int(row), int(col), float(rating) 35 | 36 | def statistics(data): 37 | row = set([line[0] for line in data]) 38 | col = set([line[1] for line in data]) 39 | return min(row), max(row), min(col), max(col) 40 | 41 | # parse each line 42 | data = [deal_line(line) for line in data] 43 | 44 | # do statistics on the dataset. 45 | min_row, max_row, min_col, max_col = statistics(data) 46 | # print("number of items: {}, number of users: {}".format(max_row, max_col)) 47 | # build rating matrix. 
48 | ratings = np.zeros([max_row,max_col]) 49 | for row, col, rating in data: 50 | ratings[row - 1, col - 1] = rating 51 | # Reduce the size of the dataset 52 | ratings = slice(ratings, n) 53 | # Make the trace 1 by scaling all entries of matrix 54 | ratings = ratings/np.trace(ratings) 55 | print("number of items: {}, number of users: {}".format(n, n)) 56 | return ratings 57 | 58 | def group_by(data, index): 59 | """group list of list by a specific index.""" 60 | sorted_data = sorted(data, key=lambda x: x[index]) 61 | groupby_data = groupby(sorted_data, lambda x: x[index]) 62 | return groupby_data 63 | 64 | 65 | def build_index_groups(train): 66 | """build groups for nnz rows and cols.""" 67 | nz_row, nz_col = train.nonzero() 68 | nz_train = list(zip(nz_row, nz_col)) 69 | 70 | grouped_nz_train_byrow = group_by(nz_train, index=0) 71 | nz_row_colindices = [(g, np.array([v[1] for v in value])) 72 | for g, value in grouped_nz_train_byrow] 73 | 74 | grouped_nz_train_bycol = group_by(nz_train, index=1) 75 | nz_col_rowindices = [(g, np.array([v[0] for v in value])) 76 | for g, value in grouped_nz_train_bycol] 77 | return nz_train, nz_row_colindices, nz_col_rowindices 78 | 79 | 80 | def calculate_mse(real_label, prediction): 81 | """calculate MSE.""" 82 | t = real_label - prediction 83 | return 1.0 * t.dot(t.T) 84 | 85 | 86 | def split_data(ratings, num_items_per_user, num_users_per_item, 87 | min_num_ratings, p_test=0.2): 88 | """split the ratings to training data and test data. 89 | Args: 90 | min_num_ratings: 91 | all users and items we keep must have at least min_num_ratings per user and per item. 92 | """ 93 | # set seed 94 | np.random.seed(988) 95 | 96 | # select user and item based on the condition. 97 | valid_users = np.where(num_items_per_user >= min_num_ratings)[0] 98 | valid_items = np.where(num_users_per_item >= min_num_ratings)[0] 99 | valid_ratings = ratings[valid_items, :][: , valid_users] 100 | 101 | # init 102 | num_rows, num_cols = valid_ratings.shape 103 | train = np.zeros([num_rows, num_cols]) 104 | test = np.zeros([num_rows, num_cols]) 105 | 106 | print("the shape of original ratings. (# of row, # of col): {}".format( 107 | ratings.shape)) 108 | print("the shape of valid ratings. (# of row, # of col): {}".format( 109 | (num_rows, num_cols))) 110 | 111 | nz_items, nz_users = valid_ratings.nonzero() 112 | 113 | # split the data 114 | for user in set(nz_users): 115 | # randomly select a subset of ratings 116 | row = valid_ratings[:, user].nonzero()[0] 117 | selects = np.random.choice(row, size=int(len(row) * p_test)) 118 | residual = list(set(row) - set(selects)) 119 | 120 | # add to train set 121 | train[residual, user] = valid_ratings[residual, user] 122 | 123 | # add to test set 124 | test[selects, user] = valid_ratings[selects, user] 125 | 126 | print("Total number of nonzero elements in origial data:{v}".format(v=np.count_nonzero(ratings))) 127 | print("Total number of nonzero elements in train data:{v}".format(v=np.count_nonzero(train))) 128 | print("Total number of nonzero elements in test data:{v}".format(v=np.count_nonzero(test))) 129 | return valid_ratings, train, test 130 | -------------------------------------------------------------------------------- /labs/ex09/solution/optimizers.py: -------------------------------------------------------------------------------- 1 | """Common optimizers.""" 2 | 3 | 4 | import numpy as np 5 | from time import time 6 | 7 | def gradient_descent(init, steps, grad, proj=lambda x: x, num_to_keep=None): 8 | """Projected gradient descent. 
9 | 
10 |     Parameters
11 |     ----------
12 |     init : array
13 |         starting point
14 |     steps : list of floats
15 |         step size schedule for the algorithm
16 |     grad : function
17 |         mapping arrays to arrays of same shape
18 |     proj : function, optional
19 |         mapping arrays to arrays of same shape
20 |     num_to_keep : integer, optional
21 |         number of points to keep
22 | 
23 |     Returns
24 |     -------
25 |     List of points computed by projected gradient descent and the wall clock time it took to compute them. Length of the
26 |     lists is determined by `num_to_keep`.
27 |     """
28 |     xs = [init]
29 |     ts = [0]
30 |     start = time()
31 |     for step in steps:
32 |         xs.append(proj(xs[-1] - step * grad(xs[-1])))
33 |         ts.append(time() - start)
34 |     if num_to_keep:
35 |         xs = xs[-num_to_keep:]
36 |         ts = ts[-num_to_keep:]
37 |     return xs, ts
38 | 
39 | 
40 | def frank_wolfe(initial, update_oracle, num_steps, num_to_keep=None):
41 |     """ Frank-Wolfe.
42 | 
43 |     Frank-Wolfe (Conditional gradient) for first-order optimization.
44 | 
45 |     Parameters:
46 |     -----------
47 |     initial: array,
48 |         initial starting point
49 |     update_oracle: function, mapping points to points,
50 |         computes the next iterate given the current iterate and iteration number
51 |     num_steps: integer,
52 |         number of steps to run the algorithm for
53 |     Returns:
54 |     --------
55 |     List of points computed by the algorithm and the wall clock time it took to compute them
56 |     """
57 |     xs = [initial]
58 |     ts = [0]
59 |     start = time()
60 |     for step in range(num_steps):
61 |         xs.append(update_oracle(xs[-1],step))
62 |         ts.append(time() - start)
63 |     if num_to_keep:
64 |         xs = xs[-num_to_keep:]
65 |         ts = ts[-num_to_keep:]
66 |     return xs, ts
--------------------------------------------------------------------------------
/labs/ex09/solution/plots.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """some functions for plots."""
3 | 
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 | 
7 | 
8 | def plot_raw_data(ratings):
9 |     """plot the statistics result on raw rating data."""
10 |     # do statistics.
11 | num_items_per_user = np.count_nonzero(ratings,axis=0) 12 | num_users_per_item = np.count_nonzero(ratings,axis=1) 13 | sorted_num_movies_per_user = np.sort(num_items_per_user)[::-1] 14 | sorted_num_users_per_movie = np.sort(num_users_per_item)[::-1] 15 | 16 | # plot 17 | fig = plt.figure() 18 | ax1 = fig.add_subplot(1, 2, 1) 19 | ax1.plot(sorted_num_movies_per_user, color='blue') 20 | ax1.set_xlabel("users") 21 | ax1.set_ylabel("number of ratings (sorted)") 22 | ax1.grid() 23 | 24 | ax2 = fig.add_subplot(1, 2, 2) 25 | ax2.plot(sorted_num_users_per_movie) 26 | ax2.set_xlabel("items") 27 | ax2.set_ylabel("number of ratings (sorted)") 28 | ax2.grid() 29 | 30 | plt.tight_layout() 31 | plt.savefig("stat_ratings") 32 | plt.show() 33 | # plt.close() 34 | return num_items_per_user, num_users_per_item 35 | 36 | 37 | def plot_train_test_data(train, test): 38 | """visualize the train and test data.""" 39 | fig = plt.figure() 40 | ax1 = fig.add_subplot(1, 2, 1) 41 | ax1.spy(train, precision=0.01, markersize=0.5) 42 | ax1.set_xlabel("Users") 43 | ax1.set_ylabel("Items") 44 | ax1.set_title("Training data") 45 | ax2 = fig.add_subplot(1, 2, 2) 46 | ax2.spy(test, precision=0.01, markersize=0.5) 47 | ax2.set_xlabel("Users") 48 | ax2.set_ylabel("Items") 49 | ax2.set_title("Test data") 50 | plt.tight_layout() 51 | plt.savefig("train_test") 52 | plt.show() 53 | -------------------------------------------------------------------------------- /labs/ex09/solution/stat_ratings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex09/solution/stat_ratings.png -------------------------------------------------------------------------------- /labs/ex09/solution/train_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex09/solution/train_test.png -------------------------------------------------------------------------------- /labs/ex09/solution09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex09/solution09.pdf -------------------------------------------------------------------------------- /labs/ex09/template/ex09-MatrixCompletion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# Useful starting lines\n", 12 | "%matplotlib inline\n", 13 | "\n", 14 | "import numpy as np\n", 15 | "import scipy\n", 16 | "import scipy.io\n", 17 | "import scipy.sparse as sp\n", 18 | "import scipy.linalg as la\n", 19 | "from scipy.sparse.linalg import svds\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "from timeit import timeit\n", 22 | "%load_ext autoreload\n", 23 | "%autoreload 2" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Based on [https://ee227c.github.io/code/lecture5.html#projected-gd]" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "# Movie recommendation using low rank matrix completion" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "We are Netflix and have access to 
the ratings given by users to some movies they saw. Based on this data we want to predict the rating an user would give other movies." 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "### Load the Data\n", 52 | "Note that `ratings` is a sparse matrix that in the shape of (num_items, num_users)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "from helpers import load_data, preprocess_data, split_data\n", 64 | "\n", 65 | "path_dataset = \"movielens100k.csv\"\n", 66 | "ratings = load_data(path_dataset, n = 1000)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Plot the number of ratings per movie and user" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": true, 81 | "scrolled": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "from plots import plot_raw_data\n", 86 | "\n", 87 | "num_items_per_user, num_users_per_item = plot_raw_data(ratings)\n", 88 | "\n", 89 | "print(\"min # of items per user = {}, min # of users per item = {}.\".format(\n", 90 | " min(num_items_per_user), min(num_users_per_item)))" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "### Split the data into a train and test set" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "collapsed": true, 105 | "scrolled": true 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "from plots import plot_train_test_data\n", 110 | "\n", 111 | "valid_ratings, train, test = split_data(\n", 112 | " ratings, num_items_per_user, num_users_per_item, min_num_ratings=1, p_test=0.1)\n", 113 | "plot_train_test_data(train, test)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Learning low rank matrices" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "We want to minimize the squared error\n", 128 | "$$\n", 129 | "\\min_{Y\\in X\\subseteq R^{n\\times m}}\\ \\dfrac{1}{2}\\sum_{(i,j)\\in \\Omega} (Z_{ij} - Y_{ij})^2\n", 130 | "$$\n", 131 | "when $\\Omega\\subseteq[n]\\times[m]$ is the set of observed entries from a given matrix $Z$.\n", 132 | "\n", 133 | "Since without more assumptions this is a hopeless problem, we assume that the *true* matrix $Y$ is low rank. 
As a proxy for low rank, we assume that the trace norm of $Y$ is bounded.\n", 134 | "\n", 135 | "In this case, our optimization domain is the unit ball of the trace norm (or nuclear norm), which is known to be the convex hull of the rank-1 matrices \n", 136 | "$$\n", 137 | "X := \\mathop{conv}(\\mathcal{A}) \\ \\text{ with }\\ \\mathcal{A} := \\Big\\{ u v^\\top \\ \\Big|\\ \\substack{u\\in R^n,\\;||{u}||_2=1\\\\ v\\in R^m,\\;||{v}||_2=1} \\Big\\} \\ .\n", 138 | "$$" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "collapsed": true 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "def cost_se(Y,Z):\n", 150 | " \"\"\"Compute the objective function on an input matrix Y for the data matrix Z\n", 151 | " Assume all (and only) non-zero values of Z are observed\"\"\"\n", 152 | " cost = 0\n", 153 | " observed_rows,observed_columns = np.nonzero(Z)\n", 154 | " # ***************************************************\n", 155 | " # INSERT YOUR CODE HERE\n", 156 | " # TODO\n", 157 | " # *************************************************** \n", 158 | " raise NotImplementedError \n", 159 | " " 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## Implementing Baselines " 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Use the global mean to do the prediction" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": true, 181 | "scrolled": true 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "def baseline_global_mean(train, test):\n", 186 | " \"\"\"baseline method: use the global mean.\"\"\"\n", 187 | " observed_rows,observed_columns = np.nonzero(train)\n", 188 | " \n", 189 | " # ***************************************************\n", 190 | " # INSERT YOUR CODE HERE\n", 191 | " # TODO\n", 192 | " # *************************************************** \n", 193 | " raise NotImplementedError \n", 194 | " test_error = cost_se(Y,test)\n", 195 | " print(\"The test error of baseline using global mean: {v}.\".format(v=test_error))\n", 196 | "\n", 197 | "baseline_global_mean(train, test)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "### Use the user means as the prediction" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "collapsed": true 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "def baseline_user_mean(train, test):\n", 216 | " \"\"\"baseline method: use the user means as the prediction.\"\"\"\n", 217 | " num_items, num_users = train.shape\n", 218 | " \n", 219 | " # ***************************************************\n", 220 | " # INSERT YOUR CODE HERE\n", 221 | " # TODO\n", 222 | " # *************************************************** \n", 223 | " raise NotImplementedError \n", 224 | "\n", 225 | " test_error = cost_se(Y,test)\n", 226 | " print(\"The test error of baseline using user mean: {v}.\".format(v=test_error))\n", 227 | "\n", 228 | "baseline_user_mean(train, test)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "### Use the item means as the prediction" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": true 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "def baseline_item_mean(train, test):\n", 247 | " 
\"\"\"baseline method: use item means as the prediction.\"\"\"\n", 248 | " num_items, num_users = train.shape\n", 249 | " \n", 250 | " # ***************************************************\n", 251 | " # INSERT YOUR CODE HERE\n", 252 | " # TODO\n", 253 | " # *************************************************** \n", 254 | " raise NotImplementedError \n", 255 | " \n", 256 | " test_error = cost_se(Y,test)\n", 257 | " print(\"The test error of baseline using item mean: {v}.\".format(v=test_error))\n", 258 | " \n", 259 | "baseline_item_mean(train, test)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": { 265 | "collapsed": true 266 | }, 267 | "source": [ 268 | "## Learn matrix using projected gradient descent" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": true 276 | }, 277 | "outputs": [], 278 | "source": [ 279 | "def compute_gradient(Y,Z):\n", 280 | " \"\"\"Compute the gradient of the objective.\n", 281 | " Assume that all non-zero values in Z are observed and so are part of \\Omega\"\"\"\n", 282 | " gradient = np.zeros(Y.shape)\n", 283 | " observed_rows,observed_columns = np.nonzero(Z)\n", 284 | " \n", 285 | " # ***************************************************\n", 286 | " # INSERT YOUR CODE HERE\n", 287 | " # TODO\n", 288 | " # *************************************************** \n", 289 | " raise NotImplementedError" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": true 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "def project_onto_simplex(s):\n", 301 | " \"\"\"Given a vector s, find its projection onto the unit simplex\"\"\" \n", 302 | " \n", 303 | " # ***************************************************\n", 304 | " # INSERT YOUR CODE HERE\n", 305 | " # TODO\n", 306 | " # *************************************************** \n", 307 | " raise NotImplementedError \n", 308 | "\n", 309 | "def project_onto_tracenormball(S):\n", 310 | " \"\"\"Compute the projection of the matrix S onto the set X (the unit ball of the trace norm)\n", 311 | " Hint: use the simplex projection function you wrote above\"\"\"\n", 312 | " \n", 313 | " \n", 314 | " # ***************************************************\n", 315 | " # INSERT YOUR CODE HERE\n", 316 | " # TODO\n", 317 | " # *************************************************** \n", 318 | " raise NotImplementedError " 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "#### Cost of a projecting onto a trace norm ball\n", 326 | "How does the cost of the projection scale with respect to increasing dimension?" 
327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": { 333 | "collapsed": true 334 | }, 335 | "outputs": [], 336 | "source": [ 337 | "ts = []\n", 338 | "ns = [100, 200, 400, 600, 800]\n", 339 | "for n in ns:\n", 340 | " f = lambda: project_onto_tracenormball(np.random.normal(0,1,(n, n)))\n", 341 | " ts.append(timeit(f, number=1))\n", 342 | "\n", 343 | "plt.figure(figsize=(12,6))\n", 344 | "plt.xlabel('Input dimension')\n", 345 | "plt.ylabel('Time (s)')\n", 346 | "plt.title('Cost of nuclear norm projection')\n", 347 | "plt.plot(ns, ts)" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "#### Running projected gradient descent" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "metadata": { 361 | "collapsed": true 362 | }, 363 | "outputs": [], 364 | "source": [ 365 | "from optimizers import gradient_descent\n", 366 | "\n", 367 | "# start from random matrix of nuclear norm 1\n", 368 | "Y0 = np.random.normal(0,1, train.shape)\n", 369 | "Y0 = project_onto_tracenormball(Y0)\n", 370 | "# define the train and test error\n", 371 | "test_objective = lambda Y: cost_se(Y, test)\n", 372 | "train_objective = lambda Y: cost_se(Y, train)\n", 373 | "# run the gradient descent algorithm\n", 374 | "gradient = lambda Y: compute_gradient(Y, train)\n", 375 | "Ys, ts = gradient_descent(Y0, [0.2]*10, gradient, project_onto_tracenormball)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": { 382 | "collapsed": true 383 | }, 384 | "outputs": [], 385 | "source": [ 386 | "\"\"\"Plot the test and train errors vs. number of iterations\"\"\"\n", 387 | "plt.figure(figsize=(12,6))\n", 388 | "plt.suptitle('Projected gradient descent error')\n", 389 | "plt.subplot(1, 2, 1)\n", 390 | "plt.ylabel('Train error')\n", 391 | "plt.xlabel('Steps')\n", 392 | "plt.plot(np.arange(len(Ys)), [train_objective(Y) for Y in Ys], 'ko-')\n", 393 | "\n", 394 | "plt.subplot(1, 2, 2)\n", 395 | "plt.ylabel('Test error')\n", 396 | "plt.xlabel('Steps')\n", 397 | "plt.plot(np.arange(len(Ys)), [test_objective(Y) for Y in Ys], 'r.-')" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": { 404 | "collapsed": true 405 | }, 406 | "outputs": [], 407 | "source": [ 408 | "\"\"\"Plot the test and train errors vs. 
time\"\"\"\n", 409 | "plt.figure(figsize=(12,6))\n", 410 | "plt.title('Projected gradient descent error')\n", 411 | "plt.subplot(1, 2, 1)\n", 412 | "plt.ylabel('Train error')\n", 413 | "plt.xlabel('Time (in sec)')\n", 414 | "plt.plot(ts, [train_objective(Y) for Y in Ys], 'ko-')\n", 415 | "\n", 416 | "plt.subplot(1, 2, 2)\n", 417 | "plt.ylabel('Test error')\n", 418 | "plt.xlabel('Time (in sec)')\n", 419 | "plt.plot(ts, [test_objective(Y) for Y in Ys], 'r.-')" 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "## Learn matrix using Frank-Wolfe" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": { 433 | "collapsed": true 434 | }, 435 | "outputs": [], 436 | "source": [ 437 | "def LMO(S):\n", 438 | " \"\"\"Compute the linear maximization oracle (LMO) over the unit ball of the trace norm (nuclear norm) for an input S\"\"\"\n", 439 | " \n", 440 | " # ***************************************************\n", 441 | " # INSERT YOUR CODE HERE\n", 442 | " # TODO\n", 443 | " # *************************************************** \n", 444 | " raise NotImplementedError \n" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": { 451 | "collapsed": true 452 | }, 453 | "outputs": [], 454 | "source": [ 455 | "def cond_grad_update(Y, Z, t):\n", 456 | " \"\"\"Compute the Frank-Wolfe update.\n", 457 | " Here t is the iteration number, Y is the current point and Z is the observed matrix\"\"\"\n", 458 | " gradient = compute_gradient(Y,Z)\n", 459 | " V = LMO(-gradient)\n", 460 | " \n", 461 | " # ***************************************************\n", 462 | " # INSERT YOUR CODE HERE\n", 463 | " # TODO\n", 464 | " # *************************************************** \n", 465 | " raise NotImplementedError " 466 | ] 467 | }, 468 | { 469 | "cell_type": "markdown", 470 | "metadata": {}, 471 | "source": [ 472 | "### Comparing cost of Projection and Linear Minimization" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": { 479 | "collapsed": true 480 | }, 481 | "outputs": [], 482 | "source": [ 483 | "ts1 = []\n", 484 | "ts2 = []\n", 485 | "ns = [100, 200, 400, 600, 800]\n", 486 | "for n in ns:\n", 487 | " f = lambda: project_onto_tracenormball(sp.random(n,n))\n", 488 | " ts1.append(timeit(f, number=1))\n", 489 | " f = lambda: LMO(np.random.normal(0,1,(n, n)))\n", 490 | " ts2.append(timeit(f, number=1))" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": { 497 | "collapsed": true 498 | }, 499 | "outputs": [], 500 | "source": [ 501 | "plt.figure(figsize=(12,6))\n", 502 | "plt.xlabel('Input dimension')\n", 503 | "plt.ylabel('Time (s)')\n", 504 | "plt.title('Projection vs linear optimization')\n", 505 | "plt.plot(ns, ts1, label='projection')\n", 506 | "plt.plot(ns, ts2, label='linear opt')\n", 507 | "plt.legend()" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "#### Running Frank-Wolfe" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": null, 520 | "metadata": { 521 | "collapsed": true 522 | }, 523 | "outputs": [], 524 | "source": [ 525 | "from optimizers import frank_wolfe\n", 526 | "\n", 527 | "# start from random matrix of nuclear norm 1\n", 528 | "Y0 = np.random.normal(0,1, train.shape)\n", 529 | "Y0 = project_onto_tracenormball(Y0)\n", 530 | "# define the train and test error\n", 531 | 
"test_objective = lambda Y: cost_se(Y, test)\n", 532 | "train_objective = lambda Y: cost_se(Y, train)\n", 533 | "# run the Frank-Wolfe algorithm\n", 534 | "update_oracle = lambda Y,k: cond_grad_update(Y, train, k)\n", 535 | "Ys, ts = frank_wolfe(Y0, update_oracle, num_steps = 10)" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "metadata": { 542 | "collapsed": true 543 | }, 544 | "outputs": [], 545 | "source": [ 546 | "\"\"\"Plot the test and train errors vs. number of iterations\"\"\"\n", 547 | "plt.figure(figsize=(12,6))\n", 548 | "plt.title('Frank-Wolfe error vs. Number of iterations')\n", 549 | "plt.subplot(1, 2, 1)\n", 550 | "plt.ylabel('Train error')\n", 551 | "plt.xlabel('Steps')\n", 552 | "plt.plot(np.arange(len(Ys)), [train_objective(Y) for Y in Ys], 'ko-')\n", 553 | "\n", 554 | "plt.subplot(1, 2, 2)\n", 555 | "plt.ylabel('Test error')\n", 556 | "plt.xlabel('Steps')\n", 557 | "plt.plot(np.arange(len(Ys)), [test_objective(Y) for Y in Ys], 'r.-')" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": null, 563 | "metadata": { 564 | "collapsed": true 565 | }, 566 | "outputs": [], 567 | "source": [ 568 | "\"\"\"Plot the test and train errors vs. time\"\"\"\n", 569 | "plt.figure(figsize=(12,6))\n", 570 | "plt.title('Frank-Wolfe error vs. Time')\n", 571 | "plt.subplot(1,2, 1)\n", 572 | "plt.ylabel('Train error')\n", 573 | "plt.xlabel('Time (in sec)')\n", 574 | "plt.plot(ts, [train_objective(Y) for Y in Ys], 'ko-')\n", 575 | "\n", 576 | "plt.subplot(1, 2, 2)\n", 577 | "plt.ylabel('Test error')\n", 578 | "plt.xlabel('Time (in sec)')\n", 579 | "plt.plot(ts, [test_objective(Y) for Y in Ys], 'r.-')" 580 | ] 581 | }, 582 | { 583 | "cell_type": "markdown", 584 | "metadata": { 585 | "collapsed": true 586 | }, 587 | "source": [ 588 | "## A more practical method\n", 589 | "\n", 590 | "In practice, to learn a low rank matrix, neither Frank-Wolfe nor Projected Gradient Descent are used. Instead, we formulate a **non-convex** problem which is then solved by SGD. In particular if we want to learn a rank $k$ matrix, $X$ is replaced by two matrices $UV^\\top$ where $U \\in R^{n\\times k}$ and $V \\in R^{m \\times k}$. This means that we never have to store the full matrix $X$ which would take $O(mn)$ space but instead only $O(mk + nk)$ space. Futher, the matrices $U$ and $V$ can be interpreted as *embeddings*. There have been recent theoretical results which prove that this algorithm in fact recovers the correct answer under some assumptions!\n", 591 | "\n", 592 | "Refer to this exercise from Machine Learning course (https://github.com/epfml/ML_course/blob/master/labs/ex10/solution/ex10.ipynb) for more details." 
593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": { 599 | "collapsed": true 600 | }, 601 | "outputs": [], 602 | "source": [] 603 | } 604 | ], 605 | "metadata": { 606 | "kernelspec": { 607 | "display_name": "Python 3", 608 | "language": "python", 609 | "name": "python3" 610 | }, 611 | "language_info": { 612 | "codemirror_mode": { 613 | "name": "ipython", 614 | "version": 3 615 | }, 616 | "file_extension": ".py", 617 | "mimetype": "text/x-python", 618 | "name": "python", 619 | "nbconvert_exporter": "python", 620 | "pygments_lexer": "ipython3", 621 | "version": "3.5.2" 622 | } 623 | }, 624 | "nbformat": 4, 625 | "nbformat_minor": 1 626 | } 627 | -------------------------------------------------------------------------------- /labs/ex09/template/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some functions for help.""" 3 | 4 | from itertools import groupby 5 | 6 | import numpy as np 7 | import scipy.sparse as sp 8 | 9 | 10 | def read_txt(path): 11 | """read text file from path.""" 12 | with open(path, "r") as f: 13 | return f.read().splitlines() 14 | 15 | 16 | def load_data(path_dataset, n=500): 17 | """Load data in text format, one rating per line, as in the kaggle competition.""" 18 | data = read_txt(path_dataset)[1:] 19 | return preprocess_data(data,n) 20 | 21 | def slice(ratings,n=None): 22 | """take the first n rows and n columns only""" 23 | if n is not None: 24 | ratings = ratings[:n,:n] 25 | return ratings 26 | 27 | def preprocess_data(data, n = 500): 28 | """preprocessing the text data, conversion to numerical array format.""" 29 | def deal_line(line): 30 | pos, rating = line.split(',') 31 | row, col = pos.split("_") 32 | row = row.replace("r", "") 33 | col = col.replace("c", "") 34 | return int(row), int(col), float(rating) 35 | 36 | def statistics(data): 37 | row = set([line[0] for line in data]) 38 | col = set([line[1] for line in data]) 39 | return min(row), max(row), min(col), max(col) 40 | 41 | # parse each line 42 | data = [deal_line(line) for line in data] 43 | 44 | # do statistics on the dataset. 45 | min_row, max_row, min_col, max_col = statistics(data) 46 | # print("number of items: {}, number of users: {}".format(max_row, max_col)) 47 | # build rating matrix. 
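# (dense items-by-users array; entries left at zero are treated as unobserved
#  by the rest of the lab, e.g. via np.nonzero in the notebook's cost function)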
48 | ratings = np.zeros([max_row,max_col]) 49 | for row, col, rating in data: 50 | ratings[row - 1, col - 1] = rating 51 | # Reduce the size of the dataset 52 | ratings = slice(ratings, n) 53 | # Make the trace 1 by scaling all entries of matrix 54 | ratings = ratings/np.trace(ratings) 55 | print("number of items: {}, number of users: {}".format(*ratings.shape)) 56 | return ratings 57 | 58 | def group_by(data, index): 59 | """group list of list by a specific index.""" 60 | sorted_data = sorted(data, key=lambda x: x[index]) 61 | groupby_data = groupby(sorted_data, lambda x: x[index]) 62 | return groupby_data 63 | 64 | 65 | def build_index_groups(train): 66 | """build groups for nnz rows and cols.""" 67 | nz_row, nz_col = train.nonzero() 68 | nz_train = list(zip(nz_row, nz_col)) 69 | 70 | grouped_nz_train_byrow = group_by(nz_train, index=0) 71 | nz_row_colindices = [(g, np.array([v[1] for v in value])) 72 | for g, value in grouped_nz_train_byrow] 73 | 74 | grouped_nz_train_bycol = group_by(nz_train, index=1) 75 | nz_col_rowindices = [(g, np.array([v[0] for v in value])) 76 | for g, value in grouped_nz_train_bycol] 77 | return nz_train, nz_row_colindices, nz_col_rowindices 78 | 79 | 80 | def calculate_mse(real_label, prediction): 81 | """calculate MSE (here: the sum of squared errors).""" 82 | t = real_label - prediction 83 | return 1.0 * t.dot(t.T) 84 | 85 | 86 | def split_data(ratings, num_items_per_user, num_users_per_item, 87 | min_num_ratings, p_test=0.2): 88 | """split the ratings into training data and test data. 89 | Args: 90 | min_num_ratings: 91 | all users and items we keep must have at least min_num_ratings per user and per item. 92 | """ 93 | # set seed 94 | np.random.seed(988) 95 | 96 | # select user and item based on the condition. 97 | valid_users = np.where(num_items_per_user >= min_num_ratings)[0] 98 | valid_items = np.where(num_users_per_item >= min_num_ratings)[0] 99 | valid_ratings = ratings[valid_items, :][:, valid_users] 100 | 101 | # init 102 | num_rows, num_cols = valid_ratings.shape 103 | train = np.zeros([num_rows, num_cols]) 104 | test = np.zeros([num_rows, num_cols]) 105 | 106 | print("the shape of original ratings. (# of row, # of col): {}".format( 107 | ratings.shape)) 108 | print("the shape of valid ratings. (# of row, # of col): {}".format( 109 | (num_rows, num_cols))) 110 | 111 | nz_items, nz_users = valid_ratings.nonzero() 112 | 113 | # split the data 114 | for user in set(nz_users): 115 | # randomly select a subset of ratings (without replacement, so the 116 | # test set really contains the requested fraction of distinct ratings) 116 | row = valid_ratings[:, user].nonzero()[0] 117 | selects = np.random.choice(row, size=int(len(row) * p_test), replace=False) 118 | residual = list(set(row) - set(selects)) 119 | 120 | # add to train set 121 | train[residual, user] = valid_ratings[residual, user] 122 | 123 | # add to test set 124 | test[selects, user] = valid_ratings[selects, user] 125 | 126 | print("Total number of nonzero elements in original data:{v}".format(v=np.count_nonzero(ratings))) 127 | print("Total number of nonzero elements in train data:{v}".format(v=np.count_nonzero(train))) 128 | print("Total number of nonzero elements in test data:{v}".format(v=np.count_nonzero(test))) 129 | return valid_ratings, train, test 130 | -------------------------------------------------------------------------------- /labs/ex09/template/optimizers.py: -------------------------------------------------------------------------------- 1 | """Common optimizers.""" 2 | 3 | 4 | import numpy as np 5 | from time import time 6 | 7 | def gradient_descent(init, steps, grad, proj=lambda x: x, num_to_keep=None): 8 | """Projected gradient descent.
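
    A toy usage sketch (illustrative only; the quadratic objective here is
    made up for the example):

        >>> import numpy as np
        >>> grad = lambda x: 2 * x                    # gradient of ||x||^2
        >>> xs, ts = gradient_descent(np.full(2, 3.0), [0.25] * 4, grad)
        >>> np.allclose(xs[-1], 3.0 * 0.5 ** 4)
        True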
9 | 10 | Parameters 11 | ---------- 12 | init : array 13 | starting point 14 | steps : list of floats 15 | step size schedule for the algorithm 16 | grad : function 17 | mapping arrays to arrays of same shape 18 | proj : function, optional 19 | mapping arrays to arrays of same shape 20 | num_to_keep : integer, optional 21 | number of points to keep 22 | 23 | Returns 24 | ------- 25 | List of points computed by projected gradient descent and the wall clock time it took to compute them. Length of the 26 | lists is determined by `num_to_keep`. 27 | """ 28 | xs = [init] 29 | ts = [0] 30 | start = time() 31 | for step in steps: 32 | xs.append(proj(xs[-1] - step * grad(xs[-1]))) 33 | ts.append(time() - start) 34 | if num_to_keep: 35 | xs = xs[-num_to_keep:] 36 | ts = ts[-num_to_keep:] 37 | return xs, ts 38 | 39 | 40 | def frank_wolfe(initial, update_oracle, num_steps, num_to_keep=None): 41 | """ Frank-Wolfe. 42 | 43 | Frank-Wolfe (Conditional gradient) for first-order optimization. 44 | 45 | Parameters: 46 | ----------- 47 | initial: array, 48 | initial starting point 49 | update_oracle: function, mapping points to points, 50 | computes the next iterate given the current iterate and iteration number 51 | num_steps: integer, 52 | number of steps to run the algorithm for 53 | Returns: 54 | -------- 55 | List of points computed by the algorithm and the wall clock time it took to compute them 56 | """ 57 | xs = [initial] 58 | ts = [0] 59 | start = time() 60 | for step in range(num_steps): 61 | xs.append(update_oracle(xs[-1],step)) 62 | ts.append(time() - start) 63 | if num_to_keep: 64 | xs = xs[-num_to_keep:] 65 | ts = ts[-num_to_keep:] 66 | return xs, ts -------------------------------------------------------------------------------- /labs/ex09/template/plots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some functions for plots.""" 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | def plot_raw_data(ratings): 9 | """plot statistics of the raw rating data.""" 10 | # do statistics.
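# (per-user counts collapse the item axis (axis=0); per-item counts collapse
#  the user axis (axis=1); zero entries of `ratings` mean "not rated")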
11 | num_items_per_user = np.count_nonzero(ratings,axis=0) 12 | num_users_per_item = np.count_nonzero(ratings,axis=1) 13 | sorted_num_movies_per_user = np.sort(num_items_per_user)[::-1] 14 | sorted_num_users_per_movie = np.sort(num_users_per_item)[::-1] 15 | 16 | # plot 17 | fig = plt.figure() 18 | ax1 = fig.add_subplot(1, 2, 1) 19 | ax1.plot(sorted_num_movies_per_user, color='blue') 20 | ax1.set_xlabel("users") 21 | ax1.set_ylabel("number of ratings (sorted)") 22 | ax1.grid() 23 | 24 | ax2 = fig.add_subplot(1, 2, 2) 25 | ax2.plot(sorted_num_users_per_movie) 26 | ax2.set_xlabel("items") 27 | ax2.set_ylabel("number of ratings (sorted)") 28 | ax2.grid() 29 | 30 | plt.tight_layout() 31 | plt.savefig("stat_ratings") 32 | plt.show() 33 | # plt.close() 34 | return num_items_per_user, num_users_per_item 35 | 36 | 37 | def plot_train_test_data(train, test): 38 | """visualize the train and test data.""" 39 | fig = plt.figure() 40 | ax1 = fig.add_subplot(1, 2, 1) 41 | ax1.spy(train, precision=0.01, markersize=0.5) 42 | ax1.set_xlabel("Users") 43 | ax1.set_ylabel("Items") 44 | ax1.set_title("Training data") 45 | ax2 = fig.add_subplot(1, 2, 2) 46 | ax2.spy(test, precision=0.01, markersize=0.5) 47 | ax2.set_xlabel("Users") 48 | ax2.set_ylabel("Items") 49 | ax2.set_title("Test data") 50 | plt.tight_layout() 51 | plt.savefig("train_test") 52 | plt.show() 53 | -------------------------------------------------------------------------------- /labs/ex10/exercise10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex10/exercise10.pdf -------------------------------------------------------------------------------- /labs/ex10/solution10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex10/solution10.pdf -------------------------------------------------------------------------------- /labs/mini-project/latex-example-paper/denoised_signal_1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/mini-project/latex-example-paper/denoised_signal_1d.png -------------------------------------------------------------------------------- /labs/mini-project/latex-example-paper/latex-template.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/mini-project/latex-example-paper/latex-template.pdf -------------------------------------------------------------------------------- /labs/mini-project/latex-example-paper/latex-template.tex: -------------------------------------------------------------------------------- 1 | \documentclass[10pt,conference,compsocconf]{IEEEtran} 2 | 3 | \usepackage{hyperref} 4 | \usepackage{graphicx} % For figure environment 5 | 6 | 7 | \begin{document} 8 | \title{Writing Scientific Papers and Software} 9 | 10 | \author{ 11 | Cheng Soon Ong\\ 12 | \textit{Department of Computer Science, ETH Zurich, Switzerland} 13 | } 14 | 15 | \maketitle 16 | 17 | \begin{abstract} 18 | A critical part of scientific discovery is the 19 | communication of research findings to peers or the general public. 
20 | Mastery of the process of scientific communication improves the 21 | visibility and impact of research. While this guide is a necessary 22 | tool for learning how to write in a manner suitable for publication 23 | at a scientific venue, it is by no means sufficient, on its own, to 24 | make its reader an accomplished writer. 25 | This guide should be a starting point for further development of 26 | writing skills. 27 | \end{abstract} 28 | 29 | \section{Introduction} 30 | 31 | The aim of writing a paper is to infect the mind of your reader with 32 | the brilliance of your idea~\cite{jones08}. 33 | The hope is that after reading your 34 | paper, the audience will be convinced to try out your idea. In other 35 | words, it is the medium to transport the idea from your head to your 36 | reader's head. 37 | In the following 38 | section, we show a common structure of scientific papers and briefly 39 | outline some tips for writing good papers in 40 | Section~\ref{sec:tips-writing}. 41 | 42 | At that 43 | point, it is important that the reader is able to reproduce your 44 | work~\cite{schwab00,wavelab,gentleman05}. This is why it is also 45 | important that if the work has a computational component, the software 46 | associated with producing the results is also made available in a 47 | useful form. Several guidelines for making your user's experience with 48 | your software as painless as possible are given in 49 | Section~\ref{sec:tips-software}. 50 | 51 | This brief guide is by no means sufficient, on its own, to 52 | make its reader an accomplished writer. The reader is urged to use the 53 | references to further improve his or her writing skills. 54 | 55 | \section{The Structure of a Paper} 56 | \label{sec:structure-paper} 57 | 58 | Scientific papers usually begin with the description of the problem, 59 | justifying why the problem is interesting. Most importantly, it argues 60 | that the problem is still unsolved, or that the current solutions are 61 | unsatisfactory. This leads to the main gist of the paper, which is 62 | ``the idea''. The authors then show evidence, using derivations or 63 | experiments, that the idea works. Since science does not occur in a 64 | vacuum, a proper comparison to the current state of the art is often 65 | part of the results. Following these ideas, papers usually have the 66 | following structure: 67 | \begin{description} 68 | \item[Abstract] \ \\ 69 | Short description of the whole paper, to help the 70 | reader decide whether to read it. 71 | \item[Introduction] \ \\ 72 | Describe your problem and state your 73 | contributions. 74 | \item[Models and Methods] \ \\ 75 | Describe your idea and how it was implemented to solve 76 | the problem. Survey the related work, giving credit where credit is 77 | due. 78 | \item[Results] \ \\ 79 | Show evidence to support your claims made in the 80 | introduction. 81 | \item[Discussion] \ \\ 82 | Discuss the strengths and weaknesses of your 83 | approach, based on the results. Point out the implications of your 84 | novel idea on the application concerned. 85 | \item[Summary] \ \\ 86 | Summarize your contributions in light of the new 87 | results. 88 | \end{description} 89 | 90 | 91 | \section{Tips for Good Writing} 92 | \label{sec:tips-writing} 93 | 94 | The ideas for good writing have come 95 | from~\cite{editor10,jones08,anderson04}. 96 | 97 | \subsection{Getting Help} 98 | One should try to get a draft read by as many friendly people as 99 | possible. And remember to treat your test readers with respect.
If 100 | they are unable to understand something in your paper, then it is 101 | highly likely that your reviewers will not understand it 102 | either. Therefore, do not be defensive about the criticisms you get, 103 | but use them as an opportunity to improve the paper. Before you submit 104 | your friends to the pain of reading your draft, please \emph{use a 105 | spell checker}. 106 | 107 | \subsection{Abstract} 108 | The abstract should really be written last, along with the title of 109 | the paper. The four points that should be covered are~\cite{jones08}: 110 | \begin{enumerate} 111 | \item State the problem. 112 | \item Say why it is an interesting problem. 113 | \item Say what your solution achieves. 114 | \item Say what follows from your solution. 115 | \end{enumerate} 116 | 117 | \subsection{Figures and Tables} 118 | 119 | \begin{figure}[tbp] 120 | \centering 121 | \includegraphics[width=\columnwidth]{denoised_signal_1d} 122 | \caption{Signal compression and denoising using the Fourier basis.} 123 | \vspace{-3mm} 124 | \label{fig:denoise-fourier} 125 | \end{figure} 126 | \begin{figure}[htbp] 127 | \centering 128 | \includegraphics[width=\columnwidth]{local_wdenoised_1d} 129 | \vspace{-3mm} 130 | \caption{Signal compression and denoising using the Daubechies wavelet basis.} 131 | \label{fig:denoise-wavelet} 132 | \end{figure} 133 | 134 | Use examples and illustrations to clarify ideas and results. For 135 | example, by comparing Figure~\ref{fig:denoise-fourier} and 136 | Figure~\ref{fig:denoise-wavelet}, we can see the two different 137 | situations where the Fourier and wavelet bases perform well. 138 | 139 | \subsection{Models and Methods} 140 | The models and methods 141 | section should describe what was 142 | done to answer the research question, describe how it was done, 143 | justify the experimental design, and 144 | explain how the results were analyzed. 145 | 146 | The model refers to the underlying mathematical model or structure which 147 | you use to describe your problem, or that your solution is based on. 148 | The methods, on the other hand, are the algorithms used to solve the problem. 149 | In some cases, the suggested method directly solves the problem, without having it 150 | stated in terms of an underlying model. Generally, though, it is a better practice to have 151 | the model figured out and stated clearly, rather than presenting a method without specifying 152 | the model. In this case, the method can be more easily evaluated in the task of fitting 153 | the given data to the underlying model. 154 | 155 | The methods part of this section is not a step-by-step, directive 156 | protocol as you might see in your lab manual, but detailed enough such 157 | that an interested reader can reproduce your 158 | work~\cite{anderson04,wavelab}. 159 | 160 | The methods section of a research paper provides the information by 161 | which a study's validity is judged. 162 | Therefore, it requires a clear and precise description of how an 163 | experiment was done, and the rationale 164 | for why specific experimental procedures were chosen. 165 | It is usually helpful to 166 | structure the methods section by~\cite{kallet04methods}: 167 | \begin{enumerate} 168 | \item Laying out the model you used to describe the problem or the solution. 169 | \item Describing the algorithms used in the study, briefly including 170 | details such as hyperparameter values (e.g. thresholds), and 171 | preprocessing steps (e.g. normalizing the data to have mean value of 172 | zero). 
173 | \item Explaining how the materials were prepared, for example the 174 | images used and their resolution. 175 | \item Describing the research protocol, for example which examples 176 | were used for estimating the parameters (training) and which were 177 | used for computing performance. 178 | \item Explaining how measurements were made and what 179 | calculations were performed. Do not reproduce the full source code in 180 | the paper, but explain the key steps. 181 | \end{enumerate} 182 | 183 | \subsection{Results} 184 | 185 | Organize the results section based on the sequence of tables and 186 | figures you include. Prepare the tables and figures as soon as all 187 | the data are analyzed and arrange them in the sequence that best 188 | presents your findings in a logical way. A good strategy is to note, 189 | on a draft of each table or figure, the one or two key results you 190 | want to address in the text portion of the results. 191 | The information from the figures is 192 | summarized in Table~\ref{tab:fourier-wavelet}. 193 | 194 | \begin{table*}[htbp] 195 | \centering 196 | \begin{tabular}[c]{|l||l|l|l|} 197 | \hline 198 | Basis&Support&Suitable signals&Unsuitable signals\\ 199 | \hline 200 | Fourier&global&sine-like&localized\\ 201 | Wavelet&local&localized&sine-like\\ 202 | \hline 203 | \end{tabular} 204 | \caption{Characteristics of the Fourier and wavelet bases.} 205 | \label{tab:fourier-wavelet} 206 | \end{table*} 207 | 208 | When reporting computational or measurement results, always 209 | report the mean (average value) along with a measure of variability 210 | (standard deviation(s) or standard error of the mean). 211 | 212 | 213 | \section{Tips for Good Software} 214 | \label{sec:tips-software} 215 | 216 | There is a lot of literature (for example~\cite{hunt99pragmatic} and 217 | \cite{spolsky04software}) on how to write software. It is not the 218 | intention of this section to replace software engineering 219 | courses. However, in the interests of reproducible 220 | research~\cite{schwab00}, there are a few guidelines to make your 221 | reader happy: 222 | \begin{itemize} 223 | \item Have a \texttt{README} file that (at least) describes what your 224 | software does, and which commands to run to obtain results. Also 225 | mention anything special that needs to be set up, such as 226 | toolboxes\footnote{For those who are 227 | particularly interested, other common structures can be found at 228 | \url{http://en.wikipedia.org/wiki/README} and 229 | \url{http://www.gnu.org/software/womb/gnits/}.}. 230 | \item A list of authors and contributors can be included in a file 231 | called \texttt{AUTHORS}, acknowledging any help that you may have 232 | obtained. For small projects, this information is often also 233 | included in the \texttt{README}. 234 | \item Use meaningful filenames, and not \texttt{temp1.py}, 235 | \texttt{temp2.py}. 236 | \item Document your code. Each file should at least have a short 237 | description about its reason for existence. Non-obvious steps in the 238 | code should be commented. Function arguments and return values should be described. 239 | \item Describe how the results presented in your paper can be reproduced. 240 | \end{itemize} 241 | 242 | 243 | \subsection{\LaTeX{} Primer} 244 | \label{sec:latex-primer} 245 | 246 | \LaTeX{} is one of the most commonly used document preparation systems 247 | for scientific journals and conferences.
It is based on the idea 248 | that authors should be able to focus on the content of what they are 249 | writing without being distracted by its visual presentation. 250 | The source of this file can be used as a starting point for how to use 251 | the different commands in \LaTeX{}. We are using an IEEE style for 252 | this course. 253 | 254 | \subsubsection{Installation} 255 | 256 | There are various packages available for processing \LaTeX{} 257 | documents. See our webpage for more links for getting started. 258 | 259 | \subsubsection{Compiling \LaTeX{}} 260 | Your directory should contain at least~4 files, in addition to image 261 | files. Images should ideally be in 262 | \texttt{.pdf} format (or \texttt{.png}). 263 | 264 | \subsubsection{Equations} 265 | 266 | There are three types of equations available: inline equations, for 267 | example $y=mx + c$, which appear in the text, unnumbered equations 268 | $$y=mx + c,$$ 269 | each presented on a line of its own, and numbered equations 270 | \begin{equation} 271 | \label{eq:linear} 272 | y = mx + c 273 | \end{equation} 274 | which you can refer to at a later point (Equation~(\ref{eq:linear})). 275 | 276 | \subsubsection{Tables and Figures} 277 | 278 | Tables and figures are ``floating'' objects, which means that the text 279 | can flow around them. 280 | Note that \texttt{figure*} and \texttt{table*} cause the corresponding 281 | figure or table to span both columns. 282 | 283 | 284 | 285 | \section{Summary} 286 | 287 | The aim of a scientific paper is to convey the idea or discovery of 288 | the researcher to the minds of the readers. The associated software 289 | package provides the relevant details, which are often only briefly 290 | explained in the paper, such that the research can be reproduced. 291 | To write good papers, identify your key idea, make your contributions 292 | explicit, and use examples and illustrations to describe the problems 293 | and solutions. 294 | 295 | \section*{Acknowledgements} 296 | The author thanks Christian Sigg for his careful reading and helpful 297 | suggestions. 298 | 299 | \newpage 300 | \bibliographystyle{IEEEtran} 301 | \bibliography{literature} 302 | 303 | \end{document} 304 | -------------------------------------------------------------------------------- /labs/mini-project/latex-example-paper/literature.bib: -------------------------------------------------------------------------------- 1 | 2 | @Article{kallet04methods, 3 | author = {Richard H Kallet}, 4 | title = {How to Write the Methods Section of a Research Paper}, 5 | journal = {Respiratory Care}, 6 | year = 2004, 7 | volume = 49, 8 | number = 10, 9 | pages = {1229--1232} 10 | } 11 | 12 | @Unpublished{anderson04, 13 | author = {Greg Anderson}, 14 | title = {How to Write a Paper in Scientific Journal Style and Format}, 15 | year = 2004, 16 | organization = {Bates College}, 17 | note = {http://abacus.bates.edu/~ganderso/biology/resources/writing/HTWtoc.html} 18 | } 19 | 20 | @Unpublished{jones08, 21 | author = {Simon Peyton Jones}, 22 | title = {How to write a great research paper}, 23 | note = {Microsoft Research Cambridge}, 24 | year = 2008} 25 | 26 | @Article{editor10, 27 | author = {Editorial}, 28 | title = {Scientific writing 101}, 29 | journal = {Nature Structural \& Molecular Biology}, 30 | year = 2010, 31 | volume = 17, 32 | pages = 139} 33 | 34 | @TechReport{wavelab, 35 | author = {Jonathan B. Buckheit and David L. 
Donoho}, 36 | title = {WaveLab and Reproducible Research}, 37 | institution = {Stanford University}, 38 | year = 2009} 39 | 40 | @article{gentleman05, 41 | title = {Reproducible Research: A Bioinformatics Case Study}, 42 | author = {Gentleman, Robert}, 43 | year = {2005}, 44 | journal = {Statistical Applications in Genetics and Molecular Biology}, 45 | volume = 4, 46 | number = 1, 47 | publisher = {The Berkeley Electronic Press}, 48 | url = {http://www.bepress.com/sagmb/vol4/iss1/art2} 49 | } 50 | 51 | @article{schwab00, 52 | author = {Schwab, Matthias and Karrenbach, Martin and Claerbout, Jon}, 53 | title = {Making scientific computations reproducible}, 54 | journal = {Computing in Science and Engg.}, 55 | volume = {2}, 56 | number = {6}, 57 | year = {2000}, 58 | issn = {1521-9615}, 59 | pages = {61--67}, 60 | doi = {http://dx.doi.org/10.1109/5992.881708}, 61 | publisher = {IEEE Educational Activities Department}, 62 | address = {Piscataway, NJ, USA}, 63 | } 64 | 65 | 66 | 67 | @Book{spolsky04software, 68 | author = {Joel Spolsky}, 69 | title = {Joel on Software: And on Diverse \& Occasionally Related Matters That Will Prove of Interest etc..: And on Diverse and Occasionally Related Matters ... or Ill-Luck, Work with Them in Some Capacity}, 70 | publisher = {APRESS}, 71 | year = 2004} 72 | 73 | @Book{hunt99pragmatic, 74 | author = {Andrew Hunt and David Thomas}, 75 | title = {The Pragmatic Programmer}, 76 | publisher = {Addison Wesley}, 77 | year = 1999} 78 | 79 | -------------------------------------------------------------------------------- /labs/mini-project/latex-example-paper/local_wdenoised_1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/mini-project/latex-example-paper/local_wdenoised_1d.png -------------------------------------------------------------------------------- /labs/mini-project/miniproject_description.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/mini-project/miniproject_description.pdf -------------------------------------------------------------------------------- /lecture_notes/lecture-notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/lecture_notes/lecture-notes.pdf -------------------------------------------------------------------------------- /slides/lecture01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture01.pdf -------------------------------------------------------------------------------- /slides/lecture02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture02.pdf -------------------------------------------------------------------------------- /slides/lecture03.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture03.pdf -------------------------------------------------------------------------------- /slides/lecture04.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture04.pdf -------------------------------------------------------------------------------- /slides/lecture05.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture05.pdf -------------------------------------------------------------------------------- /slides/lecture06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture06.pdf -------------------------------------------------------------------------------- /slides/lecture07.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture07.pdf -------------------------------------------------------------------------------- /slides/lecture08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture08.pdf -------------------------------------------------------------------------------- /slides/lecture09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture09.pdf -------------------------------------------------------------------------------- /slides/lecture10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture10.pdf -------------------------------------------------------------------------------- /slides/lecture11.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture11.pdf -------------------------------------------------------------------------------- /slides/lecture12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture12.pdf --------------------------------------------------------------------------------