├── .gitignore
├── README.md
├── exams
│   ├── exam2018.pdf
│   ├── exam2018solutions.pdf
│   ├── exam2019.pdf
│   ├── exam2019solutions.pdf
│   ├── exam2020.pdf
│   ├── exam2020solutions.pdf
│   ├── exam2021.pdf
│   ├── exam2021solutions.pdf
│   ├── exam2022.pdf
│   ├── exam2022solutions.pdf
│   ├── exam2023.pdf
│   ├── exam2023solutions.pdf
│   ├── exam2024.pdf
│   └── exam2024solutions.pdf
├── labs
│   ├── ex00
│   │   ├── exercise00.pdf
│   │   ├── npprimer.ipynb
│   │   └── python_setup_tutorial.md
│   ├── ex01
│   │   ├── exercise01.pdf
│   │   └── solutions01.pdf
│   ├── ex02
│   │   ├── exercise02.pdf
│   │   ├── solution
│   │   │   ├── Concrete_Data.csv
│   │   │   ├── Lab 2 - Gradient Descent.ipynb
│   │   │   ├── grid_search.py
│   │   │   ├── height_weight_genders.csv
│   │   │   ├── helpers.py
│   │   │   └── plots.py
│   │   ├── solutions02.pdf
│   │   └── template
│   │       ├── Concrete_Data.csv
│   │       ├── Lab 2 - Gradient Descent.ipynb
│   │       ├── grid_search.py
│   │       ├── height_weight_genders.csv
│   │       ├── helpers.py
│   │       └── plots.py
│   ├── ex03
│   │   ├── exercise03.pdf
│   │   ├── solution
│   │   │   └── notebook.ipynb
│   │   ├── solution03.pdf
│   │   └── template
│   │       └── notebook.ipynb
│   ├── ex04
│   │   ├── exercise04.pdf
│   │   ├── solution
│   │   │   ├── Lab 4 - Random Walks.ipynb
│   │   │   ├── helpers.py
│   │   │   ├── solution_lab04.ipynb
│   │   │   └── torus_topology.png
│   │   ├── solution04.pdf
│   │   └── template
│   │       ├── Lab 4 - Random Walks--alternative.ipynb
│   │       ├── helpers.py
│   │       ├── notebook_lab04.ipynb
│   │       └── torus_topology.png
│   ├── ex05
│   │   ├── exercise05.pdf
│   │   ├── solution
│   │   │   ├── Concrete_Data.csv
│   │   │   ├── Lab 5 - Stochastic Gradient Descent.ipynb
│   │   │   ├── gradient_descent.py
│   │   │   ├── grid_search.py
│   │   │   ├── height_weight_genders.csv
│   │   │   ├── helpers.py
│   │   │   └── plots.py
│   │   ├── solution05.pdf
│   │   └── template
│   │       ├── Concrete_Data.csv
│   │       ├── Lab 5 - Stochastic Gradient Descent.ipynb
│   │       ├── gradient_descent.py
│   │       ├── grid_search.py
│   │       ├── height_weight_genders.csv
│   │       ├── helpers.py
│   │       └── plots.py
│   ├── ex06
│   │   ├── exercise06.pdf
│   │   ├── solution
│   │   │   ├── Lab 6.ipynb
│   │   │   ├── accelerated_mixing.ipynb
│   │   │   ├── helper.py
│   │   │   └── mixing_helpers.py
│   │   ├── solution06.pdf
│   │   └── template
│   │       ├── Lab_6.ipynb
│   │       └── helper.py
│   ├── ex07
│   │   ├── exercise07.pdf
│   │   ├── solution
│   │   │   └── Lab 7 - Fixed Point with Newton.ipynb
│   │   ├── solution07.pdf
│   │   └── template
│   │       └── Lab 7 - Fixed Point with Newton.ipynb
│   ├── ex08
│   │   ├── exercise08.pdf
│   │   ├── solution
│   │   │   ├── Coordinate_descent.ipynb
│   │   │   ├── data
│   │   │   │   └── w1a
│   │   │   └── solution-svm-derivation.pdf
│   │   ├── solution08.pdf
│   │   └── template
│   │       ├── Lab_8.ipynb
│   │       └── data
│   │           └── w1a
│   ├── ex09
│   │   ├── exercise09.pdf
│   │   ├── solution
│   │   │   ├── dataset_generation_NOT_NECESSARY_FOR_STUDENTS
│   │   │   │   ├── generate_csv.py
│   │   │   │   └── movielens100k.mat
│   │   │   ├── ex09-MatrixCompletion.ipynb
│   │   │   ├── helpers.py
│   │   │   ├── movielens100k.csv
│   │   │   ├── optimizers.py
│   │   │   ├── plots.py
│   │   │   ├── stat_ratings.png
│   │   │   └── train_test.png
│   │   ├── solution09.pdf
│   │   └── template
│   │       ├── ex09-MatrixCompletion.ipynb
│   │       ├── helpers.py
│   │       ├── movielens100k.csv
│   │       ├── optimizers.py
│   │       └── plots.py
│   ├── ex10
│   │   ├── exercise10.pdf
│   │   └── solution10.pdf
│   └── mini-project
│       ├── latex-example-paper
│       │   ├── IEEEtran.cls
│       │   ├── denoised_signal_1d.png
│       │   ├── latex-template.pdf
│       │   ├── latex-template.tex
│       │   ├── literature.bib
│       │   └── local_wdenoised_1d.png
│       └── miniproject_description.pdf
├── lecture_notes
│   └── lecture-notes.pdf
└── slides
    ├── lecture01.pdf
    ├── lecture02.pdf
    ├── lecture03.pdf
    ├── lecture04.pdf
    ├── lecture05.pdf
    ├── lecture06.pdf
    ├── lecture07.pdf
    ├── lecture08.pdf
    ├── lecture09.pdf
    ├── lecture10.pdf
    ├── lecture11.pdf
    └── lecture12.pdf

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

.DS_Store

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# EPFL Course - Optimization for Machine Learning - CS-439

[Official coursebook information](http://edu.epfl.ch/coursebook/en/optimization-for-machine-learning-CS-439)

`Lectures:` Fri 13:15-15:00 in [CO2](https://plan.epfl.ch/?room==CO%202)

`Exercises:` Fri 15:15-17:00 in [BC01](https://plan.epfl.ch/?room==BC%2001)

This course teaches an overview of modern mathematical optimization methods, for applications in machine learning and data science. In particular, the scalability of algorithms to large datasets will be discussed in theory and in implementation.

### Team
- Instructors:
  - Nicolas Flammarion [nicolas.flammarion@epfl.ch](mailto:nicolas.flammarion@epfl.ch)
- Assistants:
  - Aditya Varre [aditya.varre@epfl.ch](mailto:aditya.varre@epfl.ch)
  - Oguz Kaan Yüksel [oguz.yuksel@epfl.ch](mailto:oguz.yuksel@epfl.ch)
  - Thomas Weinberger [thomas.weinberger@epfl.ch](mailto:thomas.weinberger@epfl.ch)
  - Yitao Xu [yitao.xu@epfl.ch](mailto:yitao.xu@epfl.ch)

`Contents:`

Convexity, Gradient Methods, Proximal algorithms, Subgradient Methods, Stochastic and Online Variants of the mentioned methods, Coordinate Descent, Frank-Wolfe, Accelerated Methods, Primal-Dual context and certificates, Lagrange and Fenchel Duality, Second-Order Methods including Quasi-Newton Methods, Derivative-Free Optimization.

*Advanced Contents:*

Parallel and Distributed Optimization Algorithms, Federated Learning

Computational Trade-Offs (Time vs Data vs Accuracy), Lower Bounds

Non-Convex Optimization: Convergence to Critical Points, Alternating minimization, Neural network training

### Program:
| Nr | Date  | Topic                                                  | Materials                                        | Exercises                              |
| -- | ----- | ------------------------------------------------------ | ------------------------------------------------ | -------------------------------------- |
| 1  | 21.2. | Introduction, Convexity                                | [slides](../../raw/master/slides/lecture01.pdf)  | [lab00](../../raw/master/labs/ex00/exercise00.pdf) |
| 2  | 28.2. | Gradient Descent                                       | [slides](../../raw/master/slides/lecture02.pdf)  | [lab01](../../raw/master/labs/ex01/exercise01.pdf) |
| 3  | 7.3.  | Projected Gradient Descent                             | [slides](../../raw/master/slides/lecture03.pdf)  | [lab02](../../raw/master/labs/ex02/exercise02.pdf) |
| 4  | 14.3. | Proximal and Subgradient Descent                       | [slides](../../raw/master/slides/lecture04.pdf)  | [lab03](../../raw/master/labs/ex03/exercise03.pdf) |
| 5  | 21.3. | Stochastic Gradient Descent, Non-Convex Optimization   | [slides](../../raw/master/slides/lecture05.pdf)  | [lab04](../../raw/master/labs/ex04/exercise04.pdf) |
| 6  | 28.3. | Non-Convex Optimization                                | [slides](../../raw/master/slides/lecture06.pdf)  | [lab05](../../raw/master/labs/ex05/exercise05.pdf) |
| 7  | 4.4.  | Newton's Method & Quasi-Newton                         | [slides](../../raw/master/slides/lecture07.pdf)  | [lab06](../../raw/master/labs/ex06/exercise06.pdf) |
| 8  | 11.4. | Coordinate Descent                                     | [slides](../../raw/master/slides/lecture08.pdf)  | lab07                                  |
| .  | 18.4. | `easter vacation`                                      |                                                  | -                                      |
| .  | 25.4. | `easter vacation`                                      |                                                  | -                                      |
| 9  | 2.5.  | Frank-Wolfe                                            | [slides](../../raw/master/slides/lecture09.pdf)  | [lab08](../../raw/master/labs/ex08/exercise08.pdf) |
| 10 | 9.5.  | Lower Bounds and Accelerated Gradient Descent          | [slides](../../raw/master/slides/lecture10.pdf)  | [lab09](../../raw/master/labs/ex09/exercise09.pdf) |
| 11 | 16.5. | Gradient-free and adaptive methods                     | [slides](../../raw/master/slides/lecture11.pdf)  | [lab10](../../raw/master/labs/ex10/exercise10.pdf) |
| 12 | 23.5. | Optimization for Large Language Models (LLMs) (guest lecture from the SwissAI LLMs team) | [slides](../../raw/master/slides/lecture12.pdf) | Q&A Projects |
| 13 | 30.5. | `Mini-Project week`                                    |                                                  | -                                      |

### Lecture Notes:
The course is based on the following [lecture notes](../../raw/master/lecture_notes/lecture-notes.pdf).

### Videos:
The [videos](https://mediaspace.epfl.ch/channel/CS-439+Optimization+for+machine+learning/31980) of the lectures for each week will be available.

### Exercises:
The [weekly exercises](../../tree/master/labs/) consist of a mix of theoretical and practical `Python` exercises for the corresponding topic each week (starting week 2). Solutions to exercises are available in the lab folder.

### Forum:
[Discussion forum](https://edstem.org/eu/courses/2015/discussion/) (EPFL internal)

### Project:
A `mini-project` will focus on the practical implementation: here we encourage students to investigate the real-world performance of one of the studied optimization algorithms or one of its variants, helping to provide solid empirical evidence for some aspects of its behaviour on a real machine-learning task.
The project is mandatory and done in groups of three students. It counts for 30% of the final grade. Project reports (3-page PDF) are due June 13th. Here is a [detailed project description](../../raw/master/labs/mini-project/miniproject_description.pdf).

### Assessment:
Session Exam. Format: Closed book. Theoretical questions similar to exercises. You are allowed to bring one cheat sheet (A4-size paper; both sides can be used).

For practice:
- exams [2024](../../raw/master/exams/exam2024.pdf), [2023](../../raw/master/exams/exam2023.pdf), [2022](../../raw/master/exams/exam2022.pdf), [2021](../../raw/master/exams/exam2021.pdf), [2020](../../raw/master/exams/exam2020.pdf), [2019](../../raw/master/exams/exam2019.pdf), [2018](../../raw/master/exams/exam2018.pdf)
- solutions [2024](../../raw/master/exams/exam2024solutions.pdf), [2023](../../raw/master/exams/exam2023solutions.pdf), [2022](../../raw/master/exams/exam2022solutions.pdf), [2021](../../raw/master/exams/exam2021solutions.pdf), [2020](../../raw/master/exams/exam2020solutions.pdf), [2019](../../raw/master/exams/exam2019solutions.pdf), [2018](../../raw/master/exams/exam2018solutions.pdf).

### Links to related courses and materials
- [CMU 10-725](https://www.stat.cmu.edu/~ryantibs/convexopt-F18/)
- [Berkeley EE-227C](https://ee227c.github.io/)

### Recommended Books
- [Convex Optimization: Algorithms and Complexity](https://arxiv.org/pdf/1405.4980.pdf), by Sébastien Bubeck (free online)
- [Convex Optimization](http://stanford.edu/~boyd/cvxbook/), Stephen Boyd and Lieven Vandenberghe (free online)
- [Introductory Lectures on Convex Optimization](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.693.855&rep=rep1&type=pdf), Yurii Nesterov (free online)

--------------------------------------------------------------------------------
/exams/exam2018.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2018.pdf
--------------------------------------------------------------------------------
/exams/exam2018solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2018solutions.pdf
--------------------------------------------------------------------------------
/exams/exam2019.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2019.pdf
--------------------------------------------------------------------------------
/exams/exam2019solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2019solutions.pdf
--------------------------------------------------------------------------------
/exams/exam2020.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2020.pdf
--------------------------------------------------------------------------------
/exams/exam2020solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2020solutions.pdf
-------------------------------------------------------------------------------- /exams/exam2021.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2021.pdf -------------------------------------------------------------------------------- /exams/exam2021solutions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2021solutions.pdf -------------------------------------------------------------------------------- /exams/exam2022.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2022.pdf -------------------------------------------------------------------------------- /exams/exam2022solutions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2022solutions.pdf -------------------------------------------------------------------------------- /exams/exam2023.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2023.pdf -------------------------------------------------------------------------------- /exams/exam2023solutions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2023solutions.pdf -------------------------------------------------------------------------------- /exams/exam2024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2024.pdf -------------------------------------------------------------------------------- /exams/exam2024solutions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/exams/exam2024solutions.pdf -------------------------------------------------------------------------------- /labs/ex00/exercise00.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex00/exercise00.pdf -------------------------------------------------------------------------------- /labs/ex00/python_setup_tutorial.md: -------------------------------------------------------------------------------- 1 | # Setup Guide for Coding Machine Learning and Optimization Methods - EPFL ML and OptML Courses 2 | 3 | In order to implement the algorithms seen in class and work on the projects, we'll be using Python notebooks. This first lab will serve as an introduction to the Python language, the environment we are going to be using, and how to do basic vector and matrix manipulations. 4 | 5 | ## The environment 6 | 7 | We recommend two easy-to-use online environments (EPFL Noto or Google Colab) or a local installation (Anaconda). 

### Online: EPFL Noto & Google Colab

#### EPFL Noto
EPFL's centralized JupyterLab platform, called [Noto](https://www.epfl.ch/education/educational-initiatives/cede/digitaltools/jupyter-notebooks-for-education/), allows teachers and students to use Jupyter (Python) notebooks without having to install anything on their computer: they can easily access, modify and run notebooks online with a simple web browser. EPFL Noto files are automatically saved on your EPFL account and remain available there.

[Click here for an automatic setup for CS-439 on your EPFL Noto and start with the introduction exercise `npprimer.ipynb`.](https://noto.epfl.ch/hub/user-redirect/git-pull?repo=https://github.com/epfml/OptML_course&urlpath=lab/tree/OptML_course/labs/ex00/npprimer.ipynb)

To get the latest content from GitHub (when it is updated):
1. open a terminal window in Noto (via the '+' sign)
2. `cd` to `OptML_course`
3. add and commit any changes you have made locally (alternatively, stash your changes using `git stash` if you don't want to keep them)
4. execute `git pull`

After opening the terminal (step 1):
```bash
cd
cd OptML_course
git add *
git commit -m "your custom message about these changes"
git pull
```

#### Google Colab

Google Colab provides a similar environment to Noto, with additional access to GPUs (not needed in the first few labs). Note that you need to take care of storing the files permanently yourself (storing on Google Drive, downloading to a local machine, ...).

You can open any exercise by replacing `XY` with the lab number and `PATH_TO_FILE` with the path of the notebook you wish to open:
`http://colab.research.google.com/github/epfml/OptML_course/blob/master/labs/exXY/PATH_TO_FILE`

E.g. for the numpy introduction `npprimer.ipynb`:
[`http://colab.research.google.com/github/epfml/OptML_course/blob/master/labs/ex00/npprimer.ipynb`](http://colab.research.google.com/github/epfml/OptML_course/blob/master/labs/ex00/npprimer.ipynb)

You can also create an empty notebook by following this [link](https://colab.research.google.com/) and clicking `"NEW NOTEBOOK"`, or you can open a pre-existing notebook (.ipynb extension) by selecting the `Upload` tab.

If for some reason you've opened a Python 2 notebook, you can switch to Python 3 by going to `Runtime > Change runtime type`. There you can also add a GPU to your notebook if necessary.

### Offline: Python distribution Anaconda

If you prefer to have an environment locally on your computer, you can use the [Anaconda](https://www.anaconda.com/) distribution to run Python 3, as it is easy to install and comes with most packages we will need. To install Anaconda, go to [the download page](https://www.anaconda.com/distribution/) and get the Python installer for your OS - make sure to use the newer version 3.x, not 2.x. Follow the instructions of the installer and you're done.
> **Warning!** The installer will ask you if you want to add Anaconda to your path. Your default answer should be yes, unless you have specific reasons not to want this.


### Development Environment

During the course, we will use [**Jupyter Notebooks**](http://jupyter.org/), which is a great tool for exploratory and interactive programming and in particular for data analysis.
Notebooks are browser-based, and you start a notebook on your localhost by typing `jupyter notebook` in the console. Jupyter is already installed by default with Anaconda. The interface is pretty intuitive, but there are a few tweaks and shortcuts that will make your life easier, which we'll detail in the next section. You can of course ask any of the TAs for help on using the Notebooks.

### The Notebook System

For additional resources on how the notebook system works, we recommend

* [The Jupyter notebook beginner's guide](https://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/index.html)
* [The official documentation](http://jupyter-notebook.readthedocs.io/en/latest/index.html)

#### Examples

We provide you with an example of a notebook for [this first lab](https://github.com/epfml/OptML_course/tree/master/labs/ex00), but if you want to see some more examples already, feel free to take a look at

* The introductory notebooks available at [Try Jupyter](https://try.jupyter.org/). It spawns an instance of the Jupyter Notebook, which won't save any of your changes.
  *Note: it might not be available if their server is under too much load.*
* [A gallery of interesting IPython Notebooks](https://github.com/jupyter/jupyter/wiki/A-gallery-of-interesting-Jupyter-Notebooks) by the IPython Notebook team

#### Tips & Tricks

There are a few handy commands that you should start every notebook with:

    # Plot figures in the notebook (instead of a new window)
    %matplotlib notebook

    # Automatically reload modules
    %load_ext autoreload
    %autoreload 2

    # The usual imports
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

#### Keyboard shortcuts

* Adding cells
  * `a` adds an empty cell above the selected one,
  * `b` adds it below.
* Running code
  * `Enter` enters the edit mode of the currently selected cell.
  * `Shift-Enter` runs the current cell and goes to the next one.
  * `Ctrl-Enter` runs the current cell and leaves it selected.
* Autocompletion (Jupyter notebook)
  * `Tab` pops up the autocompletion when you are in the middle of writing a function call/class name, and shows the arguments of the function being called when used after an opening parenthesis.
  * `Shift-Tab` pops up the help/documentation of the function it's used on.
* Autocompletion (Google Colab)
  * `Ctrl-Space` pops up the autocompletion when you are in the middle of writing a function call/class name, and shows the arguments of the function being called when used after an opening parenthesis.
  * Clicking on a function name and hovering over it will pop up the help/documentation for that function.

* For a complete list of shortcuts, go to `help > keyboard shortcuts`

## Python

We will be working in Python. If you have already been introduced to Python, feel free to skip this section. If you come from another background, you might want to take some tutorials in addition to this lab in the next week to feel comfortable with it. You do not need to become an expert in Python, but you should be comfortable with the general syntax, some of the idiosyncrasies of Python, and know how to do basic vector and matrix algebra. For the last part, we will be using NumPy, a library we will introduce later.
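As a quick taste of those points, here is a small, self-contained snippet; every name in it is made up purely for illustration. It shows the `foreach`-style loops, the default/named function arguments, and the basic data structures that the reading list below highlights:

```python
# for-loops iterate directly over elements ("foreach" style)
scores = [3, 1, 4, 1, 5]
for s in scores:
    print(s)

# use enumerate() if you also need the index
for i, s in enumerate(scores):
    print(i, s)

# functions with default and named arguments
def gd_step(x, gamma=0.1, verbose=False):
    if verbose:
        print("taking a step of size", gamma)
    return x - gamma

print(gd_step(1.0))                # uses the defaults
print(gd_step(1.0, verbose=True))  # arguments can be passed by name

# basic data structures: list (above), dict, tuple
params = {"gamma": 0.1, "max_iters": 50}  # dict
bounds = (0.0, 2.0)                       # tuple
print(params["gamma"], bounds[0])
```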

For a nice introduction to Python, you should take a look at [the Python tutorial](https://docs.python.org/3/tutorial/index.html). Here are some reading recommendations:

* Skim through Sections 1-3 to get an idea of the Python syntax if you have never used it.
* Pay a little more attention to Section 4, especially

  * Section 4.2 on for loops, as they behave like `foreach` by default, which may be disturbing if you are more accustomed to coding in lower-level languages.
  * Section 4.7 on functions, default argument values and named arguments, as they are a real pleasure to use (compared to traditional, order-based arguments) once you are used to them.
* Section 5 on Data Structures, especially how to use Lists, Dictionaries and Tuples if you have not used a language with those concepts before.
* You can keep Sections 6-9 on Modules, IO, Exceptions and Objects for later - when you know you will be needing them.
* Section 10 on the standard library and [the standard library index](https://docs.python.org/3/library/index.html) are worth a quick scroll to see what's available.
* Do not bother with Sections 11-16 for now.

Here are some additional resources on Python:

* [Python's standard library reference](https://docs.python.org/3/library/index.html)
* [Debugging and profiling](https://docs.python.org/3/library/debug.html)
* If you want to, some exercises are available at [learnpython.org](http://www.learnpython.org/)


## NumPy and Vector Calculations

Our `npprimer.ipynb` notebook, as part of the first lab, has some useful commands and exercises to help you get started with NumPy.

We recommend [this list of small exercises](https://www.machinelearningplus.com/101-numpy-exercises-python/) to get started with NumPy arrays etc.

If you are familiar with Matlab, a good starting point is [this guide](https://docs.scipy.org/doc/numpy/user/numpy-for-matlab-users.html). Note that we will use the `array` data structure much more than the `matrix` data structure.

A good and probably more complete reference is [this one](https://sites.engineering.ucsb.edu/~shell/che210d/numpy.pdf).


### Installation FAQ

> **Other shell.** If you are using another shell (e.g. zsh on Mac OS X), after installing Anaconda you still need to add the installed software to your path, that is, to add it to the correct profile of your shell. To do so, run the following commands in your terminal: `touch ~/.bash_profile; open ~/.bash_profile`. It will open your bash profile, where you'll see the line that was added by the Python installer. Copy it. Then `touch ~/.zshrc; open ~/.zshrc` will open the profile for zsh; you can paste the line at the bottom of the file.

> **Alternative Python IDEs.** While we recommend plain Jupyter Notebooks, if you are more comfortable using a more traditional IDE, you can give [**PyCharm**](https://www.jetbrains.com/pycharm/) a try. Your EPFL email gives you access to the free educational version. You should keep this option in mind if you need a full-fledged debugger to find a nasty bug.

And of course, as a third alternative, you can always use a [decent text editor](https://www.sublimetext.com/) and run your code from the console or any plugin. Keep in mind that the TAs might not be able to help you with your setup if you go down this path.
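Whichever setup you choose, a minimal sanity check like the following (the numbers are arbitrary) confirms that NumPy works and previews the kind of `array`-based vector and matrix operations the labs rely on:

```python
import numpy as np

# a small matrix A (2x3) and a vector x (3,), both plain arrays
A = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])
x = np.array([1.0, 0.0, -1.0])

print(A.shape, x.shape)   # (2, 3) (3,)
print(A.dot(x))           # matrix-vector product: [-2. -2.]
print(A @ x)              # the same product, via the @ operator
print(np.linalg.norm(x))  # Euclidean norm of x: 1.414...
```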

## Download the exercises content & basic Git tutorial

### Simplest: no git

You can click on the green `Code` button on the main [page](https://github.com/epfml/OptML_course) and select `Download ZIP`. We advise against this method, as you might have to re-download the repository every time new content is posted there.

### Still simple: using GitHub Desktop

GitHub Desktop simplifies the interaction with a GitHub repository by providing a simple GUI; check it out [here](https://desktop.github.com/). GitHub Desktop supports most 'real' `git` use cases such as the ones described below.

### More advanced: Git via command line

`Git` is the most widely used version control system. It's a tool to share and help you collaboratively develop and maintain code. GitHub is a Git repository hosting service; it allows you to create GitHub repositories you can interact with using `git`.

`Git` is typically used via the terminal. To install Git, follow this [link](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git).

**Download repository.** Once Git is installed, you can clone a GitHub repository using `git clone <repository url>`, e.g. `git clone https://github.com/epfml/OptML_course.git`.

**Collaborative coding.** A standard workflow when working as a group is to implement features through pull requests (PRs), as consolidated in the example after this list:
* You do not want to break the master branch by mistake, so you start by creating and moving to a new branch: `git checkout -b <new branch name>`
* Now you're safe on your new branch: the modifications you make won't affect the master branch. You can modify/create new files as if you were on the master branch, e.g.

```bash
# let's say we modify file.py here
git status # check the status of the files git is tracking
git add file.py
git commit -m "some message clearly explaining the modification"
```
* Once you are done with all the modifications you want, you can push to your new branch: `git push origin <new branch name>`.
* Finally, you can open a PR from the GitHub user interface. Typically you would ask your colleagues to review your PR and accept it or ask for modifications.
* Once your PR is accepted and merged, do not forget to switch back to master: `git checkout master`, and pull your approved changes: `git pull origin master`.
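Putting those steps together, one complete PR cycle looks like this; `my-feature` and `file.py` are placeholder names:

```bash
git clone https://github.com/epfml/OptML_course.git   # done once
git checkout -b my-feature         # create and switch to a new branch

# ... edit file.py ...
git status                         # see what changed
git add file.py
git commit -m "clearly explain the modification"
git push origin my-feature         # publish the branch, then open a PR on GitHub

# after the PR has been reviewed and merged:
git checkout master
git pull origin master             # get the merged changes locally
```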
177 | 178 | ## Additional References 179 | 180 | [A good Python and NumPy Tutorial from Stanford.](https://github.com/kuleshov/cs228-material/blob/master/tutorials/python/cs228-python-tutorial.ipynb) 181 | -------------------------------------------------------------------------------- /labs/ex01/exercise01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex01/exercise01.pdf -------------------------------------------------------------------------------- /labs/ex01/solutions01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex01/solutions01.pdf -------------------------------------------------------------------------------- /labs/ex02/exercise02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex02/exercise02.pdf -------------------------------------------------------------------------------- /labs/ex02/solution/grid_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Exercise 2. 3 | 4 | Grid Search 5 | """ 6 | 7 | import numpy as np 8 | 9 | 10 | def generate_w(num_intervals): 11 | """Generate a grid of values for w0 and w1.""" 12 | w0 = np.linspace(-100, 200, num_intervals) 13 | w1 = np.linspace(-150, 150, num_intervals) 14 | return w0, w1 15 | 16 | 17 | def grid_search(y, tx, w0, w1): 18 | """Algorithm for grid search.""" 19 | losses = np.zeros((len(w0), len(w1))) 20 | # compute loss for each combination of w0 and w1. 
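    # (the loss is the mean squared error: (1/(2n)) * sum of the squared residuals e = y - tx.dot(w))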
21 | for ind_row, row in enumerate(w0): 22 | for ind_col, col in enumerate(w1): 23 | w = np.array([row, col]) 24 | e = y - tx.dot(w) 25 | losses[ind_row, ind_col] = 1/2*np.mean(e**2) 26 | return losses 27 | 28 | 29 | def get_best_parameters(w0, w1, losses): 30 | """Get the best w from the result of grid search.""" 31 | min_row, min_col = np.unravel_index(np.argmin(losses), losses.shape) 32 | return losses[min_row, min_col], w0[min_row], w1[min_col] 33 | -------------------------------------------------------------------------------- /labs/ex02/solution/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some helper functions.""" 3 | import numpy as np 4 | 5 | 6 | def load_data(sub_sample=True, add_outlier=False): 7 | """Load data and convert it to the metric system.""" 8 | path_dataset = "height_weight_genders.csv" 9 | data = np.genfromtxt( 10 | path_dataset, delimiter=",", skip_header=1, usecols=[1, 2]) 11 | height = data[:, 0] 12 | weight = data[:, 1] 13 | gender = np.genfromtxt( 14 | path_dataset, delimiter=",", skip_header=1, usecols=[0], 15 | converters={0: lambda x: 0 if b"Male" in x else 1}) 16 | # Convert to metric system 17 | height *= 0.025 18 | weight *= 0.454 19 | 20 | # sub-sample 21 | if sub_sample: 22 | height = height[::50] 23 | weight = weight[::50] 24 | 25 | if add_outlier: 26 | # outlier experiment 27 | height = np.concatenate([height, [1.1, 1.2]]) 28 | weight = np.concatenate([weight, [51.5/0.454, 55.3/0.454]]) 29 | 30 | return height, weight, gender 31 | 32 | 33 | def standardize(x): 34 | """Standardize the original data set.""" 35 | mean_x = np.mean(x,axis = 0) 36 | x = x - mean_x 37 | std_x = np.std(x, axis = 0) 38 | x = x / std_x 39 | return x, mean_x, std_x 40 | 41 | 42 | def build_model_data(height, weight): 43 | """Form (y,tX) to get regression data in matrix form.""" 44 | y = weight 45 | x = height 46 | num_samples = len(y) 47 | tx = np.c_[np.ones(num_samples), x] 48 | return y, tx 49 | 50 | -------------------------------------------------------------------------------- /labs/ex02/solution/plots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """function for plot.""" 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from grid_search import get_best_parameters 6 | 7 | 8 | def prediction(w0, w1, mean_x, std_x): 9 | """Get the regression line from the model.""" 10 | x = np.arange(1.2, 2, 0.01) 11 | x_normalized = (x - mean_x) / std_x 12 | return x, w0 + w1 * x_normalized 13 | 14 | 15 | def base_visualization(grid_losses, w0_list, w1_list, 16 | mean_x, std_x, height, weight): 17 | """Base Visualization for both models.""" 18 | w0, w1 = np.meshgrid(w0_list, w1_list) 19 | 20 | fig = plt.figure() 21 | 22 | # plot contourf 23 | ax1 = fig.add_subplot(1, 2, 1) 24 | cp = ax1.contourf(w0, w1, grid_losses.T, cmap=plt.cm.jet) 25 | fig.colorbar(cp, ax=ax1) 26 | ax1.set_xlabel(r'$w_0$') 27 | ax1.set_ylabel(r'$w_1$') 28 | # put a marker at the minimum 29 | loss_star, w0_star, w1_star = get_best_parameters( 30 | w0_list, w1_list, grid_losses) 31 | ax1.plot(w0_star, w1_star, marker='*', color='r', markersize=20) 32 | 33 | 34 | 35 | return fig 36 | 37 | 38 | def grid_visualization(grid_losses, w0_list, w1_list, 39 | mean_x, std_x, height, weight): 40 | """Visualize how the trained model looks like under the grid search.""" 41 | fig = base_visualization( 42 | grid_losses, w0_list, w1_list, mean_x, std_x, height, weight) 43 | 
44 | loss_star, w0_star, w1_star = get_best_parameters( 45 | w0_list, w1_list, grid_losses) 46 | # plot prediciton 47 | x, f = prediction(w0_star, w1_star, mean_x, std_x) 48 | ax2 = fig.get_axes()[2] 49 | ax2.plot(x, f, 'r') 50 | 51 | return fig 52 | 53 | 54 | def gradient_descent_visualization( 55 | gradient_losses, gradient_ws, 56 | grid_losses, grid_w0, grid_w1, 57 | mean_x, std_x, height, weight, n_iter=None): 58 | """Visualize how the loss value changes until n_iter.""" 59 | fig = base_visualization( 60 | grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight) 61 | 62 | ws_to_be_plotted = np.stack(gradient_ws) 63 | if n_iter is not None: 64 | ws_to_be_plotted = ws_to_be_plotted[:n_iter] 65 | 66 | ax1 = fig.get_axes()[0] 67 | ax1.plot( 68 | ws_to_be_plotted[:, 0], ws_to_be_plotted[:, 1], 69 | marker='o', color='w', markersize=10) 70 | pred_x, pred_y = prediction( 71 | ws_to_be_plotted[-1, 0], ws_to_be_plotted[-1, 1], 72 | mean_x, std_x) 73 | 74 | return fig 75 | -------------------------------------------------------------------------------- /labs/ex02/solutions02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex02/solutions02.pdf -------------------------------------------------------------------------------- /labs/ex02/template/Lab 2 - Gradient Descent.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Useful starting lines\n", 10 | "%matplotlib inline\n", 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "%load_ext autoreload\n", 14 | "%autoreload 2" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Check the Python version\n", 24 | "import sys\n", 25 | "if sys.version.startswith(\"3.\"):\n", 26 | " print(\"You are running Python 3. 
Good job :)\")\n", 27 | "else:\n", 28 | " print(\"This notebook requires Python 3.\\nIf you are using Google Colab, go to Runtime > Change runtime type and choose Python 3.\")" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "try:\n", 38 | " import google.colab\n", 39 | " IN_COLAB = True\n", 40 | "except:\n", 41 | " IN_COLAB = False\n", 42 | "if IN_COLAB:\n", 43 | " # Clone the entire repo to access the files.\n", 44 | " !git clone -l -s https://github.com/epfml/OptML_course.git cloned-repo\n", 45 | " %cd cloned-repo/labs/ex02/template/" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "# Load the data" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "import datetime\n", 62 | "from helpers import *\n", 63 | "\n", 64 | "height, weight, gender = load_data(sub_sample=False, add_outlier=False)\n", 65 | "x, mean_x, std_x = standardize(height)\n", 66 | "b, A = build_model_data(x, weight)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "print('Number of samples n = ', b.shape[0])\n", 76 | "print('Dimension of each sample d = ', A.shape[1])" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# Least Squares Estimation\n", 84 | "Least squares estimation is one of the fundamental machine learning algorithms. Given an $ n \\times d $ matrix $A$ and a $ n \\times 1$ vector $b$, the goal is to find a vector $x \\in \\mathbb{R}^d$ which minimizes the objective function $$f(x) = \\frac{1}{2n} \\sum_{i=1}^{n} (a_i^\\top x - b_i)^2 = \\frac{1}{2n} \\|Ax - b\\|^2 $$\n", 85 | "\n", 86 | "In this exercise, we will try to fit $x$ using Least Squares Estimation. \n", 87 | "\n", 88 | "One can see the function is $L$ smooth with $L =\\frac1n\\|A^T A\\| = \\frac1n\\|A\\|^2$ (Lemma 2.3 for the first equality, and a few manipulations for the second)." 
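,
    "\n",
    "Concretely, $\|A^\top A\| = \sigma_{\max}(A)^2 = \|A\|^2$, where $\sigma_{\max}(A)$ is the largest singular value of $A$ and $\|A\|$ denotes its spectral norm; this gives the second equality."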
89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "# Computing the Objective Function\n", 96 | "Fill in the `calculate_objective` function below:" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "def calculate_objective(Axmb):\n", 106 | " \"\"\"Calculate the mean squared error for vector Axmb = Ax - b.\"\"\"\n", 107 | " # ***************************************************\n", 108 | " # INSERT YOUR CODE HERE\n", 109 | " # TODO: compute mean squared error\n", 110 | " # ***************************************************\n", 111 | " raise NotImplementedError" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "# Compute smoothness constant $L$" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "To compute the spectral norm of A you can use np.linalg.norm(A, 2)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "def calculate_L(b, A):\n", 135 | " \"\"\"Calculate the smoothness constant for f\"\"\"\n", 136 | " # ***************************************************\n", 137 | " # INSERT YOUR CODE HERE\n", 138 | " # TODO: compute ||A.T*A||\n", 139 | " # ***************************************************\n", 140 | " raise NotImplementedError\n", 141 | " # ***************************************************\n", 142 | " # INSERT YOUR CODE HERE\n", 143 | " # TODO: compute L = smoothness constant of f\n", 144 | " # ***************************************************\n", 145 | " raise NotImplementedError\n", 146 | " return L" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "# Gradient Descent" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "Please fill in the functions `compute_gradient` below:" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "def compute_gradient(b, A, x):\n", 170 | " \"\"\"Compute the gradient.\"\"\"\n", 171 | " # ***************************************************\n", 172 | " # INSERT YOUR CODE HERE\n", 173 | " # TODO: compute gradient and objective\n", 174 | " # ***************************************************\n", 175 | " raise NotImplementedError\n", 176 | " return grad, Axmb" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "Please fill in the functions `gradient_descent` below:" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "def gradient_descent(b, A, initial_x, max_iters, gamma):\n", 193 | " \"\"\"Gradient descent algorithm.\"\"\"\n", 194 | " # Define parameters to store x and objective func. 
values\n", 195 | " xs = [initial_x]\n", 196 | " objectives = []\n", 197 | " x = initial_x\n", 198 | " for n_iter in range(max_iters):\n", 199 | " # ***************************************************\n", 200 | " # INSERT YOUR CODE HERE\n", 201 | " # TODO: compute gradient and objective function\n", 202 | " # ***************************************************\n", 203 | " raise NotImplementedError\n", 204 | " # ***************************************************\n", 205 | " # INSERT YOUR CODE HERE\n", 206 | " # TODO: update x by a gradient descent step\n", 207 | " # ***************************************************\n", 208 | " raise NotImplementedError\n", 209 | " # store x and objective function value\n", 210 | " xs.append(x)\n", 211 | " objectives.append(obj)\n", 212 | " print(\"Gradient Descent({bi}/{ti}): objective={l}\".format(\n", 213 | " bi=n_iter, ti=max_iters - 1, l=obj))\n", 214 | "\n", 215 | " return objectives, xs" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "Test your gradient descent function with a naive step size through gradient descent demo shown below:" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "# from gradient_descent import *\n", 232 | "from plots import gradient_descent_visualization\n", 233 | "\n", 234 | "# Define the parameters of the algorithm.\n", 235 | "max_iters = 50\n", 236 | "\n", 237 | "gamma = 0.1\n", 238 | "\n", 239 | "# Initialization\n", 240 | "x_initial = np.zeros(A.shape[1])\n", 241 | "\n", 242 | "# Start gradient descent.\n", 243 | "start_time = datetime.datetime.now()\n", 244 | "gradient_objectives_naive, gradient_xs_naive = gradient_descent(b, A, x_initial, max_iters, gamma)\n", 245 | "end_time = datetime.datetime.now()\n", 246 | "\n", 247 | "# Print result\n", 248 | "exection_time = (end_time - start_time).total_seconds()\n", 249 | "print(\"Gradient Descent: execution time={t:.3f} seconds\".format(t=exection_time))" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "Time Visualization" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "from ipywidgets import IntSlider, interact\n", 266 | "from grid_search import *\n", 267 | "\n", 268 | "def plot_figure(n_iter):\n", 269 | " # Generate grid data for visualization (parameters to be swept and best combination)\n", 270 | " grid_x0, grid_x1 = generate_w(num_intervals=10)\n", 271 | " grid_objectives = grid_search(b, A, grid_x0, grid_x1)\n", 272 | " obj_star, x0_star, x1_star = get_best_parameters(grid_x0, grid_x1, grid_objectives)\n", 273 | " \n", 274 | " fig = gradient_descent_visualization(\n", 275 | " gradient_objectives_naive, gradient_xs_naive, grid_objectives, grid_x0, grid_x1, mean_x, std_x, height, weight, n_iter)\n", 276 | " fig.set_size_inches(10.0, 6.0)\n", 277 | "\n", 278 | "interact(plot_figure, n_iter=IntSlider(min=1, max=len(gradient_xs_naive)))" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "Try doing gradient descent with a better learning rate" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "# Define the parameters of the algorithm.\n", 295 | "max_iters = 50\n", 296 | "\n", 297 | "# 
***************************************************\n", 298 | "# INSERT YOUR CODE HERE\n", 299 | "# TODO: a better learning rate using the smoothness of f\n", 300 | "# ***************************************************\n", 301 | "gamma = \n", 302 | "raise NotImplementedError\n", 303 | "\n", 304 | "# Initialization\n", 305 | "x_initial = np.zeros(A.shape[1])\n", 306 | "\n", 307 | "# Start gradient descent.\n", 308 | "start_time = datetime.datetime.now()\n", 309 | "gradient_objectives, gradient_xs = gradient_descent(b, A, x_initial, max_iters, gamma)\n", 310 | "end_time = datetime.datetime.now()\n", 311 | "\n", 312 | "# Print result\n", 313 | "exection_time = (end_time - start_time).total_seconds()\n", 314 | "print(\"Gradient Descent: execution time={t:.3f} seconds\".format(t=exection_time))" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "Time visualization with a better learning rate" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "def plot_figure(n_iter):\n", 331 | " # Generate grid data for visualization (parameters to be swept and best combination)\n", 332 | " grid_x0, grid_x1 = generate_w(num_intervals=10)\n", 333 | " grid_objectives = grid_search(b, A, grid_x0, grid_x1)\n", 334 | " obj_star, x0_star, x1_star = get_best_parameters(grid_x0, grid_x1, grid_objectives)\n", 335 | " \n", 336 | " fig = gradient_descent_visualization(\n", 337 | " gradient_objectives, gradient_xs, grid_objectives, grid_x0, grid_x1, mean_x, std_x, height, weight, n_iter)\n", 338 | " fig.set_size_inches(10.0, 6.0)\n", 339 | "\n", 340 | "interact(plot_figure, n_iter=IntSlider(min=1, max=len(gradient_xs)))" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "# Loading more complex data\n", 348 | "The data is taken from https://archive.ics.uci.edu/ml/datasets/Concrete+Compressive+Strength " 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "data = np.loadtxt(\"Concrete_Data.csv\",delimiter=\",\")\n", 358 | "\"\"\"Note that running this part will change the A above, and it will cause error in the interaction plot!\"\"\"\n", 359 | "A = data[:,:-1]\n", 360 | "b = data[:,-1]\n", 361 | "A, mean_A, std_A = standardize(A)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "print('Number of samples n = ', b.shape[0])\n", 371 | "print('Dimension of each sample d = ', A.shape[1])" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "# Running gradient descent" 379 | ] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "metadata": {}, 384 | "source": [ 385 | "## Assuming bounded gradients\n", 386 | "Assume we are moving in a bounded region $\\|x\\| \\leq 25$ containing all iterates (and we assume $\\|x-x^\\star\\| \\leq 25$ as well, for simplicity). 
Then by $\\nabla f(x) = \\frac{1}{n}A^\\top (Ax - b)$, one can see that $f$ is Lipschitz over that bounded region, with Lipschitz constant $\\|\\nabla f(x)\\| \\leq \\frac{1}{n} (\\|A^\\top A\\|\\|x\\| + \\|A^\\top b\\|)$" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "# ***************************************************\n", 396 | "# INSERT YOUR CODE HERE\n", 397 | "# TODO: Compute the bound on the gradient norm\n", 398 | "# ***************************************************\n", 399 | "grad_norm_bound = \n", 400 | "raise NotImplementedError" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "Fill in the learning rate assuming bounded gradients" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "max_iters = 50\n", 417 | "\n", 418 | "# ***************************************************\n", 419 | "# INSERT YOUR CODE HERE\n", 420 | "# TODO: Compute learning rate based on bounded gradient\n", 421 | "# ***************************************************\n", 422 | "gamma = \n", 423 | "raise NotImplementedError\n", 424 | "\n", 425 | "# Initialization\n", 426 | "x_initial = np.zeros(A.shape[1])\n", 427 | "\n", 428 | "# Start gradient descent.\n", 429 | "start_time = datetime.datetime.now()\n", 430 | "bd_gradient_objectives, bd_gradient_xs = gradient_descent(b, A, x_initial, max_iters, gamma)\n", 431 | "end_time = datetime.datetime.now()\n", 432 | "\n", 433 | "\n", 434 | "# Print result\n", 435 | "exection_time = (end_time - start_time).total_seconds()\n", 436 | "print(\"Gradient Descent: execution time={t:.3f} seconds\".format(t=exection_time))\n", 437 | "\n", 438 | "# Averaging the iterates as is the case for bounded gradients case\n", 439 | "bd_gradient_objectives_averaged = []\n", 440 | "for i in range(len(bd_gradient_xs)):\n", 441 | " if i > 0:\n", 442 | " bd_gradient_xs[i] = (i * bd_gradient_xs[i-1] + bd_gradient_xs[i])/(i + 1)\n", 443 | " grad, err = compute_gradient(b, A, bd_gradient_xs[i])\n", 444 | " obj = calculate_objective(err)\n", 445 | " bd_gradient_objectives_averaged.append(obj)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "## Gradient descent using smoothness\n", 453 | "Fill in the learning rate using smoothness of the function" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [ 462 | "max_iters = 50\n", 463 | "\n", 464 | "\n", 465 | "# ***************************************************\n", 466 | "# INSERT YOUR CODE HERE\n", 467 | "# TODO: a better learning rate using the smoothness of f\n", 468 | "# ***************************************************\n", 469 | "gamma = \n", 470 | "raise NotImplementedError\n", 471 | "\n", 472 | "# Initialization\n", 473 | "x_initial = np.zeros(A.shape[1])\n", 474 | "\n", 475 | "# Start gradient descent.\n", 476 | "start_time = datetime.datetime.now()\n", 477 | "gradient_objectives, gradient_xs = gradient_descent(b, A, x_initial, max_iters, gamma)\n", 478 | "end_time = datetime.datetime.now()\n", 479 | "\n", 480 | "# Print result\n", 481 | "exection_time = (end_time - start_time).total_seconds()\n", 482 | "print(\"Gradient Descent: execution time={t:.3f} seconds\".format(t=exection_time))" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 
487 | "metadata": {}, 488 | "source": [ 489 | "## Plotting the Evolution of the Objective Function" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": {}, 496 | "outputs": [], 497 | "source": [ 498 | "plt.figure(figsize=(8, 8))\n", 499 | "plt.xlabel('Number of steps')\n", 500 | "plt.ylabel('Objective Function')\n", 501 | "#plt.yscale(\"log\")\n", 502 | "plt.plot(range(len(gradient_objectives)), gradient_objectives,'r', label='gradient descent with 1/L stepsize')\n", 503 | "plt.plot(range(len(bd_gradient_objectives)), bd_gradient_objectives,'b', label='gradient descent assuming bounded gradients')\n", 504 | "plt.plot(range(len(bd_gradient_objectives_averaged)), bd_gradient_objectives_averaged,'g', label='gradient descent assuming bounded gradients with averaged iterates')\n", 505 | "plt.legend(loc='upper right')\n", 506 | "plt.show()" 507 | ] 508 | } 509 | ], 510 | "metadata": { 511 | "anaconda-cloud": {}, 512 | "kernelspec": { 513 | "display_name": "Python 3", 514 | "language": "python", 515 | "name": "python3" 516 | }, 517 | "language_info": { 518 | "codemirror_mode": { 519 | "name": "ipython", 520 | "version": 3 521 | }, 522 | "file_extension": ".py", 523 | "mimetype": "text/x-python", 524 | "name": "python", 525 | "nbconvert_exporter": "python", 526 | "pygments_lexer": "ipython3", 527 | "version": "3.7.4" 528 | }, 529 | "widgets": { 530 | "state": { 531 | "d2b2c3aea192430e81437f33ba0b0e69": { 532 | "views": [ 533 | { 534 | "cell_index": 22 535 | } 536 | ] 537 | }, 538 | "e4a6a7a70ccd42ddb112989c04f2ed3f": { 539 | "views": [ 540 | { 541 | "cell_index": 18 542 | } 543 | ] 544 | } 545 | }, 546 | "version": "1.2.0" 547 | } 548 | }, 549 | "nbformat": 4, 550 | "nbformat_minor": 1 551 | } 552 | -------------------------------------------------------------------------------- /labs/ex02/template/grid_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Exercise 2. 3 | 4 | Grid Search 5 | """ 6 | 7 | import numpy as np 8 | 9 | 10 | def generate_w(num_intervals): 11 | """Generate a grid of values for w0 and w1.""" 12 | w0 = np.linspace(-100, 200, num_intervals) 13 | w1 = np.linspace(-150, 150, num_intervals) 14 | return w0, w1 15 | 16 | 17 | def grid_search(y, tx, w0, w1): 18 | """Algorithm for grid search.""" 19 | losses = np.zeros((len(w0), len(w1))) 20 | # compute loss for each combination of w0 and w1. 
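    # (the loss is the mean squared error: (1/(2n)) * sum of the squared residuals e = y - tx.dot(w))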
21 | for ind_row, row in enumerate(w0): 22 | for ind_col, col in enumerate(w1): 23 | w = np.array([row, col]) 24 | e = y - tx.dot(w) 25 | losses[ind_row, ind_col] = 1/2*np.mean(e**2) 26 | return losses 27 | 28 | 29 | def get_best_parameters(w0, w1, losses): 30 | """Get the best w from the result of grid search.""" 31 | min_row, min_col = np.unravel_index(np.argmin(losses), losses.shape) 32 | return losses[min_row, min_col], w0[min_row], w1[min_col] 33 | -------------------------------------------------------------------------------- /labs/ex02/template/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some helper functions.""" 3 | import numpy as np 4 | 5 | 6 | def load_data(sub_sample=True, add_outlier=False): 7 | """Load data and convert it to the metric system.""" 8 | path_dataset = "height_weight_genders.csv" 9 | data = np.genfromtxt( 10 | path_dataset, delimiter=",", skip_header=1, usecols=[1, 2]) 11 | height = data[:, 0] 12 | weight = data[:, 1] 13 | gender = np.genfromtxt( 14 | path_dataset, delimiter=",", skip_header=1, usecols=[0], 15 | converters={0: lambda x: 0 if b"Male" in x else 1}) 16 | # Convert to metric system 17 | height *= 0.025 18 | weight *= 0.454 19 | 20 | # sub-sample 21 | if sub_sample: 22 | height = height[::50] 23 | weight = weight[::50] 24 | 25 | if add_outlier: 26 | # outlier experiment 27 | height = np.concatenate([height, [1.1, 1.2]]) 28 | weight = np.concatenate([weight, [51.5/0.454, 55.3/0.454]]) 29 | 30 | return height, weight, gender 31 | 32 | 33 | def standardize(x): 34 | """Standardize the original data set.""" 35 | mean_x = np.mean(x,axis = 0) 36 | x = x - mean_x 37 | std_x = np.std(x, axis = 0) 38 | x = x / std_x 39 | return x, mean_x, std_x 40 | 41 | 42 | def build_model_data(height, weight): 43 | """Form (y,tX) to get regression data in matrix form.""" 44 | y = weight 45 | x = height 46 | num_samples = len(y) 47 | tx = np.c_[np.ones(num_samples), x] 48 | return y, tx 49 | 50 | -------------------------------------------------------------------------------- /labs/ex02/template/plots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """function for plot.""" 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from grid_search import get_best_parameters 6 | 7 | 8 | def prediction(w0, w1, mean_x, std_x): 9 | """Get the regression line from the model.""" 10 | x = np.arange(1.2, 2, 0.01) 11 | x_normalized = (x - mean_x) / std_x 12 | return x, w0 + w1 * x_normalized 13 | 14 | 15 | def base_visualization(grid_losses, w0_list, w1_list, 16 | mean_x, std_x, height, weight): 17 | """Base Visualization for both models.""" 18 | w0, w1 = np.meshgrid(w0_list, w1_list) 19 | 20 | fig = plt.figure() 21 | 22 | # plot contourf 23 | ax1 = fig.add_subplot(1, 2, 1) 24 | cp = ax1.contourf(w0, w1, grid_losses.T, cmap=plt.cm.jet) 25 | fig.colorbar(cp, ax=ax1) 26 | ax1.set_xlabel(r'$w_0$') 27 | ax1.set_ylabel(r'$w_1$') 28 | # put a marker at the minimum 29 | loss_star, w0_star, w1_star = get_best_parameters( 30 | w0_list, w1_list, grid_losses) 31 | ax1.plot(w0_star, w1_star, marker='*', color='r', markersize=20) 32 | 33 | 34 | 35 | return fig 36 | 37 | 38 | def grid_visualization(grid_losses, w0_list, w1_list, 39 | mean_x, std_x, height, weight): 40 | """Visualize how the trained model looks like under the grid search.""" 41 | fig = base_visualization( 42 | grid_losses, w0_list, w1_list, mean_x, std_x, height, weight) 43 | 
44 | loss_star, w0_star, w1_star = get_best_parameters( 45 | w0_list, w1_list, grid_losses) 46 | # plot prediciton 47 | x, f = prediction(w0_star, w1_star, mean_x, std_x) 48 | ax2 = fig.get_axes()[2] 49 | ax2.plot(x, f, 'r') 50 | 51 | return fig 52 | 53 | 54 | def gradient_descent_visualization( 55 | gradient_losses, gradient_ws, 56 | grid_losses, grid_w0, grid_w1, 57 | mean_x, std_x, height, weight, n_iter=None): 58 | """Visualize how the loss value changes until n_iter.""" 59 | fig = base_visualization( 60 | grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight) 61 | 62 | ws_to_be_plotted = np.stack(gradient_ws) 63 | if n_iter is not None: 64 | ws_to_be_plotted = ws_to_be_plotted[:n_iter] 65 | 66 | ax1 = fig.get_axes()[0] 67 | ax1.plot( 68 | ws_to_be_plotted[:, 0], ws_to_be_plotted[:, 1], 69 | marker='o', color='w', markersize=10) 70 | pred_x, pred_y = prediction( 71 | ws_to_be_plotted[-1, 0], ws_to_be_plotted[-1, 1], 72 | mean_x, std_x) 73 | 74 | return fig 75 | -------------------------------------------------------------------------------- /labs/ex03/exercise03.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex03/exercise03.pdf -------------------------------------------------------------------------------- /labs/ex03/solution03.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex03/solution03.pdf -------------------------------------------------------------------------------- /labs/ex03/template/notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Useful starting lines\n", 10 | "%matplotlib inline\n", 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "%load_ext autoreload\n", 14 | "%autoreload 2" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# Solving Fixed Point Problems\n", 22 | "\n", 23 | "In numerous applications, we encounter the task of solving equations of the form $$x = g(x)$$\n", 24 | "for a continuous function $g$. In this exercise we will see one simple method to solve such problems: $$x_{t+1} = g(x_t)\\,.$$\n", 25 | "We will solve two equations of this form: $$x = log(1+x)$$ and $$x = log(2+x)\\,.$$" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Plot $g$" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Let us see how the two functions look over an interval $[0,2]$." 
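,
    "\n",
    "As a sketch, the TODO in the next cell could be filled in as follows (one plausible completion, not necessarily the official solution):\n",
    "```python\n",
    "y1 = np.log(1 + x)   # values of g(x) for the problem x = log(1 + x)\n",
    "y2 = np.log(2 + x)   # values of g(x) for the problem x = log(2 + x)\n",
    "```"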
40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "x = np.arange(-0.1, 2, 0.0001)\n", 49 | "\"\"\"Compute the value of g(x).\"\"\"\n", 50 | "# ***************************************************\n", 51 | "# INSERT YOUR CODE HERE\n", 52 | "# TODO: compute the values of log(1+x) and log(2+x)\n", 53 | "# ***************************************************\n", 54 | "\n", 55 | "\"\"\"Plot the computed values\"\"\"\n", 56 | "fig = plt.figure()\n", 57 | "plt.plot(x, x, label='x')\n", 58 | "plt.plot(x, y1, label='$\\\log(1 + x)$')\n", 59 | "plt.plot(x, y2, label='$\\\log(2 + x)$')\n", 60 | "plt.grid(linestyle=':')\n", 61 | "plt.axhline(0, color='black')\n", 62 | "plt.axvline(0, color='black')\n", 63 | "plt.legend()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## Compute Lipschitz Constant of $g$\n", 71 | "\n", 72 | "Derive and plot the derivative of $\\log(1+x)$ and $\\log(2+x)$ over the interval $[0,2]$. What are the Lipschitz constants?" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "x = np.arange(-0.1, 2, 0.0001)\n", 82 | "\n", 83 | "\"\"\"Compute the derivative of g(x).\"\"\"\n", 84 | "# ***************************************************\n", 85 | "# INSERT YOUR CODE HERE\n", 86 | "# TODO: compute the derivatives of log(1+x) and log(2+x)\n", 87 | "# ***************************************************\n", 88 | "\n", 89 | "\"\"\"Plot the computed values\"\"\"\n", 90 | "fig = plt.figure()\n", 91 | "plt.plot(x, y1, label='$\\\log(1 + x)$')\n", 92 | "plt.plot(x, y2, label='$\\\log(2 + x)$')\n", 93 | "plt.grid(linestyle=':')\n", 94 | "plt.axhline(0, color='black')\n", 95 | "plt.axvline(0, color='black')\n", 96 | "plt.legend()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "## Computing Fixed Point" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Please fill in the function `fixed_point` below:" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "def fixed_point(initial_x, max_iters, objective):\n", 120 | " \"\"\"Compute the fixed point.\"\"\"\n", 121 | " # Define parameters to store x and objective func. values\n", 122 | " xs = []\n", 123 | " errors = []\n", 124 | " x = initial_x\n", 125 | " for n_iter in range(max_iters):\n", 126 | " # compute objective and error\n", 127 | " obj = objective(x)\n", 128 | " error = np.abs(x - obj)\n", 129 | " # store x and error\n", 130 | " xs.append(x)\n", 131 | " errors.append(error)\n", 132 | " \n", 133 | " # update x\n", 134 | " \n", 135 | " # ***************************************************\n", 136 | " # INSERT YOUR CODE HERE\n", 137 | " # TODO: compute the update\n", 138 | " # ***************************************************\n", 139 | " \n", 140 | " # print the current error\n", 141 | " if n_iter % 10 == 0: \n", 142 | " print(\"Fixed point: iteration = {i}, x = {x:.2e}, error = {err:.2e}\".format(i=n_iter, x=x, err=error))\n", 143 | " return errors, xs" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "Test your implementation on the first function. Does it converge to 0?"
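,
    "\n",
    "If you are stuck on the update step, one plausible completion of the loop body above (a sketch, not necessarily the official solution) is:\n",
    "```python\n",
    "x = obj   # one fixed-point step: x_{t+1} = g(x_t), reusing the already computed objective(x)\n",
    "```"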
151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# Define the parameters of the algorithm.\n", 160 | "max_iters = 100\n", 161 | "\n", 162 | "# Write the objective function\n", 163 | "func = lambda x: np.log(1 + x)\n", 164 | "\n", 165 | "# Initialization\n", 166 | "initial_x = 1\n", 167 | "\n", 168 | "# Run fixed point.\n", 169 | "errors_func1, xs_func1 = fixed_point(initial_x, max_iters, func)\n" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "Run your implementation on the second function." 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "# Define the parameters of the algorithm.\n", 186 | "max_iters = 100\n", 187 | "\n", 188 | "# Write the second objective function\n", 189 | "func = lambda x: np.log(2 + x)\n", 190 | "\n", 191 | "# Initialization\n", 192 | "initial_x = 1\n", 193 | "\n", 194 | "# Run fixed point.\n", 195 | "errors_func2, xs_func2 = fixed_point(initial_x, max_iters, func)\n" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "**Plotting error values**" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "scrolled": false 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "plt.semilogy()\n", 214 | "plt.xlabel('Number of steps')\n", 215 | "plt.ylabel('Value of Error')\n", 216 | "#plt.yscale(\"log\")\n", 217 | "plt.plot(range(len(errors_func1)), errors_func1, label='$\\\log(1 + x)$')\n", 218 | "plt.plot(range(len(errors_func2)), errors_func2, label='$\\\log(2 + x)$')\n", 219 | "\n", 220 | "plt.legend()\n", 221 | "plt.show()" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "What do you observe about the rates of convergence on the two problems? Can you explain this difference?\n", 229 | "\n", 230 | "Repeat the experiment with different starting points. Try also with $x=\\sin(x)$, $x = \\sin(x+1)$, and $x = \\sin(x+2)$. How about other functions?"
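,
    "\n",
    "One way to study the rates numerically (a sketch; the helper name and defaults below are my own, not part of the lab):\n",
    "```python\n",
    "def error_ratios(g, x0=1.0, n_iters=50):\n",
    "    # ratios error_{t+1} / error_t for the iteration x_{t+1} = g(x_t)\n",
    "    x, errs = x0, []\n",
    "    for _ in range(n_iters):\n",
    "        errs.append(np.abs(x - g(x)))\n",
    "        x = g(x)\n",
    "    return [errs[t + 1] / errs[t] for t in range(n_iters - 1)]\n",
    "\n",
    "# For log(2+x) the ratios approach |g'(x*)| = 1/(2 + x*) ~ 0.32 (a linear rate);\n",
    "# for log(1+x) they creep towards 1, since g'(0) = 1 (sublinear convergence).\n",
    "print(error_ratios(lambda x: np.log(2 + x))[-1])\n",
    "print(error_ratios(lambda x: np.log(1 + x))[-1])\n",
    "```"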
231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [] 239 | } 240 | ], 241 | "metadata": { 242 | "anaconda-cloud": {}, 243 | "kernelspec": { 244 | "display_name": "Python 3", 245 | "language": "python", 246 | "name": "python3" 247 | }, 248 | "language_info": { 249 | "codemirror_mode": { 250 | "name": "ipython", 251 | "version": 3 252 | }, 253 | "file_extension": ".py", 254 | "mimetype": "text/x-python", 255 | "name": "python", 256 | "nbconvert_exporter": "python", 257 | "pygments_lexer": "ipython3", 258 | "version": "3.7.4" 259 | }, 260 | "toc": { 261 | "base_numbering": 1, 262 | "nav_menu": {}, 263 | "number_sections": true, 264 | "sideBar": true, 265 | "skip_h1_title": false, 266 | "title_cell": "Table of Contents", 267 | "title_sidebar": "Contents", 268 | "toc_cell": false, 269 | "toc_position": {}, 270 | "toc_section_display": true, 271 | "toc_window_display": false 272 | }, 273 | "widgets": { 274 | "state": { 275 | "d2b2c3aea192430e81437f33ba0b0e69": { 276 | "views": [ 277 | { 278 | "cell_index": 22 279 | } 280 | ] 281 | }, 282 | "e4a6a7a70ccd42ddb112989c04f2ed3f": { 283 | "views": [ 284 | { 285 | "cell_index": 18 286 | } 287 | ] 288 | } 289 | }, 290 | "version": "1.2.0" 291 | } 292 | }, 293 | "nbformat": 4, 294 | "nbformat_minor": 1 295 | } 296 | -------------------------------------------------------------------------------- /labs/ex04/exercise04.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex04/exercise04.pdf -------------------------------------------------------------------------------- /labs/ex04/solution/helpers.py: -------------------------------------------------------------------------------- 1 | import networkx 2 | import numpy as np 3 | 4 | n_nodes = 16 5 | def generate_torus_adj_matrix(n_nodes): 6 | G = networkx.generators.lattice.grid_2d_graph(int(np.sqrt(n_nodes)), int(np.sqrt(n_nodes)), periodic=True) 7 | # Adjacency matrix 8 | A = networkx.adjacency_matrix(G).toarray() 9 | 10 | # Add self-loops 11 | for i in range(0, A.shape[0]): 12 | A[i][i] = 1 13 | return A -------------------------------------------------------------------------------- /labs/ex04/solution/torus_topology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex04/solution/torus_topology.png -------------------------------------------------------------------------------- /labs/ex04/solution04.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex04/solution04.pdf -------------------------------------------------------------------------------- /labs/ex04/template/Lab 4 - Random Walks--alternative.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# Useful starting lines\n", 12 | "%matplotlib inline\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from helpers import *\n", 16 | "%load_ext autoreload\n", 17 | "%autoreload 2" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | 
"metadata": {}, 23 | "source": [ 24 | "# Random Walks" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "In this exercise you will implement a simple random walk on a torus graph and will check its convergence to uniform distribution.\n", 32 | "\n", 33 | "Torus is a 2D-grid graph and looks like a 'doughnout', as shown in the picture below. \n", 34 | "\"Drawing\"" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "**Note:** We will use the networkx library to generate our graph. You can install this using\n", 42 | "\n", 43 | "```bash\n", 44 | " pip3 install --upgrade --user networkx\n", 45 | "```\n", 46 | "\n", 47 | "Let's generate the probability matrix $\\mathbf{G}$ of a torus graph of size $4\\times 4$, note that we include self-loops too. You can play around with the code in the helpers.py to generate different graphs." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "n_nodes = 25\n", 59 | "A = generate_torus_adj_matrix(n_nodes)\n", 60 | "G = A/5" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "Implement random walk on a torus." 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": true 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "def random_walk(G, num_iter):\n", 79 | " ''' Performs num_iter steps of a random walk on a graph with graph transition matrix G,\n", 80 | " starting from the initial position at node 1.\n", 81 | " Output:\n", 82 | " positions: an array of integer positions in a graph at each iteration\n", 83 | " '''\n", 84 | " pos = 0\n", 85 | " positions = [pos]\n", 86 | " for t in range(0, num_iter):\n", 87 | " pos = # TODO: fill in here code to jump to random neighbor\n", 88 | " positions.append(pos)\n", 89 | " return positions" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "Now we will estimate probability distribution at each iteration. For this we will perform random walk on a graph 100 times and calculate how frequently we visited each node at each iteration." 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "num_iter = 15\n", 108 | "num_repeat = 100 * 1000\n", 109 | "frequencies = np.zeros((num_iter + 1, n_nodes))\n", 110 | "for _ in range(num_repeat):\n", 111 | " positions = random_walk(G, num_iter)\n", 112 | " frequencies[np.arange(len(positions)), np.array(positions)] += 1\n", 113 | "frequencies /= num_repeat" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "Lets plot our distribution at the last step." 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": { 127 | "collapsed": true 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "plt.bar(np.arange(n_nodes), frequencies[-1])\n", 132 | "plt.xlabel(\"node\")\n", 133 | "plt.ylabel(\"probability\")" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "You can see that the final disctribution is indeed uniform. Lets now plot how fast did the algorithm converge. 
For this you need to compute the errors $||x_{t} - \\mu||_2^2$ at each iteration, where $x_{t}$ is the distribution at step $t$ and $\\mu$ is the uniform distribution." 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "errors = # TODO: fill in here" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "Let's now plot how fast the algorithm converged. We will use a logarithmic scale on the y-axis to be able to distinguish between sublinear and linear rates." 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": { 165 | "collapsed": true 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "plt.semilogy(errors)\n", 170 | "plt.xlabel(\"iteration\")\n", 171 | "plt.ylabel(\"$||x_{t} - \\\mu||_2^2$\")\n", 172 | "plt.show()" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "anaconda-cloud": {}, 178 | "kernelspec": { 179 | "display_name": "Python 3", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.5.2" 194 | }, 195 | "widgets": { 196 | "state": { 197 | "d2b2c3aea192430e81437f33ba0b0e69": { 198 | "views": [ 199 | { 200 | "cell_index": 22 201 | } 202 | ] 203 | }, 204 | "e4a6a7a70ccd42ddb112989c04f2ed3f": { 205 | "views": [ 206 | { 207 | "cell_index": 18 208 | } 209 | ] 210 | } 211 | }, 212 | "version": "1.2.0" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 1 217 | } 218 | -------------------------------------------------------------------------------- /labs/ex04/template/helpers.py: -------------------------------------------------------------------------------- 1 | import networkx 2 | import numpy as np 3 | 4 | n_nodes = 16 5 | def generate_torus_adj_matrix(n_nodes): 6 | G = networkx.generators.lattice.grid_2d_graph(int(np.sqrt(n_nodes)), int(np.sqrt(n_nodes)), periodic=True) 7 | # Adjacency matrix 8 | A = networkx.adjacency_matrix(G).toarray() 9 | 10 | # Add self-loops 11 | for i in range(0, A.shape[0]): 12 | A[i][i] = 1 13 | return A -------------------------------------------------------------------------------- /labs/ex04/template/notebook_lab04.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Useful starting lines\n", 10 | "%matplotlib inline\n", 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "try:\n", 14 | " import google.colab\n", 15 | " IN_COLAB = True\n", 16 | "except:\n", 17 | " IN_COLAB = False\n", 18 | "if IN_COLAB:\n", 19 | " # Clone the entire repo to access the files.\n", 20 | " !git clone -l -s https://github.com/epfml/OptML_course.git cloned-repo\n", 21 | " %cd cloned-repo/labs/ex04/template/\n", 22 | "\n", 23 | "from helpers import *\n", 24 | "%load_ext autoreload\n", 25 | "%autoreload 2" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Random Walks" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "In this exercise you will implement a simple random 
walk on a torus graph and check its convergence to the uniform distribution.\n", 40 | "\n", 41 | "A torus is a 2D grid graph that looks like a 'doughnut', as shown in the picture below. \n", 42 | "\"Drawing\"" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "**Note:** We will use the networkx library to generate our graph. You can install this using\n", 50 | "\n", 51 | "```bash\n", 52 | " pip3 install --upgrade --user networkx\n", 53 | "```\n", 54 | "\n", 55 | "Let's generate the probability matrix $\\mathbf{G}$ of a torus graph of size $4\\times 4$; note that we include self-loops too. You can play around with the code in helpers.py to generate different graphs." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "n_nodes = 16\n", 65 | "A = generate_torus_adj_matrix(n_nodes)\n", 66 | "degree = # fill in here the degree of a node in the graph\n", 67 | "G = A/degree" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "Let's generate the initial probability distribution. Recall that our walk always starts from node 1." 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "x_init = # fill in here" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "As you will prove in Q2, the probability distribution at each step evolves as $x_{t + 1} = G x_{t}$. " 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "def random_walk(G, x_init, num_iter):\n", 100 | " ''' Computes the probability distribution of the random walk after\n", 101 | " num_iter steps.\n", 102 | " Output: \n", 103 | " x: final estimate of the probability distribution after\n", 104 | " num_iter steps\n", 105 | " errors: array of differences ||x_{t} - mu||_2^2, where\n", 106 | " mu is the uniform distribution\n", 107 | " '''\n", 108 | " x = np.copy(x_init)\n", 109 | " errors = np.zeros(num_iter)\n", 110 | " mu = # fill in here\n", 111 | " for t in range(0, num_iter):\n", 112 | " # ***************************************************\n", 113 | " # INSERT YOUR CODE HERE\n", 114 | " # TODO: simulate probability distribution in random walk\n", 115 | " # ***************************************************\n", 116 | " return x, errors" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "Let's run our algorithm for 50 iterations and look at the final probability distribution." 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "x, errors = random_walk(G, x_init, num_iter=50)\n", 133 | "plt.bar(np.arange(len(x)), x)\n", 134 | "plt.xlabel(\"node\")\n", 135 | "plt.ylabel(\"probability\")" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "We see that the final distribution is indeed uniform. Let's now plot how fast the algorithm converged. We will use a logarithmic scale on the y-axis to be able to distinguish between sublinear and linear rates."
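,
    "\n",
    "In case the `random_walk` cell above is still incomplete, here is a minimal sketch of one valid completion (an assumption on my part, not the official solution):\n",
    "```python\n",
    "mu = np.ones(len(x_init)) / len(x_init)   # uniform target distribution\n",
    "for t in range(0, num_iter):\n",
    "    x = G @ x                              # one step of x_{t+1} = G x_t\n",
    "    errors[t] = np.sum((x - mu) ** 2)      # squared L2 distance to uniform\n",
    "```"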
143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "plt.semilogy(errors)\n", 152 | "plt.xlabel(\"iteration\")\n", 153 | "plt.ylabel(\"$||x_{t} - mu||_2^2$\")" 154 | ] 155 | } 156 | ], 157 | "metadata": { 158 | "anaconda-cloud": {}, 159 | "kernelspec": { 160 | "display_name": "Python 3", 161 | "language": "python", 162 | "name": "python3" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | "name": "ipython", 167 | "version": 3 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython3", 174 | "version": "3.7.4" 175 | }, 176 | "widgets": { 177 | "state": { 178 | "d2b2c3aea192430e81437f33ba0b0e69": { 179 | "views": [ 180 | { 181 | "cell_index": 22 182 | } 183 | ] 184 | }, 185 | "e4a6a7a70ccd42ddb112989c04f2ed3f": { 186 | "views": [ 187 | { 188 | "cell_index": 18 189 | } 190 | ] 191 | } 192 | }, 193 | "version": "1.2.0" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 1 198 | } 199 | -------------------------------------------------------------------------------- /labs/ex04/template/torus_topology.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex04/template/torus_topology.png -------------------------------------------------------------------------------- /labs/ex05/exercise05.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex05/exercise05.pdf -------------------------------------------------------------------------------- /labs/ex05/solution/gradient_descent.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Lab 3. 3 | 4 | Gradient descent 5 | """ 6 | 7 | import numpy as np 8 | 9 | def calculate_mse(e): 10 | """Calculate the mean squared error for vector e.""" 11 | return 1/2*np.mean(e**2) 12 | 13 | def compute_gradient(b, A, x): 14 | """Compute the gradient.""" 15 | err = b - A.dot(x) 16 | grad = -A.T.dot(err) / len(err) 17 | return grad, err 18 | 19 | def gradient_descent(b, A, initial_x, max_iters, gamma): 20 | """Gradient descent algorithm.""" 21 | # Define parameters to store x and objective func. values 22 | xs = [initial_x] 23 | objectives = [] 24 | x = initial_x 25 | for n_iter in range(max_iters): 26 | # compute objective, gradient 27 | grad, err = compute_gradient(b, A, x) 28 | obj = calculate_mse(err) 29 | # update x by a gradient descent step 30 | x = x - gamma * grad 31 | # store x and objective function value 32 | xs.append(x) 33 | objectives.append(obj) 34 | print("Gradient Descent({bi}/{ti}): objective={l}".format( 35 | bi=n_iter, ti=max_iters - 1, l=obj)) 36 | 37 | return objectives, xs -------------------------------------------------------------------------------- /labs/ex05/solution/grid_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Exercise 2. 
3 | 4 | Grid Search 5 | """ 6 | 7 | import numpy as np 8 | 9 | 10 | def generate_w(num_intervals): 11 | """Generate a grid of values for w0 and w1.""" 12 | w0 = np.linspace(-100, 200, num_intervals) 13 | w1 = np.linspace(-150, 150, num_intervals) 14 | return w0, w1 15 | 16 | 17 | def grid_search(y, tx, w0, w1): 18 | """Algorithm for grid search.""" 19 | losses = np.zeros((len(w0), len(w1))) 20 | # compute loss for each combination of w0 and w1. 21 | for ind_row, row in enumerate(w0): 22 | for ind_col, col in enumerate(w1): 23 | w = np.array([row, col]) 24 | e = y - tx.dot(w) 25 | losses[ind_row, ind_col] = 1/2*np.mean(e**2) 26 | return losses 27 | 28 | 29 | def get_best_parameters(w0, w1, losses): 30 | """Get the best w from the result of grid search.""" 31 | min_row, min_col = np.unravel_index(np.argmin(losses), losses.shape) 32 | return losses[min_row, min_col], w0[min_row], w1[min_col] 33 | -------------------------------------------------------------------------------- /labs/ex05/solution/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some helper functions.""" 3 | import numpy as np 4 | 5 | 6 | def load_data(sub_sample=True, add_outlier=False): 7 | """Load data and convert it to the metric system.""" 8 | path_dataset = "height_weight_genders.csv" 9 | data = np.genfromtxt( 10 | path_dataset, delimiter=",", skip_header=1, usecols=[1, 2]) 11 | height = data[:, 0] 12 | weight = data[:, 1] 13 | gender = np.genfromtxt( 14 | path_dataset, delimiter=",", skip_header=1, usecols=[0], 15 | converters={0: lambda x: 0 if b"Male" in x else 1}) 16 | # Convert to metric system 17 | height *= 0.025 18 | weight *= 0.454 19 | 20 | # sub-sample 21 | if sub_sample: 22 | height = height[::50] 23 | weight = weight[::50] 24 | 25 | if add_outlier: 26 | # outlier experiment 27 | height = np.concatenate([height, [1.1, 1.2]]) 28 | weight = np.concatenate([weight, [51.5/0.454, 55.3/0.454]]) 29 | 30 | return height, weight, gender 31 | 32 | 33 | def standardize(x): 34 | """Standardize the original data set.""" 35 | mean_x = np.mean(x,axis = 0) 36 | x = x - mean_x 37 | std_x = np.std(x, axis = 0) 38 | x = x / std_x 39 | return x, mean_x, std_x 40 | 41 | 42 | def build_model_data(height, weight): 43 | """Form (y,tX) to get regression data in matrix form.""" 44 | y = weight 45 | x = height 46 | num_samples = len(y) 47 | tx = np.c_[np.ones(num_samples), x] 48 | return y, tx 49 | 50 | 51 | def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True): 52 | """ 53 | Generate a minibatch iterator for a dataset. 54 | Takes as input two iterables (here the output desired values 'y' and the input data 'tx') 55 | Outputs an iterator which gives mini-batches of `batch_size` matching elements from `y` and `tx`. 56 | Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches. 
57 | Example of use : 58 | for minibatch_y, minibatch_tx in batch_iter(y, tx, 32): 59 | 60 | """ 61 | data_size = len(y) 62 | 63 | if shuffle: 64 | shuffle_indices = np.random.permutation(np.arange(data_size)) 65 | shuffled_y = y[shuffle_indices] 66 | shuffled_tx = tx[shuffle_indices] 67 | else: 68 | shuffled_y = y 69 | shuffled_tx = tx 70 | for batch_num in range(num_batches): 71 | start_index = batch_num * batch_size 72 | end_index = min((batch_num + 1) * batch_size, data_size) 73 | if start_index != end_index: 74 | yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index] 75 | 76 | -------------------------------------------------------------------------------- /labs/ex05/solution/plots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """function for plot.""" 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from grid_search import get_best_parameters 6 | 7 | 8 | def prediction(w0, w1, mean_x, std_x): 9 | """Get the regression line from the model.""" 10 | x = np.arange(1.2, 2, 0.01) 11 | x_normalized = (x - mean_x) / std_x 12 | return x, w0 + w1 * x_normalized 13 | 14 | 15 | def base_visualization(grid_losses, w0_list, w1_list, 16 | mean_x, std_x, height, weight): 17 | """Base Visualization for both models.""" 18 | w0, w1 = np.meshgrid(w0_list, w1_list) 19 | 20 | fig = plt.figure() 21 | 22 | # plot contourf 23 | ax1 = fig.add_subplot(1, 2, 1) 24 | cp = ax1.contourf(w0, w1, grid_losses.T, cmap=plt.cm.jet) 25 | fig.colorbar(cp, ax=ax1) 26 | ax1.set_xlabel(r'$w_0$') 27 | ax1.set_ylabel(r'$w_1$') 28 | # put a marker at the minimum 29 | loss_star, w0_star, w1_star = get_best_parameters( 30 | w0_list, w1_list, grid_losses) 31 | ax1.plot(w0_star, w1_star, marker='*', color='r', markersize=20) 32 | 33 | 34 | 35 | return fig 36 | 37 | 38 | def grid_visualization(grid_losses, w0_list, w1_list, 39 | mean_x, std_x, height, weight): 40 | """Visualize how the trained model looks like under the grid search.""" 41 | fig = base_visualization( 42 | grid_losses, w0_list, w1_list, mean_x, std_x, height, weight) 43 | 44 | loss_star, w0_star, w1_star = get_best_parameters( 45 | w0_list, w1_list, grid_losses) 46 | # plot prediciton 47 | x, f = prediction(w0_star, w1_star, mean_x, std_x) 48 | ax2 = fig.get_axes()[2] 49 | ax2.plot(x, f, 'r') 50 | 51 | return fig 52 | 53 | 54 | def gradient_descent_visualization( 55 | gradient_losses, gradient_ws, 56 | grid_losses, grid_w0, grid_w1, 57 | mean_x, std_x, height, weight, n_iter=None): 58 | """Visualize how the loss value changes until n_iter.""" 59 | fig = base_visualization( 60 | grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight) 61 | 62 | ws_to_be_plotted = np.stack(gradient_ws) 63 | if n_iter is not None: 64 | ws_to_be_plotted = ws_to_be_plotted[:n_iter] 65 | 66 | ax1 = fig.get_axes()[0] 67 | ax1.plot( 68 | ws_to_be_plotted[:, 0], ws_to_be_plotted[:, 1], 69 | marker='o', color='w', markersize=10) 70 | pred_x, pred_y = prediction( 71 | ws_to_be_plotted[-1, 0], ws_to_be_plotted[-1, 1], 72 | mean_x, std_x) 73 | 74 | return fig 75 | -------------------------------------------------------------------------------- /labs/ex05/solution05.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex05/solution05.pdf -------------------------------------------------------------------------------- /labs/ex05/template/gradient_descent.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Lab 3. 3 | 4 | Gradient descent 5 | """ 6 | 7 | import numpy as np 8 | 9 | def calculate_mse(e): 10 | """Calculate the mean squared error for vector e.""" 11 | # *************************************************** 12 | # INSERT YOUR CODE HERE 13 | # TODO: compute mean squared error 14 | # *************************************************** 15 | raise NotImplementedError 16 | 17 | def compute_gradient(b, A, x): 18 | """Compute the gradient.""" 19 | # *************************************************** 20 | # INSERT YOUR CODE HERE 21 | # TODO: compute gradient and objective 22 | # *************************************************** 23 | 24 | return grad, err 25 | 26 | def gradient_descent(b, A, initial_x, max_iters, gamma): 27 | """Gradient descent algorithm.""" 28 | # Define parameters to store x and objective func. values 29 | xs = [initial_x] 30 | objectives = [] 31 | x = initial_x 32 | for n_iter in range(max_iters): 33 | # *************************************************** 34 | # INSERT YOUR CODE HERE 35 | # TODO: compute gradient and objective function 36 | # *************************************************** 37 | raise NotImplementedError 38 | # *************************************************** 39 | # INSERT YOUR CODE HERE 40 | # TODO: update x by a gradient descent step 41 | # *************************************************** 42 | raise NotImplementedError 43 | # store x and objective function value 44 | xs.append(x) 45 | objectives.append(obj) 46 | print("Gradient Descent({bi}/{ti}): objective={l}".format( 47 | bi=n_iter, ti=max_iters - 1, l=obj)) 48 | 49 | return objectives, xs -------------------------------------------------------------------------------- /labs/ex05/template/grid_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Exercise 2. 3 | 4 | Grid Search 5 | """ 6 | 7 | import numpy as np 8 | 9 | 10 | def generate_w(num_intervals): 11 | """Generate a grid of values for w0 and w1.""" 12 | w0 = np.linspace(-100, 200, num_intervals) 13 | w1 = np.linspace(-150, 150, num_intervals) 14 | return w0, w1 15 | 16 | 17 | def grid_search(y, tx, w0, w1): 18 | """Algorithm for grid search.""" 19 | losses = np.zeros((len(w0), len(w1))) 20 | # compute loss for each combination of w0 and w1. 
21 | for ind_row, row in enumerate(w0): 22 | for ind_col, col in enumerate(w1): 23 | w = np.array([row, col]) 24 | e = y - tx.dot(w) 25 | losses[ind_row, ind_col] = 1/2*np.mean(e**2) 26 | return losses 27 | 28 | 29 | def get_best_parameters(w0, w1, losses): 30 | """Get the best w from the result of grid search.""" 31 | min_row, min_col = np.unravel_index(np.argmin(losses), losses.shape) 32 | return losses[min_row, min_col], w0[min_row], w1[min_col] 33 | -------------------------------------------------------------------------------- /labs/ex05/template/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some helper functions.""" 3 | import numpy as np 4 | 5 | 6 | def load_data(sub_sample=True, add_outlier=False): 7 | """Load data and convert it to the metric system.""" 8 | path_dataset = "height_weight_genders.csv" 9 | data = np.genfromtxt( 10 | path_dataset, delimiter=",", skip_header=1, usecols=[1, 2]) 11 | height = data[:, 0] 12 | weight = data[:, 1] 13 | gender = np.genfromtxt( 14 | path_dataset, delimiter=",", skip_header=1, usecols=[0], 15 | converters={0: lambda x: 0 if b"Male" in x else 1}) 16 | # Convert to metric system 17 | height *= 0.025 18 | weight *= 0.454 19 | 20 | # sub-sample 21 | if sub_sample: 22 | height = height[::50] 23 | weight = weight[::50] 24 | 25 | if add_outlier: 26 | # outlier experiment 27 | height = np.concatenate([height, [1.1, 1.2]]) 28 | weight = np.concatenate([weight, [51.5/0.454, 55.3/0.454]]) 29 | 30 | return height, weight, gender 31 | 32 | 33 | def standardize(x): 34 | """Standardize the original data set.""" 35 | mean_x = np.mean(x,axis = 0) 36 | x = x - mean_x 37 | std_x = np.std(x, axis = 0) 38 | x = x / std_x 39 | return x, mean_x, std_x 40 | 41 | 42 | def build_model_data(height, weight): 43 | """Form (y,tX) to get regression data in matrix form.""" 44 | y = weight 45 | x = height 46 | num_samples = len(y) 47 | tx = np.c_[np.ones(num_samples), x] 48 | return y, tx 49 | 50 | 51 | def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True): 52 | """ 53 | Generate a minibatch iterator for a dataset. 54 | Takes as input two iterables (here the output desired values 'y' and the input data 'tx') 55 | Outputs an iterator which gives mini-batches of `batch_size` matching elements from `y` and `tx`. 56 | Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches. 
57 | Example of use : 58 | for minibatch_y, minibatch_tx in batch_iter(y, tx, 32): 59 | 60 | """ 61 | data_size = len(y) 62 | 63 | if shuffle: 64 | shuffle_indices = np.random.permutation(np.arange(data_size)) 65 | shuffled_y = y[shuffle_indices] 66 | shuffled_tx = tx[shuffle_indices] 67 | else: 68 | shuffled_y = y 69 | shuffled_tx = tx 70 | for batch_num in range(num_batches): 71 | start_index = batch_num * batch_size 72 | end_index = min((batch_num + 1) * batch_size, data_size) 73 | if start_index != end_index: 74 | yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index] 75 | 76 | -------------------------------------------------------------------------------- /labs/ex05/template/plots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """function for plot.""" 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from grid_search import get_best_parameters 6 | 7 | 8 | def prediction(w0, w1, mean_x, std_x): 9 | """Get the regression line from the model.""" 10 | x = np.arange(1.2, 2, 0.01) 11 | x_normalized = (x - mean_x) / std_x 12 | return x, w0 + w1 * x_normalized 13 | 14 | 15 | def base_visualization(grid_losses, w0_list, w1_list, 16 | mean_x, std_x, height, weight): 17 | """Base Visualization for both models.""" 18 | w0, w1 = np.meshgrid(w0_list, w1_list) 19 | 20 | fig = plt.figure() 21 | 22 | # plot contourf 23 | ax1 = fig.add_subplot(1, 2, 1) 24 | cp = ax1.contourf(w0, w1, grid_losses.T, cmap=plt.cm.jet) 25 | fig.colorbar(cp, ax=ax1) 26 | ax1.set_xlabel(r'$w_0$') 27 | ax1.set_ylabel(r'$w_1$') 28 | # put a marker at the minimum 29 | loss_star, w0_star, w1_star = get_best_parameters( 30 | w0_list, w1_list, grid_losses) 31 | ax1.plot(w0_star, w1_star, marker='*', color='r', markersize=20) 32 | 33 | 34 | 35 | return fig 36 | 37 | 38 | def grid_visualization(grid_losses, w0_list, w1_list, 39 | mean_x, std_x, height, weight): 40 | """Visualize how the trained model looks like under the grid search.""" 41 | fig = base_visualization( 42 | grid_losses, w0_list, w1_list, mean_x, std_x, height, weight) 43 | 44 | loss_star, w0_star, w1_star = get_best_parameters( 45 | w0_list, w1_list, grid_losses) 46 | # plot prediciton 47 | x, f = prediction(w0_star, w1_star, mean_x, std_x) 48 | ax2 = fig.get_axes()[2] 49 | ax2.plot(x, f, 'r') 50 | 51 | return fig 52 | 53 | 54 | def gradient_descent_visualization( 55 | gradient_losses, gradient_ws, 56 | grid_losses, grid_w0, grid_w1, 57 | mean_x, std_x, height, weight, n_iter=None): 58 | """Visualize how the loss value changes until n_iter.""" 59 | fig = base_visualization( 60 | grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight) 61 | 62 | ws_to_be_plotted = np.stack(gradient_ws) 63 | if n_iter is not None: 64 | ws_to_be_plotted = ws_to_be_plotted[:n_iter] 65 | 66 | ax1 = fig.get_axes()[0] 67 | ax1.plot( 68 | ws_to_be_plotted[:, 0], ws_to_be_plotted[:, 1], 69 | marker='o', color='w', markersize=10) 70 | pred_x, pred_y = prediction( 71 | ws_to_be_plotted[-1, 0], ws_to_be_plotted[-1, 1], 72 | mean_x, std_x) 73 | 74 | return fig 75 | -------------------------------------------------------------------------------- /labs/ex06/exercise06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex06/exercise06.pdf -------------------------------------------------------------------------------- /labs/ex06/solution/helper.py: 
-------------------------------------------------------------------------------- 1 | from math import inf 2 | 3 | import matplotlib 4 | import numpy as np 5 | import sklearn.datasets 6 | import torch 7 | from matplotlib import pyplot as plt 8 | from matplotlib.colors import LinearSegmentedColormap 9 | from torch.utils import data 10 | 11 | import sys, os 12 | SEED = 0 13 | 14 | # Disable printing while visualization 15 | def disable_print(): 16 | sys.stdout = open(os.devnull, 'w') 17 | 18 | # Restore printing 19 | def enable_print(): 20 | sys.stdout = sys.__stdout__ 21 | 22 | def generate_dataset(name, n_samples=200): 23 | """ 24 | Generate a random dataset with any of the predefined structures 25 | `blobs`, `moons`, `circles`, `bar`, or `xor` 26 | """ 27 | # Use Scikit-Learn's make_* functions to generate the samples 28 | if name == "blobs": 29 | coordinates, labels = sklearn.datasets.make_blobs(n_samples=n_samples, centers=2, random_state = SEED) 30 | elif name == "moons": 31 | coordinates, labels = sklearn.datasets.make_moons(n_samples=n_samples, random_state = SEED) 32 | coordinates[labels == 1] += 0.1 33 | coordinates[labels == 0] -= 0.1 34 | elif name == "circles": 35 | coordinates, labels = sklearn.datasets.make_circles(n_samples=n_samples, random_state = SEED) 36 | coordinates[labels == 1] *= 0.5 37 | elif name == "bar": 38 | # coordinates = np.random.rand(n_samples, 2) * 2 - 1 # range -1 to 1 39 | 40 | x_coordinate, y_coordinate = np.meshgrid( 41 | np.linspace(-1, 1, 12, dtype=np.float32), 42 | np.linspace(-1, 1, 6, dtype=np.float32), 43 | ) 44 | coordinates = np.stack([x_coordinate.reshape(-1), y_coordinate.reshape(-1)], axis=-1) 45 | n_samples = len(coordinates) 46 | 47 | l1norm = np.linalg.norm(coordinates, ord=inf, axis=1) 48 | labels = np.ones_like(l1norm).astype(np.int64) 49 | labels[np.abs(coordinates[:, 0]) < 0.1] = 0 50 | elif name == "xor": 51 | np.random.seed(SEED) 52 | coordinates = np.random.rand(n_samples, 2) 53 | 54 | # Create a small gap between the classes 55 | gap_size = 0 56 | coordinates[coordinates[:, 0] > 0.5, 0] += gap_size * 0.5 57 | coordinates[coordinates[:, 0] < 0.5, 0] -= gap_size * 0.5 58 | coordinates[coordinates[:, 1] > 0.5, 1] += gap_size * 0.5 59 | coordinates[coordinates[:, 1] < 0.5, 1] -= gap_size * 0.5 60 | 61 | labels = np.logical_xor(coordinates[:, 0] > 0.5, coordinates[:, 1] > 0.5).astype(np.int64) 62 | noisy_index = np.where(np.random.binomial(1, 0.1, size = len(coordinates)))[0] 63 | coordinates[noisy_index] += np.random.laplace(0, 0.1, [len(noisy_index), 2]) 64 | 65 | else: 66 | raise ValueError("Unknown dataset name {}".format(name)) 67 | 68 | # Convert to PyTorch 69 | coordinates = coordinates.astype(np.float32) 70 | coordinates = torch.from_numpy(coordinates) 71 | labels = torch.from_numpy(labels) 72 | 73 | # Normalize the range of coordinates to be 0 to 1 74 | coordinates -= torch.min(coordinates, 0, keepdim=True)[0] 75 | coordinates /= torch.max(coordinates, 0, keepdim=True)[0] 76 | 77 | # Create a PyTorch dataset 78 | dataset = data.TensorDataset(coordinates, labels) 79 | 80 | # Split it 50/50 into train and test 81 | train, test = torch.utils.data.random_split(dataset, [n_samples // 2, n_samples // 2]) 82 | return train, test 83 | 84 | def visualize_one_dataset(dataset: data.Dataset, ax: matplotlib.axes.Axes): 85 | for coordinate, label in dataset: 86 | x, y = coordinate 87 | color = {0: "#bada55", 1: "#55bada"}[label.item()] 88 | marker = {0: "+", 1: "."}[label.item()] 89 | ax.scatter(x, y, c=color, marker=marker) 90 | 91 | 92 | 
def visualize_datasets(datasets): 93 | f, axes = plt.subplots(2, len(datasets)) 94 | f.set_figheight(7) 95 | f.set_figwidth(14) 96 | axes[0][0].set_ylabel("Training") 97 | axes[1][0].set_ylabel("Test") 98 | for i, (name, train_set, test_set) in enumerate(datasets): 99 | visualize_one_dataset(train_set, ax=axes[0][i]) 100 | visualize_one_dataset(test_set, ax=axes[1][i]) 101 | axes[0][i].set_title(name) 102 | plt.show() 103 | 104 | #%% Visualize the predictions of a model on a grid 105 | def predict_grid(model, ax, xmin=-0.1, xmax=1.1, ymin=-0.1, ymax=1.1, num_grid_points=40): 106 | x_coordinate, y_coordinate = np.meshgrid( 107 | np.linspace(xmin, xmax, num_grid_points, dtype=np.float32), 108 | np.linspace(ymin, ymax, num_grid_points, dtype=np.float32), 109 | ) 110 | x_coordinate = torch.from_numpy(x_coordinate) 111 | y_coordinate = torch.from_numpy(y_coordinate) 112 | coordinates = torch.stack([x_coordinate.view(-1), y_coordinate.view(-1)], dim=-1) 113 | predictions = torch.nn.functional.softmax(model(coordinates), dim=1)[:, 1] 114 | 115 | predictions = predictions.view(*x_coordinate.shape).detach() 116 | cmap = LinearSegmentedColormap.from_list("bada55_dark", ["#4d5b23", "#234d5b"], N=100) 117 | ax.pcolormesh(x_coordinate, y_coordinate, predictions, cmap=cmap) 118 | 119 | def visualize_predictions(datasets, model, optimize): 120 | f, axes = plt.subplots(3, len(datasets)) 121 | f.set_figheight(10) 122 | f.set_figwidth(14) 123 | axes[0][0].set_ylabel("Training") 124 | axes[1][0].set_ylabel("Test") 125 | axes[2][0].set_ylabel("Test Loss") 126 | for i, (name, train_set, test_set) in enumerate(datasets): 127 | axes[0][i].set_title(name + ' (%s)'% model.name) 128 | # train model 129 | model.init_params(train_set) 130 | disable_print() 131 | losses = optimize(train_set, test_set, model) 132 | enable_print() 133 | #plot results 134 | predict_grid(model, ax=axes[1][i]) 135 | visualize_one_dataset(train_set, ax=axes[0][i]) 136 | visualize_one_dataset(test_set, ax=axes[1][i]) 137 | axes[2][i].plot(losses) 138 | axes[2][i].set_ylim([0,1]) 139 | plt.show() -------------------------------------------------------------------------------- /labs/ex06/solution/mixing_helpers.py: -------------------------------------------------------------------------------- 1 | import networkx 2 | import numpy as np 3 | 4 | n_nodes = 16 5 | def generate_torus_adj_matrix(n_nodes): 6 | G = networkx.generators.lattice.grid_2d_graph(int(np.sqrt(n_nodes)), int(np.sqrt(n_nodes)), periodic=True) 7 | # Adjacency matrix 8 | A = networkx.adjacency_matrix(G).toarray() 9 | 10 | # Add self-loops 11 | for i in range(0, A.shape[0]): 12 | A[i][i] = 1 13 | return A -------------------------------------------------------------------------------- /labs/ex06/solution06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex06/solution06.pdf -------------------------------------------------------------------------------- /labs/ex06/template/Lab_6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "try:\n", 10 | " import google.colab\n", 11 | " IN_COLAB = True\n", 12 | "except:\n", 13 | " IN_COLAB = False\n", 14 | "if IN_COLAB:\n", 15 | " # Clone the entire repo to access the files.\n", 16 | " !git clone -l -s 
https://github.com/epfml/OptML_course.git cloned-repo\n", 17 | " %cd cloned-repo/labs/ex06/template/" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "# Useful starting lines\n", 27 | "%matplotlib inline\n", 28 | "\n", 29 | "import numpy as np\n", 30 | "import torch\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "\n", 33 | "from helper import generate_dataset, visualize_one_dataset, visualize_datasets, predict_grid, visualize_predictions\n", 34 | "from torch.utils import data\n", 35 | "\n", 36 | "%load_ext autoreload\n", 37 | "%autoreload 2" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "# Generate data\n", 45 | "\n", 46 | "We provide a helper function that generates artificial datasets." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "NUM_SAMPLES = 100\n", 56 | "\n", 57 | "blobs_train, blobs_test = generate_dataset(\"blobs\", NUM_SAMPLES)\n", 58 | "moons_train, moons_test = generate_dataset(\"moons\", NUM_SAMPLES)\n", 59 | "xor_train, xor_test = generate_dataset(\"xor\", NUM_SAMPLES)\n", 60 | "squares_train, squares_test = generate_dataset(\"bar\", NUM_SAMPLES)\n", 61 | "\n", 62 | "# The generate_dataset function returns PyTorch dataset objects\n", 63 | "type(blobs_train), type(blobs_test)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Let us plot the train and the test data sets. Note the differences between the two." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "datasets = [\n", 80 | " (\"Blobs\", blobs_train, blobs_test),\n", 81 | " (\"Moons\", moons_train, moons_test),\n", 82 | " (\"Bar\", squares_train, squares_test),\n", 83 | " (\"XOR\", xor_train, xor_test)\n", 84 | "]\n", 85 | "\n", 86 | "visualize_datasets(datasets)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "# Optimizing using PyTorch\n", 94 | "\n", 95 | "Write an optimizer in PyTorch using its default SGD class." 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "def optimize(train_data, test_data, model, loss_fn = torch.nn.CrossEntropyLoss(), lr = 0.1):\n", 105 | " \"\"\"\n", 106 | " Stochastic Gradient Descent optimizer\n", 107 | " \n", 108 | " :param train_data: torch.utils.data.dataset.Subset\n", 109 | " :param test_data: torch.utils.data.dataset.Subset\n", 110 | " :param model: torch.nn.Module (see https://pytorch.org/docs/stable/nn.html)\n", 111 | " :param loss_fn: torch.nn.modules.loss (see https://pytorch.org/docs/stable/nn.html#id51)\n", 112 | " :param lr: float, learning rate\n", 113 | " \n", 114 | " :return:\n", 115 | " - objectives, a list of loss values on the test dataset, collected at the end of each pass over the dataset (epoch)\n", 116 | " \"\"\"\n", 117 | " # default PyTorch utilities for loading the training and test data\n", 118 | " train_loader = data.DataLoader(train_data, batch_size=10, shuffle=True)\n", 119 | " test_loader = data.DataLoader(test_data, batch_size=NUM_SAMPLES)\n", 120 | " losses = []\n", 121 | " \n", 122 | " # ***************************************************\n", 123 | " # INSERT YOUR CODE HERE\n", 124 | " # TODO: Define SGD optimizer 
with learning rate = lr\n", 125 | " # HINT: Use torch.optim.SGD and model.parameters()\n", 126 | " # ***************************************************\n", 127 | " optimizer = ?\n", 128 | " \n", 129 | " # Run SGD\n", 130 | " for epoch in range(1000):\n", 131 | " for minibatch, label in train_loader:\n", 132 | " \n", 133 | " optimizer.zero_grad() # Zeroes the previously computed gradients\n", 134 | " \n", 135 | " # ***************************************************\n", 136 | " # INSERT YOUR CODE HERE\n", 137 | " # TODO: prediction on minibatch\n", 138 | " # HINT: Use model.forward\n", 139 | " # ***************************************************\n", 140 | " prediction = ?\n", 141 | " \n", 142 | " # ***************************************************\n", 143 | " # INSERT YOUR CODE HERE\n", 144 | " # TODO: compute the loss on prediction\n", 145 | " # HINT: Use loss_fn\n", 146 | " # ***************************************************\n", 147 | " loss = ?\n", 148 | " \n", 149 | " # ***************************************************\n", 150 | " # INSERT YOUR CODE HERE\n", 151 | " # TODO: compute the minibatch gradient\n", 152 | " # HINT: Use loss.backward!\n", 153 | " # ***************************************************\n", 154 | " \n", 155 | " \n", 156 | " # ***************************************************\n", 157 | " # INSERT YOUR CODE HERE\n", 158 | " # TODO: perform an SGD step\n", 159 | " # HINT: Use optimizer.step!\n", 160 | " # ***************************************************\n", 161 | " \n", 162 | " \n", 163 | " # Compute the test loss\n", 164 | " for minibatch, label in test_loader:\n", 165 | " # we let torch know that we don't intend to call .backward\n", 166 | " with torch.no_grad():\n", 167 | " # ***************************************************\n", 168 | " # INSERT YOUR CODE HERE\n", 169 | " # TODO: compute the test prediction and test loss\n", 170 | " # ***************************************************\n", 171 | " loss = ?\n", 172 | " \n", 173 | " losses.append(loss.item())\n", 174 | " \n", 175 | " # Print the test loss to monitor progress\n", 176 | " if epoch % 100 == 0:\n", 177 | " print(epoch, loss.item())\n", 178 | " \n", 179 | " return losses" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "# RBF Kernel\n", 187 | "\n", 188 | "An RBF kernel is the most commonly used `out of the box` kernel method for non-linear data. Intuitively, an RBF kernel blurs the training data and uses this for classification, i.e. the individual green and blue points above get blurred into green and blue regions, which are used to make predictions. A critical parameter `sigma` defines the width of this blurring: a large `sigma` results in more blurring.\n", 189 | "\n", 190 | "See [here](https://github.com/epfml/ML_course/blob/master/lectures/07/lecture07b_kernelRidge.pdf) for more information on the `kernel trick` and [here](https://www.cs.huji.ac.il/~shais/Lectures2014/lecture8.pdf) for an in-depth mathematical treatment. Here, we will try to develop an intuition for the RBF kernel."
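,
    "\n",
    "As a small, self-contained illustration of the kernel itself (a sketch; the function name and the toy shapes are my own, not part of the lab):\n",
    "```python\n",
    "def rbf_kernel(X, Y, sigma=0.1):\n",
    "    # K[i, j] = exp(-||X[i] - Y[j]||^2 / (2 * sigma^2))\n",
    "    gamma = 1 / (2 * sigma ** 2)\n",
    "    sq_dists = torch.cdist(X, Y) ** 2   # pairwise squared Euclidean distances\n",
    "    return torch.exp(-gamma * sq_dists)\n",
    "\n",
    "K = rbf_kernel(torch.rand(5, 2), torch.rand(3, 2), sigma=0.5)\n",
    "print(K.shape)   # torch.Size([5, 3]); larger sigma pushes entries closer to 1\n",
    "```"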
191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "class RadialBasisFunction(torch.nn.Module):\n", 200 | " def __init__(self, sigma=0.1):\n", 201 | " super().__init__()\n", 202 | " self.gamma = 1 / (2 * sigma ** 2)\n", 203 | " self.num_classes = 2\n", 204 | " self.name = 'RBF'\n", 205 | " \n", 206 | " def init_params(self, train_data):\n", 207 | " # data reshaping to do torch broadcasting magic\n", 208 | " data_matrix = train_data.dataset.tensors[0][train_data.indices, :]\n", 209 | " self.data_matrix = data_matrix.t().view(1, *data_matrix.t().shape)\n", 210 | " \n", 211 | " # ***************************************************\n", 212 | " # INSERT YOUR CODE HERE\n", 213 | " # TODO: initialize parameters alpha to 0\n", 214 | " # HINT: use torch.zeros\n", 215 | " # ***************************************************\n", 216 | " zeros = ?\n", 217 | " \n", 218 | " self.alpha = torch.nn.Parameter(zeros)\n", 219 | "\n", 220 | " def forward(self, minibatch):\n", 221 | " minibatch = minibatch.view(*minibatch.shape, 1)\n", 222 | " K = torch.exp(\n", 223 | " -self.gamma * torch.sum((self.data_matrix - minibatch) ** 2, dim=1, keepdim=True)\n", 224 | " ).squeeze()\n", 225 | " return K @ self.alpha\n" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "Try your code on the blobs data set. Your test loss should be around 0.1 by the end." 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "rbf_model = RadialBasisFunction(0.5)\n", 242 | "rbf_model.init_params(blobs_train)\n", 243 | "rbf_blob_losses = optimize(blobs_train, blobs_test, rbf_model)\n", 244 | "\n", 245 | "plt.plot(rbf_blob_losses)\n", 246 | "plt.xlabel('Epoch')\n", 247 | "plt.ylabel('Test Loss')\n", 248 | "plt.show()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Plot the training data points and the predictions made." 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "ax = plt.gca()\n", 265 | "predict_grid(rbf_model, ax)\n", 266 | "visualize_one_dataset(blobs_train, ax)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "## Effect of sigma" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "Plot the results while varying the value of sigma (the kernel width passed to `RadialBasisFunction`) in [0.1, 0.5, 1]. What do you observe? 
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": null,
286 | "metadata": {},
287 | "outputs": [],
288 | "source": [
289 | "rbf_model = RadialBasisFunction(0.1)\n",
290 | "visualize_predictions(datasets, rbf_model, optimize)"
291 | ]
292 | },
293 | {
294 | "cell_type": "markdown",
295 | "metadata": {},
296 | "source": [
297 | "# Neural Networks\n",
298 | "\n",
299 | "We will create a simple two-layer neural network using the standard building blocks provided by PyTorch."
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": null,
305 | "metadata": {},
306 | "outputs": [],
307 | "source": [
308 | "class NeuralNetwork(torch.nn.Sequential):\n",
309 | "    def __init__(self, hidden_layer_size):\n",
310 | "        self.name = 'NN'\n",
311 | "        self.num_classes = 2\n",
312 | "        # ***************************************************\n",
313 | "        # INSERT YOUR CODE HERE\n",
314 | "        # TODO: Define your neural network model with ReLU\n",
315 | "        # HINT: Use torch.nn.Sequential and torch.nn.ReLU\n",
316 | "        # ***************************************************\n",
317 | "        super().__init__(\n",
318 | "            ?\n",
319 | "        )\n",
320 | "    \n",
321 | "    def init_params(self, train_data):\n",
322 | "        ''' No need to do anything since it is taken care of by torch.nn.Sequential'''"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {},
328 | "source": [
329 | "Plot the results while varying the size of the hidden layer in [20, 200, 1000].\n",
330 | "\n",
331 | "What do you observe?"
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "execution_count": null,
337 | "metadata": {},
338 | "outputs": [],
339 | "source": [
340 | "nn_model = NeuralNetwork(200)\n",
341 | "visualize_predictions(datasets, nn_model, optimize)"
342 | ]
343 | },
344 | {
345 | "cell_type": "markdown",
346 | "metadata": {},
347 | "source": [
348 | "Also try increasing the number of layers. How does this affect the learnt classifier?\n",
349 | "\n",
350 | "[This](https://playground.tensorflow.org/) is a cool website where you can play around more with training of neural networks on toy datasets."
351 | ]
352 | },
353 | {
354 | "cell_type": "markdown",
355 | "metadata": {
356 | "collapsed": true
357 | },
358 | "source": [
359 | "## Momentum in training neural networks\n",
360 | "\n",
361 | "In the lecture, we have seen how Nesterov momentum can accelerate gradient descent on convex functions. Let's now explore whether similar benefits can be achieved on non-convex functions (neural networks in this case) and for *stochastic* gradient descent. It is important to note that theory usually describes convergence on the training objective. In a typical machine learning setting, however, we care about the loss on _unseen_ data. For that reason, here, we will always look at the loss on the test set.\n",
362 | "\n",
363 | "__Exercise__ Add an argument `momentum` to the function `optimize` you completed earlier. Hint: `torch.optim.SGD` also has a `momentum` argument that you can use. This implements heavy-ball momentum, which is similar to, but [slightly different](https://dominikschmidt.xyz/nesterov-momentum/) from, Nesterov momentum. You can try either variant, but they should have a similar effect."
364 | ]
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "metadata": {},
369 | "source": [
370 | "### Hyper-parameter tuning\n",
371 | "\n",
372 | "You will be comparing the SGD optimizer without momentum and with a momentum of 0.9 (a common value). To do this fairly, you need to find a good learning rate for each variant."
373 | ]
374 | },
375 | {
376 | "cell_type": "markdown",
377 | "metadata": {},
378 | "source": [
379 | "__Exercise__
\n", 380 | "Find a good learning rate for SGD without momentum. You can try different learning rates on an exponential grid (i.e. 0.2, 0.4, 0.8, ...), and record the best test loss in each experiment with `np.min(losses)`. Use `NeuralNetwork(200)` as a model, and use this data:" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [ 389 | "dataset = datasets[1] # and use this dataset, you can try others as well\n", 390 | "dataset_name = dataset[0]\n", 391 | "train_data = dataset[1]\n", 392 | "test_data = dataset[2]\n", 393 | "dataset_name" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "plt.xlabel(\"Learning rate\")\n", 403 | "plt.ylabel(\"Best loss seen\")\n", 404 | "\n", 405 | "best_loss = None\n", 406 | "best_learning_rate = None\n", 407 | "\n", 408 | "grid = ### TODO, find a reasonable range, of values to try\n", 409 | "print(\"Learning rates to try:\", grid)\n", 410 | "\n", 411 | "for learning_rate in grid:\n", 412 | " model = NeuralNetwork(200)\n", 413 | " test_losses = optimize(train_data, test_data, model, lr=learning_rate)\n", 414 | " best_loss_achieved = ### TODO\n", 415 | " plt.scatter(learning_rate, best_loss_achieved)\n", 416 | " if ### TODO:\n", 417 | " best_loss = best_loss_achieved\n", 418 | " best_learning_rate = learning_rate" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": [ 427 | "f\"The learning rate {best_learning_rate} worked well for SGD without momentum.\"" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [ 436 | "plt.xlabel(\"Learning rate\")\n", 437 | "plt.ylabel(\"Best loss seen\")\n", 438 | "\n", 439 | "best_loss = None\n", 440 | "best_learning_rate = None\n", 441 | "\n", 442 | "# The grid is chosen by trial and error in this case, \n", 443 | "grid = ### TODO, find a reasonable range, of values to try\n", 444 | "print(\"Learning rates to try:\", grid)\n", 445 | "\n", 446 | "for learning_rate in grid:\n", 447 | " model = NeuralNetwork(200)\n", 448 | " test_losses = optimize(train_data, test_data, model, lr=learning_rate, momentum=0.9)\n", 449 | " best_loss_achieved = ### TODO\n", 450 | " plt.scatter(learning_rate, best_loss_achieved)\n", 451 | " if ### TODO:\n", 452 | " best_loss = best_loss_achieved\n", 453 | " best_learning_rate = learning_rate" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [ 462 | "f\"The learning rate {best_learning_rate} worked well for SGD with 0.9 momentum.\"" 463 | ] 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "metadata": {}, 468 | "source": [ 469 | "### Comparing the optimizers\n", 470 | "\n", 471 | "__Exercise__
\n", 472 | "Plot test loss curves for both optimizers with the best learning rates you found for each. Do you see a benefit of momentum? How stable are the improvements over different datasets or different initializations? Can you think of ways to make the hyperparameter search procedure more stable?" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "model = NeuralNetwork(200) # reinitialize the model\n", 482 | "plt.plot(optimize(train_data, test_data, model, lr=### TODO, momentum=0.9), label=\"SGD with 0.9 momentum\")\n", 483 | "\n", 484 | "model = NeuralNetwork(200) # reinitialize the model\n", 485 | "plt.plot(optimize(train_data, test_data, model, lr=### TODO, momentum=0.0), label=\"SGD\")\n", 486 | "\n", 487 | "plt.ylabel(\"Test loss\")\n", 488 | "plt.xlabel(\"Epochs\")\n", 489 | "plt.ylim([0, 1])\n", 490 | "plt.legend();" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [] 499 | } 500 | ], 501 | "metadata": { 502 | "kernelspec": { 503 | "display_name": "Python 3", 504 | "language": "python", 505 | "name": "python3" 506 | }, 507 | "language_info": { 508 | "codemirror_mode": { 509 | "name": "ipython", 510 | "version": 3 511 | }, 512 | "file_extension": ".py", 513 | "mimetype": "text/x-python", 514 | "name": "python", 515 | "nbconvert_exporter": "python", 516 | "pygments_lexer": "ipython3", 517 | "version": "3.7.4" 518 | }, 519 | "toc": { 520 | "base_numbering": 1, 521 | "nav_menu": {}, 522 | "number_sections": true, 523 | "sideBar": true, 524 | "skip_h1_title": false, 525 | "title_cell": "Table of Contents", 526 | "title_sidebar": "Contents", 527 | "toc_cell": false, 528 | "toc_position": {}, 529 | "toc_section_display": true, 530 | "toc_window_display": false 531 | } 532 | }, 533 | "nbformat": 4, 534 | "nbformat_minor": 2 535 | } 536 | -------------------------------------------------------------------------------- /labs/ex06/template/helper.py: -------------------------------------------------------------------------------- 1 | from math import inf 2 | 3 | import matplotlib 4 | import numpy as np 5 | import sklearn.datasets 6 | import torch 7 | from matplotlib import pyplot as plt 8 | from matplotlib.colors import LinearSegmentedColormap 9 | from torch.utils import data 10 | 11 | import sys, os 12 | SEED = 0 13 | 14 | # Disable printing while visualization 15 | def disable_print(): 16 | sys.stdout = open(os.devnull, 'w') 17 | 18 | # Restore printing 19 | def enable_print(): 20 | sys.stdout = sys.__stdout__ 21 | 22 | def generate_dataset(name, n_samples=200): 23 | """ 24 | Generate a random dataset with any of the predefined structures 25 | `blobs`, `moons`, `circles`, `bar`, or `xor` 26 | """ 27 | # Use Scikit-Learn's make_* functions to generate the samples 28 | if name == "blobs": 29 | coordinates, labels = sklearn.datasets.make_blobs(n_samples=n_samples, centers=2, random_state = SEED) 30 | elif name == "moons": 31 | coordinates, labels = sklearn.datasets.make_moons(n_samples=n_samples, random_state = SEED) 32 | coordinates[labels == 1] += 0.1 33 | coordinates[labels == 0] -= 0.1 34 | elif name == "circles": 35 | coordinates, labels = sklearn.datasets.make_circles(n_samples=n_samples, random_state = SEED) 36 | coordinates[labels == 1] *= 0.5 37 | elif name == "bar": 38 | # coordinates = np.random.rand(n_samples, 2) * 2 - 1 # range -1 to 1 39 | 40 | x_coordinate, y_coordinate = np.meshgrid( 
41 | np.linspace(-1, 1, 12, dtype=np.float32), 42 | np.linspace(-1, 1, 6, dtype=np.float32), 43 | ) 44 | coordinates = np.stack([x_coordinate.reshape(-1), y_coordinate.reshape(-1)], axis=-1) 45 | n_samples = len(coordinates) 46 | 47 | l1norm = np.linalg.norm(coordinates, ord=inf, axis=1) 48 | labels = np.ones_like(l1norm).astype(np.int64) 49 | labels[np.abs(coordinates[:, 0]) < 0.1] = 0 50 | elif name == "xor": 51 | np.random.seed(SEED) 52 | coordinates = np.random.rand(n_samples, 2) 53 | 54 | # Create a small gap between the classes 55 | gap_size = 0 56 | coordinates[coordinates[:, 0] > 0.5, 0] += gap_size * 0.5 57 | coordinates[coordinates[:, 0] < 0.5, 0] -= gap_size * 0.5 58 | coordinates[coordinates[:, 1] > 0.5, 1] += gap_size * 0.5 59 | coordinates[coordinates[:, 1] < 0.5, 1] -= gap_size * 0.5 60 | 61 | labels = np.logical_xor(coordinates[:, 0] > 0.5, coordinates[:, 1] > 0.5).astype(np.int64) 62 | noisy_index = np.where(np.random.binomial(1, 0.1, size = len(coordinates)))[0] 63 | coordinates[noisy_index] += np.random.laplace(0, 0.1, [len(noisy_index), 2]) 64 | 65 | else: 66 | raise ValueError("Unknown dataset name {}".format(name)) 67 | 68 | # Convert to PyTorch 69 | coordinates = coordinates.astype(np.float32) 70 | coordinates = torch.from_numpy(coordinates) 71 | labels = torch.from_numpy(labels) 72 | 73 | # Normalize the range of coordinates to be 0 to 1 74 | coordinates -= torch.min(coordinates, 0, keepdim=True)[0] 75 | coordinates /= torch.max(coordinates, 0, keepdim=True)[0] 76 | 77 | # Create a PyTorch dataset 78 | dataset = data.TensorDataset(coordinates, labels) 79 | 80 | # Split it 50/50 into train and test 81 | train, test = torch.utils.data.random_split(dataset, [n_samples // 2, n_samples // 2]) 82 | return train, test 83 | 84 | def visualize_one_dataset(dataset: data.Dataset, ax: matplotlib.axes.Axes): 85 | for coordinate, label in dataset: 86 | x, y = coordinate 87 | color = {0: "#bada55", 1: "#55bada"}[label.item()] 88 | marker = {0: "+", 1: "."}[label.item()] 89 | ax.scatter(x, y, c=color, marker=marker) 90 | 91 | 92 | def visualize_datasets(datasets): 93 | f, axes = plt.subplots(2, len(datasets)) 94 | f.set_figheight(7) 95 | f.set_figwidth(14) 96 | axes[0][0].set_ylabel("Training") 97 | axes[1][0].set_ylabel("Test") 98 | for i, (name, train_set, test_set) in enumerate(datasets): 99 | visualize_one_dataset(train_set, ax=axes[0][i]) 100 | visualize_one_dataset(test_set, ax=axes[1][i]) 101 | axes[0][i].set_title(name) 102 | plt.show() 103 | 104 | #%% Visualize the predictions of a model on a grid 105 | def predict_grid(model, ax, xmin=-0.1, xmax=1.1, ymin=-0.1, ymax=1.1, num_grid_points=40): 106 | x_coordinate, y_coordinate = np.meshgrid( 107 | np.linspace(xmin, xmax, num_grid_points, dtype=np.float32), 108 | np.linspace(ymin, ymax, num_grid_points, dtype=np.float32), 109 | ) 110 | x_coordinate = torch.from_numpy(x_coordinate) 111 | y_coordinate = torch.from_numpy(y_coordinate) 112 | coordinates = torch.stack([x_coordinate.view(-1), y_coordinate.view(-1)], dim=-1) 113 | predictions = torch.nn.functional.softmax(model(coordinates), dim=1)[:, 1] 114 | 115 | predictions = predictions.view(*x_coordinate.shape).detach() 116 | cmap = LinearSegmentedColormap.from_list("bada55_dark", ["#4d5b23", "#234d5b"], N=100) 117 | ax.pcolormesh(x_coordinate, y_coordinate, predictions, cmap=cmap) 118 | 119 | def visualize_predictions(datasets, model, optimize): 120 | f, axes = plt.subplots(3, len(datasets)) 121 | f.set_figheight(10) 122 | f.set_figwidth(14) 123 | 
    axes[0][0].set_ylabel("Training")
124 |     axes[1][0].set_ylabel("Test")
125 |     axes[2][0].set_ylabel("Test Loss")
126 |     for i, (name, train_set, test_set) in enumerate(datasets):
127 |         axes[0][i].set_title(name + ' (%s)' % model.name)
128 |         # train the model
129 |         model.init_params(train_set)
130 |         disable_print()
131 |         losses = optimize(train_set, test_set, model)
132 |         enable_print()
133 |         # plot the results
134 |         predict_grid(model, ax=axes[1][i])
135 |         visualize_one_dataset(train_set, ax=axes[0][i])
136 |         visualize_one_dataset(test_set, ax=axes[1][i])
137 |         axes[2][i].plot(losses)
138 |         axes[2][i].set_ylim([0, 1])
139 |     plt.show()
--------------------------------------------------------------------------------
/labs/ex07/exercise07.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex07/exercise07.pdf
--------------------------------------------------------------------------------
/labs/ex07/solution07.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex07/solution07.pdf
--------------------------------------------------------------------------------
/labs/ex07/template/Lab 7 - Fixed Point with Newton.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# Useful starting lines\n",
10 | "%matplotlib inline\n",
11 | "import numpy as np\n",
12 | "import matplotlib.pyplot as plt\n",
13 | "%load_ext autoreload\n",
14 | "%autoreload 2"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# Fixed point iteration\n",
22 | "\n",
23 | "In numerous applications, we encounter the task of solving equations of the form $$x = g(x)$$\n",
24 | "for a continuous function $g$. In lab 03 we saw one simple method to solve such problems: $$x_{t+1} = g(x_t)\\,.$$\n",
25 | "We solved two apparently similar equations, $x = \\log(1+x)$ and $x = \\log(2+x)$, which showed very different convergence behavior.\n",
26 | "\n",
27 | "## Newton steps\n",
28 | "\n",
29 | "This week's task is to adapt the iterative algorithm to use Newton-style steps. Like in lab 03, we can do this by expressing the update step as a gradient-descent update and computing its second derivative.\n",
30 | "\n"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "## Plot $g$"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "Let us see how the two functions look over the interval $[0,2]$."
45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "x = np.arange(0, 2, 0.0001)\n", 54 | "y1 = np.log(1 + x)\n", 55 | "y2 = np.log(2 + x)\n", 56 | "fig = plt.figure()\n", 57 | "plt.plot(x, x, label='x')\n", 58 | "plt.plot(x, y1, label='$\\log(1 + x)$')\n", 59 | "plt.plot(x, y2, label='$\\log(2 + x)$')\n", 60 | "plt.grid(linestyle=':')\n", 61 | "plt.axhline(0, color='black')\n", 62 | "plt.axvline(0, color='black')\n", 63 | "plt.legend()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Please fill in the functions `fixed_point_newton` below:" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "def fixed_point_newton(initial_x, max_iters, objective, objective_grad):\n", 80 | " \"\"\"Compute the fixed point.\"\"\"\n", 81 | " # Define parameters to store x and objective func. values\n", 82 | " xs = []\n", 83 | " errors = []\n", 84 | " x = initial_x\n", 85 | " for n_iter in range(max_iters):\n", 86 | " # compute objective and error\n", 87 | " obj = objective(x)\n", 88 | " error = np.abs(x - obj)\n", 89 | " # store x and error\n", 90 | " xs.append(x)\n", 91 | " errors.append(error)\n", 92 | " \n", 93 | " ########################\n", 94 | " # @TODO Insert your code here\n", 95 | " # UPDATE x with a Newton step\n", 96 | " ########################\n", 97 | " \n", 98 | " # print the current error\n", 99 | " if n_iter % 10 == 0: \n", 100 | " print(\"Fixed point: iteration ={i}, x = {x:.2e}, error={err:.2e}\".format(i=n_iter, x=x, err=error))\n", 101 | " return errors, xs\n", 102 | "\n", 103 | "def fixed_point(initial_x, max_iters, objective):\n", 104 | " \"\"\"Compute the fixed point.\"\"\"\n", 105 | " # Define parameters to store x and objective func. values\n", 106 | " xs = []\n", 107 | " errors = []\n", 108 | " x = initial_x\n", 109 | " for n_iter in range(max_iters):\n", 110 | " # compute objective and error\n", 111 | " obj = objective(x)\n", 112 | " error = np.abs(x - obj)\n", 113 | " # store x and error\n", 114 | " xs.append(x)\n", 115 | " errors.append(error)\n", 116 | " # update x \n", 117 | " x = obj\n", 118 | " # print the current error\n", 119 | " if n_iter % 10 == 0: \n", 120 | " print(\"Fixed point: iteration ={i}, x = {x:.2e}, error={err:.2e}\".format(i=n_iter, x=x, err=error))\n", 121 | " return errors, xs" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Let's test the implementations and compare it to the original algorithm from lab 03:" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "# Define the parameters of the algorithm.\n", 138 | "max_iters = 100\n", 139 | "\n", 140 | "# Initialization\n", 141 | "initial_x = 1\n", 142 | "\n", 143 | "# Run fixed point.\n", 144 | "errors_func1, xs_func1 = fixed_point(\n", 145 | " initial_x, \n", 146 | " max_iters, \n", 147 | " lambda x: np.log(1 + x)\n", 148 | ")\n", 149 | "\n", 150 | "errors_func1_newton, xs_func1_newton = fixed_point_newton(\n", 151 | " initial_x, \n", 152 | " max_iters, \n", 153 | " lambda x: np.log(1 + x), \n", 154 | " lambda x: 1./(1. 
+ x)\n", 155 | ")\n" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "Run your implementation on the second function" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "# Define the parameters of the algorithm.\n", 172 | "max_iters = 100\n", 173 | "\n", 174 | "# Initialization\n", 175 | "initial_x = 1\n", 176 | "\n", 177 | "# Run fixed point.\n", 178 | "errors_func2, xs_func2 = fixed_point(\n", 179 | " initial_x, \n", 180 | " max_iters, \n", 181 | " lambda x: np.log(2 + x)\n", 182 | ")\n", 183 | "\n", 184 | "errors_func2_newton, xs_func2_newton = fixed_point_newton(\n", 185 | " initial_x, \n", 186 | " max_iters, \n", 187 | " lambda x: np.log(2 + x), \n", 188 | " lambda x: 1./(2. + x)\n", 189 | ")\n" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "**Plotting error values**" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "plt.semilogy()\n", 206 | "plt.xlabel('Number of steps')\n", 207 | "plt.ylabel('Value of Error')\n", 208 | "#plt.yscale(\"log\")\n", 209 | "plt.plot(range(len(errors_func1)), errors_func1, label='$log(1 + x)$')\n", 210 | "plt.plot(range(len(errors_func2)), errors_func2, label='$log(2 + x)$')\n", 211 | "plt.plot(range(len(errors_func1_newton)), errors_func1_newton, label='$log(1 + x)$ (Newton)')\n", 212 | "plt.plot(range(len(errors_func2_newton)), errors_func2_newton, label='$log(2 + x)$ (Newton)')\n", 213 | "\n", 214 | "plt.legend()\n", 215 | "plt.show()" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "What do you observe about the rates of convergence of the two methods? Can you explain this difference?" 
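As an aside, here is one hedged sketch of the missing Newton update in `fixed_point_newton` above (an assumption about the intended solution, not the official one). Following the gradient-descent framing from lab 03, solving $x = g(x)$ means finding a root of $f'(x) = x - g(x)$, so $f''(x) = 1 - g'(x)$ and the Newton step becomes:

# Newton step for the fixed-point problem x = g(x), with f'(x) = x - g(x)
# and f''(x) = 1 - g'(x); objective_grad supplies g'.
x = x - (x - objective(x)) / (1.0 - objective_grad(x))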
223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [] 231 | } 232 | ], 233 | "metadata": { 234 | "anaconda-cloud": {}, 235 | "kernelspec": { 236 | "display_name": "Python 3 (ipykernel)", 237 | "language": "python", 238 | "name": "python3" 239 | }, 240 | "language_info": { 241 | "codemirror_mode": { 242 | "name": "ipython", 243 | "version": 3 244 | }, 245 | "file_extension": ".py", 246 | "mimetype": "text/x-python", 247 | "name": "python", 248 | "nbconvert_exporter": "python", 249 | "pygments_lexer": "ipython3", 250 | "version": "3.10.11" 251 | }, 252 | "widgets": { 253 | "state": { 254 | "d2b2c3aea192430e81437f33ba0b0e69": { 255 | "views": [ 256 | { 257 | "cell_index": 22 258 | } 259 | ] 260 | }, 261 | "e4a6a7a70ccd42ddb112989c04f2ed3f": { 262 | "views": [ 263 | { 264 | "cell_index": 18 265 | } 266 | ] 267 | } 268 | }, 269 | "version": "1.2.0" 270 | } 271 | }, 272 | "nbformat": 4, 273 | "nbformat_minor": 4 274 | } 275 | -------------------------------------------------------------------------------- /labs/ex08/exercise08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex08/exercise08.pdf -------------------------------------------------------------------------------- /labs/ex08/solution/solution-svm-derivation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex08/solution/solution-svm-derivation.pdf -------------------------------------------------------------------------------- /labs/ex08/solution08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex08/solution08.pdf -------------------------------------------------------------------------------- /labs/ex08/template/Lab_8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from collections import defaultdict\n", 10 | "import numpy as np\n", 11 | "import scipy\n", 12 | "import scipy.sparse as sps\n", 13 | "import math\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import time\n", 16 | "from sklearn.datasets import load_svmlight_file\n", 17 | "import random\n", 18 | "%matplotlib inline" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "source": [ 27 | "# Support Vector Machines\n", 28 | "## Classification Using SVM\n", 29 | "Load dataset. 
We will use the w1a dataset from the LibSVM collection: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "The original optimization problem for the Support Vector Machine (SVM) is given by\n",
37 | "\\begin{equation}\\label{eq:primal}\n",
38 | "  \\min_{w \\in R^d} \\ \\sum_{i=1}^n \\ell(y_i A_i^\\top w) + \\frac\\lambda2 \\|w\\|^2\n",
39 | "\\end{equation}\n",
40 | "where $\\ell : R\\rightarrow R$, $\\ell(z) := \\max\\{0,1-z\\}$ is the hinge loss function.\n",
41 | "Here for any $i$, $1\\le i\\le n$, the vector $A_i\\in R^d$ is the $i$-th data example, and $y_i\\in\\{\\pm1\\}$ is the corresponding label.\n",
42 | " \n",
43 | "The dual optimization problem for the SVM is given by \n",
44 | "\\begin{equation}\\label{eq:dual}\n",
45 | "  \\max_{\\boldsymbol{\\alpha} \\in R^n } \\ \\alpha^\\top\\boldsymbol{1} - \\tfrac1{2\\lambda} \\alpha^\\top Y A A^\\top Y\\alpha\n",
46 | "  \\text{ such that $0\\le \\alpha_i \\le 1 \\ \\forall i$}\n",
47 | "\\end{equation}\n",
48 | "where $Y := \\mathop{diag}(y)$, and $A\\in R^{n \\times d}$ again collects all $n$ data examples, this time as its rows.\n",
49 | "\n",
50 | "Note that $w$ can be derived from $\\alpha$ as\n",
51 | "\\begin{equation}\n",
52 | "  w(\\alpha) = \\frac{1}{\\lambda} A^\\top Y \\alpha.\n",
53 | "\\end{equation}"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "DATA_TRAIN_PATH = 'data/w1a'\n",
63 | "\n",
64 | "A, y = load_svmlight_file(DATA_TRAIN_PATH)\n",
65 | "A = A.toarray()\n",
66 | "print(y.shape, A.shape)"
67 | ]
68 | },
69 | {
70 | "cell_type": "markdown",
71 | "metadata": {},
72 | "source": [
73 | "## Prepare cost and prediction functions"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "def calculate_primal_objective(y, A, w, lambda_):\n",
83 | "    \"\"\"\n",
84 | "    Compute the full cost (the primal objective), that is loss plus regularizer.\n",
85 | "    y: +1 or -1 labels, shape = (num_examples)\n",
86 | "    A: Dataset matrix, shape = (num_examples, num_features)\n",
87 | "    w: Model weights, shape = (num_features)\n",
88 | "    return: scalar value\n",
89 | "    \"\"\"\n",
90 | "    # ***************************************************\n",
91 | "    # INSERT YOUR CODE HERE\n",
92 | "    # TODO\n",
93 | "    # ***************************************************\n",
94 | "    raise NotImplementedError"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": null,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "def calculate_accuracy(y, A, w):\n",
104 | "    \"\"\"\n",
105 | "    Compute the training accuracy on the training set (can be called for the test set as well).\n",
106 | "    y: +1 or -1 labels, shape = (num_examples)\n",
107 | "    A: Dataset matrix, shape = (num_examples, num_features)\n",
108 | "    w: Model weights, shape = (num_features)\n",
109 | "    return: scalar value\n",
110 | "    \"\"\"\n",
111 | "    # ***************************************************\n",
112 | "    # INSERT YOUR CODE HERE\n",
113 | "    # TODO\n",
114 | "    # ***************************************************\n",
115 | "    raise NotImplementedError"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "## Coordinate Descent (Ascent) for SVM"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "Compute the closed-form update for the i-th dual variable alpha[i], given the current alpha and the corresponding w.\n",
130 | "\n",
131 | "\n",
132 | "Hints: \n",
133 | "- Differentiate the dual objective with respect to one `alpha[i]`.\n",
134 | "- Set the derivative to zero to compute a new `alpha[i]`.\n",
135 | "- Make sure the values of alpha stay inside a `[0, 1]` box.\n",
136 | "- You can formulate the update as `alpha[i] = projection(alpha[i] + lambda_ * (some update))`.\n",
137 | "- You can test the correctness of your implementation by checking if the difference between the dual objective and the primal objective goes to zero. This difference, the duality gap, should get smaller than 10 within 700000 iterations."
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "def calculate_coordinate_update(y, A, lambda_, alpha, w, i):\n",
147 | "    \"\"\"\n",
148 | "    Compute a coordinate update (closed form) for coordinate i.\n",
149 | "    y: +1 or -1 labels, shape = (num_examples)\n",
150 | "    A: Dataset matrix, shape = (num_examples, num_features)\n",
151 | "    lambda_: Regularization parameter, scalar\n",
152 | "    alpha: Dual variables, shape = (num_examples)\n",
153 | "    w: Model weights, shape = (num_features)\n",
154 | "    i: Index of the entry of the dual variable 'alpha' that is to be updated\n",
155 | "    return: New weights w (shape (num_features)), new dual variables alpha (shape (num_examples))\n",
156 | "    \"\"\"\n",
157 | "    # ***************************************************\n",
158 | "    # INSERT YOUR CODE HERE\n",
159 | "    # TODO\n",
160 | "    # ***************************************************\n",
161 | "    # calculate the update of the coordinate at index i.\n",
162 | "    a_i, y_i = A[i], y[i]\n",
163 | "    old_alpha_i = np.copy(alpha[i])\n",
164 | "    \n",
165 | "    raise NotImplementedError\n",
166 | "    return w, alpha"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": null,
172 | "metadata": {},
173 | "outputs": [],
174 | "source": [
175 | "def calculate_dual_objective(y, A, w, alpha, lambda_):\n",
176 | "    \"\"\"\n",
177 | "    Calculate the objective for the dual problem.\n",
178 | "    Follow the formula given above.\n",
179 | "    y: +1 or -1 labels, shape = (num_examples)\n",
180 | "    A: Dataset matrix, shape = (num_examples, num_features)\n",
181 | "    alpha: Dual variables, shape = (num_examples)\n",
182 | "    lambda_: Regularization parameter, scalar\n",
183 | "    return: Scalar value\n",
184 | "    \"\"\"\n",
185 | "    # ***************************************************\n",
186 | "    # INSERT YOUR CODE HERE\n",
187 | "    # TODO\n",
188 | "    # ***************************************************\n",
189 | "    raise NotImplementedError"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {},
196 | "outputs": [],
197 | "source": [
198 | "def coordinate_descent_for_svm_demo(y, A, trace=False):\n",
199 | "    max_iter = 1000000\n",
200 | "    lambda_ = 0.01\n",
201 | "    history = defaultdict(list) if trace else None\n",
202 | "    \n",
203 | "    num_examples, num_features = A.shape\n",
204 | "    w = np.zeros(num_features)\n",
205 | "    alpha = np.zeros(num_examples)\n",
206 | "    \n",
207 | "    for it in range(max_iter):\n",
208 | "        # i = sample one data point uniformly at random from the rows of A\n",
209 | "        i = random.randint(0, num_examples - 1)\n",
210 | "        \n",
211 | "        w, alpha = calculate_coordinate_update(y, A, lambda_, alpha, w, i)\n",
212 | "        \n",
213 | "        
if it % 100000 == 0:\n", 214 | " # primal objective\n", 215 | " primal_value = calculate_primal_objective(y, A, w, lambda_)\n", 216 | " # dual objective\n", 217 | " dual_value = calculate_dual_objective(y, A, w, alpha, lambda_)\n", 218 | " # primal dual gap\n", 219 | " duality_gap = primal_value - dual_value\n", 220 | " \n", 221 | " print('iteration=%i, primal:%.5f, dual:%.5f, gap:%.5f'%(\n", 222 | " it, primal_value, dual_value, duality_gap))\n", 223 | " if it % 1000 == 0:\n", 224 | " primal_value = calculate_primal_objective(y, A, w, lambda_)\n", 225 | " if trace:\n", 226 | " history[\"objective_function\"] += [primal_value]\n", 227 | " history['iter'].append(it)\n", 228 | "\n", 229 | " \n", 230 | " print(\"training accuracy = {l}\".format(l=calculate_accuracy(y, A, w)))\n", 231 | " return history\n", 232 | "\n", 233 | "history_cd = coordinate_descent_for_svm_demo(y, A, trace=True)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "# Stochastic gradient descent for SVM" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "Let's now compare it with SGD on original problem for the SVM. In this part, you will implement stochastic gradient descent on the primal SVM objective. The stochasticity comes from sampling data points." 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "def compute_stoch_gradient_svm(A_sample, b_sample, lambda_, w_t, num_data_points):\n", 257 | " \"\"\"\n", 258 | " Calculate stochastic gradient over A_batch, b_batch.\n", 259 | " A_sample: A data sample, shape=(num_features)\n", 260 | " b_sample: Corresponding +1 or -1 label, scalar\n", 261 | " w_t: Model weights, shape=(num_features)\n", 262 | " num_data_points: Total size of the dataset, scalar integer\n", 263 | " \"\"\"\n", 264 | " # ***************************************************\n", 265 | " # INSERT YOUR CODE HERE\n", 266 | " # TODO\n", 267 | " # ***************************************************\n", 268 | " raise NotImplementedError" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "def stochastic_gradient_descent_svm_demo(A, b, gamma, batch_size=1, trace=False):\n", 278 | " history = defaultdict(list) if trace else None\n", 279 | " num_data_points, num_features = np.shape(A)\n", 280 | " max_iter = 1000000\n", 281 | " lambda_ = 0.01\n", 282 | " \n", 283 | " w_t = np.zeros(num_features)\n", 284 | " \n", 285 | " current_iter = 0\n", 286 | " while (current_iter < max_iter):\n", 287 | " i = random.randint(0,num_data_points - 1)\n", 288 | " b_batch, A_batch = b[i], A[i]\n", 289 | " gradient = compute_stoch_gradient_svm(A_batch, b_batch, lambda_, w_t, num_data_points)\n", 290 | " w_t = w_t - gamma * gradient\n", 291 | " if current_iter % 100000 == 0:\n", 292 | " primal_value = calculate_primal_objective(y, A, w_t, lambda_)\n", 293 | " print('iteration=%i, primal:%.5f'%(\n", 294 | " current_iter, primal_value))\n", 295 | " if current_iter % 1000 == 0:\n", 296 | " primal_value = calculate_primal_objective(y, A, w_t, lambda_)\n", 297 | " if trace:\n", 298 | " history['objective_function'].append(primal_value)\n", 299 | " history['iter'].append(current_iter)\n", 300 | " current_iter += 1\n", 301 | " print(\"training accuracy = {l}\".format(l=calculate_accuracy(y, A, w_t)))\n", 302 | " return history\n" 303 | ] 304 | }, 
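Before the step-size experiments below, here are hedged sketches of plausible completions for the templates above (assumptions, not the official solutions; they rely only on numpy, which is already imported):

def calculate_primal_objective(y, A, w, lambda_):
    # sum of hinge losses over all examples, plus the L2 regularizer
    hinge = np.clip(1 - y * (A @ w), 0, None)
    return np.sum(hinge) + lambda_ / 2 * np.sum(w ** 2)

def calculate_accuracy(y, A, w):
    # fraction of examples whose predicted sign matches the label
    predictions = np.where(A @ w >= 0, 1, -1)
    return np.mean(predictions == y)

def compute_stoch_gradient_svm(A_sample, b_sample, lambda_, w_t, num_data_points):
    # Subgradient of the sampled hinge term, rescaled by n so that it is an
    # unbiased estimate of the gradient of the sum, plus the regularizer term.
    if b_sample * (A_sample @ w_t) < 1:
        return -b_sample * A_sample * num_data_points + lambda_ * w_t
    return lambda_ * w_t

def calculate_coordinate_update(y, A, lambda_, alpha, w, i):
    # Exact maximization of the dual in coordinate i, projected onto [0, 1],
    # followed by the matching rank-1 update of w = (1/lambda) A^T Y alpha.
    a_i, y_i = A[i], y[i]
    old_alpha_i = np.copy(alpha[i])
    squared_norm = a_i @ a_i
    if squared_norm == 0.0:  # guard against all-zero data rows
        return w, alpha
    g = 1.0 - y_i * (a_i @ w)  # partial derivative of the dual objective
    alpha[i] = np.clip(old_alpha_i + lambda_ * g / squared_norm, 0.0, 1.0)
    w += (1.0 / lambda_) * (alpha[i] - old_alpha_i) * y_i * a_i
    return w, alpha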
305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "Try different stepsized and find the best one" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": { 316 | "scrolled": true 317 | }, 318 | "outputs": [], 319 | "source": [ 320 | "# ***************************************************\n", 321 | "# INSERT YOUR CODE HERE\n", 322 | "# TODO\n", 323 | "# ***************************************************" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "Plot learning curves" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": {}, 337 | "outputs": [], 338 | "source": [ 339 | "# ***************************************************\n", 340 | "# INSERT YOUR CODE HERE\n", 341 | "# TODO\n", 342 | "# ***************************************************" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "## Compare SGD with Coordinate Descent" 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": {}, 355 | "source": [ 356 | "Compare two algorithms in terms of convergence, time complexities per iteration. Which one is easier to use?" 357 | ] 358 | } 359 | ], 360 | "metadata": { 361 | "anaconda-cloud": {}, 362 | "kernelspec": { 363 | "display_name": "Python 3", 364 | "language": "python", 365 | "name": "python3" 366 | }, 367 | "language_info": { 368 | "codemirror_mode": { 369 | "name": "ipython", 370 | "version": 3 371 | }, 372 | "file_extension": ".py", 373 | "mimetype": "text/x-python", 374 | "name": "python", 375 | "nbconvert_exporter": "python", 376 | "pygments_lexer": "ipython3", 377 | "version": "3.7.4" 378 | }, 379 | "toc": { 380 | "base_numbering": 1, 381 | "nav_menu": {}, 382 | "number_sections": true, 383 | "sideBar": true, 384 | "skip_h1_title": false, 385 | "title_cell": "Table of Contents", 386 | "title_sidebar": "Contents", 387 | "toc_cell": false, 388 | "toc_position": {}, 389 | "toc_section_display": true, 390 | "toc_window_display": false 391 | } 392 | }, 393 | "nbformat": 4, 394 | "nbformat_minor": 1 395 | } 396 | -------------------------------------------------------------------------------- /labs/ex09/exercise09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex09/exercise09.pdf -------------------------------------------------------------------------------- /labs/ex09/solution/dataset_generation_NOT_NECESSARY_FOR_STUDENTS/generate_csv.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """convert movielens100k.mat to the same data type of project 2.""" 3 | import numpy as np 4 | from scipy.io import loadmat 5 | 6 | 7 | def load_data(): 8 | """load the mat data.""" 9 | data = loadmat('movielens100k.mat') 10 | ratings = data['ratings'] 11 | print("The data type of the 'ratings': {dt}".format(dt=type(ratings))) 12 | print("The shape of the 'ratings':{v}".format(v=ratings.shape)) 13 | return ratings 14 | 15 | 16 | def to_list(data): 17 | """save nz rating to list.""" 18 | nz = np.nonzero(data) 19 | return ["r{}_c{},{}".format(nz_row + 1, nz_col + 1, data[nz_row, nz_col]) 20 | for nz_row, nz_col in zip(*nz)] 21 | 22 | 23 | def to_csv(data, path): 24 | """write data to csv file.""" 25 | with open(path, "w") as f: 26 | 
f.write("\n".join(data)) 27 | 28 | 29 | if __name__ == '__main__': 30 | path = "movielens100k.csv" 31 | data = load_data() 32 | processed_data = to_list(data) 33 | to_csv(processed_data, path) 34 | -------------------------------------------------------------------------------- /labs/ex09/solution/dataset_generation_NOT_NECESSARY_FOR_STUDENTS/movielens100k.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex09/solution/dataset_generation_NOT_NECESSARY_FOR_STUDENTS/movielens100k.mat -------------------------------------------------------------------------------- /labs/ex09/solution/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some functions for help.""" 3 | 4 | from itertools import groupby 5 | 6 | import numpy as np 7 | import scipy.sparse as sp 8 | 9 | 10 | def read_txt(path): 11 | """read text file from path.""" 12 | with open(path, "r") as f: 13 | return f.read().splitlines() 14 | 15 | 16 | def load_data(path_dataset, n=500): 17 | """Load data in text format, one rating per line, as in the kaggle competition.""" 18 | data = read_txt(path_dataset)[1:] 19 | return preprocess_data(data,n) 20 | 21 | def slice(ratings,n=None): 22 | """take the first n rows and n columns only""" 23 | if n is not None: 24 | ratings = ratings[:n,:n] 25 | return ratings 26 | 27 | def preprocess_data(data, n = 500): 28 | """preprocessing the text data, conversion to numerical array format.""" 29 | def deal_line(line): 30 | pos, rating = line.split(',') 31 | row, col = pos.split("_") 32 | row = row.replace("r", "") 33 | col = col.replace("c", "") 34 | return int(row), int(col), float(rating) 35 | 36 | def statistics(data): 37 | row = set([line[0] for line in data]) 38 | col = set([line[1] for line in data]) 39 | return min(row), max(row), min(col), max(col) 40 | 41 | # parse each line 42 | data = [deal_line(line) for line in data] 43 | 44 | # do statistics on the dataset. 45 | min_row, max_row, min_col, max_col = statistics(data) 46 | # print("number of items: {}, number of users: {}".format(max_row, max_col)) 47 | # build rating matrix. 
48 | ratings = np.zeros([max_row,max_col]) 49 | for row, col, rating in data: 50 | ratings[row - 1, col - 1] = rating 51 | # Reduce the size of the dataset 52 | ratings = slice(ratings, n) 53 | # Make the trace 1 by scaling all entries of matrix 54 | ratings = ratings/np.trace(ratings) 55 | print("number of items: {}, number of users: {}".format(n, n)) 56 | return ratings 57 | 58 | def group_by(data, index): 59 | """group list of list by a specific index.""" 60 | sorted_data = sorted(data, key=lambda x: x[index]) 61 | groupby_data = groupby(sorted_data, lambda x: x[index]) 62 | return groupby_data 63 | 64 | 65 | def build_index_groups(train): 66 | """build groups for nnz rows and cols.""" 67 | nz_row, nz_col = train.nonzero() 68 | nz_train = list(zip(nz_row, nz_col)) 69 | 70 | grouped_nz_train_byrow = group_by(nz_train, index=0) 71 | nz_row_colindices = [(g, np.array([v[1] for v in value])) 72 | for g, value in grouped_nz_train_byrow] 73 | 74 | grouped_nz_train_bycol = group_by(nz_train, index=1) 75 | nz_col_rowindices = [(g, np.array([v[0] for v in value])) 76 | for g, value in grouped_nz_train_bycol] 77 | return nz_train, nz_row_colindices, nz_col_rowindices 78 | 79 | 80 | def calculate_mse(real_label, prediction): 81 | """calculate MSE.""" 82 | t = real_label - prediction 83 | return 1.0 * t.dot(t.T) 84 | 85 | 86 | def split_data(ratings, num_items_per_user, num_users_per_item, 87 | min_num_ratings, p_test=0.2): 88 | """split the ratings to training data and test data. 89 | Args: 90 | min_num_ratings: 91 | all users and items we keep must have at least min_num_ratings per user and per item. 92 | """ 93 | # set seed 94 | np.random.seed(988) 95 | 96 | # select user and item based on the condition. 97 | valid_users = np.where(num_items_per_user >= min_num_ratings)[0] 98 | valid_items = np.where(num_users_per_item >= min_num_ratings)[0] 99 | valid_ratings = ratings[valid_items, :][: , valid_users] 100 | 101 | # init 102 | num_rows, num_cols = valid_ratings.shape 103 | train = np.zeros([num_rows, num_cols]) 104 | test = np.zeros([num_rows, num_cols]) 105 | 106 | print("the shape of original ratings. (# of row, # of col): {}".format( 107 | ratings.shape)) 108 | print("the shape of valid ratings. (# of row, # of col): {}".format( 109 | (num_rows, num_cols))) 110 | 111 | nz_items, nz_users = valid_ratings.nonzero() 112 | 113 | # split the data 114 | for user in set(nz_users): 115 | # randomly select a subset of ratings 116 | row = valid_ratings[:, user].nonzero()[0] 117 | selects = np.random.choice(row, size=int(len(row) * p_test)) 118 | residual = list(set(row) - set(selects)) 119 | 120 | # add to train set 121 | train[residual, user] = valid_ratings[residual, user] 122 | 123 | # add to test set 124 | test[selects, user] = valid_ratings[selects, user] 125 | 126 | print("Total number of nonzero elements in origial data:{v}".format(v=np.count_nonzero(ratings))) 127 | print("Total number of nonzero elements in train data:{v}".format(v=np.count_nonzero(train))) 128 | print("Total number of nonzero elements in test data:{v}".format(v=np.count_nonzero(test))) 129 | return valid_ratings, train, test 130 | -------------------------------------------------------------------------------- /labs/ex09/solution/optimizers.py: -------------------------------------------------------------------------------- 1 | """Common optimizers.""" 2 | 3 | 4 | import numpy as np 5 | from time import time 6 | 7 | def gradient_descent(init, steps, grad, proj=lambda x: x, num_to_keep=None): 8 | """Projected gradient descent. 
9 | 
10 |     Parameters
11 |     ----------
12 |     init : array
13 |         starting point
14 |     steps : list of floats
15 |         step size schedule for the algorithm
16 |     grad : function
17 |         mapping arrays to arrays of same shape
18 |     proj : function, optional
19 |         mapping arrays to arrays of same shape
20 |     num_to_keep : integer, optional
21 |         number of points to keep
22 | 
23 |     Returns
24 |     -------
25 |     List of points computed by projected gradient descent and the wall clock time it took to compute them. Length of the
26 |     lists is determined by `num_to_keep`.
27 |     """
28 |     xs = [init]
29 |     ts = [0]
30 |     start = time()
31 |     for step in steps:
32 |         xs.append(proj(xs[-1] - step * grad(xs[-1])))
33 |         ts.append(time() - start)
34 |     if num_to_keep:
35 |         xs = xs[-num_to_keep:]
36 |         ts = ts[-num_to_keep:]
37 |     return xs, ts
38 | 
39 | 
40 | def frank_wolfe(initial, update_oracle, num_steps, num_to_keep=None):
41 |     """ Frank-Wolfe.
42 | 
43 |     Frank-Wolfe (Conditional gradient) for first-order optimization.
44 | 
45 |     Parameters:
46 |     -----------
47 |     initial: array,
48 |         initial starting point
49 |     update_oracle: function, mapping points to points,
50 |         computes the next iterate given the current iterate and iteration number
51 |     num_steps: integer,
52 |         number of steps to run the algorithm for
53 |     Returns:
54 |     --------
55 |     List of points computed by the algorithm and the wall clock time it took to compute them
56 |     """
57 |     xs = [initial]
58 |     ts = [0]
59 |     start = time()
60 |     for step in range(num_steps):
61 |         xs.append(update_oracle(xs[-1],step))
62 |         ts.append(time() - start)
63 |     if num_to_keep:
64 |         xs = xs[-num_to_keep:]
65 |         ts = ts[-num_to_keep:]
66 |     return xs, ts
--------------------------------------------------------------------------------
/labs/ex09/solution/plots.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """some functions for plots."""
3 | 
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 | 
7 | 
8 | def plot_raw_data(ratings):
9 |     """plot the statistics result on raw rating data."""
10 |     # do statistics.
11 | num_items_per_user = np.count_nonzero(ratings,axis=0) 12 | num_users_per_item = np.count_nonzero(ratings,axis=1) 13 | sorted_num_movies_per_user = np.sort(num_items_per_user)[::-1] 14 | sorted_num_users_per_movie = np.sort(num_users_per_item)[::-1] 15 | 16 | # plot 17 | fig = plt.figure() 18 | ax1 = fig.add_subplot(1, 2, 1) 19 | ax1.plot(sorted_num_movies_per_user, color='blue') 20 | ax1.set_xlabel("users") 21 | ax1.set_ylabel("number of ratings (sorted)") 22 | ax1.grid() 23 | 24 | ax2 = fig.add_subplot(1, 2, 2) 25 | ax2.plot(sorted_num_users_per_movie) 26 | ax2.set_xlabel("items") 27 | ax2.set_ylabel("number of ratings (sorted)") 28 | ax2.grid() 29 | 30 | plt.tight_layout() 31 | plt.savefig("stat_ratings") 32 | plt.show() 33 | # plt.close() 34 | return num_items_per_user, num_users_per_item 35 | 36 | 37 | def plot_train_test_data(train, test): 38 | """visualize the train and test data.""" 39 | fig = plt.figure() 40 | ax1 = fig.add_subplot(1, 2, 1) 41 | ax1.spy(train, precision=0.01, markersize=0.5) 42 | ax1.set_xlabel("Users") 43 | ax1.set_ylabel("Items") 44 | ax1.set_title("Training data") 45 | ax2 = fig.add_subplot(1, 2, 2) 46 | ax2.spy(test, precision=0.01, markersize=0.5) 47 | ax2.set_xlabel("Users") 48 | ax2.set_ylabel("Items") 49 | ax2.set_title("Test data") 50 | plt.tight_layout() 51 | plt.savefig("train_test") 52 | plt.show() 53 | -------------------------------------------------------------------------------- /labs/ex09/solution/stat_ratings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex09/solution/stat_ratings.png -------------------------------------------------------------------------------- /labs/ex09/solution/train_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex09/solution/train_test.png -------------------------------------------------------------------------------- /labs/ex09/solution09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex09/solution09.pdf -------------------------------------------------------------------------------- /labs/ex09/template/ex09-MatrixCompletion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# Useful starting lines\n", 12 | "%matplotlib inline\n", 13 | "\n", 14 | "import numpy as np\n", 15 | "import scipy\n", 16 | "import scipy.io\n", 17 | "import scipy.sparse as sp\n", 18 | "import scipy.linalg as la\n", 19 | "from scipy.sparse.linalg import svds\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "from timeit import timeit\n", 22 | "%load_ext autoreload\n", 23 | "%autoreload 2" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Based on [https://ee227c.github.io/code/lecture5.html#projected-gd]" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "# Movie recommendation using low rank matrix completion" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "We are Netflix and have access to 
the ratings given by users to some movies they saw. Based on this data we want to predict the rating an user would give other movies." 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "### Load the Data\n", 52 | "Note that `ratings` is a sparse matrix that in the shape of (num_items, num_users)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "from helpers import load_data, preprocess_data, split_data\n", 64 | "\n", 65 | "path_dataset = \"movielens100k.csv\"\n", 66 | "ratings = load_data(path_dataset, n = 1000)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Plot the number of ratings per movie and user" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": true, 81 | "scrolled": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "from plots import plot_raw_data\n", 86 | "\n", 87 | "num_items_per_user, num_users_per_item = plot_raw_data(ratings)\n", 88 | "\n", 89 | "print(\"min # of items per user = {}, min # of users per item = {}.\".format(\n", 90 | " min(num_items_per_user), min(num_users_per_item)))" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "### Split the data into a train and test set" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "collapsed": true, 105 | "scrolled": true 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "from plots import plot_train_test_data\n", 110 | "\n", 111 | "valid_ratings, train, test = split_data(\n", 112 | " ratings, num_items_per_user, num_users_per_item, min_num_ratings=1, p_test=0.1)\n", 113 | "plot_train_test_data(train, test)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Learning low rank matrices" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "We want to minimize the squared error\n", 128 | "$$\n", 129 | "\\min_{Y\\in X\\subseteq R^{n\\times m}}\\ \\dfrac{1}{2}\\sum_{(i,j)\\in \\Omega} (Z_{ij} - Y_{ij})^2\n", 130 | "$$\n", 131 | "when $\\Omega\\subseteq[n]\\times[m]$ is the set of observed entries from a given matrix $Z$.\n", 132 | "\n", 133 | "Since without more assumptions this is a hopeless problem, we assume that the *true* matrix $Y$ is low rank. 
As a proxy for low rank, we assume that the trace norm of $Y$ is bounded.\n", 134 | "\n", 135 | "In this case, our optimization domain is the unit ball of the trace norm (or nuclear norm), which is known to be the convex hull of the rank-1 matrices \n", 136 | "$$\n", 137 | "X := \\mathop{conv}(\\mathcal{A}) \\ \\text{ with }\\ \\mathcal{A} := \\Big\\{ u v^\\top \\ \\Big|\\ \\substack{u\\in R^n,\\;||{u}||_2=1\\\\ v\\in R^m,\\;||{v}||_2=1} \\Big\\} \\ .\n", 138 | "$$" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "collapsed": true 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "def cost_se(Y,Z):\n", 150 | " \"\"\"Compute the objective function on an input matrix Y for the data matrix Z\n", 151 | " Assume all (and only) non-zero values of Z are observed\"\"\"\n", 152 | " cost = 0\n", 153 | " observed_rows,observed_columns = np.nonzero(Z)\n", 154 | " # ***************************************************\n", 155 | " # INSERT YOUR CODE HERE\n", 156 | " # TODO\n", 157 | " # *************************************************** \n", 158 | " raise NotImplementedError \n", 159 | " " 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## Implementing Baselines " 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Use the global mean to do the prediction" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": true, 181 | "scrolled": true 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "def baseline_global_mean(train, test):\n", 186 | " \"\"\"baseline method: use the global mean.\"\"\"\n", 187 | " observed_rows,observed_columns = np.nonzero(train)\n", 188 | " \n", 189 | " # ***************************************************\n", 190 | " # INSERT YOUR CODE HERE\n", 191 | " # TODO\n", 192 | " # *************************************************** \n", 193 | " raise NotImplementedError \n", 194 | " test_error = cost_se(Y,test)\n", 195 | " print(\"The test error of baseline using global mean: {v}.\".format(v=test_error))\n", 196 | "\n", 197 | "baseline_global_mean(train, test)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "### Use the user means as the prediction" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "collapsed": true 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "def baseline_user_mean(train, test):\n", 216 | " \"\"\"baseline method: use the user means as the prediction.\"\"\"\n", 217 | " num_items, num_users = train.shape\n", 218 | " \n", 219 | " # ***************************************************\n", 220 | " # INSERT YOUR CODE HERE\n", 221 | " # TODO\n", 222 | " # *************************************************** \n", 223 | " raise NotImplementedError \n", 224 | "\n", 225 | " test_error = cost_se(Y,test)\n", 226 | " print(\"The test error of baseline using user mean: {v}.\".format(v=test_error))\n", 227 | "\n", 228 | "baseline_user_mean(train, test)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "### Use the item means as the prediction" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": true 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "def baseline_item_mean(train, test):\n", 247 | " 
\"\"\"baseline method: use item means as the prediction.\"\"\"\n", 248 | " num_items, num_users = train.shape\n", 249 | " \n", 250 | " # ***************************************************\n", 251 | " # INSERT YOUR CODE HERE\n", 252 | " # TODO\n", 253 | " # *************************************************** \n", 254 | " raise NotImplementedError \n", 255 | " \n", 256 | " test_error = cost_se(Y,test)\n", 257 | " print(\"The test error of baseline using item mean: {v}.\".format(v=test_error))\n", 258 | " \n", 259 | "baseline_item_mean(train, test)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": { 265 | "collapsed": true 266 | }, 267 | "source": [ 268 | "## Learn matrix using projected gradient descent" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": true 276 | }, 277 | "outputs": [], 278 | "source": [ 279 | "def compute_gradient(Y,Z):\n", 280 | " \"\"\"Compute the gradient of the objective.\n", 281 | " Assume that all non-zero values in Z are observed and so are part of \\Omega\"\"\"\n", 282 | " gradient = np.zeros(Y.shape)\n", 283 | " observed_rows,observed_columns = np.nonzero(Z)\n", 284 | " \n", 285 | " # ***************************************************\n", 286 | " # INSERT YOUR CODE HERE\n", 287 | " # TODO\n", 288 | " # *************************************************** \n", 289 | " raise NotImplementedError" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": true 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "def project_onto_simplex(s):\n", 301 | " \"\"\"Given a vector s, find its projection onto the unit simplex\"\"\" \n", 302 | " \n", 303 | " # ***************************************************\n", 304 | " # INSERT YOUR CODE HERE\n", 305 | " # TODO\n", 306 | " # *************************************************** \n", 307 | " raise NotImplementedError \n", 308 | "\n", 309 | "def project_onto_tracenormball(S):\n", 310 | " \"\"\"Compute the projection of the matrix S onto the set X (the unit ball of the trace norm)\n", 311 | " Hint: use the simplex projection function you wrote above\"\"\"\n", 312 | " \n", 313 | " \n", 314 | " # ***************************************************\n", 315 | " # INSERT YOUR CODE HERE\n", 316 | " # TODO\n", 317 | " # *************************************************** \n", 318 | " raise NotImplementedError " 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "#### Cost of a projecting onto a trace norm ball\n", 326 | "How does the cost of the projection scale with respect to increasing dimension?" 
327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": { 333 | "collapsed": true 334 | }, 335 | "outputs": [], 336 | "source": [ 337 | "ts = []\n", 338 | "ns = [100, 200, 400, 600, 800]\n", 339 | "for n in ns:\n", 340 | " f = lambda: project_onto_tracenormball(np.random.normal(0,1,(n, n)))\n", 341 | " ts.append(timeit(f, number=1))\n", 342 | "\n", 343 | "plt.figure(figsize=(12,6))\n", 344 | "plt.xlabel('Input dimension')\n", 345 | "plt.ylabel('Time (s)')\n", 346 | "plt.title('Cost of nuclear norm projection')\n", 347 | "plt.plot(ns, ts)" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "#### Running projected gradient descent" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "metadata": { 361 | "collapsed": true 362 | }, 363 | "outputs": [], 364 | "source": [ 365 | "from optimizers import gradient_descent\n", 366 | "\n", 367 | "# start from random matrix of nuclear norm 1\n", 368 | "Y0 = np.random.normal(0,1, train.shape)\n", 369 | "Y0 = project_onto_tracenormball(Y0)\n", 370 | "# define the train and test error\n", 371 | "test_objective = lambda Y: cost_se(Y, test)\n", 372 | "train_objective = lambda Y: cost_se(Y, train)\n", 373 | "# run the gradient descent algorithm\n", 374 | "gradient = lambda Y: compute_gradient(Y, train)\n", 375 | "Ys, ts = gradient_descent(Y0, [0.2]*10, gradient, project_onto_tracenormball)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": { 382 | "collapsed": true 383 | }, 384 | "outputs": [], 385 | "source": [ 386 | "\"\"\"Plot the test and train errors vs. number of iterations\"\"\"\n", 387 | "plt.figure(figsize=(12,6))\n", 388 | "plt.suptitle('Projected gradient descent error')\n", 389 | "plt.subplot(1, 2, 1)\n", 390 | "plt.ylabel('Train error')\n", 391 | "plt.xlabel('Steps')\n", 392 | "plt.plot(np.arange(len(Ys)), [train_objective(Y) for Y in Ys], 'ko-')\n", 393 | "\n", 394 | "plt.subplot(1, 2, 2)\n", 395 | "plt.ylabel('Test error')\n", 396 | "plt.xlabel('Steps')\n", 397 | "plt.plot(np.arange(len(Ys)), [test_objective(Y) for Y in Ys], 'r.-')" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": { 404 | "collapsed": true 405 | }, 406 | "outputs": [], 407 | "source": [ 408 | "\"\"\"Plot the test and train errors vs. 
time\"\"\"\n", 409 | "plt.figure(figsize=(12,6))\n", 410 | "plt.title('Projected gradient descent error')\n", 411 | "plt.subplot(1, 2, 1)\n", 412 | "plt.ylabel('Train error')\n", 413 | "plt.xlabel('Time (in sec)')\n", 414 | "plt.plot(ts, [train_objective(Y) for Y in Ys], 'ko-')\n", 415 | "\n", 416 | "plt.subplot(1, 2, 2)\n", 417 | "plt.ylabel('Test error')\n", 418 | "plt.xlabel('Time (in sec)')\n", 419 | "plt.plot(ts, [test_objective(Y) for Y in Ys], 'r.-')" 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "## Learn matrix using Frank-Wolfe" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": { 433 | "collapsed": true 434 | }, 435 | "outputs": [], 436 | "source": [ 437 | "def LMO(S):\n", 438 | " \"\"\"Compute the linear maximization oracle (LMO) over the unit ball of the trace norm (nuclear norm) for an input S\"\"\"\n", 439 | " \n", 440 | " # ***************************************************\n", 441 | " # INSERT YOUR CODE HERE\n", 442 | " # TODO\n", 443 | " # *************************************************** \n", 444 | " raise NotImplementedError \n" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": { 451 | "collapsed": true 452 | }, 453 | "outputs": [], 454 | "source": [ 455 | "def cond_grad_update(Y, Z, t):\n", 456 | " \"\"\"Compute the Frank-Wolfe update.\n", 457 | " Here t is the iteration number, Y is the current point and Z is the observed matrix\"\"\"\n", 458 | " gradient = compute_gradient(Y,Z)\n", 459 | " V = LMO(-gradient)\n", 460 | " \n", 461 | " # ***************************************************\n", 462 | " # INSERT YOUR CODE HERE\n", 463 | " # TODO\n", 464 | " # *************************************************** \n", 465 | " raise NotImplementedError " 466 | ] 467 | }, 468 | { 469 | "cell_type": "markdown", 470 | "metadata": {}, 471 | "source": [ 472 | "### Comparing cost of Projection and Linear Minimization" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": { 479 | "collapsed": true 480 | }, 481 | "outputs": [], 482 | "source": [ 483 | "ts1 = []\n", 484 | "ts2 = []\n", 485 | "ns = [100, 200, 400, 600, 800]\n", 486 | "for n in ns:\n", 487 | " f = lambda: project_onto_tracenormball(sp.random(n,n))\n", 488 | " ts1.append(timeit(f, number=1))\n", 489 | " f = lambda: LMO(np.random.normal(0,1,(n, n)))\n", 490 | " ts2.append(timeit(f, number=1))" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": { 497 | "collapsed": true 498 | }, 499 | "outputs": [], 500 | "source": [ 501 | "plt.figure(figsize=(12,6))\n", 502 | "plt.xlabel('Input dimension')\n", 503 | "plt.ylabel('Time (s)')\n", 504 | "plt.title('Projection vs linear optimization')\n", 505 | "plt.plot(ns, ts1, label='projection')\n", 506 | "plt.plot(ns, ts2, label='linear opt')\n", 507 | "plt.legend()" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "#### Running Frank-Wolfe" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": null, 520 | "metadata": { 521 | "collapsed": true 522 | }, 523 | "outputs": [], 524 | "source": [ 525 | "from optimizers import frank_wolfe\n", 526 | "\n", 527 | "# start from random matrix of nuclear norm 1\n", 528 | "Y0 = np.random.normal(0,1, train.shape)\n", 529 | "Y0 = project_onto_tracenormball(Y0)\n", 530 | "# define the train and test error\n", 531 | 
"test_objective = lambda Y: cost_se(Y, test)\n", 532 | "train_objective = lambda Y: cost_se(Y, train)\n", 533 | "# run the Frank-Wolfe algorithm\n", 534 | "update_oracle = lambda Y,k: cond_grad_update(Y, train, k)\n", 535 | "Ys, ts = frank_wolfe(Y0, update_oracle, num_steps = 10)" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "metadata": { 542 | "collapsed": true 543 | }, 544 | "outputs": [], 545 | "source": [ 546 | "\"\"\"Plot the test and train errors vs. number of iterations\"\"\"\n", 547 | "plt.figure(figsize=(12,6))\n", 548 | "plt.title('Frank-Wolfe error vs. Number of iterations')\n", 549 | "plt.subplot(1, 2, 1)\n", 550 | "plt.ylabel('Train error')\n", 551 | "plt.xlabel('Steps')\n", 552 | "plt.plot(np.arange(len(Ys)), [train_objective(Y) for Y in Ys], 'ko-')\n", 553 | "\n", 554 | "plt.subplot(1, 2, 2)\n", 555 | "plt.ylabel('Test error')\n", 556 | "plt.xlabel('Steps')\n", 557 | "plt.plot(np.arange(len(Ys)), [test_objective(Y) for Y in Ys], 'r.-')" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": null, 563 | "metadata": { 564 | "collapsed": true 565 | }, 566 | "outputs": [], 567 | "source": [ 568 | "\"\"\"Plot the test and train errors vs. time\"\"\"\n", 569 | "plt.figure(figsize=(12,6))\n", 570 | "plt.title('Frank-Wolfe error vs. Time')\n", 571 | "plt.subplot(1,2, 1)\n", 572 | "plt.ylabel('Train error')\n", 573 | "plt.xlabel('Time (in sec)')\n", 574 | "plt.plot(ts, [train_objective(Y) for Y in Ys], 'ko-')\n", 575 | "\n", 576 | "plt.subplot(1, 2, 2)\n", 577 | "plt.ylabel('Test error')\n", 578 | "plt.xlabel('Time (in sec)')\n", 579 | "plt.plot(ts, [test_objective(Y) for Y in Ys], 'r.-')" 580 | ] 581 | }, 582 | { 583 | "cell_type": "markdown", 584 | "metadata": { 585 | "collapsed": true 586 | }, 587 | "source": [ 588 | "## A more practical method\n", 589 | "\n", 590 | "In practice, to learn a low rank matrix, neither Frank-Wolfe nor Projected Gradient Descent are used. Instead, we formulate a **non-convex** problem which is then solved by SGD. In particular if we want to learn a rank $k$ matrix, $X$ is replaced by two matrices $UV^\\top$ where $U \\in R^{n\\times k}$ and $V \\in R^{m \\times k}$. This means that we never have to store the full matrix $X$ which would take $O(mn)$ space but instead only $O(mk + nk)$ space. Futher, the matrices $U$ and $V$ can be interpreted as *embeddings*. There have been recent theoretical results which prove that this algorithm in fact recovers the correct answer under some assumptions!\n", 591 | "\n", 592 | "Refer to this exercise from Machine Learning course (https://github.com/epfml/ML_course/blob/master/labs/ex10/solution/ex10.ipynb) for more details." 
593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": { 599 | "collapsed": true 600 | }, 601 | "outputs": [], 602 | "source": [] 603 | } 604 | ], 605 | "metadata": { 606 | "kernelspec": { 607 | "display_name": "Python 3", 608 | "language": "python", 609 | "name": "python3" 610 | }, 611 | "language_info": { 612 | "codemirror_mode": { 613 | "name": "ipython", 614 | "version": 3 615 | }, 616 | "file_extension": ".py", 617 | "mimetype": "text/x-python", 618 | "name": "python", 619 | "nbconvert_exporter": "python", 620 | "pygments_lexer": "ipython3", 621 | "version": "3.5.2" 622 | } 623 | }, 624 | "nbformat": 4, 625 | "nbformat_minor": 1 626 | } 627 | -------------------------------------------------------------------------------- /labs/ex09/template/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some functions for help.""" 3 | 4 | from itertools import groupby 5 | 6 | import numpy as np 7 | import scipy.sparse as sp 8 | 9 | 10 | def read_txt(path): 11 | """read text file from path.""" 12 | with open(path, "r") as f: 13 | return f.read().splitlines() 14 | 15 | 16 | def load_data(path_dataset, n=500): 17 | """Load data in text format, one rating per line, as in the kaggle competition.""" 18 | data = read_txt(path_dataset)[1:] 19 | return preprocess_data(data,n) 20 | 21 | def slice(ratings,n=None): 22 | """take the first n rows and n columns only""" 23 | if n is not None: 24 | ratings = ratings[:n,:n] 25 | return ratings 26 | 27 | def preprocess_data(data, n = 500): 28 | """preprocessing the text data, conversion to numerical array format.""" 29 | def deal_line(line): 30 | pos, rating = line.split(',') 31 | row, col = pos.split("_") 32 | row = row.replace("r", "") 33 | col = col.replace("c", "") 34 | return int(row), int(col), float(rating) 35 | 36 | def statistics(data): 37 | row = set([line[0] for line in data]) 38 | col = set([line[1] for line in data]) 39 | return min(row), max(row), min(col), max(col) 40 | 41 | # parse each line 42 | data = [deal_line(line) for line in data] 43 | 44 | # do statistics on the dataset. 45 | min_row, max_row, min_col, max_col = statistics(data) 46 | # print("number of items: {}, number of users: {}".format(max_row, max_col)) 47 | # build rating matrix. 
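# (dense items-by-users array; entries left at zero are treated as unobserved
#  by the rest of the lab, e.g. via np.nonzero in the notebook's cost function)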
48 | ratings = np.zeros([max_row,max_col]) 49 | for row, col, rating in data: 50 | ratings[row - 1, col - 1] = rating 51 | # Reduce the size of the dataset 52 | ratings = slice(ratings, n) 53 | # Make the trace 1 by scaling all entries of matrix 54 | ratings = ratings/np.trace(ratings) 55 | print("number of items: {}, number of users: {}".format(*ratings.shape)) 56 | return ratings 57 | 58 | def group_by(data, index): 59 | """group list of list by a specific index.""" 60 | sorted_data = sorted(data, key=lambda x: x[index]) 61 | groupby_data = groupby(sorted_data, lambda x: x[index]) 62 | return groupby_data 63 | 64 | 65 | def build_index_groups(train): 66 | """build groups for nnz rows and cols.""" 67 | nz_row, nz_col = train.nonzero() 68 | nz_train = list(zip(nz_row, nz_col)) 69 | 70 | grouped_nz_train_byrow = group_by(nz_train, index=0) 71 | nz_row_colindices = [(g, np.array([v[1] for v in value])) 72 | for g, value in grouped_nz_train_byrow] 73 | 74 | grouped_nz_train_bycol = group_by(nz_train, index=1) 75 | nz_col_rowindices = [(g, np.array([v[0] for v in value])) 76 | for g, value in grouped_nz_train_bycol] 77 | return nz_train, nz_row_colindices, nz_col_rowindices 78 | 79 | 80 | def calculate_mse(real_label, prediction): 81 | """calculate MSE (here: the sum of squared errors).""" 82 | t = real_label - prediction 83 | return 1.0 * t.dot(t.T) 84 | 85 | 86 | def split_data(ratings, num_items_per_user, num_users_per_item, 87 | min_num_ratings, p_test=0.2): 88 | """split the ratings into training data and test data. 89 | Args: 90 | min_num_ratings: 91 | all users and items we keep must have at least min_num_ratings per user and per item. 92 | """ 93 | # set seed 94 | np.random.seed(988) 95 | 96 | # select user and item based on the condition. 97 | valid_users = np.where(num_items_per_user >= min_num_ratings)[0] 98 | valid_items = np.where(num_users_per_item >= min_num_ratings)[0] 99 | valid_ratings = ratings[valid_items, :][:, valid_users] 100 | 101 | # init 102 | num_rows, num_cols = valid_ratings.shape 103 | train = np.zeros([num_rows, num_cols]) 104 | test = np.zeros([num_rows, num_cols]) 105 | 106 | print("the shape of original ratings. (# of row, # of col): {}".format( 107 | ratings.shape)) 108 | print("the shape of valid ratings. (# of row, # of col): {}".format( 109 | (num_rows, num_cols))) 110 | 111 | nz_items, nz_users = valid_ratings.nonzero() 112 | 113 | # split the data 114 | for user in set(nz_users): 115 | # randomly select a subset of ratings (without replacement, so the 116 | # test set really contains the requested fraction of distinct ratings) 116 | row = valid_ratings[:, user].nonzero()[0] 117 | selects = np.random.choice(row, size=int(len(row) * p_test), replace=False) 118 | residual = list(set(row) - set(selects)) 119 | 120 | # add to train set 121 | train[residual, user] = valid_ratings[residual, user] 122 | 123 | # add to test set 124 | test[selects, user] = valid_ratings[selects, user] 125 | 126 | print("Total number of nonzero elements in original data:{v}".format(v=np.count_nonzero(ratings))) 127 | print("Total number of nonzero elements in train data:{v}".format(v=np.count_nonzero(train))) 128 | print("Total number of nonzero elements in test data:{v}".format(v=np.count_nonzero(test))) 129 | return valid_ratings, train, test 130 | -------------------------------------------------------------------------------- /labs/ex09/template/optimizers.py: -------------------------------------------------------------------------------- 1 | """Common optimizers.""" 2 | 3 | 4 | import numpy as np 5 | from time import time 6 | 7 | def gradient_descent(init, steps, grad, proj=lambda x: x, num_to_keep=None): 8 | """Projected gradient descent.
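
    A toy usage sketch (illustrative only; the quadratic objective here is
    made up for the example):

        >>> import numpy as np
        >>> grad = lambda x: 2 * x                    # gradient of ||x||^2
        >>> xs, ts = gradient_descent(np.full(2, 3.0), [0.25] * 4, grad)
        >>> np.allclose(xs[-1], 3.0 * 0.5 ** 4)
        True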
9 | 10 | Parameters 11 | ---------- 12 | init : array 13 | starting point 14 | steps : list of floats 15 | step size schedule for the algorithm 16 | grad : function 17 | mapping arrays to arrays of same shape 18 | proj : function, optional 19 | mapping arrays to arrays of same shape 20 | num_to_keep : integer, optional 21 | number of points to keep 22 | 23 | Returns 24 | ------- 25 | List of points computed by projected gradient descent and the wall clock time it took to compute them. Length of the 26 | lists is determined by `num_to_keep`. 27 | """ 28 | xs = [init] 29 | ts = [0] 30 | start = time() 31 | for step in steps: 32 | xs.append(proj(xs[-1] - step * grad(xs[-1]))) 33 | ts.append(time() - start) 34 | if num_to_keep: 35 | xs = xs[-num_to_keep:] 36 | ts = ts[-num_to_keep:] 37 | return xs, ts 38 | 39 | 40 | def frank_wolfe(initial, update_oracle, num_steps, num_to_keep=None): 41 | """ Frank-Wolfe. 42 | 43 | Frank-Wolfe (Conditional gradient) for first-order optimization. 44 | 45 | Parameters: 46 | ----------- 47 | initial: array, 48 | initial starting point 49 | update_oracle: function, mapping points to points, 50 | computes the next iterate given the current iterate and iteration number 51 | num_steps: integer, 52 | number of steps to run the algorithm for 53 | Returns: 54 | -------- 55 | List of points computed by the algorithm and the wall clock time it took to compute them 56 | """ 57 | xs = [initial] 58 | ts = [0] 59 | start = time() 60 | for step in range(num_steps): 61 | xs.append(update_oracle(xs[-1],step)) 62 | ts.append(time() - start) 63 | if num_to_keep: 64 | xs = xs[-num_to_keep:] 65 | ts = ts[-num_to_keep:] 66 | return xs, ts -------------------------------------------------------------------------------- /labs/ex09/template/plots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """some functions for plots.""" 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | def plot_raw_data(ratings): 9 | """plot statistics of the raw rating data.""" 10 | # do statistics.
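# (per-user counts collapse the item axis (axis=0); per-item counts collapse
#  the user axis (axis=1); zero entries of `ratings` mean "not rated")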
11 | num_items_per_user = np.count_nonzero(ratings,axis=0) 12 | num_users_per_item = np.count_nonzero(ratings,axis=1) 13 | sorted_num_movies_per_user = np.sort(num_items_per_user)[::-1] 14 | sorted_num_users_per_movie = np.sort(num_users_per_item)[::-1] 15 | 16 | # plot 17 | fig = plt.figure() 18 | ax1 = fig.add_subplot(1, 2, 1) 19 | ax1.plot(sorted_num_movies_per_user, color='blue') 20 | ax1.set_xlabel("users") 21 | ax1.set_ylabel("number of ratings (sorted)") 22 | ax1.grid() 23 | 24 | ax2 = fig.add_subplot(1, 2, 2) 25 | ax2.plot(sorted_num_users_per_movie) 26 | ax2.set_xlabel("items") 27 | ax2.set_ylabel("number of ratings (sorted)") 28 | ax2.grid() 29 | 30 | plt.tight_layout() 31 | plt.savefig("stat_ratings") 32 | plt.show() 33 | # plt.close() 34 | return num_items_per_user, num_users_per_item 35 | 36 | 37 | def plot_train_test_data(train, test): 38 | """visualize the train and test data.""" 39 | fig = plt.figure() 40 | ax1 = fig.add_subplot(1, 2, 1) 41 | ax1.spy(train, precision=0.01, markersize=0.5) 42 | ax1.set_xlabel("Users") 43 | ax1.set_ylabel("Items") 44 | ax1.set_title("Training data") 45 | ax2 = fig.add_subplot(1, 2, 2) 46 | ax2.spy(test, precision=0.01, markersize=0.5) 47 | ax2.set_xlabel("Users") 48 | ax2.set_ylabel("Items") 49 | ax2.set_title("Test data") 50 | plt.tight_layout() 51 | plt.savefig("train_test") 52 | plt.show() 53 | -------------------------------------------------------------------------------- /labs/ex10/exercise10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex10/exercise10.pdf -------------------------------------------------------------------------------- /labs/ex10/solution10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/ex10/solution10.pdf -------------------------------------------------------------------------------- /labs/mini-project/latex-example-paper/denoised_signal_1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/mini-project/latex-example-paper/denoised_signal_1d.png -------------------------------------------------------------------------------- /labs/mini-project/latex-example-paper/latex-template.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/mini-project/latex-example-paper/latex-template.pdf -------------------------------------------------------------------------------- /labs/mini-project/latex-example-paper/latex-template.tex: -------------------------------------------------------------------------------- 1 | \documentclass[10pt,conference,compsocconf]{IEEEtran} 2 | 3 | \usepackage{hyperref} 4 | \usepackage{graphicx} % For figure environment 5 | 6 | 7 | \begin{document} 8 | \title{Writing Scientific Papers and Software} 9 | 10 | \author{ 11 | Cheng Soon Ong\\ 12 | \textit{Department of Computer Science, ETH Zurich, Switzerland} 13 | } 14 | 15 | \maketitle 16 | 17 | \begin{abstract} 18 | A critical part of scientific discovery is the 19 | communication of research findings to peers or the general public. 
20 | Mastery of the process of scientific communication improves the 21 | visibility and impact of research. While this guide is a necessary 22 | tool for learning how to write in a manner suitable for publication 23 | at a scientific venue, it is by no means sufficient, on its own, to 24 | make its reader an accomplished writer. 25 | This guide should be a starting point for further development of 26 | writing skills. 27 | \end{abstract} 28 | 29 | \section{Introduction} 30 | 31 | The aim of writing a paper is to infect the mind of your reader with 32 | the brilliance of your idea~\cite{jones08}. 33 | The hope is that after reading your 34 | paper, the audience will be convinced to try out your idea. In other 35 | words, it is the medium to transport the idea from your head to your 36 | reader's head. 37 | In the following 38 | section, we show a common structure of scientific papers and briefly 39 | outline some tips for writing good papers in 40 | Section~\ref{sec:tips-writing}. 41 | 42 | At that 43 | point, it is important that the reader is able to reproduce your 44 | work~\cite{schwab00,wavelab,gentleman05}. This is why it is also 45 | important that if the work has a computational component, the software 46 | associated with producing the results is also made available in a 47 | useful form. Several guidelines for making your user's experience with 48 | your software as painless as possible are given in 49 | Section~\ref{sec:tips-software}. 50 | 51 | This brief guide is by no means sufficient, on its own, to 52 | make its reader an accomplished writer. The reader is urged to use the 53 | references to further improve his or her writing skills. 54 | 55 | \section{The Structure of a Paper} 56 | \label{sec:structure-paper} 57 | 58 | Scientific papers usually begin with the description of the problem, 59 | justifying why the problem is interesting. Most importantly, it argues 60 | that the problem is still unsolved, or that the current solutions are 61 | unsatisfactory. This leads to the main gist of the paper, which is 62 | ``the idea''. The authors then show evidence, using derivations or 63 | experiments, that the idea works. Since science does not occur in a 64 | vacuum, a proper comparison to the current state of the art is often 65 | part of the results. Following these ideas, papers usually have the 66 | following structure: 67 | \begin{description} 68 | \item[Abstract] \ \\ 69 | Short description of the whole paper, to help the 70 | reader decide whether to read it. 71 | \item[Introduction] \ \\ 72 | Describe your problem and state your 73 | contributions. 74 | \item[Models and Methods] \ \\ 75 | Describe your idea and how it was implemented to solve 76 | the problem. Survey the related work, giving credit where credit is 77 | due. 78 | \item[Results] \ \\ 79 | Show evidence to support your claims made in the 80 | introduction. 81 | \item[Discussion] \ \\ 82 | Discuss the strengths and weaknesses of your 83 | approach, based on the results. Point out the implications of your 84 | novel idea on the application concerned. 85 | \item[Summary] \ \\ 86 | Summarize your contributions in light of the new 87 | results. 88 | \end{description} 89 | 90 | 91 | \section{Tips for Good Writing} 92 | \label{sec:tips-writing} 93 | 94 | The ideas for good writing have come 95 | from~\cite{editor10,jones08,anderson04}. 96 | 97 | \subsection{Getting Help} 98 | One should try to get a draft read by as many friendly people as 99 | possible. And remember to treat your test readers with respect.
If 100 | they are unable to understand something in your paper, then it is 101 | highly likely that your reviewers will not understand it 102 | either. Therefore, do not be defensive about the criticisms you get, 103 | but use them as an opportunity to improve the paper. Before you submit 104 | your friends to the pain of reading your draft, please \emph{use a 105 | spell checker}. 106 | 107 | \subsection{Abstract} 108 | The abstract should really be written last, along with the title of 109 | the paper. The four points that should be covered are~\cite{jones08}: 110 | \begin{enumerate} 111 | \item State the problem. 112 | \item Say why it is an interesting problem. 113 | \item Say what your solution achieves. 114 | \item Say what follows from your solution. 115 | \end{enumerate} 116 | 117 | \subsection{Figures and Tables} 118 | 119 | \begin{figure}[tbp] 120 | \centering 121 | \includegraphics[width=\columnwidth]{denoised_signal_1d} 122 | \caption{Signal compression and denoising using the Fourier basis.} 123 | \vspace{-3mm} 124 | \label{fig:denoise-fourier} 125 | \end{figure} 126 | \begin{figure}[htbp] 127 | \centering 128 | \includegraphics[width=\columnwidth]{local_wdenoised_1d} 129 | \vspace{-3mm} 130 | \caption{Signal compression and denoising using the Daubechies wavelet basis.} 131 | \label{fig:denoise-wavelet} 132 | \end{figure} 133 | 134 | Use examples and illustrations to clarify ideas and results. For 135 | example, by comparing Figure~\ref{fig:denoise-fourier} and 136 | Figure~\ref{fig:denoise-wavelet}, we can see the two different 137 | situations where the Fourier and wavelet bases perform well. 138 | 139 | \subsection{Models and Methods} 140 | The models and methods 141 | section should describe what was 142 | done to answer the research question, describe how it was done, 143 | justify the experimental design, and 144 | explain how the results were analyzed. 145 | 146 | The model refers to the underlying mathematical model or structure which 147 | you use to describe your problem, or that your solution is based on. 148 | The methods, on the other hand, are the algorithms used to solve the problem. 149 | In some cases, the suggested method directly solves the problem, without having it 150 | stated in terms of an underlying model. Generally, though, it is a better practice to have 151 | the model figured out and stated clearly, rather than presenting a method without specifying 152 | the model. In this case, the method can be more easily evaluated in the task of fitting 153 | the given data to the underlying model. 154 | 155 | The methods part of this section is not a step-by-step, directive 156 | protocol as you might see in your lab manual, but detailed enough such 157 | that an interested reader can reproduce your 158 | work~\cite{anderson04,wavelab}. 159 | 160 | The methods section of a research paper provides the information by 161 | which a study's validity is judged. 162 | Therefore, it requires a clear and precise description of how an 163 | experiment was done, and the rationale 164 | for why specific experimental procedures were chosen. 165 | It is usually helpful to 166 | structure the methods section by~\cite{kallet04methods}: 167 | \begin{enumerate} 168 | \item Laying out the model you used to describe the problem or the solution. 169 | \item Describing the algorithms used in the study, briefly including 170 | details such as hyperparameter values (e.g. thresholds), and 171 | preprocessing steps (e.g. normalizing the data to have mean value of 172 | zero). 
173 | \item Explaining how the materials were prepared, for example the 174 | images used and their resolution. 175 | \item Describing the research protocol, for example which examples 176 | were used for estimating the parameters (training) and which were 177 | used for computing performance. 178 | \item Explaining how measurements were made and what 179 | calculations were performed. Do not reproduce the full source code in 180 | the paper, but explain the key steps. 181 | \end{enumerate} 182 | 183 | \subsection{Results} 184 | 185 | Organize the results section based on the sequence of tables and 186 | figures you include. Prepare the tables and figures as soon as all 187 | the data are analyzed and arrange them in the sequence that best 188 | presents your findings in a logical way. A good strategy is to note, 189 | on a draft of each table or figure, the one or two key results you 190 | want to address in the text portion of the results. 191 | The information from the figures is 192 | summarized in Table~\ref{tab:fourier-wavelet}. 193 | 194 | \begin{table*}[htbp] 195 | \centering 196 | \begin{tabular}[c]{|l||l|l|l|} 197 | \hline 198 | Basis&Support&Suitable signals&Unsuitable signals\\ 199 | \hline 200 | Fourier&global&sine-like&localized\\ 201 | Wavelet&local&localized&sine-like\\ 202 | \hline 203 | \end{tabular} 204 | \caption{Characteristics of the Fourier and wavelet bases.} 205 | \label{tab:fourier-wavelet} 206 | \end{table*} 207 | 208 | When reporting computational or measurement results, always 209 | report the mean (average value) along with a measure of variability 210 | (standard deviation(s) or standard error of the mean). 211 | 212 | 213 | \section{Tips for Good Software} 214 | \label{sec:tips-software} 215 | 216 | There is a lot of literature (for example~\cite{hunt99pragmatic} and 217 | \cite{spolsky04software}) on how to write software. It is not the 218 | intention of this section to replace software engineering 219 | courses. However, in the interests of reproducible 220 | research~\cite{schwab00}, there are a few guidelines to make your 221 | reader happy: 222 | \begin{itemize} 223 | \item Have a \texttt{README} file that (at least) describes what your 224 | software does, and which commands to run to obtain results. Also 225 | mention anything special that needs to be set up, such as 226 | toolboxes\footnote{For those who are 227 | particularly interested, other common structures can be found at 228 | \url{http://en.wikipedia.org/wiki/README} and 229 | \url{http://www.gnu.org/software/womb/gnits/}.}. 230 | \item A list of authors and contributors can be included in a file 231 | called \texttt{AUTHORS}, acknowledging any help that you may have 232 | obtained. For small projects, this information is often also 233 | included in the \texttt{README}. 234 | \item Use meaningful filenames, and not \texttt{temp1.py}, 235 | \texttt{temp2.py}. 236 | \item Document your code. Each file should at least have a short 237 | description about its reason for existence. Non-obvious steps in the 238 | code should be commented. Function arguments and return values should be described. 239 | \item Describe how the results presented in your paper can be reproduced. 240 | \end{itemize} 241 | 242 | 243 | \subsection{\LaTeX{} Primer} 244 | \label{sec:latex-primer} 245 | 246 | \LaTeX{} is one of the most commonly used document preparation systems 247 | for scientific journals and conferences.
It is based on the idea 248 | that authors should be able to focus on the content of what they are 249 | writing without being distracted by its visual presentation. 250 | The source of this file can be used as a starting point for how to use 251 | the different commands in \LaTeX{}. We are using an IEEE style for 252 | this course. 253 | 254 | \subsubsection{Installation} 255 | 256 | There are various packages available for processing \LaTeX{} 257 | documents. See our webpage for more links for getting started. 258 | 259 | \subsubsection{Compiling \LaTeX{}} 260 | Your directory should contain at least~4 files, in addition to image 261 | files. Images should ideally be in 262 | \texttt{.pdf} format (or \texttt{.png}). 263 | 264 | \subsubsection{Equations} 265 | 266 | There are three types of equations available: inline equations, for 267 | example $y=mx + c$, which appear in the text, unnumbered equations 268 | $$y=mx + c,$$ 269 | each presented on a line of its own, and numbered equations 270 | \begin{equation} 271 | \label{eq:linear} 272 | y = mx + c 273 | \end{equation} 274 | which you can refer to at a later point (Equation~(\ref{eq:linear})). 275 | 276 | \subsubsection{Tables and Figures} 277 | 278 | Tables and figures are ``floating'' objects, which means that the text 279 | can flow around them. 280 | Note that \texttt{figure*} and \texttt{table*} cause the corresponding 281 | figure or table to span both columns. 282 | 283 | 284 | 285 | \section{Summary} 286 | 287 | The aim of a scientific paper is to convey the idea or discovery of 288 | the researcher to the minds of the readers. The associated software 289 | package provides the relevant details, which are often only briefly 290 | explained in the paper, such that the research can be reproduced. 291 | To write good papers, identify your key idea, make your contributions 292 | explicit, and use examples and illustrations to describe the problems 293 | and solutions. 294 | 295 | \section*{Acknowledgements} 296 | The author thanks Christian Sigg for his careful reading and helpful 297 | suggestions. 298 | 299 | \newpage 300 | \bibliographystyle{IEEEtran} 301 | \bibliography{literature} 302 | 303 | \end{document} 304 | -------------------------------------------------------------------------------- /labs/mini-project/latex-example-paper/literature.bib: -------------------------------------------------------------------------------- 1 | 2 | @Article{kallet04methods, 3 | author = {Richard H Kallet}, 4 | title = {How to Write the Methods Section of a Research Paper}, 5 | journal = {Respiratory Care}, 6 | year = 2004, 7 | volume = 49, 8 | number = 10, 9 | pages = {1229--1232} 10 | } 11 | 12 | @Unpublished{anderson04, 13 | author = {Greg Anderson}, 14 | title = {How to Write a Paper in Scientific Journal Style and Format}, 15 | year = 2004, 16 | organization = {Bates College}, 17 | note = {http://abacus.bates.edu/~ganderso/biology/resources/writing/HTWtoc.html} 18 | } 19 | 20 | @Unpublished{jones08, 21 | author = {Simon Peyton Jones}, 22 | title = {How to write a great research paper}, 23 | note = {Microsoft Research Cambridge}, 24 | year = 2008} 25 | 26 | @Article{editor10, 27 | author = {Editorial}, 28 | title = {Scientific writing 101}, 29 | journal = {Nature Structural \& Molecular Biology}, 30 | year = 2010, 31 | volume = 17, 32 | pages = 139} 33 | 34 | @TechReport{wavelab, 35 | author = {Jonathan B. Buckheit and David L. 
Donoho}, 36 | title = {WaveLab and Reproducible Research}, 37 | institution = {Stanford University}, 38 | year = 2009} 39 | 40 | @article{gentleman05, 41 | title = {Reproducible Research: A Bioinformatics Case Study}, 42 | author = {Gentleman, Robert}, 43 | year = {2005}, 44 | journal = {Statistical Applications in Genetics and Molecular Biology}, 45 | volume = 4, 46 | number = 1, 47 | publisher = {The Berkeley Electronic Press}, 48 | url = {http://www.bepress.com/sagmb/vol4/iss1/art2} 49 | } 50 | 51 | @article{schwab00, 52 | author = {Schwab, Matthias and Karrenbach, Martin and Claerbout, Jon}, 53 | title = {Making scientific computations reproducible}, 54 | journal = {Computing in Science and Engg.}, 55 | volume = {2}, 56 | number = {6}, 57 | year = {2000}, 58 | issn = {1521-9615}, 59 | pages = {61--67}, 60 | doi = {http://dx.doi.org/10.1109/5992.881708}, 61 | publisher = {IEEE Educational Activities Department}, 62 | address = {Piscataway, NJ, USA}, 63 | } 64 | 65 | 66 | 67 | @Book{spolsky04software, 68 | author = {Joel Spolsky}, 69 | title = {Joel on Software: And on Diverse \& Occasionally Related Matters That Will Prove of Interest etc..: And on Diverse and Occasionally Related Matters ... or Ill-Luck, Work with Them in Some Capacity}, 70 | publisher = {APRESS}, 71 | year = 2004} 72 | 73 | @Book{hunt99pragmatic, 74 | author = {Andrew Hunt and David Thomas}, 75 | title = {The Pragmatic Programmer}, 76 | publisher = {Addison Wesley}, 77 | year = 1999} 78 | 79 | -------------------------------------------------------------------------------- /labs/mini-project/latex-example-paper/local_wdenoised_1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/mini-project/latex-example-paper/local_wdenoised_1d.png -------------------------------------------------------------------------------- /labs/mini-project/miniproject_description.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/labs/mini-project/miniproject_description.pdf -------------------------------------------------------------------------------- /lecture_notes/lecture-notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/lecture_notes/lecture-notes.pdf -------------------------------------------------------------------------------- /slides/lecture01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture01.pdf -------------------------------------------------------------------------------- /slides/lecture02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture02.pdf -------------------------------------------------------------------------------- /slides/lecture03.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture03.pdf -------------------------------------------------------------------------------- /slides/lecture04.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture04.pdf -------------------------------------------------------------------------------- /slides/lecture05.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture05.pdf -------------------------------------------------------------------------------- /slides/lecture06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture06.pdf -------------------------------------------------------------------------------- /slides/lecture07.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture07.pdf -------------------------------------------------------------------------------- /slides/lecture08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture08.pdf -------------------------------------------------------------------------------- /slides/lecture09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture09.pdf -------------------------------------------------------------------------------- /slides/lecture10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture10.pdf -------------------------------------------------------------------------------- /slides/lecture11.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture11.pdf -------------------------------------------------------------------------------- /slides/lecture12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfml/OptML_course/d1762a4b6fad27bdfe949f9abd99014da9944dd3/slides/lecture12.pdf --------------------------------------------------------------------------------