├── .DS_Store ├── Lecture 1 ├── main.pdf ├── main.tex ├── mybib.bib └── scribe.sty ├── Lecture 10 ├── angular-seperable.png ├── clusters.png ├── counter-perp-bisector.png ├── inseperable-no-bias.png ├── main.pdf ├── main.tex ├── mybib.bib └── scribe.sty ├── Lecture 11 ├── Lecture11.pdf ├── Overlapping Clusters.png ├── Separable Clusters.png ├── Tuning-b.jpg ├── image.png ├── image1.jpg ├── main.tex └── scribe.sty ├── Lecture 12 ├── cs337__lecture_12.pdf ├── input.png ├── main.tex ├── mybib.bib ├── scribe.sty └── transformed.png ├── Lecture 13 ├── scribe.pdf └── scribe.tex ├── Lecture 14 ├── Lecture_14.pdf ├── main.tex └── scribe.sty ├── Lecture 15 ├── Lecture_15.pdf ├── graph.jpeg ├── main.tex ├── mybib.bib ├── rules.png ├── scribe.sty └── sin_eg.png ├── Lecture 16 ├── Lecture16.pdf ├── Lecture16.tex └── scribe.sty ├── Lecture 17 ├── Lecture_17.pdf ├── lecture17.tex ├── mybib.bib └── scribe.sty ├── Lecture 18 ├── Lecture_18.pdf ├── sampling.png └── scribe.tex ├── Lecture 19 ├── Lecture_19.pdf ├── Lecutre 19.tex └── scribe.sty ├── Lecture 2 ├── Lecture2.pdf ├── Lecture2.tex ├── LinSub.jpg ├── mybib.bib └── scribe.sty ├── Lecture 20 ├── CS337_Scribe_Final.pdf ├── Lecture n.tex ├── MLaaS.jpg ├── gradDes.png ├── loss.png ├── lr.jpeg ├── lr.jpg ├── mybib.bib └── scribe.sty ├── Lecture 21 ├── Lecture 21.pdf ├── main.tex ├── mybib.bib └── scribe.sty ├── Lecture 3 ├── classification.png ├── graph.PNG ├── lecture 3.pdf ├── lecture 3.tex ├── mybib.bib └── scribe.sty ├── Lecture 4 ├── CS337_Lecture_4.pdf ├── main.tex └── scribe.sty ├── Lecture 5 ├── CS337_Lecture_5_Scribe.pdf ├── graph.png ├── lecture5.tex ├── mybib.bib └── scribe.sty ├── Lecture 6 ├── main.pdf ├── main.tex ├── mybib.bib └── scribe.sty ├── Lecture 7 ├── 2022_Scribe_lecture7.pdf ├── final_Scribe.tex └── scribe.sty ├── Lecture 8 ├── Lecture_8.pdf ├── Lecture_8.tex ├── knn.png ├── linear.png └── scribe.sty └── Lecture 9 ├── Lecture9.pdf ├── graph.jpg ├── graph.png ├── lecture9.tex ├── mybib.bib └── scribe.sty /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/.DS_Store -------------------------------------------------------------------------------- /Lecture 1/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 1/main.pdf -------------------------------------------------------------------------------- /Lecture 1/mybib.bib: -------------------------------------------------------------------------------- 1 | @book{bertsimas1997introduction, 2 | title={Introduction to linear optimization}, 3 | author={Bertsimas, Dimitris and Tsitsiklis, John N}, 4 | volume={6}, 5 | year={1997}, 6 | publisher={Athena Scientific Belmont, MA} 7 | } 8 | 9 | @book{boyd2004convex, 10 | title={Convex optimization}, 11 | author={Boyd, Stephen and Vandenberghe, Lieven}, 12 | year={2004}, 13 | publisher={Cambridge university press} 14 | } 15 | 16 | @book{wolsey2014integer, 17 | title={Integer and combinatorial optimization}, 18 | author={Wolsey, Laurence A and Nemhauser, George L}, 19 | year={2014}, 20 | publisher={John Wiley \& Sons} 21 | } -------------------------------------------------------------------------------- /Lecture 1/scribe.sty: -------------------------------------------------------------------------------- 1 | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below... 62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | 122 | \lstset{% 123 | language = Julia, 124 | basicstyle = \ttfamily, 125 | keywordstyle = \bfseries\color{blue}, 126 | 
stringstyle = \color{magenta}, 127 | commentstyle = \color{ForestGreen}, 128 | showstringspaces = false, 129 | } 130 | 131 | 132 | \newtheorem{theorem}{Theorem}[section] 133 | \newtheorem{lemma}[theorem]{Lemma} 134 | \newtheorem{claim}[theorem]{Claim} 135 | \newtheorem{proposition}[theorem]{Proposition} 136 | \newtheorem{corollary}[theorem]{Corollary} 137 | \newtheorem{fact}[theorem]{Fact} 138 | \newtheorem{example}[theorem]{Example} 139 | \newtheorem{notation}[theorem]{Notation} 140 | \newtheorem{observation}[theorem]{Observation} 141 | \newtheorem{conjecture}[theorem]{Conjecture} 142 | 143 | \theoremstyle{definition} 144 | \newtheorem{definition}[theorem]{Definition} 145 | 146 | \theoremstyle{remark} 147 | \newtheorem{remark}[theorem]{Remark} 148 | 149 | % Setting the theorem style back to plain in case theorems are defined in the main file 150 | \theoremstyle{plain} 151 | 152 | 153 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 154 | % Useful macros 155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 156 | 157 | % for temporarily chunks of text 158 | \newcommand{\ignore}[1]{} 159 | 160 | % Probability/expectation operators. The ones ending in x should be used if you want 161 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 162 | % go below and to the right. NB: \P is remapped below for the complexity class P. 163 | \renewcommand{\Pr}{{\bf Pr}} 164 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 165 | \newcommand{\E}{{\bf E}} 166 | \newcommand{\Ex}{\mathop{\bf E\/}} 167 | \newcommand{\Var}{{\bf Var}} 168 | \newcommand{\Varx}{\mathop{\bf Var\/}} 169 | \newcommand{\Cov}{{\bf Cov}} 170 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 171 | 172 | % shortcuts for symbol names that are too long to type 173 | \newcommand{\eps}{\epsilon} 174 | \newcommand{\lam}{\lambda} 175 | \renewcommand{\l}{\ell} 176 | \newcommand{\la}{\langle} 177 | \newcommand{\ra}{\rangle} 178 | \newcommand{\wh}{\widehat} 179 | \newcommand{\wt}{\widetilde} 180 | 181 | % "blackboard-fonted" letters for the reals, naturals etc. 
182 | \newcommand{\R}{\mathbb R} 183 | \newcommand{\N}{\mathbb N} 184 | \newcommand{\Z}{\mathbb Z} 185 | \newcommand{\F}{\mathbb F} 186 | \newcommand{\Q}{\mathbb Q} 187 | \newcommand{\C}{\mathbb C} 188 | 189 | % operators that should be typeset in Roman font 190 | \newcommand{\poly}{\mathrm{poly}} 191 | \newcommand{\polylog}{\mathrm{polylog}} 192 | \newcommand{\sgn}{\mathrm{sgn}} 193 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 194 | \newcommand{\val}{{\mathrm{val}}} 195 | 196 | % complexity classes 197 | \renewcommand{\P}{\mathrm{P}} 198 | \newcommand{\NP}{\mathrm{NP}} 199 | \newcommand{\BPP}{\mathrm{BPP}} 200 | \newcommand{\DTIME}{\mathrm{DTIME}} 201 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 202 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 203 | \newcommand{\NTIME}{\mathrm{NTIME}} 204 | 205 | % values associated to optimization algorithm instances 206 | \newcommand{\Opt}{{\mathsf{Opt}}} 207 | \newcommand{\Alg}{{\mathsf{Alg}}} 208 | \newcommand{\Lp}{{\mathsf{Lp}}} 209 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 210 | \newcommand{\Exp}{{\mathsf{Exp}}} 211 | 212 | % if you think the sum and product signs are too big in your math mode; x convention 213 | % as in the probability operators 214 | \newcommand{\littlesum}{{\textstyle \sum}} 215 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 216 | \newcommand{\littleprod}{{\textstyle \prod}} 217 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 218 | 219 | % horizontal line across the page 220 | \newcommand{\horz}{ 221 | \vspace{-.4in} 222 | \begin{center} 223 | \begin{tabular}{p{\textwidth}}\\ 224 | \hline 225 | \end{tabular} 226 | \end{center} 227 | } 228 | 229 | % calligraphic letters 230 | \newcommand{\calA}{{\cal A}} 231 | \newcommand{\calB}{{\cal B}} 232 | \newcommand{\calC}{{\cal C}} 233 | \newcommand{\calD}{{\cal D}} 234 | \newcommand{\calE}{{\cal E}} 235 | \newcommand{\calF}{{\cal F}} 236 | \newcommand{\calG}{{\cal G}} 237 | \newcommand{\calH}{{\cal H}} 238 | \newcommand{\calI}{{\cal I}} 239 | \newcommand{\calJ}{{\cal J}} 240 | \newcommand{\calK}{{\cal K}} 241 | \newcommand{\calL}{{\cal L}} 242 | \newcommand{\calM}{{\cal M}} 243 | \newcommand{\calN}{{\cal N}} 244 | \newcommand{\calO}{{\cal O}} 245 | \newcommand{\calP}{{\cal P}} 246 | \newcommand{\calQ}{{\cal Q}} 247 | \newcommand{\calR}{{\cal R}} 248 | \newcommand{\calS}{{\cal S}} 249 | \newcommand{\calT}{{\cal T}} 250 | \newcommand{\calU}{{\cal U}} 251 | \newcommand{\calV}{{\cal V}} 252 | \newcommand{\calW}{{\cal W}} 253 | \newcommand{\calX}{{\cal X}} 254 | \newcommand{\calY}{{\cal Y}} 255 | \newcommand{\calZ}{{\cal Z}} 256 | 257 | % bold letters (useful for random variables) 258 | \renewcommand{\a}{{\boldsymbol a}} 259 | \renewcommand{\b}{{\boldsymbol b}} 260 | \renewcommand{\c}{{\boldsymbol c}} 261 | \renewcommand{\d}{{\boldsymbol d}} 262 | \newcommand{\e}{{\boldsymbol e}} 263 | \newcommand{\f}{{\boldsymbol f}} 264 | \newcommand{\g}{{\boldsymbol g}} 265 | \newcommand{\h}{{\boldsymbol h}} 266 | \renewcommand{\i}{{\boldsymbol i}} 267 | \renewcommand{\j}{{\boldsymbol j}} 268 | \renewcommand{\k}{{\boldsymbol k}} 269 | \newcommand{\m}{{\boldsymbol m}} 270 | \newcommand{\n}{{\boldsymbol n}} 271 | \renewcommand{\o}{{\boldsymbol o}} 272 | \newcommand{\p}{{\boldsymbol p}} 273 | \newcommand{\q}{{\boldsymbol q}} 274 | \renewcommand{\r}{{\boldsymbol r}} 275 | \newcommand{\s}{{\boldsymbol s}} 276 | \renewcommand{\t}{{\boldsymbol t}} 277 | \renewcommand{\u}{{\boldsymbol u}} 278 | \renewcommand{\v}{{\boldsymbol v}} 279 | \newcommand{\w}{{\boldsymbol w}} 280 | \newcommand{\x}{{\boldsymbol x}} 281 | 
\newcommand{\y}{{\boldsymbol y}} 282 | \newcommand{\z}{{\boldsymbol z}} 283 | \newcommand{\A}{{\boldsymbol A}} 284 | \newcommand{\B}{{\boldsymbol B}} 285 | \newcommand{\D}{{\boldsymbol D}} 286 | \newcommand{\G}{{\boldsymbol G}} 287 | \renewcommand{\H}{{\boldsymbol H}} 288 | \newcommand{\I}{{\boldsymbol I}} 289 | \newcommand{\J}{{\boldsymbol J}} 290 | \newcommand{\K}{{\boldsymbol K}} 291 | \renewcommand{\L}{{\boldsymbol L}} 292 | \newcommand{\M}{{\boldsymbol M}} 293 | \renewcommand{\O}{{\boldsymbol O}} 294 | \renewcommand{\S}{{\boldsymbol S}} 295 | \newcommand{\T}{{\boldsymbol T}} 296 | \newcommand{\U}{{\boldsymbol U}} 297 | \newcommand{\V}{{\boldsymbol V}} 298 | \newcommand{\W}{{\boldsymbol W}} 299 | \newcommand{\X}{{\boldsymbol X}} 300 | \newcommand{\Y}{{\boldsymbol Y}} 301 | 302 | 303 | 304 | % useful for Fourier analysis 305 | \newcommand{\bits}{\{-1,1\}} 306 | \newcommand{\bitsn}{\{-1,1\}^n} 307 | \newcommand{\bn}{\bitsn} 308 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 309 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 310 | 311 | % if you want 312 | \newcommand{\half}{{\textstyle \frac12}} 313 | 314 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 315 | 316 | 317 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 318 | % Feel free to ignore the rest of this file 319 | 320 | 321 | 322 | \def\ScribeStr{??} 323 | \def\LecStr{??} 324 | \def\LecNum{??} 325 | \def\LecTitle{??} 326 | \def\LecDate{??} 327 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 328 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 329 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 330 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 331 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 332 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 333 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 334 | 335 | \newdimen\headerwidth 336 | 337 | \newcommand{\MakeScribeTop}{ 338 | \noindent 339 | \begin{center} 340 | \framebox{ 341 | \vbox{ 342 | \headerwidth=\textwidth 343 | \advance\headerwidth by -0.22in 344 | \hbox to \headerwidth {\hfill AIML - CS 337} 345 | \vspace{4mm} 346 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 347 | \vspace{2mm} 348 | \hbox to \headerwidth {\hfill \LecDate \hfill} 349 | \vspace{2mm} 350 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 351 | } 352 | } 353 | \end{center} 354 | \vspace*{4mm}} 355 | -------------------------------------------------------------------------------- /Lecture 10/angular-seperable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 10/angular-seperable.png -------------------------------------------------------------------------------- /Lecture 10/clusters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 10/clusters.png -------------------------------------------------------------------------------- /Lecture 10/counter-perp-bisector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 10/counter-perp-bisector.png 
-------------------------------------------------------------------------------- /Lecture 10/inseperable-no-bias.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 10/inseperable-no-bias.png -------------------------------------------------------------------------------- /Lecture 10/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 10/main.pdf -------------------------------------------------------------------------------- /Lecture 10/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass[12pt]{article} 2 | \usepackage[english]{babel} 3 | \usepackage[utf8x]{inputenc} 4 | \usepackage[T1]{fontenc} 5 | \usepackage{float} 6 | \usepackage{scribe} 7 | \usepackage{listings} 8 | 9 | \Scribe{Group 19, Group 20} 10 | \Lecturer{Abir De} 11 | \LectureNumber{10} 12 | \LectureDate{8th September 2022} 13 | \LectureTitle{Introduction to Classification using SVM} 14 | 15 | \lstset{style=mystyle} 16 | 17 | \begin{document} 18 | \MakeScribeTop 19 | 20 | %############################################################# 21 | %############################################################# 22 | %############################################################# 23 | %############################################################# 24 | 25 | 26 | %% ##### Small roundup of Lecture ######## 27 | We begin by defining the classification problem and then introduce a linear classifier with a bias. We work on ways to find the appropriate parameters for such a model. For a slightly complex and more realistic classification problem, we progress towards the commonly used \textit{hinge-loss} function for SVM. 28 | 29 | \section{Introduction} 30 | We now introduce the classification problem. Our dataset will be of the form $\mathbb{D}=\{(x_i,y_i)\}_{i=1}^N$. Similar to regression, the $x_i$ are data points, usually in $\mathbb{R}^k$ for some $k \in \mathbb{N}$. But unlike regression, $y_i$s are now discrete. For a 2 class problem, the set of possible labels can be \{+1,-1\}, \{Cat,Dog\} etc. Even if they are written as \{+1,-1\}, for the purpose of a classification problem we shall always assume there is no partial order among the labels.\\[2mm] 31 | \textbf{Note-} Take a \textit{ranking} task where all objects are given labels from \{1,2,3,4,5\}. These labels are assumed to posses an implicit partial order. This task can thus use this ordering information and is different from our classification problem. In fact, this is called \textit{ordinal regression}.\\[2mm] 32 | Let us consider 2 dimensional data points with class labels from \{+1,-1\} as shown below\\ 33 | %\textbf{\textcolor{red}{insert fig}}\\ 34 | 35 | \begin{figure}[H] 36 | \centering 37 | \includegraphics[width=6.5cm]{clusters.png} 38 | \caption{Cluster of positive and negative labels} 39 | \end{figure} 40 | 41 | A few simple classifiers are listed below : 42 | \begin{itemize} 43 | \item \textbf{Constant model classifier}:- Such a classifier will assign a constant label to every data point. Clearly, if samples from multiple labels are present in the test dataset, it is impossible for a constant model to produce a zero error - even on the training dataset. 
44 | \item \textbf{Unsupervised classifier}:- Unsupervised classifiers perform no learning and use heuristics to assign a label to a test data point. For example, an unsupervised classifier can find the closest point to the test point in the training dataset and return the class label of this point. Another approach might involve calculating the centroid of each class and returning the label of the class whose centroid lies closest to the test point. Such models have limited utility. 45 | \end{itemize} 46 | Supervised learning solutions to the problem follow : 47 | \section{Linear Classifier} 48 | One way to define a classifier is to introduce a parameter $w \in \mathbb{R}^k$. Then, 49 | $$\mathbf{w}^Tx > 0 \; \Rightarrow \; y=+1$$ 50 | $$\mathbf{w}^Tx < 0 \; \Rightarrow \; y=-1$$ 51 | A classifier of this form will fail to provide a significant margin between the classes. To counter this issue, we change the classification rules to the following : 52 | $$\mathbf{w}^Tx > +1 \; \Rightarrow \; y=+1$$ 53 | $$\mathbf{w}^Tx < -1 \; \Rightarrow \; y=-1$$ 54 | You might ask how one came up with the magic numbers +1 and -1. These numbers do not matter, as $w$ can be scaled appropriately to get the exact same classifier for whatever $\{-\delta, +\delta\}$ pair one chooses to set the margins.\\[2mm] 55 | A major problem with the current classifier is that the origin is never classified to any class label. This is unnecessarily restrictive. For example, consider the following dataset, which cannot be separated by a line passing through the origin but is clearly separable by a line that does not. 56 | \begin{figure}[H] 57 | \centering 58 | \includegraphics[width=6.5cm]{inseperable-no-bias.png} 59 | \caption{Angularly inseparable clusters} 60 | \end{figure} 61 | \textbf{Note-} Angularly separable clusters can be separated using the current classification model. 62 | \begin{figure}[H] 63 | \centering 64 | \includegraphics[width=6cm]{angular-seperable.png}% 65 | \caption{Angularly separated clusters} 66 | \end{figure} 67 | 68 | Solving this issue is simple; we are clearly missing a bias term in our rules : 69 | $$\mathbf{w}^Tx + \mathbf{b} > 1 \; \Rightarrow \; y=+1$$ 70 | $$\mathbf{w}^Tx + \mathbf{b} < -1 \; \Rightarrow \; y=-1$$ 71 | Now, our model has two parameters to train, $w \in \mathbb{R}^k$ and $b \in \mathbb{R}$. We shall discuss methods of finding a working pair $(w,b)$ which is capable of correctly classifying a training dataset.\\[2mm] 72 | A proposed solution is to find two points that are closest to one another and lie in different classes, and use the perpendicular bisector between this pair as the separation boundary.
Clearly this is a heuristic and an intrinsically unsupervised approach, and a counterexample can easily be found - 73 | \begin{figure}[H] 74 | \centering 75 | \includegraphics[width=9cm]{counter-perp-bisector.png}% 76 | \caption{The perpendicular bisector approach does not work.} 77 | \end{figure} 78 | \section{Convex Optimization} 79 | Clearly there can be multiple $w,b$ such that the classifier 80 | $$\mathbf{w}^Tx + \mathbf{b} > +1 \; \Rightarrow \; y=+1$$ 81 | $$\mathbf{w}^Tx + \mathbf{b} < -1 \; \Rightarrow \; y=-1$$ 82 | classifies the training data reasonably well.\\[2mm] 83 | The pair of conditions can be rewritten as $ y_i(\mathbf{w}^Tx_i+\mathbf{b}) > 1$ if the class labels are fixed to be $\{+1,-1\}$.\\[1mm] 84 | Our aim is to find a convex objective function $\mathbf{c}$ for which one can perform : 85 | $$\{\mathbf{w}^*,\mathbf{b}^*\}=argmin \; \mathbf{c(w,b)} \;\; : \;\; \forall i \in \mathbb{D}\;\; y_i({\mathbf{w}^*}^Tx_i+\mathbf{b}^*) > 1$$ 86 | which captures the gist of the problem we are trying to solve. A few proposals follow : 87 | \begin{itemize} 88 | \item Let the perpendicular distance of the $i^{th}$ positive point from a line $\{\mathbf{w},\mathbf{b}\}$ be $d_i^+(\mathbf{w},\mathbf{b})$ and that of the $j^{th}$ negative point be $d_j^-(\mathbf{w},\mathbf{b})$. 89 | $$\mathbf{c(w,b)}:=min_{i\in n_+}\{d_i^+(\mathbf{w},\mathbf{b})\} \cdot min_{j\in n_-}\{d_j^-(\mathbf{w},\mathbf{b})\}$$ 90 | Our optimization problem would have been 91 | $$\{\mathbf{w}^*,\mathbf{b}^*\}=argmax \; \mathbf{c(w,b)}$$ 92 | We try to maximize this objective function; equivalently, we could minimize its negative.\\[1mm] 93 | This objective function tries to increase the distance of both clusters from our classification line. Convince yourself that a sum would not achieve this two-fold maximization: a sum can be large even when one of the two distances is close to zero, whereas a product cannot.\\[1mm] 94 | However, this objective function is not easily optimized. 95 | \item Define 96 | $$\mathbf{c(w,b)}=min(min_{i\in n_+}\{d_i^+(\mathbf{w},\mathbf{b})\},min_{j\in n_-}\{d_j^-(\mathbf{w},\mathbf{b})\})$$ 97 | Or 98 | $$\mathbf{c(w,b)}=min_{i\in \mathbb{D}}\{d_i(\mathbf{w},\mathbf{b})\}$$ 99 | Here, we are interested in maximizing the minimum distance across all points. If this quantity is large, then all other distances will be large.\\ 100 | In fact, the perpendicular distance of the $i^{th}$ point from the line $\mathbf{w}^Tx+\mathbf{b}=0$ is given by $d_i=\frac{|\mathbf{w}^Tx_i+\mathbf{b}|}{||\mathbf{w}||}$.\\[1mm] 101 | Since this is a constrained maximization, we have the constraint $y_i(\mathbf{w}^Tx_i+\mathbf{b}) > 1$. Thus, 102 | $$d_i=\frac{|\mathbf{w}^Tx_i+\mathbf{b}|}{||\mathbf{w}||} > \frac{1}{||\mathbf{w}||}$$ 103 | To increase $d_i$ over all $i$, we can increase this lower bound on $d_i$, i.e., minimize $||\mathbf{w}||$. 104 | \item This leads to the famous regularized loss : 105 | $$\{\mathbf{w}^*,\mathbf{b}^*\}=argmin \; ||\mathbf{w}||^2\;\; : \;\;\forall i \in \mathbb{D}\;\; y_i(\mathbf{w}^Tx_i+\mathbf{b}) > 1$$ 106 | \end{itemize} 107 | \section{A more realistic task} 108 | % \textbf{\textcolor{red}{insert fig}}\\ 109 | After seeing how to select a good classifier when multiple are available, it is time to look at a more realistic problem. Clearly the set of inequalities 110 | $$\forall i \in \mathbb{D}\;\; y_i({\mathbf{w}^*}^Tx_i+\mathbf{b}^*) > 1$$ 111 | need not always have a $\mathbf{w,b}$ pair satisfying them. Consider the following figure : 112 | \begin{figure}[H] 113 | \centering 114 | \includegraphics[width=6cm]{overlapping-clusters.png} 115 | \caption{Overlapping clusters.
The points cannot be linearly classified.} 116 | \end{figure} 117 | Dropping constraints, one can simultaneously minimize $\mathbf{c}$ and the number of points violating the margin constraint : 118 | $$|\{i\; : \;y_i(\mathbf{w}^Tx_i+\mathbf{b})\leq 1\}|$$ 119 | However, this does not take the degree of violation into consideration. The points for which $y_i(\mathbf{w}^Tx_i+\mathbf{b})$ is just below $1$ are treated the same as the points for which it is close to $0$ or even negative. To counter this, we introduce the hinge loss over $y_i(\mathbf{w}^Tx_i+\mathbf{b})-1$ and use it to train the SVM (see the numerical training sketch further below).\\ 120 | The final loss function looks like : 121 | $$ \mathbf{w}^*, \mathbf{b}^* = argmin_{w,b} \sum_i H(y_i(\mathbf{w}^Tx_i+\mathbf{b})-1) + \lambda||w||^2 $$ 122 | 123 | \end{document} -------------------------------------------------------------------------------- /Lecture 10/mybib.bib: -------------------------------------------------------------------------------- 1 | @book{bertsimas1997introduction, 2 | title={Introduction to linear optimization}, 3 | author={Bertsimas, Dimitris and Tsitsiklis, John N}, 4 | volume={6}, 5 | year={1997}, 6 | publisher={Athena Scientific Belmont, MA} 7 | } 8 | 9 | @book{boyd2004convex, 10 | title={Convex optimization}, 11 | author={Boyd, Stephen and Vandenberghe, Lieven}, 12 | year={2004}, 13 | publisher={Cambridge university press} 14 | } 15 | 16 | @book{wolsey2014integer, 17 | title={Integer and combinatorial optimization}, 18 | author={Wolsey, Laurence A and Nemhauser, George L}, 19 | year={2014}, 20 | publisher={John Wiley \& Sons} 21 | } -------------------------------------------------------------------------------- /Lecture 10/scribe.sty: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below...
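The block below is a small numerical sketch, added to these notes, of training the hinge-loss SVM objective derived in the Lecture 10 notes above, i.e. minimizing $\sum_i \max(0,\,1-y_i(\mathbf{w}^Tx_i+\mathbf{b})) + \lambda||\mathbf{w}||^2$ by subgradient descent (the standard way of writing $H(y_i(\mathbf{w}^Tx_i+\mathbf{b})-1)$ with $H(t)=\max(0,-t)$). It is only an illustration: the toy data, hyperparameters and variable names are assumptions, not part of the lecture.

\begin{lstlisting}[language=Python]
import numpy as np

rng = np.random.default_rng(0)

# Toy 2D dataset: two Gaussian blobs with labels +1 and -1 (illustrative only).
X_pos = rng.normal(loc=[2.0, 2.0], scale=0.8, size=(50, 2))
X_neg = rng.normal(loc=[-2.0, -2.0], scale=0.8, size=(50, 2))
X = np.vstack([X_pos, X_neg])
y = np.concatenate([np.ones(50), -np.ones(50)])

lam, lr, epochs = 0.01, 0.1, 500
w = np.zeros(2)
b = 0.0

for _ in range(epochs):
    margins = y * (X @ w + b)        # y_i (w^T x_i + b)
    viol = margins < 1               # points with non-zero hinge loss
    # Subgradient of mean_i max(0, 1 - y_i(w^T x_i + b)) + lam * ||w||^2
    hinge_grad_w = -(y[viol, None] * X[viol]).sum(axis=0) / len(X)
    hinge_grad_b = -y[viol].sum() / len(X)
    w -= lr * (hinge_grad_w + 2 * lam * w)
    b -= lr * hinge_grad_b

# Training accuracy of the learned rule sign(w^T x + b).
print(w, b, np.mean(np.sign(X @ w + b) == y))
\end{lstlisting}

For separable clusters and small $\lambda$ this recovers a separating hyperplane with a margin; for overlapping clusters it trades margin violations against $||\mathbf{w}||^2$, which is exactly the compromise the hinge loss was introduced for.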
62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | 122 | \lstset{% 123 | language = Julia, 124 | basicstyle = \ttfamily, 125 | keywordstyle = \bfseries\color{blue}, 126 | stringstyle = \color{magenta}, 127 | commentstyle = \color{ForestGreen}, 128 | showstringspaces = false, 129 | } 130 | 131 | 132 | \newtheorem{theorem}{Theorem}[section] 133 | \newtheorem{lemma}[theorem]{Lemma} 134 | \newtheorem{claim}[theorem]{Claim} 135 | \newtheorem{proposition}[theorem]{Proposition} 136 | \newtheorem{corollary}[theorem]{Corollary} 137 | \newtheorem{fact}[theorem]{Fact} 138 | \newtheorem{example}[theorem]{Example} 139 | \newtheorem{notation}[theorem]{Notation} 140 | \newtheorem{observation}[theorem]{Observation} 141 | \newtheorem{conjecture}[theorem]{Conjecture} 142 | 143 | \theoremstyle{definition} 144 | \newtheorem{definition}[theorem]{Definition} 145 | 146 | \theoremstyle{remark} 147 | \newtheorem{remark}[theorem]{Remark} 148 | 149 | % Setting the theorem style back to plain in case theorems are defined in the main file 150 | \theoremstyle{plain} 151 | 152 | 153 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 154 | % Useful macros 155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 156 | 157 | % for temporarily chunks of text 158 | \newcommand{\ignore}[1]{} 159 | 160 | % Probability/expectation operators. The ones ending in x should be used if you want 161 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 162 | % go below and to the right. NB: \P is remapped below for the complexity class P. 
163 | \renewcommand{\Pr}{{\bf Pr}} 164 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 165 | \newcommand{\E}{{\bf E}} 166 | \newcommand{\Ex}{\mathop{\bf E\/}} 167 | \newcommand{\Var}{{\bf Var}} 168 | \newcommand{\Varx}{\mathop{\bf Var\/}} 169 | \newcommand{\Cov}{{\bf Cov}} 170 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 171 | 172 | % shortcuts for symbol names that are too long to type 173 | \newcommand{\eps}{\epsilon} 174 | \newcommand{\lam}{\lambda} 175 | \renewcommand{\l}{\ell} 176 | \newcommand{\la}{\langle} 177 | \newcommand{\ra}{\rangle} 178 | \newcommand{\wh}{\widehat} 179 | \newcommand{\wt}{\widetilde} 180 | 181 | % "blackboard-fonted" letters for the reals, naturals etc. 182 | \newcommand{\R}{\mathbb R} 183 | \newcommand{\N}{\mathbb N} 184 | \newcommand{\Z}{\mathbb Z} 185 | \newcommand{\F}{\mathbb F} 186 | \newcommand{\Q}{\mathbb Q} 187 | \newcommand{\C}{\mathbb C} 188 | 189 | % operators that should be typeset in Roman font 190 | \newcommand{\poly}{\mathrm{poly}} 191 | \newcommand{\polylog}{\mathrm{polylog}} 192 | \newcommand{\sgn}{\mathrm{sgn}} 193 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 194 | \newcommand{\val}{{\mathrm{val}}} 195 | 196 | % complexity classes 197 | \renewcommand{\P}{\mathrm{P}} 198 | \newcommand{\NP}{\mathrm{NP}} 199 | \newcommand{\BPP}{\mathrm{BPP}} 200 | \newcommand{\DTIME}{\mathrm{DTIME}} 201 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 202 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 203 | \newcommand{\NTIME}{\mathrm{NTIME}} 204 | 205 | % values associated to optimization algorithm instances 206 | \newcommand{\Opt}{{\mathsf{Opt}}} 207 | \newcommand{\Alg}{{\mathsf{Alg}}} 208 | \newcommand{\Lp}{{\mathsf{Lp}}} 209 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 210 | \newcommand{\Exp}{{\mathsf{Exp}}} 211 | 212 | % if you think the sum and product signs are too big in your math mode; x convention 213 | % as in the probability operators 214 | \newcommand{\littlesum}{{\textstyle \sum}} 215 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 216 | \newcommand{\littleprod}{{\textstyle \prod}} 217 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 218 | 219 | % horizontal line across the page 220 | \newcommand{\horz}{ 221 | \vspace{-.4in} 222 | \begin{center} 223 | \begin{tabular}{p{\textwidth}}\\ 224 | \hline 225 | \end{tabular} 226 | \end{center} 227 | } 228 | 229 | % calligraphic letters 230 | \newcommand{\calA}{{\cal A}} 231 | \newcommand{\calB}{{\cal B}} 232 | \newcommand{\calC}{{\cal C}} 233 | \newcommand{\calD}{{\cal D}} 234 | \newcommand{\calE}{{\cal E}} 235 | \newcommand{\calF}{{\cal F}} 236 | \newcommand{\calG}{{\cal G}} 237 | \newcommand{\calH}{{\cal H}} 238 | \newcommand{\calI}{{\cal I}} 239 | \newcommand{\calJ}{{\cal J}} 240 | \newcommand{\calK}{{\cal K}} 241 | \newcommand{\calL}{{\cal L}} 242 | \newcommand{\calM}{{\cal M}} 243 | \newcommand{\calN}{{\cal N}} 244 | \newcommand{\calO}{{\cal O}} 245 | \newcommand{\calP}{{\cal P}} 246 | \newcommand{\calQ}{{\cal Q}} 247 | \newcommand{\calR}{{\cal R}} 248 | \newcommand{\calS}{{\cal S}} 249 | \newcommand{\calT}{{\cal T}} 250 | \newcommand{\calU}{{\cal U}} 251 | \newcommand{\calV}{{\cal V}} 252 | \newcommand{\calW}{{\cal W}} 253 | \newcommand{\calX}{{\cal X}} 254 | \newcommand{\calY}{{\cal Y}} 255 | \newcommand{\calZ}{{\cal Z}} 256 | 257 | % bold letters (useful for random variables) 258 | \renewcommand{\a}{{\boldsymbol a}} 259 | \renewcommand{\b}{{\boldsymbol b}} 260 | \renewcommand{\c}{{\boldsymbol c}} 261 | \renewcommand{\d}{{\boldsymbol d}} 262 | \newcommand{\e}{{\boldsymbol e}} 263 | \newcommand{\f}{{\boldsymbol f}} 264 | 
\newcommand{\g}{{\boldsymbol g}} 265 | \newcommand{\h}{{\boldsymbol h}} 266 | \renewcommand{\i}{{\boldsymbol i}} 267 | \renewcommand{\j}{{\boldsymbol j}} 268 | \renewcommand{\k}{{\boldsymbol k}} 269 | \newcommand{\m}{{\boldsymbol m}} 270 | \newcommand{\n}{{\boldsymbol n}} 271 | \renewcommand{\o}{{\boldsymbol o}} 272 | \newcommand{\p}{{\boldsymbol p}} 273 | \newcommand{\q}{{\boldsymbol q}} 274 | \renewcommand{\r}{{\boldsymbol r}} 275 | \newcommand{\s}{{\boldsymbol s}} 276 | \renewcommand{\t}{{\boldsymbol t}} 277 | \renewcommand{\u}{{\boldsymbol u}} 278 | \renewcommand{\v}{{\boldsymbol v}} 279 | \newcommand{\w}{{\boldsymbol w}} 280 | \newcommand{\x}{{\boldsymbol x}} 281 | \newcommand{\y}{{\boldsymbol y}} 282 | \newcommand{\z}{{\boldsymbol z}} 283 | \newcommand{\A}{{\boldsymbol A}} 284 | \newcommand{\B}{{\boldsymbol B}} 285 | \newcommand{\D}{{\boldsymbol D}} 286 | \newcommand{\G}{{\boldsymbol G}} 287 | \renewcommand{\H}{{\boldsymbol H}} 288 | \newcommand{\I}{{\boldsymbol I}} 289 | \newcommand{\J}{{\boldsymbol J}} 290 | \newcommand{\K}{{\boldsymbol K}} 291 | \renewcommand{\L}{{\boldsymbol L}} 292 | \newcommand{\M}{{\boldsymbol M}} 293 | \renewcommand{\O}{{\boldsymbol O}} 294 | \renewcommand{\S}{{\boldsymbol S}} 295 | \newcommand{\T}{{\boldsymbol T}} 296 | \newcommand{\U}{{\boldsymbol U}} 297 | \newcommand{\V}{{\boldsymbol V}} 298 | \newcommand{\W}{{\boldsymbol W}} 299 | \newcommand{\X}{{\boldsymbol X}} 300 | \newcommand{\Y}{{\boldsymbol Y}} 301 | 302 | 303 | 304 | % useful for Fourier analysis 305 | \newcommand{\bits}{\{-1,1\}} 306 | \newcommand{\bitsn}{\{-1,1\}^n} 307 | \newcommand{\bn}{\bitsn} 308 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 309 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 310 | 311 | % if you want 312 | \newcommand{\half}{{\textstyle \frac12}} 313 | 314 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 315 | 316 | 317 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 318 | % Feel free to ignore the rest of this file 319 | 320 | 321 | 322 | \def\ScribeStr{??} 323 | \def\LecStr{??} 324 | \def\LecNum{??} 325 | \def\LecTitle{??} 326 | \def\LecDate{??} 327 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 328 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 329 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 330 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 331 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 332 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 333 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 334 | 335 | \newdimen\headerwidth 336 | 337 | \newcommand{\MakeScribeTop}{ 338 | \noindent 339 | \begin{center} 340 | \framebox{ 341 | \vbox{ 342 | \headerwidth=\textwidth 343 | \advance\headerwidth by -0.22in 344 | \hbox to \headerwidth {\hfill AIML - CS 337} 345 | \vspace{4mm} 346 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 347 | \vspace{2mm} 348 | \hbox to \headerwidth {\hfill \LecDate \hfill} 349 | \vspace{2mm} 350 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 351 | } 352 | } 353 | \end{center} 354 | \vspace*{4mm}} 355 | -------------------------------------------------------------------------------- /Lecture 11/Lecture11.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 11/Lecture11.pdf 
-------------------------------------------------------------------------------- /Lecture 11/Overlapping Clusters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 11/Overlapping Clusters.png -------------------------------------------------------------------------------- /Lecture 11/Separable Clusters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 11/Separable Clusters.png -------------------------------------------------------------------------------- /Lecture 11/Tuning-b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 11/Tuning-b.jpg -------------------------------------------------------------------------------- /Lecture 11/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 11/image.png -------------------------------------------------------------------------------- /Lecture 11/image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 11/image1.jpg -------------------------------------------------------------------------------- /Lecture 11/scribe.sty: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below... 
62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | 122 | \lstset{% 123 | language = Julia, 124 | basicstyle = \ttfamily, 125 | keywordstyle = \bfseries\color{blue}, 126 | stringstyle = \color{magenta}, 127 | commentstyle = \color{ForestGreen}, 128 | showstringspaces = false, 129 | } 130 | 131 | 132 | \newtheorem{theorem}{Theorem}[section] 133 | \newtheorem{lemma}[theorem]{Lemma} 134 | \newtheorem{claim}[theorem]{Claim} 135 | \newtheorem{proposition}[theorem]{Proposition} 136 | \newtheorem{corollary}[theorem]{Corollary} 137 | \newtheorem{fact}[theorem]{Fact} 138 | \newtheorem{example}[theorem]{Example} 139 | \newtheorem{notation}[theorem]{Notation} 140 | \newtheorem{observation}[theorem]{Observation} 141 | \newtheorem{conjecture}[theorem]{Conjecture} 142 | 143 | \theoremstyle{definition} 144 | \newtheorem{definition}[theorem]{Definition} 145 | 146 | \theoremstyle{remark} 147 | \newtheorem{remark}[theorem]{Remark} 148 | 149 | % Setting the theorem style back to plain in case theorems are defined in the main file 150 | \theoremstyle{plain} 151 | 152 | 153 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 154 | % Useful macros 155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 156 | 157 | % for temporarily chunks of text 158 | \newcommand{\ignore}[1]{} 159 | 160 | % Probability/expectation operators. The ones ending in x should be used if you want 161 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 162 | % go below and to the right. NB: \P is remapped below for the complexity class P. 
163 | \renewcommand{\Pr}{{\bf Pr}} 164 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 165 | \newcommand{\E}{{\bf E}} 166 | \newcommand{\Ex}{\mathop{\bf E\/}} 167 | \newcommand{\Var}{{\bf Var}} 168 | \newcommand{\Varx}{\mathop{\bf Var\/}} 169 | \newcommand{\Cov}{{\bf Cov}} 170 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 171 | 172 | % shortcuts for symbol names that are too long to type 173 | \newcommand{\eps}{\epsilon} 174 | \newcommand{\lam}{\lambda} 175 | \renewcommand{\l}{\ell} 176 | \newcommand{\la}{\langle} 177 | \newcommand{\ra}{\rangle} 178 | \newcommand{\wh}{\widehat} 179 | \newcommand{\wt}{\widetilde} 180 | 181 | % "blackboard-fonted" letters for the reals, naturals etc. 182 | \newcommand{\R}{\mathbb R} 183 | \newcommand{\N}{\mathbb N} 184 | \newcommand{\Z}{\mathbb Z} 185 | \newcommand{\F}{\mathbb F} 186 | \newcommand{\Q}{\mathbb Q} 187 | \newcommand{\C}{\mathbb C} 188 | 189 | % operators that should be typeset in Roman font 190 | \newcommand{\poly}{\mathrm{poly}} 191 | \newcommand{\polylog}{\mathrm{polylog}} 192 | \newcommand{\sgn}{\mathrm{sgn}} 193 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 194 | \newcommand{\val}{{\mathrm{val}}} 195 | 196 | % complexity classes 197 | \renewcommand{\P}{\mathrm{P}} 198 | \newcommand{\NP}{\mathrm{NP}} 199 | \newcommand{\BPP}{\mathrm{BPP}} 200 | \newcommand{\DTIME}{\mathrm{DTIME}} 201 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 202 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 203 | \newcommand{\NTIME}{\mathrm{NTIME}} 204 | 205 | % values associated to optimization algorithm instances 206 | \newcommand{\Opt}{{\mathsf{Opt}}} 207 | \newcommand{\Alg}{{\mathsf{Alg}}} 208 | \newcommand{\Lp}{{\mathsf{Lp}}} 209 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 210 | \newcommand{\Exp}{{\mathsf{Exp}}} 211 | 212 | % if you think the sum and product signs are too big in your math mode; x convention 213 | % as in the probability operators 214 | \newcommand{\littlesum}{{\textstyle \sum}} 215 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 216 | \newcommand{\littleprod}{{\textstyle \prod}} 217 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 218 | 219 | % horizontal line across the page 220 | \newcommand{\horz}{ 221 | \vspace{-.4in} 222 | \begin{center} 223 | \begin{tabular}{p{\textwidth}}\\ 224 | \hline 225 | \end{tabular} 226 | \end{center} 227 | } 228 | 229 | % calligraphic letters 230 | \newcommand{\calA}{{\cal A}} 231 | \newcommand{\calB}{{\cal B}} 232 | \newcommand{\calC}{{\cal C}} 233 | \newcommand{\calD}{{\cal D}} 234 | \newcommand{\calE}{{\cal E}} 235 | \newcommand{\calF}{{\cal F}} 236 | \newcommand{\calG}{{\cal G}} 237 | \newcommand{\calH}{{\cal H}} 238 | \newcommand{\calI}{{\cal I}} 239 | \newcommand{\calJ}{{\cal J}} 240 | \newcommand{\calK}{{\cal K}} 241 | \newcommand{\calL}{{\cal L}} 242 | \newcommand{\calM}{{\cal M}} 243 | \newcommand{\calN}{{\cal N}} 244 | \newcommand{\calO}{{\cal O}} 245 | \newcommand{\calP}{{\cal P}} 246 | \newcommand{\calQ}{{\cal Q}} 247 | \newcommand{\calR}{{\cal R}} 248 | \newcommand{\calS}{{\cal S}} 249 | \newcommand{\calT}{{\cal T}} 250 | \newcommand{\calU}{{\cal U}} 251 | \newcommand{\calV}{{\cal V}} 252 | \newcommand{\calW}{{\cal W}} 253 | \newcommand{\calX}{{\cal X}} 254 | \newcommand{\calY}{{\cal Y}} 255 | \newcommand{\calZ}{{\cal Z}} 256 | 257 | % bold letters (useful for random variables) 258 | \renewcommand{\a}{{\boldsymbol a}} 259 | \renewcommand{\b}{{\boldsymbol b}} 260 | \renewcommand{\c}{{\boldsymbol c}} 261 | \renewcommand{\d}{{\boldsymbol d}} 262 | \newcommand{\e}{{\boldsymbol e}} 263 | \newcommand{\f}{{\boldsymbol f}} 264 | 
\newcommand{\g}{{\boldsymbol g}} 265 | \newcommand{\h}{{\boldsymbol h}} 266 | \renewcommand{\i}{{\boldsymbol i}} 267 | \renewcommand{\j}{{\boldsymbol j}} 268 | \renewcommand{\k}{{\boldsymbol k}} 269 | \newcommand{\m}{{\boldsymbol m}} 270 | \newcommand{\n}{{\boldsymbol n}} 271 | \renewcommand{\o}{{\boldsymbol o}} 272 | \newcommand{\p}{{\boldsymbol p}} 273 | \newcommand{\q}{{\boldsymbol q}} 274 | \renewcommand{\r}{{\boldsymbol r}} 275 | \newcommand{\s}{{\boldsymbol s}} 276 | \renewcommand{\t}{{\boldsymbol t}} 277 | \renewcommand{\u}{{\boldsymbol u}} 278 | \renewcommand{\v}{{\boldsymbol v}} 279 | \newcommand{\w}{{\boldsymbol w}} 280 | \newcommand{\x}{{\boldsymbol x}} 281 | \newcommand{\y}{{\boldsymbol y}} 282 | \newcommand{\z}{{\boldsymbol z}} 283 | \newcommand{\A}{{\boldsymbol A}} 284 | \newcommand{\B}{{\boldsymbol B}} 285 | \newcommand{\D}{{\boldsymbol D}} 286 | \newcommand{\G}{{\boldsymbol G}} 287 | \renewcommand{\H}{{\boldsymbol H}} 288 | \newcommand{\I}{{\boldsymbol I}} 289 | \newcommand{\J}{{\boldsymbol J}} 290 | \newcommand{\K}{{\boldsymbol K}} 291 | \renewcommand{\L}{{\boldsymbol L}} 292 | \newcommand{\M}{{\boldsymbol M}} 293 | \renewcommand{\O}{{\boldsymbol O}} 294 | \renewcommand{\S}{{\boldsymbol S}} 295 | \newcommand{\T}{{\boldsymbol T}} 296 | \newcommand{\U}{{\boldsymbol U}} 297 | \newcommand{\V}{{\boldsymbol V}} 298 | \newcommand{\W}{{\boldsymbol W}} 299 | \newcommand{\X}{{\boldsymbol X}} 300 | \newcommand{\Y}{{\boldsymbol Y}} 301 | 302 | 303 | 304 | % useful for Fourier analysis 305 | \newcommand{\bits}{\{-1,1\}} 306 | \newcommand{\bitsn}{\{-1,1\}^n} 307 | \newcommand{\bn}{\bitsn} 308 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 309 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 310 | 311 | % if you want 312 | \newcommand{\half}{{\textstyle \frac12}} 313 | 314 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 315 | 316 | 317 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 318 | % Feel free to ignore the rest of this file 319 | 320 | 321 | 322 | \def\ScribeStr{??} 323 | \def\LecStr{??} 324 | \def\LecNum{??} 325 | \def\LecTitle{??} 326 | \def\LecDate{??} 327 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 328 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 329 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 330 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 331 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 332 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 333 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 334 | 335 | \newdimen\headerwidth 336 | 337 | \newcommand{\MakeScribeTop}{ 338 | \noindent 339 | \begin{center} 340 | \framebox{ 341 | \vbox{ 342 | \headerwidth=\textwidth 343 | \advance\headerwidth by -0.22in 344 | \hbox to \headerwidth {\hfill AIML - CS 337} 345 | \vspace{4mm} 346 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 347 | \vspace{2mm} 348 | \hbox to \headerwidth {\hfill \LecDate \hfill} 349 | \vspace{2mm} 350 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 351 | } 352 | } 353 | \end{center} 354 | \vspace*{4mm}} 355 | -------------------------------------------------------------------------------- /Lecture 12/cs337__lecture_12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 12/cs337__lecture_12.pdf 
-------------------------------------------------------------------------------- /Lecture 12/input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 12/input.png -------------------------------------------------------------------------------- /Lecture 12/mybib.bib: -------------------------------------------------------------------------------- 1 | @book{UNderstandingML, 2 | title={Understanding Machine Learning, Chapter 16}, 3 | author={Shai Shalev-Shwartz, Shai Ben-David}, 4 | year={2014}, 5 | publisher={Cambridge University Press} 6 | } 7 | -------------------------------------------------------------------------------- /Lecture 12/transformed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 12/transformed.png -------------------------------------------------------------------------------- /Lecture 13/scribe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 13/scribe.pdf -------------------------------------------------------------------------------- /Lecture 13/scribe.tex: -------------------------------------------------------------------------------- 1 | \documentclass[12pt]{article} 2 | \usepackage[english]{babel} 3 | \usepackage[utf8x]{inputenc} 4 | \usepackage[T1]{fontenc} 5 | \usepackage{scribe} 6 | \usepackage{listings} 7 | \usepackage{romannum} 8 | 9 | 10 | \Scribe{} 11 | \Lecturer{Abir De} 12 | \LectureNumber{14} 13 | \LectureDate{29th Sept 2022} 14 | \LectureTitle{Kernel Methods - \Romannum{2}} 15 | 16 | \lstset{style=mystyle} 17 | 18 | \begin{document} 19 | 20 | \MakeScribeTop 21 | 22 | \section{Kernel for probability distributions} 23 | In the previous lecture we had seen that a Kernel can be defined as an inner product in the feature space as thus: 24 | \begin{equation} 25 | K(x, x') = \langle \phi(x),\phi(x') \rangle 26 | \end{equation} 27 | Extending on this definition let $x \in A, x' \in B$, and $x,$ $x'$ be drawn from probability distributions $P_1, P_2$ respectively. We can define a Kernel K over the sets A,B as such: 28 | \begin{align} 29 | K(A,B) &= \int_{-\infty}^{\infty} \phi_A^T(x).\phi_B(x')Pr(x,x')\\ 30 | &= \iint_{x \in A, x' \in B} \phi^T(x)\phi(x')dP(x,x') 31 | \end{align} 32 | Now, one possible measure of similitarity between the two sets A and B is $P(A \cap B) - P(A)P(B)$. To show that this indeed is a valid kernel, we can show that there exists some $\phi$ such that $K(A,B) = P(A \cap B) - P(A)P(B)$ and $K(A,B)$ satisfies Equation (3). 
33 | \\ 34 | 35 | \noindent Consider $\phi_A(x) = \mathbb{I}_A(x) - P(A)$ where $\mathbb{I}_A(x)$ is the indicator function: 36 | 37 | \begin{equation*} 38 | \mathbb{I}_A(x) = 39 | \begin{cases} 40 | 1 & x \in A \\ 41 | 0 & \text{otherwise} 42 | \end{cases} 43 | \end{equation*} 44 | \\ 45 | Substituting in Equation (2) we get: 46 | \begin{align*} 47 | K(A,B) &= \int_{-\infty}^{\infty} (\mathbb{I}_A(x) - P(A))(\mathbb{I}_B(x') - P(B))Pr(x,x')\\ 48 | &= \int_{-\infty}^{\infty} (\mathbb{I}_A(x) \mathbb{I}_B(x')) Pr(x,x') - P(A)\int_{-\infty}^{\infty} (\mathbb{I}_B(x')) Pr(x,x') \\ 49 | &- P(B)\int_{-\infty}^{\infty} (\mathbb{I}_A(x')) Pr(x,x') + P(A)P(B)\int_{-\infty}^{\infty} Pr(x,x')\\ 50 | &= P(A \cap B) - P(A)P(B) - P(A)P(B) + P(A)P(B)\\ 51 | &= P(A \cap B) - P(A)P(B) 52 | \end{align*} 53 | 54 | 55 | \section{Inner Product of Functions} 56 | We define the inner product of two functions $f,g$ as: 57 | \[ 58 | \langle f,g\rangle \doteq \int_{x,y}f(x)g(y)dP(x,y) 59 | \] 60 | Under this definition of the inner product, $K$ is a kernel for $\phi$, i.e., $K(A,B) = \langle \phi_A,\phi_B\rangle$. \\ 61 | Properties of an inner product: 62 | \begin{enumerate} 63 | \item Positive Semidefinite: $\langle f,f\rangle\;\geq 0$ 64 | \item Symmetric: $\langle f,g\rangle = \langle g,f\rangle$ 65 | \item Linearity: $\langle c_{1}f_{1} + c_{2}f_{2},g\rangle = c_{1}\langle f_{1},g\rangle + c_{2}\langle f_{2},g\rangle$ 66 | \end{enumerate} 67 | We define the norm of a function using this inner product as: 68 | \[ 69 | \lVert f\rVert \doteq \sqrt{\langle f,f\rangle} 70 | \] 71 | 72 | \section{Finding similarity from loss} 73 | Until now, we assumed that the kernel was given to us and we used it as a measure of similarity. But what if we have to construct the kernel when only the loss is given?\\ 74 | Let $F(w)$ be defined as follows:\\ 75 | \begin{equation*} 76 | F(w) = \sum_{i \in D} l( h_w(x_i), y_i) 77 | \end{equation*} 78 | Given this loss, we now need to find the similarity between points in the dataset. For each $x_i$, consider the vector of its losses across training steps $t$: 79 | \begin{center} 80 | \[ 81 | \begin{bmatrix} 82 | Loss(x_{i} | t=0)\\ 83 | Loss(x_{i} | t=1)\\ 84 | .\\ 85 | .\\ 86 | .\\ 87 | \end{bmatrix} 88 | \] 89 | \end{center} 90 | We make such vectors for all $x_{i}$'s and compare their similarity. 91 | We use this to select batches of data which have different training curves. 92 | If the losses of two points are similar, we cannot say that the points themselves are also similar. This is because the weights are randomly initialized, so the loss alone is not a good measure of similarity. We can instead use the gradient of the loss to measure similarity. 93 | \\ 94 | If $X_i \sim X_j$ then $\nabla_w l(h_w(x_i),y_i) \approx \nabla_w l(h_w(x_j),y_j)$ but not the other way round.\\ 95 | Therefore, we can define the kernel as follows: 96 | \begin{equation*} 97 | K(x_i,x_j) = E_w[\nabla_w^{T} l(h_w(x_i),y_i) \, \nabla_w l(h_w(x_j),y_j)] 98 | \end{equation*} 99 | 100 | \section{Final Problem} 101 | Consider now the following optimization objective: 102 | \begin{equation*} 103 | \min_{f\in\Lambda}\sum_{i\in\calD}\left(y_i - f(x_i)\right)^2 + \lambda\sum_{i\in\calD}f(x_i)^2 104 | \end{equation*} 105 | where $f$ is defined in the vector space $\Lambda$ of functions generated by the set 106 | \begin{equation*} 107 | \left\{k(x_i,\cdot)\right\}_{i\in\calD} 108 | \end{equation*} 109 | which is a linear subspace of $\R^{\calX}$, where $x_i\in\calX$ for all $i\in\calD$ and $k(\cdot,\cdot)$ is the kernel function defined $\calX\times\calX\stackrel{k}{\longrightarrow}\R$.
This vector space is also equipped with the following inner product: 110 | \begin{equation*} 111 | \left\langle\sum_{i\in\calD}\alpha_ik(x_i,\cdot), \sum_{j\in\calD}\beta_jk(x_j,\cdot)\right\rangle = \sum_{i\in\calD}\sum_{j\in\calD}\alpha_i\beta_jk(x_i,x_j) 112 | \end{equation*} 113 | That this defines an inner product space is easily verified. Indeed, for all $g\in\Lambda$, there are $\alpha_i\in\R$ for all $i\in\calD$ such that $g = \sum_{i\in\calD}\alpha_ik(x_i,\cdot)$, and 114 | \begin{equation*} 115 | \langle g,g\rangle = \sum_{i\in\calD}\sum_{j\in\calD}\alpha_i\alpha_jk(x_i,x_j)\ge0 116 | \end{equation*} 117 | since $k(\cdot,\cdot)$ is a kernel and therefore is positive semidefinite. Linearity in both operands of $\langle\cdot,\cdot\rangle$ is implicit from the definition, and finally symmetry of $\langle\cdot,\cdot\rangle$ follows from the symmetry of $k(\cdot,\cdot)$. 118 | 119 | As a result, we may rephrase the objective as 120 | \begin{equation*} 121 | \min_{\mathbf{\alpha}\in\R^{|\calD|}}\sum_{i\in\calD}\left(y_i - \sum_{j\in\calD}\alpha_jk(x_j, x_i)\right)^2 + \lambda\sum_{i\in\calD}\left(\sum_{j\in\calD}\alpha_jk(x_j,x_i)\right)^2 122 | \end{equation*} 123 | 124 | We have 125 | \begin{align*} 126 | \sum_{i\in\calD}f(x_i)^2 &= \sum_{i\in\calD}\left(\sum_{j\in\calD}\alpha_jk(x_j,x_i)\right)^2\\ 127 | &= \sum_{i\in\calD}\left(\sum_{j\in\calD}\sum_{m\in\calD}\alpha_j\alpha_m k(x_j, x_i)k(x_m,x_i)\right) 128 | \end{align*} 129 | 130 | Finally, we note that the squared norm of $f$ in the aforementioned inner product space is given by 131 | \begin{align*} 132 | \left\langle\sum_{i\in\calD}\alpha_ik(x_i,\cdot),\sum_{j\in\calD}\alpha_jk(x_j,\cdot)\right\rangle &= \sum_{i\in\calD}\sum_{j\in\calD}\alpha_i\alpha_jk(x_i,x_j)\\ 133 | &= \mathbf{\alpha}^TG\mathbf{\alpha} 134 | \end{align*} 135 | where $G = \Bigl[k(x_i,x_j)\Bigr]_{|\calD|\times |\calD|}$ 136 | and $\alpha = \begin{bmatrix}\alpha_1 & \cdots & \alpha_{|\calD|}\end{bmatrix}^T$. 137 | \section{Homework Problem} 138 | % TODO: Homework Problem 139 | \noindent\textbf{Problem.} Show that the following kernel is positive semidefinite: 140 | \begin{equation*} 141 | k(\x,\y) = \exp\left(-\frac{\|\x - \y\|^2}{2\sigma^2}\right) 142 | \end{equation*} 143 | 144 | \noindent\textbf{Solution.} Note the following equality: 145 | \begin{equation*} 146 | \int_{-\infty}^{\infty}\frac{1}{\sigma\sqrt{\pi}}\exp\left(-\frac{(x - z)^2}{\sigma^2}\right)\frac{1}{\sigma\sqrt{\pi}}\exp\left(-\frac{(y - z)^2}{\sigma^2}\right)~dz = \frac{1}{\sigma\sqrt{2\pi}}\exp\left(-\frac{(x - y)^2}{2\sigma^2}\right) 147 | \end{equation*} 148 | 149 | which is equivalent to the following (assuming a Euclidean norm): 150 | \begin{equation*} 151 | \int_{\R^n} \left(\frac{1}{\sigma}\sqrt{\frac{2}{\pi}}\right)^n\exp\left(-\frac{\|\x - \z\|^2}{\sigma^2}\right)\exp\left(-\frac{\|\y - \z\|^2}{\sigma^2}\right)~d\z = \exp\left(-\frac{\|\x - \y\|^2}{2\sigma^2}\right) 152 | \end{equation*} 153 | 154 | Let $\{\x_i\}_{i\in\calD}$ be a set of data points.
Then, for any sequence of real numbers $\{c_i\}_{i\in\calD}$, we have 155 | \begin{align*} 156 | &\sum_{i\in\calD}\sum_{j\in\calD}c_ic_j\exp\left(-\frac{\|\x_i-\x_j\|^2}{2\sigma^2}\right) \\ 157 | &= \left(\frac{1}{\sigma}\sqrt{\frac{2}{\pi}}\right)^n\int_{\R^n}\sum_{i,j\in\calD\times\calD}c_ic_j\exp\left(-\frac{\|\x_i - \z\|^2}{\sigma^2}\right)\exp\left(-\frac{\|\x_j - \z\|^2}{\sigma^2}\right)~d\mathbf{z}\\ 158 | &= \left(\frac{1}{\sigma}\sqrt{\frac{2}{\pi}}\right)^n\int_{\R^n}\left[\sum_{i\in\calD}c_i\exp\left(-\frac{\|\x_i - \z\|^2}{\sigma^2}\right)\right]^2~d\mathbf{z}\\ 159 | &\ge 0 160 | \end{align*} 161 | which is obviously non-negative. This completes the proof. $\blacksquare$ 162 | \end{document} 163 | -------------------------------------------------------------------------------- /Lecture 14/Lecture_14.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 14/Lecture_14.pdf -------------------------------------------------------------------------------- /Lecture 14/scribe.sty: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below... 
62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | 122 | \lstset{% 123 | language = Julia, 124 | basicstyle = \ttfamily, 125 | keywordstyle = \bfseries\color{blue}, 126 | stringstyle = \color{magenta}, 127 | commentstyle = \color{ForestGreen}, 128 | showstringspaces = false, 129 | } 130 | 131 | 132 | \newtheorem{theorem}{Theorem}[section] 133 | \newtheorem{lemma}[theorem]{Lemma} 134 | \newtheorem{claim}[theorem]{Claim} 135 | \newtheorem{proposition}[theorem]{Proposition} 136 | \newtheorem{corollary}[theorem]{Corollary} 137 | \newtheorem{fact}[theorem]{Fact} 138 | \newtheorem{example}[theorem]{Example} 139 | \newtheorem{notation}[theorem]{Notation} 140 | \newtheorem{observation}[theorem]{Observation} 141 | \newtheorem{conjecture}[theorem]{Conjecture} 142 | 143 | \theoremstyle{definition} 144 | \newtheorem{definition}[theorem]{Definition} 145 | 146 | \theoremstyle{remark} 147 | \newtheorem{remark}[theorem]{Remark} 148 | 149 | % Setting the theorem style back to plain in case theorems are defined in the main file 150 | \theoremstyle{plain} 151 | 152 | 153 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 154 | % Useful macros 155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 156 | 157 | % for temporarily chunks of text 158 | \newcommand{\ignore}[1]{} 159 | 160 | % Probability/expectation operators. The ones ending in x should be used if you want 161 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 162 | % go below and to the right. NB: \P is remapped below for the complexity class P. 
163 | \renewcommand{\Pr}{{\bf Pr}} 164 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 165 | \newcommand{\E}{{\bf E}} 166 | \newcommand{\Ex}{\mathop{\bf E\/}} 167 | \newcommand{\Var}{{\bf Var}} 168 | \newcommand{\Varx}{\mathop{\bf Var\/}} 169 | \newcommand{\Cov}{{\bf Cov}} 170 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 171 | 172 | % shortcuts for symbol names that are too long to type 173 | \newcommand{\eps}{\epsilon} 174 | \newcommand{\lam}{\lambda} 175 | \renewcommand{\l}{\ell} 176 | \newcommand{\la}{\langle} 177 | \newcommand{\ra}{\rangle} 178 | \newcommand{\wh}{\widehat} 179 | \newcommand{\wt}{\widetilde} 180 | 181 | % "blackboard-fonted" letters for the reals, naturals etc. 182 | \newcommand{\R}{\mathbb R} 183 | \newcommand{\N}{\mathbb N} 184 | \newcommand{\Z}{\mathbb Z} 185 | \newcommand{\F}{\mathbb F} 186 | \newcommand{\Q}{\mathbb Q} 187 | \newcommand{\C}{\mathbb C} 188 | 189 | % operators that should be typeset in Roman font 190 | \newcommand{\poly}{\mathrm{poly}} 191 | \newcommand{\polylog}{\mathrm{polylog}} 192 | \newcommand{\sgn}{\mathrm{sgn}} 193 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 194 | \newcommand{\val}{{\mathrm{val}}} 195 | 196 | % complexity classes 197 | \renewcommand{\P}{\mathrm{P}} 198 | \newcommand{\NP}{\mathrm{NP}} 199 | \newcommand{\BPP}{\mathrm{BPP}} 200 | \newcommand{\DTIME}{\mathrm{DTIME}} 201 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 202 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 203 | \newcommand{\NTIME}{\mathrm{NTIME}} 204 | 205 | % values associated to optimization algorithm instances 206 | \newcommand{\Opt}{{\mathsf{Opt}}} 207 | \newcommand{\Alg}{{\mathsf{Alg}}} 208 | \newcommand{\Lp}{{\mathsf{Lp}}} 209 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 210 | \newcommand{\Exp}{{\mathsf{Exp}}} 211 | 212 | % if you think the sum and product signs are too big in your math mode; x convention 213 | % as in the probability operators 214 | \newcommand{\littlesum}{{\textstyle \sum}} 215 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 216 | \newcommand{\littleprod}{{\textstyle \prod}} 217 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 218 | 219 | % horizontal line across the page 220 | \newcommand{\horz}{ 221 | \vspace{-.4in} 222 | \begin{center} 223 | \begin{tabular}{p{\textwidth}}\\ 224 | \hline 225 | \end{tabular} 226 | \end{center} 227 | } 228 | 229 | % calligraphic letters 230 | \newcommand{\calA}{{\cal A}} 231 | \newcommand{\calB}{{\cal B}} 232 | \newcommand{\calC}{{\cal C}} 233 | \newcommand{\calD}{{\cal D}} 234 | \newcommand{\calE}{{\cal E}} 235 | \newcommand{\calF}{{\cal F}} 236 | \newcommand{\calG}{{\cal G}} 237 | \newcommand{\calH}{{\cal H}} 238 | \newcommand{\calI}{{\cal I}} 239 | \newcommand{\calJ}{{\cal J}} 240 | \newcommand{\calK}{{\cal K}} 241 | \newcommand{\calL}{{\cal L}} 242 | \newcommand{\calM}{{\cal M}} 243 | \newcommand{\calN}{{\cal N}} 244 | \newcommand{\calO}{{\cal O}} 245 | \newcommand{\calP}{{\cal P}} 246 | \newcommand{\calQ}{{\cal Q}} 247 | \newcommand{\calR}{{\cal R}} 248 | \newcommand{\calS}{{\cal S}} 249 | \newcommand{\calT}{{\cal T}} 250 | \newcommand{\calU}{{\cal U}} 251 | \newcommand{\calV}{{\cal V}} 252 | \newcommand{\calW}{{\cal W}} 253 | \newcommand{\calX}{{\cal X}} 254 | \newcommand{\calY}{{\cal Y}} 255 | \newcommand{\calZ}{{\cal Z}} 256 | 257 | % bold letters (useful for random variables) 258 | \renewcommand{\a}{{\boldsymbol a}} 259 | \renewcommand{\b}{{\boldsymbol b}} 260 | \renewcommand{\c}{{\boldsymbol c}} 261 | \renewcommand{\d}{{\boldsymbol d}} 262 | \newcommand{\e}{{\boldsymbol e}} 263 | \newcommand{\f}{{\boldsymbol f}} 264 | 
\newcommand{\g}{{\boldsymbol g}} 265 | \newcommand{\h}{{\boldsymbol h}} 266 | \renewcommand{\i}{{\boldsymbol i}} 267 | \renewcommand{\j}{{\boldsymbol j}} 268 | \renewcommand{\k}{{\boldsymbol k}} 269 | \newcommand{\m}{{\boldsymbol m}} 270 | \newcommand{\n}{{\boldsymbol n}} 271 | \renewcommand{\o}{{\boldsymbol o}} 272 | \newcommand{\p}{{\boldsymbol p}} 273 | \newcommand{\q}{{\boldsymbol q}} 274 | \renewcommand{\r}{{\boldsymbol r}} 275 | \newcommand{\s}{{\boldsymbol s}} 276 | \renewcommand{\t}{{\boldsymbol t}} 277 | \renewcommand{\u}{{\boldsymbol u}} 278 | \renewcommand{\v}{{\boldsymbol v}} 279 | \newcommand{\w}{{\boldsymbol w}} 280 | \newcommand{\x}{{\boldsymbol x}} 281 | \newcommand{\y}{{\boldsymbol y}} 282 | \newcommand{\z}{{\boldsymbol z}} 283 | \newcommand{\A}{{\boldsymbol A}} 284 | \newcommand{\B}{{\boldsymbol B}} 285 | \newcommand{\D}{{\boldsymbol D}} 286 | \newcommand{\G}{{\boldsymbol G}} 287 | \renewcommand{\H}{{\boldsymbol H}} 288 | \newcommand{\I}{{\boldsymbol I}} 289 | \newcommand{\J}{{\boldsymbol J}} 290 | \newcommand{\K}{{\boldsymbol K}} 291 | \renewcommand{\L}{{\boldsymbol L}} 292 | \newcommand{\M}{{\boldsymbol M}} 293 | \renewcommand{\O}{{\boldsymbol O}} 294 | \renewcommand{\S}{{\boldsymbol S}} 295 | \newcommand{\T}{{\boldsymbol T}} 296 | \newcommand{\U}{{\boldsymbol U}} 297 | \newcommand{\V}{{\boldsymbol V}} 298 | \newcommand{\W}{{\boldsymbol W}} 299 | \newcommand{\X}{{\boldsymbol X}} 300 | \newcommand{\Y}{{\boldsymbol Y}} 301 | 302 | 303 | 304 | % useful for Fourier analysis 305 | \newcommand{\bits}{\{-1,1\}} 306 | \newcommand{\bitsn}{\{-1,1\}^n} 307 | \newcommand{\bn}{\bitsn} 308 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 309 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 310 | 311 | % if you want 312 | \newcommand{\half}{{\textstyle \frac12}} 313 | 314 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 315 | 316 | 317 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 318 | % Feel free to ignore the rest of this file 319 | 320 | 321 | 322 | \def\ScribeStr{??} 323 | \def\LecStr{??} 324 | \def\LecNum{??} 325 | \def\LecTitle{??} 326 | \def\LecDate{??} 327 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 328 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 329 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 330 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 331 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 332 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 333 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 334 | 335 | \newdimen\headerwidth 336 | 337 | \newcommand{\MakeScribeTop}{ 338 | \noindent 339 | \begin{center} 340 | \framebox{ 341 | \vbox{ 342 | \headerwidth=\textwidth 343 | \advance\headerwidth by -0.22in 344 | \hbox to \headerwidth {\hfill AIML - CS 337} 345 | \vspace{4mm} 346 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 347 | \vspace{2mm} 348 | \hbox to \headerwidth {\hfill \LecDate \hfill} 349 | \vspace{2mm} 350 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 351 | } 352 | } 353 | \end{center} 354 | \vspace*{4mm}} 355 | -------------------------------------------------------------------------------- /Lecture 15/Lecture_15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 15/Lecture_15.pdf 
-------------------------------------------------------------------------------- /Lecture 15/graph.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 15/graph.jpeg -------------------------------------------------------------------------------- /Lecture 15/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass[12pt]{article} 2 | \usepackage[utf8]{inputenc} 3 | \usepackage[T1]{fontenc} 4 | 5 | \usepackage{biblatex} 6 | \usepackage{scribe} 7 | \usepackage{hyperref} 8 | \usepackage{listings} 9 | \usepackage{float} 10 | \addbibresource{mybib.bib} 11 | 12 | \hypersetup{ 13 | colorlinks=true, 14 | linkcolor=blue, 15 | filecolor=magenta, 16 | urlcolor=cyan, 17 | pdftitle={Lecture 15: Kernels and Gaussian Processes}, 18 | pdfpagemode=FullScreen, 19 | } 20 | 21 | \Scribe{Group 31 and Group 32} 22 | \Lecturer{Abir De} 23 | \LectureNumber{15} 24 | \LectureDate{6th October 2022} 25 | \LectureTitle{Kernels and Gaussian Processes} 26 | 27 | \lstset{style=mystyle} 28 | 29 | 30 | 31 | \begin{document} 32 | \MakeScribeTop 33 | 34 | %############################################################# 35 | %############################################################# 36 | %############################################################# 37 | %############################################################# 38 | 39 | \section{Prologue} 40 | By now, we have studied various kernel tricks which can be used for separating data with a non-linear relationship, simply by defining an appropriate Gram matrix representing the kernel. Further, the trick extends to non-parametric regression\cite{reg}, classification, and PCA (kernel PCA\cite{pca}) as well. In this lecture we look at another application of kernels in the context of Gaussian Processes, and at how to deal with small training sets while still obtaining fair results. 41 | 42 | \section{Problem} 43 | Consider the standard linear regression model: 44 | \[w^{\text{regression}} = \arg\min_w \left[\sum_{i\in D}(y_i -w^Tx_i)^2\right]\] 45 | The solution to the above problem is: 46 | \[w^{\text{regression}} = (\sum_{i \in D}x_ix_i^T)^{-1}\cdot(\sum_{i\in D}x_iy_i)\] 47 | Predictions are made using the function $f:\mathbb{R}^d\rightarrow \mathbb{R}$, $f(x_i) = w^T x_i$. 48 | Notice that when we substitute an input from the training set, in general 49 | \[f(x_i) \neq y_i\] 50 | 51 | An alternative approach is to obtain a distribution over the function we are trying to predict, such that every point in the training data has exactly its training label as output under the hypothesis. More precisely, 52 | we would like to design a non-linear estimator $f$ to model the training data with the additional restriction that $f(x_i) = y_i$ for all $x_i \in D$; for the other points $x \notin D$, $f(x)$ is a random variable with an associated probability distribution, while still having certain guarantees on test-set accuracy, assuming the train and test sets come from the same distribution.
We can visualise such a function as shown in the figure below: 53 | 54 | 55 | \begin{figure}[H] 56 | \centering 57 | \includegraphics[width = 0.55\textwidth]{graph.jpeg} 58 | \caption{Graphical Representation} 59 | \end{figure} 60 | 61 | In this figure, the points marked \texttt{+} are the points in our dataset, for which the output is exactly one value, while it is a distribution (given by the shaded area) for all the other points. 62 | 63 | \section{Gaussian Process} 64 | 65 | Gaussian processes are a method for non-parametric estimation that provides confidence on the seen data and a distribution on unseen data. For any subset of the training data, the joint prior distribution of that subset must be normally distributed for some mean and covariance matrix. For any subset $\{x_1, \dots, x_m\}$ of the training data, the prior distribution follows: 66 | 67 | \[ 68 | \begin{bmatrix} 69 | f(x_1) \\ 70 | \vdots \\ 71 | f(x_m) \\ 72 | \end{bmatrix} 73 | \quad \sim \quad 74 | \mathcal{N} (\vec{\mu} (x_1,\dots ,x_m),\Sigma (x_1,\dots,x_m) ) 75 | \] 76 | \\ 77 | where $\vec{\mu}$ and $\Sigma$ are deterministic functions. 78 | 79 | 80 | On introducing a new data point into any subset of the training data, we expect the resulting conditional distribution to also follow the normal distribution. For the data point $x^*$ 81 | 82 | \[ 83 | f(x^*) \vert (f(x_1), \dots f(x_m), x^*) \sim \mathcal{N}(\vec{\mu}(x_1,\dots,x_m,x^*), \Sigma(x_1,\dots,x_m,x^*)) 84 | \] 85 | 86 | 87 | 88 | As described earlier, if the newly introduced data point is already in the training data, then we expect $f(x^*)$ to take the value that was present in the training set. This means that for any $x^*$ such that $x^* \in \{x_1,\dots,x_m\} $ 89 | 90 | 91 | \[ 92 | f(x^*) \vert (f(x_1), \dots f(x_m), x^*) \sim \mathcal{N}(f(x^*), 0) 93 | \] 94 | 95 | Our aim is to design a matrix $\Sigma$ that satisfies such a property, i.e., the posterior for any point in the training data must have zero variance. 96 | 97 | 98 | Let $X_{D} = [X^{d}_1 X^{d}_2 \dots X^{d}_n]^T$ denote the points in the train set and $X_{T} = [X^{t}_1 X^{t}_2 \dots X^{t}_m]^T$ denote the points in the test set. $f(X_D)$ and $f(X_T)$ denote random variables depending on the input. 99 | \[f(X) = 100 | \begin{pmatrix} 101 | f(X^{d}_{1})\\ 102 | f(X^{d}_{2})\\ 103 | \vdots\\ 104 | 105 | f(X^{t}_{m})\\ 106 | \end{pmatrix} 107 | \] 108 | These random variables are dependent on each other, and we need to model the dependency between them. We model $f(X)$ as a multi-variate Gaussian distribution. Therefore, 109 | \[ 110 | \begin{bmatrix} 111 | f(X_{D}) \\ 112 | f(X_{T}) \\ 113 | \end{bmatrix} \sim 114 | \mathcal{N} 115 | \left( 116 | \begin{bmatrix} 117 | m(X_{D}) \\ 118 | m(X_{T}) \\ 119 | \end{bmatrix} 120 | , 121 | \begin{bmatrix} 122 | k(X_{D}, X_{D}) & k(X_{D}, X_{T})\\ 123 | k(X_{D}, X_{T})^{T} & k(X_{T}, X_{T}) \\ 124 | \end{bmatrix} 125 | \right) 126 | \] 127 | Here, $m(\cdot)$ is a function denoting the mean, and $k(\cdot,\cdot)$ is a kernel function used for creating the covariance matrix. The reason we use a kernel function here is that we want to model some notion of similarity between the random variables, with high correlation implying higher similarity.
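As an illustrative aside (our own addition, not from the lecture), the following minimal sketch shows how such a block covariance matrix could be assembled in practice, assuming a squared-exponential (RBF) kernel; the kernel choice, function names, and toy inputs are purely hypothetical.
\begin{lstlisting}[language=Python]
import numpy as np

def rbf_kernel(A, B, sigma=1.0):
    # k(a, b) = exp(-||a - b||^2 / (2 sigma^2)), evaluated for all pairs of rows
    sq_dists = np.sum(A**2, axis=1)[:, None] + np.sum(B**2, axis=1)[None, :] - 2.0 * A @ B.T
    return np.exp(-sq_dists / (2.0 * sigma**2))

# Toy inputs: X_D = train points (n x d), X_T = test points (m x d)
X_D = np.random.rand(5, 1)
X_T = np.linspace(0.0, 1.0, 20).reshape(-1, 1)

# Blocks of the joint covariance of [f(X_D); f(X_T)]
K_DD = rbf_kernel(X_D, X_D)
K_DT = rbf_kernel(X_D, X_T)
K_TT = rbf_kernel(X_T, X_T)
Sigma = np.block([[K_DD, K_DT],
                  [K_DT.T, K_TT]])
\end{lstlisting}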
With this model, we can determine the prior distribution of the random variables $f(\cdot)$: 128 | \[ 129 | P(f(X))= 130 | P 131 | \left( 132 | \begin{bmatrix} 133 | f(X_{D}) \\ 134 | f(X_{T}) \\ 135 | \end{bmatrix} 136 | \right)= 137 | \frac{1}{\text{(some constant)}\cdot \det(\Sigma)^{0.5} } 138 | \exp(-0.5(f(X)-\mu)^{T}\Sigma^{-1}(f(X)-\mu)) 139 | \] 140 | where $\mu = [m(X_{D})\; m(X_{T})]^{T}$ and $\Sigma$ is the block covariance matrix above. 141 | Our aim now is to get the distribution of $f(X_{T})$ given the training labels $\{y_{i}\}$ and $X_T$, for which we can use Bayes' rule. 142 | 143 | \subsection{Evaluating the posterior} 144 | We model the predictions $Y = [y_{1} y_{2} \dots y_{n}]^{T}$ as 145 | \[Y = I\cdot f(X_{D})\] 146 | A more general model would include additive Gaussian noise, $Y = f(X_D) + \eta $, but for simplicity we assume the noise to be zero. 147 | Before the actual derivation, we note two important results. 148 | 149 | \subsubsection{Gaussian Marginalisation Rule} 150 | \textbf{If we marginalize out variables in a multivariate Gaussian distribution, the result is still a Gaussian distribution. } Mathematically, if $X=[X_1, X_2, \dots X_n]^{T}$ is a multi-variate Gaussian random variable ($\sim \mathcal{N}(\mu, \Sigma)$), then any subset of $X$ is multi-variate Gaussian with mean and covariance given by $(A\mu, A\Sigma A^{T})$. $A$ can be constructed by using $e_{i}^{T}$ as rows. For example, if $n=3$, and the subset is constructed using $[X_{1}\text{ }X_{3}]$, then 151 | \[A = 152 | \begin{pmatrix} 153 | 1 & 0 & 0 \\ 154 | 0 & 0 & 1 \\ 155 | \end{pmatrix} 156 | \] 157 | 158 | \subsubsection{Conditional Rule for multi-variate Gaussian} 159 | Intuitively, if we start with a Gaussian distribution and update our knowledge given the observed value of one of its components (that is, find the conditional probability distribution), then the resulting distribution is still Gaussian! Mathematically,\\ 160 | let $[x \text{ }y]$ jointly form a multi-variate Gaussian random variable, 161 | \[ 162 | \begin{bmatrix} 163 | x \\ 164 | y \\ 165 | \end{bmatrix} \sim 166 | \mathcal{N} 167 | \left( 168 | \begin{bmatrix} 169 | \mu_x \\ 170 | \mu_y \\ 171 | \end{bmatrix}, 172 | \begin{bmatrix} 173 | \Sigma_{xx} & \Sigma_{xy}\\ 174 | \Sigma_{yx} & \Sigma_{yy}\\ 175 | \end{bmatrix} 176 | \right) 177 | \] 178 | 179 | Here $\Sigma_{ab}$ represents the covariance matrix between random vectors $a$ and $b$, and $f(\cdot)$ represents the PDF. 180 | 181 | \[f(x\vert y) = \frac{f(x,y)}{f(y)}\] 182 | Now, we substitute $f(x,y)$ with the expression for the multi-variate Gaussian distribution ($\mathcal{N}(\mu, \Sigma)$), and $f(y)$ with $\mathcal{N}(\mu_{y}, \Sigma_{yy})$. 183 | Simplifying the equations, we get 184 | \[ 185 | f(x\vert y)= 186 | \mathcal{N} 187 | (\Sigma_{xy}\Sigma_{yy}^{-1}y, 188 | \Sigma_{xx} - \Sigma_{xy}\Sigma_{yy}^{-1}\Sigma_{yx}) 189 | \] 190 | ($\mu$ is assumed to be zero for simplicity)\\ 191 | To get the detailed derivation, please see \cite{prop}. 192 | \begin{figure}[h] 193 | \begin{center} \includegraphics[width=0.8\textwidth] {rules.png} 194 | \caption{Joint, Marginal, Conditional for bivariate Gaussian. Source: \cite{prop}} 195 | \end{center} 196 | \end{figure} 197 | 198 | \subsection{Getting to the posterior} 199 | We modelled $Y=f(X_D)$; now is the time to use it.
200 | \[ 201 | \begin{bmatrix} 202 | f(X_{T}) \\ 203 | Y \\ 204 | \end{bmatrix} 205 | \sim 206 | \mathcal{N} 207 | \left( 208 | \begin{bmatrix} 209 | \mu(X_{T}) \\ 210 | \mu(X_{D}) \\ 211 | \end{bmatrix}, 212 | \begin{bmatrix} 213 | k(X_{T}, X_{T}) & k(X_{T}, X_{D})\\ 214 | k(X_{T}, X_{D})^{T} & k(X_{D}, X_{D})\\ 215 | \end{bmatrix} 216 | \right) 217 | \] 218 | Using the conditional rule described above, 219 | \[ 220 | P(f(X_{T})\vert Y) = \mathcal{N}(\mu_{posterior}, \sigma_{posterior}^2) 221 | \] 222 | where 223 | \[ 224 | \mu_{posterior} = \mu(X_{T}) + k(X_{T}, X_{D})(k(X_{D},X_{D}))^{-1}(Y-\mu(X_{D}))\\ 225 | \] 226 | \[ 227 | \sigma_{posterior}^{2} = k(X_T,X_T) - k(X_T, X_D)(k(X_{D},X_{D}))^{-1}k(X_T, X_D)^{T} 228 | \] 229 | 230 | 231 | 232 | \section{Aftermath\footnote{pun intended (credits Group 31)}} 233 | \subsection{Posterior mean} 234 | For the sake of investigation, let's assume $\mu(\cdot) = 0$. \\ 235 | Now, if $X_{D} = X_{T}$, then $\mu_{posterior} = Y$, which is desirable because we want the mean of the predictions at training points to be the same as the given labels in the train set. The mean value at a single test location, say $x_{i}^{t}$, is a weighted sum of all the observations $Y$. The weights are defined by the kernel between the test location $x_{i}^{t}$ and all training locations in $X_{D}$. 236 | 237 | \subsection{Posterior variance} 238 | Observe that if $X_{D} = X_{T}$, we get $\sigma_{posterior}^{2} = 0$. This means that the prediction for a point in the train set is exactly the mean, which in turn is the $Y$ of the training set. 239 | 240 | %\subsection{Example} 241 | %This 242 | %example is taken from here \cite{blog}. 243 | %The train set is generated by random sampling $x\sim$U([$0, 2\pi$]), and $y=Sin(x) + \eps(noise)$. The blue bands represent a 95\% confidence interval. Notice that for test points near the train points, variance is quite low and ultimate zero at exactly the train points. 244 | 245 | %\begin{figure}[h] 246 | % \begin{center} \includegraphics[width=0.6\textwidth] {sin_eg.png} 247 | % \caption{Red points are for test points, and blue ones for train points} 248 | % \end{center} 249 | %\end{figure} 250 | 251 | \section{Conclusion} 252 | We started with a train set \{($x^{d}_i, y^{d}_i$)\} and test inputs \{$x^{t}_{i}$\}, and devised a function that yields no error for inputs in the train set and low error on other points. The described method is particularly useful in low-data situations. A detailed study of Gaussian processes can be found in the reference \cite{gaussianprocessmachinelearning}.
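As a closing illustration (our own addition, not part of the lecture), here is a minimal NumPy sketch of the posterior formulas derived above, assuming zero prior mean, a noise-free train set, and the same hypothetical RBF kernel as in the earlier sketch; the tiny jitter added before inversion is only for numerical stability.
\begin{lstlisting}[language=Python]
import numpy as np

def rbf_kernel(A, B, sigma=1.0):
    # k(a, b) = exp(-||a - b||^2 / (2 sigma^2)) for all pairs of rows
    sq = np.sum(A**2, axis=1)[:, None] + np.sum(B**2, axis=1)[None, :] - 2.0 * A @ B.T
    return np.exp(-sq / (2.0 * sigma**2))

# Noise-free training data (Y = f(X_D)) and test inputs
X_D = np.array([[0.1], [0.4], [0.9]])
Y = np.sin(2.0 * np.pi * X_D).ravel()
X_T = np.linspace(0.0, 1.0, 50).reshape(-1, 1)

K_DD = rbf_kernel(X_D, X_D) + 1e-10 * np.eye(len(X_D))  # jitter for stability
K_TD = rbf_kernel(X_T, X_D)
K_TT = rbf_kernel(X_T, X_T)

K_DD_inv = np.linalg.inv(K_DD)
mu_post = K_TD @ K_DD_inv @ Y                    # posterior mean at X_T
Sigma_post = K_TT - K_TD @ K_DD_inv @ K_TD.T     # posterior covariance at X_T
\end{lstlisting}
In line with the Aftermath section, evaluating the same expressions at $X_T = X_D$ returns $\mu_{posterior} = Y$ and a posterior variance that is zero up to the added jitter.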
253 | 254 | \clearpage 255 | 256 | %%%%%%%%%%% If you don't have citations then comment the lines below: 257 | % 258 | \printbibliography 259 | 260 | 261 | %%%%%%%%%%% end of doc 262 | \end{document} -------------------------------------------------------------------------------- /Lecture 15/mybib.bib: -------------------------------------------------------------------------------- 1 | @ONLINE{prop, 2 | url ={https://fabiandablander.com/statistics/Two-Properties.html}, 3 | title = {Properties of multi-variate Gaussian} 4 | } 5 | 6 | @ONLINE{blog, 7 | url ={https://towardsdatascience.com/understanding-gaussian-process-the-socratic-way-ba02369d804}, 8 | title = {Gaussian Process} 9 | } 10 | @ONLINE{reg, 11 | url ={https://en.wikipedia.org/wiki/Nonparametric_regression}, 12 | title = {Non-parametric regression} 13 | } 14 | 15 | @ONLINE{pca, 16 | url ={https://www.geeksforgeeks.org/ml-introduction-to-kernel-pca/}, 17 | title = {Kernel PCA} 18 | } 19 | 20 | @book{gaussianprocessmachinelearning, 21 | title={Gaussian Processes for Machine Learning}, 22 | author={C. E. Rasmussen and C. K. I. Williams}, 23 | year={2006}, 24 | publisher={The MIT Press} 25 | } -------------------------------------------------------------------------------- /Lecture 15/rules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 15/rules.png -------------------------------------------------------------------------------- /Lecture 15/scribe.sty: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below... 
62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | 122 | \lstset{% 123 | language = Julia, 124 | basicstyle = \ttfamily, 125 | keywordstyle = \bfseries\color{blue}, 126 | stringstyle = \color{magenta}, 127 | commentstyle = \color{ForestGreen}, 128 | showstringspaces = false, 129 | } 130 | 131 | 132 | \newtheorem{theorem}{Theorem}[section] 133 | \newtheorem{lemma}[theorem]{Lemma} 134 | \newtheorem{claim}[theorem]{Claim} 135 | \newtheorem{proposition}[theorem]{Proposition} 136 | \newtheorem{corollary}[theorem]{Corollary} 137 | \newtheorem{fact}[theorem]{Fact} 138 | \newtheorem{example}[theorem]{Example} 139 | \newtheorem{notation}[theorem]{Notation} 140 | \newtheorem{observation}[theorem]{Observation} 141 | \newtheorem{conjecture}[theorem]{Conjecture} 142 | 143 | \theoremstyle{definition} 144 | \newtheorem{definition}[theorem]{Definition} 145 | 146 | \theoremstyle{remark} 147 | \newtheorem{remark}[theorem]{Remark} 148 | 149 | % Setting the theorem style back to plain in case theorems are defined in the main file 150 | \theoremstyle{plain} 151 | 152 | 153 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 154 | % Useful macros 155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 156 | 157 | % for temporarily chunks of text 158 | \newcommand{\ignore}[1]{} 159 | 160 | % Probability/expectation operators. The ones ending in x should be used if you want 161 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 162 | % go below and to the right. NB: \P is remapped below for the complexity class P. 
163 | \renewcommand{\Pr}{{\bf Pr}} 164 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 165 | \newcommand{\E}{{\bf E}} 166 | \newcommand{\Ex}{\mathop{\bf E\/}} 167 | \newcommand{\Var}{{\bf Var}} 168 | \newcommand{\Varx}{\mathop{\bf Var\/}} 169 | \newcommand{\Cov}{{\bf Cov}} 170 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 171 | 172 | % shortcuts for symbol names that are too long to type 173 | \newcommand{\eps}{\epsilon} 174 | \newcommand{\lam}{\lambda} 175 | \renewcommand{\l}{\ell} 176 | \newcommand{\la}{\langle} 177 | \newcommand{\ra}{\rangle} 178 | \newcommand{\wh}{\widehat} 179 | \newcommand{\wt}{\widetilde} 180 | 181 | % "blackboard-fonted" letters for the reals, naturals etc. 182 | \newcommand{\R}{\mathbb R} 183 | \newcommand{\N}{\mathbb N} 184 | \newcommand{\Z}{\mathbb Z} 185 | \newcommand{\F}{\mathbb F} 186 | \newcommand{\Q}{\mathbb Q} 187 | \newcommand{\C}{\mathbb C} 188 | 189 | % operators that should be typeset in Roman font 190 | \newcommand{\poly}{\mathrm{poly}} 191 | \newcommand{\polylog}{\mathrm{polylog}} 192 | \newcommand{\sgn}{\mathrm{sgn}} 193 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 194 | \newcommand{\val}{{\mathrm{val}}} 195 | 196 | % complexity classes 197 | \renewcommand{\P}{\mathrm{P}} 198 | \newcommand{\NP}{\mathrm{NP}} 199 | \newcommand{\BPP}{\mathrm{BPP}} 200 | \newcommand{\DTIME}{\mathrm{DTIME}} 201 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 202 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 203 | \newcommand{\NTIME}{\mathrm{NTIME}} 204 | 205 | % values associated to optimization algorithm instances 206 | \newcommand{\Opt}{{\mathsf{Opt}}} 207 | \newcommand{\Alg}{{\mathsf{Alg}}} 208 | \newcommand{\Lp}{{\mathsf{Lp}}} 209 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 210 | \newcommand{\Exp}{{\mathsf{Exp}}} 211 | 212 | % if you think the sum and product signs are too big in your math mode; x convention 213 | % as in the probability operators 214 | \newcommand{\littlesum}{{\textstyle \sum}} 215 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 216 | \newcommand{\littleprod}{{\textstyle \prod}} 217 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 218 | 219 | % horizontal line across the page 220 | \newcommand{\horz}{ 221 | \vspace{-.4in} 222 | \begin{center} 223 | \begin{tabular}{p{\textwidth}}\\ 224 | \hline 225 | \end{tabular} 226 | \end{center} 227 | } 228 | 229 | % calligraphic letters 230 | \newcommand{\calA}{{\cal A}} 231 | \newcommand{\calB}{{\cal B}} 232 | \newcommand{\calC}{{\cal C}} 233 | \newcommand{\calD}{{\cal D}} 234 | \newcommand{\calE}{{\cal E}} 235 | \newcommand{\calF}{{\cal F}} 236 | \newcommand{\calG}{{\cal G}} 237 | \newcommand{\calH}{{\cal H}} 238 | \newcommand{\calI}{{\cal I}} 239 | \newcommand{\calJ}{{\cal J}} 240 | \newcommand{\calK}{{\cal K}} 241 | \newcommand{\calL}{{\cal L}} 242 | \newcommand{\calM}{{\cal M}} 243 | \newcommand{\calN}{{\cal N}} 244 | \newcommand{\calO}{{\cal O}} 245 | \newcommand{\calP}{{\cal P}} 246 | \newcommand{\calQ}{{\cal Q}} 247 | \newcommand{\calR}{{\cal R}} 248 | \newcommand{\calS}{{\cal S}} 249 | \newcommand{\calT}{{\cal T}} 250 | \newcommand{\calU}{{\cal U}} 251 | \newcommand{\calV}{{\cal V}} 252 | \newcommand{\calW}{{\cal W}} 253 | \newcommand{\calX}{{\cal X}} 254 | \newcommand{\calY}{{\cal Y}} 255 | \newcommand{\calZ}{{\cal Z}} 256 | 257 | % bold letters (useful for random variables) 258 | \renewcommand{\a}{{\boldsymbol a}} 259 | \renewcommand{\b}{{\boldsymbol b}} 260 | \renewcommand{\c}{{\boldsymbol c}} 261 | \renewcommand{\d}{{\boldsymbol d}} 262 | \newcommand{\e}{{\boldsymbol e}} 263 | \newcommand{\f}{{\boldsymbol f}} 264 | 
\newcommand{\g}{{\boldsymbol g}} 265 | \newcommand{\h}{{\boldsymbol h}} 266 | \renewcommand{\i}{{\boldsymbol i}} 267 | \renewcommand{\j}{{\boldsymbol j}} 268 | \renewcommand{\k}{{\boldsymbol k}} 269 | \newcommand{\m}{{\boldsymbol m}} 270 | \newcommand{\n}{{\boldsymbol n}} 271 | \renewcommand{\o}{{\boldsymbol o}} 272 | \newcommand{\p}{{\boldsymbol p}} 273 | \newcommand{\q}{{\boldsymbol q}} 274 | \renewcommand{\r}{{\boldsymbol r}} 275 | \newcommand{\s}{{\boldsymbol s}} 276 | \renewcommand{\t}{{\boldsymbol t}} 277 | \renewcommand{\u}{{\boldsymbol u}} 278 | \renewcommand{\v}{{\boldsymbol v}} 279 | \newcommand{\w}{{\boldsymbol w}} 280 | \newcommand{\x}{{\boldsymbol x}} 281 | \newcommand{\y}{{\boldsymbol y}} 282 | \newcommand{\z}{{\boldsymbol z}} 283 | \newcommand{\A}{{\boldsymbol A}} 284 | \newcommand{\B}{{\boldsymbol B}} 285 | \newcommand{\D}{{\boldsymbol D}} 286 | \newcommand{\G}{{\boldsymbol G}} 287 | \renewcommand{\H}{{\boldsymbol H}} 288 | \newcommand{\I}{{\boldsymbol I}} 289 | \newcommand{\J}{{\boldsymbol J}} 290 | \newcommand{\K}{{\boldsymbol K}} 291 | \renewcommand{\L}{{\boldsymbol L}} 292 | \newcommand{\M}{{\boldsymbol M}} 293 | \renewcommand{\O}{{\boldsymbol O}} 294 | \renewcommand{\S}{{\boldsymbol S}} 295 | \newcommand{\T}{{\boldsymbol T}} 296 | \newcommand{\U}{{\boldsymbol U}} 297 | \newcommand{\V}{{\boldsymbol V}} 298 | \newcommand{\W}{{\boldsymbol W}} 299 | \newcommand{\X}{{\boldsymbol X}} 300 | \newcommand{\Y}{{\boldsymbol Y}} 301 | 302 | 303 | 304 | % useful for Fourier analysis 305 | \newcommand{\bits}{\{-1,1\}} 306 | \newcommand{\bitsn}{\{-1,1\}^n} 307 | \newcommand{\bn}{\bitsn} 308 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 309 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 310 | 311 | % if you want 312 | \newcommand{\half}{{\textstyle \frac12}} 313 | 314 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 315 | 316 | 317 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 318 | % Feel free to ignore the rest of this file 319 | 320 | 321 | 322 | \def\ScribeStr{??} 323 | \def\LecStr{??} 324 | \def\LecNum{??} 325 | \def\LecTitle{??} 326 | \def\LecDate{??} 327 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 328 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 329 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 330 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 331 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 332 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 333 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 334 | 335 | \newdimen\headerwidth 336 | 337 | \newcommand{\MakeScribeTop}{ 338 | \noindent 339 | \begin{center} 340 | \framebox{ 341 | \vbox{ 342 | \headerwidth=\textwidth 343 | \advance\headerwidth by -0.22in 344 | \hbox to \headerwidth {\hfill AIML - CS 337} 345 | \vspace{4mm} 346 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 347 | \vspace{2mm} 348 | \hbox to \headerwidth {\hfill \LecDate \hfill} 349 | \vspace{2mm} 350 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 351 | } 352 | } 353 | \end{center} 354 | \vspace*{4mm}} 355 | -------------------------------------------------------------------------------- /Lecture 15/sin_eg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 15/sin_eg.png 
-------------------------------------------------------------------------------- /Lecture 16/Lecture16.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 16/Lecture16.pdf -------------------------------------------------------------------------------- /Lecture 16/scribe.sty: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below... 
62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | 122 | \lstset{% 123 | language = Julia, 124 | basicstyle = \ttfamily, 125 | keywordstyle = \bfseries\color{blue}, 126 | stringstyle = \color{magenta}, 127 | commentstyle = \color{ForestGreen}, 128 | showstringspaces = false, 129 | } 130 | 131 | 132 | \newtheorem{theorem}{Theorem}[section] 133 | \newtheorem{lemma}[theorem]{Lemma} 134 | \newtheorem{claim}[theorem]{Claim} 135 | \newtheorem{proposition}[theorem]{Proposition} 136 | \newtheorem{corollary}[theorem]{Corollary} 137 | \newtheorem{fact}[theorem]{Fact} 138 | \newtheorem{example}[theorem]{Example} 139 | \newtheorem{notation}[theorem]{Notation} 140 | \newtheorem{observation}[theorem]{Observation} 141 | \newtheorem{conjecture}[theorem]{Conjecture} 142 | 143 | \theoremstyle{definition} 144 | \newtheorem{definition}[theorem]{Definition} 145 | 146 | \theoremstyle{remark} 147 | \newtheorem{remark}[theorem]{Remark} 148 | 149 | % Setting the theorem style back to plain in case theorems are defined in the main file 150 | \theoremstyle{plain} 151 | 152 | 153 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 154 | % Useful macros 155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 156 | 157 | % for temporarily chunks of text 158 | \newcommand{\ignore}[1]{} 159 | 160 | % Probability/expectation operators. The ones ending in x should be used if you want 161 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 162 | % go below and to the right. NB: \P is remapped below for the complexity class P. 
163 | \renewcommand{\Pr}{{\bf Pr}} 164 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 165 | \newcommand{\E}{{\bf E}} 166 | \newcommand{\Ex}{\mathop{\bf E\/}} 167 | \newcommand{\Var}{{\bf Var}} 168 | \newcommand{\Varx}{\mathop{\bf Var\/}} 169 | \newcommand{\Cov}{{\bf Cov}} 170 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 171 | 172 | % shortcuts for symbol names that are too long to type 173 | \newcommand{\eps}{\epsilon} 174 | \newcommand{\lam}{\lambda} 175 | \renewcommand{\l}{\ell} 176 | \newcommand{\la}{\langle} 177 | \newcommand{\ra}{\rangle} 178 | \newcommand{\wh}{\widehat} 179 | \newcommand{\wt}{\widetilde} 180 | 181 | % "blackboard-fonted" letters for the reals, naturals etc. 182 | \newcommand{\R}{\mathbb R} 183 | \newcommand{\N}{\mathbb N} 184 | \newcommand{\Z}{\mathbb Z} 185 | \newcommand{\F}{\mathbb F} 186 | \newcommand{\Q}{\mathbb Q} 187 | \newcommand{\C}{\mathbb C} 188 | 189 | % operators that should be typeset in Roman font 190 | \newcommand{\poly}{\mathrm{poly}} 191 | \newcommand{\polylog}{\mathrm{polylog}} 192 | \newcommand{\sgn}{\mathrm{sgn}} 193 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 194 | \newcommand{\val}{{\mathrm{val}}} 195 | 196 | % complexity classes 197 | \renewcommand{\P}{\mathrm{P}} 198 | \newcommand{\NP}{\mathrm{NP}} 199 | \newcommand{\BPP}{\mathrm{BPP}} 200 | \newcommand{\DTIME}{\mathrm{DTIME}} 201 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 202 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 203 | \newcommand{\NTIME}{\mathrm{NTIME}} 204 | 205 | % values associated to optimization algorithm instances 206 | \newcommand{\Opt}{{\mathsf{Opt}}} 207 | \newcommand{\Alg}{{\mathsf{Alg}}} 208 | \newcommand{\Lp}{{\mathsf{Lp}}} 209 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 210 | \newcommand{\Exp}{{\mathsf{Exp}}} 211 | 212 | % if you think the sum and product signs are too big in your math mode; x convention 213 | % as in the probability operators 214 | \newcommand{\littlesum}{{\textstyle \sum}} 215 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 216 | \newcommand{\littleprod}{{\textstyle \prod}} 217 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 218 | 219 | % horizontal line across the page 220 | \newcommand{\horz}{ 221 | \vspace{-.4in} 222 | \begin{center} 223 | \begin{tabular}{p{\textwidth}}\\ 224 | \hline 225 | \end{tabular} 226 | \end{center} 227 | } 228 | 229 | % calligraphic letters 230 | \newcommand{\calA}{{\cal A}} 231 | \newcommand{\calB}{{\cal B}} 232 | \newcommand{\calC}{{\cal C}} 233 | \newcommand{\calD}{{\cal D}} 234 | \newcommand{\calE}{{\cal E}} 235 | \newcommand{\calF}{{\cal F}} 236 | \newcommand{\calG}{{\cal G}} 237 | \newcommand{\calH}{{\cal H}} 238 | \newcommand{\calI}{{\cal I}} 239 | \newcommand{\calJ}{{\cal J}} 240 | \newcommand{\calK}{{\cal K}} 241 | \newcommand{\calL}{{\cal L}} 242 | \newcommand{\calM}{{\cal M}} 243 | \newcommand{\calN}{{\cal N}} 244 | \newcommand{\calO}{{\cal O}} 245 | \newcommand{\calP}{{\cal P}} 246 | \newcommand{\calQ}{{\cal Q}} 247 | \newcommand{\calR}{{\cal R}} 248 | \newcommand{\calS}{{\cal S}} 249 | \newcommand{\calT}{{\cal T}} 250 | \newcommand{\calU}{{\cal U}} 251 | \newcommand{\calV}{{\cal V}} 252 | \newcommand{\calW}{{\cal W}} 253 | \newcommand{\calX}{{\cal X}} 254 | \newcommand{\calY}{{\cal Y}} 255 | \newcommand{\calZ}{{\cal Z}} 256 | 257 | % bold letters (useful for random variables) 258 | \renewcommand{\a}{{\boldsymbol a}} 259 | \renewcommand{\b}{{\boldsymbol b}} 260 | \renewcommand{\c}{{\boldsymbol c}} 261 | \renewcommand{\d}{{\boldsymbol d}} 262 | \newcommand{\e}{{\boldsymbol e}} 263 | \newcommand{\f}{{\boldsymbol f}} 264 | 
\newcommand{\g}{{\boldsymbol g}} 265 | \newcommand{\h}{{\boldsymbol h}} 266 | \renewcommand{\i}{{\boldsymbol i}} 267 | \renewcommand{\j}{{\boldsymbol j}} 268 | \renewcommand{\k}{{\boldsymbol k}} 269 | \newcommand{\m}{{\boldsymbol m}} 270 | \newcommand{\n}{{\boldsymbol n}} 271 | \renewcommand{\o}{{\boldsymbol o}} 272 | \newcommand{\p}{{\boldsymbol p}} 273 | \newcommand{\q}{{\boldsymbol q}} 274 | \renewcommand{\r}{{\boldsymbol r}} 275 | \newcommand{\s}{{\boldsymbol s}} 276 | \renewcommand{\t}{{\boldsymbol t}} 277 | \renewcommand{\u}{{\boldsymbol u}} 278 | \renewcommand{\v}{{\boldsymbol v}} 279 | \newcommand{\w}{{\boldsymbol w}} 280 | \newcommand{\x}{{\boldsymbol x}} 281 | \newcommand{\y}{{\boldsymbol y}} 282 | \newcommand{\z}{{\boldsymbol z}} 283 | \newcommand{\A}{{\boldsymbol A}} 284 | \newcommand{\B}{{\boldsymbol B}} 285 | \newcommand{\D}{{\boldsymbol D}} 286 | \newcommand{\G}{{\boldsymbol G}} 287 | \renewcommand{\H}{{\boldsymbol H}} 288 | \newcommand{\I}{{\boldsymbol I}} 289 | \newcommand{\J}{{\boldsymbol J}} 290 | \newcommand{\K}{{\boldsymbol K}} 291 | \renewcommand{\L}{{\boldsymbol L}} 292 | \newcommand{\M}{{\boldsymbol M}} 293 | \renewcommand{\O}{{\boldsymbol O}} 294 | \renewcommand{\S}{{\boldsymbol S}} 295 | \newcommand{\T}{{\boldsymbol T}} 296 | \newcommand{\U}{{\boldsymbol U}} 297 | \newcommand{\V}{{\boldsymbol V}} 298 | \newcommand{\W}{{\boldsymbol W}} 299 | \newcommand{\X}{{\boldsymbol X}} 300 | \newcommand{\Y}{{\boldsymbol Y}} 301 | 302 | 303 | 304 | % useful for Fourier analysis 305 | \newcommand{\bits}{\{-1,1\}} 306 | \newcommand{\bitsn}{\{-1,1\}^n} 307 | \newcommand{\bn}{\bitsn} 308 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 309 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 310 | 311 | % if you want 312 | \newcommand{\half}{{\textstyle \frac12}} 313 | 314 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 315 | 316 | 317 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 318 | % Feel free to ignore the rest of this file 319 | 320 | 321 | 322 | \def\ScribeStr{??} 323 | \def\LecStr{??} 324 | \def\LecNum{??} 325 | \def\LecTitle{??} 326 | \def\LecDate{??} 327 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 328 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 329 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 330 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 331 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 332 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 333 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 334 | 335 | \newdimen\headerwidth 336 | 337 | \newcommand{\MakeScribeTop}{ 338 | \noindent 339 | \begin{center} 340 | \framebox{ 341 | \vbox{ 342 | \headerwidth=\textwidth 343 | \advance\headerwidth by -0.22in 344 | \hbox to \headerwidth {\hfill AIML - CS 337} 345 | \vspace{4mm} 346 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 347 | \vspace{2mm} 348 | \hbox to \headerwidth {\hfill \LecDate \hfill} 349 | \vspace{2mm} 350 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 351 | } 352 | } 353 | \end{center} 354 | \vspace*{4mm}} 355 | -------------------------------------------------------------------------------- /Lecture 17/Lecture_17.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 17/Lecture_17.pdf 
-------------------------------------------------------------------------------- /Lecture 17/mybib.bib: -------------------------------------------------------------------------------- 1 | @book{rasmussen, 2 | title={Gaussian Processes for Machine Learning}, 3 | author={C. E. Rasmussen and C. K. I. Williams}, 4 | year={2006}, 5 | publisher={The MIT Press} 6 | } 7 | -------------------------------------------------------------------------------- /Lecture 17/scribe.sty: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below... 
62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | 122 | \lstset{% 123 | language = Julia, 124 | basicstyle = \ttfamily, 125 | keywordstyle = \bfseries\color{blue}, 126 | stringstyle = \color{magenta}, 127 | commentstyle = \color{ForestGreen}, 128 | showstringspaces = false, 129 | } 130 | 131 | 132 | \newtheorem{theorem}{Theorem}[section] 133 | \newtheorem{lemma}[theorem]{Lemma} 134 | \newtheorem{claim}[theorem]{Claim} 135 | \newtheorem{proposition}[theorem]{Proposition} 136 | \newtheorem{corollary}[theorem]{Corollary} 137 | \newtheorem{fact}[theorem]{Fact} 138 | \newtheorem{example}[theorem]{Example} 139 | \newtheorem{notation}[theorem]{Notation} 140 | \newtheorem{observation}[theorem]{Observation} 141 | \newtheorem{conjecture}[theorem]{Conjecture} 142 | 143 | \theoremstyle{definition} 144 | \newtheorem{definition}[theorem]{Definition} 145 | 146 | \theoremstyle{remark} 147 | \newtheorem{remark}[theorem]{Remark} 148 | 149 | % Setting the theorem style back to plain in case theorems are defined in the main file 150 | \theoremstyle{plain} 151 | 152 | 153 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 154 | % Useful macros 155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 156 | 157 | % for temporarily chunks of text 158 | \newcommand{\ignore}[1]{} 159 | 160 | % Probability/expectation operators. The ones ending in x should be used if you want 161 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 162 | % go below and to the right. NB: \P is remapped below for the complexity class P. 
163 | \renewcommand{\Pr}{{\bf Pr}} 164 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 165 | \newcommand{\E}{{\bf E}} 166 | \newcommand{\Ex}{\mathop{\bf E\/}} 167 | \newcommand{\Var}{{\bf Var}} 168 | \newcommand{\Varx}{\mathop{\bf Var\/}} 169 | \newcommand{\Cov}{{\bf Cov}} 170 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 171 | 172 | % shortcuts for symbol names that are too long to type 173 | \newcommand{\eps}{\epsilon} 174 | \newcommand{\lam}{\lambda} 175 | \renewcommand{\l}{\ell} 176 | \newcommand{\la}{\langle} 177 | \newcommand{\ra}{\rangle} 178 | \newcommand{\wh}{\widehat} 179 | \newcommand{\wt}{\widetilde} 180 | 181 | % "blackboard-fonted" letters for the reals, naturals etc. 182 | \newcommand{\R}{\mathbb R} 183 | \newcommand{\N}{\mathbb N} 184 | \newcommand{\Z}{\mathbb Z} 185 | \newcommand{\F}{\mathbb F} 186 | \newcommand{\Q}{\mathbb Q} 187 | \newcommand{\C}{\mathbb C} 188 | 189 | % operators that should be typeset in Roman font 190 | \newcommand{\poly}{\mathrm{poly}} 191 | \newcommand{\polylog}{\mathrm{polylog}} 192 | \newcommand{\sgn}{\mathrm{sgn}} 193 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 194 | \newcommand{\val}{{\mathrm{val}}} 195 | 196 | % complexity classes 197 | \renewcommand{\P}{\mathrm{P}} 198 | \newcommand{\NP}{\mathrm{NP}} 199 | \newcommand{\BPP}{\mathrm{BPP}} 200 | \newcommand{\DTIME}{\mathrm{DTIME}} 201 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 202 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 203 | \newcommand{\NTIME}{\mathrm{NTIME}} 204 | 205 | % values associated to optimization algorithm instances 206 | \newcommand{\Opt}{{\mathsf{Opt}}} 207 | \newcommand{\Alg}{{\mathsf{Alg}}} 208 | \newcommand{\Lp}{{\mathsf{Lp}}} 209 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 210 | \newcommand{\Exp}{{\mathsf{Exp}}} 211 | 212 | % if you think the sum and product signs are too big in your math mode; x convention 213 | % as in the probability operators 214 | \newcommand{\littlesum}{{\textstyle \sum}} 215 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 216 | \newcommand{\littleprod}{{\textstyle \prod}} 217 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 218 | 219 | % horizontal line across the page 220 | \newcommand{\horz}{ 221 | \vspace{-.4in} 222 | \begin{center} 223 | \begin{tabular}{p{\textwidth}}\\ 224 | \hline 225 | \end{tabular} 226 | \end{center} 227 | } 228 | 229 | % calligraphic letters 230 | \newcommand{\calA}{{\cal A}} 231 | \newcommand{\calB}{{\cal B}} 232 | \newcommand{\calC}{{\cal C}} 233 | \newcommand{\calD}{{\cal D}} 234 | \newcommand{\calE}{{\cal E}} 235 | \newcommand{\calF}{{\cal F}} 236 | \newcommand{\calG}{{\cal G}} 237 | \newcommand{\calH}{{\cal H}} 238 | \newcommand{\calI}{{\cal I}} 239 | \newcommand{\calJ}{{\cal J}} 240 | \newcommand{\calK}{{\cal K}} 241 | \newcommand{\calL}{{\cal L}} 242 | \newcommand{\calM}{{\cal M}} 243 | \newcommand{\calN}{{\cal N}} 244 | \newcommand{\calO}{{\cal O}} 245 | \newcommand{\calP}{{\cal P}} 246 | \newcommand{\calQ}{{\cal Q}} 247 | \newcommand{\calR}{{\cal R}} 248 | \newcommand{\calS}{{\cal S}} 249 | \newcommand{\calT}{{\cal T}} 250 | \newcommand{\calU}{{\cal U}} 251 | \newcommand{\calV}{{\cal V}} 252 | \newcommand{\calW}{{\cal W}} 253 | \newcommand{\calX}{{\cal X}} 254 | \newcommand{\calY}{{\cal Y}} 255 | \newcommand{\calZ}{{\cal Z}} 256 | 257 | % bold letters (useful for random variables) 258 | \renewcommand{\a}{{\boldsymbol a}} 259 | \renewcommand{\b}{{\boldsymbol b}} 260 | \renewcommand{\c}{{\boldsymbol c}} 261 | \renewcommand{\d}{{\boldsymbol d}} 262 | \newcommand{\e}{{\boldsymbol e}} 263 | \newcommand{\f}{{\boldsymbol f}} 264 | 
\newcommand{\g}{{\boldsymbol g}} 265 | \newcommand{\h}{{\boldsymbol h}} 266 | \renewcommand{\i}{{\boldsymbol i}} 267 | \renewcommand{\j}{{\boldsymbol j}} 268 | \renewcommand{\k}{{\boldsymbol k}} 269 | \newcommand{\m}{{\boldsymbol m}} 270 | \newcommand{\n}{{\boldsymbol n}} 271 | \renewcommand{\o}{{\boldsymbol o}} 272 | \newcommand{\p}{{\boldsymbol p}} 273 | \newcommand{\q}{{\boldsymbol q}} 274 | \renewcommand{\r}{{\boldsymbol r}} 275 | \newcommand{\s}{{\boldsymbol s}} 276 | \renewcommand{\t}{{\boldsymbol t}} 277 | \renewcommand{\u}{{\boldsymbol u}} 278 | \renewcommand{\v}{{\boldsymbol v}} 279 | \newcommand{\w}{{\boldsymbol w}} 280 | \newcommand{\x}{{\boldsymbol x}} 281 | \newcommand{\y}{{\boldsymbol y}} 282 | \newcommand{\z}{{\boldsymbol z}} 283 | \newcommand{\A}{{\boldsymbol A}} 284 | \newcommand{\B}{{\boldsymbol B}} 285 | \newcommand{\D}{{\boldsymbol D}} 286 | \newcommand{\G}{{\boldsymbol G}} 287 | \renewcommand{\H}{{\boldsymbol H}} 288 | \newcommand{\I}{{\boldsymbol I}} 289 | \newcommand{\J}{{\boldsymbol J}} 290 | \newcommand{\K}{{\boldsymbol K}} 291 | \renewcommand{\L}{{\boldsymbol L}} 292 | \newcommand{\M}{{\boldsymbol M}} 293 | \renewcommand{\O}{{\boldsymbol O}} 294 | \renewcommand{\S}{{\boldsymbol S}} 295 | \newcommand{\T}{{\boldsymbol T}} 296 | \newcommand{\U}{{\boldsymbol U}} 297 | \newcommand{\V}{{\boldsymbol V}} 298 | \newcommand{\W}{{\boldsymbol W}} 299 | \newcommand{\X}{{\boldsymbol X}} 300 | \newcommand{\Y}{{\boldsymbol Y}} 301 | 302 | 303 | 304 | % useful for Fourier analysis 305 | \newcommand{\bits}{\{-1,1\}} 306 | \newcommand{\bitsn}{\{-1,1\}^n} 307 | \newcommand{\bn}{\bitsn} 308 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 309 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 310 | 311 | % if you want 312 | \newcommand{\half}{{\textstyle \frac12}} 313 | 314 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 315 | 316 | 317 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 318 | % Feel free to ignore the rest of this file 319 | 320 | 321 | 322 | \def\ScribeStr{??} 323 | \def\LecStr{??} 324 | \def\LecNum{??} 325 | \def\LecTitle{??} 326 | \def\LecDate{??} 327 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 328 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 329 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 330 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 331 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 332 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 333 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 334 | 335 | \newdimen\headerwidth 336 | 337 | \newcommand{\MakeScribeTop}{ 338 | \noindent 339 | \begin{center} 340 | \framebox{ 341 | \vbox{ 342 | \headerwidth=\textwidth 343 | \advance\headerwidth by -0.22in 344 | \hbox to \headerwidth {\hfill AIML - CS 337} 345 | \vspace{4mm} 346 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 347 | \vspace{2mm} 348 | \hbox to \headerwidth {\hfill \LecDate \hfill} 349 | \vspace{2mm} 350 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 351 | } 352 | } 353 | \end{center} 354 | \vspace*{4mm}} 355 | -------------------------------------------------------------------------------- /Lecture 18/Lecture_18.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 18/Lecture_18.pdf 
-------------------------------------------------------------------------------- /Lecture 18/sampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 18/sampling.png -------------------------------------------------------------------------------- /Lecture 19/Lecture_19.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 19/Lecture_19.pdf -------------------------------------------------------------------------------- /Lecture 19/Lecutre 19.tex: -------------------------------------------------------------------------------- 1 | \documentclass[12pt]{article} 2 | \usepackage[english]{babel} 3 | \usepackage[utf8x]{inputenc} 4 | \usepackage[T1]{fontenc} 5 | \usepackage[fleqn]{amsmath} 6 | \usepackage{scribe} 7 | \Scribe{Course Team} 8 | \Lecturer{Abir De} 9 | \LectureNumber{19} 10 | \LectureDate{20/10/2022} 11 | \LectureTitle{Mixture Models} 12 | 13 | \lstset{style=mystyle} 14 | 15 | \begin{document} 16 | \MakeScribeTop 17 | 18 | %############################################################# 19 | %############################################################# 20 | %############################################################# 21 | %############################################################# 22 | 23 | \section{Introduction} 24 | 25 | We observe a data set $D = \{X_i\}_{i=1}^{N}$, where each $X_i = x_i$ is sampled from one of the $K$ mixture components. 26 | 27 | Each of the mixture components is a multivariate Gaussian density with its own parameters $\theta_k = \{\mu_k, \Sigma_k\}$: 28 | \begin{center} 29 | $p_k(x_i|\theta_k) = \frac{1}{(2\pi)^{d/2}|\Sigma_k|^{1/2}} \exp\left(-\frac{1}{2}(x_i-\mu_k)^t\Sigma_k^{-1}(x_i-\mu_k)\right)$ 30 | \end{center} 31 | 32 | 33 | We now have to estimate the parameters of the $K$ mixture components, $\theta_k$, and the mixture weights $\pi_k = P(c(\bar{x}) = k)$, which represent the probability that a randomly selected $\bar{x}$ was generated by the $k^{th}$ component, where $\sum_{k=1}^K \pi_k = 1$. 34 | 35 | \section{Computing posterior distribution $P(c(X_i)=k | X_i)$} 36 | 37 | Using initial estimates for the parameters $\omega$ (the full parameter set defined in Section 3), we obtain the posterior in the following way: 38 | \begin{gather*} 39 | \begin{aligned} 40 | & P(c(X_i)=k \mid X_{i}, \omega) = \dfrac {P(\bm{\X_i} \mid c(X_i)=k, \theta_k) \cdot P(c = k)} {\sum_{m}P(\bm{X_i} \mid c(X_i) = m, \theta_m)P(c = m)} = \dfrac {N(X_i; \theta_k) \pi_k} {\sum_{m}N(X_i; \theta_m) \pi_m}\\ 41 | \end{aligned} 42 | \end{gather*} 43 | 44 | This follows from a direct application of Bayes' rule. These membership weights reflect the uncertainty, given $X_i = x_i$ and $\omega$, about which of the $K$ components generated the vector $X_i = x_i$. 45 |
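These membership weights are straightforward to compute in practice. The following is a minimal Python sketch (assuming \texttt{numpy} and \texttt{scipy} are available; the function and variable names are illustrative, not from the lecture) that evaluates $P(c(X_i)=k \mid X_i, \omega)$ for all data points at once.

\begin{lstlisting}[language=Python]
# Minimal sketch: membership weights (posteriors) for a Gaussian mixture.
import numpy as np
from scipy.stats import multivariate_normal

def membership_weights(X, pis, mus, Sigmas):
    """X: (N, d) data; pis: (K,) mixture weights;
    mus, Sigmas: K mean vectors and K covariance matrices."""
    N, K = X.shape[0], len(pis)
    dens = np.zeros((N, K))
    for k in range(K):
        # numerator of Bayes' rule: N(X_i; theta_k) * pi_k for every point
        dens[:, k] = pis[k] * multivariate_normal.pdf(X, mean=mus[k], cov=Sigmas[k])
    # normalise each row so the weights sum to one over k
    return dens / dens.sum(axis=1, keepdims=True)
\end{lstlisting}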
46 | \section{Maximum Likelihood Estimation} 47 | The complete set of parameters for a mixture model with $K$ components is 48 | \begin{center} 49 | $\omega = \{ \pi_1, \pi_2, \ldots, \pi_K, \theta_1, \ldots, \theta_K \}$ 50 | \end{center} 51 | We now maximize the likelihood of the data, $P(D) = P(X_1 = x_1, X_2 = x_2, \ldots, X_N = x_N)$, w.r.t.\ $\omega$. 52 | 53 | \begin{gather*} 54 | \begin{aligned} 55 | & P(D) = \prod_{i=1}^{N} P(\bm{\X_i = \x_{i}}) \\ 56 | & \implies \log(P(D)) = \sum_{i=1}^{N} \log(P(\bm{\X_i = \x_{i}})) \\ 57 | \end{aligned} 58 | \end{gather*} 59 | 60 | We know that the marginal probability of $X_i$ is 61 | 62 | \begin{gather*} 63 | \begin{aligned} 64 | & P(\X_i = \x_i) = \sum_{k=1}^{K} P(\bm{\X_{i} = \x_i} \mid c(X_{i}) = k) P(c = k) \\ 65 | & \implies P(\X_i = \x_i) = \sum_{k=1}^{K} P(\bm{\X_{i} = \x_i} \mid c(X_{i}) = k) \pi_k \\ 66 | \end{aligned} 67 | \end{gather*} 68 | 69 | Using the above, 70 | 71 | \begin{gather*} 72 | \begin{aligned} 73 | & \log(P(D)) = \sum_{i=1}^{N} \log(\sum_{k=1}^{K} P(\bm{\X_{i}} \mid c(X_{i}) = k) \pi_k) \\ 74 | \end{aligned} 75 | \end{gather*} 76 | 77 | Differentiating the above w.r.t.\ $\pi_k$ (with a Lagrange multiplier for the constraint $\sum_{k=1}^{K} \pi_k = 1$), $\mu_k$ and $\Sigma_k$, and using the posterior presented in Section 2, we obtain the new parameters: 78 | 79 | \begin{center} 80 | Let $N_k = \sum_{i=1}^N P(c(X_i) = k| X_i, \omega)$ 81 | 82 | \end{center} 83 | \begin{center} 84 | $\pi_k^{new} = \frac{N_k}{N}$ 85 | \end{center} 86 | \begin{center} 87 | $\mu_k^{new} = \frac{1}{N_k} \sum_{i=1}^N P(c(X_i) = k| X_i, \omega) \cdot X_i$ 88 | 89 | 90 | \end{center} 91 | \begin{center} 92 | $\Sigma_k^{new} = \frac{1}{N_k} \sum_{i=1}^N P(c(X_i) = k| X_i, \omega) \cdot (X_i - \mu_k^{new}) (X_i - \mu_k^{new})^t$ 93 | \end{center} 94 | \section{Iterative Procedure for Parameter Estimation} 95 | 96 | We now work on choosing a suitable initial prior for $\pi_k$. 97 | The entropy of a discrete distribution is defined as $-\sum_{i} P(x_{i}) \log(P(x_{i}))$, where the $x_{i}$ are the possible outcomes. Here the component label takes one of the values $1, \ldots, K$ with probabilities $\pi_1, \pi_2, \ldots, \pi_K$, so in order to maximize the randomness of the initial assignment we give each component probability $1/K$. \\ 98 | 99 | Now, using the above initial prior for $\pi_k$, and some initial parameter estimates $\theta_k$, we derive the posterior $P(c(X_i) = k | X_i)$ (membership weights) as presented in Section 2. \\ 100 | 101 | Using these new membership weights, we calculate the new $\pi_k$, $\mu_k$ and $\Sigma_k$ using the equations given at the end of Section 3 (derived by differentiating the log likelihood). \\ 102 | 103 | Using these new parameter estimates, we calculate the new membership weights and repeat the steps until the value of the likelihood of the data converges. 104 | 105 | \begin{gather*} 106 | \begin{aligned} 107 | & \text{Log-likelihood of the data: } \log\prod_{i=1}^{N} P(\bm{\X_{i}}) = \sum_{i=1}^{N} \log(\sum_{k=1}^{K} P(\bm{\X_{i}} \mid c(\bm{\X_{i}}) = k) P(c = k)) \\ 108 | & \text{Let } P_\omega = P(\bm{\X_{i}} \mid c(\bm{\X_{i}}) = k), \quad P_c = P(c=k \mid \bm{\X_{i}})\\ 109 | & \omega = \omega^{t-1}\\ 110 | & \text{At time t, } \max_{\omega} \sum_{i=1}^{N} \log(\sum_{k=1}^{K} P_\omega P_c(\omega^{t-1})) \text{ will give us the new parameter estimates } \omega 111 | \end{aligned} 112 | \end{gather*} 113 |
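Taken together, Sections 2--4 describe the EM algorithm for Gaussian mixtures. Below is a minimal Python sketch of this loop (assuming \texttt{numpy} and \texttt{scipy}; it reuses \texttt{membership\_weights} from the sketch in Section 2, and the small diagonal term added to each covariance is only a numerical safeguard, not part of the derivation).

\begin{lstlisting}[language=Python]
# Minimal sketch of the iterative procedure (EM) for a Gaussian mixture.
import numpy as np
from scipy.stats import multivariate_normal

def fit_gmm(X, K, n_iter=100, tol=1e-6, seed=0):
    rng = np.random.default_rng(seed)
    N, d = X.shape
    pis = np.full(K, 1.0 / K)              # maximum-entropy initial prior pi_k = 1/K
    mus = [X[i] for i in rng.choice(N, K, replace=False)]   # random initial means
    Sigmas = [np.cov(X.T) + 1e-6 * np.eye(d) for _ in range(K)]
    prev_ll = -np.inf
    for _ in range(n_iter):
        W = membership_weights(X, pis, mus, Sigmas)         # posteriors (Section 2)
        Nk = W.sum(axis=0)                                  # N_k
        pis = Nk / N                                        # pi_k^new
        mus = [(W[:, k] @ X) / Nk[k] for k in range(K)]     # mu_k^new
        Sigmas = [((W[:, k, None] * (X - mus[k])).T @ (X - mus[k])) / Nk[k]
                  + 1e-6 * np.eye(d) for k in range(K)]     # Sigma_k^new
        dens = np.column_stack([pis[k] * multivariate_normal.pdf(X, mus[k], Sigmas[k])
                                for k in range(K)])
        ll = np.log(dens.sum(axis=1)).sum()                 # log-likelihood of the data
        if abs(ll - prev_ll) < tol:                         # stop once it converges
            break
        prev_ll = ll
    return pis, mus, Sigmas
\end{lstlisting}

The log-likelihood computed inside the loop never decreases from one iteration to the next, which is why it serves as a convenient convergence criterion.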
114 | 115 | \section{Representation in terms of Expectation} 116 | We can also represent the likelihood of the data $\{\prod_{i=1}^{N} P(\bm{\X_{i}})\}$ as below. 117 | 118 | \begin{gather*} 119 | \begin{aligned} 120 | & \text{Now, }P(\bm{X}) = \sum_Z{P(X|Z) P(Z)} \\ 121 | & \text{implies } P(\bm{X}) = \E_{\bm{Z}}[P(\bm{X|Z})] \\ \\ \\ 122 | & \text{Hence, } P(\bm{\X_{i}}) = \E_{c}[P(\bm{\X_i} \mid c)] \\ 123 | & \prod_{i=1}^{N} P(\bm{\X_{i}}) = \prod_{i=1}^{N} \E_{c} [P(\bm{\X_i} \mid c)] \\ \\ \\ 124 | & \text{Now, } \prod_{i=1}^{N} \sum_{k=1}^{K} P(\bm{\X_{i} | c = k}) P(\bm{c = k}) \\ 125 | & \text{is equal to } \sum_{k_1=1}^{K} \sum_{k_2=1}^{K} \sum_{k_3=1}^{K} \ldots \sum_{k_N=1}^{K} (\prod_{i=1}^{N} P(\bm{\X_{i} | c = k_i}) P(\bm{c = k_i})) \\ 126 | % & \text{Thus switching product of sum with sum of product, } \\ 127 | & \prod_{i=1}^{N} P(\bm{\X_{i}}) = \E_{(k_1, k_2, k_3, \ldots, k_N)}[\prod_{i=1}^{N} P(\bm{\X_i} \mid c = k_i)] \\ \\ \\ 128 | \end{aligned} 129 | \end{gather*} 130 | 131 | \section{Mixture Model to K-Means iterative algorithm} 132 | 133 | The entropy of a distribution is defined as $S(X) = -\sum_{i=1}^{N} P(X_{i})\log(P(X_{i}))$, where the $X_{i}$ are the possible outcomes of the distribution. Entropy is maximised when all of these probabilities are equal (easily proved with differentiation). 134 | 135 | So we set the prior $\pi_k = 1/K$ for all $k$ initially to maximise entropy in K-Means. We set random initial parameter estimates $\theta$. In addition, for later iterations we set $P(c=k) = \I(c=k)$, where $\I$ is the indicator function, i.e.\ each point is hard-assigned to a single cluster. 136 | 137 | Using maximum likelihood estimation of the data, we calculate the new parameters and weights and stop when the likelihood converges. 138 | 139 | %%%%%%%%%%% end of doc 140 | \end{document} 141 | -------------------------------------------------------------------------------- /Lecture 19/scribe.sty: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below...
62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | 122 | \lstset{% 123 | language = Julia, 124 | basicstyle = \ttfamily, 125 | keywordstyle = \bfseries\color{blue}, 126 | stringstyle = \color{magenta}, 127 | commentstyle = \color{ForestGreen}, 128 | showstringspaces = false, 129 | } 130 | 131 | 132 | \newtheorem{theorem}{Theorem}[section] 133 | \newtheorem{lemma}[theorem]{Lemma} 134 | \newtheorem{claim}[theorem]{Claim} 135 | \newtheorem{proposition}[theorem]{Proposition} 136 | \newtheorem{corollary}[theorem]{Corollary} 137 | \newtheorem{fact}[theorem]{Fact} 138 | \newtheorem{example}[theorem]{Example} 139 | \newtheorem{notation}[theorem]{Notation} 140 | \newtheorem{observation}[theorem]{Observation} 141 | \newtheorem{conjecture}[theorem]{Conjecture} 142 | 143 | \theoremstyle{definition} 144 | \newtheorem{definition}[theorem]{Definition} 145 | 146 | \theoremstyle{remark} 147 | \newtheorem{remark}[theorem]{Remark} 148 | 149 | % Setting the theorem style back to plain in case theorems are defined in the main file 150 | \theoremstyle{plain} 151 | 152 | 153 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 154 | % Useful macros 155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 156 | 157 | % for temporarily chunks of text 158 | \newcommand{\ignore}[1]{} 159 | 160 | % Probability/expectation operators. The ones ending in x should be used if you want 161 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 162 | % go below and to the right. NB: \P is remapped below for the complexity class P. 
163 | \renewcommand{\Pr}{{\bf Pr}} 164 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 165 | \newcommand{\E}{{\bf E}} 166 | \newcommand{\Ex}{\mathop{\bf E\/}} 167 | \newcommand{\Var}{{\bf Var}} 168 | \newcommand{\Varx}{\mathop{\bf Var\/}} 169 | \newcommand{\Cov}{{\bf Cov}} 170 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 171 | 172 | % shortcuts for symbol names that are too long to type 173 | \newcommand{\eps}{\epsilon} 174 | \newcommand{\lam}{\lambda} 175 | \renewcommand{\l}{\ell} 176 | \newcommand{\la}{\langle} 177 | \newcommand{\ra}{\rangle} 178 | \newcommand{\wh}{\widehat} 179 | \newcommand{\wt}{\widetilde} 180 | 181 | % "blackboard-fonted" letters for the reals, naturals etc. 182 | \newcommand{\R}{\mathbb R} 183 | \newcommand{\N}{\mathbb N} 184 | \newcommand{\Z}{\mathbb Z} 185 | \newcommand{\F}{\mathbb F} 186 | \newcommand{\Q}{\mathbb Q} 187 | \newcommand{\C}{\mathbb C} 188 | 189 | % operators that should be typeset in Roman font 190 | \newcommand{\poly}{\mathrm{poly}} 191 | \newcommand{\polylog}{\mathrm{polylog}} 192 | \newcommand{\sgn}{\mathrm{sgn}} 193 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 194 | \newcommand{\val}{{\mathrm{val}}} 195 | 196 | % complexity classes 197 | \renewcommand{\P}{\mathrm{P}} 198 | \newcommand{\NP}{\mathrm{NP}} 199 | \newcommand{\BPP}{\mathrm{BPP}} 200 | \newcommand{\DTIME}{\mathrm{DTIME}} 201 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 202 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 203 | \newcommand{\NTIME}{\mathrm{NTIME}} 204 | 205 | % values associated to optimization algorithm instances 206 | \newcommand{\Opt}{{\mathsf{Opt}}} 207 | \newcommand{\Alg}{{\mathsf{Alg}}} 208 | \newcommand{\Lp}{{\mathsf{Lp}}} 209 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 210 | \newcommand{\Exp}{{\mathsf{Exp}}} 211 | 212 | % if you think the sum and product signs are too big in your math mode; x convention 213 | % as in the probability operators 214 | \newcommand{\littlesum}{{\textstyle \sum}} 215 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 216 | \newcommand{\littleprod}{{\textstyle \prod}} 217 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 218 | 219 | % horizontal line across the page 220 | \newcommand{\horz}{ 221 | \vspace{-.4in} 222 | \begin{center} 223 | \begin{tabular}{p{\textwidth}}\\ 224 | \hline 225 | \end{tabular} 226 | \end{center} 227 | } 228 | 229 | % calligraphic letters 230 | \newcommand{\calA}{{\cal A}} 231 | \newcommand{\calB}{{\cal B}} 232 | \newcommand{\calC}{{\cal C}} 233 | \newcommand{\calD}{{\cal D}} 234 | \newcommand{\calE}{{\cal E}} 235 | \newcommand{\calF}{{\cal F}} 236 | \newcommand{\calG}{{\cal G}} 237 | \newcommand{\calH}{{\cal H}} 238 | \newcommand{\calI}{{\cal I}} 239 | \newcommand{\calJ}{{\cal J}} 240 | \newcommand{\calK}{{\cal K}} 241 | \newcommand{\calL}{{\cal L}} 242 | \newcommand{\calM}{{\cal M}} 243 | \newcommand{\calN}{{\cal N}} 244 | \newcommand{\calO}{{\cal O}} 245 | \newcommand{\calP}{{\cal P}} 246 | \newcommand{\calQ}{{\cal Q}} 247 | \newcommand{\calR}{{\cal R}} 248 | \newcommand{\calS}{{\cal S}} 249 | \newcommand{\calT}{{\cal T}} 250 | \newcommand{\calU}{{\cal U}} 251 | \newcommand{\calV}{{\cal V}} 252 | \newcommand{\calW}{{\cal W}} 253 | \newcommand{\calX}{{\cal X}} 254 | \newcommand{\calY}{{\cal Y}} 255 | \newcommand{\calZ}{{\cal Z}} 256 | 257 | % bold letters (useful for random variables) 258 | \renewcommand{\a}{{\boldsymbol a}} 259 | \renewcommand{\b}{{\boldsymbol b}} 260 | \renewcommand{\c}{{\boldsymbol c}} 261 | \renewcommand{\d}{{\boldsymbol d}} 262 | \newcommand{\e}{{\boldsymbol e}} 263 | \newcommand{\f}{{\boldsymbol f}} 264 | 
\newcommand{\g}{{\boldsymbol g}} 265 | \newcommand{\h}{{\boldsymbol h}} 266 | \renewcommand{\i}{{\boldsymbol i}} 267 | \renewcommand{\j}{{\boldsymbol j}} 268 | \renewcommand{\k}{{\boldsymbol k}} 269 | \newcommand{\m}{{\boldsymbol m}} 270 | \newcommand{\n}{{\boldsymbol n}} 271 | \renewcommand{\o}{{\boldsymbol o}} 272 | \newcommand{\p}{{\boldsymbol p}} 273 | \newcommand{\q}{{\boldsymbol q}} 274 | \renewcommand{\r}{{\boldsymbol r}} 275 | \newcommand{\s}{{\boldsymbol s}} 276 | \renewcommand{\t}{{\boldsymbol t}} 277 | \renewcommand{\u}{{\boldsymbol u}} 278 | \renewcommand{\v}{{\boldsymbol v}} 279 | \newcommand{\w}{{\boldsymbol w}} 280 | \newcommand{\x}{{\boldsymbol x}} 281 | \newcommand{\y}{{\boldsymbol y}} 282 | \newcommand{\z}{{\boldsymbol z}} 283 | \newcommand{\A}{{\boldsymbol A}} 284 | \newcommand{\B}{{\boldsymbol B}} 285 | \newcommand{\D}{{\boldsymbol D}} 286 | \newcommand{\G}{{\boldsymbol G}} 287 | \renewcommand{\H}{{\boldsymbol H}} 288 | \newcommand{\I}{{\boldsymbol I}} 289 | \newcommand{\J}{{\boldsymbol J}} 290 | \newcommand{\K}{{\boldsymbol K}} 291 | \renewcommand{\L}{{\boldsymbol L}} 292 | \newcommand{\M}{{\boldsymbol M}} 293 | \renewcommand{\O}{{\boldsymbol O}} 294 | \renewcommand{\S}{{\boldsymbol S}} 295 | \newcommand{\T}{{\boldsymbol T}} 296 | \newcommand{\U}{{\boldsymbol U}} 297 | \newcommand{\V}{{\boldsymbol V}} 298 | \newcommand{\W}{{\boldsymbol W}} 299 | \newcommand{\X}{{\boldsymbol X}} 300 | \newcommand{\Y}{{\boldsymbol Y}} 301 | 302 | 303 | 304 | % useful for Fourier analysis 305 | \newcommand{\bits}{\{-1,1\}} 306 | \newcommand{\bitsn}{\{-1,1\}^n} 307 | \newcommand{\bn}{\bitsn} 308 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 309 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 310 | 311 | % if you want 312 | \newcommand{\half}{{\textstyle \frac12}} 313 | 314 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 315 | 316 | 317 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 318 | % Feel free to ignore the rest of this file 319 | 320 | 321 | 322 | \def\ScribeStr{??} 323 | \def\LecStr{??} 324 | \def\LecNum{??} 325 | \def\LecTitle{??} 326 | \def\LecDate{??} 327 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 328 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 329 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 330 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 331 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 332 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 333 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 334 | 335 | \newdimen\headerwidth 336 | 337 | \newcommand{\MakeScribeTop}{ 338 | \noindent 339 | \begin{center} 340 | \framebox{ 341 | \vbox{ 342 | \headerwidth=\textwidth 343 | \advance\headerwidth by -0.22in 344 | \hbox to \headerwidth {\hfill AIML - CS 337} 345 | \vspace{4mm} 346 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 347 | \vspace{2mm} 348 | \hbox to \headerwidth {\hfill \LecDate \hfill} 349 | \vspace{2mm} 350 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 351 | } 352 | } 353 | \end{center} 354 | \vspace*{4mm}} 355 | -------------------------------------------------------------------------------- /Lecture 2/Lecture2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 2/Lecture2.pdf 
-------------------------------------------------------------------------------- /Lecture 2/LinSub.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 2/LinSub.jpg -------------------------------------------------------------------------------- /Lecture 2/mybib.bib: -------------------------------------------------------------------------------- 1 | @book{bertsimas1997introduction, 2 | title={Introduction to linear optimization}, 3 | author={Bertsimas, Dimitris and Tsitsiklis, John N}, 4 | volume={6}, 5 | year={1997}, 6 | publisher={Athena Scientific Belmont, MA} 7 | } 8 | 9 | @book{boyd2004convex, 10 | title={Convex optimization}, 11 | author={Boyd, Stephen and Vandenberghe, Lieven}, 12 | year={2004}, 13 | publisher={Cambridge university press} 14 | } 15 | 16 | @book{wolsey2014integer, 17 | title={Integer and combinatorial optimization}, 18 | author={Wolsey, Laurence A and Nemhauser, George L}, 19 | year={2014}, 20 | publisher={John Wiley \& Sons} 21 | } -------------------------------------------------------------------------------- /Lecture 2/scribe.sty: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below... 
62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | 122 | \lstset{% 123 | language = Julia, 124 | basicstyle = \ttfamily, 125 | keywordstyle = \bfseries\color{blue}, 126 | stringstyle = \color{magenta}, 127 | commentstyle = \color{ForestGreen}, 128 | showstringspaces = false, 129 | } 130 | 131 | 132 | \newtheorem{theorem}{Theorem}[section] 133 | \newtheorem{lemma}[theorem]{Lemma} 134 | \newtheorem{claim}[theorem]{Claim} 135 | \newtheorem{proposition}[theorem]{Proposition} 136 | \newtheorem{corollary}[theorem]{Corollary} 137 | \newtheorem{fact}[theorem]{Fact} 138 | \newtheorem{example}[theorem]{Example} 139 | \newtheorem{notation}[theorem]{Notation} 140 | \newtheorem{observation}[theorem]{Observation} 141 | \newtheorem{conjecture}[theorem]{Conjecture} 142 | 143 | \theoremstyle{definition} 144 | \newtheorem{definition}[theorem]{Definition} 145 | 146 | \theoremstyle{remark} 147 | \newtheorem{remark}[theorem]{Remark} 148 | 149 | % Setting the theorem style back to plain in case theorems are defined in the main file 150 | \theoremstyle{plain} 151 | 152 | 153 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 154 | % Useful macros 155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 156 | 157 | % for temporarily chunks of text 158 | \newcommand{\ignore}[1]{} 159 | 160 | % Probability/expectation operators. The ones ending in x should be used if you want 161 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 162 | % go below and to the right. NB: \P is remapped below for the complexity class P. 
163 | \renewcommand{\Pr}{{\bf Pr}} 164 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 165 | \newcommand{\E}{{\bf E}} 166 | \newcommand{\Ex}{\mathop{\bf E\/}} 167 | \newcommand{\Var}{{\bf Var}} 168 | \newcommand{\Varx}{\mathop{\bf Var\/}} 169 | \newcommand{\Cov}{{\bf Cov}} 170 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 171 | 172 | % shortcuts for symbol names that are too long to type 173 | \newcommand{\eps}{\epsilon} 174 | \newcommand{\lam}{\lambda} 175 | \renewcommand{\l}{\ell} 176 | \newcommand{\la}{\langle} 177 | \newcommand{\ra}{\rangle} 178 | \newcommand{\wh}{\widehat} 179 | \newcommand{\wt}{\widetilde} 180 | 181 | % "blackboard-fonted" letters for the reals, naturals etc. 182 | \newcommand{\R}{\mathbb R} 183 | \newcommand{\N}{\mathbb N} 184 | \newcommand{\Z}{\mathbb Z} 185 | \newcommand{\F}{\mathbb F} 186 | \newcommand{\Q}{\mathbb Q} 187 | \newcommand{\C}{\mathbb C} 188 | 189 | % operators that should be typeset in Roman font 190 | \newcommand{\poly}{\mathrm{poly}} 191 | \newcommand{\polylog}{\mathrm{polylog}} 192 | \newcommand{\sgn}{\mathrm{sgn}} 193 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 194 | \newcommand{\val}{{\mathrm{val}}} 195 | 196 | % complexity classes 197 | \renewcommand{\P}{\mathrm{P}} 198 | \newcommand{\NP}{\mathrm{NP}} 199 | \newcommand{\BPP}{\mathrm{BPP}} 200 | \newcommand{\DTIME}{\mathrm{DTIME}} 201 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 202 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 203 | \newcommand{\NTIME}{\mathrm{NTIME}} 204 | 205 | % values associated to optimization algorithm instances 206 | \newcommand{\Opt}{{\mathsf{Opt}}} 207 | \newcommand{\Alg}{{\mathsf{Alg}}} 208 | \newcommand{\Lp}{{\mathsf{Lp}}} 209 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 210 | \newcommand{\Exp}{{\mathsf{Exp}}} 211 | 212 | % if you think the sum and product signs are too big in your math mode; x convention 213 | % as in the probability operators 214 | \newcommand{\littlesum}{{\textstyle \sum}} 215 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 216 | \newcommand{\littleprod}{{\textstyle \prod}} 217 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 218 | 219 | % horizontal line across the page 220 | \newcommand{\horz}{ 221 | \vspace{-.4in} 222 | \begin{center} 223 | \begin{tabular}{p{\textwidth}}\\ 224 | \hline 225 | \end{tabular} 226 | \end{center} 227 | } 228 | 229 | % calligraphic letters 230 | \newcommand{\calA}{{\cal A}} 231 | \newcommand{\calB}{{\cal B}} 232 | \newcommand{\calC}{{\cal C}} 233 | \newcommand{\calD}{{\cal D}} 234 | \newcommand{\calE}{{\cal E}} 235 | \newcommand{\calF}{{\cal F}} 236 | \newcommand{\calG}{{\cal G}} 237 | \newcommand{\calH}{{\cal H}} 238 | \newcommand{\calI}{{\cal I}} 239 | \newcommand{\calJ}{{\cal J}} 240 | \newcommand{\calK}{{\cal K}} 241 | \newcommand{\calL}{{\cal L}} 242 | \newcommand{\calM}{{\cal M}} 243 | \newcommand{\calN}{{\cal N}} 244 | \newcommand{\calO}{{\cal O}} 245 | \newcommand{\calP}{{\cal P}} 246 | \newcommand{\calQ}{{\cal Q}} 247 | \newcommand{\calR}{{\cal R}} 248 | \newcommand{\calS}{{\cal S}} 249 | \newcommand{\calT}{{\cal T}} 250 | \newcommand{\calU}{{\cal U}} 251 | \newcommand{\calV}{{\cal V}} 252 | \newcommand{\calW}{{\cal W}} 253 | \newcommand{\calX}{{\cal X}} 254 | \newcommand{\calY}{{\cal Y}} 255 | \newcommand{\calZ}{{\cal Z}} 256 | 257 | % bold letters (useful for random variables) 258 | \renewcommand{\a}{{\boldsymbol a}} 259 | \renewcommand{\b}{{\boldsymbol b}} 260 | \renewcommand{\c}{{\boldsymbol c}} 261 | \renewcommand{\d}{{\boldsymbol d}} 262 | \newcommand{\e}{{\boldsymbol e}} 263 | \newcommand{\f}{{\boldsymbol f}} 264 | 
\newcommand{\g}{{\boldsymbol g}} 265 | \newcommand{\h}{{\boldsymbol h}} 266 | \renewcommand{\i}{{\boldsymbol i}} 267 | \renewcommand{\j}{{\boldsymbol j}} 268 | \renewcommand{\k}{{\boldsymbol k}} 269 | \newcommand{\m}{{\boldsymbol m}} 270 | \newcommand{\n}{{\boldsymbol n}} 271 | \renewcommand{\o}{{\boldsymbol o}} 272 | \newcommand{\p}{{\boldsymbol p}} 273 | \newcommand{\q}{{\boldsymbol q}} 274 | \renewcommand{\r}{{\boldsymbol r}} 275 | \newcommand{\s}{{\boldsymbol s}} 276 | \renewcommand{\t}{{\boldsymbol t}} 277 | \renewcommand{\u}{{\boldsymbol u}} 278 | \renewcommand{\v}{{\boldsymbol v}} 279 | \newcommand{\w}{{\boldsymbol w}} 280 | \newcommand{\x}{{\boldsymbol x}} 281 | \newcommand{\y}{{\boldsymbol y}} 282 | \newcommand{\z}{{\boldsymbol z}} 283 | \newcommand{\A}{{\boldsymbol A}} 284 | \newcommand{\B}{{\boldsymbol B}} 285 | \newcommand{\D}{{\boldsymbol D}} 286 | \newcommand{\G}{{\boldsymbol G}} 287 | \renewcommand{\H}{{\boldsymbol H}} 288 | \newcommand{\I}{{\boldsymbol I}} 289 | \newcommand{\J}{{\boldsymbol J}} 290 | \newcommand{\K}{{\boldsymbol K}} 291 | \renewcommand{\L}{{\boldsymbol L}} 292 | \newcommand{\M}{{\boldsymbol M}} 293 | \renewcommand{\O}{{\boldsymbol O}} 294 | \renewcommand{\S}{{\boldsymbol S}} 295 | \newcommand{\T}{{\boldsymbol T}} 296 | \newcommand{\U}{{\boldsymbol U}} 297 | \newcommand{\V}{{\boldsymbol V}} 298 | \newcommand{\W}{{\boldsymbol W}} 299 | \newcommand{\X}{{\boldsymbol X}} 300 | \newcommand{\Y}{{\boldsymbol Y}} 301 | 302 | 303 | 304 | % useful for Fourier analysis 305 | \newcommand{\bits}{\{-1,1\}} 306 | \newcommand{\bitsn}{\{-1,1\}^n} 307 | \newcommand{\bn}{\bitsn} 308 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 309 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 310 | 311 | % if you want 312 | \newcommand{\half}{{\textstyle \frac12}} 313 | 314 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 315 | 316 | 317 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 318 | % Feel free to ignore the rest of this file 319 | 320 | 321 | 322 | \def\ScribeStr{??} 323 | \def\LecStr{??} 324 | \def\LecNum{??} 325 | \def\LecTitle{??} 326 | \def\LecDate{??} 327 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 328 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 329 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 330 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 331 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 332 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 333 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 334 | 335 | \newdimen\headerwidth 336 | 337 | \newcommand{\MakeScribeTop}{ 338 | \noindent 339 | \begin{center} 340 | \framebox{ 341 | \vbox{ 342 | \headerwidth=\textwidth 343 | \advance\headerwidth by -0.22in 344 | \hbox to \headerwidth {\hfill AIML - CS 337} 345 | \vspace{4mm} 346 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 347 | \vspace{2mm} 348 | \hbox to \headerwidth {\hfill \LecDate \hfill} 349 | \vspace{2mm} 350 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 351 | } 352 | } 353 | \end{center} 354 | \vspace*{4mm}} 355 | -------------------------------------------------------------------------------- /Lecture 20/CS337_Scribe_Final.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 20/CS337_Scribe_Final.pdf 
-------------------------------------------------------------------------------- /Lecture 20/MLaaS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 20/MLaaS.jpg -------------------------------------------------------------------------------- /Lecture 20/gradDes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 20/gradDes.png -------------------------------------------------------------------------------- /Lecture 20/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 20/loss.png -------------------------------------------------------------------------------- /Lecture 20/lr.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 20/lr.jpeg -------------------------------------------------------------------------------- /Lecture 20/lr.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 20/lr.jpg -------------------------------------------------------------------------------- /Lecture 20/mybib.bib: -------------------------------------------------------------------------------- 1 | @book{bertsimas1997introduction, 2 | title={Introduction to linear optimization}, 3 | author={Bertsimas, Dimitris and Tsitsiklis, John N}, 4 | volume={6}, 5 | year={1997}, 6 | publisher={Athena Scientific Belmont, MA} 7 | } 8 | 9 | @book{boyd2004convex, 10 | title={Convex optimization}, 11 | author={Boyd, Stephen and Vandenberghe, Lieven}, 12 | year={2004}, 13 | publisher={Cambridge university press} 14 | } 15 | 16 | @book{wolsey2014integer, 17 | title={Integer and combinatorial optimization}, 18 | author={Wolsey, Laurence A and Nemhauser, George L}, 19 | year={2014}, 20 | publisher={John Wiley \& Sons} 21 | } 22 | 23 | @book{class, 24 | title = {CS231n: Deep Learning for Computer Vision}, 25 | author={}, 26 | year={}, 27 | publisher={Stanford University} 28 | } 29 | 30 | @article{graddesc, 31 | author = {Sebastian Ruder}, 32 | title = {An overview of gradient descent optimization algorithms}, 33 | journal = {CoRR}, 34 | volume = {abs}, 35 | year = {2016}, 36 | } 37 | 38 | @article{rate, 39 | 40 | author = {Fehrman, Benjamin and Gess, Benjamin and Jentzen, Arnulf}, 41 | 42 | keywords = {Numerical Analysis (math.NA), Machine Learning (cs.LG), Probability (math.PR), Machine Learning (stat.ML), FOS: Mathematics, FOS: Mathematics, FOS: Computer and information sciences, FOS: Computer and information sciences}, 43 | 44 | title = {Convergence rates for the stochastic gradient descent method for non-convex objective functions}, 45 | 46 | 47 | year = {2019}, 48 | 49 | } 50 | 51 | @article{he, 52 | 53 | author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, 54 | 55 | keywords = {Computer Vision and Pattern Recognition (cs.CV), Artificial Intelligence (cs.AI), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences}, 56 | 57 | 
title = {Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification}, 58 | } 59 | -------------------------------------------------------------------------------- /Lecture 20/scribe.sty: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below... 
62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | \lstset{% 122 | language = Julia, 123 | basicstyle = \ttfamily, 124 | keywordstyle = \bfseries\color{blue}, 125 | stringstyle = \color{magenta}, 126 | commentstyle = \color{ForestGreen}, 127 | showstringspaces = false, 128 | } 129 | \newtheorem{theorem}{Theorem}[section] 130 | \newtheorem{lemma}[theorem]{Lemma} 131 | \newtheorem{claim}[theorem]{Claim} 132 | \newtheorem{proposition}[theorem]{Proposition} 133 | \newtheorem{corollary}[theorem]{Corollary} 134 | \newtheorem{fact}[theorem]{Fact} 135 | \newtheorem{example}[theorem]{Example} 136 | \newtheorem{notation}[theorem]{Notation} 137 | \newtheorem{observation}[theorem]{Observation} 138 | \newtheorem{conjecture}[theorem]{Conjecture} 139 | \theoremstyle{definition} 140 | \newtheorem{definition}[theorem]{Definition} 141 | \theoremstyle{remark} 142 | \newtheorem{remark}[theorem]{Remark} 143 | % Setting the theorem style back to plain in case theorems are defined in the main file 144 | \theoremstyle{plain} 145 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 146 | % Useful macros 147 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 148 | % for temporarily chunks of text 149 | \newcommand{\ignore}[1]{} 150 | % Probability/expectation operators. The ones ending in x should be used if you want 151 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 152 | % go below and to the right. NB: \P is remapped below for the complexity class P. 
153 | \renewcommand{\Pr}{{\bf Pr}} 154 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 155 | \newcommand{\E}{{\bf E}} 156 | \newcommand{\Ex}{\mathop{\bf E\/}} 157 | \newcommand{\Var}{{\bf Var}} 158 | \newcommand{\Varx}{\mathop{\bf Var\/}} 159 | \newcommand{\Cov}{{\bf Cov}} 160 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 161 | % shortcuts for symbol names that are too long to type 162 | \newcommand{\eps}{\epsilon} 163 | \newcommand{\lam}{\lambda} 164 | \renewcommand{\l}{\ell} 165 | \newcommand{\la}{\langle} 166 | \newcommand{\ra}{\rangle} 167 | \newcommand{\wh}{\widehat} 168 | \newcommand{\wt}{\widetilde} 169 | % "blackboard-fonted" letters for the reals, naturals etc. 170 | \newcommand{\R}{\mathbb R} 171 | \newcommand{\N}{\mathbb N} 172 | \newcommand{\Z}{\mathbb Z} 173 | \newcommand{\F}{\mathbb F} 174 | \newcommand{\Q}{\mathbb Q} 175 | \newcommand{\C}{\mathbb C} 176 | % operators that should be typeset in Roman font 177 | \newcommand{\poly}{\mathrm{poly}} 178 | \newcommand{\polylog}{\mathrm{polylog}} 179 | \newcommand{\sgn}{\mathrm{sgn}} 180 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 181 | \newcommand{\val}{{\mathrm{val}}} 182 | % complexity classes 183 | \renewcommand{\P}{\mathrm{P}} 184 | \newcommand{\NP}{\mathrm{NP}} 185 | \newcommand{\BPP}{\mathrm{BPP}} 186 | \newcommand{\DTIME}{\mathrm{DTIME}} 187 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 188 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 189 | \newcommand{\NTIME}{\mathrm{NTIME}} 190 | % values associated to optimization algorithm instances 191 | \newcommand{\Opt}{{\mathsf{Opt}}} 192 | \newcommand{\Alg}{{\mathsf{Alg}}} 193 | \newcommand{\Lp}{{\mathsf{Lp}}} 194 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 195 | \newcommand{\Exp}{{\mathsf{Exp}}} 196 | % if you think the sum and product signs are too big in your math mode; x convention 197 | % as in the probability operators 198 | \newcommand{\littlesum}{{\textstyle \sum}} 199 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 200 | \newcommand{\littleprod}{{\textstyle \prod}} 201 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 202 | % horizontal line across the page 203 | \newcommand{\horz}{ 204 | \vspace{-.4in} 205 | \begin{center} 206 | \begin{tabular}{p{\textwidth}}\\ 207 | \hline 208 | \end{tabular} 209 | \end{center} 210 | } 211 | % calligraphic letters 212 | \newcommand{\calA}{{\cal A}} 213 | \newcommand{\calB}{{\cal B}} 214 | \newcommand{\calC}{{\cal C}} 215 | \newcommand{\calD}{{\cal D}} 216 | \newcommand{\calE}{{\cal E}} 217 | \newcommand{\calF}{{\cal F}} 218 | \newcommand{\calG}{{\cal G}} 219 | \newcommand{\calH}{{\cal H}} 220 | \newcommand{\calI}{{\cal I}} 221 | \newcommand{\calJ}{{\cal J}} 222 | \newcommand{\calK}{{\cal K}} 223 | \newcommand{\calL}{{\cal L}} 224 | \newcommand{\calM}{{\cal M}} 225 | \newcommand{\calN}{{\cal N}} 226 | \newcommand{\calO}{{\cal O}} 227 | \newcommand{\calP}{{\cal P}} 228 | \newcommand{\calQ}{{\cal Q}} 229 | \newcommand{\calR}{{\cal R}} 230 | \newcommand{\calS}{{\cal S}} 231 | \newcommand{\calT}{{\cal T}} 232 | \newcommand{\calU}{{\cal U}} 233 | \newcommand{\calV}{{\cal V}} 234 | \newcommand{\calW}{{\cal W}} 235 | \newcommand{\calX}{{\cal X}} 236 | \newcommand{\calY}{{\cal Y}} 237 | \newcommand{\calZ}{{\cal Z}} 238 | % bold letters (useful for random variables) 239 | \renewcommand{\a}{{\boldsymbol a}} 240 | \renewcommand{\b}{{\boldsymbol b}} 241 | \renewcommand{\c}{{\boldsymbol c}} 242 | \renewcommand{\d}{{\boldsymbol d}} 243 | \newcommand{\e}{{\boldsymbol e}} 244 | \newcommand{\f}{{\boldsymbol f}} 245 | \newcommand{\g}{{\boldsymbol g}} 246 | 
\newcommand{\h}{{\boldsymbol h}} 247 | \renewcommand{\i}{{\boldsymbol i}} 248 | \renewcommand{\j}{{\boldsymbol j}} 249 | \renewcommand{\k}{{\boldsymbol k}} 250 | \newcommand{\m}{{\boldsymbol m}} 251 | \newcommand{\n}{{\boldsymbol n}} 252 | \renewcommand{\o}{{\boldsymbol o}} 253 | \newcommand{\p}{{\boldsymbol p}} 254 | \newcommand{\q}{{\boldsymbol q}} 255 | \renewcommand{\r}{{\boldsymbol r}} 256 | \newcommand{\s}{{\boldsymbol s}} 257 | \renewcommand{\t}{{\boldsymbol t}} 258 | \renewcommand{\u}{{\boldsymbol u}} 259 | \renewcommand{\v}{{\boldsymbol v}} 260 | \newcommand{\w}{{\boldsymbol w}} 261 | \newcommand{\x}{{\boldsymbol x}} 262 | \newcommand{\y}{{\boldsymbol y}} 263 | \newcommand{\z}{{\boldsymbol z}} 264 | \newcommand{\A}{{\boldsymbol A}} 265 | \newcommand{\B}{{\boldsymbol B}} 266 | \newcommand{\D}{{\boldsymbol D}} 267 | \newcommand{\G}{{\boldsymbol G}} 268 | \renewcommand{\H}{{\boldsymbol H}} 269 | \newcommand{\I}{{\boldsymbol I}} 270 | \newcommand{\J}{{\boldsymbol J}} 271 | \newcommand{\K}{{\boldsymbol K}} 272 | \renewcommand{\L}{{\boldsymbol L}} 273 | \newcommand{\M}{{\boldsymbol M}} 274 | \renewcommand{\O}{{\boldsymbol O}} 275 | \renewcommand{\S}{{\boldsymbol S}} 276 | \newcommand{\T}{{\boldsymbol T}} 277 | \newcommand{\U}{{\boldsymbol U}} 278 | \newcommand{\V}{{\boldsymbol V}} 279 | \newcommand{\W}{{\boldsymbol W}} 280 | \newcommand{\X}{{\boldsymbol X}} 281 | \newcommand{\Y}{{\boldsymbol Y}} 282 | % useful for Fourier analysis 283 | \newcommand{\bits}{\{-1,1\}} 284 | \newcommand{\bitsn}{\{-1,1\}^n} 285 | \newcommand{\bn}{\bitsn} 286 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 287 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 288 | % if you want 289 | \newcommand{\half}{{\textstyle \frac12}} 290 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 291 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 292 | % Feel free to ignore the rest of this file 293 | \def\ScribeStr{??} 294 | \def\LecStr{??} 295 | \def\LecNum{??} 296 | \def\LecTitle{??} 297 | \def\LecDate{??} 298 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 299 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 300 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 301 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 302 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 303 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 304 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 305 | \newdimen\headerwidth 306 | \newcommand{\MakeScribeTop}{ 307 | \noindent 308 | \begin{center} 309 | \framebox{ 310 | \vbox{ 311 | \headerwidth=\textwidth 312 | \advance\headerwidth by -0.22in 313 | \hbox to \headerwidth {\hfill AIML - CS 337} 314 | \vspace{4mm} 315 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 316 | \vspace{2mm} 317 | \hbox to \headerwidth {\hfill \LecDate \hfill} 318 | \vspace{2mm} 319 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 320 | } 321 | } 322 | \end{center} 323 | \vspace*{4mm}} -------------------------------------------------------------------------------- /Lecture 21/Lecture 21.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 21/Lecture 21.pdf -------------------------------------------------------------------------------- /Lecture 21/main.tex: 
-------------------------------------------------------------------------------- 1 | \documentclass[12pt]{article} 2 | \usepackage[english]{babel} 3 | \usepackage[utf8x]{inputenc} 4 | \usepackage[T1]{fontenc} 5 | \usepackage{scribe} 6 | \usepackage{listings} 7 | 8 | \usepackage{url} 9 | 10 | \Scribe{Group 43} 11 | \Lecturer{Abir De} 12 | \LectureNumber{21} 13 | \LectureDate{7th November 2022} 14 | \LectureTitle{Generative Models} 15 | 16 | \lstset{style=mystyle} 17 | 18 | \begin{document} 19 | \MakeScribeTop 20 | 21 | %############################################################# 22 | %############################################################# 23 | %############################################################# 24 | %############################################################# 25 | 26 | \section{Introduction} 27 | So far, we have looked at various discriminative algorithms. It is important to understand the difference between discriminative models and generative models. Discriminative models draw boundaries in the data space, while generative models try to model how the data is placed throughout the space. A generative model focuses on explaining how the data was generated, while a discriminative model focuses on predicting the labels of the data. 28 | % Here's a citation~\cite{Kar84a}. 29 | 30 | \section{What to expect from a Generative Model} 31 | Let's say we are dealing with images. Given a set of images, $I = \{\mathbf{x_{1}}, \mathbf{x_{2}}, \ldots, \mathbf{x_{N}}\}$, a generative model tries to learn the underlying distribution from which the data might have been obtained. However, this is not an easy task. We also expect the generative model to generate new images similar to a given image $\mathbf{x}$, i.e.\ the model should be able to sample an output image $\mathbf{x'}$ from the conditional distribution given an input image $\mathbf{x}$. Two obvious challenges are: how do we approximate the underlying distribution, and how do we measure the similarity between an input $\mathbf{x}$ and a generated output $\mathbf{x'}$? 32 | 33 | \section{Universal approximator of a distribution} 34 | \paragraph{Goal} To learn a function $P:\mathbb{R}^{d} \rightarrow [0, \infty)$ satisfying $\int_{-\infty}^{\infty}{P(\mathbf{x})}d\mathbf{x} = 1$ and capable of capturing any distribution. For example, we would like $P_{\theta}(\cdot)$ to capture different distributions as the parameter $\theta$ changes. 35 | \paragraph{A possible approach} 36 | For each $\mathbf{x}$, we can model the distribution as a Normal distribution with mean and covariance matrices taken as a function of $\theta$ and $\mathbf{x}$. We can model the mean and covariance as the output of some neural network. We can write 37 | \begin{equation*} 38 | P_{\theta}(\mathbf{x}) \sim \mathcal{N}(\mu_{\theta}(\mathbf{x}), \Sigma_{\theta}(\mathbf{x})) 39 | \end{equation*} 40 | The neural network should be able to learn $\mu_{\theta}(\mathbf{x})$ and $\Sigma_{\theta}(\mathbf{x})$ so that the Normal distribution captures the desired distribution. 41 | 42 | \paragraph{How do we train this model?} 43 | We can use the KL divergence to measure the similarity between two probability distributions. Here we will be comparing the empirically generated PDF and the PDF obtained by the generative model.
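As a rough illustration of this training principle (this example is ours, not from the lecture), consider the simplest case where $\mu_{\theta}$ and $\Sigma_{\theta}$ are free parameters rather than neural-network outputs. Maximising the log-likelihood of the data (equivalently, minimising the KL divergence from the empirical distribution to the model) then has a closed-form solution: the sample mean and sample covariance. A minimal Python sketch on synthetic data:
\begin{lstlisting}[language=Python]
import numpy as np

# Synthetic "dataset": N points in d dimensions (a placeholder for real data).
rng = np.random.default_rng(0)
N, d = 500, 2
data = rng.normal(loc=[1.0, -2.0], scale=[0.5, 1.5], size=(N, d))

# Maximum-likelihood fit of P_theta(x) = N(mu, Sigma): for a Gaussian family,
# the optimal parameters are just the sample mean and sample covariance.
mu = data.mean(axis=0)
Sigma = np.cov(data, rowvar=False)

def gaussian_logpdf(x, mu, Sigma):
    """Log-density of a multivariate Gaussian evaluated at x."""
    k = len(mu)
    diff = x - mu
    inv = np.linalg.inv(Sigma)
    logdet = np.linalg.slogdet(Sigma)[1]
    return -0.5 * (k * np.log(2 * np.pi) + logdet + diff @ inv @ diff)

# Average log-likelihood of the data under the fitted model.
avg_ll = np.mean([gaussian_logpdf(x, mu, Sigma) for x in data])
print(mu, Sigma, avg_ll)
\end{lstlisting}
When $\mu_{\theta}$ and $\Sigma_{\theta}$ are instead produced by a neural network, the same log-likelihood objective is maximised by gradient ascent rather than in closed form.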
44 | 45 | \paragraph{KL divergence} For distributions $P$ and $Q$ of a continuous random variable, the KL divergence is defined to be 46 | \begin{equation*} 47 | KL(P || Q) = \int_{-\infty}^{\infty} p(x)\log{\left(\frac{p(x)}{q(x)}\right)}dx 48 | \end{equation*} 49 | It is a measure of how one probability distribution $P$ differs from a second, reference probability distribution $Q$. 50 | 51 | 52 | \section{Generative procedure: Latent Variable Model} 53 | Let $\mathbf{x}$ denote the variable that represents our data and assume that $\mathbf{x}$ is generated from a latent variable $z$ that is not directly observed. Here, $z$ can be thought of as an \textit{encoded} representation of $\mathbf{x}$ and is analogous to a seed for generation. We assume that we can easily sample the latent variables according to some probability density function $P(z)$ defined over some high dimensional space $\mathcal{Z}$. Then, say we have a family of deterministic functions $f(z; \theta)$, parameterized 54 | by a vector $\theta$ in some space $\Theta$, where $f : \mathcal{Z} \times \Theta \rightarrow \mathcal{X}$. $f$ is deterministic, but 55 | if $z$ is random and $\theta$ is fixed, then $f(z; \theta)$ is a random variable in the space $\mathcal{X}$. We wish to optimize $\theta$ such that we can sample $z$ from $P(z)$ and, with 56 | high probability, $f(z; \theta)$ will be \textit{like} the $\mathbf{x}$’s in our dataset.\\ 57 | To accommodate the notion of likelihood, we replace these deterministic functions with a probability distribution. The total probability of $\mathbf{x}$ being observed can then be expressed as 58 | \begin{equation*} 59 | P(\mathbf{x}) = \int{P(\mathbf{x}|z; \theta)P(z)dz} 60 | \end{equation*} 61 | For example, we can assume that both $z$ and $\mathbf{x}|z$ are distributed normally. Then the generative process can be summarized as 62 | \begin{align*} 63 | z &\sim \mathcal{N}(\mathbf{0}, I)\\ 64 | \mathbf{x}|z &\sim \mathcal{N}(\mu_{\theta}(z), \Sigma_{\theta}(z)) 65 | \end{align*} 66 | Here, $\mu_{\theta}(z)$ and $\Sigma_{\theta}(z)$ can be thought of as the output of some neural network which takes in $z$ as an input. \\ 67 | The model should learn to increase $P(\mathbf{x})$ given some training data, i.e., the generative model should learn to make the training data more likely.
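To make this generative procedure concrete, the following minimal Python sketch (ours, not from the lecture) performs the sampling process described above, often called ancestral sampling. The decoder here is a tiny one-hidden-layer network with random, untrained weights and a fixed diagonal covariance; it is only a placeholder for a properly trained $\mu_{\theta}(z)$, $\Sigma_{\theta}(z)$.
\begin{lstlisting}[language=Python]
import numpy as np

rng = np.random.default_rng(0)
k, d, hidden = 2, 4, 16          # latent dim, data dim, hidden width

# Placeholder decoder weights (in practice these are the learned parameters theta).
W1, b1 = rng.normal(size=(hidden, k)), np.zeros(hidden)
W2, b2 = rng.normal(size=(d, hidden)), np.zeros(d)
sigma = 0.1                       # fixed diagonal std, an assumption made for simplicity

def mu_theta(z):
    """Mean of P(x|z): a small MLP applied to the latent code z."""
    h = np.tanh(W1 @ z + b1)
    return W2 @ h + b2

def sample_x():
    """Ancestral sampling: z ~ N(0, I), then x | z ~ N(mu_theta(z), sigma^2 I)."""
    z = rng.normal(size=k)
    return mu_theta(z) + sigma * rng.normal(size=d)

samples = np.stack([sample_x() for _ in range(5)])
print(samples)
\end{lstlisting}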
68 | \paragraph{Training} Let's say we have some training data $\{\mathbf{x_{1}}, \mathbf{x_{2}}, \ldots, \mathbf{x_{N}}\}$. As mentioned above, we focus on maximizing the \textbf{Likelihood} of the training data. The likelihood of the training data under the generative model can be written as 69 | \begin{align*} 70 | P(\mathbf{x_{1}}, \mathbf{x_{2}}, \ldots, \mathbf{x_{N}}) &= \prod_{i=1}^{N}P(\mathbf{x_{i}})\\ 71 | &= \prod_{i=1}^{N} \int{P_{\theta}(\mathbf{x_{i}}|z)P(z)dz} 72 | \end{align*} 73 | So, we can write the log likelihood as 74 | \begin{align*} 75 | LL(\mathbf{x_{1}}, \mathbf{x_{2}}, \ldots, \mathbf{x_{N}}) &= \sum_{i=1}^{N}\log\int{P_{\theta}(\mathbf{x_{i}}|z)P(z)dz} 76 | \end{align*} 77 | Using \textit{Jensen's} inequality, 78 | \begin{equation*} 79 | \sum_{i=1}^{N}\log{\mathbb{E}_{z}[P_{\theta}(\mathbf{x_{i}}|z)]} \geq \sum_{i=1}^{N}\mathbb{E}_{z}[\log{P_{\theta}(\mathbf{x_{i}}|z)}] 80 | \end{equation*} 81 | So, we can finally write 82 | \begin{equation*} 83 | \sum_{i=1}^{N}\log\int{P_{\theta}(\mathbf{x_{i}}|z)P(z)dz} \geq \sum_{i=1}^{N}\int{\left[\log{P_{\theta}(\mathbf{x_{i}}|z)}\right]P(z)dz} = \sum_{i=1}^{N}\mathbb{E}_{z}[\log{P_{\theta}(\mathbf{x_{i}}|z)}] 84 | \end{equation*} 85 | Based on the above inequality, we can instead aim to maximise $\sum_{i=1}^{N}\mathbb{E}_{z}[\log{P_{\theta}(\mathbf{x_{i}}|z)}]$.\\ 86 | We can compute $\mathbb{E}_{z}[\log{P_{\theta}(\mathbf{x_{i}}|z)}]$ approximately. For a given $i$, first obtain a large number of samples $\{s_{i1}, s_{i2}, \ldots, s_{iS}\}$ of $z$ from the prior $P(z)$ and compute $\frac{1}{S}\sum_{j=1}^{S}\log P_{\theta}(\mathbf{x_{i}}|s_{ij})$. 87 | Thus we finally try to maximize 88 | \begin{align*} 89 | \sum_{i=1}^{N}\sum_{j=1}^{S}\frac{\log{P_{\theta}(\mathbf{x_{i}}|s_{ij})}}{S} 90 | \end{align*} 91 | 92 | \section{Conditional generation} 93 | So far we have seen the generation of a sample based on the training data without any conditions. Suppose we now modify the problem: given a sample $\mathbf{x^{*}}$, generate a sample $\mathbf{x}$ which is most similar to $\mathbf{x^{*}}$. In other words, we would like to maximize 94 | \begin{equation*} 95 | P(\mathbf{x|x^{*}}) = \int{P(\mathbf{x}|z)P(z|\mathbf{x^{*}})dz} 96 | \end{equation*} 97 | The first quantity $P(\mathbf{x}|z)$ within the integral is known. The second part can be calculated as 98 | \begin{align*} 99 | P(z|\mathbf{x^{*}}) &= \frac{P(\mathbf{x^{*}}|z)P(z)}{P(\mathbf{x^{*}})}\\ 100 | &= \frac{P(\mathbf{x^{*}}|z)P(z)}{\int_{\mathcal{Z}}{P(\mathbf{x^{*}}|s)P(s)ds}} 101 | \end{align*} 102 | Thus, the required probability becomes 103 | \begin{equation*} 104 | P(\mathbf{x}|\mathbf{x^{*}}) = \int{P(\mathbf{x}|z)\frac{P(\mathbf{x^{*}}|z)P(z)}{\int_{\mathcal{Z}}{P(\mathbf{x^{*}}|s)P(s)ds}}dz} 105 | \end{equation*} 106 | All quantities in the above equation can be computed using known information. However, it is expensive to compute in general.\\ 107 | Alternatively, we can model $P(z|\mathbf{x^{*}})$ separately. This means that we need a new function $Q_{\mathbf{x}}(z)$ which can take a value of $\mathbf{x}$ and give us a distribution over the $z$ values that are most likely to produce $\mathbf{x}$. The space of $z$ values that are likely under $Q$ should be much smaller than the space of all $z$ values, which were all previously equally likely under the prior $P(z)$.
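As a rough sketch of what such a $Q_{\mathbf{x}}(z)$ could look like (an illustration of ours, not part of the lecture), a common choice, as in variational autoencoders \cite{VAET}, is a Gaussian whose mean and diagonal variance are produced by an encoder network applied to $\mathbf{x}$. The weights below are random placeholders for learned parameters.
\begin{lstlisting}[language=Python]
import numpy as np

rng = np.random.default_rng(1)
d, k, hidden = 4, 2, 16           # data dim, latent dim, hidden width

# Placeholder encoder weights (learned in practice).
W1, b1 = rng.normal(size=(hidden, d)), np.zeros(hidden)
Wm, bm = rng.normal(size=(k, hidden)), np.zeros(k)
Ws, bs = rng.normal(size=(k, hidden)), np.zeros(k)

def q_params(x):
    """Encoder: map x to the mean and diagonal std of Q_x(z)."""
    h = np.tanh(W1 @ x + b1)
    mu = Wm @ h + bm
    log_var = Ws @ h + bs
    return mu, np.exp(0.5 * log_var)

def sample_z(x, n=3):
    """Draw z ~ Q_x(z) = N(mu(x), diag(std(x)^2))."""
    mu, std = q_params(x)
    return mu + std * rng.normal(size=(n, k))

x_star = rng.normal(size=d)       # stand-in for an observed sample x*
print(sample_z(x_star))
\end{lstlisting}
Such samples of $z$ can then be pushed through the decoder $P(\mathbf{x}|z)$ to generate outputs similar to $\mathbf{x^{*}}$; this is exactly the role $Q_{\mathbf{x}}(z)$ plays in the training objective derived next.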
108 | \paragraph{Training $Q_{\mathbf{x}}(z)$} A separate neural network can be used to model $Q_{\mathbf{x}}(z)$. 109 | Our objective can be to minimise the KL divergence between $Q_{\mathbf{x}}(z)$ and $P(z|\mathbf{x})$: 110 | \begin{align*} 111 | \mathrm{KL}(Q_{\mathbf{x}}(z), P(z|\mathbf{x})) &= \mathbb{E}_{z \sim Q_{\mathbf{x}}}\log{Q_{\mathbf{x}}(z)} - \mathbb{E}_{z \sim Q_{\mathbf{x}}}\left(\log\frac{P(\mathbf{x}|z)P(z)}{P(\mathbf{x})}\right)\\ 112 | &= \mathbb{E}_{z \sim Q_{\mathbf{x}}}\log Q_{\mathbf{x}}(z) - \mathbb{E}_{z \sim Q_{\mathbf{x}}}\log P(\mathbf{x}|z) - \mathbb{E}_{z \sim Q_{\mathbf{x}}}\log P(z) + \mathbb{E}_{z \sim Q_{\mathbf{x}}}\log P(\mathbf{x})\\ 113 | &= \mathrm{KL}(Q_{\mathbf{x}}(z), P(z)) - \mathbb{E}_{z \sim Q_{\mathbf{x}}}\log{P(\mathbf{x}|z)} + \log{P(\mathbf{x})} 114 | \end{align*} 115 | where the last term simplifies because $\log P(\mathbf{x})$ does not depend on $z$. Our goal is to minimise the KL divergence between $Q_{\mathbf{x}}(z)$ and $P(z|\mathbf{x})$; since $\log P(\mathbf{x})$ is fixed with respect to $Q$, this is equivalent to maximizing $\mathbb{E}_{z \sim Q_{\mathbf{x}}}\log{P(\mathbf{x}|z)} - \mathrm{KL}(Q_{\mathbf{x}}(z), P(z))$. The first part of this quantity aims to maximize the likelihood of $\mathbf{x}$ under $z$'s drawn from $Q_{\mathbf{x}}$, whereas the second part keeps $Q_{\mathbf{x}}(z)$ close to the prior $P(z)$. The expression we obtain here is also referred to as the Evidence Lower Bound (\textbf{ELBO}) \cite{ELBO}. 116 | Thus, our objective function will be $\sum_{i=1}^{N}\left[\mathbb{E}_{z \sim Q_{\mathbf{x_{i}}}}\log{P(\mathbf{x_{i}}|z)} - \mathrm{KL}(Q_{\mathbf{x_{i}}}(z), P(z))\right]$. We aim to maximize the ELBO and identify the underlying parameters which achieve the maximum.\\ 117 | 118 | 119 | Please refer to \cite{VAET} and \cite{VAE} for more information on Variational Autoencoders. 120 | 121 | 122 | % %%%%%%%%%%% If you don't have citations then comment the lines below: 123 | % % 124 | \bibliographystyle{abbrv} % if you need a bibliography 125 | \bibliography{mybib} % assuming yours is named mybib.bib 126 | 127 | %%%%%%%%%%% end of doc 128 | \end{document} 129 | -------------------------------------------------------------------------------- /Lecture 21/mybib.bib: -------------------------------------------------------------------------------- 1 | @misc{VAET, 2 | title={Tutorial on Variational Autoencoders}, 3 | author={Carl Doersch}, 4 | year={2016}, 5 | howpublished = "\url{https://arxiv.org/pdf/1606.05908.pdf}" 6 | } 7 | 8 | @misc{VAE, 9 | title={Understanding Variational Autoencoders}, 10 | author={Joseph Rocca}, 11 | year={2019}, 12 | howpublished = "\url{https://towardsdatascience.com/understanding-variational-autoencoders-vaes-f70510919f73}" 13 | } 14 | 15 | @misc{ELBO, 16 | title={Evidence Lower Bound}, 17 | howpublished = "\url{https://en.wikipedia.org/wiki/Evidence_lower_bound}" 18 | } 19 | -------------------------------------------------------------------------------- /Lecture 3/classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 3/classification.png -------------------------------------------------------------------------------- /Lecture 3/graph.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 3/graph.PNG -------------------------------------------------------------------------------- /Lecture 3/lecture 3.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 3/lecture 3.pdf -------------------------------------------------------------------------------- /Lecture 3/mybib.bib: -------------------------------------------------------------------------------- 1 | @book{interactivelinalg, 2 | title="Interactive Linear Algebra", 3 | author="Dan Margalit and Joseph Rabinoff", 4 | year="2019" 5 | } 6 | 7 | @INPROCEEDINGS{averageconsensus, 8 | author={Lin Xiao and Boyd, S.}, 9 | booktitle={42nd IEEE International Conference on Decision and Control (IEEE Cat. No.03CH37475)}, 10 | title={Fast linear iterations for distributed averaging}, 11 | year={2003}, 12 | volume={5}, 13 | number={}, 14 | pages={4997-5002 Vol.5}, 15 | doi={10.1109/CDC.2003.1272421} 16 | } -------------------------------------------------------------------------------- /Lecture 4/CS337_Lecture_4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 4/CS337_Lecture_4.pdf -------------------------------------------------------------------------------- /Lecture 4/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass[12pt]{article} 2 | \usepackage[english]{babel} 3 | \usepackage[utf8x]{inputenc} 4 | \usepackage[T1]{fontenc} 5 | \usepackage{scribe} 6 | \usepackage{listings} 7 | \usepackage{dsfont} 8 | \Scribe{Sankalp, Shikhar, Ameeya, Tavva} 9 | \Lecturer{Abir De} 10 | \LectureNumber{4} 11 | \LectureDate{18/08/22} 12 | \LectureTitle{Introduction to Loss Functions} 13 | 14 | \lstset{style=mystyle} 15 | 16 | \begin{document} 17 | \MakeScribeTop 18 | 19 | %############################################################# 20 | %############################################################# 21 | %############################################################# 22 | %############################################################# 23 | 24 | In this lecture we develop methods that evaluate how well machine learning models learn our data set. This is accomplished by means of {\em loss} functions. We look at what goes into the design of such loss functions. 25 | 26 | \section{Motivation for Loss Functions} 27 | We study loss functions in the simple setting of classification, which is defined as follows: 28 | \subsection{Review of Classification Task} 29 | The general classification task can be described as follows: 30 | \paragraph{Classification Task:} Given a dataset $\{(x_i, y_i)\}$ where $x_i \in \mathbb{R}^d$ and $y_i \in \mathcal{Y}$, where $d$ is the dimension of the input features and $\mathcal{Y}$ is the set of target labels, our goal is to devise an algorithm that selects a hypothesis $h \in H$, with $h : \mathbb{R}^d \rightarrow \mathcal{Y}$, so that given some unseen test case $x_j$ and the corresponding label $y_j$, $h(x_j) = y_j$. For example, in the \verb|MNIST| image classification task, each $x_i \in \mathbb{R}^{28 \times 28}$ and $\mathcal{Y} = \{0, 1, \cdots, 9\}$ is the set of class labels.\\ 31 | \\ 32 | However, in order to devise a good algorithm that selects the best hypothesis $h^{*} \in H$, the developer needs to know a more concrete metric which is used by the user to evaluate the algorithm on the test set. This metric, which mathematically quantifies how well the algorithm models the data in any dataset, is called a \textbf{Loss Function}.
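As a small illustration of what such a metric looks like in practice (this snippet is ours, not from the lecture), suppose two candidate hypotheses are evaluated on a handful of held-out points; the metric below simply counts mismatches between predictions and labels, which is exactly the quantity formalised in the next subsection.
\begin{lstlisting}[language=Python]
import numpy as np

# Toy held-out test set: 1-D features with labels in {-1, +1}.
X_test = np.array([[-2.0], [-1.0], [0.5], [2.0]])
y_test = np.array([-1, -1, 1, 1])

# Two candidate hypotheses (illustrative only).
h_constant = lambda x: 1                        # always predict +1
h_threshold = lambda x: 1 if x[0] > 0 else -1   # predict by the sign of the feature

def num_mistakes(h, X, y):
    """Metric used to compare hypotheses: count of misclassified test points."""
    return sum(h(x) != yi for x, yi in zip(X, y))

print(num_mistakes(h_constant, X_test, y_test))   # 2
print(num_mistakes(h_threshold, X_test, y_test))  # 0
\end{lstlisting}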
33 | \subsection{Loss Minimization Task} 34 | Now that we are aware of the metric which the user will use to evaluate the algorithm, we try to model the classification task as a loss minimization task instead. The general classification task can thus be modelled by the following loss minimization task:\\ 35 | \begin{align*} 36 | h^{*} = \arg \min_{h \in H}\sum_{j=1}^{M}\mathds{1}(h(x_j) \neq y_j) 37 | \end{align*} 38 | Here $x_j$ is an image in the test dataset, $M$ is the number of images in the test dataset and $\mathds{1}(\cdot)$ is the indicator function, which equals $0$ if its argument is false and $1$ if the argument holds true. If we assume that (a) we could enumerate all the possible functions $h : \mathbb{R}^d \rightarrow \mathcal{Y}$ and (b) the function that minimizes the value over the training set also minimizes it over the test set, we could obtain $h^{*}$ as follows: 39 | \begin{align*} 40 | h^{*} = \arg \min_{h \in H}\sum_{i=1}^{D}\mathds{1}(h(x_i) \neq y_i) 41 | \end{align*} 42 | where $x_i$ are the images in the training dataset, $y_i$ are the corresponding labels and $D$ is the number of images in the training dataset. However, both our assumptions are clearly unreasonable. Most importantly, it is not possible to enumerate all possible functions, particularly when $H$ is an infinite set, which is often the case. Therefore, we look for relaxations in the loss function in order to make the loss minimization task solvable. Some suitable relaxations of the loss function are discussed in the next section. 43 | \section{Relaxation of Loss Function} 44 | In this section, we will look at the art of designing loss functions: how we can arrive, in steps, at a mathematically appealing loss function that models the classification problem.\\ 45 | In all the subsections below we will consider the classification problem where every training point $x_i \in X$ has a label $y_i \in \{-1,1\}$. 46 | \subsection{Constant Hypothesis} 47 | While developing the loss function, one student suggested using a constant hypothesis. In other words, $h(x_i) = c$, where $c$ is a uniformly generated random number in the interval $[-1,1]$.\\ 48 | The loss function optimization problem can then be written as, 49 | \begin{align*} 50 | c^{*} = \arg\min_{c} \sum_{i=1}^{M}\mathds{1}(c \ne y_i) 51 | \end{align*} 52 | We know that any single point has probability $0$ under a continuous distribution, so $P(c=1)$ and $P(c=-1)$ are both zero. Hence, the loss function value is $M$ (with probability 1) in this case. We cannot do better if we stick to this model with the uniform distribution. \\ 53 | What if we choose the constant $c$ among the values $\{-1,1\}$? Let $n_+$ denote the number of points in the training data set with label $+1$, and similarly let $n_-$ denote the number of points with label $-1$. The optimization problem is the same as above, but with the constraint that $c \in \{-1,1\}$. It can easily be seen that if we take $c$ to be the majority label (i.e.\ $c = +1$ if $n_+ \geq n_-$ and $c = -1$ otherwise), the loss, say $L$, is $\min(n_+, n_-)$. This is the minimum that we can get with a constant hypothesis. This method is the majority (mode) predictor, since we took the hypothesis to be the mode of the labels in the training data.
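The following short sketch (ours, not from the lecture) makes the majority-label baseline concrete on a toy set of $\pm 1$ labels: the best constant hypothesis is the mode of the labels, and its $0$--$1$ loss is $\min(n_+, n_-)$.
\begin{lstlisting}[language=Python]
import numpy as np

# Toy training labels in {-1, +1}; the features are irrelevant for a constant hypothesis.
y_train = np.array([1, 1, 1, -1, -1, 1, -1, 1])

n_plus = np.sum(y_train == 1)
n_minus = np.sum(y_train == -1)

# Best constant hypothesis: the majority (mode) label.
c_star = 1 if n_plus >= n_minus else -1

# Its 0-1 loss on the same data equals min(n_plus, n_minus).
loss = np.sum(c_star != y_train)
print(c_star, loss, min(n_plus, n_minus))   # 1 3 3
\end{lstlisting}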
54 | \subsection{Linear Hypothesis with Indicator Cost} 55 | Let's see if we can do better by increasing the complexity of our hypothesis class $H$. We suppose $h(x_i) = w^Tx_i+b$, where $w$ is a vector of parameters of the same dimension as $x_i$ and $b$ is the bias parameter. Hence, we have the following task at hand, 56 | \begin{align*} 57 | \{w^{*}, b^{*}\} = \underset{w, b}{\arg\min} \sum_{i=1}^{M} \mathbb{I}(w^Tx_i+b \ne y_i) 58 | \end{align*} 59 | For brevity, let $L_1$ denote the minimum loss achieved by considering a constant hypothesis and $L_2$ denote the minimum loss achieved by considering a linear hypothesis; then it is guaranteed that $L_2 \leq L_1$. The reason is that we are searching a larger space to fit the data: our linear model reduces to the constant hypothesis model by taking $(w, b) = (\mathbf{0}, c)$. \\ 60 | We can make a more general statement here. As model complexity increases, performance on the data used to build the model (training data) improves. However, performance on an independent set (validation data) may improve up to a point and then start to get worse. This is called \textbf{overfitting}.\\ 61 | Nevertheless, we are guaranteed to do at least as well as the constant hypothesis. 62 | \subsection{Linear Hypothesis with Absolute Difference Cost} 63 | Another loss function was suggested, which takes the cost as the absolute value of the difference between the hypothesis and the label value (assuming that the labels are mapped to some subset of the integers). In this case the optimization problem becomes, 64 | \begin{align*} 65 | \{w^{*}, b^{*}\} = \underset{w, b}{\arg\min} \sum_{i=1}^{M} |w^Tx_i+b - y_i| 66 | \end{align*} 67 | But even this is not a good choice. $w^Tx_i + b$ takes values in $\mathbb{R}$ but $y_i \in \{-1,1\}$. A good loss function should act on predictions that are on the same scale as the ground-truth targets, which is what we turn to next. 68 | 69 | \subsection{Linear Hypothesis with Sign and Indicator Cost} 70 | %We can try one more approach on this. 71 | Instead of looking at the value of $w^Tx_i + b$, what if we look at its sign? Hence, if $w^Tx_i+b > 0$, then the estimated label should be $1$ and vice-versa (the boundary condition can be included in either case). This gives us the following optimization problem, 72 | \begin{align*} 73 | \{w^{*}, b^{*}\} = \underset{w, b}{\arg\min} \sum_{i=1}^{M} \mathbb{I}(\text{sgn}(w^Tx_i+b) \ne y_i) 74 | \end{align*} 75 | where $\text{sgn}(\cdot)$ denotes the signum function.
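To make this sign-based $0$--$1$ loss concrete, here is a small sketch (ours, not from the lecture) that evaluates a fixed linear hypothesis on toy two-dimensional data; searching over $(w, b)$ to minimise this count is exactly the optimization problem above.
\begin{lstlisting}[language=Python]
import numpy as np

rng = np.random.default_rng(0)

# Toy 2-D data: class +1 centred at (2, 2), class -1 centred at (-2, -2).
X = np.vstack([rng.normal(2, 1, size=(20, 2)), rng.normal(-2, 1, size=(20, 2))])
y = np.concatenate([np.ones(20), -np.ones(20)])

def zero_one_loss(w, b, X, y):
    """Count points whose predicted sign disagrees with the label."""
    scores = X @ w + b
    preds = np.where(scores > 0, 1, -1)   # boundary assigned to class -1 here
    return int(np.sum(preds != y))

w, b = np.array([1.0, 1.0]), 0.0          # a hand-picked candidate hypothesis
print(zero_one_loss(w, b, X, y))          # small for this well-separated toy data
\end{lstlisting}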
76 | \subsection{Linear Hypothesis with Sigmoid Mapping} 77 | While the above loss function is good, it is not conducive to designing efficient search algorithms that find $h^*$ and is difficult to optimize. Nonetheless, the above loss function can still be used to test the performance of our trained models. 78 | 79 | To make optimization convenient, we map $w^Tx_i+b$, which takes values in $\mathbb{R}$, to the interval $[0,1]$. To do this, we can use the sigmoid activation function, 80 | \begin{align*} 81 | f(x_i) = \frac{1}{1+e^{-(w^Tx_i + b)}} 82 | \end{align*} 83 | There is one downside to this: the sigmoid function does not map onto $[0,1]$; its range is the open interval $(0,1)$. Hence, using the optimization problem below would not make sense, 84 | \begin{align*} 85 | \{w^{*}, b^{*}\} = \underset{w, b}{\arg\min} \sum_{i=1}^{M} \mathbb{I}(f(x_i) \ne \frac{y_i+1}{2}) 86 | \end{align*} 87 | One may be tempted to use the squared absolute difference cost, $|f(x_i) - \frac{y_i+1}{2}|^2$, which makes sense at first glance. The issue we may incur due to this is a loss that is a non-convex function of the parameters. It may be difficult to converge to a global minimum in such cases.\\ 88 | \newline 89 | 90 | There is a probabilistic approach to loss functions as well, which will be discussed in the upcoming lectures, but we will just state the result here, 91 | \begin{align*} 92 | \{w^{*}, b^{*}\} = \underset{w, b}{\arg\min} \sum_{i=1}^{M}\left [ -\left (\frac{y_i+1}{2}\right )\log(f(x_i))-\left (1-\frac{y_i+1}{2}\right )\log(1-f(x_i))\right ] 93 | \end{align*} 94 | 95 | We can also define the loss in the following manner. We incur a loss if $f(x_i)>0.5$ and $y_i=-1$, or if $f(x_i) \le 0.5$ and $y_i = 1$. We can hence write the following optimization problem, 96 | \begin{align*} 97 | \{w^{*}, b^{*}\} = \underset{w, b}{\arg\min} \sum_{i=1}^{M} \max \left (0, \left ( \frac{1}{2} - f(x_i)\right )y_i\right) 98 | \end{align*} 99 | This kind of loss is inspired by the ReLU function, which is defined as \\ 100 | \begin{align} 101 | \text{ReLU}(x) &= 102 | \begin{cases} 103 | 0 & \text{if } x \leq 0 \\ 104 | x & \text{otherwise} 105 | \end{cases} 106 | \end{align} 107 | %%%%%%%%%%% If you don't have citations then comment the lines below: 108 | % 109 | %\bibliographystyle{abbrv} % if you need a bibliography 110 | %\bibliography{mybib} % assuming yours is named mybib.bib 111 | 112 | 113 | %%%%%%%%%%% end of doc 114 | \end{document} 115 | -------------------------------------------------------------------------------- /Lecture 4/scribe.sty: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below...
62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | 122 | \lstset{% 123 | language = Julia, 124 | basicstyle = \ttfamily, 125 | keywordstyle = \bfseries\color{blue}, 126 | stringstyle = \color{magenta}, 127 | commentstyle = \color{ForestGreen}, 128 | showstringspaces = false, 129 | } 130 | 131 | 132 | \newtheorem{theorem}{Theorem}[section] 133 | \newtheorem{lemma}[theorem]{Lemma} 134 | \newtheorem{claim}[theorem]{Claim} 135 | \newtheorem{proposition}[theorem]{Proposition} 136 | \newtheorem{corollary}[theorem]{Corollary} 137 | \newtheorem{fact}[theorem]{Fact} 138 | \newtheorem{example}[theorem]{Example} 139 | \newtheorem{notation}[theorem]{Notation} 140 | \newtheorem{observation}[theorem]{Observation} 141 | \newtheorem{conjecture}[theorem]{Conjecture} 142 | 143 | \theoremstyle{definition} 144 | \newtheorem{definition}[theorem]{Definition} 145 | 146 | \theoremstyle{remark} 147 | \newtheorem{remark}[theorem]{Remark} 148 | 149 | % Setting the theorem style back to plain in case theorems are defined in the main file 150 | \theoremstyle{plain} 151 | 152 | 153 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 154 | % Useful macros 155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 156 | 157 | % for temporarily chunks of text 158 | \newcommand{\ignore}[1]{} 159 | 160 | % Probability/expectation operators. The ones ending in x should be used if you want 161 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 162 | % go below and to the right. NB: \P is remapped below for the complexity class P. 
163 | \renewcommand{\Pr}{{\bf Pr}} 164 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 165 | \newcommand{\E}{{\bf E}} 166 | \newcommand{\Ex}{\mathop{\bf E\/}} 167 | \newcommand{\Var}{{\bf Var}} 168 | \newcommand{\Varx}{\mathop{\bf Var\/}} 169 | \newcommand{\Cov}{{\bf Cov}} 170 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 171 | 172 | % shortcuts for symbol names that are too long to type 173 | \newcommand{\eps}{\epsilon} 174 | \newcommand{\lam}{\lambda} 175 | \renewcommand{\l}{\ell} 176 | \newcommand{\la}{\langle} 177 | \newcommand{\ra}{\rangle} 178 | \newcommand{\wh}{\widehat} 179 | \newcommand{\wt}{\widetilde} 180 | 181 | % "blackboard-fonted" letters for the reals, naturals etc. 182 | \newcommand{\R}{\mathbb R} 183 | \newcommand{\N}{\mathbb N} 184 | \newcommand{\Z}{\mathbb Z} 185 | \newcommand{\F}{\mathbb F} 186 | \newcommand{\Q}{\mathbb Q} 187 | \newcommand{\C}{\mathbb C} 188 | 189 | % operators that should be typeset in Roman font 190 | \newcommand{\poly}{\mathrm{poly}} 191 | \newcommand{\polylog}{\mathrm{polylog}} 192 | \newcommand{\sgn}{\mathrm{sgn}} 193 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 194 | \newcommand{\val}{{\mathrm{val}}} 195 | 196 | % complexity classes 197 | \renewcommand{\P}{\mathrm{P}} 198 | \newcommand{\NP}{\mathrm{NP}} 199 | \newcommand{\BPP}{\mathrm{BPP}} 200 | \newcommand{\DTIME}{\mathrm{DTIME}} 201 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 202 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 203 | \newcommand{\NTIME}{\mathrm{NTIME}} 204 | 205 | % values associated to optimization algorithm instances 206 | \newcommand{\Opt}{{\mathsf{Opt}}} 207 | \newcommand{\Alg}{{\mathsf{Alg}}} 208 | \newcommand{\Lp}{{\mathsf{Lp}}} 209 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 210 | \newcommand{\Exp}{{\mathsf{Exp}}} 211 | 212 | % if you think the sum and product signs are too big in your math mode; x convention 213 | % as in the probability operators 214 | \newcommand{\littlesum}{{\textstyle \sum}} 215 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 216 | \newcommand{\littleprod}{{\textstyle \prod}} 217 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 218 | 219 | % horizontal line across the page 220 | \newcommand{\horz}{ 221 | \vspace{-.4in} 222 | \begin{center} 223 | \begin{tabular}{p{\textwidth}}\\ 224 | \hline 225 | \end{tabular} 226 | \end{center} 227 | } 228 | 229 | % calligraphic letters 230 | \newcommand{\calA}{{\cal A}} 231 | \newcommand{\calB}{{\cal B}} 232 | \newcommand{\calC}{{\cal C}} 233 | \newcommand{\calD}{{\cal D}} 234 | \newcommand{\calE}{{\cal E}} 235 | \newcommand{\calF}{{\cal F}} 236 | \newcommand{\calG}{{\cal G}} 237 | \newcommand{\calH}{{\cal H}} 238 | \newcommand{\calI}{{\cal I}} 239 | \newcommand{\calJ}{{\cal J}} 240 | \newcommand{\calK}{{\cal K}} 241 | \newcommand{\calL}{{\cal L}} 242 | \newcommand{\calM}{{\cal M}} 243 | \newcommand{\calN}{{\cal N}} 244 | \newcommand{\calO}{{\cal O}} 245 | \newcommand{\calP}{{\cal P}} 246 | \newcommand{\calQ}{{\cal Q}} 247 | \newcommand{\calR}{{\cal R}} 248 | \newcommand{\calS}{{\cal S}} 249 | \newcommand{\calT}{{\cal T}} 250 | \newcommand{\calU}{{\cal U}} 251 | \newcommand{\calV}{{\cal V}} 252 | \newcommand{\calW}{{\cal W}} 253 | \newcommand{\calX}{{\cal X}} 254 | \newcommand{\calY}{{\cal Y}} 255 | \newcommand{\calZ}{{\cal Z}} 256 | 257 | % bold letters (useful for random variables) 258 | \renewcommand{\a}{{\boldsymbol a}} 259 | \renewcommand{\b}{{\boldsymbol b}} 260 | \renewcommand{\c}{{\boldsymbol c}} 261 | \renewcommand{\d}{{\boldsymbol d}} 262 | \newcommand{\e}{{\boldsymbol e}} 263 | \newcommand{\f}{{\boldsymbol f}} 264 | 
\newcommand{\g}{{\boldsymbol g}} 265 | \newcommand{\h}{{\boldsymbol h}} 266 | \renewcommand{\i}{{\boldsymbol i}} 267 | \renewcommand{\j}{{\boldsymbol j}} 268 | \renewcommand{\k}{{\boldsymbol k}} 269 | \newcommand{\m}{{\boldsymbol m}} 270 | \newcommand{\n}{{\boldsymbol n}} 271 | \renewcommand{\o}{{\boldsymbol o}} 272 | \newcommand{\p}{{\boldsymbol p}} 273 | \newcommand{\q}{{\boldsymbol q}} 274 | \renewcommand{\r}{{\boldsymbol r}} 275 | \newcommand{\s}{{\boldsymbol s}} 276 | \renewcommand{\t}{{\boldsymbol t}} 277 | \renewcommand{\u}{{\boldsymbol u}} 278 | \renewcommand{\v}{{\boldsymbol v}} 279 | \newcommand{\w}{{\boldsymbol w}} 280 | \newcommand{\x}{{\boldsymbol x}} 281 | \newcommand{\y}{{\boldsymbol y}} 282 | \newcommand{\z}{{\boldsymbol z}} 283 | \newcommand{\A}{{\boldsymbol A}} 284 | \newcommand{\B}{{\boldsymbol B}} 285 | \newcommand{\D}{{\boldsymbol D}} 286 | \newcommand{\G}{{\boldsymbol G}} 287 | \renewcommand{\H}{{\boldsymbol H}} 288 | \newcommand{\I}{{\boldsymbol I}} 289 | \newcommand{\J}{{\boldsymbol J}} 290 | \newcommand{\K}{{\boldsymbol K}} 291 | \renewcommand{\L}{{\boldsymbol L}} 292 | \newcommand{\M}{{\boldsymbol M}} 293 | \renewcommand{\O}{{\boldsymbol O}} 294 | \renewcommand{\S}{{\boldsymbol S}} 295 | \newcommand{\T}{{\boldsymbol T}} 296 | \newcommand{\U}{{\boldsymbol U}} 297 | \newcommand{\V}{{\boldsymbol V}} 298 | \newcommand{\W}{{\boldsymbol W}} 299 | \newcommand{\X}{{\boldsymbol X}} 300 | \newcommand{\Y}{{\boldsymbol Y}} 301 | 302 | 303 | 304 | % useful for Fourier analysis 305 | \newcommand{\bits}{\{-1,1\}} 306 | \newcommand{\bitsn}{\{-1,1\}^n} 307 | \newcommand{\bn}{\bitsn} 308 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 309 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 310 | 311 | % if you want 312 | \newcommand{\half}{{\textstyle \frac12}} 313 | 314 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 315 | 316 | 317 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 318 | % Feel free to ignore the rest of this file 319 | 320 | 321 | 322 | \def\ScribeStr{??} 323 | \def\LecStr{??} 324 | \def\LecNum{??} 325 | \def\LecTitle{??} 326 | \def\LecDate{??} 327 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 328 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 329 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 330 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 331 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 332 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 333 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 334 | 335 | \newdimen\headerwidth 336 | 337 | \newcommand{\MakeScribeTop}{ 338 | \noindent 339 | \begin{center} 340 | \framebox{ 341 | \vbox{ 342 | \headerwidth=\textwidth 343 | \advance\headerwidth by -0.22in 344 | \hbox to \headerwidth {\hfill AIML - CS 337} 345 | \vspace{4mm} 346 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 347 | \vspace{2mm} 348 | \hbox to \headerwidth {\hfill \LecDate \hfill} 349 | \vspace{2mm} 350 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 351 | } 352 | } 353 | \end{center} 354 | \vspace*{4mm}} 355 | -------------------------------------------------------------------------------- /Lecture 5/CS337_Lecture_5_Scribe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 
5/CS337_Lecture_5_Scribe.pdf -------------------------------------------------------------------------------- /Lecture 5/graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 5/graph.png -------------------------------------------------------------------------------- /Lecture 5/mybib.bib: -------------------------------------------------------------------------------- 1 | @book{bertsimas1997introduction, 2 | title={Introduction to linear optimization}, 3 | author={Bertsimas, Dimitris and Tsitsiklis, John N}, 4 | volume={6}, 5 | year={1997}, 6 | publisher={Athena Scientific Belmont, MA} 7 | } 8 | 9 | @book{boyd2004convex, 10 | title={Convex optimization}, 11 | author={Boyd, Stephen and Vandenberghe, Lieven}, 12 | year={2004}, 13 | publisher={Cambridge university press} 14 | } 15 | 16 | @book{wolsey2014integer, 17 | title={Integer and combinatorial optimization}, 18 | author={Wolsey, Laurence A and Nemhauser, George L}, 19 | year={2014}, 20 | publisher={John Wiley \& Sons} 21 | } -------------------------------------------------------------------------------- /Lecture 5/scribe.sty: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Scribe notes style file 3 | % 4 | % This file should be called scribe.sty 5 | % 6 | % Your main LaTeX file should look like this: 7 | % 8 | % \documentclass[12pt]{article} 9 | % \usepackage{scribe} 10 | % 11 | % \Scribe{YOUR NAME} 12 | % \Lecturer{Anupam Gupta OR Ryan O'Donnell} 13 | % \LectureNumber{N} 14 | % \LectureDate{DATE} 15 | % \LectureTitle{A TITLE FOR THE LECTURE} 16 | % 17 | % \begin{document} 18 | % \MakeScribeTop 19 | % 20 | % \section{SECTION NAME} 21 | % 22 | % NOTES GO HERE 23 | % 24 | % \section{ANOTHER SECTION NAME} 25 | % 26 | % MORE NOTES GO HERE 27 | % 28 | % etc. 29 | % 30 | % \bibliographystyle{abbrv} % if you need a bibliography 31 | % \bibliography{mybib} % assuming yours is named mybib.bib 32 | % 33 | % \end{document} 34 | % 35 | % 36 | % A .bib file is a text file containing a sequence like... 37 | % 38 | % @article{ADR82, 39 | % author = "Alain Aspect and Jean Dalibard and G{\'e}rard Roger", 40 | % title = "Experimental Test of {B}ell's Inequalities Using Time-Varying Analyzers", 41 | % journal = "Phys.\ Rev.\ Lett.", 42 | % volume = 49, 43 | % number = 25, 44 | % pages = "1804--1807", 45 | % year = 1982 46 | % } 47 | % 48 | % @inproceedings{Fei91, 49 | % author = "Uriel Feige", 50 | % title = "On the success probability of the two provers in one round proof systems", 51 | % booktitle = "Proc.\ 6th Symp.\ on Structure in Complexity Theory (CCC)", 52 | % pages = "116--123", 53 | % year = 1991 54 | % } 55 | % 56 | % 57 | % 58 | % 59 | % 60 | % 61 | % For your LaTeX files, there are some macros you may want to use below... 
62 | 63 | 64 | \oddsidemargin 0in \evensidemargin 0in \marginparwidth 40pt 65 | \marginparsep 10pt \topmargin 0pt \headsep 0in \headheight 0in 66 | \textheight 8.5in \textwidth 6.5in \brokenpenalty=10000 67 | 68 | \usepackage{amssymb} 69 | \usepackage{amsfonts} 70 | \usepackage{amsmath} 71 | \usepackage{amsthm} 72 | \usepackage{latexsym} 73 | \usepackage{epsfig} 74 | \usepackage{bm} 75 | \usepackage{xspace} 76 | \usepackage{times} 77 | \usepackage[utf8x]{inputenc} 78 | \usepackage[T1]{fontenc} 79 | \usepackage{listings} 80 | \usepackage{color} 81 | 82 | \definecolor{codegreen}{rgb}{0.3,0.6,0.4} 83 | \definecolor{codegray}{rgb}{0.5,0.5,0.5} 84 | \definecolor{codepurple}{rgb}{0.58,0,0.82} 85 | \definecolor{backcolour}{rgb}{0.95,0.95,0.92} 86 | 87 | \lstdefinestyle{mystyle}{ 88 | backgroundcolor=\color{backcolour}, 89 | commentstyle=\color{codegreen}, 90 | keywordstyle=\color{magenta}, 91 | numberstyle=\tiny\color{codegray}, 92 | stringstyle=\color{codepurple}, 93 | basicstyle=\footnotesize, 94 | breakatwhitespace=false, 95 | breaklines=true, 96 | captionpos=b, 97 | keepspaces=true, 98 | numbers=left, 99 | numbersep=5pt, 100 | showspaces=false, 101 | showstringspaces=false, 102 | showtabs=false, 103 | tabsize=2 104 | } 105 | 106 | %% 107 | %% Julia definition (c) 2014 Jubobs 108 | %% 109 | \lstdefinelanguage{Julia}% 110 | {morekeywords={abstract,break,case,catch,const,continue,do,else,elseif,% 111 | end,export,false,for,function,immutable,import,importall,if,in,% 112 | macro,module,otherwise,quote,return,switch,true,try,type,typealias,% 113 | using,while},% 114 | sensitive=true,% 115 | alsoother={$},% 116 | morecomment=[l]\#,% 117 | morecomment=[n]{\#=}{=\#},% 118 | morestring=[s]{"}{"},% 119 | morestring=[m]{'}{'},% 120 | }[keywords,comments,strings]% 121 | 122 | \lstset{% 123 | language = Julia, 124 | basicstyle = \ttfamily, 125 | keywordstyle = \bfseries\color{blue}, 126 | stringstyle = \color{magenta}, 127 | commentstyle = \color{ForestGreen}, 128 | showstringspaces = false, 129 | } 130 | 131 | 132 | \newtheorem{theorem}{Theorem}[section] 133 | \newtheorem{lemma}[theorem]{Lemma} 134 | \newtheorem{claim}[theorem]{Claim} 135 | \newtheorem{proposition}[theorem]{Proposition} 136 | \newtheorem{corollary}[theorem]{Corollary} 137 | \newtheorem{fact}[theorem]{Fact} 138 | \newtheorem{example}[theorem]{Example} 139 | \newtheorem{notation}[theorem]{Notation} 140 | \newtheorem{observation}[theorem]{Observation} 141 | \newtheorem{conjecture}[theorem]{Conjecture} 142 | 143 | \theoremstyle{definition} 144 | \newtheorem{definition}[theorem]{Definition} 145 | 146 | \theoremstyle{remark} 147 | \newtheorem{remark}[theorem]{Remark} 148 | 149 | % Setting the theorem style back to plain in case theorems are defined in the main file 150 | \theoremstyle{plain} 151 | 152 | 153 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 154 | % Useful macros 155 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 156 | 157 | % for temporarily chunks of text 158 | \newcommand{\ignore}[1]{} 159 | 160 | % Probability/expectation operators. The ones ending in x should be used if you want 161 | % subscripts that go directly *below* the operator (in math mode); no x means the subscripts 162 | % go below and to the right. NB: \P is remapped below for the complexity class P. 
163 | \renewcommand{\Pr}{{\bf Pr}} 164 | \newcommand{\Prx}{\mathop{\bf Pr\/}} 165 | \newcommand{\E}{{\bf E}} 166 | \newcommand{\Ex}{\mathop{\bf E\/}} 167 | \newcommand{\Var}{{\bf Var}} 168 | \newcommand{\Varx}{\mathop{\bf Var\/}} 169 | \newcommand{\Cov}{{\bf Cov}} 170 | \newcommand{\Covx}{\mathop{\bf Cov\/}} 171 | 172 | % shortcuts for symbol names that are too long to type 173 | \newcommand{\eps}{\epsilon} 174 | \newcommand{\lam}{\lambda} 175 | \renewcommand{\l}{\ell} 176 | \newcommand{\la}{\langle} 177 | \newcommand{\ra}{\rangle} 178 | \newcommand{\wh}{\widehat} 179 | \newcommand{\wt}{\widetilde} 180 | 181 | % "blackboard-fonted" letters for the reals, naturals etc. 182 | \newcommand{\R}{\mathbb R} 183 | \newcommand{\N}{\mathbb N} 184 | \newcommand{\Z}{\mathbb Z} 185 | \newcommand{\F}{\mathbb F} 186 | \newcommand{\Q}{\mathbb Q} 187 | \newcommand{\C}{\mathbb C} 188 | 189 | % operators that should be typeset in Roman font 190 | \newcommand{\poly}{\mathrm{poly}} 191 | \newcommand{\polylog}{\mathrm{polylog}} 192 | \newcommand{\sgn}{\mathrm{sgn}} 193 | \newcommand{\avg}{\mathop{\mathrm{avg}}} 194 | \newcommand{\val}{{\mathrm{val}}} 195 | 196 | % complexity classes 197 | \renewcommand{\P}{\mathrm{P}} 198 | \newcommand{\NP}{\mathrm{NP}} 199 | \newcommand{\BPP}{\mathrm{BPP}} 200 | \newcommand{\DTIME}{\mathrm{DTIME}} 201 | \newcommand{\ZPTIME}{\mathrm{ZPTIME}} 202 | \newcommand{\BPTIME}{\mathrm{BPTIME}} 203 | \newcommand{\NTIME}{\mathrm{NTIME}} 204 | 205 | % values associated to optimization algorithm instances 206 | \newcommand{\Opt}{{\mathsf{Opt}}} 207 | \newcommand{\Alg}{{\mathsf{Alg}}} 208 | \newcommand{\Lp}{{\mathsf{Lp}}} 209 | \newcommand{\Sdp}{{\mathsf{Sdp}}} 210 | \newcommand{\Exp}{{\mathsf{Exp}}} 211 | 212 | % if you think the sum and product signs are too big in your math mode; x convention 213 | % as in the probability operators 214 | \newcommand{\littlesum}{{\textstyle \sum}} 215 | \newcommand{\littlesumx}{\mathop{{\textstyle \sum}}} 216 | \newcommand{\littleprod}{{\textstyle \prod}} 217 | \newcommand{\littleprodx}{\mathop{{\textstyle \prod}}} 218 | 219 | % horizontal line across the page 220 | \newcommand{\horz}{ 221 | \vspace{-.4in} 222 | \begin{center} 223 | \begin{tabular}{p{\textwidth}}\\ 224 | \hline 225 | \end{tabular} 226 | \end{center} 227 | } 228 | 229 | % calligraphic letters 230 | \newcommand{\calA}{{\cal A}} 231 | \newcommand{\calB}{{\cal B}} 232 | \newcommand{\calC}{{\cal C}} 233 | \newcommand{\calD}{{\cal D}} 234 | \newcommand{\calE}{{\cal E}} 235 | \newcommand{\calF}{{\cal F}} 236 | \newcommand{\calG}{{\cal G}} 237 | \newcommand{\calH}{{\cal H}} 238 | \newcommand{\calI}{{\cal I}} 239 | \newcommand{\calJ}{{\cal J}} 240 | \newcommand{\calK}{{\cal K}} 241 | \newcommand{\calL}{{\cal L}} 242 | \newcommand{\calM}{{\cal M}} 243 | \newcommand{\calN}{{\cal N}} 244 | \newcommand{\calO}{{\cal O}} 245 | \newcommand{\calP}{{\cal P}} 246 | \newcommand{\calQ}{{\cal Q}} 247 | \newcommand{\calR}{{\cal R}} 248 | \newcommand{\calS}{{\cal S}} 249 | \newcommand{\calT}{{\cal T}} 250 | \newcommand{\calU}{{\cal U}} 251 | \newcommand{\calV}{{\cal V}} 252 | \newcommand{\calW}{{\cal W}} 253 | \newcommand{\calX}{{\cal X}} 254 | \newcommand{\calY}{{\cal Y}} 255 | \newcommand{\calZ}{{\cal Z}} 256 | 257 | % bold letters (useful for random variables) 258 | \renewcommand{\a}{{\boldsymbol a}} 259 | \renewcommand{\b}{{\boldsymbol b}} 260 | \renewcommand{\c}{{\boldsymbol c}} 261 | \renewcommand{\d}{{\boldsymbol d}} 262 | \newcommand{\e}{{\boldsymbol e}} 263 | \newcommand{\f}{{\boldsymbol f}} 264 | 
\newcommand{\g}{{\boldsymbol g}} 265 | \newcommand{\h}{{\boldsymbol h}} 266 | \renewcommand{\i}{{\boldsymbol i}} 267 | \renewcommand{\j}{{\boldsymbol j}} 268 | \renewcommand{\k}{{\boldsymbol k}} 269 | \newcommand{\m}{{\boldsymbol m}} 270 | \newcommand{\n}{{\boldsymbol n}} 271 | \renewcommand{\o}{{\boldsymbol o}} 272 | \newcommand{\p}{{\boldsymbol p}} 273 | \newcommand{\q}{{\boldsymbol q}} 274 | \renewcommand{\r}{{\boldsymbol r}} 275 | \newcommand{\s}{{\boldsymbol s}} 276 | \renewcommand{\t}{{\boldsymbol t}} 277 | \renewcommand{\u}{{\boldsymbol u}} 278 | \renewcommand{\v}{{\boldsymbol v}} 279 | \newcommand{\w}{{\boldsymbol w}} 280 | \newcommand{\x}{{\boldsymbol x}} 281 | \newcommand{\y}{{\boldsymbol y}} 282 | \newcommand{\z}{{\boldsymbol z}} 283 | \newcommand{\A}{{\boldsymbol A}} 284 | \newcommand{\B}{{\boldsymbol B}} 285 | \newcommand{\D}{{\boldsymbol D}} 286 | \newcommand{\G}{{\boldsymbol G}} 287 | \renewcommand{\H}{{\boldsymbol H}} 288 | \newcommand{\I}{{\boldsymbol I}} 289 | \newcommand{\J}{{\boldsymbol J}} 290 | \newcommand{\K}{{\boldsymbol K}} 291 | \renewcommand{\L}{{\boldsymbol L}} 292 | \newcommand{\M}{{\boldsymbol M}} 293 | \renewcommand{\O}{{\boldsymbol O}} 294 | \renewcommand{\S}{{\boldsymbol S}} 295 | \newcommand{\T}{{\boldsymbol T}} 296 | \newcommand{\U}{{\boldsymbol U}} 297 | \newcommand{\V}{{\boldsymbol V}} 298 | \newcommand{\W}{{\boldsymbol W}} 299 | \newcommand{\X}{{\boldsymbol X}} 300 | \newcommand{\Y}{{\boldsymbol Y}} 301 | 302 | 303 | 304 | % useful for Fourier analysis 305 | \newcommand{\bits}{\{-1,1\}} 306 | \newcommand{\bitsn}{\{-1,1\}^n} 307 | \newcommand{\bn}{\bitsn} 308 | \newcommand{\isafunc}{{: \bitsn \rightarrow \bits}} 309 | \newcommand{\fisafunc}{{f : \bitsn \rightarrow \bits}} 310 | 311 | % if you want 312 | \newcommand{\half}{{\textstyle \frac12}} 313 | 314 | \newcommand{\myfig}[4]{\begin{figure}[h] \begin{center} \includegraphics[width=#1\textwidth]{#2} \caption{#3} \label{#4} \end{center} \end{figure}} 315 | 316 | 317 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 318 | % Feel free to ignore the rest of this file 319 | 320 | 321 | 322 | \def\ScribeStr{??} 323 | \def\LecStr{??} 324 | \def\LecNum{??} 325 | \def\LecTitle{??} 326 | \def\LecDate{??} 327 | \newcommand{\Scribe}[1]{\def\ScribeStr{Scribe: #1}} 328 | \newcommand{\Scribes}[1]{\def\ScribeStr{Scribes: #1}} 329 | \newcommand{\Lecturer}[1]{\def\LecStr{Lecturer: #1}} 330 | \newcommand{\Lecturers}[1]{\def\LecStr{Lecturers: #1}} 331 | \newcommand{\LectureNumber}[1]{\def\LecNum{#1}} 332 | \newcommand{\LectureDate}[1]{\def\LecDate{#1}} 333 | \newcommand{\LectureTitle}[1]{\def\LecTitle{#1}} 334 | 335 | \newdimen\headerwidth 336 | 337 | \newcommand{\MakeScribeTop}{ 338 | \noindent 339 | \begin{center} 340 | \framebox{ 341 | \vbox{ 342 | \headerwidth=\textwidth 343 | \advance\headerwidth by -0.22in 344 | \hbox to \headerwidth {\hfill AIML - CS 337} 345 | \vspace{4mm} 346 | \hbox to \headerwidth {{\Large \hfill Lecture \LecNum: {\LecTitle} \hfill}} 347 | \vspace{2mm} 348 | \hbox to \headerwidth {\hfill \LecDate \hfill} 349 | \vspace{2mm} 350 | \hbox to \headerwidth {{\it \LecStr \hfill \ScribeStr}} 351 | } 352 | } 353 | \end{center} 354 | \vspace*{4mm}} 355 | -------------------------------------------------------------------------------- /Lecture 6/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 6/main.pdf 
-------------------------------------------------------------------------------- /Lecture 6/mybib.bib: -------------------------------------------------------------------------------- 1 | @book{bertsimas1997introduction, 2 | title={Introduction to linear optimization}, 3 | author={Bertsimas, Dimitris and Tsitsiklis, John N}, 4 | volume={6}, 5 | year={1997}, 6 | publisher={Athena Scientific Belmont, MA} 7 | } 8 | 9 | @book{boyd2004convex, 10 | title={Convex optimization}, 11 | author={Boyd, Stephen and Vandenberghe, Lieven}, 12 | year={2004}, 13 | publisher={Cambridge university press} 14 | } 15 | 16 | @book{wolsey2014integer, 17 | title={Integer and combinatorial optimization}, 18 | author={Wolsey, Laurence A and Nemhauser, George L}, 19 | year={2014}, 20 | publisher={John Wiley \& Sons} 21 | } -------------------------------------------------------------------------------- /Lecture 7/2022_Scribe_lecture7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 7/2022_Scribe_lecture7.pdf -------------------------------------------------------------------------------- /Lecture 8/Lecture_8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 8/Lecture_8.pdf -------------------------------------------------------------------------------- /Lecture 8/knn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 8/knn.png -------------------------------------------------------------------------------- /Lecture 8/linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 8/linear.png -------------------------------------------------------------------------------- /Lecture 9/Lecture9.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 9/Lecture9.pdf -------------------------------------------------------------------------------- /Lecture 9/graph.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 9/graph.jpg -------------------------------------------------------------------------------- /Lecture 9/graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-ML-IITB-2022/Lecture-Notes/af71763d03f0d0dcfa02b1b8470cd07b8a96fc21/Lecture 9/graph.png -------------------------------------------------------------------------------- /Lecture 9/mybib.bib: -------------------------------------------------------------------------------- 1 | @book{bertsimas1997introduction, 2 | title={Introduction to linear optimization}, 3 | author={Bertsimas, Dimitris and Tsitsiklis, John N}, 4 | volume={6}, 5 | year={1997}, 6 | publisher={Athena Scientific Belmont, MA} 7 | } 8 | 9 | @book{boyd2004convex, 10 | title={Convex optimization}, 11 | author={Boyd, Stephen and Vandenberghe, Lieven}, 12 | 
year={2004}, 13 | publisher={Cambridge university press} 14 | } 15 | 16 | @book{wolsey2014integer, 17 | title={Integer and combinatorial optimization}, 18 | author={Wolsey, Laurence A and Nemhauser, George L}, 19 | year={2014}, 20 | publisher={John Wiley \& Sons} 21 | } --------------------------------------------------------------------------------