├── .gitattributes
├── .gitignore
├── README.md
├── go
├── images
│   ├── elm_1E_3.png
│   ├── elmtest.png
│   ├── leastsq5.png
│   ├── leastsq8.png
│   ├── plotdat.png
│   ├── polyreg.png
│   ├── ridge4.png
│   ├── ridge5.png
│   └── ridge8.png
├── jelm.ijs
├── jelm.nw
├── jelm.pdf
└── jelm.tex

/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
*.swp
*.aux
jelm.log
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# jelm

Extreme Learning Machine in J

See the corresponding web page:
http://peportier.me/blog/201905_JELM
--------------------------------------------------------------------------------

/go:
--------------------------------------------------------------------------------
#!/bin/sh
# weave the literate source into LaTeX and build the PDF
noweave -x -delay jelm.nw > jelm.tex
pdflatex jelm
pdflatex jelm
# tangle the J program out of the literate source
notangle -Rjelm.ijs jelm.nw > jelm.ijs
--------------------------------------------------------------------------------

/images/elm_1E_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peportier/jelm/5f0447e2a9b5abb53cf423f830f43976f18147c6/images/elm_1E_3.png
--------------------------------------------------------------------------------

/images/elmtest.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peportier/jelm/5f0447e2a9b5abb53cf423f830f43976f18147c6/images/elmtest.png
--------------------------------------------------------------------------------

/images/leastsq5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peportier/jelm/5f0447e2a9b5abb53cf423f830f43976f18147c6/images/leastsq5.png
--------------------------------------------------------------------------------

/images/leastsq8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peportier/jelm/5f0447e2a9b5abb53cf423f830f43976f18147c6/images/leastsq8.png
--------------------------------------------------------------------------------

/images/plotdat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peportier/jelm/5f0447e2a9b5abb53cf423f830f43976f18147c6/images/plotdat.png
--------------------------------------------------------------------------------

/images/polyreg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peportier/jelm/5f0447e2a9b5abb53cf423f830f43976f18147c6/images/polyreg.png
--------------------------------------------------------------------------------

/images/ridge4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peportier/jelm/5f0447e2a9b5abb53cf423f830f43976f18147c6/images/ridge4.png
--------------------------------------------------------------------------------

/images/ridge5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peportier/jelm/5f0447e2a9b5abb53cf423f830f43976f18147c6/images/ridge5.png
--------------------------------------------------------------------------------

/images/ridge8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peportier/jelm/5f0447e2a9b5abb53cf423f830f43976f18147c6/images/ridge8.png
--------------------------------------------------------------------------------
/jelm.ijs:
--------------------------------------------------------------------------------
require'trig'
require'plot'
require'numeric'

NB. nudge a value up (resp. down) by 10% of its magnitude (plot padding)
pushup=: ] + 0.1 * |
pushdown=: ] - 0.1 * |

NB. locate the elements with values between {.x and {:x
sel=: (] >: {.@[) *. (] <: {:@[)

mean=: +/ % #
rmse=: [: %: [: mean ([: *: -)

mp=: +/ . * NB. matrix product

diag=: (<0 1)&|: : (([:(>:*i.)[:#])})
addDiag=: ([+diag@]) diag ] NB. add x to the diagonal of y

f=: 3 : '(^y) * cos 2*pi * sin pi * y'
noise=: 4 : 'y + -&x *&(+:x) ? (#y) # 0'

gendat=: 4 : 0
X=: ? y $ 0
Y=: x noise f X
minmaxX=: (<./ , >./) X
minmaxf=: (([: pushdown <./) , ([: pushup >./)) f steps 0 1 100
XT=: ? (>. 0.1 * y) $ 0
YT=: f XT
0
)

plotdatnoshow=: 3 : 0
pd 'reset'
pd 'color green'
pd 'type marker'
pd 'markersize 1'
pd 'markers circle'

pd X;Y
pd 'color red'
pd 'type line'
pd 'pensize 1'
pd (;f) steps 0 1 100
)
plotdat=: 3 : 0
plotdatnoshow 0
pd 'show'
)

plotpoly=: 3 : 0
plotdatnoshow 0
pd 'color blue'
xs=: (] #~ minmaxX"_ sel ]) /:~ X,steps 0 1 100
pval=: c&p. xs
crop=: minmaxf sel pval
pd (crop # xs);(crop # pval)
pd 'show'
)

polyreg=: 3 : 0
c=: Y %. X ^/ i.#X
YThat=: c&p. XT
plotpoly 0
)

gram=: 3 : 0
A=: X ^/ i.y
S=: (mp~ |:) A
)

leastsq=: 3 : 0
gram y
c=: ((|:A) mp Y) %. S
YThat=: c&p. XT
plotpoly 0
)

ridge=: 4 : 0
gram y
c=: ((|:A) mp Y) %. x addDiag S
YThat=: c&p. XT
plotpoly 0
)

plotelm=: 3 : 0
plotdatnoshow 0
pd 'type line'
pd 'color blue'
xs=: (] #~ minmaxX"_ sel ]) steps (<.<./X),(>.>./X),100
pd xs;(mkH ,. xs) mp c
pd 'show'
)

initelm=: 3 : 0
W=: _1 + 2 * ? (y,1) $ 0 NB. input weights
B=: ? y $ 0 NB. bias
H=: mkH ,. X
0 [ S=: (mp~ |:) H
)
mkH=: 3 : '0&>. B +"1 y mp"1/ W'

elm=: 3 : 0
c=: ((|:H) mp Y) %. y addDiag S
YThat=: (mkH ,. XT) mp c
plotelm 0
)

plottest=: 3 : 0
pd 'reset'
pd 'color green'
pd 'type marker'
pd 'markersize 1'
pd 'markers circle'

pd XT;YT
pd 'color magenta'
pd XT;YThat
pd 'color red'
pd 'type line'
pd 'pensize 1'
pd (;f) steps 0 1 100

pd 'show'
)

test=: 3 : 0
plottest 0
YT rmse YThat
)
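
NB. Example session (the calls shown in jelm.nw):
NB.   0.5 gendat 100   NB. 100 noisy training points drawn from f
NB.   leastsq 8        NB. unregularized fit, polynomial of degree 7
NB.   1E_4 ridge 8     NB. ridge-regularized fit
NB.   initelm 100      NB. hidden layer of 100 random neurons
NB.   elm 1E_3         NB. solve for the output weights c
NB.   test 0           NB. plot the predictions, return the RMSE on (XT,YT)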
--------------------------------------------------------------------------------

/jelm.nw:
--------------------------------------------------------------------------------
\documentclass[10pt]{article}
\usepackage[a4paper, total={6in, 8in}]{geometry}
\usepackage{graphicx}
\graphicspath{ {./images/} }
\usepackage{noweb}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{url}
\title{Extreme Learning Machine in J}
\author{Pierre-Edouard Portier}
\date{2019}
\renewcommand{\vec}[1]{\boldsymbol{#1}}
\newcommand{\norm}[1]{\left\lVert#1\right\rVert}

\begin{document}
\maketitle
\section{Regression}
$\vec{x}^{(1)}\dots\vec{x}^{(P)}$ are vectors of $\mathbb{R}^{n-1}$ with
associated values $y^{(1)}\dots y^{(P)}$ in $\mathbb{R}$.
We seek a function $f:\mathbb{R}^{n-1} \rightarrow \mathbb{R}$ that
models the observed relationship between $\vec{x}$ and $y$.
$f$ can have a fixed parameterized form. For example:
\[
f(\vec{x}) = a_0 + a_1 x_1 + a_2 x_2 + \dots + a_{n-1} x_{n-1}
\]

If $P=n$, the parameters $a_0 \dots a_{n-1}$ are found by solving a linear system.
\[
\begin{cases}
y^{(1)} &= a_0 + a_1 x_1^{(1)} + a_2 x_2^{(1)} + \dots + a_{n-1} x_{n-1}^{(1)} \\
\dots &= \dots \\
y^{(P)} &= a_0 + a_1 x_1^{(P)} + a_2 x_2^{(P)} + \dots + a_{n-1} x_{n-1}^{(P)} \\
\end{cases}
\]
This system can be written in matrix form.
\[
\left( \begin{array}{cccc}
1 & x^{(1)}_1 & \dots & x^{(1)}_{n-1} \\
1 & x^{(2)}_1 & \dots & x^{(2)}_{n-1} \\
\dots & \dots & \dots & \dots \\
1 & x^{(P)}_1 & \dots & x^{(P)}_{n-1}
\end{array} \right)
\left( \begin{array}{c}
a_0 \\ a_1 \\ \dots \\ a_{n-1}
\end{array} \right)
=
\left( \begin{array}{c}
y^{(1)} \\ y^{(2)} \\ \dots \\ y^{(P)}
\end{array} \right)
\]

Each row of the matrix on the left is a vector $\vec{x}^{(i)T}$ with an
additional constant coordinate that accounts for the parameter $a_0$.
Thus, naming this matrix $\vec{X}^T$, the linear system can also be written:
\[
\vec{X}^T \vec{a} = \vec{y}
\]

Consider the special case where $x$ is a number and $f$ is a polynomial of degree $n-1$:
\[
f(x) = a_0 + a_1 x + a_2 x^2 + \dots + a_{n-1}x^{n-1}
\]

With $P=n$ examples $\left(x^{(k)},y^{(k)}\right)$, the parameters are found by
solving the following linear system:
\begin{equation}
\left( \begin{array}{ccccc}
1 & x^{(1)} & (x^{(1)})^2 & \dots & (x^{(1)})^{n-1} \\
1 & x^{(2)} & (x^{(2)})^2 & \dots & (x^{(2)})^{n-1} \\
\dots & \dots & \dots & \dots & \dots \\
1 & x^{(P)} & (x^{(P)})^2 & \dots & (x^{(P)})^{n-1}
\end{array} \right)
\left( \begin{array}{c}
a_0 \\ a_1 \\ \dots \\ a_{n-1}
\end{array} \right)
=
\left( \begin{array}{c}
y^{(1)} \\ y^{(2)} \\ \dots \\ y^{(P)}
\end{array} \right)
\label{eqn:vandermonde}
\end{equation}
Incidentally, the matrix on the left is called the Vandermonde matrix.
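
In J, \eqref{eqn:vandermonde} is a one-liner: [[xp ^/ i.#xp]] builds the
Vandermonde matrix and matrix divide ([[%.]]) solves the system. A small
sketch with hypothetical sample points [[xp]] and [[yp]] taken on the
parabola $1+x^2$:

\begin{verbatim}
   xp=: 0 1 2
   yp=: 1 2 5
   xp ^/ i.#xp       NB. the Vandermonde matrix
1 0 0
1 1 1
1 2 4
   yp %. xp ^/ i.#xp NB. recovers (up to rounding) a0 a1 a2 = 1 0 1
\end{verbatim}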

\subsection{Experiment with a 1-dimensional synthetic dataset}
We define a nonlinear function [[f]] from which we generate a dataset.

<<dataset>>=
f=: 3 : '(^y) * cos 2*pi * sin pi * y'
<<noise>>
<<gendat>>

@
In traditional mathematical form, this function is:
\[f(x)=e^x \times \cos\left(2\pi \sin\left(\pi x\right)\right)\]

Function [[noise]] adds some random noise to the values of a vector. For
example, [[0.5 noise v]] adds random values uniformly drawn from the interval
$[-0.5,0.5]$ to the terms of the vector [[v]].

<<noise>>=
noise=: 4 : 'y + -&x *&(+:x) ? (#y) # 0'

@
[[0.5 gendat 10]] generates from [[f]] a dataset [[(X,Y)]] of 10 points, with
random noise in $[-0.5,0.5]$ added to [[Y]]. It also stores in [[minmaxX]] the
minimum and maximum values of [[X]]. It computes the pair [[minmaxf]],
whose first term is ten percent smaller than the minimum of [[f]] on the
interval $[0,1]$, and whose second term is ten percent bigger than the maximum
of [[f]] on the interval $[0,1]$. [[minmaxf]] is later used to crop the plots
so that extreme values are not visible.

A test set [[(XT,YT)]] is used to assess the capacity of the model to
generalize to unseen data. Its size is fixed at $10\%$ of the size of the
training set.

<<utils>>=
pushup=: ] + 0.1 * |
pushdown=: ] - 0.1 * |
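
@
[[pushup]] and [[pushdown]] nudge a value up (resp. down) by ten percent of
its magnitude; using the magnitude [[|]] makes this behave correctly for
negative values too:

\begin{verbatim}
   pushup 2 _2
2.2 _1.8
   pushdown 2 _2
1.8 _2.2
\end{verbatim}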

<<gendat>>=
gendat=: 4 : 0
X=: ? y $ 0
Y=: x noise f X
minmaxX=: (<./ , >./) X
minmaxf=: (([: pushdown <./) , ([: pushup >./)) f steps 0 1 100
XT=: ? (>. 0.1 * y) $ 0
YT=: f XT
0
)

@
[[plotdat 0]] plots the dataset.

<<plotdat>>=
plotdatnoshow=: 3 : 0
<<initplot>>
pd X;Y
<<plotf>>
)
plotdat=: 3 : 0
plotdatnoshow 0
pd 'show'
)

<<initplot>>=
pd 'reset'
pd 'color green'
pd 'type marker'
pd 'markersize 1'
pd 'markers circle'

<<plotf>>=
pd 'color red'
pd 'type line'
pd 'pensize 1'
pd (;f) steps 0 1 100

@
\noindent\begin{minipage}{0.5\textwidth}
\includegraphics[width=\linewidth]{plotdat}
\end{minipage}%
\hfill%
\begin{minipage}{0.4\textwidth}
\begin{verbatim}
0.5 gendat 10
plotdat 0
\end{verbatim}
\end{minipage}
\vskip.5\baselineskip

[[polyreg 0]] solves the linear system \eqref{eqn:vandermonde}, stores the
coefficients of the polynomial in the variable~[[c]], and computes [[YThat]],
the predictions on the test dataset.

<<polyreg>>=
polyreg=: 3 : 0
c=: Y %. X ^/ i.#X
YThat=: c&p. XT
plotpoly 0
)

<<utils>>=
NB. locate the elements with values between {.x and {:x
sel=: (] >: {.@[) *. (] <: {:@[)

<<plotpoly>>=
plotpoly=: 3 : 0
plotdatnoshow 0
pd 'color blue'
xs=: (] #~ minmaxX"_ sel ]) /:~ X,steps 0 1 100
pval=: c&p. xs
crop=: minmaxf sel pval
pd (crop # xs);(crop # pval)
pd 'show'
)

@
\noindent\begin{minipage}{0.5\textwidth}
\includegraphics[width=\linewidth]{polyreg}
\end{minipage}%
\hfill%
\begin{minipage}{0.4\textwidth}
\begin{verbatim}
polyreg 0
\end{verbatim}
\end{minipage}
\vskip.5\baselineskip

[[test 0]] returns the root mean square error (RMSE) on the test set, and a
plot of the predictions.

<<utils>>=
mean=: +/ % #
rmse=: [: %: [: mean ([: *: -)

<<test>>=
test=: 3 : 0
plottest 0
YT rmse YThat
)

<<plottest>>=
plottest=: 3 : 0
<<initplot>>
pd XT;YT
pd 'color magenta'
pd XT;YThat
<<plotf>>
pd 'show'
)

@
\subsection{Generalization to a function space}
Given a basis for a function space, we can try to express [[f]] as a
combination of basis functions.
\[
f(\vec{x}) = a_1 f_1(\vec{x}) + a_2 f_2(\vec{x}) + \dots + a_n f_n(\vec{x})
\]

Given a dataset of $n$ pairs $\bigl(\vec{x}^{(k)},y^{(k)}\bigr)$, the
coefficients $a_i$ are found by solving a linear system.

\[
\left( \begin{array}{cccc}
f_1(\vec{x}^{(1)}) & f_2(\vec{x}^{(1)}) & \dots & f_n(\vec{x}^{(1)}) \\
f_1(\vec{x}^{(2)}) & f_2(\vec{x}^{(2)}) & \dots & f_n(\vec{x}^{(2)}) \\
\dots & \dots & \dots & \dots \\
f_1(\vec{x}^{(n)}) & f_2(\vec{x}^{(n)}) & \dots & f_n(\vec{x}^{(n)})
\end{array} \right)
\left( \begin{array}{c}
a_1 \\ a_2 \\ \dots \\ a_{n}
\end{array} \right)
=
\left( \begin{array}{c}
y^{(1)} \\ y^{(2)} \\ \dots \\ y^{(n)}
\end{array} \right)
\]

Let us denote this linear system by $\vec{A}\vec{x}=\vec{b}$.

\subsection{Least squares}
The linear system $\vec{A}\vec{x}=\vec{b}$
(with $\vec{A} \in \mathbb{R}^{m \times n}$) doesn't necessarily have a
solution when there are more examples than basis functions (i.e., $m>n$).
Thus, we look for an approximate solution $\vec{A}\vec{x}\approx\vec{b}$
that minimizes the sum of the squared errors, $\norm{\vec{A}\vec{x}-\vec{b}}^2_2$.

\begin{align*}
& \norm{\vec{A}\vec{x}-\vec{b}}^2_2 \\
= \{ & \norm{\vec{x}}_2 = \sqrt{\vec{x}\cdot\vec{x}} \} \\
& \left(\vec{A}\vec{x}-\vec{b}\right) \cdot \left(\vec{A}\vec{x}-\vec{b}\right) \\
= \{ & \text{Euclidean scalar product} \} \\
& \left(\vec{A}\vec{x}-\vec{b}\right)^T \left(\vec{A}\vec{x}-\vec{b}\right) \\
= \{ & \text{property of transposition} \} \\
& \left(\vec{x}^T\vec{A}^T - \vec{b}^T \right) \left(\vec{A}\vec{x}-\vec{b}\right) \\
= \{ & \text{multiplication} \} \\
& \vec{x}^T\vec{A}^T\vec{A}\vec{x} - \vec{x}^T\vec{A}^T\vec{b} - \vec{b}^T\vec{A}\vec{x} + \vec{b}^T\vec{b} \\
= \{ & \text{since each element of the sum is a scalar, } \vec{b}^T\vec{A}\vec{x} = \left(\vec{b}^T\vec{A}\vec{x}\right)^T = \vec{x}^T\vec{A}^T\vec{b} \} \\
& \vec{x}^T\vec{A}^T\vec{A}\vec{x} - 2\vec{x}^T\vec{A}^T\vec{b} + \vec{b}^T\vec{b}
\end{align*}

This quadratic expression defines a convex surface. Its minimum is found by
setting its gradient with respect to $\vec{x}$ to zero.

\begin{align*}
& \vec{0} = 2\vec{A}^T\vec{A}\vec{x} - 2\vec{A}^T\vec{b} \\
= \{ & \text{rearranging} \} \\
& \vec{A}^T\vec{A}\vec{x} = \vec{A}^T\vec{b}
\end{align*}

Thus, when $m>n$, we solve $\vec{A}\vec{x}\approx\vec{b}$ by solving
$\vec{A}^T\vec{A}\vec{x} = \vec{A}^T\vec{b}$.
$\vec{A}^T\vec{A}$ is called the Gram matrix.
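
As a quick check: J's dyadic [[%.]] already computes a least-squares solution
when the system is overdetermined, so it should agree, up to rounding, with
solving the normal equations explicitly. A sketch on a hypothetical
$3 \times 2$ system with made-up [[A]] and [[b]]:

\begin{verbatim}
   A=: 3 2 $ 1 1 1 2 1 3
   b=: 1 2 2
   b %. A                               NB. least squares, built in
0.666667 0.5
   ((|:A) +/ . * b) %. (|:A) +/ . * A   NB. via the normal equations
0.666667 0.5
\end{verbatim}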

[[gram y]] computes the Gram matrix [[S]] for a polynomial basis of degree [[y-1]].

<<gram>>=
gram=: 3 : 0
A=: X ^/ i.y
S=: (mp~ |:) A
)

<<utils>>=
mp=: +/ . * NB. matrix product

@
[[leastsq y]] solves the overdetermined linear system by computing the Gram
matrix for a polynomial basis of degree [[y-1]].

<<gram>>=
leastsq=: 3 : 0
gram y
c=: ((|:A) mp Y) %. S
YThat=: c&p. XT
plotpoly 0
)

@
\noindent\begin{minipage}{0.5\textwidth}
\includegraphics[width=\linewidth]{leastsq5}
\end{minipage}%
\hfill%
\begin{minipage}{0.4\textwidth}
\begin{verbatim}
0.5 gendat 100
leastsq 5
\end{verbatim}
\end{minipage}

\noindent\begin{minipage}{0.5\textwidth}
\includegraphics[width=\linewidth]{leastsq8}
\end{minipage}%
\hfill%
\begin{minipage}{0.4\textwidth}
\begin{verbatim}
leastsq 8
\end{verbatim}
\end{minipage}
\vskip.5\baselineskip

\subsection{Tikhonov regularization}
With fewer examples than basis functions (i.e., $m<n$), the system is
underdetermined and the Gram matrix can be singular or ill-conditioned.
Tikhonov (ridge) regularization adds a penalty $\lambda\norm{\vec{x}}^2_2$ to
the least squares objective, which leads to the regularized normal equations:
\[
\left(\vec{A}^T\vec{A} + \lambda\vec{I}\right)\vec{x} = \vec{A}^T\vec{b}
\]

[[x ridge y]] solves this system for a polynomial basis of degree [[y-1]],
with the regularization coefficient [[x]] added to the diagonal of the Gram
matrix.

<<ridge>>=
ridge=: 4 : 0
gram y
c=: ((|:A) mp Y) %. x addDiag S
YThat=: c&p. XT
plotpoly 0
)

<<utils>>=
diag=: (<0 1)&|: : (([:(>:*i.)[:#])})
addDiag=: ([+diag@]) diag ] NB. add x to the diagonal of y

@
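A quick check of [[addDiag]] (a sketch, assuming the definitions above):
adding $1$ to the diagonal of a zero matrix should yield the identity matrix.

\begin{verbatim}
   1 addDiag 3 3 $ 0   NB. expected: the 3-by-3 identity matrix
\end{verbatim}
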
\noindent\begin{minipage}{0.5\textwidth}
\includegraphics[width=\linewidth]{ridge4}
\end{minipage}%
\hfill%
\begin{minipage}{0.4\textwidth}
\begin{verbatim}
1E_4 ridge 4
\end{verbatim}
\end{minipage}
\vskip.5\baselineskip

\noindent\begin{minipage}{0.5\textwidth}
\includegraphics[width=\linewidth]{ridge5}
\end{minipage}%
\hfill%
\begin{minipage}{0.4\textwidth}
\begin{verbatim}
1E_4 ridge 5
\end{verbatim}
\end{minipage}
\vskip.5\baselineskip

\noindent\begin{minipage}{0.5\textwidth}
\includegraphics[width=\linewidth]{ridge8}
\end{minipage}%
\hfill%
\begin{minipage}{0.4\textwidth}
\begin{verbatim}
1E_4 ridge 8
\end{verbatim}
\end{minipage}
\vskip.5\baselineskip

\subsection{Extreme Learning Machine}
The following parameterized form of $f$ corresponds to a neural network with a
single hidden layer.
\[
f(\vec{x}) = c_1 g(\vec{w_1}\cdot\vec{x}+b_1) + c_2 g(\vec{w_2}\cdot\vec{x}+b_2)
+ \dots + c_M g(\vec{w_M}\cdot\vec{x}+b_M)
\]
$g$ is a non-linear activation function. We use the rectified linear unit
(ReLU): $g(y)=\max(0,y)$.
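
In J, the ReLU is simply [[0&>.]], the larger of zero and the argument:

\begin{verbatim}
   0 >. _2 _1 0 1 2
0 0 0 1 2
\end{verbatim}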

If the vectors $\vec{w_1}\dots\vec{w_M}$ and the scalars $b_1\dots b_M$ are
initialized randomly and never modified (i.e., if they are not parameters), we
can solve a linear system $\vec{H}\vec{c}=\vec{y}$ of unknown $\vec{c}$.
\[
\vec{H}:
\left( \begin{array}{ccc}
g(\vec{w_1}\cdot\vec{x_1}+b_1) & \dots & g(\vec{w_M}\cdot\vec{x_1}+b_M) \\
\dots & \dots & \dots \\
g(\vec{w_1}\cdot\vec{x_N}+b_1) & \dots & g(\vec{w_M}\cdot\vec{x_N}+b_M)
\end{array} \right)
\]
\begin{align*}
\vec{c}^T &: \left(c_1 \dots c_M\right) \\
\vec{y}^T &: \left(y_1 \dots y_N\right)
\end{align*}

This approach is named \emph{Extreme Learning Machine}%
\footnote{\url{https://scholar.google.fr/scholar?q=extreme+learning+machine}}.

[[initelm 100]] randomly initializes the matrix [[H]] with $100$ neurons on
the hidden layer (i.e., $M=100$) and computes its Gram form [[S]].

<<elm>>=
initelm=: 3 : 0
W=: _1 + 2 * ? (y,1) $ 0 NB. input weights
B=: ? y $ 0 NB. bias
H=: mkH ,. X
0 [ S=: (mp~ |:) H
)
mkH=: 3 : '0&>. B +"1 y mp"1/ W'

@
[[elm 1E_4]] solves the extreme learning machine linear system with a Tikhonov
regularization coefficient of $10^{-4}$.

<<elm>>=
elm=: 3 : 0
c=: ((|:H) mp Y) %. y addDiag S
YThat=: (mkH ,. XT) mp c
plotelm 0
)

<<plotelm>>=
plotelm=: 3 : 0
plotdatnoshow 0
pd 'type line'
pd 'color blue'
xs=: (] #~ minmaxX"_ sel ]) steps (<.<./X),(>.>./X),100
pd xs;(mkH ,. xs) mp c
pd 'show'
)

@
\noindent\begin{minipage}{0.5\textwidth}
\includegraphics[width=\linewidth]{elm_1E_3}
\end{minipage}%
\hfill%
\begin{minipage}{0.4\textwidth}
\begin{verbatim}
initelm 100
elm 1E_3
\end{verbatim}
\end{minipage}
\vskip.5\baselineskip

\noindent\begin{minipage}{0.5\textwidth}
\includegraphics[width=\linewidth]{elmtest}
\end{minipage}%
\hfill%
\begin{minipage}{0.4\textwidth}
\begin{verbatim}
test 0
\end{verbatim}
\end{minipage}
\vskip.5\baselineskip

<<require>>=
require'trig'
require'plot'
require'numeric'

<<jelm.ijs>>=
<<require>>
<<utils>>
<<dataset>>
<<plotdat>>
<<plotpoly>>
<<polyreg>>
<<gram>>
<<ridge>>
<<plotelm>>
<<elm>>
<<plottest>>
<<test>>

@
\end{document}
--------------------------------------------------------------------------------

/jelm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peportier/jelm/5f0447e2a9b5abb53cf423f830f43976f18147c6/jelm.pdf
--------------------------------------------------------------------------------