├── .gitignore ├── APPM4720_5720_Spr2019_Syllabus.pdf ├── Code ├── AMS_sketch.m ├── Hadamard_teaching_code.m ├── README.md ├── countSketch.c ├── countSketch_sparse.c ├── my_normest.m ├── pdist2_faster.m ├── power_method.py ├── sketch.m └── sketch.py ├── Demos ├── README.md ├── demo01_exactRankR.ipynb ├── demo01_exactRankR.m ├── demo01_exactRankR.py ├── demo02_sorts.ipynb ├── demo02_sorts.m ├── demo02_sorts.py ├── demo03_FrobeniusNorm.c ├── demo03_FrobeniusNorm.ipynb ├── demo04_FrobeniusNorm_sparse.ipynb ├── demo04_FrobeniusNorm_sparse.m ├── demo05_5_HadamardTransform.ipynb ├── demo05_FastJL_speedTest.jl ├── demo05_FastJL_speedTest.m ├── demo05_results_excludingSetup.pdf ├── demo05_results_withSetup.pdf ├── demo06_leverageScores.ipynb ├── demo06_leverageScores.m ├── demo07_rand_mat_mult.m ├── demo07_rand_mat_mult.py ├── demo07_rand_mat_mult_ortho.py ├── demo08_higherAccuracyRegression.ipynb ├── demo08_higherAccuracyRegression.m ├── demo09_RandomizedKaczmarz.ipynb ├── demo09_RandomizedKaczmarz.m ├── demo10_05_SLQ.ipynb ├── demo10_l1_regression.ipynb ├── demo10_l1_regression.m ├── demo11_JamesSteinEstimator.m ├── demo12_CompressedSensing.m ├── demo13_EDM.mlx ├── demo13_EDM.pdf ├── demo14_MonteCarlo_and_improvements.ipynb ├── demo14_MonteCarlo_and_improvements.m ├── demo15_SGD.m ├── demo16_LSH.m ├── demo17_kNN_via_LSH.m ├── demo18_names.m ├── demo19_AMS_sketch_vs_JL.m ├── demo20_CoreSets_for_Kmeans.m ├── demo21_randomizedSVDs.ipynb ├── vignette-rsvd.jl └── vignette_rsvd.m ├── Handouts ├── Linear_algebra_notes_matrices.pdf ├── README.md └── SamplingLecture_Sept29_2021.pdf ├── Homeworks ├── APPM5650Fall21_RandomizedAlgos_HW01.pdf ├── APPM5650Fall21_RandomizedAlgos_HW02.pdf ├── APPM5650Fall21_RandomizedAlgos_HW03.pdf ├── APPM5650Fall21_RandomizedAlgos_HW04.pdf ├── APPM5650Fall21_RandomizedAlgos_HW05.pdf ├── APPM5650Fall21_RandomizedAlgos_HW06.pdf ├── APPM5650Fall21_RandomizedAlgos_HW07.pdf ├── APPM5650Fall21_RandomizedAlgos_HW08.pdf ├── APPM5650Fall21_RandomizedAlgos_HW09.pdf ├── APPM5650Fall21_RandomizedAlgos_HW10.pdf ├── ProjectInformation.md ├── ProjectRubric.pdf ├── README.md └── custom_headers.tex ├── LICENSE ├── README.md ├── SlideshowAllPresentations_4720Spr19_Randomized.jpeg ├── SlideshowAllPresentations_4720Spr19_Randomized.pdf ├── SlideshowAllPresentations_5650_Fall21.jpg ├── SlideshowAllPresentations_5650_Fall21.pdf └── syllabus.md /.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | .*.lb 14 | 15 | ## Intermediate documents: 16 | *.dvi 17 | *.xdv 18 | *-converted-to.* 19 | # these rules might exclude image files for figures etc. 
20 | # *.ps 21 | # *.eps 22 | # *.pdf 23 | 24 | ## Generated if empty string is given at "Please type another file name for output:" 25 | .pdf 26 | 27 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 28 | *.bbl 29 | *.bcf 30 | *.blg 31 | *-blx.aux 32 | *-blx.bib 33 | *.run.xml 34 | 35 | ## Build tool auxiliary files: 36 | *.fdb_latexmk 37 | *.synctex 38 | *.synctex(busy) 39 | *.synctex.gz 40 | *.synctex.gz(busy) 41 | *.pdfsync 42 | 43 | ## Auxiliary and intermediate files from other packages: 44 | # algorithms 45 | *.alg 46 | *.loa 47 | 48 | # achemso 49 | acs-*.bib 50 | 51 | # amsthm 52 | *.thm 53 | 54 | # beamer 55 | *.nav 56 | *.pre 57 | *.snm 58 | *.vrb 59 | 60 | # changes 61 | *.soc 62 | 63 | # cprotect 64 | *.cpt 65 | 66 | # elsarticle (documentclass of Elsevier journals) 67 | *.spl 68 | 69 | # endnotes 70 | *.ent 71 | 72 | # fixme 73 | *.lox 74 | 75 | # feynmf/feynmp 76 | *.mf 77 | *.mp 78 | *.t[1-9] 79 | *.t[1-9][0-9] 80 | *.tfm 81 | 82 | #(r)(e)ledmac/(r)(e)ledpar 83 | *.end 84 | *.?end 85 | *.[1-9] 86 | *.[1-9][0-9] 87 | *.[1-9][0-9][0-9] 88 | *.[1-9]R 89 | *.[1-9][0-9]R 90 | *.[1-9][0-9][0-9]R 91 | *.eledsec[1-9] 92 | *.eledsec[1-9]R 93 | *.eledsec[1-9][0-9] 94 | *.eledsec[1-9][0-9]R 95 | *.eledsec[1-9][0-9][0-9] 96 | *.eledsec[1-9][0-9][0-9]R 97 | 98 | # glossaries 99 | *.acn 100 | *.acr 101 | *.glg 102 | *.glo 103 | *.gls 104 | *.glsdefs 105 | 106 | # gnuplottex 107 | *-gnuplottex-* 108 | 109 | # gregoriotex 110 | *.gaux 111 | *.gtex 112 | 113 | # htlatex 114 | *.4ct 115 | *.4tc 116 | *.idv 117 | *.lg 118 | *.trc 119 | *.xref 120 | 121 | # hyperref 122 | *.brf 123 | 124 | # knitr 125 | *-concordance.tex 126 | # TODO Comment the next line if you want to keep your tikz graphics files 127 | *.tikz 128 | *-tikzDictionary 129 | 130 | # listings 131 | *.lol 132 | 133 | # makeidx 134 | *.idx 135 | *.ilg 136 | *.ind 137 | *.ist 138 | 139 | # minitoc 140 | *.maf 141 | *.mlf 142 | *.mlt 143 | *.mtc[0-9]* 144 | *.slf[0-9]* 145 | *.slt[0-9]* 146 | *.stc[0-9]* 147 | 148 | # minted 149 | _minted* 150 | *.pyg 151 | 152 | # morewrites 153 | *.mw 154 | 155 | # nomencl 156 | *.nlg 157 | *.nlo 158 | *.nls 159 | 160 | # pax 161 | *.pax 162 | 163 | # pdfpcnotes 164 | *.pdfpc 165 | 166 | # sagetex 167 | *.sagetex.sage 168 | *.sagetex.py 169 | *.sagetex.scmd 170 | 171 | # scrwfile 172 | *.wrt 173 | 174 | # sympy 175 | *.sout 176 | *.sympy 177 | sympy-plots-for-*.tex/ 178 | 179 | # pdfcomment 180 | *.upa 181 | *.upb 182 | 183 | # pythontex 184 | *.pytxcode 185 | pythontex-files-*/ 186 | 187 | # thmtools 188 | *.loe 189 | 190 | # TikZ & PGF 191 | *.dpth 192 | *.md5 193 | *.auxlock 194 | 195 | # todonotes 196 | *.tdo 197 | 198 | # easy-todo 199 | *.lod 200 | 201 | # xmpincl 202 | *.xmpi 203 | 204 | # xindy 205 | *.xdy 206 | 207 | # xypic precompiled matrices 208 | *.xyc 209 | 210 | # endfloat 211 | *.ttt 212 | *.fff 213 | 214 | # Latexian 215 | TSWLatexianTemp* 216 | 217 | ## Editors: 218 | # WinEdt 219 | *.bak 220 | *.sav 221 | 222 | # Texpad 223 | .texpadtmp 224 | 225 | # Kile 226 | *.backup 227 | 228 | # KBibTeX 229 | *~[0-9]* 230 | 231 | # auto folder when using emacs and auctex 232 | ./auto/* 233 | *.el 234 | 235 | # expex forward references with \gathertags 236 | *-tags.tex 237 | 238 | # standalone packages 239 | *.sta 240 | 241 | # generated if using elsarticle.cls 242 | *.spl 243 | -------------------------------------------------------------------------------- /APPM4720_5720_Spr2019_Syllabus.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/APPM4720_5720_Spr2019_Syllabus.pdf -------------------------------------------------------------------------------- /Code/AMS_sketch.m: -------------------------------------------------------------------------------- 1 | function C = AMS_sketch( X, w, d, varargin ) 2 | % C = AMS_sketch( X, w, d ) 3 | % returns a linear sketch of the input matrix X, X is p x n 4 | % (note convention: *columns* not *rows* of X are the data pts) 5 | % and the output sketch C is of size w*d x n 6 | % where C(:,i) = AMS_sketch( X(:,i) ) 7 | % 8 | % w controls number of buckets per hash (e.g., 2^8, 2^12) 9 | % d controls how many hashes we do (e.g., 7, or ceil(log2(1/.01)) ) 10 | % 11 | % C = AMS_sketch( X, w, d, parameters, values ) 12 | % gives more options, e.g., 13 | % 14 | % 'saltSeed', seed 15 | % gives a new seed to the random number generator (default: 0) 16 | % which controls both hashes 17 | % 18 | % 'transposedX', false 19 | % if True, assumes X is n x p not p x n 20 | % 21 | % Stephen Becker 22 | % This version is good for matrices that are not too sparse, 23 | % and have more than a few columns n. If you are applying 24 | % this to a very sparse vector, then you really should 25 | % use hash functions if you want sub-linear time 26 | % (this implementation calls several Count sketches, 27 | % which I have implemented in a way that is not sub-linear 28 | % for a single column). 29 | 30 | prs = inputParser; 31 | addParameter(prs,'saltSeed',.0); 32 | addParameter(prs,'transposedX',false); 33 | parse(prs,varargin{:}); 34 | saltSeed = prs.Results.saltSeed; 35 | transposedX = prs.Results.transposedX; 36 | 37 | 38 | if transposedX 39 | [n,p] = size(X); 40 | else 41 | [p,n] = size(X); 42 | end 43 | % if p > intmax('uint32') 44 | % error('Dimensions of input matrix are too large!'); 45 | % end 46 | % if w > intmax('uint16') 47 | % error('Code needs to be updated if you want w > 2^16'); 48 | % end 49 | % if 2*d > 20 50 | % error('Code needs to be updated if you want 2*d > 20'); 51 | % end 52 | 53 | rng( saltSeed ); 54 | % saltPerm = randperm( 20 ); % output of SHA has 20 int8's 55 | % 56 | % 57 | % 58 | % % C = zeros( d, w, n ); 59 | % C = zeros( d*w, n ); 60 | % 61 | % for j = 1:p 62 | % Engine = java.security.MessageDigest.getInstance('SHA'); 63 | % Engine.update(typecast( uint32(j), 'uint8')); 64 | % L = uint16(typecast( Engine.digest, 'uint8' )); 65 | % L = L( saltPerm ); 66 | % binaryHashes = sign( randn(d,1) ); % don't even need a hash "function" 67 | % for k = 1:d 68 | % ell = L(2*k-1)*2^8 + L(2*k); 69 | % ell = mod( ell, w ) + 1; % keep it in range, and make it 0-based 70 | % ind = sub2ind( [d,w], k, ell ); 71 | % C ( ind, : ) = C( ind, : ) + binaryHashes(k)*X( j, : ); 72 | % %C ( k, ell, : ) = C( k, ell, : ) + X( j, : ); % same idea, if C is 73 | % % a tensor 74 | % end 75 | % end 76 | % 77 | 78 | % % Make it faster... by skipping the hash! 79 | % Instead, just call Count Sketch function 80 | 81 | 82 | m = w; 83 | M = p; 84 | useTranspose = true; 85 | C = zeros( d*w, n ); 86 | for k = 1:d 87 | D = spdiags(sign(randn(M,1)),0,M,M); % bsxfun() is another efficient way to do this 88 | indx_map = int64(randi(m,M,1)); 89 | if transposedX 90 | % I want X', but it's already transposed, so don't worry! 
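% --- Added explanatory comment (not in the original file) ---
% Each pass of this loop computes one count sketch of the sign-flipped
% data. Equivalently (slower, but perhaps clearer), one could form the
% sketch matrix explicitly; recall m = w and M = p were set above:
%   Sk = sparse( double(indx_map), 1:M, 1, m, M ); % m x M, one 1 per column
%   C( (1+(k-1)*w):k*w, : ) = Sk*( D*X' );         % X is n x p in this branch
% The countSketch_BLAS mex call below computes the same product in
% O(nnz(X)) time without ever forming Sk.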
91 | C( (1+(k-1)*w):k*w, :) = countSketch_BLAS(X*D,indx_map,m,useTranspose)'; 92 | else 93 | C( (1+(k-1)*w):k*w, :) = countSketch_BLAS(X'*D,indx_map,m,useTranspose)'; 94 | end 95 | end -------------------------------------------------------------------------------- /Code/Hadamard_teaching_code.m: -------------------------------------------------------------------------------- 1 | function y = Hadamard_teaching_code(x) 2 | % y = Hadamard_teaching_code(x) 3 | % applies the Hadamard transform to x 4 | % If x has more than one column, the transform is applied 5 | % to each column. 6 | % This code is not fast, but it shows you how to exploit 7 | % the structure of the transform. 8 | % Note: this code does not do any sub-sampling 9 | 10 | [m,n] = size(x); 11 | if 2^nextpow2(m) ~= m 12 | error('Must have leading dimension of x be power of 2'); 13 | end 14 | 15 | y = x; 16 | for bit = 1:log2(m) 17 | k = 2^bit; % e.g., 2, 4, ..., m 18 | k2 = 2^(bit-1); % e.g., 1, 2, ..., m/2 19 | 20 | y = reshape( y, k, [], n ); 21 | tmp = y(1:k2,:,:); 22 | y(1:k2,:,:) = y(1:k2,:,:) + y(k2+1:k,:,:); 23 | y(k2+1:k,:,:) = tmp - y(k2+1:k,:,:); 24 | y = reshape( y, m, n); 25 | end -------------------------------------------------------------------------------- /Code/README.md: -------------------------------------------------------------------------------- 1 | # Code useful for the randomized algorithm class 2 | 3 | ## Sketching code 4 | - [sketch](sketch.m) is a multipurpose code that can call a variety of sketches and give you a function handle (and a matrix, if requested). It also has a self-test mode to check whether E[S^TS] = I (where S is the sketch), which is useful for spotting mistakes in scaling. This code relies on other functions inside this directory for the actual sketching. For count sketch and Hadamard transforms, make sure to compile the C code with the mex compiler, otherwise you'll have slow performance. 5 | 6 | ## Misc utilities 7 | 8 | - [my_normest](my_normest.m) for estimating the spectral norm in Matlab (variant of Matlab's normest that allows for function handles) 9 | - [power_method](power_method.py) for estimating the spectral norm in Python 10 | - [pdist2_faster](pdist2_faster.m) for calculating all pairwise distances, similar to Matlab's pdist2 but often faster (this version uses simple matrix multiplies) 11 | 12 | ## Hadamard Transform 13 | [Hadamard Transform Code](https://github.com/jeffeverett/hadamard-transform), written by Stephen Becker and Jeff Everett, is much faster than Matlab's code. You can also use the simple [Hadamard_teaching_code.m](Hadamard_teaching_code.m) file, which is sometimes actually faster than Matlab's code and is much simpler, so you can get a better idea of how the fast Hadamard transform works. 14 | 15 | ## CountSketch 16 | 17 | For a Python version, see [scipy.linalg.clarkson_woodruff_transform](https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.clarkson_woodruff_transform.html#scipy.linalg.clarkson_woodruff_transform). I haven't used it myself, so I'm not sure how efficient it is. 18 | 19 | [CountSketch.c](countSketch.c) is an implementation of CountSketch in Matlab's mex interface. You'll need to compile it.
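As an aside (an added note, not in the original README): you can also realize a count sketch as an explicit sparse matrix in pure Matlab, with no compilation, using the same `indx_map`, `mSmall`, `mBig`, and `A` as in the example below: ``` >> S = sparse( double(indx_map), 1:mBig, sign(randn(mBig,1)), mSmall, mBig ); >> P3 = S*A; % same distribution as countSketch( D*A, indx_map, mSmall, false ) ``` Here the random signs are folded into `S` rather than kept in a separate diagonal `D`; since `S` has exactly one nonzero per column, applying it still costs on the order of nnz(A). The mex versions below are faster mainly because they avoid forming `S` at all.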
See the source code for some fancy compilation options, but a basic version is: 20 | ``` 21 | >> mex countSketch.c 22 | ``` 23 | and a complicated faster version is: 24 | ``` 25 | >> mex countSketch.c -output countSketch_BLAS -DUSE_BLAS -lmwblas CFLAGS="\$CFLAGS -O3 -malign-double -march=native" 26 | ``` 27 | and run it like 28 | ``` 29 | >> mSmall = 10; mBig = 100; n = 7; 30 | >> A = randn(mBig,n); 31 | >> indx_map = int64(randi(mSmall,mBig,1)); 32 | >> D = spdiags( sign(randn(mBig,1)), 0, mBig, mBig ); 33 | >> P = countSketch( D*A, indx_map, mSmall, false ); 34 | >> P2 = countSketch( A'*D, indx_map, mSmall, true )'; % alternative way 35 | >> P2 = countSketch_BLAS( A'*D, indx_map, mSmall, true )'; % if you did the fancy compile 36 | ``` 37 | 38 | [CountSketch_sparse.c](countSketch_sparse.c) is similar but works with sparse matrices. It doesn't have an option to use BLAS, so compilation is easy: 39 | ``` 40 | >> mex countSketch_sparse.c 41 | ``` 42 | This always assumes the transpose version. So, use it like: 43 | ``` 44 | >> A = sparse(A); 45 | >> P2 = countSketch_sparse( A'*D, indx_map, mSmall )'; 46 | ``` 47 | 48 | -------------------------------------------------------------------------------- /Code/countSketch.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Standard Usage: 3 | * 4 | * indx_map = int64(randi(mSmall,mBig,1)); 5 | * D = spdiags( sign(randn(mBig,1)), 0, mBig, mBig ); 6 | * 7 | * doTranspose = false; 8 | * P = countSketch( D*A, indx_map, mSmall, doTranspose ) 9 | * 10 | * or 11 | * 12 | * doTranspose = true; 13 | * P = countSketch( A'*D, indx_map, mSmall, doTranspose )' 14 | * (note the transposes; this version should be faster 15 | * due to how Matlab stores matrices, and the fact that 16 | * Matlab can transpose a matrix very efficiently) 17 | * 18 | * A is an mBig x N matrix 19 | * P is an mSmall x N matrix 20 | * indx_map is an int64 vector of length mBig where each entry 21 | * is an integer in [1,mSmall] (i.e., 1-based indexing, Matlab-style) 22 | * 23 | * 24 | * Note: the random sign flips are NOT done in this mex file, 25 | * that is why you should multiply by the diagonal D matrix as suggested 26 | * above. The code is written this way because Matlab is very efficient 27 | * at the diagonal multiply, so writing that myself in C would lead to 28 | * worse performance. Or maybe it would be fast when using the BLAS 29 | * version but not by much, so I was too lazy to do it... 30 | * 31 | * Implements the "CountSketch" 32 | * as known in the data streaming literature 33 | * (e.g., 7] Moses Charikar, Kevin Chen, and Martin Farach-Colton. 34 | * "Finding frequent items in data streams". Theor. 35 | * Comput. Sci., 312(1):3–15, 2004 ) 36 | * 37 | * In the compressed least squares literature, this was analyzed in 38 | * 39 | * "Low Rank Approximation and Regression in Input Sparsity Time" 40 | * Kenneth L. Clarkson, David P. 
Woodruff 41 | * http://arxiv.org/abs/1207.6365 42 | * STOC '13, co-winner of best paper award 43 | * 44 | * Computational complexity is nnz(A) 45 | * 46 | * */ 47 | 48 | /* Compiling: 49 | * 50 | * Compile with just: mex countSketch.c 51 | * 52 | * or you can try to get more performance with things like 53 | * 54 | * mex countSketch.c CFLAGS="\$CFLAGS -O3 -malign-double -march=native" 55 | * 56 | * or 57 | * 58 | * mex countSketch.c -output countSketch_BLAS -DUSE_BLAS -lmwblas CFLAGS="\$CFLAGS -O3 -malign-double -march=native" 59 | * 60 | * (these are for GCC compilers, you'll need to change the flags slightly 61 | * for MVCC or LLVM/Clang compilers, e.g., Clang doens't use -malign-double ) 62 | * NOTE: pass the flag -DLONGLONG to use LONG LONG pointer to indx_map. This is 63 | * necessary if LONG is of size 32 bit. 64 | * 65 | */ 66 | 67 | /* Notes: 68 | * 69 | * For efficiency, since Matlab uses column-major order, 70 | * the input should be At ( = A') and NOT A 71 | * Likewise, the output is Pt ( = P' = (Sketch(A))' ) 72 | * 73 | * In theory, this can be applied to sparse matrices 74 | * It would only be efficient if they are stored in csr order 75 | * (Matlab uses csc), or if we have the transpose of a csc matrix 76 | * (i.e., do the exact same transpose trick we do for sparse 77 | * matrices ) 78 | * 79 | * For now, does NOT do sparse matrices 80 | * There is a separate code just for sparse matrices 81 | * 82 | * Warning: the code does not do error checks, so it can easily crash. 83 | * Make sure that the "indx_map" is of type int64 84 | * 85 | * Stephen Becker, srbecker@us.ibm.com, June 5 2014 86 | * The use of CountSketch was suggested by Haim Avron 87 | * 88 | * Updates by Stephen Becker, Feb 2019 89 | **/ 90 | #if defined(__GNUC__) && !(defined(__clang__)) 91 | #include 92 | #endif 93 | #include 94 | #include "mex.h" 95 | 96 | #ifdef USE_BLAS 97 | #include "blas.h" 98 | #endif 99 | 100 | 101 | void mexFunction( int nlhs, mxArray *plhs[], 102 | int nrhs, const mxArray*prhs[] ) 103 | 104 | { 105 | double *At,*Pt; 106 | double *A, *P; 107 | double alpha; 108 | #ifdef LONGLONG 109 | long long *indx_map; 110 | #else 111 | long *indx_map; 112 | #endif 113 | mwSize mBig,mSmall,n, i,j,k; 114 | int DO_TRANSPOSE=1; 115 | #ifdef USE_BLAS 116 | ptrdiff_t size, stride, strideBig; 117 | #endif 118 | 119 | /* Check for proper number of arguments */ 120 | if (nrhs != 4) { 121 | mexErrMsgIdAndTxt( "MATLAB:countSketch:invalidNumInputs", 122 | "Four input arguments required."); 123 | } else if (nlhs > 1) { 124 | mexErrMsgIdAndTxt( "MATLAB:countSketch:maxlhs", 125 | "Too many output arguments."); 126 | } 127 | if ( !(mxIsInt64(prhs[1])) ) 128 | mexErrMsgTxt("2nd input must be of type int64"); 129 | #ifdef LONGLONG 130 | indx_map = (long long *)mxGetData( prhs[1] ); 131 | #else 132 | indx_map = (long *)mxGetData( prhs[1] ); 133 | #endif 134 | mSmall = mxGetScalar( prhs[2] ); 135 | DO_TRANSPOSE = (int)mxGetScalar( prhs[3] ); 136 | 137 | if (mxIsSparse(prhs[0])) 138 | mexErrMsgTxt("Cannot handle sparse 'A' matrix, try countSketch_sparse.c instead"); 139 | 140 | if ( DO_TRANSPOSE == 1 ) { 141 | At = mxGetPr(prhs[0] ); 142 | n = mxGetM( prhs[0] ); 143 | mBig= mxGetN( prhs[0] ); 144 | /* Create a matrix for the return argument */ 145 | plhs[0] = mxCreateDoubleMatrix( (mwSize)n, (mwSize)mSmall, mxREAL); 146 | Pt = mxGetPr( plhs[0] ); 147 | P = NULL; /* try to prevent bugs */ 148 | A = NULL; 149 | #ifdef USE_BLAS 150 | size = (ptrdiff_t)n; 151 | stride = (ptrdiff_t)1; 152 | #endif 153 | /* And the actual 
computation: 154 | * Copy columns of At to Pt */ 155 | alpha = 1.; 156 | for ( i=0; i < mBig ; i++ ) { 157 | k = indx_map[i]-1; /* 0-based */ 158 | /* copy Pt(:,k) <-- At(:,i) 159 | * e.g. since height of Pt is N, 160 | * P + k*n <-- At + i*n */ 161 | #ifdef USE_BLAS 162 | daxpy(&size,&alpha,At+i*n,&stride,Pt+k*n,&stride); 163 | #else 164 | for ( j=0; j 47 | #endif 48 | #include 49 | #include "mex.h" 50 | 51 | /* NOTE: pass the flag -DLONGLONG to use LONG LONG pointer to indx_map. This is 52 | * necessary if LONG is of size 32 bit. 53 | */ 54 | 55 | void mexFunction( int nlhs, mxArray *plhs[], 56 | int nrhs, const mxArray*prhs[] ) 57 | 58 | { 59 | double *At,*Pt; 60 | #ifdef LONGLONG 61 | long long *indx_map; 62 | #else 63 | long *indx_map; 64 | #endif 65 | mwSize mBig,mSmall,n, i,j,k; 66 | 67 | mwIndex *ir, *jc; 68 | double *a; 69 | 70 | /* Check for proper number of arguments */ 71 | if (nrhs != 3) { 72 | mexErrMsgIdAndTxt( "MATLAB:countSketch_sparse:invalidNumInputs", 73 | "Three input arguments required."); 74 | } else if (nlhs > 1) { 75 | mexErrMsgIdAndTxt( "MATLAB:countSketch_sparse:maxlhs", 76 | "Too many output arguments."); 77 | } 78 | if ( !(mxIsInt64(prhs[1])) ) 79 | mexErrMsgTxt("2nd input must be of type int64"); 80 | #ifdef LONGLONG 81 | indx_map = (long long *)mxGetData( prhs[1] ); 82 | #else 83 | indx_map = (long *)mxGetData( prhs[1] ); 84 | #endif 85 | mSmall = mxGetScalar( prhs[2] ); 86 | 87 | 88 | At = mxGetPr(prhs[0] ); 89 | n = mxGetM( prhs[0] ); 90 | mBig= mxGetN( prhs[0] ); 91 | /* Create a matrix for the return argument */ 92 | plhs[0] = mxCreateDoubleMatrix( (mwSize)n, (mwSize)mSmall, mxREAL); 93 | Pt = mxGetPr( plhs[0] ); 94 | 95 | if (mxIsComplex(prhs[0])) 96 | mexErrMsgTxt("Cannot handle complex data yet"); 97 | 98 | if (mxIsSparse(prhs[0])) { 99 | 100 | ir = mxGetIr(prhs[0]); /* Row indexing */ 101 | jc = mxGetJc(prhs[0]); /* Column count */ 102 | a = mxGetPr(prhs[0]); /* Non-zero elements */ 103 | 104 | /* Loop through columns of At */ 105 | 106 | for ( i=0; i < mBig; i++ ) { 107 | k = indx_map[i]-1; /* 0-based */ 108 | /* copy Pt(:,k) <-- At(:,i) 109 | * e.g. 
since height of Pt is N, 110 | * P + k*n <-- At + i*n */ 111 | 112 | for ( j=jc[i]; j= 2 && isnumeric(St) && numel(St)==1, tol = St; end 46 | if nargin >= 3 && isnumeric(n) && numel(n)==1, maxiter = n; end 47 | n = size(S,2); 48 | end 49 | 50 | if isempty(nVectors) 51 | if numel(n) > 1 52 | nVectors = 1; 53 | else 54 | nVectors = 1; 55 | end 56 | end 57 | 58 | if ~IMPLICIT 59 | x = sum(abs(S),1)'; 60 | if nVectors > 1 61 | x = [x, randn(n,nVectors-1)]; 62 | end 63 | else 64 | % x = ones(n,1); % can interact with some special operators 65 | if numel(n) == 1 66 | x = randn(n,nVectors); 67 | else 68 | if nVectors > 1 69 | error('Not compatible in this mode'); 70 | end 71 | x = randn(n); % assume n is a size vector 72 | end 73 | end 74 | 75 | cnt = 0; 76 | if nVectors > 1 77 | e = sqrt(max( sum(x.^2,1) ) ); 78 | [x,~] = qr(x,0); 79 | else 80 | e = norm(x(:)); 81 | if e == 0, return, end 82 | x = x/e; 83 | end 84 | e0 = 0; 85 | while abs(e-e0) > tol*e && cnt < maxiter 86 | e0 = e; 87 | if ~IMPLICIT 88 | Sx = S*x; 89 | else 90 | Sx = S(x); 91 | end 92 | if nnz(Sx) == 0 93 | Sx = rand(size(Sx)); 94 | end 95 | if nVectors > 1 96 | e = sqrt(max( sum(Sx.^2,1) ) ); 97 | % [Q,~] = qr(Sx,0); 98 | % Sx = Q; 99 | else 100 | e = norm(Sx(:)); 101 | end 102 | if ~IMPLICIT 103 | x = S'*Sx; 104 | else 105 | x = St(Sx); 106 | end 107 | if nVectors > 1 108 | [Q,~] = qr(x,0); 109 | x = Q; 110 | else 111 | x = x/norm(x(:)); 112 | end 113 | cnt = cnt+1; 114 | end 115 | -------------------------------------------------------------------------------- /Code/pdist2_faster.m: -------------------------------------------------------------------------------- 1 | function [D,I] = pdist2_faster(X,Y,style,smallStr,K) 2 | % pdist2_faster Pairwise distance between two sets of observations. 3 | % D = pdist2(X,Y) returns a matrix D containing the Euclidean distances 4 | % between each pair of observations in the MX-by-N data matrix X and 5 | % MY-by-N data matrix Y. Rows of X and Y correspond to observations, 6 | % and columns correspond to variables. D is an MX-by-MY matrix, with the 7 | % (I,J) entry equal to distance between observation I in X and 8 | % observation J in Y. 9 | % 10 | % D = pdist2(X,Y,DISTANCE) computes D using DISTANCE. Choices are: 11 | % 12 | % 'euclidean' - Euclidean distance (default) 13 | % 'squaredeuclidean' - Squared Euclidean distance 14 | % 15 | % Also gives the index corresponding to the smallest entry if requested ... 16 | % see documentation for pdist2 from the Statistics Toolbox 17 | % Code by Stephen Becker, March 2019 18 | % See also pdist2 19 | 20 | if nargin < 2 || isempty(Y) 21 | Y = X; 22 | end 23 | 24 | [Mx,N] = size(X); 25 | [My,N] = size(Y); 26 | 27 | XtY = X*Y'; 28 | nrm1 = sum(X.^2,2); 29 | nrm2 = sum(Y.^2,2); 30 | 31 | D = nrm1*ones(1,My) + ones(Mx,1)*nrm2' - 2*XtY; 32 | 33 | if nargin > 2 && ~isempty(style) 34 | if isa(style,'function_handle') 35 | % apply the function handle! 
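% --- Added explanatory comment (not in the original file) ---
% The matrix D computed above uses the identity
%   ||x - y||^2 = ||x||^2 + ||y||^2 - 2*x'*y,
% which is why a single matrix multiply X*Y' does all the pairwise work.
% One caveat: floating-point cancellation can leave tiny negative entries
% in D; if that matters for your application, clamp via D = max(D,0)
% before any square roots are taken.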
36 | D = style(D); 37 | else 38 | switch lower(style) 39 | case 'euclidean' 40 | D = sqrt(D); 41 | case 'squaredeuclidean' 42 | end 43 | end 44 | else 45 | % by default, Euclidean 46 | D = sqrt(D); 47 | end 48 | 49 | % and similar to pdist2, 50 | if nargin > 3 51 | if nargin ==5 && ~isempty(K) 52 | if K~=1 53 | error('K ~= 1 not supported'); 54 | end 55 | end 56 | if strcmpi(smallStr,'smallest') 57 | [~,I] = min( D, [], 1 ); % over 1st dimension, like Matlab's convention 58 | D = D(I); 59 | else 60 | fprintf('Bad string: %s\n'); 61 | error('Wrong 4th input'); 62 | end 63 | end 64 | end -------------------------------------------------------------------------------- /Code/power_method.py: -------------------------------------------------------------------------------- 1 | from scipy.sparse.linalg import LinearOperator 2 | from scipy.sparse import spmatrix 3 | import numpy as np 4 | import logging 5 | 6 | def spectral_norm(A, tol=1e-8, max_iter=1000): 7 | """Computes the spectral norm of a linear operator A using power iteration. 8 | 9 | Parameters 10 | =================== 11 | - `A` (`numpy.ndarray`, `scipy.sparse.spmatrix`, or `scipy.sparse.linalg.LinearOperator`): 12 | the matrix for which we want to compute the spectral norm. 13 | 14 | Keyword parameters 15 | ==================== 16 | - `tol` (float, default = `1e-8`): tolerance used to determine whether or not we 17 | should stop iterating. Once the estimates for the spectral norm are within distance 18 | `tol` of one another, we stop the power iterations and return. 19 | - `max_iter` (int, default = `1000`): maximum number of power iterations to do. If 20 | we reach this number of iterations then this function will return, but will display 21 | a warning that we reached the maximum number of iterations. 22 | - Power iteration can be extremely slow to converge, so you may need a large value 23 | of `max_iter` in order to find the true spectral norm. 24 | 25 | Return 26 | ==================== 27 | - `sp_norm` (float): the estimated spectral norm of `A`. 28 | 29 | Code by Will Shand at the request of Stephen Becker, March 2019 30 | """ 31 | if not any(issubclass(type(A),T) for T in [np.ndarray, spmatrix, LinearOperator]): 32 | raise ValueError("spectral_norm can only take arguments of type " 33 | "numpy.ndarray, scipy.sparse.spmatrix, or " 34 | "scipy.sparse.linalg.LinearOperator.") 35 | 36 | # Create an anonymous function matvec_op whose effect is equivalent to multiplying 37 | # the input by A'A. 38 | if issubclass(type(A), LinearOperator): 39 | matvec_op = lambda x: A.adjoint().matvec(A.matvec(x)) 40 | else: 41 | matvec_op = lambda x: A.T.dot(A.dot(x)) 42 | 43 | sp_norm = 0. 44 | sp_iter = np.random.normal(size = A.shape[-1]) 45 | for ii in range(max_iter): 46 | Ax = matvec_op(sp_iter) 47 | new_sp_norm = np.linalg.norm(sp_iter) 48 | 49 | # Stopping condition when eigenvalue estimates get sufficiently close 50 | if abs(new_sp_norm - sp_norm) < tol: 51 | break 52 | else: 53 | sp_norm = new_sp_norm 54 | sp_iter = Ax / new_sp_norm 55 | 56 | if ii == max_iter-1: 57 | logging.warn(" spectral_norm ran for max_iter = %d iterations " 58 | "without converging. Returning..." % max_iter) 59 | 60 | return np.sqrt(sp_norm) 61 | 62 | """ 63 | TESTING 64 | """ 65 | if __name__ == "__main__": 66 | from scipy.sparse import random as sprandom 67 | 68 | # 1. 
Test on some random numpy arrays 69 | for ii in range(50): 70 | X = np.random.normal(size=(50,30)) 71 | assert(abs(np.linalg.norm(X,2) - spectral_norm(X, max_iter=5000, tol=1e-8)) <= 1e-7) 72 | 73 | # 2. Test on some LinearOperator instances 74 | for ii in range(50): 75 | X1 = sprandom(50,50,density=0.2) 76 | X2 = np.random.normal(size=(10,50)) 77 | 78 | # Dense representation of difference X1 - X2'X2 79 | A = X1 - X2.T.dot(X2) 80 | 81 | # LinearOperator representation of X1 - X2'X2 82 | mv = lambda x: X1.dot(x) - X2.T.dot(X2.dot(x)) 83 | rmv = lambda x: X1.T.dot(x) - X2.T.dot(X2.dot(x)) 84 | L = LinearOperator(X1.shape, matvec=mv, rmatvec=rmv) 85 | 86 | assert(abs(np.linalg.norm(A,2) - spectral_norm(L, max_iter=5000, tol=1e-8)) <= 1e-7) 87 | -------------------------------------------------------------------------------- /Code/sketch.m: -------------------------------------------------------------------------------- 1 | function [fcn,S] = sketch( m, M, typeOfSketch, performTest, varargin ) 2 | % fcn = sketch( m, M, 'type' ) 3 | % returns a function that implements a sketch of the requested type 4 | % so that Y = fcn(A) is a sketched version of A. 5 | % Every time you call this function, you get a new random sketch 6 | % Y has m rows, A has M rows. The number of columns of A is arbitrary 7 | % (Y will have the same number of columns) 8 | % 9 | % Valid types of sketches: 10 | % gaussian, haar, count, fjlt, hadamard, sparse, subsample 11 | % 12 | % [fcn,S] = sketch( ... ) 13 | % also returns the explicit matrix representation of the sketch 14 | % e.g., fcn(A) is the same as S*A. S is of size m x M 15 | % 16 | % sketch( ..., performTest ) 17 | % if performTest = true, then this checks that the sketch 18 | % really has the property E[ S'S ] = I_M 19 | % errorHistory = sketch( ..., performTest ) 20 | % will return the full history of the errrors 21 | % 22 | % sketch( ..., parameterName, parameterValue, ... ) 23 | % allows optional parameter, such as: 24 | % 'sparsity' (for sparse sketches) 25 | % 'weights' (for subsample, if you want it non-uniform) 26 | % 'nReps' (for how many repetitions to use when testing) 27 | % 28 | % Stephen Becker, Feb 2019, updates Oct 2021 29 | 30 | % todo: instead of countSketch_slow, make a sparse matrix, 31 | % as in scipy.linalg.clarkson_woodruff_transform 32 | % todo: create adjoints, as in the python version of this code 33 | 34 | S = []; 35 | if nargin < 4 || isempty(performTest) 36 | performTest = false; 37 | end 38 | 39 | 40 | prs = inputParser; 41 | defaultSparsity = 0.01; 42 | addParameter(prs,'sparsity',defaultSparsity); 43 | addParameter(prs,'weights',[]); 44 | addParameter(prs,'nReps',100); 45 | parse(prs,varargin{:}); 46 | sparsity = prs.Results.sparsity; 47 | weights = prs.Results.weights; 48 | nReps = prs.Results.nReps; 49 | 50 | if performTest 51 | sumS = zeros(M); 52 | if nargout > 0 53 | errHist = zeros(nReps,1); 54 | end 55 | fprintf('\nRunning test to see of E[S''S] = I (for sketch of type %s)\n', typeOfSketch); 56 | warning('off','sketch:buildMatrix') 57 | printEvery = round( nReps / 10 ); 58 | for rep = 1:nReps 59 | % Call this own function recursively 60 | [~,S] = sketch( m, M, typeOfSketch, false, varargin{:} ); % 10/26/21 fixed bug here 61 | sumS = sumS + S'*S; 62 | if nargout > 0 63 | errHist(rep) = norm( sumS/rep - eye(M), 'fro' )/M; 64 | end 65 | if ~mod(rep,printEvery) 66 | err = norm( sumS/rep - eye(M), 'fro' )/M; 67 | fprintf('%3d trials, error || sampleMean(S''S)-I ||_F is %4.1e', ... 
68 | rep, err ); 69 | if rep > printEvery 70 | fprintf(', %.2f change', err/errOld); 71 | end 72 | fprintf('\n'); 73 | errOld = err; 74 | end 75 | end 76 | sumS = sumS/nReps; 77 | fprintf('The first 5 x 5 block of sampleMean is: \n'); 78 | disp( sumS(1:5,1:5) ); 79 | fprintf('Average diagonal entry is %.7f, should be 1\n', mean(diag(sumS)) ); 80 | if nargout > 0 81 | fcn = errHist; 82 | end 83 | warning('on','sketch:buildMatrix') 84 | return; 85 | end 86 | 87 | 88 | 89 | switch lower(typeOfSketch) 90 | 91 | case 'gaussian' 92 | S = randn(m,M)/sqrt(m); 93 | fcn = @(A) S*A; 94 | 95 | case 'haar' % see http://arxiv.org/abs/math-ph/0609050 by Mezzadri 96 | [Q,R] = qr( randn(M,m), 0 ); 97 | d = sign(diag(R)); 98 | Q = Q*spdiags(d,0,m,m); 99 | S = sqrt(M/m)*Q'; 100 | fcn = @(A) S*A; 101 | 102 | case {'count', 'countsketch'} 103 | d = sign(randn(M,1)); 104 | D = spdiags(d,0,M,M); % bsxfun() is another efficient way to do this 105 | useTranspose = true; 106 | indx_map = int64(randi(m,M,1)); % don't do this in C! 107 | if exist( 'countSketch_BLAS', 'file' ) 108 | fcn = @(A) countSketch_BLAS(A'*D,indx_map,m,useTranspose)'; 109 | elseif exist( 'countSketch', 'file' ) 110 | fcn = @(A) countSketch(A'*D,indx_map,m,useTranspose)'; 111 | else 112 | msg = 'Using slow countSketch, please compile countSketch.c'; 113 | msg = [msg,'\n To turn this warning off, call warning(''off'',''sketch:slowCount'')']; 114 | warning('sketch:slowCount',msg); 115 | fcn = @(A) slowCountSketch( D*A, double(indx_map), m ); 116 | end 117 | 118 | case 'fjlt' 119 | d = sign(randn(M,1)); 120 | D = spdiags(d,0,M,M); % bsxfun() is another efficient way to do this 121 | ind = randperm( M, m ); 122 | subsample = @(X) X(ind,:); 123 | fcn = @(A) sqrt(M/m)*subsample( dct( D*A ) ); % FIXME 124 | 125 | case {'fljt_hadamard','hadamard'} % Hadamard version of FJLT 126 | M2 = 2^nextpow2(M); 127 | if M ~= M2 128 | % need to zero pad 129 | upsample = @(X) [X; zeros(M2 - M, size(X,2) ) ]; 130 | else 131 | upsample = @(X) X; % do nothing 132 | end 133 | 134 | d = sign(randn(M2,1)); 135 | D = spdiags(d,0,M2,M2); % bsxfun() is another efficient way to do this 136 | ind = randperm( M2, m ); 137 | subsample = @(X) X(ind,:); 138 | 139 | if exist('hadamard_pthreads','file')==3 140 | fcn = @(x) 1/sqrt(m)*subsample( hadamard_pthreads( D*upsample(full(x))) ); 141 | elseif exist('hadamard','file')==3 142 | fcn = @(x) 1/sqrt(m)*subsample( hadamard( D*upsample(full(x))) ); 143 | elseif exist('Hadamard_teaching_code','file')==2 144 | % It turns out our naive Matlab implementation is better than 145 | % the fwht function! 146 | fcn = @(x) 1/sqrt(m)*subsample( Hadamard_teaching_code( ... 147 | D*upsample(full(x)) ) ); 148 | else 149 | % This is slow! 150 | fcn = @(x) (M2*sqrt(1/m))*subsample( fwht( D*upsample(full(x)), [], 'hadamard') ); 151 | end 152 | 153 | case 'sparse' 154 | S = sign(sprandn(m,M,sparsity)); 155 | nz = nnz(S); 156 | if nz == 0 157 | warning('skech:sparse','Sparse sketch is all zeros! 
Increase sparsity and/or dimensions'); 158 | end 159 | sparsity_actual = nz/(m*M); % often slightly under sparse 160 | S = sqrt(1/(m*sparsity_actual))*S; % we may not have had exactly sparsity*m*n entries 161 | fcn = @(A) S*A; 162 | 163 | case 'subsample' 164 | if isempty(weights) 165 | ind = randperm( M, m ); 166 | subsample = @(X) X(ind,:); 167 | fcn = @(A) sqrt(M/m)*subsample(A); 168 | else 169 | weights = weights/sum(weights); % normalize to a valid probability 170 | ind = randsample( M, m, true, weights ); 171 | subsample = @(X) X(ind,:); 172 | fcn = @(A) spdiags(sqrt(1./(m*weights(ind))),0,m,m)*subsample(A); 173 | end 174 | otherwise 175 | error('Invalid type of sketch requested'); 176 | end 177 | 178 | if isempty(S) && nargout >= 2 179 | msg='You have requested the explicit sketch matrix which is slow!'; 180 | msg=[msg,'\n To turn this warning off, call warning(''off'',''sketch:buildMatrix'')']; 181 | warning('sketch:buildMatrix',msg); 182 | S = fcn(eye(M)); 183 | end 184 | 185 | end % end of main function 186 | 187 | function Y = slowCountSketch( DX, targetRows, m ) 188 | % slow version of count sketch 189 | Y = zeros(m, size(DX,2) ); 190 | for j = 1:size(DX,1) 191 | i = targetRows(j); 192 | Y(i,:) = Y(i,:) + DX(j,:); 193 | end 194 | end 195 | 196 | -------------------------------------------------------------------------------- /Demos/README.md: -------------------------------------------------------------------------------- 1 | # Demos 2 | 3 | Below are the demos from Spring 2019, which we will use and modify Fall 2021. Most are Matlab, some are Python or Julia. TODO: create ipynb versions (with colab links) for the Python demos. (If a student wants to do this, please go ahead, and make a pull request) 4 | 5 | The hyperlinks below are to ipynb (jupyter) notebooks using [nbviewer](https://nbviewer.jupyter.org) since github's default markdown interpreter doesn't always work (it often works if you refresh the page a few times, but not always) 6 | 7 | - [Demo 1](https://nbviewer.jupyter.org/github/stephenbeckr/randomized-algorithm-class/blob/master/Demos/demo01_exactRankR.ipynb): a simple randomized SVD algorithm assuming exactly low-rank matrix, ([colab link for ipynb](https://colab.research.google.com/github/stephenbeckr/randomized-algorithm-class/blob/master/Demos/demo01_exactRankR.ipynb)) 8 | - [Demo 2](https://nbviewer.jupyter.org/github/stephenbeckr/randomized-algorithm-class/blob/master/Demos/demo02_sorts.ipynb): compare deterministic and randomized bubble and quicksorts, ([colab link for ipynb](https://colab.research.google.com/github/stephenbeckr/randomized-algorithm-class/blob/master/Demos/demo02_sorts.ipynb)) 9 | - Demo 3: compare different ways to compute the Frobenius norm of a matrix (in C) 10 | - Demo 4: same as demo 3 but for sparse matrices (in Matlab). 
Compares row vs column access 11 | - Demo 5: speed/timing results for various Fast Johnson-Lindenstrauss Transforms 12 | - Demo 6: statistical leverage scores applied to least-squares regression 13 | - Demo 7: random matrix multiplication via sub-sampling, following Drineas/Mahoney summer school notes 14 | - Demo 8: high accuracy l2 regression via either iterative Hessian sketch or preconditioning (BLENDENPIK/LSRN) 15 | - Demo 9: Randomized Kaczmarz for solving consistent over-determined systems of equations 16 | - Demo 10: l1 regression and p-stable distributions (for p=1,2, i.e., Cauchy and Gaussian) 17 | - Demo 11: James-Stein estimator 18 | - Demo 12: Basic noiseless Compressed Sensing demo 19 | - Demo 13: Euclidean Distance Matrix (EDM) completion example, using nuclear norm minimization 20 | - Demo 14: Monte Carlo integration and improvements (quasi-Monte Carlo, control variates), and comparison with quadrature 21 | - Demo 15: Stochastic Gradient Descent (SGD) and improvements (SAGA, SVRG, Mini-batches, iterate averaging) 22 | - Demo 16: Locality Sensitive Hashing (LSH): MinHash, SimHash, Euclidean distance hash 23 | - Demo 17: LSH applied to k-Nearest-Neighbors (kNN) 24 | - Demo 18: CountMin sketch to efficiently find frequencies of names (using data from the Social Security Administration) 25 | - Demo 19: AMS sketch vs Count sketch (median vs mean postprocessing) 26 | - Demo 20: Core sets for Kmeans using Kmeans++ as a coarse approximation 27 | 28 | At some point I thought I had a distinction between "vignettes" and "demos", but I think that is gone now, and I've tried to rename them all to "demo". 29 | 30 | 31 | ### ipynb notebooks not rendering 32 | Do you ever get the error message "Sorry, something went wrong. Reload?" when clicking on an `ipynb` file? If so, try refreshing the page a few times. If that doesn't resolve it soon (sometimes it does, sometimes it doesn't), then you can try either 33 | 1. go to the repository on GitHub directly and "open" the file using the github interface 2.
view the file by going to and pasting in the URL 35 | -------------------------------------------------------------------------------- /Demos/demo01_exactRankR.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "demo01_exactRankR.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyNs9oSNhJJJsBsL89hqpJF/", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "tpCtpbyG6Od3" 34 | }, 35 | "source": [ 36 | "# Demo #1\n", 37 | "\n", 38 | "APPM 5650 Randomized Algorithms, Fall 2021\n", 39 | "\n", 40 | "Stephen Becker (original MATLAB '19, jupyter version '21) & Jake Knigge (Python '19)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "metadata": { 46 | "id": "azcvL3hp6Ir9" 47 | }, 48 | "source": [ 49 | "import numpy as np\n", 50 | "from numpy.linalg import norm\n", 51 | "from scipy.sparse.linalg import LinearOperator, svds\n", 52 | "import scipy\n", 53 | "\n", 54 | "np.set_printoptions(precision = 4) # display only four digits\n", 55 | "rng = np.random.default_rng(12345)\n", 56 | "n = np.int(4e3); m = n # dimension of problem\n", 57 | "r = np.int(100) # rank of matrix\n", 58 | "\n", 59 | "Left = rng.standard_normal( size=(m,r))\n", 60 | "Right= rng.standard_normal( size=(r,n))\n", 61 | "A = Left@Right\n", 62 | "# Another case is that we *know* A has this structure, in which case we can exploit:\n", 63 | "A_operator = LinearOperator( (m,n), matvec = lambda x : Left@(Right@x), \n", 64 | " rmatvec = lambda y : Right.T@(Left.T@y) )\n", 65 | "\n", 66 | "def printError(U,s,Vh):\n", 67 | " S = np.reshape( s, (len(s),1) )\n", 68 | " A_estimate = U@(S*Vh)\n", 69 | " err = norm( A - A_estimate ) / norm( A )\n", 70 | " print(f'The error ||A-A_estimate||_F/||A||_F is {err:0.2e}')\n", 71 | " print(f'The largest and smallest (non-zero) singular values are {s[0]:0.4f} and {s[-1]:0.4f}')" 72 | ], 73 | "execution_count": 1, 74 | "outputs": [] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": { 79 | "id": "figfEc3m6raF" 80 | }, 81 | "source": [ 82 | "## Find SVD of $A$ with conventional methods" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": { 88 | "id": "jDN11ngv-a3B" 89 | }, 90 | "source": [ 91 | "Dense SVD" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "metadata": { 97 | "colab": { 98 | "base_uri": "https://localhost:8080/" 99 | }, 100 | "id": "32Z8kWeA6q4M", 101 | "outputId": "97da52da-58f6-4d6d-ff43-027acd13bec8" 102 | }, 103 | "source": [ 104 | "%time U, S, Vh = np.linalg.svd(A, full_matrices=False)\n", 105 | "\n", 106 | "printError(U,S,Vh)" 107 | ], 108 | "execution_count": 2, 109 | "outputs": [ 110 | { 111 | "output_type": "stream", 112 | "text": [ 113 | "CPU times: user 1min 31s, sys: 3.36 s, total: 1min 34s\n", 114 | "Wall time: 48.8 s\n", 115 | "The error ||A-A_estimate||_F/||A||_F is 2.60e-15\n", 116 | "The largest and smallest (non-zero) singular values are 4854.7887 and 0.0000\n" 117 | ], 118 | "name": "stdout" 119 | } 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": { 125 | "id": "2JU63wKz-fG-" 126 | }, 127 | "source": [ 
128 | "Krylov subspace method (usually best for sparse matrices or some kind of structure)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "metadata": { 134 | "colab": { 135 | "base_uri": "https://localhost:8080/" 136 | }, 137 | "id": "HCBTmURM_Dav", 138 | "outputId": "57e07f79-60db-4f44-ef14-aa3a9ba0b016" 139 | }, 140 | "source": [ 141 | "%time U, S, Vh = scipy.sparse.linalg.svds( A, k=r)\n", 142 | "\n", 143 | "printError(U,S,Vh)" 144 | ], 145 | "execution_count": 4, 146 | "outputs": [ 147 | { 148 | "output_type": "stream", 149 | "text": [ 150 | "CPU times: user 7.61 s, sys: 4.85 s, total: 12.5 s\n", 151 | "Wall time: 6.49 s\n", 152 | "The error ||A-A_estimate||_F/||A||_F is 9.28e-16\n", 153 | "The largest and smallest (non-zero) singular values are 3159.3836 and 4854.7887\n" 154 | ], 155 | "name": "stdout" 156 | } 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": { 162 | "id": "JObhNvxwAX0v" 163 | }, 164 | "source": [ 165 | "... and **if we knew the structure of $A$** :\n", 166 | "(careful: for `svds` the documentation says \"The order of the singular values is not guaranteed.\")" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "metadata": { 172 | "colab": { 173 | "base_uri": "https://localhost:8080/" 174 | }, 175 | "id": "XQq_XTcmASK_", 176 | "outputId": "d658d6c6-3308-4b18-d23a-d6d1ebf75ce5" 177 | }, 178 | "source": [ 179 | "%time U, S, Vh = scipy.sparse.linalg.svds( A_operator, k=r)\n", 180 | "\n", 181 | "printError(U,S,Vh)" 182 | ], 183 | "execution_count": 5, 184 | "outputs": [ 185 | { 186 | "output_type": "stream", 187 | "text": [ 188 | "CPU times: user 879 ms, sys: 526 ms, total: 1.4 s\n", 189 | "Wall time: 759 ms\n", 190 | "The error ||A-A_estimate||_F/||A||_F is 1.29e-15\n", 191 | "The largest and smallest (non-zero) singular values are 3159.3836 and 4854.7887\n" 192 | ], 193 | "name": "stdout" 194 | } 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": { 200 | "id": "8oQdOtlD6ysQ" 201 | }, 202 | "source": [ 203 | "## Find SVD of $A$ with randomized method" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": { 209 | "id": "x0mHeQc0AeTm" 210 | }, 211 | "source": [ 212 | "(no knowledge of the structure of $A$ required, other than knowing a good value for $r$)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "metadata": { 218 | "colab": { 219 | "base_uri": "https://localhost:8080/" 220 | }, 221 | "id": "e0osJVd560y9", 222 | "outputId": "12aa4286-8014-4ba0-e8c8-2fbb008b85f3" 223 | }, 224 | "source": [ 225 | "%%time\n", 226 | "Omega = rng.standard_normal(size=(n, r))\n", 227 | "Y = A@Omega # matrix multiply\n", 228 | "Q, R = np.linalg.qr(Y, mode='reduced')\n", 229 | "QtA = Q.T@A\n", 230 | "# A = Q@QtA, which is a low-rank factorization. 
If we also want\n", 231 | "# the SVD of A, then continue a little bit more:\n", 232 | "U_temp, S, Vh = np.linalg.svd(QtA, full_matrices=False)\n", 233 | "U = Q@U_temp" 234 | ], 235 | "execution_count": 12, 236 | "outputs": [ 237 | { 238 | "output_type": "stream", 239 | "text": [ 240 | "CPU times: user 707 ms, sys: 72.9 ms, total: 780 ms\n", 241 | "Wall time: 439 ms\n" 242 | ], 243 | "name": "stdout" 244 | } 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "metadata": { 250 | "colab": { 251 | "base_uri": "https://localhost:8080/" 252 | }, 253 | "id": "9k6tub2965rj", 254 | "outputId": "46d59c6b-9d19-44da-ade7-3545d7c316f9" 255 | }, 256 | "source": [ 257 | "printError( U, S, Vh )" 258 | ], 259 | "execution_count": 8, 260 | "outputs": [ 261 | { 262 | "output_type": "stream", 263 | "text": [ 264 | "The error ||A-A_estimate||_F/||A||_F is 8.11e-15\n", 265 | "The largest and smallest (non-zero) singular values are 4854.7887 and 3159.3836\n" 266 | ], 267 | "name": "stdout" 268 | } 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": { 274 | "id": "e-cUh8Wth-fa" 275 | }, 276 | "source": [ 277 | "By the way, if we do know the structure of $A$, we can also exploit that in the randomized method and get something a bit faster:" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "metadata": { 283 | "colab": { 284 | "base_uri": "https://localhost:8080/" 285 | }, 286 | "id": "Ac3SkJtN7OZp", 287 | "outputId": "55b0dc9f-f728-4611-9ba5-ce4131e2ffd4" 288 | }, 289 | "source": [ 290 | "%%time\n", 291 | "Omega = rng.standard_normal(size=(n, r))\n", 292 | "Y = A_operator@Omega # matrix multiply\n", 293 | "Q, R = np.linalg.qr(Y, mode='reduced')\n", 294 | "QtA = (A_operator.T@Q).T\n", 295 | "# A = Q@QtA, which is a low-rank factorization. 
If we also want\n", 296 | "# the SVD of A, then continue a little bit more:\n", 297 | "U_temp, S, Vh = np.linalg.svd(QtA, full_matrices=False)\n", 298 | "U = Q@U_temp" 299 | ], 300 | "execution_count": 11, 301 | "outputs": [ 302 | { 303 | "output_type": "stream", 304 | "text": [ 305 | "CPU times: user 353 ms, sys: 81.6 ms, total: 435 ms\n", 306 | "Wall time: 259 ms\n" 307 | ], 308 | "name": "stdout" 309 | } 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "metadata": { 315 | "colab": { 316 | "base_uri": "https://localhost:8080/" 317 | }, 318 | "id": "AgmSrSrfhwJO", 319 | "outputId": "4cb9766b-e4af-4cc1-a47a-191f571f5e7a" 320 | }, 321 | "source": [ 322 | "printError( U, S, Vh )" 323 | ], 324 | "execution_count": 10, 325 | "outputs": [ 326 | { 327 | "output_type": "stream", 328 | "text": [ 329 | "The error ||A-A_estimate||_F/||A||_F is 1.00e-14\n", 330 | "The largest and smallest (non-zero) singular values are 4854.7887 and 3159.3836\n" 331 | ], 332 | "name": "stdout" 333 | } 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "metadata": { 339 | "id": "PaxlQvAWh9Qv" 340 | }, 341 | "source": [ 342 | "" 343 | ], 344 | "execution_count": null, 345 | "outputs": [] 346 | } 347 | ] 348 | } -------------------------------------------------------------------------------- /Demos/demo01_exactRankR.m: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | Vignette #1 4 | APPM 4720/5720 Randomized Algorithms, Spring 2019 5 | Stephen Becker 6 | 7 | This is not a practical algorithm, since it only works 8 | if the matrix is *exactly* low rank 9 | 10 | %} 11 | 12 | rng(0); % seed the random number generator so this is reproducible 13 | 14 | % -- Generate a low-rank matrix (m x n, with rank r ) 15 | n = 4e3; 16 | m = n; 17 | r = 100; % rank 18 | 19 | A = randn(m,r)*randn(r,n); 20 | 21 | %% -- Find its SVD with conventional methods 22 | tic 23 | [U,Sigma,V] = svd(A,'econ'); 24 | toc 25 | % 35 seconds 26 | % semilogy( diag(Sigma), 'o-' ) 27 | % Matlab doesn't understand that the matrix is not full rank 28 | 29 | clear U Sigma V 30 | %% -- Find its SVD with a randomized method 31 | tt = tic; 32 | tic; Omega = randn(n,r); toc 33 | tic; Y = A*Omega; toc 34 | tic; [Q,R] = qr(Y,0); toc 35 | tic; QtA = Q'*A; toc 36 | tm = toc(tt); 37 | 38 | A_estimate = Q*QtA; 39 | err = norm( A - A_estimate, 'fro' )/norm(A,'fro'); 40 | fprintf('||A-A_estimate||_F/||A||_F is %g\n', err ); 41 | fprintf('Overall time: %g seconds\n', tm ); -------------------------------------------------------------------------------- /Demos/demo01_exactRankR.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------- # 2 | # -------------------------------------------------------------------------------------- # 3 | # Vignette #1 4 | # APPM 4720/5720 Randomized Algorithms, Spring 2019 5 | # Stephen Becker (original MATLAB) & Jake Knigge (Python) 6 | # This is not a practical algorithm, since it only works if the matrix is *exactly* low 7 | # rank. 
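# (Added note, not in the original file.) Why the randomized method below is
# fast: forming Y = A @ Omega costs O(m*n*r) flops and the QR factorization of
# the m-by-r matrix Y costs O(m*r^2), versus roughly O(m*n*min(m,n)) for a full
# SVD. For matrices that are only approximately low rank, one would oversample
# (use, say, r+10 columns in Omega) and optionally add a power iteration such
# as Y = A @ (A.T @ Y) before the QR step.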
8 | # -------------------------------------------------------------------------------------- # 9 | import numpy as np # import NumPy package 10 | import time as time # import time package 11 | # -------------------------------------------------------------------------------------- # 12 | np.set_printoptions(precision = 4) # display only four digits 13 | np.random.seed(seed = 2) # set seed for reproducibility 14 | n = int(4e3); m = n # dimension of problem (built-in int; np.int is deprecated) 15 | r = int(100) # rank of matrix 16 | mu, sigma = 0, 1 # mean and standard deviation 17 | zz = np.random.normal(mu, sigma, n*r) # generate (normal) random numbers 18 | Z = zz.reshape(n,r) # reshape to matrix 19 | A = np.matmul(Z, Z.T) # compute outer-product matrix 20 | # -------------------------------------------------------------------------------------- # 21 | # Find its SVD with conventional methods 22 | t = time.time() # time SVD calculation 23 | U, S, Vh = np.linalg.svd(A); V = Vh.T # compute SVD of A and transpose V 24 | elapsed = time.time() - t 25 | print('The full SVD took', round(elapsed, 4), 'seconds.') 26 | # -------------------------------------------------------------------------------------- # 27 | # Find its SVD with a randomized method 28 | tt = time.time() 29 | t = time.time(); Omega = np.random.normal(mu, sigma, (n, r)); print(round(time.time() - t, 4), 'seconds') 30 | t = time.time(); Y = np.matmul(A, Omega); print(round(time.time() - t, 4), 'seconds') 31 | t = time.time(); Q, R = np.linalg.qr(Y, mode='reduced'); print(round(time.time() - t, 4), 'seconds') 32 | t = time.time(); QtA = np.matmul(Q.T, A); print(round(time.time() - t, 4), 'seconds') 33 | tm = time.time() - tt 34 | print('The approximate SVD took', round(tm, 4), 'seconds.') 35 | # -------------------------------------------------------------------------------------- # 36 | A_estimate = np.matmul(Q, QtA) 37 | err = np.linalg.norm( np.ravel(A) - np.ravel(A_estimate) ) / np.linalg.norm( np.ravel(A) ) 38 | print('The error ||A-A_estimate||_F/||A||_F is ', '{:0.4e}'.format(err), '.', sep = '') 39 | # -------------------------------------------------------------------------------------- # 40 | # -------------------------------------------------------------------------------------- # 41 | -------------------------------------------------------------------------------- /Demos/demo02_sorts.m: -------------------------------------------------------------------------------- 1 | function demo02_sorts 2 | 3 | %{ 4 | Demo to show the effect of randomized perturbations on the speed 5 | of sorting algorithms 6 | 7 | Stephen Becker 8 | %} 9 | 10 | 11 | rng(0); % make it reproducible 12 | 13 | n = 100; 14 | % x = randn(n,1); 15 | % x = (1:n)'; % good for bubble_sort, bad for quick_sort 16 | x = (n:-1:1)'; % bad for bubble_sort, bad for quick_sort 17 | 18 | 19 | % == Gather systematic data == 20 | 21 | nList = round( logspace( 1, 3, 10) ); 22 | N = length( nList ); 23 | nReps = 10; 24 | 25 | [bubble, quick] = deal( zeros(N,1) ); 26 | [bubbleRandom, quickRandom] = deal( zeros(N,nReps) ); 27 | for ni = 1:N 28 | n = nList(ni); 29 | fprintf('%d of %d trials\n', ni, N ); 30 | 31 | x = (n:-1:1)'; 32 | 33 | a_less_than_b(); % zero out the counter 34 | y = bubble_sort(x); 35 | bubble(ni) = a_less_than_b(); % number of comparisons 36 | 37 | % issorted( y ) % <-- if you don't trust the implementation, check it!
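% --- Added explanatory comment (not in the original file) ---
% What to expect: on this reversed input, bubble sort never exits early, so
% it performs exactly n*(n-1)/2 comparisons; quicksort with a last-element
% pivot also degrades to ~n^2/2 comparisons, since every partition is
% maximally unbalanced. The random shuffles in the loop below restore
% quicksort's expected O(n log n) comparison count.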
38 | 39 | a_less_than_b(); % zero out the counter 40 | y = quick_sort(x); 41 | quick(ni) = a_less_than_b(); % number of comparisons 42 | 43 | for r = 1:nReps 44 | x = x(randperm(n)); 45 | 46 | a_less_than_b(); % zero out the counter 47 | y = bubble_sort(x); 48 | bubbleRandom(ni,r) = a_less_than_b(); % number of comparisons 49 | 50 | a_less_than_b(); % zero out the counter 51 | y = quick_sort(x); 52 | quickRandom(ni,r) = a_less_than_b(); % number of comparisons 53 | 54 | end 55 | 56 | end 57 | % == Plot == 58 | figure(1); clf; 59 | loglog( nList, bubble, 'o-','linewidth',2,'markersize',8); 60 | hold all 61 | loglog( nList, quick, 's:','linewidth',2,'markersize',8); 62 | loglog( nList, mean(bubbleRandom,2), '*:','linewidth',2,'markersize',8); 63 | loglog( nList, mean(quickRandom,2), 'd:','linewidth',2,'markersize',8); 64 | loglog( nList, nList.^2/2, '--' ) 65 | loglog( nList, nList.*log2(nList), '.-' ) 66 | set(gca,'fontsize',18); 67 | lh=legend('Bubble Sort','Quicksort','Bubble (randomized)',... 68 | 'Quick (randomized)','n^2/2','n log(n)','location','northwest'); 69 | xlabel('Length of list'); 70 | ylabel('Number of comparisons'); 71 | grid on 72 | 73 | end % end of main function 74 | 75 | %%%%% 76 | % SUBROUTINES 77 | %%%%% 78 | 79 | function x = bubble_sort( x ) 80 | % x = bubble_sort(x) 81 | % sorts x in increasing order 82 | 83 | n = length(x); 84 | 85 | for iteration = 1:n-1 86 | 87 | endIndex = n - iteration; 88 | 89 | alreadySorted = true; 90 | 91 | for i = 1:endIndex 92 | 93 | if a_less_than_b( x(i+1), x(i) ) 94 | % swap them: 95 | tmp = x(i+1); 96 | x(i+1) = x(i); 97 | x(i) = tmp; 98 | alreadySorted = false; 99 | end 100 | 101 | end 102 | if alreadySorted 103 | break; % early return 104 | end 105 | end 106 | end 107 | 108 | 109 | function x = quick_sort( x ) 110 | % x = quick_sort(x) 111 | % sorts x in increasing order 112 | 113 | n = length(x); 114 | 115 | % check for an early return (e.g., the base case in recursion) 116 | % (a real implementation would have a larger base case) 117 | if n <= 1 118 | return; 119 | end 120 | 121 | % Pick a pivot: 122 | pivot = x(n); % the last element 123 | % Note: quick sort is never implemented this way, since 124 | % it uses tricks so that it doesn't have to use extra memory.
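% (Added comment.) For instance, an in-place quicksort partitions the array
% around the pivot using two moving indices (Hoare or Lomuto partitioning)
% and recurses on index ranges, so it needs only the O(log n) recursion
% stack on average. The list-building version below trades that efficiency
% for readability.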
125 | smallerList = []; 126 | largerList = []; 127 | for i = 1:(n-1) 128 | value = x(i); 129 | if a_less_than_b( value, pivot ) 130 | smallerList(end+1) = value; 131 | else 132 | largerList(end+1) = value; 133 | end 134 | end 135 | 136 | % Now, recurse 137 | smallerList = quick_sort( smallerList ); 138 | largerList = quick_sort( largerList ); 139 | 140 | % and combine; the (:) forces column vectors so the vertical concatenation is valid 141 | x = [smallerList(:); pivot; largerList(:)]; 142 | end 143 | 144 | 145 | function y = a_less_than_b( a, b ) 146 | % y = a_less_than_b( a, b ) 147 | % returns "true" if a < b 148 | % and "false" if a >= b 149 | 150 | persistent counter 151 | if nargin == 0 152 | y = counter; 153 | counter = []; 154 | return; 155 | end 156 | if isempty( counter ), counter = 0; end 157 | 158 | % main code: 159 | 160 | y = (a < b ); 161 | counter = counter + 1; 162 | end -------------------------------------------------------------------------------- /Demos/demo02_sorts.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------- # 2 | # -------------------------------------------------------------------------------------- # 3 | # Vignette #2 4 | # APPM 4720/5720 Randomized Algorithms, Spring 2019 5 | # Stephen Becker (original MATLAB) & Jake Knigge (Python) 6 | # Demo to show the effect of randomized perturbations on the speed of sorting algorithms. 7 | # -------------------------------------------------------------------------------------- # 8 | import numpy as np # import numpy package 9 | import time as time # import time package 10 | import matplotlib.pyplot as plt # import matplotlib package 11 | # -------------------------------------------------------------------------------------- # 12 | def vignette02(): 13 | np.random.seed(seed = 2) # set seed for reproducibility 14 | nList = np.round(np.logspace(1, 2.75, num = 10)); 15 | N = len(nList); 16 | nReps = 5 17 | bubble = np.zeros((N,1)); quick = np.zeros((N,1)); 18 | bubbleRandom = np.zeros((N,nReps)); quickRandom = np.zeros((N,nReps)); 19 | for ni in range(N): 20 | n = int(nList[ni]) # cast to a plain int so it can be used as a length 21 | print(ni+1, 'of', N, 'trials') 22 | x = np.linspace(n,1,n); 23 | a_less_than_b.counter = 0 24 | y = bubble_sort(x); 25 | bubble[ni] = a_less_than_b.counter; 26 | a_less_than_b.counter = 0 27 | # x = np.linspace(1,n,n) # sorted input: good for bubble_sort, bad for quick_sort 28 | x = np.linspace(n,1,n); 29 | y = quick_sort(x); 30 | quick[ni] = a_less_than_b.counter; 31 | for r in range(nReps): 32 | x = x[np.random.permutation(n)]; 33 | a_less_than_b.counter = 0; 34 | y = bubble_sort(x); 35 | bubbleRandom[ni, r] = a_less_than_b.counter; 36 | x = x[np.random.permutation(n)]; 37 | a_less_than_b.counter = 0; 38 | y = quick_sort(x); 39 | quickRandom[ni, r] = a_less_than_b.counter; 40 | fig, ax = plt.subplots() 41 | line1, = ax.loglog(nList, bubble, label='bubble', marker='o') 42 | line2, = ax.loglog(nList, quick, label='quick' , marker='s') 43 | line3, = ax.loglog(nList, np.mean(quickRandom,1), label='quick random', marker=".") 44 | line4, = ax.loglog(nList, np.mean(bubbleRandom,1), label='bubble random', marker="*") 45 | line5, = ax.loglog(nList, np.log(nList)*nList, label='n log(n)', marker="x") 46 | line6, = ax.loglog(nList, nList**2 / 2, label='n^2/2', marker=',') 47 | ax.legend(loc='upper left') 48 | ax.grid(True) 49 | ax.set_xlabel('list length') 50 | ax.set_ylabel('number of comparisons') 51 | plt.title('comparison test: randomized vs.
    plt.show()

# -------------------------------------------------------------------------------------- #
# subroutines
# -------------------------------------------------------------------------------------- #
def bubble_sort(x):                 # sorts x in increasing order
    n = len(x)
    for iteration in range(n-1, 0, -1):
        alreadySorted = True
        for j in range(iteration):
            if a_less_than_b( x[j+1], x[j] ) == True:  # swap them
                tmp = x[j+1]
                x[j+1] = x[j]
                x[j] = tmp
                alreadySorted = False
        if alreadySorted == True:
            break
    return x

# -------------------------------------------------------------------------------------- #
def quick_sort(x):                  # sorts x in increasing order
    # quick_sort and supporting code follows example from...
    # http://interactivepython.org/runestone/static/pythonds/SortSearch/TheQuickSort.html
    quick_sort_r(x,0,len(x)-1)
    return x

# -------------------------------------------------------------------------------------- #
def quick_sort_r(x, first, last):   # recursive workhorse for quick_sort
    if first < last:
        split = partition(x, first, last)
        quick_sort_r(x, first, split-1)
        quick_sort_r(x, split+1, last)

# -------------------------------------------------------------------------------------- #
def partition(x,first,last):        # find the split point and move other items
    pivotvalue = x[first]
    left = first + 1
    right = last
    done = False
    while not done:
        while left <= right and x[left] <= pivotvalue:
            a_less_than_b.counter = a_less_than_b.counter + 1
            left = left + 1
        while x[right] >= pivotvalue and right >= left:
            a_less_than_b.counter = a_less_than_b.counter + 1
            right = right - 1
        if right < left:
            done = True
        else:
            temp = x[left]
            x[left] = x[right]
            x[right] = temp
    temp = x[first]
    x[first] = x[right]
    x[right] = temp
    return right

# -------------------------------------------------------------------------------------- #
def a_less_than_b(a, b):
    y = (a < b)
    a_less_than_b.counter += 1      # counter attribute used to track function calls
    return y

# -------------------------------------------------------------------------------------- #
vignette02()
-------------------------------------------------------------------------------- /Demos/demo03_FrobeniusNorm.c: --------------------------------------------------------------------------------
/* NOTE: the three system-header names here were lost in extraction;
 * stdio.h, stdlib.h, and math.h are assumptions consistent with the code. */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "blas.h"

/* Stephen Becker, Jan 15 2018
 * Make sure to allocate the array A on the heap,
 * not on the stack!
 * e.g., double A[1000*1000]; is not a good idea
 *
 * Usage:
 *   gcc demo03_FrobeniusNorm.c -O3
 *   ./a.out n      # uses a n x n matrix
 *   ./a.out n 1    # if a 2nd argument is positive, use row-based version
 *   gcc demo03_FrobeniusNorm.c -O3 -I/Applications/MATLAB_R2017b.app/extern/include -lblas
 *   ./a.out n 1 1  # if a 3rd argument is positive, use BLAS for rows/columns
 *   ...
 *                  if this 3rd argument is negative, use BLAS and vectorize
 *
 * Results for 10,000 x 10,000:
 * NO BLAS:
 *   2.8 s  Looping over rows, inner loop over columns
 *   .75 s  Looping over columns, inner loop over rows
 * WITH BLAS:
 *   2.5 s  Looping over rows, inner loop over columns
 *   .52 s  Looping over columns, inner loop over rows
 * */

/* See dnrm2( ptrdiff_t *N, double *X, ptrdiff_t *INCX) */

int main(int argc, char *argv[]) {

    ptrdiff_t m, n, length;
    ptrdiff_t INCX;
    int i, j; /* counters */
    int ROW_BASED, USE_BLAS, VECTORIZE = 0; /* boolean flags */
    double *A;
    double s=0.; /* sum */
    double t = 0.;


    m = 10;
    if (argc > 1)
        m = atoi( argv[1] );
    n = m;


    /* We're storing it in column-major format */
    /* The problem is, this code can be slow itself,
     * so for speed runs, set it all to zero and hope
     * compiler doesn't try to be too clever */
    /*
    A = malloc( m * n * sizeof( double ) );
    for (i=0; i<m*n; i++)
        A[i] = ...   (the fill loop was garbled in extraction)
    */
    /* ASSUMPTION (reconstructed): a zero-filled allocation, consistent
     * with the comment above: */
    A = calloc( m * n, sizeof( double ) );

    if ( (argc > 2 ) && (atoi(argv[2])>0) )
        ROW_BASED = 1;
    else
        ROW_BASED = 0;
    if ( (argc > 3 ) && (atoi(argv[3])>0) )
        USE_BLAS = 1;
    else if ( (argc > 3 ) && (atoi(argv[3])<0) ) {
        USE_BLAS = 1;
        VECTORIZE = 1;
    }
    else
        USE_BLAS = 0;
    if (USE_BLAS)
        printf("Using BLAS\n");
    if (VECTORIZE){
        printf("Vectorizing (this is the *proper* way, no 'for' loops)\n");
        INCX = 1;
        length = m * n;
        s = dnrm2( &length, A, &INCX );

        free( A );
        printf("... Frobenius norm is %e\n", s ); /* dnrm2 returns the norm, not the sum of squares */

        return 0;


    }

    if (ROW_BASED == 1 ){
        printf("Looping over columns, inner loop over rows\n");
        if (USE_BLAS) {
            INCX = 1;
            for (j=0; j
/* ... (the remainder of this file was lost in extraction) ... */
-------------------------------------------------------------------------------- /Demos/demo03_FrobeniusNorm.ipynb: --------------------------------------------------------------------------------
 [notebook preamble and Colab "Open In Colab" badge cell lost in extraction]
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "RojslPeDFSWI"
   },
   "source": [
    "# Demo 3: calculating the Frobenius norm, looping over rows vs columns\n",
    "\n",
    "Demonstrates effect of stride length, and row- or column-based storage\n",
    "\n",
    "See also the `c` language demo\n",
    "\n",
    "Stephen Becker, Aug 2021, APPM 5650 Randomized Algorithms, University of Colorado Boulder"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "NfqPCp2PBtKj"
   },
   "source": [
    "import numpy as np\n",
    "rng = np.random.default_rng(12345)"
   ],
   "execution_count": 1,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "y37x6bdACOqq"
   },
   "source": [
    "def FrobeniusNormByRow(A, use_blas = True):\n",
    "  \"\"\" Outer loop over rows (inner loop over columns) \"\"\"\n",
    "  m,n = A.shape\n",
    "  nrm = 0.\n",
    "  if use_blas:\n",
    "    for row in range(m):\n",
    "      nrm += np.linalg.norm( A[row,:] )**2  # this is Euclidean norm, not Frobenius\n",
    "  else:\n",
    "    for row in range(m):\n",
    "      for col in range(n):\n",
    "        nrm += A[row,col]**2\n",
    "  return np.sqrt(nrm)\n",
    "\n",
    "def FrobeniusNormByColumn(A, use_blas = True):\n",
    "  \"\"\" Outer loop over columns (inner loop over rows) \"\"\"\n",
    "  m,n = A.shape\n",
    "  nrm = 0.\n",
    "  if use_blas:\n",
    "    for col in range(n):\n",
    "      nrm += np.linalg.norm( A[:,col] )**2  # this is Euclidean norm, not Frobenius\n",
    "  else:\n",
    "    for col in range(n):\n",
    "      for row in range(m):\n",
    "        nrm += A[row,col]**2\n",
    "  return np.sqrt(nrm)"
], 89 | "execution_count": 24, 90 | "outputs": [] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": { 95 | "id": "oWBT6c9rEkGd" 96 | }, 97 | "source": [ 98 | "#### Run some experiments" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "metadata": { 104 | "colab": { 105 | "base_uri": "https://localhost:8080/" 106 | }, 107 | "id": "lPX-0wH6CB__", 108 | "outputId": "4cc5db7b-0d15-4c6d-96ac-a4ef8bc830d7" 109 | }, 110 | "source": [ 111 | "n = int(1e4)\n", 112 | "A = rng.standard_normal( size=(n,n) )\n", 113 | "\n", 114 | "%time nrm = np.linalg.norm(A)\n", 115 | "print(f'The true norm is {nrm-1e4:.6f} + 1e4')" 116 | ], 117 | "execution_count": 26, 118 | "outputs": [ 119 | { 120 | "output_type": "stream", 121 | "text": [ 122 | "CPU times: user 121 ms, sys: 1.02 ms, total: 122 ms\n", 123 | "Wall time: 64.3 ms\n", 124 | "The true norm is -1.311721 + 1e4\n" 125 | ], 126 | "name": "stdout" 127 | } 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "metadata": { 133 | "colab": { 134 | "base_uri": "https://localhost:8080/" 135 | }, 136 | "id": "oAfXR-rSClJ8", 137 | "outputId": "5d0c821f-1b74-41ed-af9c-e09260517f95" 138 | }, 139 | "source": [ 140 | "%time nrmRow = FrobeniusNormByRow(A, use_blas = True)\n", 141 | "print(f'Looping over rows, the discrepancy in the norm is {nrmRow-nrm:.8f}')" 142 | ], 143 | "execution_count": 22, 144 | "outputs": [ 145 | { 146 | "output_type": "stream", 147 | "text": [ 148 | "CPU times: user 153 ms, sys: 0 ns, total: 153 ms\n", 149 | "Wall time: 154 ms\n", 150 | "Looping over rows, the discrepancy in the norm is -0.00000000\n" 151 | ], 152 | "name": "stdout" 153 | } 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "metadata": { 159 | "colab": { 160 | "base_uri": "https://localhost:8080/" 161 | }, 162 | "id": "8rzsXn1FCn4L", 163 | "outputId": "69145613-f5c9-4146-db75-c4de1aa3982d" 164 | }, 165 | "source": [ 166 | "%time nrmRow = FrobeniusNormByColumn(A, use_blas = True)\n", 167 | "print(f'Looping over columns, the discrepancy in the norm is {nrmRow-nrm:.8f}')" 168 | ], 169 | "execution_count": 25, 170 | "outputs": [ 171 | { 172 | "output_type": "stream", 173 | "text": [ 174 | "CPU times: user 615 ms, sys: 2.93 ms, total: 618 ms\n", 175 | "Wall time: 628 ms\n", 176 | "Looping over columns, the discrepancy in the norm is -0.00000000\n" 177 | ], 178 | "name": "stdout" 179 | } 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "id": "aOCZbKZHEVkW" 186 | }, 187 | "source": [ 188 | "### Repeat the experiment without using BLAS\n", 189 | "Let's make the matrix smaller so we don't have to wait so long\n", 190 | "\n", 191 | "Here there is less difference, because there's already a lot of overhead just due to the `for` loop (since Python isn't compiled)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "metadata": { 197 | "colab": { 198 | "base_uri": "https://localhost:8080/" 199 | }, 200 | "id": "vN2AitEPDYaT", 201 | "outputId": "b7a5333f-194b-451f-a4c3-c0d65826f59f" 202 | }, 203 | "source": [ 204 | "n = int(4e3)\n", 205 | "A = rng.standard_normal( size=(n,n) )\n", 206 | "\n", 207 | "%time nrm = np.linalg.norm(A)\n", 208 | "print(f'The true norm is {nrm-n:.6f} + ', n)" 209 | ], 210 | "execution_count": 31, 211 | "outputs": [ 212 | { 213 | "output_type": "stream", 214 | "text": [ 215 | "CPU times: user 18.9 ms, sys: 1.03 ms, total: 20 ms\n", 216 | "Wall time: 10.4 ms\n", 217 | "The true norm is -0.319010 + 4000\n" 218 | ], 219 | "name": "stdout" 220 | } 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 
| "metadata": { 226 | "colab": { 227 | "base_uri": "https://localhost:8080/" 228 | }, 229 | "id": "-jWTJsKIEwaE", 230 | "outputId": "f46dc6a2-f676-40b2-de80-e880ea85f0ae" 231 | }, 232 | "source": [ 233 | "%time nrmRow = FrobeniusNormByRow(A, use_blas = True)\n", 234 | "print(f'Looping over rows, the discrepancy in the norm is {nrmRow-nrm:.8f}')\n", 235 | "\n", 236 | "%time nrmRow = FrobeniusNormByRow(A, use_blas = False)\n", 237 | "print(f'Looping over rows (no BLAS), the discrepancy in the norm is {nrmRow-nrm:.8f}')" 238 | ], 239 | "execution_count": 32, 240 | "outputs": [ 241 | { 242 | "output_type": "stream", 243 | "text": [ 244 | "CPU times: user 44.9 ms, sys: 3.03 ms, total: 47.9 ms\n", 245 | "Wall time: 51.7 ms\n", 246 | "Looping over rows, the discrepancy in the norm is 0.00000000\n", 247 | "CPU times: user 10.4 s, sys: 20.1 ms, total: 10.5 s\n", 248 | "Wall time: 10.5 s\n", 249 | "Looping over rows (no BLAS), the discrepancy in the norm is 0.00000000\n" 250 | ], 251 | "name": "stdout" 252 | } 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "metadata": { 258 | "colab": { 259 | "base_uri": "https://localhost:8080/" 260 | }, 261 | "id": "ryB36awVE0N8", 262 | "outputId": "10e74b6e-36dd-465e-f03c-fd25e0d3a9b9" 263 | }, 264 | "source": [ 265 | "%time nrmRow = FrobeniusNormByColumn(A, use_blas = True)\n", 266 | "print(f'Looping over columns, the discrepancy in the norm is {nrmRow-nrm:.8f}')\n", 267 | "\n", 268 | "%time nrmRow = FrobeniusNormByColumn(A, use_blas = False)\n", 269 | "print(f'Looping over columns (no BLAS), the discrepancy in the norm is {nrmRow-nrm:.8f}')" 270 | ], 271 | "execution_count": 33, 272 | "outputs": [ 273 | { 274 | "output_type": "stream", 275 | "text": [ 276 | "CPU times: user 107 ms, sys: 2 ms, total: 109 ms\n", 277 | "Wall time: 113 ms\n", 278 | "Looping over columns, the discrepancy in the norm is 0.00000000\n", 279 | "CPU times: user 10.6 s, sys: 18.7 ms, total: 10.6 s\n", 280 | "Wall time: 10.6 s\n", 281 | "Looping over columns (no BLAS), the discrepancy in the norm is -0.00000000\n" 282 | ], 283 | "name": "stdout" 284 | } 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "metadata": { 290 | "id": "1_qbYnswE4Vu" 291 | }, 292 | "source": [ 293 | "" 294 | ], 295 | "execution_count": null, 296 | "outputs": [] 297 | } 298 | ] 299 | } -------------------------------------------------------------------------------- /Demos/demo04_FrobeniusNorm_sparse.m: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | Accessing rows vs accessing columns of a sparse matrix 4 | 5 | We saw in demo03 that for a dense matrix, in one particular 6 | code and size of matrix, there was about a factor of 3 or 4x 7 | improvement if we looped over columns vs rows 8 | 9 | What about for sparse matrices? 
%}

%% Generate a sparse matrix with random entries, 20% sparse
A = sprandn(1e4,1e4,0.2); % this is 10,000 x 10,000

%%
disp('Compute Frobenius norm by looping over columns');
s=0;
tic
for i=1:size(A,2)
    s = s + norm( A(:,i) )^2;
end
t1=toc;
fprintf('Frobenius norm is %e, took %f seconds\n\n', sqrt(s), t1 ); % s accumulates the *squared* norm, so report sqrt(s)

%%
disp('Compute Frobenius norm by looping over rows')
s=0;
tic
for i=1:size(A,1)
    s = s + norm( A(i,:) )^2;
end
t2 = toc;
fprintf('Frobenius norm is %e, took %f seconds\n', sqrt(s), t2 );

fprintf('Access via column vs row is %.1fx faster\n', t2/t1 );

%% or, if you really want to loop over rows, do so on the transpose
% i.e., first transpose, then loop over columns.
disp('Compute Frobenius norm by looping over columns of transpose');
s=0;
tic
At = A'; % include the time to transpose
for i=1:size(A,2)
    s = s + norm( At(:,i) )^2;
end
t1=toc;
fprintf('Frobenius norm is %e, took %f seconds\n\n', sqrt(s), t1 );



%% Extra: can you tell what's the difference?
% Why do we get different values? Why is one slower?
X = randn(5e3);
tic; s1=norm(X(:)); toc
tic; s2=norm(X); toc
-------------------------------------------------------------------------------- /Demos/demo05_FastJL_speedTest.jl: --------------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
#=
 Testing computational speed of various Fast JL transforms
 Feb 11 2019
 Y = S*X, X is M x N and S is m x M

 Code by: Stephen Becker
 Julia modifications: Jake Knigge
=#
# ---------------------------------------------------------------------------- #
# load packages
using Random, LinearAlgebra, Plots, Statistics, Hadamard, FFTW, SparseArrays
# ---------------------------------------------------------------------------- #
function Hadamard_teaching_code(x)
    # Hadamard_teaching_code(x)
    # applies the Hadamard transform to x.
    # If x has more than one column, the transform is applied
    # to each column.
    # This code is not fast, but it shows you how to exploit
    # the structure of the transform.
    # Note: this code does not do any sub-sampling.
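    # (Added note: each pass of the loop below is one level of the butterfly
    #  recursion H_{2k} [a; b] = [H_k a + H_k b; H_k a - H_k b], applied to
    #  all columns at once via reshape, so the total cost is O(m log2(m) n)
    #  rather than the O(m^2 n) of a dense multiply.)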
    # Reference: https://en.wikipedia.org/wiki/Fast_Walsh–Hadamard_transform
    m = size(x,1); n = size(x,2);
    if nextpow(2,m) != m
        print("ERROR: The number of rows of x must be a power of 2.")
        return
    end
    y = copy(x)
    for bit in 1:Int(log2(m))     # Int() so the loop variable is an integer
        k = Int64(2^bit);         # e.g., 2, 4, ..., m
        k2 = Int64(2^(bit-1));    # e.g., 1, 2, ..., m/2
        y = reshape(y, k, :, n);
        tmp = y[1:k2,:,:];
        y[1:k2,:,:] = y[1:k2,:,:] + y[k2+1:k,:,:];
        y[(k2+1):k,:,:] = tmp - y[k2+1:k,:,:];
        y = reshape(y, m, n)
    end # loop
    return y
end # function
# ---------------------------------------------------------------------------- #
function slowCountSketch( DX, targetRows )
    # slow version of count sketch
    m = length( targetRows );
    Y = zeros(m, size(DX,2) );
    for j in 1:size(DX,1)
        i = targetRows[j];
        Y[i,:] = Y[i,:] + DX[j,:];
    end # loop
    return Y  # without this, the function returns `nothing` (a `for` loop has no value)
end # function
# ---------------------------------------------------------------------------- #
# test that should generate an error (65 is not a power of 2)
x = randn(65,5);
y = Hadamard_teaching_code(x)

# test that should succeed (64 is a power of 2)
x = randn(64,5);
y = Hadamard_teaching_code(x);
y_alt = 64*fwht_natural(x, 1);
norm(y - y_alt)
# Check Implementations for correctness
X = randn(2^13,100); M, N = size(X); m = M/4;

# == Hadamard Code ==
# Check for normalization and ordering
Y1 = Hadamard_teaching_code(Matrix{Float64}(I, 4, 4))
Y2 = 4*fwht_natural(Matrix{Float64}(I, 4, 4), 1)
norm(Y1 - Y2)
# Compare times---run more than once b/c of Julia's "just-in-time" compiler!
@time Y1 = Hadamard_teaching_code(X);
@time Y2 = M*fwht_natural(X, 1);
norm(Y1 - Y2)
# ---------------------------------------------------------------------------- #
# Test speed
N = 100;
M_list = 2 .^(11:14)  # or (^).(2,10:13); # defined using "broadcast" operation
nTrials = 10; # get some averages
nAlgos = 6; # change to 7 if/when CountSketch is included
Times = zeros(nAlgos,length(M_list),nTrials);
Times_setup = zeros(nAlgos,length(M_list),nTrials); # note: `Times_setup = Times` would merely alias the same array in Julia
ALGO_NAMES = ["Gaussian","FJLT - DCT","FJLT - Hadamard", #"Count",
              "Very Very sparse","Very sparse","Sparse"];
rng = Random.seed!(9);
# ---------------------------------------------------------------------------- #
for Mi in 1:length( M_list )
    println("Mi is ", Mi, " of ", length(M_list), ".");
    for trial = 1:nTrials
        M = M_list[Mi];
        m = round(M/4);
        X = randn(M,N);

        ALGO = 1; # Gaussian sketch
        tic = time();
        S = randn(Int64(m),M);
        Times_setup[ALGO,Mi,trial] = time() - tic;
        tic = time();
        Y = S*X;
        Times[ALGO,Mi,trial] = time() - tic;

        ALGO = 2; # Fast JL, DCT
        tic = time();
        D = spdiagm(0 => broadcast(sign,randn(M)) );
        ind = rand(1:M, Int64(m));
        Times_setup[ALGO,Mi,trial] = time() - tic;
        tic = time();
        Y = dct( D*X );
        Y = Y[ind,:];
        Times[ALGO,Mi,trial] = time() - tic;

        ALGO = 3; # Fast JL, Hadamard
        tic = time();
        D = spdiagm(0 => broadcast(sign,randn(M)) );
        ind = rand(1:M, Int64(m));
        Times_setup[ALGO,Mi,trial] = time() - tic;
        tic = time();
        Y = M*fwht_natural( D*X, 1 );
        Y = Y[ind,:];
        Times[ALGO,Mi,trial] = time() - tic;

        # ALGO = 4; # Count
        # tic = time();
        # D = spdiagm(0 => broadcast(sign,randn(M)) );
        # indx_map = Int64.(rand(1:m,M));
        # Times_setup[ALGO,Mi,trial] = time() - tic;
        # tic = time();
        # Y = slowCountSketch(X'*D,indx_map);
        # Times[ALGO,Mi,trial] = time() - tic;

        let ALGO = ALGO
            # Sparse. We can normalize later. Does that help speed?
            for Sparsity = 1:3
                ALGO = ALGO + 1;
                s = 2.0^(1-Sparsity)*sqrt(M);
                density = 1/(2*s);
                ALGO_NAMES[ALGO] = string(round(100*density,digits=2), "% sparse");
                tic = time();
                S = sprandn(Int64(m),M,density); # this takes longer than the multiply!
                S = broadcast(sign, S);
                Times_setup[ALGO,Mi,trial] = time() - tic;
                tic = time();
                Y = sqrt(s)*(S*X);
                Times[ALGO,Mi,trial] = time() - tic;
            end # for loop for sparsity
        end # let block
    end # for loop for trials
end # for loop M list
# ---------------------------------------------------------------------------- #
# Plots
Data = Times;
mn = reshape(mean(Data, dims = 3), nAlgos, length(M_list));
plot(M_list, mn', yscale = :log10, xscale = :log10, legend = :topleft,
     label = ALGO_NAMES, title = "times to apply sketch",
     titlefontsize = 10)
xlabel!("size M")
ylabel!("times in seconds")
y = M_list/M_list[1];
plot!(M_list, mn[1].*y.^2, label = "M^2", linecolor = :black)
plot!(M_list, mn[3,1].*(M_list.*log.(M_list)/(M_list[1]*log.(M_list[1]))),
      label = "M log M", linecolor = :black, linestyle = :dash)
plot!(M_list, minimum(mn[:,1])*y, label = "M", linecolor = :black,
      linestyle = :dot)

# ---------------------------------------------------------------------------- #
Data = Times_setup + Times;
mn = reshape(mean(Data,dims=3),nAlgos, length(M_list));
plot(M_list, mn', yscale = :log10, xscale = :log10, legend = :topleft,
     label = ALGO_NAMES, title = "times to apply sketch with setup",
     titlefontsize = 10)
xlabel!("size M")
ylabel!("times in seconds")
y = M_list/M_list[1];
plot!(M_list, mn[1].*y.^2, label = "M^2", linecolor = :black)
plot!(M_list, mn[3,1].*(M_list.*log.(M_list)/(M_list[1]*log.(M_list[1]))),
      label = "M log M", linecolor = :black, linestyle = :dash)
plot!(M_list, minimum(mn[:,1])*y, label = "M", linecolor = :black,
      linestyle = :dot)

# ---------------------------------------------------------------------------- #
-------------------------------------------------------------------------------- /Demos/demo05_FastJL_speedTest.m: --------------------------------------------------------------------------------
%{
Testing computational speed of various Fast JL transforms
Feb 11 2019

Y = S*X, X is M x N and S is m x M

%}
%% Setup paths
% addpath ~/'Google Drive'/GroupDocuments/MatlabUtilities/ % for countSketch
addpath ~/Repos/randomized-algorithm-class/Code/ % from https://github.com/stephenbeckr/randomized-algorithm-class
addpath ~/Repos/hadamard-transform/ % from https://github.com/jeffeverett/hadamard-transform

%% Check Implementations for correctness
X = randn(2^13,100);
[M,N] = size(X); m = M/4;

% == Hadamard Code ==
% Check for normalization
Hadamard_teaching_code( eye(4) )
4*fwht(eye(4),[],'hadamard')
%%
tic
Y1 = Hadamard_teaching_code(X);
toc
tic
Y2 = M*fwht(X,[],'hadamard');
toc
tic
Y3 = hadamard_pthreads(X); % my mex code
toc
fprintf('Hadamard code discrepancies: %g and %g\n', norm(Y1-Y2,'fro'), ...
32 | norm(Y1-Y3,'fro') ); 33 | 34 | %% == Count sketch -- 35 | d = sign(randn(M,1)); 36 | D = spdiags(d,0,M,M); % bsxfun() is another efficient way to do this 37 | useTranspose = true; 38 | indx_map = int64(randi(m,M,1)); % don't do this in C! 39 | Y2 = countSketch_BLAS(X'*D,indx_map,m,useTranspose)'; 40 | 41 | % Do Count sketch slowly to check 42 | DX = D*X; 43 | targetRows = double(indx_map); 44 | Y = zeros(m,N); 45 | for j = 1:M 46 | i = targetRows(j); 47 | Y(i,:) = Y(i,:) + DX(j,:); 48 | end 49 | fprintf('Count sketch code discrepancies: %g\n', norm( Y - Y2, 'fro' ) ) 50 | 51 | 52 | %% Test speed 53 | N = 100; 54 | % M_list = round(logspace( 3, 4, 4 )); % 4 points between 10^3 and 10^4 55 | M_list = 2.^(10:13); 56 | 57 | nTrials = 10; % get some averages 58 | nAlgos = 7; 59 | Times = zeros(nAlgos,length(M_list),nTrials); 60 | % Times_setup = zeros(3,length(M_list),nTrials); % time to make sparse matrix 61 | Times_setup = Times; 62 | ALGO_NAMES = {'Gaussian','FJLT, DCT','FJLT, Hadamard','Count','Very Very sparse',... 63 | 'Very sparse','Sparse'}; 64 | 65 | for Mi = 1:length( M_list ) 66 | fprintf('Mi is %d of %d\n', Mi, length(M_list) ); 67 | for trial = 1:nTrials 68 | 69 | M = M_list(Mi); 70 | m = round( M/4 ); 71 | 72 | X = randn(M,N ); 73 | 74 | ALGO = 1; % Gaussian sketch 75 | tic 76 | S = randn(m,M); 77 | Times_setup(ALGO,Mi,trial) = toc; 78 | Y = S*X; 79 | Times(ALGO,Mi,trial) = toc; 80 | 81 | ALGO = 2; % Fast JL, DCT 82 | tic; 83 | D = spdiags( sign(randn(M,1)) ,0,M,M); 84 | ind = randsample(M,m); % in Stats toolbox 85 | ind = randperm(M,m); % faster than randsample, doesn't need toolbox 86 | Times_setup(ALGO,Mi,trial) = toc; 87 | Y = dct( D*X ); 88 | Y = Y(ind,:); 89 | Times(ALGO,Mi,trial) = toc; 90 | 91 | ALGO = 3; % Fast JL, Hadamard 92 | tic; 93 | D = spdiags( sign(randn(M,1)) ,0,M,M); 94 | %ind = randsample(M,m); % in Stats toolbox 95 | ind = randperm(M,m); % faster than randsample, doesn't need toolbox 96 | Times_setup(ALGO,Mi,trial) = toc; 97 | Y = hadamard_pthreads( D*X ); 98 | Y = Y(ind,:); 99 | Times(ALGO,Mi,trial) = toc; 100 | 101 | ALGO = 4; % Count 102 | tic; 103 | D = spdiags( sign(randn(M,1)) ,0,M,M); 104 | useTranspose = true; 105 | indx_map = int64(randi(m,M,1)); 106 | Times_setup(ALGO,Mi,trial) = toc; 107 | Y = countSketch_BLAS(X'*D,indx_map,m,useTranspose)'; 108 | Times(ALGO,Mi,trial) = toc; 109 | 110 | 111 | % Sparse. We can normalize later. Does that help speed? 112 | for Sparsity = 1:3 113 | ALGO = ALGO + 1; 114 | s = 2^(1-Sparsity)*sqrt(M); 115 | density = 1/(2*s); 116 | ALGO_NAMES{ALGO} = sprintf('%.1f%% sparse',100*density); 117 | tic 118 | S = sprandn(m,M,density); % this takes longer than the multiply! 119 | S = sign(S); 120 | % SS = logical(S); % alternative 121 | Times_setup(ALGO,Mi,trial) = toc; 122 | Y = sqrt(s)*(S*X); 123 | % is this faster if S is "logical"? 
that doesn't work,
            % it only has 1 bit, need 2 bits, but sparse of type uint8 not
            % supported
            Times(ALGO,Mi,trial) = toc;
        end
    end
end
%% Plot

Data = Times;
% Data = Times - Times_setup;
mn = mean(Data,3);

figure(1); clf;
h=loglog( M_list, mn','o-','linewidth',2 );
set(gca,'fontsize',16);
h(2).LineStyle = '--';
h(3).LineStyle = '--';
h(5).LineStyle = ':'; h(6).LineStyle = ':'; h(7).LineStyle = ':';
legend(ALGO_NAMES,'location','northwest','box','off');

% Add something for reference
hold all
y = M_list/M_list(1);
h2 = loglog( M_list, mn(1)*y.^2, 'k--','DisplayName','M^2','linewidth',2 );
h3 = loglog( M_list, mn(3,1)*...
    (M_list.*log(M_list)/(M_list(1)*log(M_list(1)))),...
    'k-.','DisplayName','M log M','linewidth',2 );
h4 = loglog( M_list, mn(4,1)*y, 'k:','DisplayName','M','linewidth',2 );

xlim([M_list(1),M_list(end)]);
ylim([min(mn(:)),max(mn(:))]);
xlabel('Size M');
ylabel('Time (seconds)');
title('Total times, including setup');
%%
% export_fig 'FastJLtimes_withSetup' '-pdf' -transparent

%%
Data = Times - Times_setup;

mn = mean(Data,3);

figure(1); clf;
h=loglog( M_list, mn','o-','linewidth',2 );
set(gca,'fontsize',16);
h(2).LineStyle = '--';
h(3).LineStyle = '--';
h(5).LineStyle = ':'; h(6).LineStyle = ':'; h(7).LineStyle = ':';
legend(ALGO_NAMES,'location','northwest');

% Add something for reference
hold all
y = M_list/M_list(1);
h2 = loglog( M_list, mn(1)*y.^2, 'k--','DisplayName','M^2','linewidth',2 );
h3 = loglog( M_list, mn(3,1)*...
    (M_list.*log(M_list)/(M_list(1)*log(M_list(1)))),...
    'k-.','DisplayName','M log M','linewidth',2 );
h4 = loglog( M_list, mn(4,1)*y, 'k:','DisplayName','M','linewidth',2 );

xlim([M_list(1),M_list(end)]);
ylim([min(mn(:)),max(mn(:))]);
xlabel('Size M');
ylabel('Time (seconds)');
title('Times to apply sketch, excluding one-time setup cost');
%%
% export_fig 'FastJLtimes_excludingSetup' '-pdf' -transparent
-------------------------------------------------------------------------------- /Demos/demo05_results_excludingSetup.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Demos/demo05_results_excludingSetup.pdf -------------------------------------------------------------------------------- /Demos/demo05_results_withSetup.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Demos/demo05_results_withSetup.pdf -------------------------------------------------------------------------------- /Demos/demo06_leverageScores.m: --------------------------------------------------------------------------------
%{
Demo for 1D leverage scores

Fig.
2 in Section 6.1 of
"Randomized algorithms for matrices and data"
(Mahoney, 2011, http://arxiv.org/abs/1104.5557 )

is misleading, since it discusses perturbing regressors and data.

Consider a 1D regression problem,
    min_{beta} || x*beta - y ||_2

where x is a n x 1 vector of covariates, and y are the data.

By perturbing x, we change the solution.
This is captured by the leverage scores of x.
Since x is a vector, leverage scores are simply proportional
to the magnitude of each entry.

Therefore, the idea of "leverage" is that if we perturb
entries of x that have more leverage, i.e., that are large
in magnitude, then the effect on the regression is greater.

Do we observe that?

Stephen Becker, Feb 14 2018

%}

rng(0);
n = 5;
% x = 1:n;
% x = -n:-1;
x = -2:2;


x = x';
slope = 1;
y = slope*x + .1*randn(n,1);

slopeEst = x\y;

delta = -.9; % try +/-
i = 1;
figure(1); clf;

for i = 1:5

    subplot(2,3,i);

    plot( x, y ,'o','markersize',10)
    hold all
    plot( x, slopeEst*x, '--' ,'linewidth',2)
    xx = x; xx(i) = xx(i) + delta;
    plot( xx(i), y(i),'s','markersize',10,'MarkerFaceColor','k');
    line( [x(i),xx(i)], y(i)*[1,1]);
    slopeEst_perturbed = xx\y;
    plot( x, slopeEst_perturbed*x, '-','linewidth',2 )
    xlim([min(x)-abs(delta),max(x)+abs(delta)]);
    title(sprintf('Moving %d^{th} data point',i))

end

%% Part 2: weighted sampling
% This was added October 2021 (and ipynb also updated)
% Shows how to do weighted sampling, where the weights could be from, e.g., leverage scores
% We confirm that the scaling is done correctly by seeing if we converge to the identity

M = 10;
m = 5;
% S is m x M

% Usually the weights are calculated based on A (if we're doing leverage
% scores), but for now let's just use random weights.
rng(1);
weights = rand(M,1) + 1e-2;
weights(1) = 3;
weights = weights/sum(weights); % these are our normalized leverage scores

nReps = 1e5;
StS = zeros(M);
I = eye(M);
errList = zeros(nReps,1);

withReplacement = true;

for trial = 1:nReps
    omega = randsample( M, m, withReplacement, weights );
    scaling = 1./sqrt(m.*weights(omega));
    % S = diag( scaling ) * I( omega, : );
    % or, another way to do this
    S = bsxfun( @times, scaling, I(omega,:) );

    StS = StS + S'*S;
    % We want E[ S'*S ] = I
    % E[ S'*S ] = E[ sum_j S(j,:)'*S(j,:) ]

    errList( trial ) = norm( StS/trial - I, 'fro' );

end
%% and plot it

figure(1); clf;
loglog( smooth( errList, 1e3 ) )
xlabel('Repetitions');
ylabel('Error ||S^TS-I||_F');
-------------------------------------------------------------------------------- /Demos/demo07_rand_mat_mult.m: --------------------------------------------------------------------------------
% function vignette07_rand_mat_mult
%{
Vignette #7
APPM 4720/5720 Randomized Algorithms, Spring 2019

Demo to show how to approximate a matrix-matrix product using randomization
based on the approach of P. Drineas and M. Mahoney. See section four of
See section four of 8 | "Lectures on Randomized Linear Algebra" by Drineas and Mahoney for additional 9 | details and analysis (?http://arxiv.org/abs/1712.08880) 10 | 11 | Algorithm randomized matrix multiplication. 12 | Inputs: 13 | A is m x n 14 | B is n x p 15 | integer 1 <= c <= n 16 | probability distribution of length n 17 | 18 | repeat for k = 1,\dots, c: 19 | 1. pick i \in {1,\dots,n} with P(i = k) = p_k iid with replacement. 20 | 2. set C(:,k) = 1/sqrt(c * p_i)*A(:,i) and R(k,:) = 1/sqrt(c * p_i)*B(i,:). 21 | 22 | return C, R, and CR = sum_{k=1}^c 1/(c * p_i) * A(:,i) B(i,:). 23 | 24 | Code by Jake Knigge, modifications by Stephen Becker 25 | 26 | Exercise: can you use leverage scores to improve sampling? 27 | Issue #1: you have to balance leverage scores of A *and* of B 28 | Issue #2: what happens if n > p or n > m? 29 | %} 30 | % ---------------------------------------------------------------------------- % 31 | rng(2); % set seed for reproducibility 32 | n_sims = 1000; m = 500; n = 20; p = 250; % parameters for matrix sizes 33 | c = min(max(ceil(0.5*n),1),n); % subsampling parameter 34 | % storage for simulations 35 | fro_norms = zeros(n_sims, 1); fro_norms_opt = fro_norms; 36 | % ---------------------------------------------------------------------------- % 37 | % generate "data" matrices 38 | A = randn(m,n)/sqrt(n); A(:,1) = 5 + 5*rand(m,1); % make the first column "big" 39 | B = randn(n,p)/sqrt(n); AB = A*B; 40 | % ell_2 norms: columns of A and rows of B 41 | col_norm_A = sqrt(sum(A.^2, 1)); row_norm_B = sqrt(sum(B.^2, 2)); 42 | % MATLAB R2018b includes a 'vecnorm' function to calculate column- or row-wise 43 | % norms of matrices; see https://www.mathworks.com/help/matlab/ref/vecnorm.html. 44 | % ---------------------------------------------------------------------------- % 45 | % probabilities for sampling---naive = uniform; optimal uses norm information 46 | probs = ones(n, 1)/n; 47 | probs_opt = ( (col_norm_A .* row_norm_B') / (col_norm_A * row_norm_B) )'; 48 | % compute theoretical upper bounds on expected squared Frobenius norms 49 | upper_bound = sum(col_norm_A.^2 .* (row_norm_B.^2)' ./ (c*probs')); 50 | upper_bound_opt = (col_norm_A * row_norm_B)^2 / c; 51 | % ---------------------------------------------------------------------------- % 52 | % simulation 53 | for t = 1:n_sims 54 | % take samples 55 | samplesUniform = randperm( n, c ); 56 | replace = true; 57 | samplesOptimal = randsample( n, c, replace, probs_opt ); 58 | rescale = 1./sqrt( c/n ); 59 | rescale_opt = 1./sqrt( c*probs_opt( samplesOptimal) ); 60 | C = A(:,samplesUniform)*diag( rescale ); 61 | C_opt = A(:,samplesOptimal)*diag( rescale_opt ); 62 | R = diag( rescale ) *B(samplesUniform,:); 63 | R_opt = diag( rescale_opt )*B(samplesOptimal,:); 64 | 65 | % compute random matrix product via outerproduct 66 | CR = C*R; 67 | CR_opt = C_opt*R_opt; 68 | % calculate Frobenius norms of actual vs. randomized 69 | fro_norms(t) = norm(AB-CR,'fro'); 70 | fro_norms_opt(t)= norm(AB-CR_opt,'fro'); 71 | end 72 | 73 | % display comparisons of simulations 74 | formatSpec = '||AB-CR||_F simulation %4.2f vs. upper bound %4.2f (naive)\n'; 75 | fprintf(formatSpec, sqrt(mean(fro_norms.^2)), sqrt(upper_bound)) 76 | formatSpec = '||AB-CR||_F simulation %4.2f vs. upper bound %4.2f (optimal)\n'; 77 | fprintf(formatSpec, sqrt(mean(fro_norms_opt.^2)), sqrt(upper_bound_opt)) 78 | 79 | % fprintf('relative err: %.3f (naive), %.3f (optimal)\n', ... 
% NOTE: the Python version of this script compares the squared Frobenius norms
% from the simulations vs. the upper bounds. The comparisons in this script are
% different (and make use of Jensen's inequality for concave functions), but
% their interpretations are the same.
% end % vignette_rand_mat_mult function
% ---------------------------------------------------------------------------- %
% If you don't have randsample (in new versions of Stat toolbox)
% then repeatedly call this randSample function
% helper function
% function y = randSample(x)
% % function randSample
% % samples an integer from a prob. distribution x (i.e., x >= 0, sum(x) = 1)
% idx = (1:length(x))'; cdf = cumsum(x); z = rand(); y = min(idx(z <= cdf));
% end
% ---------------------------------------------------------------------------- %
% test of randSample() function using optimal sampling probabilities from the
% above code
% y = zeros(1e5,1);
% for t = 1:(length(y))
%     y(t) = randSample(probs_opt);
% end
% histogram(y,'Normalization','probability'); hold on, plot(1:n,probs_opt,'*r');
-------------------------------------------------------------------------------- /Demos/demo07_rand_mat_mult.py: --------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------- #
# -------------------------------------------------------------------------------------- #
# Vignette #XX
# APPM 4720/5720 Randomized Algorithms, Spring 2019
#
# Demo to show how to approximate a matrix-matrix product using randomization based on
# the approach of P. Drineas and M. Mahoney. See section four of "Lectures on Randomized
# Linear Algebra" by Drineas and Mahoney for additional details and analysis.
#
# Algorithm: randomized matrix multiplication.
# given A \in \reals^{m \times n}, B \in \reals^{n \times p}, an integer c
# (1 \le c \le n), and a probability distribution p of length n.
# repeat for k = 1,\dots, c:
#   1. pick i_k \in {1,\dots,n} with P(i_k = i) = p_i, iid with replacement.
#   2. set C(:,k) = 1/sqrt(c * p_{i_k}) * A(:,i_k) and R(k,:) = 1/sqrt(c * p_{i_k}) * B(i_k,:).
# return C, R, and CR = sum_{k=1}^c 1/(c * p_{i_k}) * A(:,i_k) B(i_k,:).
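# (Added note: for any sampling distribution p, CR is an unbiased estimate of
#  AB, since E[ A(:,i_k) B(i_k,:) / p_{i_k} ] = sum_i A(:,i) B(i,:) = AB for
#  each of the c terms; the choice of p only changes the variance, which is
#  what the Frobenius-norm bounds in this demo measure.)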
# -------------------------------------------------------------------------------------- #
import numpy as np                  # import numpy package
import time as time                 # import time package
import matplotlib.pyplot as plt     # import matplotlib package
np.set_printoptions(precision = 2)  # display only a few digits
# -------------------------------------------------------------------------------------- #
def vignette_rand_mat_mult(n_sims = 1000, m = 100, n = 20, p = 80):
    np.random.seed(seed = 2)        # set seed for reproducibility
    # ensure parameters are integers (plain int(); np.int is deprecated in NumPy >= 1.20)
    n_sims = int(n_sims); m = int(m); n = int(n); p = int(p)
    c = min(max(int(round(0.5*n)), 1), n)
    # storage for simulations
    fro_norms = np.zeros(n_sims); fro_norms_opt = np.zeros(n_sims)
    # generate "data" matrices
    A = np.random.normal(scale = 1 / np.sqrt(n), size = (m,n))   # isotropic rows
    B = np.random.normal(scale = 1 / np.sqrt(n), size = (n,p))   # isotropic columns
    AB = np.matmul(A, B);
    # ell_2 norms: columns of A and rows of B
    col_norm_A = np.linalg.norm(A, axis = 0); row_norm_B = np.linalg.norm(B, axis = 1)
    # probabilities for sampling
    probs = np.ones(n)/n            # naive probabilities---i.e., uniform distribution
    probs_opt = (col_norm_A * row_norm_B) / sum(col_norm_A * row_norm_B)  # optimal probs
    probs_opt = probs_opt / sum(probs_opt)  # undo roundoff---ensure they sum to 1
    # compute theoretical upper bounds on expected squared Frobenius norms
    upper_bound = sum(col_norm_A**2 * row_norm_B**2 / (c*probs))  # naive probabilities
    upper_bound_opt = sum(col_norm_A * row_norm_B)**2 / c         # optimal probabilities
    # simulation
    for t in range(n_sims):
        # initialize / re-zero matrices
        C = np.zeros((m,c)); R = np.zeros((c,p))
        C_opt = np.zeros((m,c)); R_opt = np.zeros((c,p))
        for k in range(c):
            # step 1
            i = np.random.choice(a = np.arange(n), replace = True, p = probs)
            i_opt = np.random.choice(a = np.arange(n), replace = True, p = probs_opt)
            # calculate rescaling coefficients
            rescale = 1 / np.sqrt(c*probs[i]);
            rescale_opt = 1 / np.sqrt(c*probs_opt[i_opt])
            # step 2
            C[:,k] = rescale * A[:,i]; R[k,:] = rescale * B[i,:]
            C_opt[:,k] = rescale_opt * A[:,i_opt]; R_opt[k,:] = rescale_opt * B[i_opt,:]
        # compute random matrix product via outer product
        CR = np.matmul(C, R)
        CR_opt = np.matmul(C_opt, R_opt)
        # calculate Frobenius norms of actual vs. randomized
        fro_norms[t] = np.linalg.norm(AB - CR, ord = 'fro')**2
        fro_norms_opt[t] = np.linalg.norm(AB - CR_opt, ord = 'fro')**2
    # print comparisons averaged across the number of simulations
    print('||AB - CR||_F^2 simulation vs. upper bound:',
          np.round(np.mean(fro_norms),2), 'vs.',
          np.round(upper_bound,2), '(using naive sampling probabilities)')
    print('||AB - CR||_F^2 simulation vs. upper bound:',
          np.round(np.mean(fro_norms_opt),2), 'vs.',
          np.round(upper_bound_opt,2), '(using optimal sampling probabilities)')
    # return np.mean(fro_norms), upper_bound, np.mean(fro_norms_opt), upper_bound_opt

# small problem
vignette_rand_mat_mult(n_sims = 10000, m = 10, n = 4, p = 10)

# moderate problem
vignette_rand_mat_mult(n_sims = 1000, m = 1000, n = 20, p = 1000)

# big problem
vignette_rand_mat_mult(n_sims = 1000, m = 5000, n = 10, p = 5000)

# Question: If you see that the estimated squared Frobenius norm for the naive
# probabilities is smaller than that for the optimal probabilities, what should
# you do?
-------------------------------------------------------------------------------- /Demos/demo07_rand_mat_mult_ortho.py: --------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------- #
# -------------------------------------------------------------------------------------- #
# Vignette #XX
# APPM 4720/5720 Randomized Algorithms, Spring 2019
#
# Demo to show how to approximate a matrix-matrix product using randomization based on
# the approach of P. Drineas and M. Mahoney. See section four of "Lectures on Randomized
# Linear Algebra" by Drineas and Mahoney for additional details and analysis.
#
# This script focuses on the case where U \in \reals^{n \times d} with n >> d is an
# orthogonal matrix (specifically, its columns are orthonormal). We choose c so that the
# deviation between our randomized matrix and the one we're approximating is bounded with
# high probability. The details can be found in the Drineas and Mahoney paper.
#
# Algorithm: randomized matrix multiplication.
# given U \in \reals^{n \times d} with n >> d, an integer c (1 \le c \le n),
# and a probability distribution p of length n.
# repeat for k = 1,\dots, c:
#   1. pick i_k \in {1,\dots,n} with P(i_k = i) = p_i, iid with replacement.
#   2. set R(k,:) = 1/sqrt(c * p_{i_k}) * U(i_k,:).
# return R^T R = sum_{k=1}^c 1/(c * p_{i_k}) * U(i_k,:)^T U(i_k,:).
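# (Added note: U has orthonormal columns, so U^T U = I_d; the simulation below
#  therefore measures ||I_d - R^T R||_F, and the row probabilities used are
#  exactly the normalized row leverage scores ||U(i,:)||^2 / d.)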
# -------------------------------------------------------------------------------------- #
import numpy as np                  # import numpy package
import time as time                 # import time package
import matplotlib.pyplot as plt     # import matplotlib package
np.set_printoptions(precision = 2)  # display only a few digits
# -------------------------------------------------------------------------------------- #
def vignette_rand_mat_mult_ortho(n_sims = 100, n = 1000, d = 6):
    # n_sims = 100; n = 1000; d = 6  # choose d ~ log n
    np.random.seed(seed = 2)        # set seed for reproducibility
    # ensure parameters are integers (plain int(); np.int is deprecated in NumPy >= 1.20)
    n_sims = int(n_sims); n = int(n); d = int(d);
    beta = 1; epsilon = 0.9; delta = 0.1;
    c_big_n = int(np.ceil(96*d/(beta*epsilon**2) *
                  np.log(96*d/(beta*epsilon**2*np.sqrt(delta)))))
    c_sm_n = int(np.ceil(10*d**2 / (beta*epsilon**2)))
    c = np.amin([c_big_n, c_sm_n])
    # storage for simulations
    fro_norms = np.zeros(n_sims)
    # generate "data" matrices
    U = np.random.normal(scale = 1 / np.sqrt(n), size = (n,d))  # isotropic columns
    U, R_qr = np.linalg.qr(U)
    # ell_2 norms: columns of U and rows of U
    col_norm_U = np.linalg.norm(U, axis = 0); row_norm_U = np.linalg.norm(U, axis = 1)
    # probabilities for sampling
    probs = beta * row_norm_U**2 / d    # nearly optimal probs (row leverage scores)
    probs = probs / sum(probs)          # undo roundoff---ensure they sum to 1
    # compute theoretical upper bounds on expected squared Frobenius norms
    upper_bound = d**2 / (c*beta)       # optimal probabilities
    # simulation
    for t in range(n_sims):
        # initialize / re-zero matrices
        RTR = np.zeros((d,d))
        for k in range(c):
            # step 1
            i = np.random.choice(a = np.arange(n), replace = True, p = probs)
            # calculate rescaling coefficients
            rescale = 1 / np.sqrt(c*probs[i])
            # step 2
            R = rescale * U[i,:]
            # compute random matrix product via outer product
            RTR = np.outer(R, R) + RTR
        # calculate Frobenius norms of actual vs. randomized
        fro_norms[t] = np.linalg.norm(np.eye(d) - RTR, ord = 'fro')
    # print comparisons averaged across the number of simulations
    print('||U^T U - R^T R||_F^2 =', np.round(np.mean(fro_norms**2), 4),
          'vs', np.round(upper_bound, 4))

vignette_rand_mat_mult_ortho()

# Question: If you see that the estimated squared Frobenius norm for the naive
# probabilities is smaller than that for the optimal probabilities, what should
# you do?
-------------------------------------------------------------------------------- /Demos/demo08_higherAccuracyRegression.m: --------------------------------------------------------------------------------
%{
Demo of the Iterative Hessian Sketch (IHS) cf. Pilanci and Wainwright
and of the preconditioned approaches (BLENDENPIK, LSRN)

These are two methods to get high-accuracy l2 regression

The goal is to approximate the solution of
    min_{x} || Ax-b ||^2
where
    A is M x N
and we are assuming M >> N.

Code: Stephen Becker

References:

- "Iterative Hessian Sketch: Fast and Accurate Solution
  Approximation for Constrained Least-Squares" (Pilanci, Wainwright; JMLR 2016
  http://www.jmlr.org/papers/volume17/14-460/14-460.pdf )
- "Blendenpik: Supercharging LAPACK's Least-Squares Solver" (Avron et al.
  2010, https://epubs.siam.org/doi/abs/10.1137/090767911)
- "LSRN: A Parallel Iterative Solver for Strongly Over- or Underdetermined Systems" (Meng et al. 2014, https://epubs.siam.org/doi/abs/10.1137/120866580 )

%}

addpath ~/Repos/randomized-algorithm-class/Code/
rng(0);

M = 8e4;
N = 5e2;
% M = 5e3; N = 20;
A = randn(M,N)*diag(logspace(0,3,N))*(randn(N)+.1*eye(N));
fprintf('Condition number is %.3e\n', cond(A) )

x = randn(N,1);
b = A*x;
b = b + .3*norm(b)/sqrt(M)*randn(M,1);

%% Solve via dense method, about 7 seconds
tic
xLS = A\b;
tm_LS = toc;
fprintf('\nSolved via classical least-squares in %.2f seconds\n\n',tm_LS);

%% Solve via another dense method, not as safe if ill-conditioned
tic
[Q,R] = qr(A,0);
xHat = R\(Q'*b);
tm_LS_2 = toc;

err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);

fprintf('== Classical algorithm 1, QR without pivoting, less robust but faster ==\n');
fprintf('Took %.2f sec, err metrics %.2e and %.2e and %.2e\n\n', ...
    tm_LS_2, err1,err2,err3);

% At least do pivoting
tic
[Q,R,e] = qr(A,0);
xHat(e) = R\(Q'*b);
tm_LS_3 = toc;

err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);

fprintf('== Classical algorithm 2, QR with column pivoting ==\n');
fprintf('Took %.2f sec, err metrics %.2e and %.2e and %.2e\n\n', ...
    tm_LS_3, err1,err2,err3);

%% Take a sketch
rng(2);
m = 40*N;
sketchType = {'count','FJLT'}; % Gaussian is too slow!
for i = 1:2
    type = sketchType{i};
    tic;
    sketchFcn = sketch( m, M, type );
    SAb = sketchFcn([A,b]);
    time_preprocess = toc;
    fprintf(' -- Sketch type %s took %.2f sec\n', type, time_preprocess );
end


%% Try the standard sketch-and-solve approach

tic;
SA = SAb(:,1:N);
Sb = SAb(:,N+1);
xHat = SA\Sb;
time_solve = toc;
err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);
fprintf('== Standard JL style sketch ==\n');
fprintf('Took %.2f = %.2f + %.2f sec, err metrics %.2e and %.2e and %.2e\n\n', ...
    time_preprocess+time_solve, time_preprocess,time_solve, err1,err2,err3);

%% Try the iterative Hessian Sketch (run the above block first to get SAb)
nBlocks = 4;
mm = floor(m/nBlocks);
tic
xHat = zeros(N,1);
bHat = b;
fprintf('== Iterative Hessian Sketch ==\n');
tic
for i = 1:nBlocks
    startInd = 1 + (i-1)*mm;
    endInd = i*mm;
    SA = sqrt(m/mm)*SAb(startInd:endInd,1:N); % renormalize!
    % Sb = SAb(startInd:endInd,N+1);
    % xx = SA\Sb; % regular sketching
    xx = (SA'*SA )\(A'*bHat);
    xHat = xHat + xx;
    bHat = bHat - A*xx;
    err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
    err2 = norm( xHat - xLS)/norm(xLS);
    err3 = norm( A*(xHat - xLS))/norm(A*xLS); % need < 1
    fprintf('  contraction factor at iter %d is %.4f\n', i, err3 );
end
time_solve_IHS = toc;
fprintf('Took %.2f = %.2f + %.2f sec, err metrics %.2e and %.2e and %.2e\n\n', ...
    time_preprocess+time_solve_IHS, time_preprocess,time_solve_IHS, err1,err2,err3);

%% Try preconditioning
fprintf('== Computing thin QR on sketched data ==\n');
tic
[Q,R] = qr( SA, 0 ); % thin QR
time_QR = toc;

k1=cond( SA/R ); % unless we had precision issues, this ought to be 1
k2=cond( A/R );  % and this thing we *hope* is small
% Note: cond( A/R ) is a nicer way to write cond( A*inv(R) )

fprintf('QR on SA took %.2f sec, cond(SA*inv(R)) is %.2f, cond(A*inv(R)) is %.2f\n\n',...
    time_QR,k1,k2);
%% For reference, use LSQR to solve, without preconditioning
fprintf('== LSQR, for reference, with 100 iterations ==\n');
tol = 1e-8;
maxit = 1e2;
tic
xHat = lsqr(A,b,tol,maxit);
time_LSQR = toc;
err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);
fprintf('Took %.2f sec for LSQR, err metrics %.2e and %.2e and %.2e\n\n', ...
    time_LSQR, err1,err2,err3);


fprintf('== LSQR, for reference, with 500 iterations ==\n');
tol = 1e-8;
maxit = 5e2;
tic
xHat = lsqr(A,b,tol,maxit);
time_LSQR = toc;
err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);
fprintf('Took %.2f sec for LSQR, err metrics %.2e and %.2e and %.2e\n\n', ...
    time_LSQR, err1,err2,err3);

%% Now try LSQR with preconditioning
fprintf('== Preconditioned LSQR (a la BLENDENPIK/LSRN) ==\n');
tol = 1e-9;
maxit = 1e2;
tic
xHat = lsqr(A,b,tol,maxit,R);
time_LSQR_R = toc;
err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);
fprintf('Took %.2f = %.2f + %.2f + %.2f sec for LSQR, err metrics %.2e and %.2e and %.2e\n\n', ...
    time_preprocess+time_QR+time_LSQR_R, time_preprocess,time_QR,time_LSQR_R, err1,err2,err3);
-------------------------------------------------------------------------------- /Demos/demo09_RandomizedKaczmarz.m: --------------------------------------------------------------------------------
%{
Demonstration of the basic Randomized Kaczmarz Algorithm
(cf. "A Randomized Kaczmarz Algorithm with Exponential Convergence"
by Strohmer and Vershynin, 2008
http://www.springerlink.com/index/10.1007/s00041-008-9030-4 )

For fancier versions, see:

- "Acceleration of randomized Kaczmarz method via the
  Johnson-Lindenstrauss Lemma" by Y. Eldar and D. Needell 2011
- "Paved with good intentions: analysis of a randomized block Kaczmarz
  method" by D. Needell and J. Tropp 2012
- "Stochastic gradient descent, weighted sampling, and the randomized
  Kaczmarz algorithm" by D. Needell, N. Srebro and R. Ward 2016

Code: Stephen Becker 2019
%}


rng(0);
M = 3e5;
N = 1e2;
A = randn(M,N);
x0 = randn(N,1);
b = A*x0; % no noise, since we're not doing least squares, we're solving a system

tic
xLS = A\b;
tm_LS = toc;
fprintf('Solving %d x %d system via Matlab classical method takes %.2f sec\n', ...
    M,N,tm_LS );

tic
[Q,R] = qr(A,0);
xLS2 = R\(Q'*b);
tm_LS2 = toc;
fprintf('... or via a thin QR w/o pivoting takes %.2f sec\n', tm_LS2 );
%%
tic
rowNorms = sum(A.^2,2);
% At = A'; % slow, but if I do this, then can accelerate iterations
tm_preprocess = toc;
% stem( rowNorms )
prob = rowNorms/sum(rowNorms);
%%
x = zeros(N,1);
maxIter = 1e2;
errFcn = @(x) norm(x - xLS );
errList = zeros(maxIter,1);
tic
for k = 1:maxIter
    % i = randsample(M,1,true,prob);
    % x = x + (b(i)-A(i,:)*x)/rowNorms(i) * A(i,:)';

    iList = randsample(M,500,true,prob);
    for ind = 1:500
        i = iList(ind);
        x = x + (b(i)-A(i,:)*x)/rowNorms(i) * A(i,:)';
        % x = x + (b(i)-At(:,i)'*x)/rowNorms(i) * At(:,i); % faster
    end

    errList(k) = errFcn(x);
    if errList(k) < 1e-13
        break
    end
end
tm_Kaczmarz = toc;
%%
fprintf('Randomized Kaczmarz took %.2f sec = %.2f + %.2f sec; final error %.2e\n',...
    tm_preprocess + tm_Kaczmarz, tm_preprocess, tm_Kaczmarz, errList(k) );
%%
semilogy( errList,'o-','linewidth',2 )
xlabel('Epochs'); ylabel('Error'); set(gca,'fontsize',18); grid on
%% For a fair comparison, try with LSQR
tic
[xHat,flag,relres,iter] = lsqr( A, b, 1e-13, 1e3 );
tm_CG = toc;
fprintf('LSQR took %.2f sec in %d iterations; final error %.2e\n',...
    tm_CG, iter, errFcn(xHat) );
-------------------------------------------------------------------------------- /Demos/demo10_l1_regression.m: --------------------------------------------------------------------------------
%{

(1) Motivate l1 or l_p (1<=p<=infty) regression

(2) Sketching approaches
    Ref: "The Fast Cauchy Transform..." by Clarkson et al. 2016
    in SIAM J Sci Comp, http://epubs.siam.org/doi/10.1137/140963698

    see also David Woodruff's 2014 monograph

Stephen Becker

%}

%% Why l1 regression? Let's do a 1D example
rng(0);
x = 2;
A = (1:6)';
[M,N] = size(A); % N = 1 since 1D example
z = .3*randn(M,1);
b = A*x + z;

b(6) = 1.2; % gross corruption, e.g., someone typed "1.2" instead of "12"

% Notation is funny: "x" is really like a slope,
% and "A" is really like "x"
figure(1); clf;
h=plot( A, b, 'd', 'DisplayName', 'Samples','markersize',10,...
    'markerfacecolor','b');
hold all
% plot( A, A*x, '-', 'DisplayName', 'True data','linewidth',2);

% Find line of best fit, in l2 sense
xLS = A\b;
plot( A, A*xLS, '--', 'DisplayName', 'l2 fit','linewidth',2);
cvx_begin quiet
    variable x1(N)
    minimize norm( A*x1 - b , 1 )
cvx_end
plot( A, A*x1, '--', 'DisplayName', 'l1 fit','linewidth',2);
legend('location','northwest');
set(gca,'fontsize',18);


%% Check basic Johnson-Lindenstrauss results: preserve distance in lp sense
rng(0);
% nPoints = 1e3;
M = 5e2; % dimension of each point
% make the data points, some of them sparse, some weird distributions, ...
A = [log(abs(gallery('binomial',M))), gallery('dramadah',M), ...
51 | gallery('cauchy',M), gallery('hanowa',M), gallery('lotkin',M) ]; 52 | nPoints = size(A,2); 53 | 54 | clf; cspy(A); title('Depiction of matrix "A"'); 55 | % normalize it 56 | nrms2_A = sqrt( sum(A.^2,1) ); 57 | nrms1_A = sum(abs(A),1); 58 | % A = bsxfun( @times, A, 1./nrms ); 59 | 60 | %% Take the l2 sketch 61 | m = round(.3*M); 62 | addpath ~/Repos/randomized-algorithm-class/Code/ 63 | %% Gaussian sketch 64 | S = randn(m,M)/sqrt(m); 65 | SA = S*A; 66 | %% FJLT sketch 67 | ind = randperm(M,m); 68 | SA = dct(spdiags(sign(randn(M,1)),0,M,M)*A); 69 | SA = sqrt(M/m)*SA(ind,:); 70 | %% Cauchy sketch 71 | % Same as student-t with 1 degree of freedom 72 | S = trnd(1,m,M); 73 | SA = 1/m*S*A; 74 | %% Check if we've preserved l1 and l2 norms 75 | nrms2 = sqrt( sum(SA.^2,1) ); 76 | nrms1 = sum( abs(SA),1); 77 | figure(1); clf; 78 | subplot(1,2,1); 79 | histogram( nrms2./nrms2_A,'Normalization','probability' ) 80 | xlim([0,2]); 81 | title('$\|Sx\|_2/\|x\|_2$','interpreter','latex','fontsize',20); 82 | 83 | subplot(1,2,2); 84 | histogram( nrms1./nrms1_A,'Normalization','probability' ) 85 | title('$\|Sx\|_1/\|x\|_1$','interpreter','latex','fontsize',20); 86 | 87 | %% Zoom in on histograms for the case of Cauchy sketch 88 | figure(1); clf; 89 | BMIN = .5; 90 | BMAX = 40; 91 | subplot(1,2,1); 92 | histogram( nrms2./nrms2_A,'BinLimits',[BMIN,BMAX] ,'Normalization','pdf') 93 | % xlim([0,2]); 94 | title('$\|Sx\|_2/\|x\|_2$','interpreter','latex','fontsize',20); 95 | 96 | subplot(1,2,2); 97 | histogram( nrms1./nrms1_A ,'BinLimits',[BMIN,BMAX],'Normalization','pdf') 98 | title('$\|Sx\|_1/\|x\|_1$','interpreter','latex','fontsize',20); 99 | 100 | 101 | 102 | %% Interpret another way: use p-stable to estimate p-norms 103 | innerProds = sqrt( abs(sum(SA,1)) ); % abs after sum 104 | 105 | figure(1); clf; 106 | BMIN = .5; 107 | BMAX = 10; 108 | histogram( innerProds./nrms1_A,'BinLimits',[BMIN,BMAX] ,'Normalization','pdf') 109 | % xlim([0,2]); 110 | title('$\sqrt{E|\langle x, s \rangle|^2} /\|x\|_1$','interpreter','latex','fontsize',20); 111 | 112 | 113 | 114 | 115 | 116 | 117 | %% Regression 118 | rng(0); 119 | M = 1e3; 120 | N = 1e2; 121 | A = rand(M,N); 122 | x0 = randn(N,1); 123 | b = A*x0 + randn(M,1); 124 | 125 | % Solve large problem for reference solution 126 | tic 127 | cvx_begin quiet 128 | variable x(N) 129 | minimize norm( A*x - b, 1 ) 130 | cvx_end 131 | toc 132 | xRef = x; 133 | %% Make well-conditioned basis 134 | rng(1); 135 | m = round(.5*M); 136 | for i = 1:2 137 | switch i 138 | case 1 139 | % Cauchy sketch: 140 | S = trnd(1,m,M)/sqrt(m); 141 | fprintf('\nUsing Cauchy sketch\n'); 142 | case 2 143 | % Gaussian sketch 144 | S = randn(m,M)/sqrt(m); 145 | fprintf('\nUsing Gaussian sketch\n'); 146 | end 147 | 148 | SA = S*A; 149 | 150 | 151 | [Q,R] = qr(SA,0); 152 | Q = A/R; 153 | % estimate l1 leverage scores 154 | levScores = sum( abs(Q), 2 ); 155 | % weighted sampling 156 | ind = randsample(M,round(.5*M), true,levScores ); 157 | 158 | % == Solve smaller problem 159 | tic 160 | cvx_begin quiet 161 | variable x(N) 162 | minimize norm( A(ind,:)*x - b(ind), 1 ) 163 | cvx_end 164 | toc 165 | er1=norm( x - xRef )/norm(xRef ); 166 | er2=norm( A*x - b, 1 )/norm( A*xRef - b, 1 ) - 1; 167 | fprintf('||x-xRef| is %.2e, ||Ax-b||_1/||AxRef-b||_1-1 is %.2e\n', er1,er2); 168 | 169 | end 170 | -------------------------------------------------------------------------------- /Demos/demo11_JamesSteinEstimator.m: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | 
James-Stein Estimate 4 | Proof that the MLE is not admissible (in dimensions p>=3 at least) 5 | 6 | see 7 | http://statweb.stanford.edu/~ckirby/brad/LSI/chapter1.pdf 8 | and 9 | http://statweb.stanford.edu/~ckirby/brad/other/CASI_Chap7_Nov2014.pdf 10 | for connection to Empirical Bayes... 11 | 12 | (it's also similar to the idea of control variates) 13 | 14 | %} 15 | 16 | p = 50; % dimension 17 | sigma = .3; 18 | 19 | nReps = 1e3; 20 | 21 | mu = zeros(p,1); 22 | v = .5*ones(p,1); % arbitrary fixed vector 23 | % v = .1*randn(p,1); 24 | 25 | sampleMeans = zeros(p,2); 26 | firstCoordinate = zeros(nReps,2); 27 | avgError = zeros(nReps,2); 28 | errFcn = @(xhat) norm(xhat-mu)^2; 29 | for r = 1:nReps 30 | 31 | y = mu + sigma*randn(p,1); 32 | 33 | % MLE is y 34 | sampleMeans(:,1) = sampleMeans(:,1) + y; 35 | firstCoordinate(r,1)= y(1); 36 | avgError(r,1) = errFcn(y); 37 | 38 | % James-Stein estimator 39 | xhat = (1 - (p-3)*sigma^2/( norm(y-v)^2 ) )*(y-v) + v; 40 | sampleMeans(:,2) = sampleMeans(:,2) + xhat; 41 | firstCoordinate(r,2)=xhat(1); 42 | avgError(r,2) = errFcn(xhat); 43 | 44 | end 45 | sampleMeans = sampleMeans/nReps; 46 | 47 | %% Analyze results 48 | figure(1); clf; 49 | boxplot( avgError,'Labels',{'MLE','James-Stein'} ) 50 | set(gca,'fontsize',18); 51 | title('Values of $\|\hat{x} - \mu\|_2^2$','interpreter','latex') 52 | 53 | %% Look at estimate of the first coordinate: is it biased? 54 | % (mu and v are all the same in all coordinates, so just pick the first 55 | % coordinate, since then it's easy to show graphically) 56 | figure(1); clf; 57 | boxplot( firstCoordinate,'Labels',{'MLE (unbiased)','James-Stein (biased!)'} ) 58 | set(gca,'fontsize',18); 59 | title('First coordinate of the estimate'); 60 | line([-.5,2.5],[0,0],'color','k','linestyle','--') -------------------------------------------------------------------------------- /Demos/demo12_CompressedSensing.m: -------------------------------------------------------------------------------- 1 | % demonstrate Compressed Sensing ideas 2 | % This code requires CVX (cvxr.org) 3 | 4 | rng(0); 5 | 6 | N = 100; % dimensionality of signal 7 | s = 5; % sparsity of signal 8 | 9 | x0 = zeros(N,1); 10 | x0( randperm(N,s) ) = rand(s,1); % random entries 11 | 12 | % Try this for different values of m. How low can you go? 13 | m = 4*s; 14 | % m = round( 2.5*s ); % theoretical lower limit is 2*s 15 | A = randn(m,N); % Sensing matrix 16 | 17 | % figure(1); clf; imagesc(A); axis image 18 | 19 | y = A*x0; 20 | 21 | cvx_begin quiet 22 | variable x1(N) 23 | minimize norm(x1,1) 24 | subject to 25 | A*x1 == y 26 | cvx_end 27 | 28 | x1( abs(x1) < 1e-9 ) = 0; 29 | 30 | x2 = pinv(A)*y; % least-squares solution 31 | 32 | %% Plot 33 | figure(1); clf; 34 | stem( find(x0), x0(find(x0)), 'd' , 'markersize',10); 35 | hold all 36 | stem( find(x1), x1(find(x1)), 'o','MarkerFaceColor','r'); 37 | stem( x2, '*' ); 38 | set(gca,'fontsize',16) 39 | legend('Original','l1','l2','location','best'); 40 | 41 | %% Can we do this with a combinatorial algorithm? No 42 | % Assuming we know s 43 | nchoosek(N,s) % # of permutations to try 44 | 45 | % Make a list of all permutations... or not. Pretty slow! 
46 | % This doesn't even account for the cost required per permutation 47 | tic; 48 | list = nchoosek( 1:N, 2 ); 49 | toc 50 | 51 | tic; 52 | list = nchoosek( 1:N, 3 ); 53 | toc 54 | 55 | tic; 56 | list = nchoosek( 1:N, 4 ); 57 | toc 58 | 59 | tic; 60 | list = nchoosek( 1:N, 5 ); 61 | toc -------------------------------------------------------------------------------- /Demos/demo13_EDM.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Demos/demo13_EDM.mlx -------------------------------------------------------------------------------- /Demos/demo13_EDM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Demos/demo13_EDM.pdf -------------------------------------------------------------------------------- /Demos/demo14_MonteCarlo_and_improvements.m: -------------------------------------------------------------------------------- 1 | %{ 2 | Discusses Monte Carlo in the context of integration: 3 | 4 | - There are many ways to integrate functions 5 | 6 | - Deterministic "quadrature" rules are fancy Riemann Sums, and 7 | will work *very well* if the integrand is smooth 8 | They break down when the integrand is highly oscillatory, 9 | and/or for high-dimensional integrals. Special versions targeted 10 | for oscillatory integrals are the subject of current applied math 11 | research. 12 | 13 | - Monte Carlo integration interprets the integral as an expectation 14 | of a random variable, and draws samples to approximate the true mean 15 | with a sample mean. 16 | For a smooth function, Monte Carlo integration is a bad idea because 17 | classical quadrature rules are much, much better 18 | 19 | - Monte Carlo is slow/inaccurate, but the inaccuracy is independent 20 | of the dimension of the integral. So for large enough dimensions, 21 | it makes sense (while in large dimensions, making a deterministic 22 | grid is impossible since it will be too large) 23 | 24 | - Since Monte Carlo is useful sometimes, there are many known techniques 25 | to make it better. We examine two: 26 | -- Quasi Monte Carlo, which uses low-discrepancy sequences, and 27 | inherits some of the advantages and disadvantages from 28 | both Monte Carlo and grid/quadrature methods. 29 | Refs: 30 | - https://en.wikipedia.org/wiki/Low-discrepancy_sequence#Construction_of_low-discrepancy_sequences 31 | - "High-dimensional integration: The quasi-Monte Carlo way" by Dick, Kuo 32 | and Sloan (Acta Numerica, 2013) 33 | -- Control variates as a means of variance reduction 34 | Refs: 35 | - https://en.wikipedia.org/wiki/Control_variates 36 | 37 | Stephen Becker, University of Colorado, April 2019 38 | %} 39 | 40 | %% Integrate sin(x)/x from 0 to 1 (e.g. Si(1), Si is Sine Integral) 41 | %{ 42 | The sine integral, Si(z), is the integral of sin(x)/x from 0 to z 43 | where we define sin(0)/0 to be 0 (consistent with the limit) 44 | 45 | This integral is not known in closed form 46 | See https://en.wikipedia.org/wiki/Trigonometric_integral#Sine_integral 47 | 48 | How can we approximate it? There are specialized techniques that are 49 | faster and more accurate than what we will discuss here, but we'll 50 | treat it via the integral definition and try to numerically 51 | evaluate the integral.
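(Added note, standard facts for context: on n equal subintervals of width h, the composite midpoint and trapezoid rules used below have O(h^2) error and composite Simpson's rule has O(h^4) error for smooth integrands, while plain Monte Carlo with N samples has RMS error O(N^{-1/2}) regardless of dimension; that is why the quadrature rules win easily on this smooth 1D example.)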
52 | %} 53 | si = sinint(1); % get fairly accurate answer using Matlab's symbolic toolbox 54 | f = @(x) sinc(x/pi); % equivalent to sin(x)/x and f(0)=0 55 | N = 1e2+1; % keep it odd for my composite Simpson's to work 56 | xgrid = linspace(0,1,N); 57 | dx = xgrid(2)-xgrid(1); 58 | fx = f(xgrid); 59 | composite_mid = dx*sum(f(xgrid(2:end)-dx/2)); % open formula 60 | composite_trap = dx*( sum(fx) -fx(1)/2 - fx(end)/2 ); 61 | composite_simp = dx/3*( fx(1)+fx(end)+ 4*sum(fx(2:2:end-1)) + 2*sum(fx(3:2:end-1)) ); 62 | si - composite_mid 63 | si - composite_trap 64 | si - composite_simp 65 | 66 | %% 2a visualize discrepancy of random numbers on [0,1] 67 | 68 | N = 1e3; 69 | setA = sort(rand(N,1)); 70 | setB = [.5*setA(1:2:end); .5 + .5*setA(2:2:end)]; 71 | 72 | figure(1); clf; 73 | plot( setA, 'linewidth',2 ); hold all; plot( setB, 'linewidth',2 ); 74 | legend('uniform random','lower discrepancy'); 75 | line([0,N],[0,1],'linestyle','--','color','k'); 76 | %% more plots 77 | figure(1); clf; 78 | area( smooth(setA - linspace(0,1,N)') ); hold all 79 | ar=area( smooth(setB - linspace(0,1,N)') ); line([0,N],[0,0],'color','k'); 80 | ar.FaceAlpha = 0.5; ar.FaceColor = 'r'; 81 | legend('uniform random','lower discrepancy'); 82 | %% more plots 83 | clf; 84 | histogram( diff(setA) ); hold all 85 | histogram( diff(setB) ); legend('uniform random','lower discrepancy'); 86 | title('Separation distances in random "grid"') 87 | %% Try Monte Carlo evaluation of Si(1) 88 | N = 1e2; 89 | setA = sort(rand(N,1)); 90 | setB = [.5*setA(1:2:end); .5 + .5*setA(2:2:end)]; 91 | 92 | int_MonteCarlo = mean(f(setA)); 93 | int_QuasiMonteCarlo = mean( f(setB) ); 94 | 95 | % Add in control variate 96 | % Use sin(x)/x ~ 1 - x^2/6 (first part of Taylor series) 97 | g = @(x) 1 - x.^2/6; 98 | % The integral (or mean/expectation) of g over [0,1] is: 99 | int_g = 17/18; 100 | % si - int_g % already a fairly good approximation 101 | fx = f(setA); 102 | gx = g(setA); 103 | % Estimate covariance and variance of gx 104 | cv = cov(fx,gx); 105 | c = -cv(1,2)/cv(2,2); % estimated optimal control variate parameter 106 | int_ControlVariate = int_MonteCarlo + c*(mean(gx)-int_g); 107 | int_ControlVariate_quasi = int_QuasiMonteCarlo + c*(mean(g(setB))-int_g); 108 | fprintf('\nError is %10.3e for plain Monte Carlo\n', si - int_MonteCarlo ); 109 | fprintf('Error is %10.3e for Quasi Monte Carlo\n', si - int_QuasiMonteCarlo ); 110 | fprintf('Error is %10.3e for 2nd order Taylor Series\n', si - int_g ); 111 | fprintf('Error is %10.3e for Control-Variate Monte Carlo\n', si - int_ControlVariate ); 112 | fprintf('Error is %10.3e for Control-Variate Quasi Monte Carlo\n', si - int_ControlVariate_quasi ); 113 | fprintf('Error is %10.3e for quadrature (composite Trapezoidal Rule)\n',si - composite_trap); 114 | fprintf('Error is %10.3e for quadrature (composite Simpson''s Rule)\n',si - composite_simp); -------------------------------------------------------------------------------- /Demos/demo15_SGD.m: -------------------------------------------------------------------------------- 1 | %{ 2 | Stochastic Gradient Descent (SGD) and variants, 3 | demonstrated on the primal support vector machine (SVM) problem 4 | with MNIST data.
5 | 6 | Primal SVM: 7 | 8 | min_w ||w||^2/2 + C \sum_i^N hinge( y_i w'*x_i ) 9 | where 10 | hinge(a) = max( 0, 1 - a ) 11 | 12 | We will pick 2 classes from MNIST (not all 10) 13 | If we pick class 0 and class 1, it's very easy (accuracy is > 99% in 1 14 | step) 15 | Try harder classes, like 4 and 9 16 | 17 | Compare SGD with batch gradient descent, 18 | as well as 19 | - minibatch 20 | - SAGA (minibatch) 21 | - SVRG (minibatch) 22 | 23 | It's all quite messy, since stepsizes are a big issue (and other 24 | parameters) 25 | 26 | %} 27 | 28 | load('~/Google Drive/TeachingDocs/APPM5650_Fall21_Randomized/Code/mnist_data_all.mat'); 29 | percentCorrect = @(labels1,labels2) length(find(labels1==labels2))/length(labels1); 30 | 31 | %% In prep for SVM, reduce to just two labels 32 | A = 4; B = 9; % harder to distinguish 33 | % A = 0; B = 1; % very easy to distinguish 34 | 35 | indx = (Train_labels==A) | (Train_labels==B); 36 | % convert to -1, +1 labels 37 | Train_labels_2class = Train_labels( indx ); 38 | Train_labels_2class( Train_labels_2class==A ) = -1; 39 | Train_labels_2class( Train_labels_2class==B ) = +1; 40 | Train_2class = Train( indx, : ); 41 | 42 | indx = (Test_labels==A) | (Test_labels==B); 43 | Test_labels_2class = Test_labels( indx ); 44 | Test_labels_2class( Test_labels_2class==A ) = -1; 45 | Test_labels_2class( Test_labels_2class==B ) = +1; 46 | Test_2class = Test( indx, : ); 47 | 48 | clear Test Test_labels Train Train_labels 49 | 50 | hist(Train_labels_2class ) % make sure it looks OK, about equal 51 | %% Plot hinge loss 52 | x = linspace(-3,3,40); 53 | plot( x, max(0,1-x),'linewidth',2) 54 | line([-3,3],[0,0],'linestyle','--','color','k'); 55 | line([0,0],[-1,4],'linestyle','--','color','k'); 56 | ylim([-1,4]); title('Hinge loss'); 57 | 58 | %% Try SVM... but apply to just two classes 59 | % Pick the dataset 60 | X = Train_2class; 61 | X = [X,ones(size(X,1),1)]; % allow for an offset 62 | y = Train_labels_2class; % this is now -1, +1 63 | 64 | yX = bsxfun( @times, X, y ); 65 | [N,p] = size( X ); 66 | 67 | C1 = 1e-2; % constant for SVM model 68 | C2 = 1/N; % constant for SVM model 69 | 70 | maxIts = 5e2; 71 | minibatch_ratio = .1; % 10% sampling 72 | minibatch_n = round(minibatch_ratio*N); 73 | 74 | ALGONAMES = {'Gradient Descent','SGD fixed stepsize','SGD decaying stepsize',... 
75 | 'SGD minibatch','SAGA minibatch','SVRG'}; 76 | errList = zeros(length(ALGONAMES),maxIts,3); % 3 types of errors 77 | for ALGO = 1:length(ALGONAMES) 78 | fprintf('\nAlgorithm: %s\n', ALGONAMES{ALGO} ); 79 | 80 | w = zeros(p,1); % our variable 81 | switch ALGO 82 | case {1,2} 83 | decay_gamma = false; 84 | otherwise 85 | decay_gamma = true; 86 | end 87 | switch ALGO % choose learning rate 88 | case {1,4} 89 | gamma = 1e-5; % 1e-2 is too big 90 | case 5 % SAGA 91 | gamma = 1e-4; % do it with minibatch 92 | case 6 % SVRG 93 | gamma = 1e-6; 94 | otherwise 95 | gamma = 1e-5; % SGD needs smaller stepsize 96 | end 97 | 98 | for k = 1:maxIts 99 | 100 | % f(w) = C1 ||w||^2/2 + C2 ones(n,1)*hinge( diag(y)*X*w ) 101 | % where 102 | % hinge(a) = max( 0, 1 - a ) 103 | % so d(hinge)/da = { -1 (a <= 1); 0 (a > 1) } 104 | 105 | % for SGD without minibatch, let's take more steps 106 | extraStepsBase = 100; 107 | if ALGO == 2 || ALGO == 3 108 | extraSteps = extraStepsBase; 109 | else 110 | extraSteps = 1; 111 | end 112 | 113 | for steps = 1:extraSteps 114 | switch ALGO 115 | case 1 % deterministic gradient descent (full batch) 116 | a = yX*w; % helper variable 117 | grad = yX'*( -(a<=1) ); % full gradient step 118 | case {2,3} % SGD, single draw 119 | ind = randperm(N,1); 120 | a = yX(ind,:)*w; % helper variable 121 | grad = N*yX(ind,:)'*( -(a<=1) ); 122 | case {4} % minibatch 123 | ind = randperm(N,minibatch_n); 124 | a = yX(ind,:)*w; % helper variable 125 | grad = (N/minibatch_n)*yX(ind,:)'*( -(a<=1) ); 126 | case {5} % SAGA 127 | if k==1 128 | % First iteration is special: make full pass through 129 | % data 130 | a = yX*w; % helper variable 131 | grad = yX'*( -(a<=1) ); % full gradient step 132 | a_storage = a; % store this 133 | grad_storage= grad; 134 | else 135 | %ind = randperm(N,1); 136 | ind = randperm(N,minibatch_n); 137 | a = yX(ind,:)*w; % helper variable 138 | grad_ind_new = yX(ind,:)'*( -(a<=1) ); 139 | 140 | % Combine: 141 | grad_ind_old = yX(ind,:)'*( -(a_storage(ind)<=1) ); 142 | grad = N/minibatch_n*grad_ind_new - ... 143 | N/minibatch_n*grad_ind_old + grad_storage; 144 | % Update storage table: 145 | a_storage(ind) = a; 146 | grad_storage = grad_storage ... 147 | - grad_ind_old + grad_ind_new; 148 | end 149 | case 6 150 | % SVRG 151 | a = yX*w; % helper variable 152 | grad = yX'*( -(a<=1) ); % full gradient step 153 | % Make a bunch of micro steps now 154 | z = w; 155 | for kk = 1:50 156 | ind = randperm(N,round(N/minibatch_n)); 157 | a_z = yX(ind,:)*z; % helper variable 158 | grad_z = yX(ind,:)'*( -(a_z<=1) ); 159 | z = z - gamma*( C1*z + 1/minibatch_n*grad_z +C2*grad ); 160 | end 161 | w = z; 162 | 163 | end 164 | 165 | if decay_gamma && ~mod( k, 50*extraSteps ) 166 | gamma = gamma/2; 167 | end 168 | 169 | % Combine to get full gradient, take gradient descent step 170 | if ALGO ~= 6 % SVRG does its own update 171 | w = w - gamma*(C1*w + C2*grad ); 172 | end 173 | end % end extraSteps 174 | 175 | % Record metrics: 176 | % Cost function (expensive to calculate...
for academic purposes) 177 | Xw = X*w; 178 | f = C1*norm(w)^2/2 + C2*sum( max(0,1-y.*Xw) ); 179 | % Percent correct (pc) for test/train 180 | IDX_Train = sign( Xw ); % no need to find best permutation 181 | pc = percentCorrect(IDX_Train,y); 182 | IDX_Test = sign( Test_2class*w(1:end-1) + w(end) ); % allow offset 183 | pc_test = percentCorrect( IDX_Test, Test_labels_2class); % already -1, +1 184 | errList(ALGO,k,1) = f; 185 | errList(ALGO,k,2) = pc; 186 | errList(ALGO,k,3) = pc_test; 187 | if ~mod( k, 25 ) 188 | fprintf('Iter %3d, train accuracy %.2f%%, test accuracy %.2f%%, objective %.2f\n', ... 189 | k, pc*100, pc_test*100, f ); 190 | end 191 | end 192 | end 193 | %% Plot, x-axis is iteration (so misleading) 194 | figure(1); clf; 195 | % subplot(1,3,1) 196 | offset = min( min( errList(:,:,1) ) )-1e-5; 197 | semilogy( errList(1,:,1) - offset, 'linewidth',2 ) 198 | hold all 199 | % semilogy( errList(2,:,1) - offset, 'linewidth',2 ) 200 | % semilogy( errList(3,:,1) - offset, 'linewidth',2 ) 201 | semilogy( linspace(0,maxIts*extraStepsBase,maxIts), errList(2,:,1) - offset, 'linewidth',2 ) 202 | semilogy( linspace(0,maxIts*extraStepsBase,maxIts), errList(3,:,1) - offset, 'linewidth',2 ) 203 | 204 | semilogy( errList(4,:,1) - offset, 'linewidth',2 ) 205 | semilogy( errList(5,:,1) - offset, 'linewidth',2 ) 206 | semilogy( errList(6,:,1) - offset, 'linewidth',2 ) 207 | title('SVM Objective function - true value'); 208 | ylabel('SVM Objective function - true value'); 209 | xlabel('Iteration'); 210 | % xlim([0,maxIts]); 211 | legend( ALGONAMES ) 212 | %% Replot, with corrected x-axis (now epochs) 213 | figure(1); clf; 214 | % subplot(1,3,1) 215 | % plotFcn = @semilogy; 216 | plotFcn = @loglog; 217 | plotFcn( errList(1,:,1) - offset, 'linewidth',2 ) 218 | hold all 219 | plotFcn( linspace(0,maxIts/N*extraStepsBase,maxIts), errList(2,:,1) - offset, 'linewidth',2 ) 220 | plotFcn( linspace(0,maxIts/N*extraStepsBase,maxIts), errList(3,:,1) - offset, 'linewidth',2 ) 221 | plotFcn( linspace(0,maxIts/minibatch_n,maxIts), errList(4,:,1) - offset, 'linewidth',2 ) 222 | plotFcn( 1+linspace(0,maxIts/minibatch_n,maxIts), errList(5,:,1) - offset, 'linewidth',2 ) 223 | plotFcn( 1:2:(2*maxIts), errList(6,:,1) - offset, 'linewidth',2 ) 224 | title('SVM Objective function - true value'); 225 | ylabel('SVM Objective function - true value'); 226 | xlabel('Epoch'); 227 | legend( ALGONAMES ) 228 | % xlim([0,3]); 229 | 230 | %% Look at misclassification rate (x-axis is iteration, misleading) 231 | figure(1); clf; 232 | errMetric = 2; % train 233 | % errMetric = 3; % test 234 | % plotFcn = @semilogy; 235 | plotFcn = @plot; 236 | plotFcn( 1-errList(1,:,errMetric), 'linewidth',2 ) 237 | hold all 238 | plotFcn( linspace(0,maxIts*extraStepsBase,maxIts), 1-errList(2,:,errMetric), 'linewidth',2 ) 239 | plotFcn( linspace(0,maxIts*extraStepsBase,maxIts), 1-errList(3,:,errMetric), 'linewidth',2 ) 240 | plotFcn( 1-errList(4,:,errMetric), 'linewidth',2 ) 241 | plotFcn( 1-errList(5,:,errMetric), 'linewidth',2 ) 242 | plotFcn( 1-errList(6,:,errMetric), 'linewidth',2 ) 243 | title('Error, training data'); 244 | % title('Error, testing data'); 245 | ylabel('Misclassification rate'); 246 | legend( ALGONAMES ) 247 | xlabel('Iteration'); 248 | ylim([0,.15]); 249 | xlim([0,maxIts]); 250 | %% Look at misclassification rate, corrected axis 251 | figure(1); clf; 252 | % errMetric = 2; % train 253 | errMetric = 3;% test 254 | plotFcn = @loglog; 255 | plotFcn( 1-errList(1,:,errMetric), 'linewidth',2 ) 256 | hold all 257 | plotFcn(
linspace(0,maxIts/N*extraStepsBase,maxIts), 1-errList(2,:,errMetric), 'linewidth',2 ) 258 | plotFcn( linspace(0,maxIts/N*extraStepsBase,maxIts), 1-errList(3,:,errMetric), 'linewidth',2 ) 259 | plotFcn( linspace(0,maxIts/minibatch_n,maxIts), 1-errList(4,:,errMetric), 'linewidth',2 ) 260 | plotFcn( 1+linspace(0,maxIts/minibatch_n,maxIts), 1-errList(5,:,errMetric), 'linewidth',2 ) 261 | plotFcn( 1:2:(2*maxIts), 1-errList(6,:,errMetric), 'linewidth',2 ) 262 | if errMetric == 2 263 | title('Error, training data'); 264 | elseif errMetric == 3 265 | title('Error, testing data'); 266 | end 267 | ylabel('Misclassification rate'); 268 | legend( ALGONAMES ) 269 | xlabel('Epoch'); 270 | %% Visualize separating hyperplane 271 | clf; 272 | imagesc( reshape(w(1:end-1),28,28) ) 273 | -------------------------------------------------------------------------------- /Demos/demo16_LSH.m: -------------------------------------------------------------------------------- 1 | %{ 2 | Examples of Locality Sensitive Hashing (LSH): 3 | 4 | (1) MinHash for Jaccard Similarity 5 | (2) Euclidean Norm 6 | (3) SimHash for Cosine distance 7 | 8 | For MinHash, let's suppose we're motivated by hashing documents and then checking for their 9 | similarity, so we can check if there is plagiarism in a work 10 | See https://www.bowdoin.edu/dean-of-students/judicial-board/academic-honesty-and-plagiarism/examples.html 11 | for plagiarism examples 12 | 13 | Stephen Becker 14 | %} 15 | 16 | %% Plagiarism Demo: First, convert some sentences/documents to nicer form 17 | sentence = {}; 18 | sentence{1} = 'Only two years later, all these friendly Sioux were suddenly plunged into new conditions, including starvation, martial law on all their reservations, and constant urging by their friends and relations to join in warfare against the treacherous government that had kept faith with neither friend nor foe'; 19 | sentence{2} = 'Only two years later, all these nice Sioux were suddenly thrust into new types of conditions, including starvation, martial law on all their reservations, and constant urging by their friends and relations to join in warfare against the treacherous government that had kept faith with neither friend nor foe'; 20 | sentence{3} = 'In ages which have no record these islands were the home of millions of "Contrast the condition into which all these friendly Indians are suddenly plunged now, with their condition only two years previous: martial law now in force on all their reservations; themselves in danger of starvation, and constantly exposed to the influence of emissaries from their friends and relations, urging them to join in fighting this treacherous government that had kept faith with nobody--neither with friend nor with foe'; 21 | 22 | % Pre-process, and make k-shingles 23 | % (and usually you then hash the k-shingles down further) 24 | k = 9; 25 | 26 | % https://www.mathworks.com/matlabcentral/answers/3314-hash-function-for-matlab-struct 27 | Engine = java.security.MessageDigest.getInstance('MD5'); 28 | for i = 1:3 29 | s = sentence{i}; 30 | s = s( ~isspace( s ) & (s~=',') & (s~='"') ); % remove some characters 31 | vec = []; 32 | for start = 1:length(s)-k+1 33 | ss = s(start:start+k-1); % k-shingle 34 | % Now, hash it.
Use MD5 or SHA since Java does that for me 35 | % Engine = java.security.MessageDigest.getInstance('MD5'); 36 | Engine = java.security.MessageDigest.getInstance('SHA'); 37 | Engine.update(typecast(uint16(ss), 'uint8')); 38 | hash = Engine.digest; % 8 bits per (so 1 byte); keep a few of these 39 | hash = uint16(typecast( hash(1:2), 'uint8' )); % and remove signs 40 | % h = dec2bin( hash ); 41 | % h = uint16( bin2dec( h(:)' ) ); % we kept 2 bytes, so 16 bit 42 | % (Above line is slow, and it merges binary vectors in a funny way. 43 | % Better is this line below:) 44 | h = 2^8*hash(1) + hash(2) + 1; % make it 1-based; be careful to make sure everything is uint16 not uint8 or you have overflow! 45 | vec = [vec,h]; % append 46 | % sprintf('%.2x',double(typecast(hash, 'uint8'))) 47 | end 48 | sentence{i}=vec; 49 | end 50 | % intmax('uint16') % max is 2^16 = 65k 51 | %% 52 | JaccardSim = @(A,B) length(intersect(A,B))/length(union(A,B)); 53 | disp( JaccardSim( sentence{1}, sentence{2} ) ) 54 | disp( JaccardSim( sentence{1}, sentence{3} ) ) 55 | disp( JaccardSim( sentence{2}, sentence{3} ) ) 56 | % for i = 1:3, disp(length(sentence{i})); end 57 | %% Apply minhash (many of them), naive version 58 | % Need a universe of all possible entries 59 | % Either take union(...) or use max of uint... 60 | L = 10; % Number of hashes to draw (L=20 to visualize) 61 | % L = 1e3; % To check 62 | MinHashSignatures = zeros(3,L); 63 | for ell = 1:L 64 | P = randperm( intmax('uint16') ); % random permutation of 1, ..., 65k 65 | for i = 1:3 66 | MinHashSignatures(i,ell) = min(P(sentence{i})); 67 | end 68 | end 69 | if L <= 20 70 | disp(MinHashSignatures) 71 | end 72 | %% Check 73 | for i = 1:3 74 | for j = (i+1):3 75 | prob = sum( MinHashSignatures(i,:) == MinHashSignatures(j,:) )/L; 76 | fprintf('%d vs %d: JaccDiff is %.2f, %% hash collisions is %.2f\n', ... 77 | i,j,JaccardSim( sentence{i}, sentence{j} ), prob ); 78 | end 79 | end 80 | 81 | %% To tune definition of "neighbors", make bands 82 | % If documents match in *any* band, then declare them 83 | % a possible neighbor. 84 | rng(0); 85 | b = 20; % # of bands 86 | r = 5; % # hashes per band (if small, then more collisions) 87 | L = b*r; 88 | MinHashSignatures = zeros(3,b); 89 | for bi = 1:b 90 | temp = zeros(3,r); 91 | for ri = 1:r 92 | P = randperm( intmax('uint16') ); 93 | for i = 1:3 94 | temp(i,ri) = min(P(sentence{i})); 95 | end 96 | end 97 | % For this band, we have r LSH hashes. Combine these r LSH hashes 98 | % by... hashing them together! 99 | % (This last hash is not a LSH, it's a traditional one) 100 | for i = 1:3 101 | Engine = java.security.MessageDigest.getInstance('SHA'); 102 | Engine.update(typecast(uint16(temp(i,:)), 'uint8')); 103 | hash = Engine.digest; 104 | hash = uint16(typecast( hash(1:2), 'uint8' )); % remove signs 105 | % h = dec2bin( hash ); 106 | % h = uint16( bin2dec( h(:)' ) ); 107 | h = 2^8*hash(1) + hash(2) + 1; % make it 1-based 108 | MinHashSignatures(i,bi) = h; 109 | end 110 | end 111 | MinHashSignatures 112 | 113 | 114 | %% Try some other hashes, like Euclidean norm distance 115 | % Note: for this LSH, probability of collision isn't identically 116 | % proportional to the Euclidean distance, but it is a valid LSH 117 | addpath ~/Repos/randomized-algorithm-class/Code/ 118 | 119 | rng(0); 120 | p = 100; 121 | N = 10; 122 | X = randn(N/2,p); 123 | X = [ X; X + .1*randn(N/2,p) ]; % so some correlated rows 124 | 125 | Dist = pdist2_faster( X, X, 'sqeuclidean' ); 126 | 127 | % Now, let's hash these...
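% --- Added illustration (not part of the original demo) --- a single
% Euclidean-distance LSH function has the form
%     h(x) = floor( (v'*x + b)/a ),   v ~ N(0,I),  b ~ Unif[0,a],
% so nearby points land in the same bucket with higher probability.
% Minimal sketch; vTmp, aTmp, bTmp, hTmp are throwaway names:
vTmp = randn(p,1); aTmp = 5; bTmp = aTmp*rand(1);
hTmp = @(xRow) floor( (xRow*vTmp + bTmp)/aTmp );  % xRow is one row of X
disp( [hTmp(X(1,:)), hTmp(X(1+N/2,:)), hTmp(X(2,:))] ) % rows 1 and 1+N/2 are a near pair, so they usually agree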
128 | a = .1; 129 | 130 | % Now, combine via banded strategy 131 | rng(0); 132 | b = 20; % # of bands 133 | r = 5; % # hashes per band (if small, then more collisions) 134 | L = b*r; 135 | Signatures = zeros(N,b); 136 | for bi = 1:b 137 | temp = zeros(N,r); 138 | for ri = 1:r 139 | v = randn(p,1); v = v/norm(v); % random unit vector 140 | bb = a*rand(1); % random offset, uniform in [0,a] 141 | temp(:,ri) = floor( (X*v + bb)/a ); 142 | end 143 | % For this band, we have r LSH hashes. Combine these r LSH hashes 144 | % by... hashing them together! 145 | % (This last hash is not a LSH, it's a traditional one) 146 | for i = 1:N 147 | Engine = java.security.MessageDigest.getInstance('SHA'); 148 | Engine.update(typecast(uint16(temp(i,:)), 'uint8')); 149 | hash = Engine.digest; 150 | hash = uint16(typecast( hash(1:2), 'uint8' )); % remove signs 151 | % h = dec2bin( hash ); 152 | % h = uint16( bin2dec( h(:)' ) ); 153 | h = 2^8*hash(1) + hash(2) + 1; % make it 1-based 154 | Signatures(i,bi) = h; 155 | end 156 | end 157 | Signatures 158 | 159 | 160 | %% Try some other hashes, like SimHash for cosine distances 161 | % For this one, the chance of collision is a linear function of the distance 162 | 163 | rng(0); 164 | p = 100; 165 | % N = 10; 166 | N = 1e2; 167 | X = randn(N/2,p); 168 | X = [ X; X + .1*randn(N/2,p) ]; % so some correlated rows 169 | 170 | % Look at cosine distances between all the points in X 171 | nrms = sqrt( sum(X.^2,2) ); 172 | cosDist = real( acos( X*X'./( nrms*nrms' ) )); 173 | 174 | 175 | % Check if we have collisions at a rate proportional to cosDist: yes! 176 | r = 1e4; % repeat it a lot to collect statistics 177 | CollisionFrequency = zeros(N,N); 178 | temp = zeros(N,1); 179 | for ri = 1:r 180 | v = randn(p,1); 181 | temp = sign(X*v); 182 | % This will be slow... 183 | for i = 1:N 184 | simInd = find( temp == temp(i) ); 185 | CollisionFrequency(i,simInd) = CollisionFrequency(i,simInd) + 1; 186 | end 187 | end 188 | CollisionFrequency = CollisionFrequency/r; 189 | TrueFrequency = 1 - cosDist/pi; 190 | if N <= 10 191 | disp( CollisionFrequency ) 192 | disp( TrueFrequency ) 193 | disp( CollisionFrequency - TrueFrequency ) 194 | else 195 | [TrueFreq_sorted, sort_ind] = sort( TrueFrequency(:) ); 196 | figure(1); clf; 197 | scatter( TrueFreq_sorted(:), CollisionFrequency( sort_ind ),'r.' ); 198 | hold all 199 | line( [0,1],[0,1],'linestyle','--','color','k') 200 | xlabel('Predicted collision probability (1 - \theta/\pi)'); 201 | ylabel('Frequency of LSH collision'); 202 | end 203 | 204 | % We can also combine these in the same banding technique... -------------------------------------------------------------------------------- /Demos/demo17_kNN_via_LSH.m: -------------------------------------------------------------------------------- 1 | %{ 2 | unweighted k-Nearest Neighbors 3 | 4 | Note: everything would probably be faster if we stored the data 5 | where columns are new data points, not rows, but we're sticking 6 | with the row convention since it's more common (and Matlab uses it 7 | for their functions, even though it goes against their natural 8 | data structure). 9 | 10 | Compare also with Matlab's knnsearch implementation 11 | If dimension p < 10 then this can exploit a kd-tree at training time, 12 | but the complexity of that scales very poorly with dimension, 13 | so not applicable to MNIST without doing some dimensionality reduction.
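(Added note: with r hashes per band and b bands, a training point becomes
a candidate neighbor iff it matches the query on all r hashes of at least
one band. If a single hash collides with probability q, the candidate
probability is 1 - (1 - q^r)^b, the usual LSH S-curve: raising r filters
out far points, raising b recovers near ones.)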
14 | 15 | Stephen Becker 16 | %} 17 | clc 18 | 19 | addpath ~/Repos/randomized-algorithm-class/Code/ 20 | % Load MNIST data: 21 | addpath('~/Google Drive/TeachingDocs/APPM4720_5720_Spring19_Randomized/Code'); 22 | load mnist_data_all 23 | 24 | percentCorrect = @(labels1,labels2) length(find(labels1(:)==labels2(:)))/length(labels1(:)); 25 | 26 | rng(1); 27 | % Try with various sizes to get an idea how it scales... 28 | % test_subset = randsample( 1e4, 1e3 ); 29 | test_subset = randsample( 1e4, 16e2 ); 30 | 31 | TestPoints = Test(test_subset,:); 32 | TestLabels = Test_labels( test_subset ); 33 | K = 10; % # of nearest neighbors to use 34 | %% Try k-NN using true distances, so plain implementation 35 | 36 | fprintf('\n-- Vanilla k-NN\n  Finding pairwise distances\n'); 37 | t1 = tic; 38 | tic 39 | % ind = dsearchn( Train, TestPoints ); % Slow 40 | % D = pdist2( Train, TestPoints ); % Slow 41 | % for 1e3 test points, takes 34 sec with pdist, or 2.87 with pdist_faster 42 | D = pdist2_faster( Train, TestPoints ); 43 | toc 44 | fprintf('  Sorting those distances\n'); 45 | tic; 46 | [~,Ind] = sort(D); % per row, sort the columns 47 | toc 48 | 49 | fprintf('  Final processing\n'); % Find the labels of the neighbors 50 | labels = Train_labels( Ind(1:K,:) ); 51 | prediction = mode( labels, 1 ); 52 | pc = percentCorrect( prediction, TestLabels ); 53 | fprintf('  Standard k-NN has %.1f%% accuracy\n', pc*100 ); 54 | 55 | t_plain = toc(t1); 56 | 57 | 58 | 59 | %% Do with Matlab's knn 60 | % If p >= 10, it won't use a kd-tree (see KDTreeSearcher) 61 | % See "Classification Using Nearest Neighbors" help topic 62 | % Mdl = ExhaustiveSearcher( Train, 'Distance', 'seuclidean' ); 63 | % [ind2,dist_ind] = knnsearch(Mdl,TestPoints,'k',K); % gave NaNs 64 | if size(TestPoints,1) < 500 65 | fprintf('\n-- Vanilla k-NN via Matlab''s implementation\n'); 66 | t1 = tic; 67 | [idx,d] = knnsearch( Train, TestPoints, 'K', K ); 68 | toc(t1) 69 | 70 | labels = zeros(size(TestLabels)); 71 | t2=tic; 72 | for i = 1:size(TestPoints,1) 73 | labels(i) = mode(Train_labels( idx(i,:) )); 74 | end 75 | toc(t2) 76 | t_Matlab = toc(t1); 77 | pc_Matlab = percentCorrect( labels, TestLabels ); 78 | fprintf('  k-NN via Matlab has %.1f%% accuracy\n', pc_Matlab*100 ); 79 | else 80 | pc_Matlab = nan; 81 | t_Matlab = Inf; 82 | end 83 | 84 | 85 | %% Do it with LSH and bands 86 | % b = 1, r = 30 is very bad (very few neighbors, but lots of false 87 | % negatives); better to increase b and decrease r 88 | % (for Cosine vs Euclidean distances, parameters will vary 89 | % and for Euclidean distance, also work with "a" parameter) 90 | b = 15; % number of bands (decrease this to reduce # neighbors found) 91 | r = 3; % hashes per band (increase this to reduce # neighbors found) 92 | a = 5e2; % controls fineness; check length( unique( Train_hashed(:,1) ) ) 93 | 94 | t1 = tic; 95 | rng(1); 96 | p = size(Test,2); 97 | fprintf('\n-- k-NN via LSH\n'); 98 | tic 99 | 100 | COSINE_DISTANCE = false; 101 | neighborList = zeros( size(Train,1), length(TestLabels),'logical' ); 102 | 103 | for bi = 1:b 104 | if COSINE_DISTANCE 105 | % Cosine distance "SimHash" 106 | Omega = randn(p,r); 107 | LSH = @(X) sign((X-mean(X,2))*Omega); 108 | 109 | Train_hashed = LSH(Train); 110 | Test_hashed = LSH(TestPoints); 111 | 112 | innProd = Train_hashed*Test_hashed'; 113 | neighborList = neighborList | (innProd==r); % binary "or" 114 | else 115 | % Euclidean distance hash 116 | V = randn(p,r); 117 | bb = a*rand(1,r); 118 | LSH = @(X) floor( (X*V + bb )/a ); 119 | Train_hashed =
LSH(Train); 120 | Test_hashed = LSH(TestPoints); 121 | 122 | % Do group updates, assuming we only have a few hash values 123 | universe = unique( Test_hashed ); 124 | tempList = zeros( size( neighborList), 'uint8' ); 125 | % Avoid "unique" call by using big matrix 126 | for ri = 1:r 127 | for val_i = 1:length(universe) 128 | val = universe( val_i ); % bucket value 129 | ind_test = find( Test_hashed(:,ri) == val ); 130 | ind_train = find( Train_hashed(:,ri) == val ); 131 | tempList( ind_train, ind_test ) = tempList(ind_train, ind_test) + 1; 132 | end 133 | end 134 | % Need to hash all ri things together (need them *all* to agree) 135 | % or, check when tempList == r 136 | neighborList = neighborList | (tempList==r); 137 | 138 | end 139 | end 140 | toc 141 | 142 | fprintf(' Reduced # of neighbors to %.1f%%\n', 100*nnz(neighborList)/numel(neighborList) ); 143 | 144 | labels = zeros(size(TestLabels)); 145 | tic 146 | for i = 1:size(TestPoints,1) 147 | ind = find( neighborList(:,i) ); 148 | [~,ind2] = sort( pdist2_faster( Train(ind,:), TestPoints(i,:) ) ); 149 | KK = min( length(ind2),K ); 150 | labels(i) = mode(Train_labels( ind(ind2(1:KK)) )); 151 | end 152 | toc 153 | pc_LSH = percentCorrect( labels, TestLabels ); 154 | fprintf(' LSH k-NN has %.1f%% accuracy\n', pc_LSH*100 ); 155 | 156 | t_LSH = toc(t1); 157 | %% Overall 158 | fprintf('\n== SUMMARY ==\n %6d training points, %5.1f s via plain k-NN (%5.1f via Matlab''s), %5.1f s via LSH k-NN\n',... 159 | size(TestPoints,1), t_plain,t_Matlab, t_LSH ); 160 | fprintf('\tand respective accuracies: %.1f%%, %.1f%% and %.1f%%\n', pc*100, pc_Matlab*100, pc_LSH*100 ); 161 | 162 | -------------------------------------------------------------------------------- /Demos/demo18_names.m: -------------------------------------------------------------------------------- 1 | %{ 2 | With what frequency do names occur in the US? 3 | Use social security data 4 | 5 | From https://www.ssa.gov/oact/babynames/limits.html 6 | download https://www.ssa.gov/oact/babynames/names.zip 7 | (about 9 MB) 8 | 9 | Top 10 from 2017 are here: https://www.ssa.gov/oact/babynames/ 10 | 11 | 12 | This demo applies the CountMin sketch to estimate the frequency 13 | of occurrence of each name (using less memory than the straightforward 14 | data structure). 15 | Note: for both the "straightforward" data structure (which we call 16 | "fullData") as well as the sketch, we're really storing a hash table (I 17 | use the SHA hash, since it's easy to call via Matlab/Java), 18 | so I don't actually store the names themselves! But if you think of a 19 | name, we can then hash it, and check if it's in the table. 20 | 21 | Note: for the hash table, I use the first 2 bytes of the SHA hash, 22 | so about 65k unique buckets. There are at least 50,686 names in the 23 | database, so about 77% of buckets are occupied, so there are a lot 24 | of name collisions. This is bad! The fix is to use more bytes 25 | of the SHA hash, but I'm too lazy to implement that now. 26 | [Update: I fixed my laziness and used 2^20 buckets] 27 | 28 | Some of the hashes, the ones used in the sketch, only need pairwise 29 | independence, and you can do things faster than SHA and MD5 30 | e.g., if w is a prime number, then drawing a and b randomly 31 | from [0, w-1], the function h(x) = a*x + b (mod w) 32 | has the pairwise independence probability: the chance 33 | of two inputs colliding (over the randomness of choosing a and b) 34 | is 1/w. 
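For example (an illustration, not what this demo actually uses): take the
prime w = 2^16 + 1 = 65537 and draw a and b uniformly from {0, 1, ..., w-1};
then h(x) = mod(a*x + b, w) maps two distinct inputs to the same bucket
with probability 1/w over the random draw of (a,b). In MATLAB this would
look like
    w = 65537; aa = randi([0,w-1]); bb = randi([0,w-1]);
    h = @(x) mod( aa*double(x) + bb, w );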
35 | 36 | Stephen Becker, University of Colorado 37 | 38 | Reference: 39 | Graham Cormode, http://dimacs.rutgers.edu/~graham/pubs/html/TalkSimons13.html 40 | and his monograph: "Sketch techniques for approximate query processing" 41 | 2011 (http://www.cs.umass.edu/~mcgregor/711S12/sketches1.pdf) 42 | %} 43 | 44 | % nNameBuckets = intmax('uint16'); % too small 45 | % nNameBuckets = intmax('uint32'); % too big 46 | nNameBuckets = 2^20; 47 | fullData = zeros(nNameBuckets,1); 48 | fullDataNames = cell(nNameBuckets,1); 49 | if 2==exist('demo18_data.mat','file') %&& false 50 | load demo18_data 51 | fullData = full( fullDataSparse ); 52 | else 53 | tic 54 | fprintf('Reading in year '); 55 | for yr = 1880:2017 56 | fprintf('\b\b\b\b%d',yr); 57 | prfx = '~/Downloads/names'; 58 | filename = fullfile(prfx, sprintf('yob%d.txt',yr) ); 59 | fid = fopen(filename); 60 | data = textscan( fid, '%s%c%d','Delimiter',','); 61 | names = data{1}; % data{2} is gender, 'M' or 'F' 62 | occurences = data{3}; 63 | for line = 1:length(names) 64 | Engine = java.security.MessageDigest.getInstance('SHA'); 65 | name = lower( names{line} ); 66 | Engine.update(typecast(uint16(name), 'uint8')); 67 | hash = Engine.digest; % 8 bits per (so 1 byte); keep a few of these 68 | hash = uint32(typecast( hash(1:3), 'uint8' )); % and remove signs 69 | % h = dec2bin( hash ); 70 | % % hash is 0 to nNameBuckets-1, so need a +1 offset 71 | % h = uint16( bin2dec( h(:)' ) ) + 1; % NO, not quite 72 | % right... 73 | h = 2^16*hash(1) + 2^8*hash(2) + hash(3); 74 | h = mod( h, nNameBuckets ) + 1; 75 | 76 | fullData(h) = fullData(h) + occurences(line); 77 | 78 | % Also, add the name to the list of names 79 | if isempty( fullDataNames{h} ) 80 | fullDataNames{h} = name; 81 | elseif isempty( strfind(fullDataNames{h},name) ) 82 | fullDataNames{h} = [fullDataNames{h},',',name]; 83 | end 84 | 85 | 86 | end 87 | 88 | fclose(fid); 89 | end 90 | fprintf(' finished.\n'); % takes about 3.8 minutes 91 | toc 92 | fullDataSparse = sparse( fullData ); % compress from 8 MB to 1.4 MB; fullDataNames is 18 MB 93 | save demo18_data fullDataSparse fullDataNames % .mat file compresses it anyhow... 94 | end 95 | %% 96 | fprintf('Found at least %d distinct names (maybe more, since could be collisions)\n', nnz(fullData) ); 97 | fprintf('  And there were %.1f million people in the dataset\n', sum(fullData)/1e6 ); 98 | % Find collisions this way: 99 | fullDataCollisions = zeros(nNameBuckets,1); 100 | for j = 1:nNameBuckets 101 | if ~isempty( fullDataNames{j} ) 102 | str = fullDataNames{j}; 103 | fullDataCollisions(j) = length( strfind(str,',') ) + 1; 104 | end 105 | end 106 | numberUniqueNames = sum( fullDataCollisions ); 107 | fprintf('  Checking for collisions, we found exactly %d distinct names, so %d collisions\n', numberUniqueNames, numberUniqueNames-nnz(fullData) ); 108 | collisionIndex = find( fullDataCollisions > 1 ); 109 | fprintf('  For example, a few collisions:\n'); 110 | fullDataNames{ collisionIndex(1:2) } 111 | 112 | %% Warning... 113 | % Lots of bugs because of datatypes, e.g., 114 | % j = 981698; 115 | % typecast(j, 'uint8') 116 | % typecast( uint32(j), 'uint8' ) 117 | % The above two things are NOT the same!! 118 | 119 | % Most bugs are hopefully fixed! 120 | 121 | %% Try CountMin sketch 122 | % We could have applied this as we read in the data files, since 123 | % it is a linear sketch, so easy to update. But since we have 124 | % the full data anyhow, let's do it the easy way.
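% (Added summary of the standard CountMin guarantees, for reference:)
% C is a d-by-w array of counters. To add a count c for item j, set
%     C(k, h_k(j)) = C(k, h_k(j)) + c     for each row k = 1,...,d
% and to query item j, return min over k of C(k, h_k(j)). With
% nonnegative counts the query never underestimates, and one common
% parameterization, w ~ e/eps and d ~ ln(1/delta), keeps the
% overestimate below eps*(total count) with probability >= 1 - delta.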
125 | d = 7; % e.g., ceil( log2( 1/.01 ) ), so result holds with 99% chance; 126 | w = 2^8; % number of buckets per each hash 127 | C = zeros( d, w ); 128 | 129 | for j = find( fullData )' % only loop over non-empty ones rather than for j = 1:nNameBuckets 130 | Engine = java.security.MessageDigest.getInstance('SHA'); 131 | Engine.update(typecast(uint32(j), 'uint8')); % uint16(j) is a bug (if j is too big) 132 | L = typecast( Engine.digest, 'uint8' ); % make it non-negative 133 | for k = 1:d 134 | ell = L(k) + 1; % make it 1-based not 0-based indexing 135 | C( k, ell ) = C( k, ell ) + fullData( j ); 136 | end 137 | end 138 | 139 | %% And repeat, but with more buckets 140 | d2 = 7; 141 | w2 = 2^12; 142 | C2 = zeros( d2, w2 ); 143 | for j = find( fullData )' 144 | Engine = java.security.MessageDigest.getInstance('SHA'); 145 | Engine.update(typecast(uint32(j), 'uint8')); 146 | L = uint16(typecast( Engine.digest, 'uint8' )); 147 | for k = 1:d2 148 | ell = L(2*k-1)*2^8 + L(2*k); 149 | ell = mod( ell, w2 ) + 1; 150 | C2( k, ell ) = C2( k, ell ) + fullData( j ); 151 | end 152 | end 153 | %% Difference in sizes: 154 | fprintf('Full data has %d entries\n', length(fullData) ); 155 | fprintf(' CountMin structure has %d = %d x %d entries, so %.1fx compression\n', ... 156 | d*w, d, w, length(fullData)/(d*w) ); 157 | fprintf(' and more accurate CountMin structure has %d = %d x %d entries, so %.1fx compression\n', ... 158 | d*w2, d2, w2, length(fullData)/(d2*w2) ); 159 | 160 | % More accurate estimate of compression ratio 161 | fullDataSparse = sparse( fullData ); 162 | stat = whos('fullData'); b1 = stat.bytes; 163 | stat = whos('fullDataSparse'); b2 = stat.bytes; 164 | stat = whos('C'); b3 = stat.bytes; 165 | stat = whos('C2'); b4 = stat.bytes; 166 | kB = 1/1024; 167 | fprintf('Naive: %.1f kB, compressed naive: %.1f kB, CountMin: %.1f kB, CountMin v2: %.1f kB\n', ... 168 | b1*kB, b2*kB, b3*kB, b4*kB ); 169 | fprintf(' So compression ratios %.1fx, %.1fx, %.1fx, %.1fx (relative to naive)\n', ... 170 | b1/b1, b1/b2, b1/b3, b1/b4 ); 171 | fprintf(' ...compression ratios %.1fx, %.1fx, %.1fx, %.1fx (relative to compressed naive)\n', ... 
172 | b2/b1, b2/b2, b2/b3, b2/b4 ); 173 | %% Now, try it out 174 | name = 'james'; % Most popular name in database 175 | % name = 'alexander'; 176 | % name = 'samantha'; 177 | % name = 'john'; 178 | % name = 'hendrix'; % works 179 | % name = 'sophie'; 180 | % name = 'sophia'; 181 | % name = 'marta'; 182 | % name = 'abigayll'; 183 | % name = 'Isabella'; 184 | % name = 'ryan'; 185 | % name = 'padraig'; 186 | % name = 'kathryn'; % known collision 187 | % name = 'fortino'; 188 | % name = 'tomasz'; % known collision 189 | % name = 'stephen'; 190 | 191 | totalNames = sum( fullData ); 192 | %totalNames = sum( C(:) )/d; % equivalent 193 | % sum(C,2) - totalNames % sanity check for debugging purposes 194 | 195 | % Figure out the index j 196 | Engine = java.security.MessageDigest.getInstance('SHA'); 197 | Engine.update(typecast(uint16(lower(name)), 'uint8')); 198 | hash = Engine.digest; % 8 bits per (so 1 byte); keep a few of these 199 | hash = uint32(typecast( hash(1:3), 'uint8' )); % and remove signs 200 | j = mod( 2^16*hash(1) + 2^8*hash(2) + hash(3), nNameBuckets ) + 1; 201 | 202 | 203 | 204 | % And now try the CountMin sketch 205 | Engine = java.security.MessageDigest.getInstance('SHA'); 206 | Engine.update(typecast(uint32(j), 'uint8')); 207 | L = uint16(typecast( Engine.digest, 'uint8' )); 208 | c = Inf; 209 | c2 = Inf; 210 | for k = 1:d 211 | ell = L(k) + 1; % make it 1-based not 0-based indexing 212 | c = min( [c, C( k, ell )] ); 213 | end 214 | for k = 1:d2 215 | ell2 = L(2*k-1)*2^8 + L(2*k); 216 | ell2 = mod( ell2, w2 ) + 1; 217 | c2 = min( [c2, C2( k, ell2 )] ); 218 | end 219 | fprintf('\nName: %s\n', name); 220 | fprintf('True frequency is\t%.5f%%\n', 100*fullData(j)/totalNames ); 221 | fprintf('Estimated frequency is\t%.5f%% (with CountMin sketch)\n', 100*c/totalNames ); 222 | fprintf('Estimated frequency is\t%.5f%% (with larger CountMin sketch)\n', 100*c2/totalNames ); 223 | if fullDataCollisions(j) > 1 224 | fprintf(' Careful! There were other names with hash collisions: '); 225 | disp( fullDataNames{j} ); 226 | elseif fullDataCollisions(j) == 0 227 | fprintf(' Careful! This name was not in database, we''re getting only noise\n'); 228 | end -------------------------------------------------------------------------------- /Demos/demo19_AMS_sketch_vs_JL.m: -------------------------------------------------------------------------------- 1 | %{ 2 | Short demonstration of the AMS sketch 3 | AMS is named for the authors of this paper: 4 | "The space complexity of approximating the frequency moments" (N. Alon, Y. Matias, and M. Szegedy, STOC 1996) 5 | 6 | There have been many updates, improvements to this sketch over the years. 7 | I'm following *some* of the improvements, mainly as described 8 | in Cormode's 2013 lecture http://dimacs.rutgers.edu/~graham/pubs/html/TalkSimons13.html 9 | but see also Cormode's 2011 monograph for more precise statements, 10 | "Sketch techniques for approximate query processing" 11 | Foundations and Trends in Databases 12 | http://www.cs.umass.edu/~mcgregor/711S12/sketches1.pd 13 | 14 | 15 | Stephen Becker 16 | This code uses the AMS_sketch.m code, in the ../Code subdirectory 17 | on this same github repo. 
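(Added note on the estimator used below: each of the d blocks is an
independent count sketch C_k, and for a column x one computes
    est_k = ||C_k x||_2,   est = median over k of est_k.
Each block is an isometry in expectation, E||C_k x||_2^2 = ||x||_2^2,
and the median over d independent blocks concentrates the estimate;
the script also compares a mean-of-squares variant.)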
18 | %} 19 | rng(0); 20 | addpath ~/Repos/randomized-algorithm-class/Code/ 21 | 22 | p = 1e4; 23 | n = 1e2; 24 | w = 2^10; 25 | d = 7; 26 | 27 | X = randn(p,n); 28 | % Y = randn(p,n); 29 | % C = AMS_sketch( X, w, d ); % calls several count sketches 30 | % CY = AMS_sketch( Y, w, d ); 31 | % C2 = AMS_sketch( X - 3.14*Y, w, d ); 32 | % norm( (C-3.14*CY) - C2, 'fro' ) % confirm linearity of sketch 33 | 34 | if false 35 | addpath('~/Google Drive/TeachingDocs/APPM4720_5720_Spring19_Randomized/Code'); 36 | load mnist_data_all 37 | X = Train'; 38 | Xt = Train; 39 | d = 7; 40 | m = 20; 41 | n = size(X,2); 42 | end 43 | 44 | columnNorms = @(X) sqrt( sum(X.^2) ); 45 | %% 46 | saltSeed = 1; 47 | tic 48 | if n > 1e4 49 | C = AMS_sketch( Xt, w, d, 'transposedX', true ); % calls several count sketches 50 | else 51 | C = AMS_sketch( X, w, d ); % calls several count sketches 52 | end 53 | toc 54 | %% 55 | cNorms = zeros(d,n); 56 | for k = 1:d 57 | CC = C( (1+(k-1)*w):k*w, :); 58 | cNorms(k,:) = columnNorms( CC ); 59 | end 60 | cNormEstimate = median( cNorms, 1 ); 61 | cNormEstimate_variant = sqrt( mean( cNorms.^2, 1 ) ); % e.g., w<-- w*d, d<-- 1 62 | %% 63 | figure(1); clf; 64 | scatter( columnNorms(X), cNormEstimate, 'o' ) 65 | hold all 66 | scatter( columnNorms(X), cNormEstimate_variant, 'x' ) 67 | line( [96,104],[96,104] ); 68 | axis equal % make it dramatic! 69 | legend('Median of rows','Mean of rows'); 70 | %% 71 | figure(1); clf; 72 | histogram( cNormEstimate./columnNorms(X), 'binwidth',.01 ) 73 | hold all 74 | histogram( cNormEstimate_variant./columnNorms(X), 'binwidth',.01 ) 75 | legend('Median of rows','Mean of rows'); 76 | 77 | -------------------------------------------------------------------------------- /Demos/demo20_CoreSets_for_Kmeans.m: -------------------------------------------------------------------------------- 1 | %{ 2 | Following review paper of: 3 | "?Practical Coreset Constructions for Machine Learning" 4 | by Bachem, Lucic, Krause 2017; ?http://arxiv.org/abs/1703.06476 5 | %} 6 | 7 | % Needs pdist2_faster, kmeansPlusPlus, hungarian, bestMap, mnist_data_all.mat 8 | addpath ~/Repos/randomized-algorithm-class/Code/ 9 | addpath('~/Google Drive/TeachingDocs/APPM4720_5720_Spring19_Randomized/Code'); 10 | load mnist_data_all 11 | percentCorrect = @(labels1,labels2) length(find(labels1==labels2))/length(labels1); 12 | %% 13 | p = size(Train,2); 14 | K = 10; % ask for 10 labels 15 | 16 | ALGO_NAMES = {'Kmeans','Kmeans++','Kmeans-Coresets-uniform','Kmeans-Coresets'}; 17 | [TrainError,TestError,Timing] = deal(zeros(length(ALGO_NAMES),1)); 18 | 19 | ALGO = 1; 20 | tic 21 | [IDX_Train, ClusterCenters] = kmeans( Train, K ); 22 | Timing(ALGO) = toc; 23 | [~,IDX_Test ] = pdist2_faster(ClusterCenters,Test,'squaredeuclidean','smallest',1); 24 | IDX_Train_permuted = bestMap( Train_labels, IDX_Train ); 25 | IDX_Test_permuted = bestMap( Test_labels, IDX_Test ); 26 | TrainError(ALGO) = percentCorrect(IDX_Train_permuted,Train_labels); 27 | TestError(ALGO) = percentCorrect(IDX_Test_permuted,Test_labels); 28 | %% Use K-means++ 29 | ALGO = 2; 30 | tic 31 | ClusterCenters = kmeansPlusPlus( Train, K ); 32 | Timing(ALGO) = toc; 33 | [Dist_Kpp,IDX_Train_Kpp ] = pdist2_faster(ClusterCenters,Train,'squaredeuclidean','smallest',1); 34 | [~,IDX_Test_Kpp ] = pdist2_faster(ClusterCenters,Test,'squaredeuclidean','smallest',1); 35 | 36 | IDX_Train_permuted = bestMap( Train_labels, IDX_Train_Kpp ); 37 | IDX_Test_permuted = bestMap( Test_labels, IDX_Test_Kpp ); 38 | 39 | TrainError(ALGO) = 
percentCorrect(IDX_Train_permuted,Train_labels); 40 | TestError(ALGO) = percentCorrect(IDX_Test_permuted,Test_labels); 41 | 42 | %% Use K-means to get a core-set 43 | N = size( Train, 1 ); 44 | M = round( N/100 ); % size of the core-set 45 | % The naive/uniform coreset works fine if M is about N/10 46 | % but if we start sub-sampling further, e.g., N/100, 47 | % then the fancier weighted coreset starts to show improvement 48 | % (a worst-case improvement; on some random samples, it works fine) 49 | alpha = 16*(log(K)+2); 50 | clusterAvgDist = zeros(K,1); 51 | c = mean( Dist_Kpp ); 52 | weights = zeros(N,1); 53 | weights = weights + alpha*Dist_Kpp'/c; 54 | for k = 1:K 55 | ind = find( IDX_Train_Kpp == k ); 56 | clusterSize = length( ind ); 57 | ci = mean( Dist_Kpp( ind ) ); 58 | weights( ind ) = weights( ind ) + 2*alpha*ci/(c*clusterSize) + 4*N/clusterSize; 59 | end 60 | weights = weights/sum(weights); 61 | histogram( weights ); 62 | 63 | naive_coreset = randsample( N, M ); % uniform weights 64 | coreset = randsample( N, M, true, weights ); 65 | % Now, to really do core-sets, we also need to update the weights for each 66 | % entry that is sampled, e.g., before, it was implicitly 1/N 67 | % Now, it's not 1/M, but rather 1/(M*N*weights) 68 | % Not sure how to do that with Lloyd's algorithm short of writing 69 | % our own kmeans script, so just ignore... 70 | %% Now, re-run Kmeans on these sampled data 71 | 72 | ALGO = 3; 73 | tic 74 | [~, ClusterCenters] = kmeans( Train(naive_coreset,:), K ); 75 | Timing(ALGO) = toc; 76 | [~,IDX_Train ] = pdist2_faster(ClusterCenters,Train,'squaredeuclidean','smallest',1); 77 | [~,IDX_Test ] = pdist2_faster(ClusterCenters,Test,'squaredeuclidean','smallest',1); 78 | IDX_Train_permuted = bestMap( Train_labels, IDX_Train ); 79 | IDX_Test_permuted = bestMap( Test_labels, IDX_Test ); 80 | TrainError(ALGO) = percentCorrect(IDX_Train_permuted,Train_labels); 81 | TestError(ALGO) = percentCorrect(IDX_Test_permuted,Test_labels); 82 | 83 | ALGO = 4; 84 | tic 85 | [~, ClusterCenters] = kmeans( Train(coreset,:), K ); 86 | Timing(ALGO) = toc; 87 | [~,IDX_Train ] = pdist2_faster(ClusterCenters,Train,'squaredeuclidean','smallest',1); 88 | [~,IDX_Test ] = pdist2_faster(ClusterCenters,Test,'squaredeuclidean','smallest',1); 89 | IDX_Train_permuted = bestMap( Train_labels, IDX_Train ); 90 | IDX_Test_permuted = bestMap( Test_labels, IDX_Test ); 91 | TrainError(ALGO) = percentCorrect(IDX_Train_permuted,Train_labels); 92 | TestError(ALGO) = percentCorrect(IDX_Test_permuted,Test_labels); 93 | 94 | %% Print out results 95 | for ALGO = 1:4 96 | fprintf('Training error, %23s: %.2f\n', ALGO_NAMES{ALGO}, TrainError(ALGO) ); 97 | end 98 | fprintf('\n'); 99 | for ALGO = 1:4 100 | fprintf('Test error, %23s: %.2f\n', ALGO_NAMES{ALGO}, TestError(ALGO) ); 101 | end 102 | fprintf('\n'); 103 | for ALGO = 1:2 104 | fprintf('Timing, %23s: %.2f sec\n', ALGO_NAMES{ALGO}, Timing(ALGO) ); 105 | end 106 | for ALGO = 3:4 107 | fprintf('Timing, %23s: %.2f sec = %.2f + %.2f\n', ALGO_NAMES{ALGO},... 108 | Timing(2)+Timing(ALGO),Timing(ALGO),Timing(2) ); 109 | end 110 | fprintf('Coresets used M=%d (of %d possible, so %.1f%%) points\n', ... 111 | M, N, 100*M/N ); -------------------------------------------------------------------------------- /Demos/vignette-rsvd.jl: -------------------------------------------------------------------------------- 1 | #= 2 | Algorithm RSVD.
3 | given a matrix A \in \reals^{m \times n}, a target rank k and an oversampling 4 | parameter p (e.g., p = 10 is a good choice). 5 | stage A. Find an approximate range. 6 | 1. form an n \times (k+p) Gaussian random matrix G. 7 | 2. form the sample matrix Y = AG. 8 | 3. orthonormalize the columns of Y via a QR factorization. 9 | stage B. Form a specific factorization. 10 | 4. form the (k+p) \times n matrix B = Q'A. 11 | 5. form the SVD of the (small) matrix B as B = \hat{U} D V'. 12 | 6. form U = Q \hat{U}. 13 | return matrices U, D, and V as an approximate rank (k+p) SVD of A. 14 | =# 15 | using Random, LinearAlgebra, Plots, Statistics 16 | # ---------------------------------------------------------------------------- # 17 | function vignette_rsvd() 18 | rng = Random.seed!(2); # set seed for reproducibility 19 | n_sims = 10; # number of simulations 20 | n_subs = 25; # number of subsamples 21 | m = 2000; # rows of matrix 22 | n = 20*ceil(log(m)); # columns of matrix 23 | # n_sims = 50; n_subs = 25; m = 1000; n = 15*ceil(log(m)); # alt. run parameters 24 | # n_sims = 1; n_subs = 25; m = 5000; n = 15*ceil(log(m)); # alt. run parameters 25 | k = ceil(log(m)); # target rank 26 | p = max(ceil(log(m)), 10); # oversampling parameter 27 | n = Int16(n); k = Int16(k); p = Int16(p); # convert floats to integers 28 | # ---------------------------------------------------------------------------- # 29 | times = zeros(n_sims, 2); # bookkeeping for run times 30 | fro_mean = zeros(n_sims, n_subs); # bookkeeping for mean Frobenius norm 31 | op_mean = zeros(n_sims, n_subs); # bookkeeping for mean operator norm 32 | norm_bound = zeros(n_sims, 2); # bookkeeping for theoretical norm 33 | fro_lo = zeros(n_sims, 1); # bookkeeping for Frobenius norm bound 34 | op_lo = zeros(n_sims, 1); local U_A, D_A, V_A, U_B, D_B, V_B; # bookkeeping for operator norm bound; the local declaration hoists the SVD factors to function scope so they survive the loops below 35 | # ---------------------------------------------------------------------------- # 36 | for j in 1:n_sims 37 | # "data" matrix 38 | A = [2*ones(m,2)+rand(m,2) randn(m, k-2) 0.01*randn(m, n-k)]/sqrt(m); 39 | # NOTE: the matrix A has k columns that lead to "important" singular values. 40 | # The remaining n-k columns correspond to fast-decaying singular values. 41 | for i in 1:n_subs 42 | # stage A of randomized SVD 43 | G = randn(n, k+p); # Gaussian random matrix 44 | Y = A*G; # sample matrix 45 | F = qr(Y); Q = Matrix(F.Q); # orthonormalize Y 46 | # stage B of randomized SVD 47 | B = Q'*A; # form small matrix 48 | U_B, D_B, V_B = svd(B); # SVD of B 49 | U = Q*U_B; # rank k matrix 50 | # bookkeeping and comparisons 51 | U_A, D_A, V_A = svd(A); # SVD of A 52 | fro_mean[j,i] = norm(A - Q*Q'*A, 2); 53 | op_mean[j,i] = opnorm(A - Q*Q'*A); 54 | end # end of inner simulation loop (i.e., simulation for a fixed matrix A) 55 | fro_lo[j,1] = sum( D_A[k+1:min(m,n)].^2 )^0.5; 56 | op_lo[j,1] = D_A[k+1]; 57 | norm_bound[j, 1] = (1 + k / (p-1))^(0.5) * fro_lo[j,1]; # Frobenius 58 | norm_bound[j, 2] = (1 + sqrt(k / (p-1))) * op_lo[j,1] + # operator 59 | exp(1) * (sqrt(k+p) / p) * fro_lo[j,1]; 60 | end # end of outer simulation loop 61 | # ---------------------------------------------------------------------------- # 62 | # plot showing singular value decay 63 | p3 = plot(D_A, yscale = :log10, linecolor = :blue, 64 | marker = :circle, markercolor = :blue, label = "full", 65 | title = "sing. value decay (final sim.)"); 66 | p4 = plot(D_A[1:k+p], yscale = :log10, linecolor = :blue, 67 | marker = :circle, markercolor = :blue, label = "full", 68 | title = "sing.
value comparison (final sim.)") 69 | plot!(D_B, yscale = :log10, linecolor = :red, linestyle = :dash, 70 | marker = :x, markerstrokecolor = :red, label = "randomized"); 71 | 72 | # ---------------------------------------------------------------------------- # 73 | # plots of average Frobenius and operator norms vs. theoretical bounds 74 | p1 = plot(norm_bound[:,1],linecolor = :blue, marker = :circle, 75 | markercolor = :blue, label = "upper", 76 | title = "E-Y bounds: Frobenius"); 77 | plot!(mean(fro_mean, dims = 2),linecolor = :red, linestyle = :dash, 78 | marker = :x, markerstrokecolor = :red, label = "mean"); 79 | plot!(fro_lo, linecolor = :blue, linestyle = :dot, 80 | marker = :star8, markercolor = :blue, label = "lower", 81 | legend = :bottomright); 82 | p2 = plot(norm_bound[:,2],linecolor = :blue, marker = :circle, 83 | markercolor = :blue, label = "upper", 84 | title = "E-Y bounds: operator"); 85 | plot!(mean(op_mean, dims = 2),linecolor = :red, linestyle = :dash, 86 | marker = :x, markerstrokecolor = :red, label = "mean"); 87 | plot!(op_lo,linecolor = :blue, linestyle = :dot, 88 | marker = :star8, markercolor = :blue, label = "lower", 89 | legend = :bottomright); 90 | # return summary plot as "output" of the function 91 | plot(p3, p4, p1, p2, layout=(2,2)) 92 | end # end of function 93 | # ---------------------------------------------------------------------------- # 94 | vignette_rsvd() 95 | -------------------------------------------------------------------------------- /Demos/vignette_rsvd.m: -------------------------------------------------------------------------------- 1 | function vignette_rsvd 2 | %{ 3 | Vignette - randomized singular value decomposition (RSVD). 4 | APPM 4720/5720 Randomized Algorithms, Spring 2019 5 | 6 | Demo to illustrate a simple RSVD based on the two-stage approach of Martinsson. 7 | In particular, see section four of "Randomized Methods for Matrix Computations" 8 | by P.G. Martinsson as appearing in "The Mathematics of Data" for details and 9 | additional analysis. An updated version of the survey paper can be found at: 10 | https://arxiv.org/pdf/1607.01649.pdf. (Again, see section four, pages 8-9.) 11 | 12 | Algorithm RSVD. 13 | given a matrix A \in \reals^{m \times n}, a target rank k and an oversampling 14 | parameter p (e.g., p = 10 is a good choice). 15 | stage A. Find an approximate range. 16 | 1. form an n \times (k+p) Gaussian random matrix G. 17 | 2. form the sample matrix Y = AG. 18 | 3. orthonormalize the columns of Y via a QR factorization. 19 | stage B. Form a specific factorization. 20 | 4. form the (k+p) \times n matrix B = Q'A. 21 | 5. form the SVD of the (small) matrix B as B = \hat{U} D V'. 22 | 6. form U = Q \hat{U}. 23 | return matrices U, D, and V as an approximate rank (k+p) SVD of A. 24 | %} 25 | % ---------------------------------------------------------------------------- % 26 | rng(2); % set seed for reproducibility 27 | n_sims = 5; % number of simulations 28 | n_subs = 25; % number of subsamples 29 | m = 2000; % rows of matrix 30 | % n_sims = 50; n_subs = 25; m = 1000; % alt. run parameters 31 | % n_sims = 1; n_subs = 25; m = 5000; % alt. 
32 | n = 15*ceil(log(m));        % columns of matrix
33 | k = ceil(log(m));           % target rank
34 | p = max([ceil(log(m)) 10]); % oversampling parameter
35 | % ---------------------------------------------------------------------------- %
36 | times = zeros(n_sims, 2);         % bookkeeping for run times
37 | fro_mean = zeros(n_sims, n_subs); % bookkeeping for mean Frobenius norm
38 | op_mean = zeros(n_sims, n_subs);  % bookkeeping for mean operator norm
39 | norm_bound = zeros(n_sims, 2);    % bookkeeping for theoretical norm bounds
40 | fro_lo = zeros(n_sims, 1);        % bookkeeping for Frobenius norm lower bound
41 | op_lo = zeros(n_sims, 1);         % bookkeeping for operator norm lower bound
42 | % ---------------------------------------------------------------------------- %
43 | for j = 1:n_sims
44 |   A = [2+rand(m, 2) randn(m, k-2) 0.01*randn(m, n-k)]/sqrt(m); % "data" matrix
45 |   % NOTE: the matrix A has k columns that lead to "important" singular values.
46 |   % The remaining n-k columns correspond to fast-decaying singular values.
47 |   for i = 1:n_subs
48 |     % stage A of randomized SVD
49 |     G = randn(n, k+p);   % Gaussian random matrix
50 |     Y = A*G;             % sample matrix
51 |     [Q, ~] = qr(Y, 0);   % orthonormalize Y (economy-size QR)
52 |     % stage B of randomized SVD
53 |     B = Q'*A;            % form small matrix
54 |     tic; [U_B, D_B, V_B] = svd(B, 'econ'); times(j, 1) = toc; % SVD of B (only this small SVD is timed, not stages A+B; overwritten each subsample)
55 |     U = Q*U_B;           % left factor of the approximate rank-(k+p) SVD
56 |     % bookkeeping and comparisons
57 |     tic; [U_A, D_A, V_A] = svd(A, 'econ'); times(j, 2) = toc; % full SVD of A, for comparison
58 |     fro_mean(j,i) = norm(A - Q*Q'*A, 'fro');
59 |     op_mean(j,i) = norm(A - Q*Q'*A);
60 |   end % end of inner simulation loop (i.e., simulation for a fixed matrix A)
61 |   fro_lo(j, 1) = sum(diag(D_A(k+1:min([m,n]),k+1:min([m,n]))).^2)^0.5; % best rank-k error (Eckart-Young), Frobenius
62 |   op_lo(j, 1) = D_A(k+1,k+1);                                         % best rank-k error (Eckart-Young), operator
63 |   norm_bound(j, 1) = (1 + k / (p-1))^(0.5) * fro_lo(j, 1);        % Frobenius
64 |   norm_bound(j, 2) = (1 + sqrt(k / (p-1))) * op_lo(j, 1) + ...    % operator
65 |                      exp(1) * (sqrt(k+p) / p) * fro_lo(j, 1);
66 | end % end of outer simulation loop
67 | % ---------------------------------------------------------------------------- %
68 | figure; % plot showing singular value decay
69 | semilogy(diag(D_A), '-bo'), hold on, semilogy(diag(D_B), '--xr'), hold off;
70 | title('singular value comparison/decay (final simulation)');
71 | legend('full','randomized')
72 | % ---------------------------------------------------------------------------- %
73 | figure; subplot(2,1,1); % plots comparing computation times (note: the 'randomized' time is only the small SVD of B, not stages A+B)
74 | semilogy(times(:,1), '--xr'), hold on, semilogy(times(:,2), '-bo'), hold off;
75 | legend('randomized', 'full', 'Location', 'east')
76 | title('SVD timing comparison')
77 | subplot(2,1,2);
78 | plot(times(:,2) ./ times(:,1), '-ks');
79 | title('ratio of SVD times');
80 | legend('full-to-randomized', 'Location', 'southeast');
81 | % ---------------------------------------------------------------------------- %
82 | figure; % plots of average Frobenius and operator norms vs. theoretical bounds
83 | subplot(1,2,1);
84 | plot(norm_bound(:,1),'-ob'), hold on, plot(mean(fro_mean, 2),'--xr');
85 | plot(fro_lo, ':*b'); ylim([0 max(max(norm_bound))*1.1]); hold off;
86 | title('Eckart-Young bounds: Frobenius')
87 | legend('upper','mean','lower','Location','southoutside')
88 | subplot(1,2,2);
89 | plot(norm_bound(:,2),'-ob'), hold on, plot(mean(op_mean, 2),'--xr');
90 | plot(op_lo, ':*b'); ylim([0 max(max(norm_bound))*1.1]); hold off;
91 | title('E-Y bounds: operator')
92 | legend('upper','mean','lower','Location','southoutside')
93 | end % end of function
94 | % ---------------------------------------------------------------------------- %
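% For matrices whose singular values decay slowly, stage A above is commonly
% augmented with a few steps of power (subspace) iteration, i.e., working with
% (A*A')^q * A * G instead of A*G; see section four of Martinsson's survey.
% The local function below is an illustrative sketch only (its name and
% signature are placeholders, and the vignette above does not call it).
function Q = randRangePower(A, k, p, q)
% RANDRANGEPOWER  Orthonormal basis for an approximate range of A,
% using k+p samples and q steps of power iteration.
G = randn(size(A,2), k+p);  % Gaussian test matrix
Y = A*G;                    % sample matrix
for iter = 1:q
    [W, ~] = qr(Y, 0);      % re-orthonormalize to avoid loss of accuracy
    [W, ~] = qr(A'*W, 0);
    Y = A*W;
end
[Q, ~] = qr(Y, 0);          % basis for the range of (A*A')^q * A * G
end
% ---------------------------------------------------------------------------- %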
--------------------------------------------------------------------------------
/Handouts/Linear_algebra_notes_matrices.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Handouts/Linear_algebra_notes_matrices.pdf
--------------------------------------------------------------------------------
/Handouts/README.md:
--------------------------------------------------------------------------------
1 | # Handouts
2 |
3 | For APPM 5650 Randomized Algorithms, Fall 2021.
4 |
5 |
6 | - [Linear algebra facts, focusing on matrix decompositions](Linear_algebra_notes_matrices.pdf). The source code is on [overleaf](https://www.overleaf.com/read/yprqvktvsxgb). If you have something to add or fix, email Stephen.
7 |
--------------------------------------------------------------------------------
/Handouts/SamplingLecture_Sept29_2021.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Handouts/SamplingLecture_Sept29_2021.pdf
--------------------------------------------------------------------------------
/Homeworks/APPM5650Fall21_RandomizedAlgos_HW01.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW01.pdf
--------------------------------------------------------------------------------
/Homeworks/APPM5650Fall21_RandomizedAlgos_HW02.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW02.pdf
--------------------------------------------------------------------------------
/Homeworks/APPM5650Fall21_RandomizedAlgos_HW03.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW03.pdf
--------------------------------------------------------------------------------
/Homeworks/APPM5650Fall21_RandomizedAlgos_HW04.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW04.pdf
--------------------------------------------------------------------------------
/Homeworks/APPM5650Fall21_RandomizedAlgos_HW05.pdf:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW05.pdf -------------------------------------------------------------------------------- /Homeworks/APPM5650Fall21_RandomizedAlgos_HW06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW06.pdf -------------------------------------------------------------------------------- /Homeworks/APPM5650Fall21_RandomizedAlgos_HW07.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW07.pdf -------------------------------------------------------------------------------- /Homeworks/APPM5650Fall21_RandomizedAlgos_HW08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW08.pdf -------------------------------------------------------------------------------- /Homeworks/APPM5650Fall21_RandomizedAlgos_HW09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW09.pdf -------------------------------------------------------------------------------- /Homeworks/APPM5650Fall21_RandomizedAlgos_HW10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW10.pdf -------------------------------------------------------------------------------- /Homeworks/ProjectInformation.md: -------------------------------------------------------------------------------- 1 | # APPM 5650 Randomized Algorithms Final Project 2 | 3 | - The project is due at the beginning of class on Monday Dec 6, 2021. The last two days of class (Mon Dec 6 and Wed Dec 8) will be used for presentations 4 | - The project consists of a 10 minute presentation and a short paper. There is no strict length requirement for the paper, but aim for 4 to 6 pages (including figures). Paper lengths are a guideline, as I’m aware you can add figures, code, adjust white space and font to make it longer/shorter. 5 | - If you really wanted to, you could write a longer, much more detailed paper and skip the presentation 6 | - On Monday Dec 6, we will determine the presentation order. You must be ready to present that day in case you are chosen. 7 | - You are encouraged to form groups; group sizes can be 1 (single person), 2 or 3. No larger please. 8 | - By Friday Dec 3, email me to let me know your group, so I know how many groups there are; this will help me schedule the presentations. 9 | - The project is 25% of your overall class grade. It cannot be dropped. 
10 | - Deliverables:
11 |   - Written paper (typeset; Latex is suggested but not required), turned in via Canvas
12 |   - 10 minute talk
13 |   - Slides for the talk (turned in via Canvas)
14 | - More details:
15 |   - Class participation is extra important these days, as a sign of respect to your fellow students. You lose 10% of your project grade if you do not attend either of these last 2 days (unless you have a valid reason, like international travel, and contacted me about it beforehand).
16 |   - You also lose 10% of your project grade if you are not ready to present on the day when you're scheduled to speak.
17 | ## What is a valid project?
18 | - The project can be theoretical or computational (or both)
19 | - One option is a "traditional" class project, where you investigate an idea, and/or run simulations or do derivations or proofs, and/or connect several different ideas (e.g., create new ideas, though the originality/impact obviously does not have to rise to the level of a journal publication);
20 | - You're encouraged to pursue an area related to your research interests
21 | - Specific journal articles are a good starting point
22 |   - You can reproduce their results
23 |   - You can compare several methods
24 |   - You can apply a method to a new problem or area
25 |   - You can tweak a method
26 |   - You can redo theoretical derivations more clearly (or with more details)
27 |   - You can do a "book-report style" analysis of a paper, critically evaluating it (think of it as a peer review of an article)
28 | - Your results do not have to be novel; you do not need to write a journal-quality paper!
29 | - You'll want to relate your project to something you learned in the class (see Rubric item #2)
30 | - When in doubt about whether a project idea is valid, you can always email the instructor and ask!
31 | - You can see the project titles from previous years at the [Student projects](https://github.com/stephenbeckr/randomized-algorithm-class#student-projects) part of the main README file in the parent github directory.
32 | ## Rubric
33 | Because the type of report is a bit open-ended, the rubric below is necessarily a bit vague:
34 | 1. Valid/interesting project (25%), and point of project is clear. For example, for an independent investigation, the problem you are trying to solve is explained and motivated and non-trivial. For a book-report style project, presenting on a paper, you need some kind of thesis (e.g., “This paper shows the power of this approach...”) and not just a summary, and explain why you chose that paper. For reproducing the results of a paper (computationally, or analytically by going through a proof in extra detail), explain why you chose the paper, and why you are interested in their results (are they amazing results? do you distrust them? do they nicely illustrate concepts from class?).
35 |
36 | 2. Relate the project to a concept from class (25%). **Your project must include a paragraph describing how it involves concepts learned in class.**
37 |
38 | 3. Insightful discussion (25%). You should discuss/analyze your results, and/or validate a conclusion. For a paper review, you should discuss the strengths and weaknesses of the paper. For a project that involves generating your own results, the quality of the actual work is included in this category.
39 |
40 | 4. Professional communication (25%) of the written document and the oral presentation (and the slides). Well-organized and precise communication, grammatically correct writing, nicely formatted documents and figures.
Figures should be labeled appropriately.
41 |
42 | Here is a [more detailed rubric](ProjectRubric.pdf) that I will actually use for grading the projects.
43 |
--------------------------------------------------------------------------------
/Homeworks/ProjectRubric.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/ProjectRubric.pdf
--------------------------------------------------------------------------------
/Homeworks/README.md:
--------------------------------------------------------------------------------
1 | # Homeworks
2 |
3 | For APPM 5650 Randomized Algorithms, Fall 2021.
4 |
5 | HW solutions are on Canvas.
6 |
7 | - [HW 1](APPM5650Fall21_RandomizedAlgos_HW01.pdf), due Friday Aug 27, 2021 (topics: read some of Mahoney, Martinsson and Tropp, and Vershynin)
8 | - [HW 2](APPM5650Fall21_RandomizedAlgos_HW02.pdf), due Monday Sep 6, 2021 (topics: linear algebra, sparse matrices, Freivalds' algorithm, random orthogonal matrices). Turn this in via Gradescope since there's no class Monday (Labor Day)
9 | - [HW 3](APPM5650Fall21_RandomizedAlgos_HW03.pdf), due Monday Sep 13, 2021 (topics: linear algebra, basic probability, account on [CU's research computing](https://www.colorado.edu/rc))
10 | - [HW 4](APPM5650Fall21_RandomizedAlgos_HW04.pdf), due Monday Sep 20, 2021 (topics: large data files, random projections vs tSNE)
11 | - [HW 5](APPM5650Fall21_RandomizedAlgos_HW05.pdf), due Monday Sep 27, 2021 (topics: random projections)
12 | - [HW 6](APPM5650Fall21_RandomizedAlgos_HW06.pdf), due Monday Oct 4, 2021 (topics: different types of sampling without replacement)
13 | - [HW 7](APPM5650Fall21_RandomizedAlgos_HW07.pdf), due Monday Oct 11, 2021 (topics: least squares)
14 | - [HW 8](APPM5650Fall21_RandomizedAlgos_HW08.pdf), due Monday Oct 18, 2021 (topics: entry-wise sampling)
15 | - [HW 9](APPM5650Fall21_RandomizedAlgos_HW09.pdf), due Monday Oct 25, 2021 (topics: randomized K-means clustering)
16 | - [HW 10](APPM5650Fall21_RandomizedAlgos_HW10.pdf), due Monday Nov 1, 2021 (topics: randomized SVD). Last homework.
17 | - [Project information](ProjectInformation.md). The project is due Monday Dec 6, 2021, and we'll have presentations the last two days of class (Dec 6 and 8).
18 |
19 | # Turning in HW
20 | While the class meets in person, please turn in **hard copies** of your homework in class.
21 |
22 | If we end up meeting remotely due to COVID, then we will switch to Gradescope. As of now, we are *not using Gradescope*.
23 |
24 |
25 | ## FAQ for electronic submissions
26 |
27 | ### General
28 |
29 | **Gradescope** has a [submission guide](https://gradescope-static-assets.s3.amazonaws.com/help/submitting_hw_guide.pdf) that recommends software for your phone to take pictures of written homework and convert it to a PDF (your final submission to Gradescope must be a PDF).
30 |
31 | Note: the links in the PDFs will not work if you view the PDF on github, but if you open the PDF in its own tab, or download it, all the links should work.
32 |
33 | **Collaboration**: Collaboration with your fellow students is OK and in fact recommended, although direct copying is not allowed. Please write down the names of the students that you worked with.
34 |
35 | **Internet**: The internet is allowed for basic tasks (e.g., looking up definitions on Wikipedia), but it is
36 | not permissible to search for proofs, to post requests for help on forums such as [math.stackexchange.com](http://math.stackexchange.com/),
37 | or to look at solution manuals.
38 |
39 | #### Merging multiple PDF files
40 |
41 | **Mac** You can use the Preview software that comes with Mac, and drag-and-drop in the Thumbnail view, or follow these [instructions](https://support.apple.com/en-us/HT202945).
42 |
43 | **Linux** install `pdftk` (e.g., `apt-get install pdftk`), and then on the command line it's just `pdftk inputFile1.pdf inputFile2.pdf cat output outputFileName.pdf`. This works on Mac and Windows too (on Mac, the exact command line works; on Windows, I'm not sure).
44 |
45 | **Windows** there are [lists of free web- and desktop-based software](https://superuser.com/a/34294), but [PDFtk](https://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/) is one of the classic, respected tools (no viruses). I haven't used PDFtk on Windows, but the website claims they have a GUI; or if you don't like their GUI, try a [3rd party GUI that uses PDFtk](https://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/).
46 |
47 | ### Python
48 | For overall Python, and numpy in particular, Matlab users might like [NumPy for Matlab users](https://numpy.org/doc/stable/user/numpy-for-matlab-users.html).
49 |
50 | For **plotting** in Python using Matplotlib, try these [plotting cheatsheets](https://github.com/matplotlib/cheatsheets) and [controlling figure aesthetics](https://seaborn.pydata.org/tutorial/aesthetics.html) with seaborn.
51 |
52 |
53 | #### Jupyter
54 |
55 | Tips for exporting jupyter notebook code to a PDF:
56 |
57 | - You can try this [Notebook to PDF conversion website](https://htmtopdf.herokuapp.com/ipynbviewer/) that some of our students have had good luck with
58 |
59 | - Or try `nbconvert`, which requires [`pandoc`](https://pandoc.org/installing.html). You can do this on [Colab](https://colab.research.google.com/), following the [instructions here](https://stackoverflow.com/a/54191922) (but note that you may need to add a backslash before any white space when you run commands, e.g., change a command like
60 |
61 | `!cp drive/My Drive/Colab Notebooks/Untitled.ipynb ./`
62 | to
63 | ``!cp drive/My\ Drive/Colab\ Notebooks/Untitled.ipynb ./``
64 | )
65 |
66 | Note that if you include latex in the jupyter notebook, when you run `nbconvert`, you cannot have any whitespace near the `\$` symbols for math due to a requirement of `pandoc` (see [here](https://pandoc.org/MANUAL.html#extension-tex_math_dollars)). So, ``$ f(x) = 3x^2 $`` will not work, but `$f(x) = 3x^2$` will be OK.
67 |
68 | The downside of `nbconvert` is that images are saved as png, not pdf, so fonts don't come through, but that's not a big deal for homework.
69 |
70 | If you run jupyter locally, you might be able to run `nbconvert` without using the command line; go to "Download", then "PDF via LaTeX".
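For reference, a typical local invocation (assuming `jupyter` and a TeX distribution are installed; the notebook name here is just a placeholder) is `jupyter nbconvert --to pdf MyNotebook.ipynb`. If you don't have LaTeX installed, `jupyter nbconvert --to html MyNotebook.ipynb` followed by printing the HTML to PDF from a browser also works.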
71 |
72 | #### Python source code (not Jupyter)
73 | The *non-preferred* ways are (1) a screenshot of your editor (not so nice since it's an image, not text, but at least you get syntax color highlighting), and (2) export from a text editor to PDF (not so nice if you don't get syntax color highlighting).
74 |
75 | It's not nice to the graders to submit code without syntax color highlighting!
76 |
77 | Better ways: it depends on your system and editor, but there are many ways. For example, this [stackoverflow 'printing python code to PDF'](https://stackoverflow.com/q/20412038) offers several suggestions. Since I already use `vim`, and it's set up with syntax highlighting, I can follow [this answer](https://stackoverflow.com/a/20412421) and do `vim abc.py -c ":hardcopy > abc.ps" -c ":q"` followed by `ps2pdf abc.ps abc.pdf` -- no extra software needed!
78 |
79 |
80 | ### Matlab
81 |
82 | You can use the export notebook features in Matlab (it can handle latex) if you want; see the [Live-Editor](https://www.mathworks.com/products/matlab/live-editor.html); there are also claims on the internet that it's easy to get Jupyter to run with a Matlab kernel, so you could use Jupyter.
83 |
84 | To just export a figure, there are builtin methods, but one of the nicer ways is to use [export_fig](https://www.mathworks.com/matlabcentral/fileexchange/23629-export_fig), which works like `export_fig MyFileName -pdf -transparent` and makes a file `MyFileName.pdf` (note that PDF files for figures are preferred, since then the text is saved as a font and not bitmapped)
85 |
--------------------------------------------------------------------------------
/Homeworks/custom_headers.tex:
--------------------------------------------------------------------------------
1 | % Some commonly used latex settings. Stephen Becker, July 2013
2 | \pagestyle{plain}
3 | %--------------
4 | \newtheorem{theorem}{Theorem}[section]
5 | \newtheorem{lemma}[theorem]{Lemma}
6 | \newtheorem{corollary}[theorem]{Corollary}
7 | \newtheorem{proposition}[theorem]{Proposition}
8 | \newtheorem{definition}[theorem]{Definition}
9 | \newtheorem{conjecture}[theorem]{Conjecture}
10 | \newtheorem{problem}[theorem]{Problem}
11 | \newtheorem{fact}[theorem]{Fact} % added Jan 2014
12 | \newtheorem{assumption}[theorem]{Assumption} % added Jan 2014
13 | \newtheorem{remark}[theorem]{Remark}
14 | \newtheorem{remarks}[subsection]{Remarks}
15 | \newtheorem{example}[subsection]{Example}
16 | %\newtheorem{example}[theorem]{Example}
17 | %\floatname{algorithm}{Listing}
18 | %\numberwithin{equation}{section} % For now, commenting this out since I do NOT want eq numbers like (0.1)
19 | % Theorems
20 | %\newtheorem{theorem}{Theorem}
21 | %\newtheorem{lemma}{Lemma}
22 | %\newtheorem{remark}{Remark}
23 | %\newtheorem{corollary}{Corollary}%[section]
24 | %\newtheorem{proposition}{Proposition}%[section]
25 | %\newtheorem{definition}{Definition}%[section] % number this the same as theorem and lemma
26 |
27 |
28 | %% commenting
29 | % Affect margins:
30 | %\setlength{\marginparwidth}{1.2in}
31 | \setlength{\marginparwidth}{.8in}
32 | \let\oldmarginpar\marginpar
33 | \renewcommand\marginpar[1]{\-\oldmarginpar[\raggedleft\footnotesize #1]%
34 | {\raggedright\footnotesize #1}}
35 |
36 | % macros for the outline
37 | \newcommand{\todo}{{\bf \textcolor{red}{TODO} }}
38 | \newcommand{\TODO}[1]{{\bf TODO: #1}}
39 | \newcommand{\red}{\textcolor{red}}
40 | \newcommand{\note}[1]{{\bf [{\em Note:} #1]}}
41 |
42 | % Editing commands
43 | \newcommand{\fix}[1]{\textcolor{red}{#1}}
44 | \usepackage[normalem]{ulem} % for sout, not needed for final version
45 | \newcommand{\add}[1]{\textcolor{blue}{#1}}
46 | \newcommand{\new}[1]{\textcolor{blue}{#1}} % synonym
47 | \newcommand{\del}[1]{{\color{Bittersweet}\sout{#1}}}
48 | \newcommand{\remove}[1]{{\color{Bittersweet}\sout{#1}}} % synonym
49 |
50 | % Better, use e.g., (with comma)
51 | %\newcommand\eg{e.g.\xspace}
52 | %\newcommand\ie{i.e.\xspace}
53 |
54 |
55 |
56 |
57 |
58 | \newcommand{\Id}{\text{\em 
I}} 59 | \newcommand{\OpId}{\mathcal{I}} 60 | 61 | % -- Operators -- 62 | %First of all, one must of course recall that \operatorname and \DeclareMathOperator are provided by the amsopn package, which is automatically loaded by amsmath, but is also available standalone 63 | \DeclareMathOperator{\dom}{dom} 64 | \DeclareMathOperator{\vect}{vec} % vec(X) = X(:) in matlab notation 65 | \DeclareMathOperator{\VEC}{vec} % vec(X) = X(:) in matlab notation 66 | \DeclareMathOperator{\mat}{mat} % mat(x) = reshape(x,N,N) 67 | \DeclareMathOperator{\prox}{prox} 68 | \DeclareMathOperator{\tr}{trace} 69 | \DeclareMathOperator{\logdet}{log det} 70 | %\newcommand{\sgn}{\textrm{sgn}} 71 | %\newcommand{\sign}{\textrm{sgn}} or instead \operatorname 72 | \DeclareMathOperator{\shr}{shrink} 73 | \DeclareMathOperator{\shrink}{shrink} 74 | \DeclareMathOperator{\trunc}{trunc} 75 | \DeclareMathOperator{\range}{range} 76 | \DeclareMathOperator{\rank}{rank} 77 | \DeclareMathOperator{\diag}{diag} 78 | \DeclareMathOperator{\trace}{trace} 79 | \DeclareMathOperator{\supp}{supp} 80 | \DeclareMathOperator*{\argmax}{argmax} % puts subscripts in the right place 81 | \DeclareMathOperator*{\argmin}{argmin} 82 | \DeclareMathOperator*{\minimize}{minimize} 83 | \DeclareMathOperator*{\maximize}{maximize} 84 | % -- Misc -- 85 | \newcommand\thalf{{\textstyle\frac{1}{2}}} 86 | \newcommand{\eps}{\varepsilon} 87 | \newcommand{\e}{\mathrm{e}} 88 | \renewcommand{\i}{\imath} 89 | %\newcommand{\bmat}[1]{\begin{bmatrix} #1 \end{bmatrix}} 90 | \newcommand{\smax}{\sigma_{\max}} 91 | \newcommand{\smin}{\sigma_{\min}} 92 | %\newcommand{\T}{*} % (see also \transp, \adj below) 93 | \newcommand{\T}{T} % for the adjoint/transpose 94 | \newcommand{\transp}{T} 95 | \newcommand{\adj}{*} 96 | \newcommand{\psinv}{\dagger} 97 | % -- Mathbb -- 98 | \newcommand{\R}{\mathbb{R}} 99 | \newcommand{\RR}{\mathbb{R}} 100 | \newcommand{\Rn}{\R^{n}} 101 | \newcommand{\Rmn}{\R^{m \times n}} 102 | \newcommand{\Rnn}{\R^{n \times n}} 103 | \newcommand{\Rmm}{\R^{m \times m}} 104 | \newcommand{\C}{\mathbb{C}} 105 | \newcommand{\Z}{\mathbb{Z}} 106 | \newcommand{\HH}{\mathcal{H}} % for Hilbert space (\H already defined). 107 | \newcommand{\EE}{\operatorname{\mathbb{E}}} % for probability and expectations 108 | \newcommand{\E}{\operatorname{\mathbb{E}}} % is operatorname necessary? 109 | \renewcommand{\P}{\operatorname{\mathbb{P}}} % for probability 110 | % -- Mathcal -- 111 | \newcommand{\id}{\mathcal{I}} % identity operator 112 | \newcommand{\AAA}{\ensuremath{\mathcal{A}}} % generic linear operator 113 | \newcommand{\cA}{\ensuremath{\mathcal{A}}} % generic linear operator 114 | \newcommand{\K}{\ensuremath{\mathcal{K}}} % cone 115 | \newcommand{\cK}{\ensuremath{\mathcal{K}}} % cone 116 | \newcommand{\proj}{\ensuremath{\mathcal{P}}} % Projection 117 | \newcommand{\PP}{\operatorname{\mathcal{P}}} % for projections 118 | \newcommand{\lag}{\ensuremath{\mathcal{L}}} % Lagrangian 119 | \renewcommand{\L}{{\mathcal L}} 120 | \newcommand{\N}{{\mathcal{N}}} % for normal N(0,1) variables... 
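% Illustrative usage of the macros in this file (example only; the symbols
% S, A, b, x, y below are placeholders): a sketched least-squares problem can
% be typeset as
%   \hat{x} = \argmin_{x \in \Rn} \thalf \norm{S A x - S b}^2 ,
% and an expectation statement as \E \norm{S y}^2 = \norm{y}^2 .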
121 | \newcommand{\order}{\mathcal{O}} % big O notation 122 | % -- Text shortcuts -- 123 | \newcommand{\st}{\ensuremath{\;\text{such that}\;}} 124 | %\newcommand{\st}{\text{subject to}} 125 | \newcommand{\gs}{g_\text{sm}} % smooth part of dual objective 126 | 127 | 128 | % -- To get the ones vector to look nice (without using the bbold package) 129 | \newcommand{\bbfamily}{\fontencoding{U}\fontfamily{bbold}\selectfont} 130 | \newcommand{\textbb}[1]{{\bbfamily#1}} 131 | \DeclareMathAlphabet{\mathbbb}{U}{bbold}{m}{n} 132 | \newcommand{\ones}{\mathbbb 1} % ones vector 133 | 134 | % -- For := type stuff -- 135 | %\newcommand{\defeq}{\mathrel{\mathop:}=} % for definitions, e.g. z := y + 3 136 | %\newcommand{\defeq}{\triangleq} % another alternative 137 | %\newcommand{\defeq}{\equiv} % another alternative 138 | \newcommand{\defeq}{\stackrel{\text{\tiny def}}{=}} % another alternative 139 | %\newcommand{\defeq}{\stackrel{\text{\tiny def}}{\hbox{\equalsfill}}} % another alternative, doesn't work 140 | 141 | 142 | % -- Inner products and norms -- 143 | \newcommand{\<}{\langle} 144 | \renewcommand{\>}{\rangle} 145 | \newcommand{\restrict}[1]{\big\vert_{#1}} 146 | % If using < x | y > or { x | x < 0 } 147 | %http://tex.stackexchange.com/questions/498/mid-vertical-bar-vert-lvert-rvert-divides 148 | %use \mid not | (bar, bracket) for inner products and such. 149 | %\newcommand{\iprod}[2]{\left\langle #1 , #2 \right\rangle} 150 | \newcommand{\iprod}[2]{\left\langle #1,\,#2 \right\rangle} 151 | \newcommand{\iprodMed}[2]{\Bigl\langle #1 , #2 \Bigr\rangle} 152 | \newcommand{\scal}[2]{\left\langle{#1},\,{#2}\right\rangle} 153 | \newcommand{\norm}[1]{{\left\lVert{#1}\right\rVert}} 154 | \newcommand{\dist}[2]{\left\| #1 - #2 \right\|_2} 155 | \newcommand{\vectornormbig}[1]{\big\|#1\big\|} 156 | \newcommand{\vectornormmed}[1]{\big\|#1\big\|} 157 | 158 | 159 | % Linear algebra macros 160 | %\newcommand{\vct}[1]{\bm{#1}} 161 | %\newcommand{\mtx}[1]{\bm{#1}} 162 | \newcommand{\vct}[1]{{#1}} 163 | \newcommand{\mtx}[1]{{#1}} 164 | %\newcommand{\mtx}[1]{\mathsfsl{#1}} 165 | \renewcommand{\vec}[1]{{\boldsymbol{#1}}} 166 | 167 | 168 | 169 | % -- use amsthm instead -- 170 | %\def \endprf{\hfill {\vrule height6pt width6pt depth0pt}\medskip} 171 | %\newenvironment{proof}{\noindent {\bf Proof} }{\endprf\par} 172 | %\newcommand{\qed}{{\unskip\nobreak\hfil\penalty50\hskip2em\vadjust{} 173 | %\nobreak\hfil$\Box$\parfillskip=0pt\finalhyphendemerits=0\par}} 174 | 175 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Stephen Becker 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Randomized Algorithm Class, APPM/STAT 5650, Fall 2021
2 | Randomized algorithm class at CU Boulder, Fall 2021, [Professor Becker](http://amath.colorado.edu/faculty/becker/)
3 |
4 | NOTE: Fall 2021, this is APPM/STAT 5650. These course materials were created Spring 2019 when the same class was taught (also by Stephen Becker) under the special topics designation APPM 4720/5720; an earlier version of the special topics course was taught by Prof. Gunnar Martinsson.
5 |
6 | The course meets MWF 10:20 AM - 11:10 AM in ECCR 257, the "Newton Lab". The current plan is for the course to meet in person, assuming COVID-19 levels remain reasonable in Boulder.
7 |
8 | The actual topics we covered, and links to references, are on this [google sheet](https://docs.google.com/spreadsheets/d/1z2yT99o8nCiotU0OZbrmmk0kAjff5iUDhKo3fpRVORA/edit?usp=sharing). See below for a high-level list of what we covered. There was no single textbook for the class (and no standard set of notes).
9 |
10 | This git repo contains things like code demos used in class. Most of the code is in Matlab; if any students want to translate demos to other languages and then push them, just make a pull request.
11 | - [Demos](Demos/)
12 | - [Homeworks](Homeworks/) (homework solutions and code are on the private Canvas website)
13 | - [Syllabus](syllabus.md), which replaces the [OLD Class policies, etc., for Spring 2019](APPM4720_5720_Spr2019_Syllabus.pdf)
14 | - The syllabus has details on class policies, grading, textbooks and resources, topics covered, etc.
15 |
16 | Other material (grades, HW solutions) is in our LMS [Canvas](https://canvas.colorado.edu/courses/76997)
17 |
18 | # Student projects
19 | - Spring 2019. Here is a [PDF showing a brief summary of the end-of-semester student projects from Spring '19](SlideshowAllPresentations_4720Spr19_Randomized.pdf). 
If you're interested in the Shazam-like song matching algorithm (using locality sensitive hashing), their code is at [Padraig's github site](https://github.com/Lysandr/minHash_Shazam) 20 | 21 | [![image for spring 2019](SlideshowAllPresentations_4720Spr19_Randomized.jpeg)](SlideshowAllPresentations_4720Spr19_Randomized.pdf) 22 | 23 | - Fall 2021: 24 | 25 | [![image for fall 2021](SlideshowAllPresentations_5650_Fall21.jpg)](SlideshowAllPresentations_5650_Fall21.pdf) 26 | -------------------------------------------------------------------------------- /SlideshowAllPresentations_4720Spr19_Randomized.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/SlideshowAllPresentations_4720Spr19_Randomized.jpeg -------------------------------------------------------------------------------- /SlideshowAllPresentations_4720Spr19_Randomized.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/SlideshowAllPresentations_4720Spr19_Randomized.pdf -------------------------------------------------------------------------------- /SlideshowAllPresentations_5650_Fall21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/SlideshowAllPresentations_5650_Fall21.jpg -------------------------------------------------------------------------------- /SlideshowAllPresentations_5650_Fall21.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/SlideshowAllPresentations_5650_Fall21.pdf --------------------------------------------------------------------------------