├── .gitignore ├── APPM4720_5720_Spr2019_Syllabus.pdf ├── Code ├── AMS_sketch.m ├── Hadamard_teaching_code.m ├── README.md ├── countSketch.c ├── countSketch_sparse.c ├── my_normest.m ├── pdist2_faster.m ├── power_method.py ├── sketch.m └── sketch.py ├── Demos ├── README.md ├── demo01_exactRankR.ipynb ├── demo01_exactRankR.m ├── demo01_exactRankR.py ├── demo02_sorts.ipynb ├── demo02_sorts.m ├── demo02_sorts.py ├── demo03_FrobeniusNorm.c ├── demo03_FrobeniusNorm.ipynb ├── demo04_FrobeniusNorm_sparse.ipynb ├── demo04_FrobeniusNorm_sparse.m ├── demo05_5_HadamardTransform.ipynb ├── demo05_FastJL_speedTest.jl ├── demo05_FastJL_speedTest.m ├── demo05_results_excludingSetup.pdf ├── demo05_results_withSetup.pdf ├── demo06_leverageScores.ipynb ├── demo06_leverageScores.m ├── demo07_rand_mat_mult.m ├── demo07_rand_mat_mult.py ├── demo07_rand_mat_mult_ortho.py ├── demo08_higherAccuracyRegression.ipynb ├── demo08_higherAccuracyRegression.m ├── demo09_RandomizedKaczmarz.ipynb ├── demo09_RandomizedKaczmarz.m ├── demo10_05_SLQ.ipynb ├── demo10_l1_regression.ipynb ├── demo10_l1_regression.m ├── demo11_JamesSteinEstimator.m ├── demo12_CompressedSensing.m ├── demo13_EDM.mlx ├── demo13_EDM.pdf ├── demo14_MonteCarlo_and_improvements.ipynb ├── demo14_MonteCarlo_and_improvements.m ├── demo15_SGD.m ├── demo16_LSH.m ├── demo17_kNN_via_LSH.m ├── demo18_names.m ├── demo19_AMS_sketch_vs_JL.m ├── demo20_CoreSets_for_Kmeans.m ├── demo21_randomizedSVDs.ipynb ├── vignette-rsvd.jl └── vignette_rsvd.m ├── Handouts ├── Linear_algebra_notes_matrices.pdf ├── README.md └── SamplingLecture_Sept29_2021.pdf ├── Homeworks ├── APPM5650Fall21_RandomizedAlgos_HW01.pdf ├── APPM5650Fall21_RandomizedAlgos_HW02.pdf ├── APPM5650Fall21_RandomizedAlgos_HW03.pdf ├── APPM5650Fall21_RandomizedAlgos_HW04.pdf ├── APPM5650Fall21_RandomizedAlgos_HW05.pdf ├── APPM5650Fall21_RandomizedAlgos_HW06.pdf ├── APPM5650Fall21_RandomizedAlgos_HW07.pdf ├── APPM5650Fall21_RandomizedAlgos_HW08.pdf ├── APPM5650Fall21_RandomizedAlgos_HW09.pdf ├── APPM5650Fall21_RandomizedAlgos_HW10.pdf ├── ProjectInformation.md ├── ProjectRubric.pdf ├── README.md └── custom_headers.tex ├── LICENSE ├── README.md ├── SlideshowAllPresentations_4720Spr19_Randomized.jpeg ├── SlideshowAllPresentations_4720Spr19_Randomized.pdf ├── SlideshowAllPresentations_5650_Fall21.jpg ├── SlideshowAllPresentations_5650_Fall21.pdf └── syllabus.md /.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | .*.lb 14 | 15 | ## Intermediate documents: 16 | *.dvi 17 | *.xdv 18 | *-converted-to.* 19 | # these rules might exclude image files for figures etc. 
20 | # *.ps 21 | # *.eps 22 | # *.pdf 23 | 24 | ## Generated if empty string is given at "Please type another file name for output:" 25 | .pdf 26 | 27 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 28 | *.bbl 29 | *.bcf 30 | *.blg 31 | *-blx.aux 32 | *-blx.bib 33 | *.run.xml 34 | 35 | ## Build tool auxiliary files: 36 | *.fdb_latexmk 37 | *.synctex 38 | *.synctex(busy) 39 | *.synctex.gz 40 | *.synctex.gz(busy) 41 | *.pdfsync 42 | 43 | ## Auxiliary and intermediate files from other packages: 44 | # algorithms 45 | *.alg 46 | *.loa 47 | 48 | # achemso 49 | acs-*.bib 50 | 51 | # amsthm 52 | *.thm 53 | 54 | # beamer 55 | *.nav 56 | *.pre 57 | *.snm 58 | *.vrb 59 | 60 | # changes 61 | *.soc 62 | 63 | # cprotect 64 | *.cpt 65 | 66 | # elsarticle (documentclass of Elsevier journals) 67 | *.spl 68 | 69 | # endnotes 70 | *.ent 71 | 72 | # fixme 73 | *.lox 74 | 75 | # feynmf/feynmp 76 | *.mf 77 | *.mp 78 | *.t[1-9] 79 | *.t[1-9][0-9] 80 | *.tfm 81 | 82 | #(r)(e)ledmac/(r)(e)ledpar 83 | *.end 84 | *.?end 85 | *.[1-9] 86 | *.[1-9][0-9] 87 | *.[1-9][0-9][0-9] 88 | *.[1-9]R 89 | *.[1-9][0-9]R 90 | *.[1-9][0-9][0-9]R 91 | *.eledsec[1-9] 92 | *.eledsec[1-9]R 93 | *.eledsec[1-9][0-9] 94 | *.eledsec[1-9][0-9]R 95 | *.eledsec[1-9][0-9][0-9] 96 | *.eledsec[1-9][0-9][0-9]R 97 | 98 | # glossaries 99 | *.acn 100 | *.acr 101 | *.glg 102 | *.glo 103 | *.gls 104 | *.glsdefs 105 | 106 | # gnuplottex 107 | *-gnuplottex-* 108 | 109 | # gregoriotex 110 | *.gaux 111 | *.gtex 112 | 113 | # htlatex 114 | *.4ct 115 | *.4tc 116 | *.idv 117 | *.lg 118 | *.trc 119 | *.xref 120 | 121 | # hyperref 122 | *.brf 123 | 124 | # knitr 125 | *-concordance.tex 126 | # TODO Comment the next line if you want to keep your tikz graphics files 127 | *.tikz 128 | *-tikzDictionary 129 | 130 | # listings 131 | *.lol 132 | 133 | # makeidx 134 | *.idx 135 | *.ilg 136 | *.ind 137 | *.ist 138 | 139 | # minitoc 140 | *.maf 141 | *.mlf 142 | *.mlt 143 | *.mtc[0-9]* 144 | *.slf[0-9]* 145 | *.slt[0-9]* 146 | *.stc[0-9]* 147 | 148 | # minted 149 | _minted* 150 | *.pyg 151 | 152 | # morewrites 153 | *.mw 154 | 155 | # nomencl 156 | *.nlg 157 | *.nlo 158 | *.nls 159 | 160 | # pax 161 | *.pax 162 | 163 | # pdfpcnotes 164 | *.pdfpc 165 | 166 | # sagetex 167 | *.sagetex.sage 168 | *.sagetex.py 169 | *.sagetex.scmd 170 | 171 | # scrwfile 172 | *.wrt 173 | 174 | # sympy 175 | *.sout 176 | *.sympy 177 | sympy-plots-for-*.tex/ 178 | 179 | # pdfcomment 180 | *.upa 181 | *.upb 182 | 183 | # pythontex 184 | *.pytxcode 185 | pythontex-files-*/ 186 | 187 | # thmtools 188 | *.loe 189 | 190 | # TikZ & PGF 191 | *.dpth 192 | *.md5 193 | *.auxlock 194 | 195 | # todonotes 196 | *.tdo 197 | 198 | # easy-todo 199 | *.lod 200 | 201 | # xmpincl 202 | *.xmpi 203 | 204 | # xindy 205 | *.xdy 206 | 207 | # xypic precompiled matrices 208 | *.xyc 209 | 210 | # endfloat 211 | *.ttt 212 | *.fff 213 | 214 | # Latexian 215 | TSWLatexianTemp* 216 | 217 | ## Editors: 218 | # WinEdt 219 | *.bak 220 | *.sav 221 | 222 | # Texpad 223 | .texpadtmp 224 | 225 | # Kile 226 | *.backup 227 | 228 | # KBibTeX 229 | *~[0-9]* 230 | 231 | # auto folder when using emacs and auctex 232 | ./auto/* 233 | *.el 234 | 235 | # expex forward references with \gathertags 236 | *-tags.tex 237 | 238 | # standalone packages 239 | *.sta 240 | 241 | # generated if using elsarticle.cls 242 | *.spl 243 | -------------------------------------------------------------------------------- /APPM4720_5720_Spr2019_Syllabus.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/APPM4720_5720_Spr2019_Syllabus.pdf -------------------------------------------------------------------------------- /Code/AMS_sketch.m: -------------------------------------------------------------------------------- 1 | function C = AMS_sketch( X, w, d, varargin ) 2 | % C = AMS_sketch( X, w, d ) 3 | % returns a linear sketch of the input matrix X, X is p x n 4 | % (note convention: *columns* not *rows* of X are the data pts) 5 | % and the output sketch C is of size w*d x n 6 | % where C(:,i) = AMS_sketch( X(:,i) ) 7 | % 8 | % w controls number of buckets per hash (e.g., 2^8, 2^12) 9 | % d controls how many hashes we do (e.g., 7, or ceil(log2(1/.01)) ) 10 | % 11 | % C = AMS_sketch( X, w, d, parameters, values ) 12 | % gives more options, e.g., 13 | % 14 | % 'saltSeed', seed 15 | % gives a new seed to the random number generator (default: 0) 16 | % which controls both hashes 17 | % 18 | % 'transposedX', false 19 | % if True, assumes X is n x p not p x n 20 | % 21 | % Stephen Becker 22 | % This version is good for matrices that are not too sparse, 23 | % and have more than a few columns n. If you are applying 24 | % this to a very sparse vector, then you really should 25 | % use hash functions if you want sub-linear time 26 | % (this implementation calls several Count sketches, 27 | % which I have implemented in a way that is not sub-linear 28 | % for a single column). 29 | 30 | prs = inputParser; 31 | addParameter(prs,'saltSeed',.0); 32 | addParameter(prs,'transposedX',false); 33 | parse(prs,varargin{:}); 34 | saltSeed = prs.Results.saltSeed; 35 | transposedX = prs.Results.transposedX; 36 | 37 | 38 | if transposedX 39 | [n,p] = size(X); 40 | else 41 | [p,n] = size(X); 42 | end 43 | % if p > intmax('uint32') 44 | % error('Dimensions of input matrix are too large!'); 45 | % end 46 | % if w > intmax('uint16') 47 | % error('Code needs to be updated if you want w > 2^16'); 48 | % end 49 | % if 2*d > 20 50 | % error('Code needs to be updated if you want 2*d > 20'); 51 | % end 52 | 53 | rng( saltSeed ); 54 | % saltPerm = randperm( 20 ); % output of SHA has 20 int8's 55 | % 56 | % 57 | % 58 | % % C = zeros( d, w, n ); 59 | % C = zeros( d*w, n ); 60 | % 61 | % for j = 1:p 62 | % Engine = java.security.MessageDigest.getInstance('SHA'); 63 | % Engine.update(typecast( uint32(j), 'uint8')); 64 | % L = uint16(typecast( Engine.digest, 'uint8' )); 65 | % L = L( saltPerm ); 66 | % binaryHashes = sign( randn(d,1) ); % don't even need a hash "function" 67 | % for k = 1:d 68 | % ell = L(2*k-1)*2^8 + L(2*k); 69 | % ell = mod( ell, w ) + 1; % keep it in range, and make it 0-based 70 | % ind = sub2ind( [d,w], k, ell ); 71 | % C ( ind, : ) = C( ind, : ) + binaryHashes(k)*X( j, : ); 72 | % %C ( k, ell, : ) = C( k, ell, : ) + X( j, : ); % same idea, if C is 73 | % % a tensor 74 | % end 75 | % end 76 | % 77 | 78 | % % Make it faster... by skipping the hash! 79 | % Instead, just call Count Sketch function 80 | 81 | 82 | m = w; 83 | M = p; 84 | useTranspose = true; 85 | C = zeros( d*w, n ); 86 | for k = 1:d 87 | D = spdiags(sign(randn(M,1)),0,M,M); % bsxfun() is another efficient way to do this 88 | indx_map = int64(randi(m,M,1)); 89 | if transposedX 90 | % I want X', but it's already transposed, so don't worry! 
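% --- Added explanatory comment (not in the original file) ---
% Each pass of this loop computes one count sketch of the sign-flipped
% data. Equivalently (slower, but perhaps clearer), one could form the
% sketch matrix explicitly; recall m = w and M = p were set above:
%   Sk = sparse( double(indx_map), 1:M, 1, m, M ); % m x M, one 1 per column
%   C( (1+(k-1)*w):k*w, : ) = Sk*( D*X' );         % X is n x p in this branch
% The countSketch_BLAS mex call below computes the same product in
% O(nnz(X)) time without ever forming Sk.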
91 | C( (1+(k-1)*w):k*w, :) = countSketch_BLAS(X*D,indx_map,m,useTranspose)'; 92 | else 93 | C( (1+(k-1)*w):k*w, :) = countSketch_BLAS(X'*D,indx_map,m,useTranspose)'; 94 | end 95 | end -------------------------------------------------------------------------------- /Code/Hadamard_teaching_code.m: -------------------------------------------------------------------------------- 1 | function y = Hadamard_teaching_code(x) 2 | % y = Hadamard_teaching_code(x) 3 | % applies the Hadamard transform to x 4 | % If x has more than one column, the transform is applied 5 | % to each column. 6 | % This code is not fast, but it shows you how to exploit 7 | % the structure of the transform. 8 | % Note: this code does not do any sub-sampling 9 | 10 | [m,n] = size(x); 11 | if 2^nextpow2(m) ~= m 12 | error('Must have leading dimension of x be power of 2'); 13 | end 14 | 15 | y = x; 16 | for bit = 1:log2(m) 17 | k = 2^bit; % e.g., 2, 4, ..., m 18 | k2 = 2^(bit-1); % e.g., 1, 2, ..., m/2 19 | 20 | y = reshape( y, k, [], n ); 21 | tmp = y(1:k2,:,:); 22 | y(1:k2,:,:) = y(1:k2,:,:) + y(k2+1:k,:,:); 23 | y(k2+1:k,:,:) = tmp - y(k2+1:k,:,:); 24 | y = reshape( y, m, n); 25 | end -------------------------------------------------------------------------------- /Code/README.md: -------------------------------------------------------------------------------- 1 | # Code useful for the randomized algorithm class 2 | 3 | ## Sketching code 4 | - [sketch](sketch.m) is a multipurpose code that can call a variety of sketches and give you a function handle (and a matrix, if requested). It also has a self-test mode to check whether E[S^TS] = I (where S is the sketch), which is useful for spotting mistakes in scaling. This code relies on other functions inside this directory for the actual sketching. For count sketch and Hadamard transforms, make sure to compile the C code with the mex compiler, otherwise you'll have slow performance. 5 | 6 | ## Misc utilities 7 | 8 | - [my_normest](my_normest.m) for estimating the spectral norm in Matlab (variant of Matlab's normest that allows for function handles) 9 | - [power_method](power_method.py) for estimating the spectral norm in Python 10 | - [pdist2_faster](pdist2_faster.m) for calculating all pairwise distances, similar to Matlab's pdist2 but often faster (this version uses simple matrix multiplies) 11 | 12 | ## Hadamard Transform 13 | [Hadamard Transform Code](https://github.com/jeffeverett/hadamard-transform), written by Stephen Becker and Jeff Everett, is much faster than Matlab's code. You can also use the simple [Hadamard_teaching_code.m](Hadamard_teaching_code.m) file, which is sometimes actually faster than Matlab's code and is much simpler, so you can get a better idea of how the fast Hadamard transform works. 14 | 15 | ## CountSketch 16 | 17 | For a Python version, see [scipy.linalg.clarkson_woodruff_transform](https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.clarkson_woodruff_transform.html#scipy.linalg.clarkson_woodruff_transform). I haven't used it myself, so I'm not sure how efficient it is. 18 | 19 | [CountSketch.c](countSketch.c) is an implementation of CountSketch in Matlab's mex interface. You'll need to compile it.
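As an aside (an added note, not in the original README): you can also realize a count sketch as an explicit sparse matrix in pure Matlab, with no compilation, using the same `indx_map`, `mSmall`, `mBig`, and `A` as in the example below: ``` >> S = sparse( double(indx_map), 1:mBig, sign(randn(mBig,1)), mSmall, mBig ); >> P3 = S*A; % same distribution as countSketch( D*A, indx_map, mSmall, false ) ``` Here the random signs are folded into `S` rather than kept in a separate diagonal `D`; since `S` has exactly one nonzero per column, applying it still costs on the order of nnz(A). The mex versions below are faster mainly because they avoid forming `S` at all.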
See the source code for some fancy compilation options, but a basic version is: 20 | ``` 21 | >> mex countSketch.c 22 | ``` 23 | and a complicated faster version is: 24 | ``` 25 | >> mex countSketch.c -output countSketch_BLAS -DUSE_BLAS -lmwblas CFLAGS="\$CFLAGS -O3 -malign-double -march=native" 26 | ``` 27 | and run it like 28 | ``` 29 | >> mSmall = 10; mBig = 100; n = 7; 30 | >> A = randn(mBig,n); 31 | >> indx_map = int64(randi(mSmall,mBig,1)); 32 | >> D = spdiags( sign(randn(mBig,1)), 0, mBig, mBig ); 33 | >> P = countSketch( D*A, indx_map, mSmall, false ); 34 | >> P2 = countSketch( A'*D, indx_map, mSmall, true )'; % alternative way 35 | >> P2 = countSketch_BLAS( A'*D, indx_map, mSmall, true )'; % if you did the fancy compile 36 | ``` 37 | 38 | [CountSketch_sparse.c](countSketch_sparse.c) is similar but works with sparse matrices. It doesn't have an option to use BLAS, so compilation is easy: 39 | ``` 40 | >> mex countSketch_sparse.c 41 | ``` 42 | This always assumes the transpose version. So, use it like: 43 | ``` 44 | >> A = sparse(A); 45 | >> P2 = countSketch_sparse( A'*D, indx_map, mSmall )'; 46 | ``` 47 | 48 | -------------------------------------------------------------------------------- /Code/countSketch.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Standard Usage: 3 | * 4 | * indx_map = int64(randi(mSmall,mBig,1)); 5 | * D = spdiags( sign(randn(mBig,1)), 0, mBig, mBig ); 6 | * 7 | * doTranspose = false; 8 | * P = countSketch( D*A, indx_map, mSmall, doTranspose ) 9 | * 10 | * or 11 | * 12 | * doTranspose = true; 13 | * P = countSketch( A'*D, indx_map, mSmall, doTranspose )' 14 | * (note the transposes; this version should be faster 15 | * due to how Matlab stores matrices, and the fact that 16 | * Matlab can transpose a matrix very efficiently) 17 | * 18 | * A is an mBig x N matrix 19 | * P is an mSmall x N matrix 20 | * indx_map is an int64 vector of length mBig where each entry 21 | * is an integer in [1,mSmall] (i.e., 1-based indexing, Matlab-style) 22 | * 23 | * 24 | * Note: the random sign flips are NOT done in this mex file, 25 | * that is why you should multiply by the diagonal D matrix as suggested 26 | * above. The code is written this way because Matlab is very efficient 27 | * at the diagonal multiply, so writing that myself in C would lead to 28 | * worse performance. Or maybe it would be fast when using the BLAS 29 | * version but not by much, so I was too lazy to do it... 30 | * 31 | * Implements the "CountSketch" 32 | * as known in the data streaming literature 33 | * (e.g., 7] Moses Charikar, Kevin Chen, and Martin Farach-Colton. 34 | * "Finding frequent items in data streams". Theor. 35 | * Comput. Sci., 312(1):3–15, 2004 ) 36 | * 37 | * In the compressed least squares literature, this was analyzed in 38 | * 39 | * "Low Rank Approximation and Regression in Input Sparsity Time" 40 | * Kenneth L. Clarkson, David P. 
Woodruff 41 | * http://arxiv.org/abs/1207.6365 42 | * STOC '13, co-winner of best paper award 43 | * 44 | * Computational complexity is nnz(A) 45 | * 46 | * */ 47 | 48 | /* Compiling: 49 | * 50 | * Compile with just: mex countSketch.c 51 | * 52 | * or you can try to get more performance with things like 53 | * 54 | * mex countSketch.c CFLAGS="\$CFLAGS -O3 -malign-double -march=native" 55 | * 56 | * or 57 | * 58 | * mex countSketch.c -output countSketch_BLAS -DUSE_BLAS -lmwblas CFLAGS="\$CFLAGS -O3 -malign-double -march=native" 59 | * 60 | * (these are for GCC compilers, you'll need to change the flags slightly 61 | * for MVCC or LLVM/Clang compilers, e.g., Clang doens't use -malign-double ) 62 | * NOTE: pass the flag -DLONGLONG to use LONG LONG pointer to indx_map. This is 63 | * necessary if LONG is of size 32 bit. 64 | * 65 | */ 66 | 67 | /* Notes: 68 | * 69 | * For efficiency, since Matlab uses column-major order, 70 | * the input should be At ( = A') and NOT A 71 | * Likewise, the output is Pt ( = P' = (Sketch(A))' ) 72 | * 73 | * In theory, this can be applied to sparse matrices 74 | * It would only be efficient if they are stored in csr order 75 | * (Matlab uses csc), or if we have the transpose of a csc matrix 76 | * (i.e., do the exact same transpose trick we do for sparse 77 | * matrices ) 78 | * 79 | * For now, does NOT do sparse matrices 80 | * There is a separate code just for sparse matrices 81 | * 82 | * Warning: the code does not do error checks, so it can easily crash. 83 | * Make sure that the "indx_map" is of type int64 84 | * 85 | * Stephen Becker, srbecker@us.ibm.com, June 5 2014 86 | * The use of CountSketch was suggested by Haim Avron 87 | * 88 | * Updates by Stephen Becker, Feb 2019 89 | **/ 90 | #if defined(__GNUC__) && !(defined(__clang__)) 91 | #include 92 | #endif 93 | #include 94 | #include "mex.h" 95 | 96 | #ifdef USE_BLAS 97 | #include "blas.h" 98 | #endif 99 | 100 | 101 | void mexFunction( int nlhs, mxArray *plhs[], 102 | int nrhs, const mxArray*prhs[] ) 103 | 104 | { 105 | double *At,*Pt; 106 | double *A, *P; 107 | double alpha; 108 | #ifdef LONGLONG 109 | long long *indx_map; 110 | #else 111 | long *indx_map; 112 | #endif 113 | mwSize mBig,mSmall,n, i,j,k; 114 | int DO_TRANSPOSE=1; 115 | #ifdef USE_BLAS 116 | ptrdiff_t size, stride, strideBig; 117 | #endif 118 | 119 | /* Check for proper number of arguments */ 120 | if (nrhs != 4) { 121 | mexErrMsgIdAndTxt( "MATLAB:countSketch:invalidNumInputs", 122 | "Four input arguments required."); 123 | } else if (nlhs > 1) { 124 | mexErrMsgIdAndTxt( "MATLAB:countSketch:maxlhs", 125 | "Too many output arguments."); 126 | } 127 | if ( !(mxIsInt64(prhs[1])) ) 128 | mexErrMsgTxt("2nd input must be of type int64"); 129 | #ifdef LONGLONG 130 | indx_map = (long long *)mxGetData( prhs[1] ); 131 | #else 132 | indx_map = (long *)mxGetData( prhs[1] ); 133 | #endif 134 | mSmall = mxGetScalar( prhs[2] ); 135 | DO_TRANSPOSE = (int)mxGetScalar( prhs[3] ); 136 | 137 | if (mxIsSparse(prhs[0])) 138 | mexErrMsgTxt("Cannot handle sparse 'A' matrix, try countSketch_sparse.c instead"); 139 | 140 | if ( DO_TRANSPOSE == 1 ) { 141 | At = mxGetPr(prhs[0] ); 142 | n = mxGetM( prhs[0] ); 143 | mBig= mxGetN( prhs[0] ); 144 | /* Create a matrix for the return argument */ 145 | plhs[0] = mxCreateDoubleMatrix( (mwSize)n, (mwSize)mSmall, mxREAL); 146 | Pt = mxGetPr( plhs[0] ); 147 | P = NULL; /* try to prevent bugs */ 148 | A = NULL; 149 | #ifdef USE_BLAS 150 | size = (ptrdiff_t)n; 151 | stride = (ptrdiff_t)1; 152 | #endif 153 | /* And the actual 
computation: 154 | * Copy columns of At to Pt */ 155 | alpha = 1.; 156 | for ( i=0; i < mBig ; i++ ) { 157 | k = indx_map[i]-1; /* 0-based */ 158 | /* copy Pt(:,k) <-- At(:,i) 159 | * e.g. since height of Pt is N, 160 | * P + k*n <-- At + i*n */ 161 | #ifdef USE_BLAS 162 | daxpy(&size,&alpha,At+i*n,&stride,Pt+k*n,&stride); 163 | #else 164 | for ( j=0; j 47 | #endif 48 | #include 49 | #include "mex.h" 50 | 51 | /* NOTE: pass the flag -DLONGLONG to use LONG LONG pointer to indx_map. This is 52 | * necessary if LONG is of size 32 bit. 53 | */ 54 | 55 | void mexFunction( int nlhs, mxArray *plhs[], 56 | int nrhs, const mxArray*prhs[] ) 57 | 58 | { 59 | double *At,*Pt; 60 | #ifdef LONGLONG 61 | long long *indx_map; 62 | #else 63 | long *indx_map; 64 | #endif 65 | mwSize mBig,mSmall,n, i,j,k; 66 | 67 | mwIndex *ir, *jc; 68 | double *a; 69 | 70 | /* Check for proper number of arguments */ 71 | if (nrhs != 3) { 72 | mexErrMsgIdAndTxt( "MATLAB:countSketch_sparse:invalidNumInputs", 73 | "Three input arguments required."); 74 | } else if (nlhs > 1) { 75 | mexErrMsgIdAndTxt( "MATLAB:countSketch_sparse:maxlhs", 76 | "Too many output arguments."); 77 | } 78 | if ( !(mxIsInt64(prhs[1])) ) 79 | mexErrMsgTxt("2nd input must be of type int64"); 80 | #ifdef LONGLONG 81 | indx_map = (long long *)mxGetData( prhs[1] ); 82 | #else 83 | indx_map = (long *)mxGetData( prhs[1] ); 84 | #endif 85 | mSmall = mxGetScalar( prhs[2] ); 86 | 87 | 88 | At = mxGetPr(prhs[0] ); 89 | n = mxGetM( prhs[0] ); 90 | mBig= mxGetN( prhs[0] ); 91 | /* Create a matrix for the return argument */ 92 | plhs[0] = mxCreateDoubleMatrix( (mwSize)n, (mwSize)mSmall, mxREAL); 93 | Pt = mxGetPr( plhs[0] ); 94 | 95 | if (mxIsComplex(prhs[0])) 96 | mexErrMsgTxt("Cannot handle complex data yet"); 97 | 98 | if (mxIsSparse(prhs[0])) { 99 | 100 | ir = mxGetIr(prhs[0]); /* Row indexing */ 101 | jc = mxGetJc(prhs[0]); /* Column count */ 102 | a = mxGetPr(prhs[0]); /* Non-zero elements */ 103 | 104 | /* Loop through columns of At */ 105 | 106 | for ( i=0; i < mBig; i++ ) { 107 | k = indx_map[i]-1; /* 0-based */ 108 | /* copy Pt(:,k) <-- At(:,i) 109 | * e.g. 
since height of Pt is N, 110 | * P + k*n <-- At + i*n */ 111 | 112 | for ( j=jc[i]; j= 2 && isnumeric(St) && numel(St)==1, tol = St; end 46 | if nargin >= 3 && isnumeric(n) && numel(n)==1, maxiter = n; end 47 | n = size(S,2); 48 | end 49 | 50 | if isempty(nVectors) 51 | if numel(n) > 1 52 | nVectors = 1; 53 | else 54 | nVectors = 1; 55 | end 56 | end 57 | 58 | if ~IMPLICIT 59 | x = sum(abs(S),1)'; 60 | if nVectors > 1 61 | x = [x, randn(n,nVectors-1)]; 62 | end 63 | else 64 | % x = ones(n,1); % can interact with some special operators 65 | if numel(n) == 1 66 | x = randn(n,nVectors); 67 | else 68 | if nVectors > 1 69 | error('Not compatible in this mode'); 70 | end 71 | x = randn(n); % assume n is a size vector 72 | end 73 | end 74 | 75 | cnt = 0; 76 | if nVectors > 1 77 | e = sqrt(max( sum(x.^2,1) ) ); 78 | [x,~] = qr(x,0); 79 | else 80 | e = norm(x(:)); 81 | if e == 0, return, end 82 | x = x/e; 83 | end 84 | e0 = 0; 85 | while abs(e-e0) > tol*e && cnt < maxiter 86 | e0 = e; 87 | if ~IMPLICIT 88 | Sx = S*x; 89 | else 90 | Sx = S(x); 91 | end 92 | if nnz(Sx) == 0 93 | Sx = rand(size(Sx)); 94 | end 95 | if nVectors > 1 96 | e = sqrt(max( sum(Sx.^2,1) ) ); 97 | % [Q,~] = qr(Sx,0); 98 | % Sx = Q; 99 | else 100 | e = norm(Sx(:)); 101 | end 102 | if ~IMPLICIT 103 | x = S'*Sx; 104 | else 105 | x = St(Sx); 106 | end 107 | if nVectors > 1 108 | [Q,~] = qr(x,0); 109 | x = Q; 110 | else 111 | x = x/norm(x(:)); 112 | end 113 | cnt = cnt+1; 114 | end 115 | -------------------------------------------------------------------------------- /Code/pdist2_faster.m: -------------------------------------------------------------------------------- 1 | function [D,I] = pdist2_faster(X,Y,style,smallStr,K) 2 | % pdist2_faster Pairwise distance between two sets of observations. 3 | % D = pdist2(X,Y) returns a matrix D containing the Euclidean distances 4 | % between each pair of observations in the MX-by-N data matrix X and 5 | % MY-by-N data matrix Y. Rows of X and Y correspond to observations, 6 | % and columns correspond to variables. D is an MX-by-MY matrix, with the 7 | % (I,J) entry equal to distance between observation I in X and 8 | % observation J in Y. 9 | % 10 | % D = pdist2(X,Y,DISTANCE) computes D using DISTANCE. Choices are: 11 | % 12 | % 'euclidean' - Euclidean distance (default) 13 | % 'squaredeuclidean' - Squared Euclidean distance 14 | % 15 | % Also gives the index corresponding to the smallest entry if requested ... 16 | % see documentation for pdist2 from the Statistics Toolbox 17 | % Code by Stephen Becker, March 2019 18 | % See also pdist2 19 | 20 | if nargin < 2 || isempty(Y) 21 | Y = X; 22 | end 23 | 24 | [Mx,N] = size(X); 25 | [My,N] = size(Y); 26 | 27 | XtY = X*Y'; 28 | nrm1 = sum(X.^2,2); 29 | nrm2 = sum(Y.^2,2); 30 | 31 | D = nrm1*ones(1,My) + ones(Mx,1)*nrm2' - 2*XtY; 32 | 33 | if nargin > 2 && ~isempty(style) 34 | if isa(style,'function_handle') 35 | % apply the function handle! 
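% --- Added explanatory comment (not in the original file) ---
% The matrix D computed above uses the identity
%   ||x - y||^2 = ||x||^2 + ||y||^2 - 2*x'*y,
% which is why a single matrix multiply X*Y' does all the pairwise work.
% One caveat: floating-point cancellation can leave tiny negative entries
% in D; if that matters for your application, clamp via D = max(D,0)
% before any square roots are taken.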
36 | D = style(D); 37 | else 38 | switch lower(style) 39 | case 'euclidean' 40 | D = sqrt(D); 41 | case 'squaredeuclidean' 42 | end 43 | end 44 | else 45 | % by default, Euclidean 46 | D = sqrt(D); 47 | end 48 | 49 | % and similar to pdist2, 50 | if nargin > 3 51 | if nargin ==5 && ~isempty(K) 52 | if K~=1 53 | error('K ~= 1 not supported'); 54 | end 55 | end 56 | if strcmpi(smallStr,'smallest') 57 | [~,I] = min( D, [], 1 ); % over 1st dimension, like Matlab's convention 58 | D = D(I); 59 | else 60 | fprintf('Bad string: %s\n'); 61 | error('Wrong 4th input'); 62 | end 63 | end 64 | end -------------------------------------------------------------------------------- /Code/power_method.py: -------------------------------------------------------------------------------- 1 | from scipy.sparse.linalg import LinearOperator 2 | from scipy.sparse import spmatrix 3 | import numpy as np 4 | import logging 5 | 6 | def spectral_norm(A, tol=1e-8, max_iter=1000): 7 | """Computes the spectral norm of a linear operator A using power iteration. 8 | 9 | Parameters 10 | =================== 11 | - `A` (`numpy.ndarray`, `scipy.sparse.spmatrix`, or `scipy.sparse.linalg.LinearOperator`): 12 | the matrix for which we want to compute the spectral norm. 13 | 14 | Keyword parameters 15 | ==================== 16 | - `tol` (float, default = `1e-8`): tolerance used to determine whether or not we 17 | should stop iterating. Once the estimates for the spectral norm are within distance 18 | `tol` of one another, we stop the power iterations and return. 19 | - `max_iter` (int, default = `1000`): maximum number of power iterations to do. If 20 | we reach this number of iterations then this function will return, but will display 21 | a warning that we reached the maximum number of iterations. 22 | - Power iteration can be extremely slow to converge, so you may need a large value 23 | of `max_iter` in order to find the true spectral norm. 24 | 25 | Return 26 | ==================== 27 | - `sp_norm` (float): the estimated spectral norm of `A`. 28 | 29 | Code by Will Shand at the request of Stephen Becker, March 2019 30 | """ 31 | if not any(issubclass(type(A),T) for T in [np.ndarray, spmatrix, LinearOperator]): 32 | raise ValueError("spectral_norm can only take arguments of type " 33 | "numpy.ndarray, scipy.sparse.spmatrix, or " 34 | "scipy.sparse.linalg.LinearOperator.") 35 | 36 | # Create an anonymous function matvec_op whose effect is equivalent to multiplying 37 | # the input by A'A. 38 | if issubclass(type(A), LinearOperator): 39 | matvec_op = lambda x: A.adjoint().matvec(A.matvec(x)) 40 | else: 41 | matvec_op = lambda x: A.T.dot(A.dot(x)) 42 | 43 | sp_norm = 0. 44 | sp_iter = np.random.normal(size = A.shape[-1]) 45 | for ii in range(max_iter): 46 | Ax = matvec_op(sp_iter) 47 | new_sp_norm = np.linalg.norm(sp_iter) 48 | 49 | # Stopping condition when eigenvalue estimates get sufficiently close 50 | if abs(new_sp_norm - sp_norm) < tol: 51 | break 52 | else: 53 | sp_norm = new_sp_norm 54 | sp_iter = Ax / new_sp_norm 55 | 56 | if ii == max_iter-1: 57 | logging.warn(" spectral_norm ran for max_iter = %d iterations " 58 | "without converging. Returning..." % max_iter) 59 | 60 | return np.sqrt(sp_norm) 61 | 62 | """ 63 | TESTING 64 | """ 65 | if __name__ == "__main__": 66 | from scipy.sparse import random as sprandom 67 | 68 | # 1. 
Test on some random numpy arrays 69 | for ii in range(50): 70 | X = np.random.normal(size=(50,30)) 71 | assert(abs(np.linalg.norm(X,2) - spectral_norm(X, max_iter=5000, tol=1e-8)) <= 1e-7) 72 | 73 | # 2. Test on some LinearOperator instances 74 | for ii in range(50): 75 | X1 = sprandom(50,50,density=0.2) 76 | X2 = np.random.normal(size=(10,50)) 77 | 78 | # Dense representation of difference X1 - X2'X2 79 | A = X1 - X2.T.dot(X2) 80 | 81 | # LinearOperator representation of X1 - X2'X2 82 | mv = lambda x: X1.dot(x) - X2.T.dot(X2.dot(x)) 83 | rmv = lambda x: X1.T.dot(x) - X2.T.dot(X2.dot(x)) 84 | L = LinearOperator(X1.shape, matvec=mv, rmatvec=rmv) 85 | 86 | assert(abs(np.linalg.norm(A,2) - spectral_norm(L, max_iter=5000, tol=1e-8)) <= 1e-7) 87 | -------------------------------------------------------------------------------- /Code/sketch.m: -------------------------------------------------------------------------------- 1 | function [fcn,S] = sketch( m, M, typeOfSketch, performTest, varargin ) 2 | % fcn = sketch( m, M, 'type' ) 3 | % returns a function that implements a sketch of the requested type 4 | % so that Y = fcn(A) is a sketched version of A. 5 | % Every time you call this function, you get a new random sketch 6 | % Y has m rows, A has M rows. The number of columns of A is arbitrary 7 | % (Y will have the same number of columns) 8 | % 9 | % Valid types of sketches: 10 | % gaussian, haar, count, fjlt, hadamard, sparse, subsample 11 | % 12 | % [fcn,S] = sketch( ... ) 13 | % also returns the explicit matrix representation of the sketch 14 | % e.g., fcn(A) is the same as S*A. S is of size m x M 15 | % 16 | % sketch( ..., performTest ) 17 | % if performTest = true, then this checks that the sketch 18 | % really has the property E[ S'S ] = I_M 19 | % errorHistory = sketch( ..., performTest ) 20 | % will return the full history of the errrors 21 | % 22 | % sketch( ..., parameterName, parameterValue, ... ) 23 | % allows optional parameter, such as: 24 | % 'sparsity' (for sparse sketches) 25 | % 'weights' (for subsample, if you want it non-uniform) 26 | % 'nReps' (for how many repetitions to use when testing) 27 | % 28 | % Stephen Becker, Feb 2019, updates Oct 2021 29 | 30 | % todo: instead of countSketch_slow, make a sparse matrix, 31 | % as in scipy.linalg.clarkson_woodruff_transform 32 | % todo: create adjoints, as in the python version of this code 33 | 34 | S = []; 35 | if nargin < 4 || isempty(performTest) 36 | performTest = false; 37 | end 38 | 39 | 40 | prs = inputParser; 41 | defaultSparsity = 0.01; 42 | addParameter(prs,'sparsity',defaultSparsity); 43 | addParameter(prs,'weights',[]); 44 | addParameter(prs,'nReps',100); 45 | parse(prs,varargin{:}); 46 | sparsity = prs.Results.sparsity; 47 | weights = prs.Results.weights; 48 | nReps = prs.Results.nReps; 49 | 50 | if performTest 51 | sumS = zeros(M); 52 | if nargout > 0 53 | errHist = zeros(nReps,1); 54 | end 55 | fprintf('\nRunning test to see of E[S''S] = I (for sketch of type %s)\n', typeOfSketch); 56 | warning('off','sketch:buildMatrix') 57 | printEvery = round( nReps / 10 ); 58 | for rep = 1:nReps 59 | % Call this own function recursively 60 | [~,S] = sketch( m, M, typeOfSketch, false, varargin{:} ); % 10/26/21 fixed bug here 61 | sumS = sumS + S'*S; 62 | if nargout > 0 63 | errHist(rep) = norm( sumS/rep - eye(M), 'fro' )/M; 64 | end 65 | if ~mod(rep,printEvery) 66 | err = norm( sumS/rep - eye(M), 'fro' )/M; 67 | fprintf('%3d trials, error || sampleMean(S''S)-I ||_F is %4.1e', ... 
68 | rep, err ); 69 | if rep > printEvery 70 | fprintf(', %.2f change', err/errOld); 71 | end 72 | fprintf('\n'); 73 | errOld = err; 74 | end 75 | end 76 | sumS = sumS/nReps; 77 | fprintf('The first 5 x 5 block of sampleMean is: \n'); 78 | disp( sumS(1:5,1:5) ); 79 | fprintf('Average diagonal entry is %.7f, should be 1\n', mean(diag(sumS)) ); 80 | if nargout > 0 81 | fcn = errHist; 82 | end 83 | warning('on','sketch:buildMatrix') 84 | return; 85 | end 86 | 87 | 88 | 89 | switch lower(typeOfSketch) 90 | 91 | case 'gaussian' 92 | S = randn(m,M)/sqrt(m); 93 | fcn = @(A) S*A; 94 | 95 | case 'haar' % see http://arxiv.org/abs/math-ph/0609050 by Mezzadri 96 | [Q,R] = qr( randn(M,m), 0 ); 97 | d = sign(diag(R)); 98 | Q = Q*spdiags(d,0,m,m); 99 | S = sqrt(M/m)*Q'; 100 | fcn = @(A) S*A; 101 | 102 | case {'count', 'countsketch'} 103 | d = sign(randn(M,1)); 104 | D = spdiags(d,0,M,M); % bsxfun() is another efficient way to do this 105 | useTranspose = true; 106 | indx_map = int64(randi(m,M,1)); % don't do this in C! 107 | if exist( 'countSketch_BLAS', 'file' ) 108 | fcn = @(A) countSketch_BLAS(A'*D,indx_map,m,useTranspose)'; 109 | elseif exist( 'countSketch', 'file' ) 110 | fcn = @(A) countSketch(A'*D,indx_map,m,useTranspose)'; 111 | else 112 | msg = 'Using slow countSketch, please compile countSketch.c'; 113 | msg = [msg,'\n To turn this warning off, call warning(''off'',''sketch:slowCount'')']; 114 | warning('sketch:slowCount',msg); 115 | fcn = @(A) slowCountSketch( D*A, double(indx_map), m ); 116 | end 117 | 118 | case 'fjlt' 119 | d = sign(randn(M,1)); 120 | D = spdiags(d,0,M,M); % bsxfun() is another efficient way to do this 121 | ind = randperm( M, m ); 122 | subsample = @(X) X(ind,:); 123 | fcn = @(A) sqrt(M/m)*subsample( dct( D*A ) ); % FIXME 124 | 125 | case {'fljt_hadamard','hadamard'} % Hadamard version of FJLT 126 | M2 = 2^nextpow2(M); 127 | if M ~= M2 128 | % need to zero pad 129 | upsample = @(X) [X; zeros(M2 - M, size(X,2) ) ]; 130 | else 131 | upsample = @(X) X; % do nothing 132 | end 133 | 134 | d = sign(randn(M2,1)); 135 | D = spdiags(d,0,M2,M2); % bsxfun() is another efficient way to do this 136 | ind = randperm( M2, m ); 137 | subsample = @(X) X(ind,:); 138 | 139 | if exist('hadamard_pthreads','file')==3 140 | fcn = @(x) 1/sqrt(m)*subsample( hadamard_pthreads( D*upsample(full(x))) ); 141 | elseif exist('hadamard','file')==3 142 | fcn = @(x) 1/sqrt(m)*subsample( hadamard( D*upsample(full(x))) ); 143 | elseif exist('Hadamard_teaching_code','file')==2 144 | % It turns out our naive Matlab implementation is better than 145 | % the fwht function! 146 | fcn = @(x) 1/sqrt(m)*subsample( Hadamard_teaching_code( ... 147 | D*upsample(full(x)) ) ); 148 | else 149 | % This is slow! 150 | fcn = @(x) (M2*sqrt(1/m))*subsample( fwht( D*upsample(full(x)), [], 'hadamard') ); 151 | end 152 | 153 | case 'sparse' 154 | S = sign(sprandn(m,M,sparsity)); 155 | nz = nnz(S); 156 | if nz == 0 157 | warning('skech:sparse','Sparse sketch is all zeros! 
Increase sparsity and/or dimensions'); 158 | end 159 | sparsity_actual = nz/(m*M); % often slightly under sparse 160 | S = sqrt(1/(m*sparsity_actual))*S; % we may not have had exactly sparsity*m*n entries 161 | fcn = @(A) S*A; 162 | 163 | case 'subsample' 164 | if isempty(weights) 165 | ind = randperm( M, m ); 166 | subsample = @(X) X(ind,:); 167 | fcn = @(A) sqrt(M/m)*subsample(A); 168 | else 169 | weights = weights/sum(weights); % normalize to a valid probability 170 | ind = randsample( M, m, true, weights ); 171 | subsample = @(X) X(ind,:); 172 | fcn = @(A) spdiags(sqrt(1./(m*weights(ind))),0,m,m)*subsample(A); 173 | end 174 | otherwise 175 | error('Invalid type of sketch requested'); 176 | end 177 | 178 | if isempty(S) && nargout >= 2 179 | msg='You have requested the explicit sketch matrix which is slow!'; 180 | msg=[msg,'\n To turn this warning off, call warning(''off'',''sketch:buildMatrix'')']; 181 | warning('sketch:buildMatrix',msg); 182 | S = fcn(eye(M)); 183 | end 184 | 185 | end % end of main function 186 | 187 | function Y = slowCountSketch( DX, targetRows, m ) 188 | % slow version of count sketch 189 | Y = zeros(m, size(DX,2) ); 190 | for j = 1:size(DX,1) 191 | i = targetRows(j); 192 | Y(i,:) = Y(i,:) + DX(j,:); 193 | end 194 | end 195 | 196 | -------------------------------------------------------------------------------- /Demos/README.md: -------------------------------------------------------------------------------- 1 | # Demos 2 | 3 | Below are the demos from Spring 2019, which we will use and modify Fall 2021. Most are Matlab, some are Python or Julia. TODO: create ipynb versions (with colab links) for the Python demos. (If a student wants to do this, please go ahead, and make a pull request) 4 | 5 | The hyperlinks below are to ipynb (jupyter) notebooks using [nbviewer](https://nbviewer.jupyter.org) since github's default markdown interpreter doesn't always work (it often works if you refresh the page a few times, but not always) 6 | 7 | - [Demo 1](https://nbviewer.jupyter.org/github/stephenbeckr/randomized-algorithm-class/blob/master/Demos/demo01_exactRankR.ipynb): a simple randomized SVD algorithm assuming exactly low-rank matrix, ([colab link for ipynb](https://colab.research.google.com/github/stephenbeckr/randomized-algorithm-class/blob/master/Demos/demo01_exactRankR.ipynb)) 8 | - [Demo 2](https://nbviewer.jupyter.org/github/stephenbeckr/randomized-algorithm-class/blob/master/Demos/demo02_sorts.ipynb): compare deterministic and randomized bubble and quicksorts, ([colab link for ipynb](https://colab.research.google.com/github/stephenbeckr/randomized-algorithm-class/blob/master/Demos/demo02_sorts.ipynb)) 9 | - Demo 3: compare different ways to compute the Frobenius norm of a matrix (in C) 10 | - Demo 4: same as demo 3 but for sparse matrices (in Matlab). 
Compares row vs column access 11 | - Demo 5: speed/timing results for various Fast Johnson-Lindenstrauss Transforms 12 | - Demo 6: statistical leverage scores applied to least-squares regression 13 | - Demo 7: random matrix multiplication via sub-sampling, following Drineas/Mahoney summer school notes 14 | - Demo 8: high accuracy l2 regression via either iterative Hessian sketch or preconditioning (BLENDENPIK/LSRN) 15 | - Demo 9: Randomized Kaczmarz for solving consistent over-determined systems of equations 16 | - Demo 10: l1 regression and p-stable distributions (for p=1,2, i.e., Cauchy and Gaussian) 17 | - Demo 11: James-Stein estimator 18 | - Demo 12: Basic noiseless Compressed Sensing demo 19 | - Demo 13: Euclidean Distance Matrix (EDM) completion example, using nuclear norm minimization 20 | - Demo 14: Monte Carlo integration and improvements (quasi-Monte Carlo, control variates), and comparison with quadrature 21 | - Demo 15: Stochastic Gradient Descent (SGD) and improvements (SAGA, SVRG, Mini-batches, iterate averaging) 22 | - Demo 16: Locality Sensitive Hashing (LSH): MinHash, SimHash, Euclidean distance hash 23 | - Demo 17: LSH applied to k-Nearest-Neighbors (kNN) 24 | - Demo 18: CountMin sketch to efficiently find frequencies of names (using data from the Social Security Administration) 25 | - Demo 19: AMS sketch vs Count sketch (median vs mean postprocessing) 26 | - Demo 20: Core sets for Kmeans using Kmeans++ as a coarse approximation 27 | 28 | At some point I thought I had a distinction between "vignettes" and "demos", but I think that is gone now, and I've tried to rename them all to "demo". 29 | 30 | 31 | ### ipynb notebooks not rendering 32 | Do you ever get the error message "Sorry, something went wrong. Reload?" when clicking on an `ipynb` file? If so, try refreshing the page a few times. If that doesn't resolve it soon (sometimes it does, sometimes it doesn't), then you can try either 33 | 1. go to the repository on GitHub directly and "open" the file using the github interface 2.
view the file by going to and pasting in the URL 35 | -------------------------------------------------------------------------------- /Demos/demo01_exactRankR.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "demo01_exactRankR.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyNs9oSNhJJJsBsL89hqpJF/", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "tpCtpbyG6Od3" 34 | }, 35 | "source": [ 36 | "# Demo #1\n", 37 | "\n", 38 | "APPM 5650 Randomized Algorithms, Fall 2021\n", 39 | "\n", 40 | "Stephen Becker (original MATLAB '19, jupyter version '21) & Jake Knigge (Python '19)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "metadata": { 46 | "id": "azcvL3hp6Ir9" 47 | }, 48 | "source": [ 49 | "import numpy as np\n", 50 | "from numpy.linalg import norm\n", 51 | "from scipy.sparse.linalg import LinearOperator, svds\n", 52 | "import scipy\n", 53 | "\n", 54 | "np.set_printoptions(precision = 4) # display only four digits\n", 55 | "rng = np.random.default_rng(12345)\n", 56 | "n = np.int(4e3); m = n # dimension of problem\n", 57 | "r = np.int(100) # rank of matrix\n", 58 | "\n", 59 | "Left = rng.standard_normal( size=(m,r))\n", 60 | "Right= rng.standard_normal( size=(r,n))\n", 61 | "A = Left@Right\n", 62 | "# Another case is that we *know* A has this structure, in which case we can exploit:\n", 63 | "A_operator = LinearOperator( (m,n), matvec = lambda x : Left@(Right@x), \n", 64 | " rmatvec = lambda y : Right.T@(Left.T@y) )\n", 65 | "\n", 66 | "def printError(U,s,Vh):\n", 67 | " S = np.reshape( s, (len(s),1) )\n", 68 | " A_estimate = U@(S*Vh)\n", 69 | " err = norm( A - A_estimate ) / norm( A )\n", 70 | " print(f'The error ||A-A_estimate||_F/||A||_F is {err:0.2e}')\n", 71 | " print(f'The largest and smallest (non-zero) singular values are {s[0]:0.4f} and {s[-1]:0.4f}')" 72 | ], 73 | "execution_count": 1, 74 | "outputs": [] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": { 79 | "id": "figfEc3m6raF" 80 | }, 81 | "source": [ 82 | "## Find SVD of $A$ with conventional methods" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": { 88 | "id": "jDN11ngv-a3B" 89 | }, 90 | "source": [ 91 | "Dense SVD" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "metadata": { 97 | "colab": { 98 | "base_uri": "https://localhost:8080/" 99 | }, 100 | "id": "32Z8kWeA6q4M", 101 | "outputId": "97da52da-58f6-4d6d-ff43-027acd13bec8" 102 | }, 103 | "source": [ 104 | "%time U, S, Vh = np.linalg.svd(A, full_matrices=False)\n", 105 | "\n", 106 | "printError(U,S,Vh)" 107 | ], 108 | "execution_count": 2, 109 | "outputs": [ 110 | { 111 | "output_type": "stream", 112 | "text": [ 113 | "CPU times: user 1min 31s, sys: 3.36 s, total: 1min 34s\n", 114 | "Wall time: 48.8 s\n", 115 | "The error ||A-A_estimate||_F/||A||_F is 2.60e-15\n", 116 | "The largest and smallest (non-zero) singular values are 4854.7887 and 0.0000\n" 117 | ], 118 | "name": "stdout" 119 | } 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": { 125 | "id": "2JU63wKz-fG-" 126 | }, 127 | "source": [ 
128 | "Krylov subspace method (usually best for sparse matrices or some kind of structure)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "metadata": { 134 | "colab": { 135 | "base_uri": "https://localhost:8080/" 136 | }, 137 | "id": "HCBTmURM_Dav", 138 | "outputId": "57e07f79-60db-4f44-ef14-aa3a9ba0b016" 139 | }, 140 | "source": [ 141 | "%time U, S, Vh = scipy.sparse.linalg.svds( A, k=r)\n", 142 | "\n", 143 | "printError(U,S,Vh)" 144 | ], 145 | "execution_count": 4, 146 | "outputs": [ 147 | { 148 | "output_type": "stream", 149 | "text": [ 150 | "CPU times: user 7.61 s, sys: 4.85 s, total: 12.5 s\n", 151 | "Wall time: 6.49 s\n", 152 | "The error ||A-A_estimate||_F/||A||_F is 9.28e-16\n", 153 | "The largest and smallest (non-zero) singular values are 3159.3836 and 4854.7887\n" 154 | ], 155 | "name": "stdout" 156 | } 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": { 162 | "id": "JObhNvxwAX0v" 163 | }, 164 | "source": [ 165 | "... and **if we knew the structure of $A$** :\n", 166 | "(careful: for `svds` the documentation says \"The order of the singular values is not guaranteed.\")" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "metadata": { 172 | "colab": { 173 | "base_uri": "https://localhost:8080/" 174 | }, 175 | "id": "XQq_XTcmASK_", 176 | "outputId": "d658d6c6-3308-4b18-d23a-d6d1ebf75ce5" 177 | }, 178 | "source": [ 179 | "%time U, S, Vh = scipy.sparse.linalg.svds( A_operator, k=r)\n", 180 | "\n", 181 | "printError(U,S,Vh)" 182 | ], 183 | "execution_count": 5, 184 | "outputs": [ 185 | { 186 | "output_type": "stream", 187 | "text": [ 188 | "CPU times: user 879 ms, sys: 526 ms, total: 1.4 s\n", 189 | "Wall time: 759 ms\n", 190 | "The error ||A-A_estimate||_F/||A||_F is 1.29e-15\n", 191 | "The largest and smallest (non-zero) singular values are 3159.3836 and 4854.7887\n" 192 | ], 193 | "name": "stdout" 194 | } 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": { 200 | "id": "8oQdOtlD6ysQ" 201 | }, 202 | "source": [ 203 | "## Find SVD of $A$ with randomized method" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": { 209 | "id": "x0mHeQc0AeTm" 210 | }, 211 | "source": [ 212 | "(no knowledge of the structure of $A$ required, other than knowing a good value for $r$)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "metadata": { 218 | "colab": { 219 | "base_uri": "https://localhost:8080/" 220 | }, 221 | "id": "e0osJVd560y9", 222 | "outputId": "12aa4286-8014-4ba0-e8c8-2fbb008b85f3" 223 | }, 224 | "source": [ 225 | "%%time\n", 226 | "Omega = rng.standard_normal(size=(n, r))\n", 227 | "Y = A@Omega # matrix multiply\n", 228 | "Q, R = np.linalg.qr(Y, mode='reduced')\n", 229 | "QtA = Q.T@A\n", 230 | "# A = Q@QtA, which is a low-rank factorization. 
If we also want\n", 231 | "# the SVD of A, then continue a little bit more:\n", 232 | "U_temp, S, Vh = np.linalg.svd(QtA, full_matrices=False)\n", 233 | "U = Q@U_temp" 234 | ], 235 | "execution_count": 12, 236 | "outputs": [ 237 | { 238 | "output_type": "stream", 239 | "text": [ 240 | "CPU times: user 707 ms, sys: 72.9 ms, total: 780 ms\n", 241 | "Wall time: 439 ms\n" 242 | ], 243 | "name": "stdout" 244 | } 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "metadata": { 250 | "colab": { 251 | "base_uri": "https://localhost:8080/" 252 | }, 253 | "id": "9k6tub2965rj", 254 | "outputId": "46d59c6b-9d19-44da-ade7-3545d7c316f9" 255 | }, 256 | "source": [ 257 | "printError( U, S, Vh )" 258 | ], 259 | "execution_count": 8, 260 | "outputs": [ 261 | { 262 | "output_type": "stream", 263 | "text": [ 264 | "The error ||A-A_estimate||_F/||A||_F is 8.11e-15\n", 265 | "The largest and smallest (non-zero) singular values are 4854.7887 and 3159.3836\n" 266 | ], 267 | "name": "stdout" 268 | } 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": { 274 | "id": "e-cUh8Wth-fa" 275 | }, 276 | "source": [ 277 | "By the way, if we do know the structure of $A$, we can also exploit that in the randomized method and get something a bit faster:" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "metadata": { 283 | "colab": { 284 | "base_uri": "https://localhost:8080/" 285 | }, 286 | "id": "Ac3SkJtN7OZp", 287 | "outputId": "55b0dc9f-f728-4611-9ba5-ce4131e2ffd4" 288 | }, 289 | "source": [ 290 | "%%time\n", 291 | "Omega = rng.standard_normal(size=(n, r))\n", 292 | "Y = A_operator@Omega # matrix multiply\n", 293 | "Q, R = np.linalg.qr(Y, mode='reduced')\n", 294 | "QtA = (A_operator.T@Q).T\n", 295 | "# A = Q@QtA, which is a low-rank factorization. 
If we also want\n", 296 | "# the SVD of A, then continue a little bit more:\n", 297 | "U_temp, S, Vh = np.linalg.svd(QtA, full_matrices=False)\n", 298 | "U = Q@U_temp" 299 | ], 300 | "execution_count": 11, 301 | "outputs": [ 302 | { 303 | "output_type": "stream", 304 | "text": [ 305 | "CPU times: user 353 ms, sys: 81.6 ms, total: 435 ms\n", 306 | "Wall time: 259 ms\n" 307 | ], 308 | "name": "stdout" 309 | } 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "metadata": { 315 | "colab": { 316 | "base_uri": "https://localhost:8080/" 317 | }, 318 | "id": "AgmSrSrfhwJO", 319 | "outputId": "4cb9766b-e4af-4cc1-a47a-191f571f5e7a" 320 | }, 321 | "source": [ 322 | "printError( U, S, Vh )" 323 | ], 324 | "execution_count": 10, 325 | "outputs": [ 326 | { 327 | "output_type": "stream", 328 | "text": [ 329 | "The error ||A-A_estimate||_F/||A||_F is 1.00e-14\n", 330 | "The largest and smallest (non-zero) singular values are 4854.7887 and 3159.3836\n" 331 | ], 332 | "name": "stdout" 333 | } 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "metadata": { 339 | "id": "PaxlQvAWh9Qv" 340 | }, 341 | "source": [ 342 | "" 343 | ], 344 | "execution_count": null, 345 | "outputs": [] 346 | } 347 | ] 348 | } -------------------------------------------------------------------------------- /Demos/demo01_exactRankR.m: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | Vignette #1 4 | APPM 4720/5720 Randomized Algorithms, Spring 2019 5 | Stephen Becker 6 | 7 | This is not a practical algorithm, since it only works 8 | if the matrix is *exactly* low rank 9 | 10 | %} 11 | 12 | rng(0); % seed the random number generator so this is reproducible 13 | 14 | % -- Generate a low-rank matrix (m x n, with rank r ) 15 | n = 4e3; 16 | m = n; 17 | r = 100; % rank 18 | 19 | A = randn(m,r)*randn(r,n); 20 | 21 | %% -- Find its SVD with conventional methods 22 | tic 23 | [U,Sigma,V] = svd(A,'econ'); 24 | toc 25 | % 35 seconds 26 | % semilogy( diag(Sigma), 'o-' ) 27 | % Matlab doesn't understand that the matrix is not full rank 28 | 29 | clear U Sigma V 30 | %% -- Find its SVD with a randomized method 31 | tt = tic; 32 | tic; Omega = randn(n,r); toc 33 | tic; Y = A*Omega; toc 34 | tic; [Q,R] = qr(Y,0); toc 35 | tic; QtA = Q'*A; toc 36 | tm = toc(tt); 37 | 38 | A_estimate = Q*QtA; 39 | err = norm( A - A_estimate, 'fro' )/norm(A,'fro'); 40 | fprintf('||A-A_estimate||_F/||A||_F is %g\n', err ); 41 | fprintf('Overall time: %g seconds\n', tm ); -------------------------------------------------------------------------------- /Demos/demo01_exactRankR.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------- # 2 | # -------------------------------------------------------------------------------------- # 3 | # Vignette #1 4 | # APPM 4720/5720 Randomized Algorithms, Spring 2019 5 | # Stephen Becker (original MATLAB) & Jake Knigge (Python) 6 | # This is not a practical algorithm, since it only works if the matrix is *exactly* low 7 | # rank. 
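# (Added note, not in the original file.) Why the randomized method below is
# fast: forming Y = A @ Omega costs O(m*n*r) flops and the QR factorization of
# the m-by-r matrix Y costs O(m*r^2), versus roughly O(m*n*min(m,n)) for a full
# SVD. For matrices that are only approximately low rank, one would oversample
# (use, say, r+10 columns in Omega) and optionally add a power iteration such
# as Y = A @ (A.T @ Y) before the QR step.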
8 | # -------------------------------------------------------------------------------------- # 9 | import numpy as np # import NumPy package 10 | import time as time # import time package 11 | # -------------------------------------------------------------------------------------- # 12 | np.set_printoptions(precision = 4) # display only four digits 13 | np.random.seed(seed = 2) # set seed for reproducibility 14 | n = int(4e3); m = n # dimension of problem (built-in int; np.int is deprecated) 15 | r = int(100) # rank of matrix 16 | mu, sigma = 0, 1 # mean and standard deviation 17 | zz = np.random.normal(mu, sigma, n*r) # generate (normal) random numbers 18 | Z = zz.reshape(n,r) # reshape to matrix 19 | A = np.matmul(Z, Z.T) # compute outer-product matrix 20 | # -------------------------------------------------------------------------------------- # 21 | # Find its SVD with conventional methods 22 | t = time.time() # time SVD calculation 23 | U, S, Vh = np.linalg.svd(A); V = Vh.T # compute SVD of A and transpose V 24 | elapsed = time.time() - t 25 | print('The full SVD took', round(elapsed, 4), 'seconds.') 26 | # -------------------------------------------------------------------------------------- # 27 | # Find its SVD with a randomized method 28 | tt = time.time() 29 | t = time.time(); Omega = np.random.normal(mu, sigma, (n, r)); print(round(time.time() - t, 4), 'seconds') 30 | t = time.time(); Y = np.matmul(A, Omega); print(round(time.time() - t, 4), 'seconds') 31 | t = time.time(); Q, R = np.linalg.qr(Y, mode='reduced'); print(round(time.time() - t, 4), 'seconds') 32 | t = time.time(); QtA = np.matmul(Q.T, A); print(round(time.time() - t, 4), 'seconds') 33 | tm = time.time() - tt 34 | print('The approximate SVD took', round(tm, 4), 'seconds.') 35 | # -------------------------------------------------------------------------------------- # 36 | A_estimate = np.matmul(Q, QtA) 37 | err = np.linalg.norm( np.ravel(A) - np.ravel(A_estimate) ) / np.linalg.norm( np.ravel(A) ) 38 | print('The error ||A-A_estimate||_F/||A||_F is ', '{:0.4e}'.format(err), '.', sep = '') 39 | # -------------------------------------------------------------------------------------- # 40 | # -------------------------------------------------------------------------------------- # 41 | -------------------------------------------------------------------------------- /Demos/demo02_sorts.m: -------------------------------------------------------------------------------- 1 | function demo02_sorts 2 | 3 | %{ 4 | Demo to show the effect of randomized perturbations on the speed 5 | of sorting algorithms 6 | 7 | Stephen Becker 8 | %} 9 | 10 | 11 | rng(0); % make it reproducible 12 | 13 | n = 100; 14 | % x = randn(n,1); 15 | % x = (1:n)'; % good for bubble_sort, bad for quick_sort 16 | x = (n:-1:1)'; % bad for bubble_sort, bad for quick_sort 17 | 18 | 19 | % == Gather systematic data == 20 | 21 | nList = round( logspace( 1, 3, 10) ); 22 | N = length( nList ); 23 | nReps = 10; 24 | 25 | [bubble, quick] = deal( zeros(N,1) ); 26 | [bubbleRandom, quickRandom] = deal( zeros(N,nReps) ); 27 | for ni = 1:N 28 | n = nList(ni); 29 | fprintf('%d of %d trials\n', ni, N ); 30 | 31 | x = (n:-1:1)'; 32 | 33 | a_less_than_b(); % zero out the counter 34 | y = bubble_sort(x); 35 | bubble(ni) = a_less_than_b(); % number of comparisons 36 | 37 | % issorted( y ) % <-- if you don't trust the implementation, check it!
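% --- Added explanatory comment (not in the original file) ---
% What to expect: on this reversed input, bubble sort never exits early, so
% it performs exactly n*(n-1)/2 comparisons; quicksort with a last-element
% pivot also degrades to ~n^2/2 comparisons, since every partition is
% maximally unbalanced. The random shuffles in the loop below restore
% quicksort's expected O(n log n) comparison count.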
38 | 39 | a_less_than_b(); % zero out the counter 40 | y = quick_sort(x); 41 | quick(ni) = a_less_than_b(); % number of comparisons 42 | 43 | for r = 1:nReps 44 | x = x(randperm(n)); 45 | 46 | a_less_than_b(); % zero out the counter 47 | y = bubble_sort(x); 48 | bubbleRandom(ni,r) = a_less_than_b(); % number of comparisons 49 | 50 | a_less_than_b(); % zero out the counter 51 | y = quick_sort(x); 52 | quickRandom(ni,r) = a_less_than_b(); % number of comparisons 53 | 54 | end 55 | 56 | end 57 | % == Plot == 58 | figure(1); clf; 59 | loglog( nList, bubble, 'o-','linewidth',2,'markersize',8); 60 | hold all 61 | loglog( nList, quick, 's:','linewidth',2,'markersize',8); 62 | loglog( nList, mean(bubbleRandom,2), '*:','linewidth',2,'markersize',8); 63 | loglog( nList, mean(quickRandom,2), 'd:','linewidth',2,'markersize',8); 64 | loglog( nList, nList.^2/2, '--' ) 65 | loglog( nList, nList.*log2(nList), '.-' ) 66 | set(gca,'fontsize',18); 67 | lh=legend('Bubble Sort','Quicksort','Bubble (randomized)',... 68 | 'Quick (randomized)','n^2/2','n log(n)','location','northwest'); 69 | xlabel('Length of list'); 70 | ylabel('Number of comparisons'); 71 | grid on 72 | 73 | end % end of main function 74 | 75 | %%%%% 76 | % SUBROUTINES 77 | %%%%% 78 | 79 | function x = bubble_sort( x ) 80 | % x = bubble_sort(x) 81 | % sorts x in increasing order 82 | 83 | n = length(x); 84 | 85 | for iteration = 1:n-1 86 | 87 | endIndex = n - iteration; 88 | 89 | alreadySorted = true; 90 | 91 | for i = 1:endIndex 92 | 93 | if a_less_than_b( x(i+1), x(i) ) 94 | % swap them: 95 | tmp = x(i+1); 96 | x(i+1) = x(i); 97 | x(i) = tmp; 98 | alreadySorted = false; 99 | end 100 | 101 | end 102 | if alreadySorted 103 | break; % early return 104 | end 105 | end 106 | end 107 | 108 | 109 | function x = quick_sort( x ) 110 | % x = quick_sort(x) 111 | % sorts x in increasing order 112 | 113 | n = length(x); 114 | 115 | % check for an early return (e.g., the base case in recursion) 116 | % (a real implementation would have a larger base case) 117 | if n <= 1 118 | return; 119 | end 120 | 121 | % Pick a pivot: 122 | pivot = x(n); % the last element 123 | % Note: quick sort is never implemented this way, since 124 | % it uses tricks so that it doesn't have to use extra memory.
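% (Added comment.) For instance, an in-place quicksort partitions the array
% around the pivot using two moving indices (Hoare or Lomuto partitioning)
% and recurses on index ranges, so it needs only the O(log n) recursion
% stack on average. The list-building version below trades that efficiency
% for readability.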
125 | smallerList = []; 126 | largerList = []; 127 | for i = 1:(n-1) 128 | value = x(i); 129 | if a_less_than_b( value, pivot ) 130 | smallerList(end+1) = value; 131 | else 132 | largerList(end+1) = value; 133 | end 134 | end 135 | 136 | % Now, recurse 137 | smallerList = quick_sort( smallerList ); 138 | largerList = quick_sort( largerList ); 139 | 140 | % and combine; the (:) forces column vectors so the vertical concatenation is valid 141 | x = [smallerList(:); pivot; largerList(:)]; 142 | end 143 | 144 | 145 | function y = a_less_than_b( a, b ) 146 | % y = a_less_than_b( a, b ) 147 | % returns "true" if a < b 148 | % and "false" if a >= b 149 | 150 | persistent counter 151 | if nargin == 0 152 | y = counter; 153 | counter = []; 154 | return; 155 | end 156 | if isempty( counter ), counter = 0; end 157 | 158 | % main code: 159 | 160 | y = (a < b ); 161 | counter = counter + 1; 162 | end -------------------------------------------------------------------------------- /Demos/demo02_sorts.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------------- # 2 | # -------------------------------------------------------------------------------------- # 3 | # Vignette #2 4 | # APPM 4720/5720 Randomized Algorithms, Spring 2019 5 | # Stephen Becker (original MATLAB) & Jake Knigge (Python) 6 | # Demo to show the effect of randomized perturbations on the speed of sorting algorithms. 7 | # -------------------------------------------------------------------------------------- # 8 | import numpy as np # import numpy package 9 | import time as time # import time package 10 | import matplotlib.pyplot as plt # import matplotlib package 11 | # -------------------------------------------------------------------------------------- # 12 | def vignette02(): 13 | np.random.seed(seed = 2) # set seed for reproducibility 14 | nList = np.round(np.logspace(1, 2.75, num = 10)); 15 | N = len(nList); 16 | nReps = 5 17 | bubble = np.zeros((N,1)); quick = np.zeros((N,1)); 18 | bubbleRandom = np.zeros((N,nReps)); quickRandom = np.zeros((N,nReps)); 19 | for ni in range(N): 20 | n = int(nList[ni]) # cast to a plain int so it can be used as a length 21 | print(ni+1, 'of', N, 'trials') 22 | x = np.linspace(n,1,n); 23 | a_less_than_b.counter = 0 24 | y = bubble_sort(x); 25 | bubble[ni] = a_less_than_b.counter; 26 | a_less_than_b.counter = 0 27 | # x = np.linspace(1,n,n) # sorted input: good for bubble_sort, bad for quick_sort 28 | x = np.linspace(n,1,n); 29 | y = quick_sort(x); 30 | quick[ni] = a_less_than_b.counter; 31 | for r in range(nReps): 32 | x = x[np.random.permutation(n)]; 33 | a_less_than_b.counter = 0; 34 | y = bubble_sort(x); 35 | bubbleRandom[ni, r] = a_less_than_b.counter; 36 | x = x[np.random.permutation(n)]; 37 | a_less_than_b.counter = 0; 38 | y = quick_sort(x); 39 | quickRandom[ni, r] = a_less_than_b.counter; 40 | fig, ax = plt.subplots() 41 | line1, = ax.loglog(nList, bubble, label='bubble', marker='o') 42 | line2, = ax.loglog(nList, quick, label='quick' , marker='s') 43 | line3, = ax.loglog(nList, np.mean(quickRandom,1), label='quick random', marker=".") 44 | line4, = ax.loglog(nList, np.mean(bubbleRandom,1), label='bubble random', marker="*") 45 | line5, = ax.loglog(nList, np.log(nList)*nList, label='n log(n)', marker="x") 46 | line6, = ax.loglog(nList, nList**2 / 2, label='n^2/2', marker=',') 47 | ax.legend(loc='upper left') 48 | ax.grid(True) 49 | ax.set_xlabel('list length') 50 | ax.set_ylabel('number of comparisons') 51 | plt.title('comparison test: randomized vs.
    plt.show()

# -------------------------------------------------------------------------------------- #
# subroutines
# -------------------------------------------------------------------------------------- #
def bubble_sort(x):                 # sorts x in increasing order
    n = len(x)
    for iteration in range(n-1, 0, -1):
        alreadySorted = True
        for j in range(iteration):
            if a_less_than_b( x[j+1], x[j] ) == True:  # swap them
                tmp = x[j+1]
                x[j+1] = x[j]
                x[j] = tmp
                alreadySorted = False
        if alreadySorted == True:
            break
    return x

# -------------------------------------------------------------------------------------- #
def quick_sort(x):                  # sorts x in increasing order
    # quick_sort and supporting code follows example from...
    # http://interactivepython.org/runestone/static/pythonds/SortSearch/TheQuickSort.html
    quick_sort_r(x,0,len(x)-1)
    return x

# -------------------------------------------------------------------------------------- #
def quick_sort_r(x, first, last):   # recursive workhorse for quick_sort
    if first < last:
        split = partition(x, first, last)
        quick_sort_r(x, first, split-1)
        quick_sort_r(x, split+1, last)

# -------------------------------------------------------------------------------------- #
def partition(x,first,last):        # find the split point and move other items
    pivotvalue = x[first]
    left = first + 1
    right = last
    done = False
    while not done:
        while left <= right and x[left] <= pivotvalue:
            a_less_than_b.counter = a_less_than_b.counter + 1
            left = left + 1
        while x[right] >= pivotvalue and right >= left:
            a_less_than_b.counter = a_less_than_b.counter + 1
            right = right - 1
        if right < left:
            done = True
        else:
            temp = x[left]
            x[left] = x[right]
            x[right] = temp
    temp = x[first]
    x[first] = x[right]
    x[right] = temp
    return right

# -------------------------------------------------------------------------------------- #
def a_less_than_b(a, b):
    y = (a < b)
    a_less_than_b.counter += 1      # counter attribute used to track function calls
    return y

# -------------------------------------------------------------------------------------- #
vignette02()
-------------------------------------------------------------------------------- /Demos/demo03_FrobeniusNorm.c: --------------------------------------------------------------------------------
/* NOTE: the three system-header names here were lost in extraction;
 * stdio.h, stdlib.h, and math.h are assumptions consistent with the code. */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "blas.h"

/* Stephen Becker, Jan 15 2018
 * Make sure to allocate the array A on the heap,
 * not on the stack!
 * e.g., double A[1000*1000]; is not a good idea
 *
 * Usage:
 *   gcc demo03_FrobeniusNorm.c -O3
 *   ./a.out n      # uses a n x n matrix
 *   ./a.out n 1    # if a 2nd argument is positive, use row-based version
 *   gcc demo03_FrobeniusNorm.c -O3 -I/Applications/MATLAB_R2017b.app/extern/include -lblas
 *   ./a.out n 1 1  # if a 3rd argument is positive, use BLAS for rows/columns
 *   ...
 *                  if this 3rd argument is negative, use BLAS and vectorize
 *
 * Results for 10,000 x 10,000:
 * NO BLAS:
 *   2.8 s  Looping over rows, inner loop over columns
 *   .75 s  Looping over columns, inner loop over rows
 * WITH BLAS:
 *   2.5 s  Looping over rows, inner loop over columns
 *   .52 s  Looping over columns, inner loop over rows
 * */

/* See dnrm2( ptrdiff_t *N, double *X, ptrdiff_t *INCX) */

int main(int argc, char *argv[]) {

    ptrdiff_t m, n, length;
    ptrdiff_t INCX;
    int i, j; /* counters */
    int ROW_BASED, USE_BLAS, VECTORIZE = 0; /* boolean flags */
    double *A;
    double s=0.; /* sum */
    double t = 0.;


    m = 10;
    if (argc > 1)
        m = atoi( argv[1] );
    n = m;


    /* We're storing it in column-major format */
    /* The problem is, this code can be slow itself,
     * so for speed runs, set it all to zero and hope
     * compiler doesn't try to be too clever */
    /*
    A = malloc( m * n * sizeof( double ) );
    for (i=0; i<m*n; i++)
        A[i] = ...   (the fill loop was garbled in extraction)
    */
    /* ASSUMPTION (reconstructed): a zero-filled allocation, consistent
     * with the comment above: */
    A = calloc( m * n, sizeof( double ) );

    if ( (argc > 2 ) && (atoi(argv[2])>0) )
        ROW_BASED = 1;
    else
        ROW_BASED = 0;
    if ( (argc > 3 ) && (atoi(argv[3])>0) )
        USE_BLAS = 1;
    else if ( (argc > 3 ) && (atoi(argv[3])<0) ) {
        USE_BLAS = 1;
        VECTORIZE = 1;
    }
    else
        USE_BLAS = 0;
    if (USE_BLAS)
        printf("Using BLAS\n");
    if (VECTORIZE){
        printf("Vectorizing (this is the *proper* way, no 'for' loops)\n");
        INCX = 1;
        length = m * n;
        s = dnrm2( &length, A, &INCX );

        free( A );
        printf("... Frobenius norm is %e\n", s ); /* dnrm2 returns the norm, not the sum of squares */

        return 0;


    }

    if (ROW_BASED == 1 ){
        printf("Looping over columns, inner loop over rows\n");
        if (USE_BLAS) {
            INCX = 1;
            for (j=0; j
/* ... (the remainder of this file was lost in extraction) ... */
-------------------------------------------------------------------------------- /Demos/demo03_FrobeniusNorm.ipynb: --------------------------------------------------------------------------------
 [notebook preamble and Colab "Open In Colab" badge cell lost in extraction]
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "RojslPeDFSWI"
   },
   "source": [
    "# Demo 3: calculating the Frobenius norm, looping over rows vs columns\n",
    "\n",
    "Demonstrates effect of stride length, and row- or column-based storage\n",
    "\n",
    "See also the `c` language demo\n",
    "\n",
    "Stephen Becker, Aug 2021, APPM 5650 Randomized Algorithms, University of Colorado Boulder"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "NfqPCp2PBtKj"
   },
   "source": [
    "import numpy as np\n",
    "rng = np.random.default_rng(12345)"
   ],
   "execution_count": 1,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "y37x6bdACOqq"
   },
   "source": [
    "def FrobeniusNormByRow(A, use_blas = True):\n",
    "  \"\"\" Outer loop over rows (inner loop over columns) \"\"\"\n",
    "  m,n = A.shape\n",
    "  nrm = 0.\n",
    "  if use_blas:\n",
    "    for row in range(m):\n",
    "      nrm += np.linalg.norm( A[row,:] )**2  # this is Euclidean norm, not Frobenius\n",
    "  else:\n",
    "    for row in range(m):\n",
    "      for col in range(n):\n",
    "        nrm += A[row,col]**2\n",
    "  return np.sqrt(nrm)\n",
    "\n",
    "def FrobeniusNormByColumn(A, use_blas = True):\n",
    "  \"\"\" Outer loop over columns (inner loop over rows) \"\"\"\n",
    "  m,n = A.shape\n",
    "  nrm = 0.\n",
    "  if use_blas:\n",
    "    for col in range(n):\n",
    "      nrm += np.linalg.norm( A[:,col] )**2  # this is Euclidean norm, not Frobenius\n",
    "  else:\n",
    "    for col in range(n):\n",
    "      for row in range(m):\n",
    "        nrm += A[row,col]**2\n",
    "  return np.sqrt(nrm)"
], 89 | "execution_count": 24, 90 | "outputs": [] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": { 95 | "id": "oWBT6c9rEkGd" 96 | }, 97 | "source": [ 98 | "#### Run some experiments" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "metadata": { 104 | "colab": { 105 | "base_uri": "https://localhost:8080/" 106 | }, 107 | "id": "lPX-0wH6CB__", 108 | "outputId": "4cc5db7b-0d15-4c6d-96ac-a4ef8bc830d7" 109 | }, 110 | "source": [ 111 | "n = int(1e4)\n", 112 | "A = rng.standard_normal( size=(n,n) )\n", 113 | "\n", 114 | "%time nrm = np.linalg.norm(A)\n", 115 | "print(f'The true norm is {nrm-1e4:.6f} + 1e4')" 116 | ], 117 | "execution_count": 26, 118 | "outputs": [ 119 | { 120 | "output_type": "stream", 121 | "text": [ 122 | "CPU times: user 121 ms, sys: 1.02 ms, total: 122 ms\n", 123 | "Wall time: 64.3 ms\n", 124 | "The true norm is -1.311721 + 1e4\n" 125 | ], 126 | "name": "stdout" 127 | } 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "metadata": { 133 | "colab": { 134 | "base_uri": "https://localhost:8080/" 135 | }, 136 | "id": "oAfXR-rSClJ8", 137 | "outputId": "5d0c821f-1b74-41ed-af9c-e09260517f95" 138 | }, 139 | "source": [ 140 | "%time nrmRow = FrobeniusNormByRow(A, use_blas = True)\n", 141 | "print(f'Looping over rows, the discrepancy in the norm is {nrmRow-nrm:.8f}')" 142 | ], 143 | "execution_count": 22, 144 | "outputs": [ 145 | { 146 | "output_type": "stream", 147 | "text": [ 148 | "CPU times: user 153 ms, sys: 0 ns, total: 153 ms\n", 149 | "Wall time: 154 ms\n", 150 | "Looping over rows, the discrepancy in the norm is -0.00000000\n" 151 | ], 152 | "name": "stdout" 153 | } 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "metadata": { 159 | "colab": { 160 | "base_uri": "https://localhost:8080/" 161 | }, 162 | "id": "8rzsXn1FCn4L", 163 | "outputId": "69145613-f5c9-4146-db75-c4de1aa3982d" 164 | }, 165 | "source": [ 166 | "%time nrmRow = FrobeniusNormByColumn(A, use_blas = True)\n", 167 | "print(f'Looping over columns, the discrepancy in the norm is {nrmRow-nrm:.8f}')" 168 | ], 169 | "execution_count": 25, 170 | "outputs": [ 171 | { 172 | "output_type": "stream", 173 | "text": [ 174 | "CPU times: user 615 ms, sys: 2.93 ms, total: 618 ms\n", 175 | "Wall time: 628 ms\n", 176 | "Looping over columns, the discrepancy in the norm is -0.00000000\n" 177 | ], 178 | "name": "stdout" 179 | } 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "id": "aOCZbKZHEVkW" 186 | }, 187 | "source": [ 188 | "### Repeat the experiment without using BLAS\n", 189 | "Let's make the matrix smaller so we don't have to wait so long\n", 190 | "\n", 191 | "Here there is less difference, because there's already a lot of overhead just due to the `for` loop (since Python isn't compiled)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "metadata": { 197 | "colab": { 198 | "base_uri": "https://localhost:8080/" 199 | }, 200 | "id": "vN2AitEPDYaT", 201 | "outputId": "b7a5333f-194b-451f-a4c3-c0d65826f59f" 202 | }, 203 | "source": [ 204 | "n = int(4e3)\n", 205 | "A = rng.standard_normal( size=(n,n) )\n", 206 | "\n", 207 | "%time nrm = np.linalg.norm(A)\n", 208 | "print(f'The true norm is {nrm-n:.6f} + ', n)" 209 | ], 210 | "execution_count": 31, 211 | "outputs": [ 212 | { 213 | "output_type": "stream", 214 | "text": [ 215 | "CPU times: user 18.9 ms, sys: 1.03 ms, total: 20 ms\n", 216 | "Wall time: 10.4 ms\n", 217 | "The true norm is -0.319010 + 4000\n" 218 | ], 219 | "name": "stdout" 220 | } 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 
| "metadata": { 226 | "colab": { 227 | "base_uri": "https://localhost:8080/" 228 | }, 229 | "id": "-jWTJsKIEwaE", 230 | "outputId": "f46dc6a2-f676-40b2-de80-e880ea85f0ae" 231 | }, 232 | "source": [ 233 | "%time nrmRow = FrobeniusNormByRow(A, use_blas = True)\n", 234 | "print(f'Looping over rows, the discrepancy in the norm is {nrmRow-nrm:.8f}')\n", 235 | "\n", 236 | "%time nrmRow = FrobeniusNormByRow(A, use_blas = False)\n", 237 | "print(f'Looping over rows (no BLAS), the discrepancy in the norm is {nrmRow-nrm:.8f}')" 238 | ], 239 | "execution_count": 32, 240 | "outputs": [ 241 | { 242 | "output_type": "stream", 243 | "text": [ 244 | "CPU times: user 44.9 ms, sys: 3.03 ms, total: 47.9 ms\n", 245 | "Wall time: 51.7 ms\n", 246 | "Looping over rows, the discrepancy in the norm is 0.00000000\n", 247 | "CPU times: user 10.4 s, sys: 20.1 ms, total: 10.5 s\n", 248 | "Wall time: 10.5 s\n", 249 | "Looping over rows (no BLAS), the discrepancy in the norm is 0.00000000\n" 250 | ], 251 | "name": "stdout" 252 | } 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "metadata": { 258 | "colab": { 259 | "base_uri": "https://localhost:8080/" 260 | }, 261 | "id": "ryB36awVE0N8", 262 | "outputId": "10e74b6e-36dd-465e-f03c-fd25e0d3a9b9" 263 | }, 264 | "source": [ 265 | "%time nrmRow = FrobeniusNormByColumn(A, use_blas = True)\n", 266 | "print(f'Looping over columns, the discrepancy in the norm is {nrmRow-nrm:.8f}')\n", 267 | "\n", 268 | "%time nrmRow = FrobeniusNormByColumn(A, use_blas = False)\n", 269 | "print(f'Looping over columns (no BLAS), the discrepancy in the norm is {nrmRow-nrm:.8f}')" 270 | ], 271 | "execution_count": 33, 272 | "outputs": [ 273 | { 274 | "output_type": "stream", 275 | "text": [ 276 | "CPU times: user 107 ms, sys: 2 ms, total: 109 ms\n", 277 | "Wall time: 113 ms\n", 278 | "Looping over columns, the discrepancy in the norm is 0.00000000\n", 279 | "CPU times: user 10.6 s, sys: 18.7 ms, total: 10.6 s\n", 280 | "Wall time: 10.6 s\n", 281 | "Looping over columns (no BLAS), the discrepancy in the norm is -0.00000000\n" 282 | ], 283 | "name": "stdout" 284 | } 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "metadata": { 290 | "id": "1_qbYnswE4Vu" 291 | }, 292 | "source": [ 293 | "" 294 | ], 295 | "execution_count": null, 296 | "outputs": [] 297 | } 298 | ] 299 | } -------------------------------------------------------------------------------- /Demos/demo04_FrobeniusNorm_sparse.m: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | Accessing rows vs accessing columns of a sparse matrix 4 | 5 | We saw in demo03 that for a dense matrix, in one particular 6 | code and size of matrix, there was about a factor of 3 or 4x 7 | improvement if we looped over columns vs rows 8 | 9 | What about for sparse matrices? 
%}

%% Generate a sparse matrix with random entries, 20% sparse
A = sprandn(1e4,1e4,0.2); % this is 10,000 x 10,000

%%
disp('Compute Frobenius norm by looping over columns');
s=0;
tic
for i=1:size(A,2)
    s = s + norm( A(:,i) )^2;
end
t1=toc;
fprintf('Frobenius norm is %e, took %f seconds\n\n', sqrt(s), t1 ); % s accumulates the *squared* norm, so report sqrt(s)

%%
disp('Compute Frobenius norm by looping over rows')
s=0;
tic
for i=1:size(A,1)
    s = s + norm( A(i,:) )^2;
end
t2 = toc;
fprintf('Frobenius norm is %e, took %f seconds\n', sqrt(s), t2 );

fprintf('Access via column vs row is %.1fx faster\n', t2/t1 );

%% or, if you really want to loop over rows, do so on the transpose
% i.e., first transpose, then loop over columns.
disp('Compute Frobenius norm by looping over columns of transpose');
s=0;
tic
At = A'; % include the time to transpose
for i=1:size(A,2)
    s = s + norm( At(:,i) )^2;
end
t1=toc;
fprintf('Frobenius norm is %e, took %f seconds\n\n', sqrt(s), t1 );



%% Extra: can you tell what's the difference?
% Why do we get different values? Why is one slower?
X = randn(5e3);
tic; s1=norm(X(:)); toc
tic; s2=norm(X); toc
-------------------------------------------------------------------------------- /Demos/demo05_FastJL_speedTest.jl: --------------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
#=
 Testing computational speed of various Fast JL transforms
 Feb 11 2019
 Y = S*X, X is M x N and S is m x M

 Code by: Stephen Becker
 Julia modifications: Jake Knigge
=#
# ---------------------------------------------------------------------------- #
# load packages
using Random, LinearAlgebra, Plots, Statistics, Hadamard, FFTW, SparseArrays
# ---------------------------------------------------------------------------- #
function Hadamard_teaching_code(x)
    # Hadamard_teaching_code(x)
    # applies the Hadamard transform to x.
    # If x has more than one column, the transform is applied
    # to each column.
    # This code is not fast, but it shows you how to exploit
    # the structure of the transform.
    # Note: this code does not do any sub-sampling.
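    # (Added note: each pass of the loop below is one level of the butterfly
    #  recursion H_{2k} [a; b] = [H_k a + H_k b; H_k a - H_k b], applied to
    #  all columns at once via reshape, so the total cost is O(m log2(m) n)
    #  rather than the O(m^2 n) of a dense multiply.)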
    # Reference: https://en.wikipedia.org/wiki/Fast_Walsh–Hadamard_transform
    m = size(x,1); n = size(x,2);
    if nextpow(2,m) != m
        print("ERROR: The number of rows of x must be a power of 2.")
        return
    end
    y = copy(x)
    for bit in 1:Int(log2(m))     # Int() so the loop variable is an integer
        k = Int64(2^bit);         # e.g., 2, 4, ..., m
        k2 = Int64(2^(bit-1));    # e.g., 1, 2, ..., m/2
        y = reshape(y, k, :, n);
        tmp = y[1:k2,:,:];
        y[1:k2,:,:] = y[1:k2,:,:] + y[k2+1:k,:,:];
        y[(k2+1):k,:,:] = tmp - y[k2+1:k,:,:];
        y = reshape(y, m, n)
    end # loop
    return y
end # function
# ---------------------------------------------------------------------------- #
function slowCountSketch( DX, targetRows )
    # slow version of count sketch
    m = length( targetRows );
    Y = zeros(m, size(DX,2) );
    for j in 1:size(DX,1)
        i = targetRows[j];
        Y[i,:] = Y[i,:] + DX[j,:];
    end # loop
    return Y  # without this, the function returns `nothing` (a `for` loop has no value)
end # function
# ---------------------------------------------------------------------------- #
# test that should generate an error (65 is not a power of 2)
x = randn(65,5);
y = Hadamard_teaching_code(x)

# test that should succeed (64 is a power of 2)
x = randn(64,5);
y = Hadamard_teaching_code(x);
y_alt = 64*fwht_natural(x, 1);
norm(y - y_alt)
# Check Implementations for correctness
X = randn(2^13,100); M, N = size(X); m = M/4;

# == Hadamard Code ==
# Check for normalization and ordering
Y1 = Hadamard_teaching_code(Matrix{Float64}(I, 4, 4))
Y2 = 4*fwht_natural(Matrix{Float64}(I, 4, 4), 1)
norm(Y1 - Y2)
# Compare times---run more than once b/c of Julia's "just-in-time" compiler!
@time Y1 = Hadamard_teaching_code(X);
@time Y2 = M*fwht_natural(X, 1);
norm(Y1 - Y2)
# ---------------------------------------------------------------------------- #
# Test speed
N = 100;
M_list = 2 .^(11:14)  # or (^).(2,10:13); # defined using "broadcast" operation
nTrials = 10; # get some averages
nAlgos = 6; # change to 7 if/when CountSketch is included
Times = zeros(nAlgos,length(M_list),nTrials);
Times_setup = zeros(nAlgos,length(M_list),nTrials); # note: `Times_setup = Times` would merely alias the same array in Julia
ALGO_NAMES = ["Gaussian","FJLT - DCT","FJLT - Hadamard", #"Count",
              "Very Very sparse","Very sparse","Sparse"];
rng = Random.seed!(9);
# ---------------------------------------------------------------------------- #
for Mi in 1:length( M_list )
    println("Mi is ", Mi, " of ", length(M_list), ".");
    for trial = 1:nTrials
        M = M_list[Mi];
        m = round(M/4);
        X = randn(M,N);

        ALGO = 1; # Gaussian sketch
        tic = time();
        S = randn(Int64(m),M);
        Times_setup[ALGO,Mi,trial] = time() - tic;
        tic = time();
        Y = S*X;
        Times[ALGO,Mi,trial] = time() - tic;

        ALGO = 2; # Fast JL, DCT
        tic = time();
        D = spdiagm(0 => broadcast(sign,randn(M)) );
        ind = rand(1:M, Int64(m));
        Times_setup[ALGO,Mi,trial] = time() - tic;
        tic = time();
        Y = dct( D*X );
        Y = Y[ind,:];
        Times[ALGO,Mi,trial] = time() - tic;

        ALGO = 3; # Fast JL, Hadamard
        tic = time();
        D = spdiagm(0 => broadcast(sign,randn(M)) );
        ind = rand(1:M, Int64(m));
        Times_setup[ALGO,Mi,trial] = time() - tic;
        tic = time();
        Y = M*fwht_natural( D*X, 1 );
        Y = Y[ind,:];
        Times[ALGO,Mi,trial] = time() - tic;

        # ALGO = 4; # Count
        # tic = time();
        # D = spdiagm(0 => broadcast(sign,randn(M)) );
        # indx_map = Int64.(rand(1:m,M));
        # Times_setup[ALGO,Mi,trial] = time() - tic;
        # tic = time();
        # Y = slowCountSketch(X'*D,indx_map);
        # Times[ALGO,Mi,trial] = time() - tic;

        let ALGO = ALGO
            # Sparse. We can normalize later. Does that help speed?
            for Sparsity = 1:3
                ALGO = ALGO + 1;
                s = 2.0^(1-Sparsity)*sqrt(M);
                density = 1/(2*s);
                ALGO_NAMES[ALGO] = string(round(100*density,digits=2), "% sparse");
                tic = time();
                S = sprandn(Int64(m),M,density); # this takes longer than the multiply!
                S = broadcast(sign, S);
                Times_setup[ALGO,Mi,trial] = time() - tic;
                tic = time();
                Y = sqrt(s)*(S*X);
                Times[ALGO,Mi,trial] = time() - tic;
            end # for loop for sparsity
        end # let block
    end # for loop for trials
end # for loop M list
# ---------------------------------------------------------------------------- #
# Plots
Data = Times;
mn = reshape(mean(Data, dims = 3), nAlgos, length(M_list));
plot(M_list, mn', yscale = :log10, xscale = :log10, legend = :topleft,
     label = ALGO_NAMES, title = "times to apply sketch",
     titlefontsize = 10)
xlabel!("size M")
ylabel!("times in seconds")
y = M_list/M_list[1];
plot!(M_list, mn[1].*y.^2, label = "M^2", linecolor = :black)
plot!(M_list, mn[3,1].*(M_list.*log.(M_list)/(M_list[1]*log.(M_list[1]))),
      label = "M log M", linecolor = :black, linestyle = :dash)
plot!(M_list, minimum(mn[:,1])*y, label = "M", linecolor = :black,
      linestyle = :dot)

# ---------------------------------------------------------------------------- #
Data = Times_setup + Times;
mn = reshape(mean(Data,dims=3),nAlgos, length(M_list));
plot(M_list, mn', yscale = :log10, xscale = :log10, legend = :topleft,
     label = ALGO_NAMES, title = "times to apply sketch with setup",
     titlefontsize = 10)
xlabel!("size M")
ylabel!("times in seconds")
y = M_list/M_list[1];
plot!(M_list, mn[1].*y.^2, label = "M^2", linecolor = :black)
plot!(M_list, mn[3,1].*(M_list.*log.(M_list)/(M_list[1]*log.(M_list[1]))),
      label = "M log M", linecolor = :black, linestyle = :dash)
plot!(M_list, minimum(mn[:,1])*y, label = "M", linecolor = :black,
      linestyle = :dot)

# ---------------------------------------------------------------------------- #
-------------------------------------------------------------------------------- /Demos/demo05_FastJL_speedTest.m: --------------------------------------------------------------------------------
%{
Testing computational speed of various Fast JL transforms
Feb 11 2019

Y = S*X, X is M x N and S is m x M

%}
%% Setup paths
% addpath ~/'Google Drive'/GroupDocuments/MatlabUtilities/ % for countSketch
addpath ~/Repos/randomized-algorithm-class/Code/ % from https://github.com/stephenbeckr/randomized-algorithm-class
addpath ~/Repos/hadamard-transform/ % from https://github.com/jeffeverett/hadamard-transform

%% Check Implementations for correctness
X = randn(2^13,100);
[M,N] = size(X); m = M/4;

% == Hadamard Code ==
% Check for normalization
Hadamard_teaching_code( eye(4) )
4*fwht(eye(4),[],'hadamard')
%%
tic
Y1 = Hadamard_teaching_code(X);
toc
tic
Y2 = M*fwht(X,[],'hadamard');
toc
tic
Y3 = hadamard_pthreads(X); % my mex code
toc
fprintf('Hadamard code discrepancies: %g and %g\n', norm(Y1-Y2,'fro'), ...
32 | norm(Y1-Y3,'fro') ); 33 | 34 | %% == Count sketch -- 35 | d = sign(randn(M,1)); 36 | D = spdiags(d,0,M,M); % bsxfun() is another efficient way to do this 37 | useTranspose = true; 38 | indx_map = int64(randi(m,M,1)); % don't do this in C! 39 | Y2 = countSketch_BLAS(X'*D,indx_map,m,useTranspose)'; 40 | 41 | % Do Count sketch slowly to check 42 | DX = D*X; 43 | targetRows = double(indx_map); 44 | Y = zeros(m,N); 45 | for j = 1:M 46 | i = targetRows(j); 47 | Y(i,:) = Y(i,:) + DX(j,:); 48 | end 49 | fprintf('Count sketch code discrepancies: %g\n', norm( Y - Y2, 'fro' ) ) 50 | 51 | 52 | %% Test speed 53 | N = 100; 54 | % M_list = round(logspace( 3, 4, 4 )); % 4 points between 10^3 and 10^4 55 | M_list = 2.^(10:13); 56 | 57 | nTrials = 10; % get some averages 58 | nAlgos = 7; 59 | Times = zeros(nAlgos,length(M_list),nTrials); 60 | % Times_setup = zeros(3,length(M_list),nTrials); % time to make sparse matrix 61 | Times_setup = Times; 62 | ALGO_NAMES = {'Gaussian','FJLT, DCT','FJLT, Hadamard','Count','Very Very sparse',... 63 | 'Very sparse','Sparse'}; 64 | 65 | for Mi = 1:length( M_list ) 66 | fprintf('Mi is %d of %d\n', Mi, length(M_list) ); 67 | for trial = 1:nTrials 68 | 69 | M = M_list(Mi); 70 | m = round( M/4 ); 71 | 72 | X = randn(M,N ); 73 | 74 | ALGO = 1; % Gaussian sketch 75 | tic 76 | S = randn(m,M); 77 | Times_setup(ALGO,Mi,trial) = toc; 78 | Y = S*X; 79 | Times(ALGO,Mi,trial) = toc; 80 | 81 | ALGO = 2; % Fast JL, DCT 82 | tic; 83 | D = spdiags( sign(randn(M,1)) ,0,M,M); 84 | ind = randsample(M,m); % in Stats toolbox 85 | ind = randperm(M,m); % faster than randsample, doesn't need toolbox 86 | Times_setup(ALGO,Mi,trial) = toc; 87 | Y = dct( D*X ); 88 | Y = Y(ind,:); 89 | Times(ALGO,Mi,trial) = toc; 90 | 91 | ALGO = 3; % Fast JL, Hadamard 92 | tic; 93 | D = spdiags( sign(randn(M,1)) ,0,M,M); 94 | %ind = randsample(M,m); % in Stats toolbox 95 | ind = randperm(M,m); % faster than randsample, doesn't need toolbox 96 | Times_setup(ALGO,Mi,trial) = toc; 97 | Y = hadamard_pthreads( D*X ); 98 | Y = Y(ind,:); 99 | Times(ALGO,Mi,trial) = toc; 100 | 101 | ALGO = 4; % Count 102 | tic; 103 | D = spdiags( sign(randn(M,1)) ,0,M,M); 104 | useTranspose = true; 105 | indx_map = int64(randi(m,M,1)); 106 | Times_setup(ALGO,Mi,trial) = toc; 107 | Y = countSketch_BLAS(X'*D,indx_map,m,useTranspose)'; 108 | Times(ALGO,Mi,trial) = toc; 109 | 110 | 111 | % Sparse. We can normalize later. Does that help speed? 112 | for Sparsity = 1:3 113 | ALGO = ALGO + 1; 114 | s = 2^(1-Sparsity)*sqrt(M); 115 | density = 1/(2*s); 116 | ALGO_NAMES{ALGO} = sprintf('%.1f%% sparse',100*density); 117 | tic 118 | S = sprandn(m,M,density); % this takes longer than the multiply! 119 | S = sign(S); 120 | % SS = logical(S); % alternative 121 | Times_setup(ALGO,Mi,trial) = toc; 122 | Y = sqrt(s)*(S*X); 123 | % is this faster if S is "logical"? 
that doesn't work,
            % it only has 1 bit, need 2 bits, but sparse of type uint8 not
            % supported
            Times(ALGO,Mi,trial) = toc;
        end
    end
end
%% Plot

Data = Times;
% Data = Times - Times_setup;
mn = mean(Data,3);

figure(1); clf;
h=loglog( M_list, mn','o-','linewidth',2 );
set(gca,'fontsize',16);
h(2).LineStyle = '--';
h(3).LineStyle = '--';
h(5).LineStyle = ':'; h(6).LineStyle = ':'; h(7).LineStyle = ':';
legend(ALGO_NAMES,'location','northwest','box','off');

% Add something for reference
hold all
y = M_list/M_list(1);
h2 = loglog( M_list, mn(1)*y.^2, 'k--','DisplayName','M^2','linewidth',2 );
h3 = loglog( M_list, mn(3,1)*...
    (M_list.*log(M_list)/(M_list(1)*log(M_list(1)))),...
    'k-.','DisplayName','M log M','linewidth',2 );
h4 = loglog( M_list, mn(4,1)*y, 'k:','DisplayName','M','linewidth',2 );

xlim([M_list(1),M_list(end)]);
ylim([min(mn(:)),max(mn(:))]);
xlabel('Size M');
ylabel('Time (seconds)');
title('Total times, including setup');
%%
% export_fig 'FastJLtimes_withSetup' '-pdf' -transparent

%%
Data = Times - Times_setup;

mn = mean(Data,3);

figure(1); clf;
h=loglog( M_list, mn','o-','linewidth',2 );
set(gca,'fontsize',16);
h(2).LineStyle = '--';
h(3).LineStyle = '--';
h(5).LineStyle = ':'; h(6).LineStyle = ':'; h(7).LineStyle = ':';
legend(ALGO_NAMES,'location','northwest');

% Add something for reference
hold all
y = M_list/M_list(1);
h2 = loglog( M_list, mn(1)*y.^2, 'k--','DisplayName','M^2','linewidth',2 );
h3 = loglog( M_list, mn(3,1)*...
    (M_list.*log(M_list)/(M_list(1)*log(M_list(1)))),...
    'k-.','DisplayName','M log M','linewidth',2 );
h4 = loglog( M_list, mn(4,1)*y, 'k:','DisplayName','M','linewidth',2 );

xlim([M_list(1),M_list(end)]);
ylim([min(mn(:)),max(mn(:))]);
xlabel('Size M');
ylabel('Time (seconds)');
title('Times to apply sketch, excluding one-time setup cost');
%%
% export_fig 'FastJLtimes_excludingSetup' '-pdf' -transparent
-------------------------------------------------------------------------------- /Demos/demo05_results_excludingSetup.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Demos/demo05_results_excludingSetup.pdf -------------------------------------------------------------------------------- /Demos/demo05_results_withSetup.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Demos/demo05_results_withSetup.pdf -------------------------------------------------------------------------------- /Demos/demo06_leverageScores.m: --------------------------------------------------------------------------------
%{
Demo for 1D leverage scores

Fig.
2 in Section 6.1 of
"Randomized algorithms for matrices and data"
(Mahoney, 2011, http://arxiv.org/abs/1104.5557 )

is misleading, since it discusses perturbing regressors and data.

Consider a 1D regression problem,
    min_{beta} || x*beta - y ||_2

where x is a n x 1 vector of covariates, and y are the data.

By perturbing x, we change the solution.
This is captured by the leverage scores of x.
Since x is a vector, leverage scores are simply proportional
to the magnitude of each entry.

Therefore, the idea of "leverage" is that if we perturb
entries of x that have more leverage, i.e., that are large
in magnitude, then the effect on the regression is greater.

Do we observe that?

Stephen Becker, Feb 14 2018

%}

rng(0);
n = 5;
% x = 1:n;
% x = -n:-1;
x = -2:2;


x = x';
slope = 1;
y = slope*x + .1*randn(n,1);

slopeEst = x\y;

delta = -.9; % try +/-
i = 1;
figure(1); clf;

for i = 1:5

    subplot(2,3,i);

    plot( x, y ,'o','markersize',10)
    hold all
    plot( x, slopeEst*x, '--' ,'linewidth',2)
    xx = x; xx(i) = xx(i) + delta;
    plot( xx(i), y(i),'s','markersize',10,'MarkerFaceColor','k');
    line( [x(i),xx(i)], y(i)*[1,1]);
    slopeEst_perturbed = xx\y;
    plot( x, slopeEst_perturbed*x, '-','linewidth',2 )
    xlim([min(x)-abs(delta),max(x)+abs(delta)]);
    title(sprintf('Moving %d^{th} data point',i))

end

%% Part 2: weighted sampling
% This was added October 2021 (and ipynb also updated)
% Shows how to do weighted sampling, where the weights could be from, e.g., leverage scores
% We confirm that the scaling is done correctly by seeing if we converge to the identity

M = 10;
m = 5;
% S is m x M

% Usually the weights are calculated based on A (if we're doing leverage
% scores), but for now let's just use random weights.
rng(1);
weights = rand(M,1) + 1e-2;
weights(1) = 3;
weights = weights/sum(weights); % these are our normalized leverage scores

nReps = 1e5;
StS = zeros(M);
I = eye(M);
errList = zeros(nReps,1);

withReplacement = true;

for trial = 1:nReps
    omega = randsample( M, m, withReplacement, weights );
    scaling = 1./sqrt(m.*weights(omega));
    % S = diag( scaling ) * I( omega, : );
    % or, another way to do this
    S = bsxfun( @times, scaling, I(omega,:) );

    StS = StS + S'*S;
    % We want E[ S'*S ] = I
    % E[ S'*S ] = E[ sum_j S(j,:)'*S(j,:) ]

    errList( trial ) = norm( StS/trial - I, 'fro' );

end
%% and plot it

figure(1); clf;
loglog( smooth( errList, 1e3 ) )
xlabel('Repetitions');
ylabel('Error ||S^TS-I||_F');
-------------------------------------------------------------------------------- /Demos/demo07_rand_mat_mult.m: --------------------------------------------------------------------------------
% function vignette07_rand_mat_mult
%{
Vignette #7
APPM 4720/5720 Randomized Algorithms, Spring 2019

Demo to show how to approximate a matrix-matrix product using randomization
based on the approach of P. Drineas and M. Mahoney. See section four of
See section four of 8 | "Lectures on Randomized Linear Algebra" by Drineas and Mahoney for additional 9 | details and analysis (?http://arxiv.org/abs/1712.08880) 10 | 11 | Algorithm randomized matrix multiplication. 12 | Inputs: 13 | A is m x n 14 | B is n x p 15 | integer 1 <= c <= n 16 | probability distribution of length n 17 | 18 | repeat for k = 1,\dots, c: 19 | 1. pick i \in {1,\dots,n} with P(i = k) = p_k iid with replacement. 20 | 2. set C(:,k) = 1/sqrt(c * p_i)*A(:,i) and R(k,:) = 1/sqrt(c * p_i)*B(i,:). 21 | 22 | return C, R, and CR = sum_{k=1}^c 1/(c * p_i) * A(:,i) B(i,:). 23 | 24 | Code by Jake Knigge, modifications by Stephen Becker 25 | 26 | Exercise: can you use leverage scores to improve sampling? 27 | Issue #1: you have to balance leverage scores of A *and* of B 28 | Issue #2: what happens if n > p or n > m? 29 | %} 30 | % ---------------------------------------------------------------------------- % 31 | rng(2); % set seed for reproducibility 32 | n_sims = 1000; m = 500; n = 20; p = 250; % parameters for matrix sizes 33 | c = min(max(ceil(0.5*n),1),n); % subsampling parameter 34 | % storage for simulations 35 | fro_norms = zeros(n_sims, 1); fro_norms_opt = fro_norms; 36 | % ---------------------------------------------------------------------------- % 37 | % generate "data" matrices 38 | A = randn(m,n)/sqrt(n); A(:,1) = 5 + 5*rand(m,1); % make the first column "big" 39 | B = randn(n,p)/sqrt(n); AB = A*B; 40 | % ell_2 norms: columns of A and rows of B 41 | col_norm_A = sqrt(sum(A.^2, 1)); row_norm_B = sqrt(sum(B.^2, 2)); 42 | % MATLAB R2018b includes a 'vecnorm' function to calculate column- or row-wise 43 | % norms of matrices; see https://www.mathworks.com/help/matlab/ref/vecnorm.html. 44 | % ---------------------------------------------------------------------------- % 45 | % probabilities for sampling---naive = uniform; optimal uses norm information 46 | probs = ones(n, 1)/n; 47 | probs_opt = ( (col_norm_A .* row_norm_B') / (col_norm_A * row_norm_B) )'; 48 | % compute theoretical upper bounds on expected squared Frobenius norms 49 | upper_bound = sum(col_norm_A.^2 .* (row_norm_B.^2)' ./ (c*probs')); 50 | upper_bound_opt = (col_norm_A * row_norm_B)^2 / c; 51 | % ---------------------------------------------------------------------------- % 52 | % simulation 53 | for t = 1:n_sims 54 | % take samples 55 | samplesUniform = randperm( n, c ); 56 | replace = true; 57 | samplesOptimal = randsample( n, c, replace, probs_opt ); 58 | rescale = 1./sqrt( c/n ); 59 | rescale_opt = 1./sqrt( c*probs_opt( samplesOptimal) ); 60 | C = A(:,samplesUniform)*diag( rescale ); 61 | C_opt = A(:,samplesOptimal)*diag( rescale_opt ); 62 | R = diag( rescale ) *B(samplesUniform,:); 63 | R_opt = diag( rescale_opt )*B(samplesOptimal,:); 64 | 65 | % compute random matrix product via outerproduct 66 | CR = C*R; 67 | CR_opt = C_opt*R_opt; 68 | % calculate Frobenius norms of actual vs. randomized 69 | fro_norms(t) = norm(AB-CR,'fro'); 70 | fro_norms_opt(t)= norm(AB-CR_opt,'fro'); 71 | end 72 | 73 | % display comparisons of simulations 74 | formatSpec = '||AB-CR||_F simulation %4.2f vs. upper bound %4.2f (naive)\n'; 75 | fprintf(formatSpec, sqrt(mean(fro_norms.^2)), sqrt(upper_bound)) 76 | formatSpec = '||AB-CR||_F simulation %4.2f vs. upper bound %4.2f (optimal)\n'; 77 | fprintf(formatSpec, sqrt(mean(fro_norms_opt.^2)), sqrt(upper_bound_opt)) 78 | 79 | % fprintf('relative err: %.3f (naive), %.3f (optimal)\n', ... 
% NOTE: the Python version of this script compares the squared Frobenius norms
% from the simulations vs. the upper bounds. The comparisons in this script are
% different (and make use of Jensen's inequality for concave functions), but
% their interpretations are the same.
% end % vignette_rand_mat_mult function
% ---------------------------------------------------------------------------- %
% If you don't have randsample (in new versions of Stat toolbox)
% then repeatedly call this randSample function
% helper function
% function y = randSample(x)
% % function randSample
% % samples an integer from a prob. distribution x (i.e., x >= 0, sum(x) = 1)
% idx = (1:length(x))'; cdf = cumsum(x); z = rand(); y = min(idx(z <= cdf));
% end
% ---------------------------------------------------------------------------- %
% test of randSample() function using optimal sampling probabilities from the
% above code
% y = zeros(1e5,1);
% for t = 1:(length(y))
%     y(t) = randSample(probs_opt);
% end
% histogram(y,'Normalization','probability'); hold on, plot(1:n,probs_opt,'*r');
-------------------------------------------------------------------------------- /Demos/demo07_rand_mat_mult.py: --------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------- #
# -------------------------------------------------------------------------------------- #
# Vignette #XX
# APPM 4720/5720 Randomized Algorithms, Spring 2019
#
# Demo to show how to approximate a matrix-matrix product using randomization based on
# the approach of P. Drineas and M. Mahoney. See section four of "Lectures on Randomized
# Linear Algebra" by Drineas and Mahoney for additional details and analysis.
#
# Algorithm: randomized matrix multiplication.
# given A \in \reals^{m \times n}, B \in \reals^{n \times p}, an integer c
# (1 \le c \le n), and a probability distribution p of length n.
# repeat for k = 1,\dots, c:
#   1. pick i_k \in {1,\dots,n} with P(i_k = i) = p_i, iid with replacement.
#   2. set C(:,k) = 1/sqrt(c * p_{i_k}) * A(:,i_k) and R(k,:) = 1/sqrt(c * p_{i_k}) * B(i_k,:).
# return C, R, and CR = sum_{k=1}^c 1/(c * p_{i_k}) * A(:,i_k) B(i_k,:).
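# (Added note: for any sampling distribution p, CR is an unbiased estimate of
#  AB, since E[ A(:,i_k) B(i_k,:) / p_{i_k} ] = sum_i A(:,i) B(i,:) = AB for
#  each of the c terms; the choice of p only changes the variance, which is
#  what the Frobenius-norm bounds in this demo measure.)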
# -------------------------------------------------------------------------------------- #
import numpy as np                  # import numpy package
import time as time                 # import time package
import matplotlib.pyplot as plt     # import matplotlib package
np.set_printoptions(precision = 2)  # display only a few digits
# -------------------------------------------------------------------------------------- #
def vignette_rand_mat_mult(n_sims = 1000, m = 100, n = 20, p = 80):
    np.random.seed(seed = 2)        # set seed for reproducibility
    # ensure parameters are integers (plain int(); np.int is deprecated in NumPy >= 1.20)
    n_sims = int(n_sims); m = int(m); n = int(n); p = int(p)
    c = min(max(int(round(0.5*n)), 1), n)
    # storage for simulations
    fro_norms = np.zeros(n_sims); fro_norms_opt = np.zeros(n_sims)
    # generate "data" matrices
    A = np.random.normal(scale = 1 / np.sqrt(n), size = (m,n))   # isotropic rows
    B = np.random.normal(scale = 1 / np.sqrt(n), size = (n,p))   # isotropic columns
    AB = np.matmul(A, B);
    # ell_2 norms: columns of A and rows of B
    col_norm_A = np.linalg.norm(A, axis = 0); row_norm_B = np.linalg.norm(B, axis = 1)
    # probabilities for sampling
    probs = np.ones(n)/n            # naive probabilities---i.e., uniform distribution
    probs_opt = (col_norm_A * row_norm_B) / sum(col_norm_A * row_norm_B)  # optimal probs
    probs_opt = probs_opt / sum(probs_opt)  # undo roundoff---ensure they sum to 1
    # compute theoretical upper bounds on expected squared Frobenius norms
    upper_bound = sum(col_norm_A**2 * row_norm_B**2 / (c*probs))  # naive probabilities
    upper_bound_opt = sum(col_norm_A * row_norm_B)**2 / c         # optimal probabilities
    # simulation
    for t in range(n_sims):
        # initialize / re-zero matrices
        C = np.zeros((m,c)); R = np.zeros((c,p))
        C_opt = np.zeros((m,c)); R_opt = np.zeros((c,p))
        for k in range(c):
            # step 1
            i = np.random.choice(a = np.arange(n), replace = True, p = probs)
            i_opt = np.random.choice(a = np.arange(n), replace = True, p = probs_opt)
            # calculate rescaling coefficients
            rescale = 1 / np.sqrt(c*probs[i]);
            rescale_opt = 1 / np.sqrt(c*probs_opt[i_opt])
            # step 2
            C[:,k] = rescale * A[:,i]; R[k,:] = rescale * B[i,:]
            C_opt[:,k] = rescale_opt * A[:,i_opt]; R_opt[k,:] = rescale_opt * B[i_opt,:]
        # compute random matrix product via outer product
        CR = np.matmul(C, R)
        CR_opt = np.matmul(C_opt, R_opt)
        # calculate Frobenius norms of actual vs. randomized
        fro_norms[t] = np.linalg.norm(AB - CR, ord = 'fro')**2
        fro_norms_opt[t] = np.linalg.norm(AB - CR_opt, ord = 'fro')**2
    # print comparisons averaged across the number of simulations
    print('||AB - CR||_F^2 simulation vs. upper bound:',
          np.round(np.mean(fro_norms),2), 'vs.',
          np.round(upper_bound,2), '(using naive sampling probabilities)')
    print('||AB - CR||_F^2 simulation vs. upper bound:',
          np.round(np.mean(fro_norms_opt),2), 'vs.',
          np.round(upper_bound_opt,2), '(using optimal sampling probabilities)')
    # return np.mean(fro_norms), upper_bound, np.mean(fro_norms_opt), upper_bound_opt

# small problem
vignette_rand_mat_mult(n_sims = 10000, m = 10, n = 4, p = 10)

# moderate problem
vignette_rand_mat_mult(n_sims = 1000, m = 1000, n = 20, p = 1000)

# big problem
vignette_rand_mat_mult(n_sims = 1000, m = 5000, n = 10, p = 5000)

# Question: If you see that the estimated squared Frobenius norm for the naive
# probabilities is smaller than that for the optimal probabilities, what should
# you do?
-------------------------------------------------------------------------------- /Demos/demo07_rand_mat_mult_ortho.py: --------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------- #
# -------------------------------------------------------------------------------------- #
# Vignette #XX
# APPM 4720/5720 Randomized Algorithms, Spring 2019
#
# Demo to show how to approximate a matrix-matrix product using randomization based on
# the approach of P. Drineas and M. Mahoney. See section four of "Lectures on Randomized
# Linear Algebra" by Drineas and Mahoney for additional details and analysis.
#
# This script focuses on the case where U \in \reals^{n \times d} with n >> d is an
# orthogonal matrix (specifically, its columns are orthonormal). We choose c so that the
# deviation between our randomized matrix and the one we're approximating is bounded with
# high probability. The details can be found in the Drineas and Mahoney paper.
#
# Algorithm: randomized matrix multiplication.
# given U \in \reals^{n \times d} with n >> d, an integer c (1 \le c \le n),
# and a probability distribution p of length n.
# repeat for k = 1,\dots, c:
#   1. pick i_k \in {1,\dots,n} with P(i_k = i) = p_i, iid with replacement.
#   2. set R(k,:) = 1/sqrt(c * p_{i_k}) * U(i_k,:).
# return R^T R = sum_{k=1}^c 1/(c * p_{i_k}) * U(i_k,:)^T U(i_k,:).
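# (Added note: U has orthonormal columns, so U^T U = I_d; the simulation below
#  therefore measures ||I_d - R^T R||_F, and the row probabilities used are
#  exactly the normalized row leverage scores ||U(i,:)||^2 / d.)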
# -------------------------------------------------------------------------------------- #
import numpy as np                  # import numpy package
import time as time                 # import time package
import matplotlib.pyplot as plt     # import matplotlib package
np.set_printoptions(precision = 2)  # display only a few digits
# -------------------------------------------------------------------------------------- #
def vignette_rand_mat_mult_ortho(n_sims = 100, n = 1000, d = 6):
    # n_sims = 100; n = 1000; d = 6  # choose d ~ log n
    np.random.seed(seed = 2)        # set seed for reproducibility
    # ensure parameters are integers (plain int(); np.int is deprecated in NumPy >= 1.20)
    n_sims = int(n_sims); n = int(n); d = int(d);
    beta = 1; epsilon = 0.9; delta = 0.1;
    c_big_n = int(np.ceil(96*d/(beta*epsilon**2) *
                  np.log(96*d/(beta*epsilon**2*np.sqrt(delta)))))
    c_sm_n = int(np.ceil(10*d**2 / (beta*epsilon**2)))
    c = np.amin([c_big_n, c_sm_n])
    # storage for simulations
    fro_norms = np.zeros(n_sims)
    # generate "data" matrices
    U = np.random.normal(scale = 1 / np.sqrt(n), size = (n,d))  # isotropic columns
    U, R_qr = np.linalg.qr(U)
    # ell_2 norms: columns of U and rows of U
    col_norm_U = np.linalg.norm(U, axis = 0); row_norm_U = np.linalg.norm(U, axis = 1)
    # probabilities for sampling
    probs = beta * row_norm_U**2 / d    # nearly optimal probs (row leverage scores)
    probs = probs / sum(probs)          # undo roundoff---ensure they sum to 1
    # compute theoretical upper bounds on expected squared Frobenius norms
    upper_bound = d**2 / (c*beta)       # optimal probabilities
    # simulation
    for t in range(n_sims):
        # initialize / re-zero matrices
        RTR = np.zeros((d,d))
        for k in range(c):
            # step 1
            i = np.random.choice(a = np.arange(n), replace = True, p = probs)
            # calculate rescaling coefficients
            rescale = 1 / np.sqrt(c*probs[i])
            # step 2
            R = rescale * U[i,:]
            # compute random matrix product via outer product
            RTR = np.outer(R, R) + RTR
        # calculate Frobenius norms of actual vs. randomized
        fro_norms[t] = np.linalg.norm(np.eye(d) - RTR, ord = 'fro')
    # print comparisons averaged across the number of simulations
    print('||U^T U - R^T R||_F^2 =', np.round(np.mean(fro_norms**2), 4),
          'vs', np.round(upper_bound, 4))

vignette_rand_mat_mult_ortho()

# Question: If you see that the estimated squared Frobenius norm for the naive
# probabilities is smaller than that for the optimal probabilities, what should
# you do?
-------------------------------------------------------------------------------- /Demos/demo08_higherAccuracyRegression.m: --------------------------------------------------------------------------------
%{
Demo of the Iterative Hessian Sketch (IHS) cf. Pilanci and Wainwright
and of the preconditioned approaches (BLENDENPIK, LSRN)

These are two methods to get high-accuracy l2 regression

The goal is to approximate the solution of
    min_{x} || Ax-b ||^2
where
    A is M x N
and we are assuming M >> N.

Code: Stephen Becker

References:

- "Iterative Hessian Sketch: Fast and Accurate Solution
  Approximation for Constrained Least-Squares" (Pilanci, Wainwright; JMLR 2016
  http://www.jmlr.org/papers/volume17/14-460/14-460.pdf )
- "Blendenpik: Supercharging LAPACK's Least-Squares Solver" (Avron et al.
  2010, https://epubs.siam.org/doi/abs/10.1137/090767911)
- "LSRN: A Parallel Iterative Solver for Strongly Over- or Underdetermined Systems" (Meng et al. 2014, https://epubs.siam.org/doi/abs/10.1137/120866580 )

%}

addpath ~/Repos/randomized-algorithm-class/Code/
rng(0);

M = 8e4;
N = 5e2;
% M = 5e3; N = 20;
A = randn(M,N)*diag(logspace(0,3,N))*(randn(N)+.1*eye(N));
fprintf('Condition number is %.3e\n', cond(A) )

x = randn(N,1);
b = A*x;
b = b + .3*norm(b)/sqrt(M)*randn(M,1);

%% Solve via dense method, about 7 seconds
tic
xLS = A\b;
tm_LS = toc;
fprintf('\nSolved via classical least-squares in %.2f seconds\n\n',tm_LS);

%% Solve via another dense method, not as safe if ill-conditioned
tic
[Q,R] = qr(A,0);
xHat = R\(Q'*b);
tm_LS_2 = toc;

err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);

fprintf('== Classical algorithm 1, QR without pivoting, less robust but faster ==\n');
fprintf('Took %.2f sec, err metrics %.2e and %.2e and %.2e\n\n', ...
    tm_LS_2, err1,err2,err3);

% At least do pivoting
tic
[Q,R,e] = qr(A,0);
xHat(e) = R\(Q'*b);
tm_LS_3 = toc;

err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);

fprintf('== Classical algorithm 2, QR with column pivoting ==\n');
fprintf('Took %.2f sec, err metrics %.2e and %.2e and %.2e\n\n', ...
    tm_LS_3, err1,err2,err3);

%% Take a sketch
rng(2);
m = 40*N;
sketchType = {'count','FJLT'}; % Gaussian is too slow!
for i = 1:2
    type = sketchType{i};
    tic;
    sketchFcn = sketch( m, M, type );
    SAb = sketchFcn([A,b]);
    time_preprocess = toc;
    fprintf(' -- Sketch type %s took %.2f sec\n', type, time_preprocess );
end


%% Try the standard sketch-and-solve approach

tic;
SA = SAb(:,1:N);
Sb = SAb(:,N+1);
xHat = SA\Sb;
time_solve = toc;
err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);
fprintf('== Standard JL style sketch ==\n');
fprintf('Took %.2f = %.2f + %.2f sec, err metrics %.2e and %.2e and %.2e\n\n', ...
    time_preprocess+time_solve, time_preprocess,time_solve, err1,err2,err3);

%% Try the iterative Hessian Sketch (run the above block first to get SAb)
nBlocks = 4;
mm = floor(m/nBlocks);
tic
xHat = zeros(N,1);
bHat = b;
fprintf('== Iterative Hessian Sketch ==\n');
tic
for i = 1:nBlocks
    startInd = 1 + (i-1)*mm;
    endInd = i*mm;
    SA = sqrt(m/mm)*SAb(startInd:endInd,1:N); % renormalize!
    % Sb = SAb(startInd:endInd,N+1);
    % xx = SA\Sb; % regular sketching
    xx = (SA'*SA )\(A'*bHat);
    xHat = xHat + xx;
    bHat = bHat - A*xx;
    err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
    err2 = norm( xHat - xLS)/norm(xLS);
    err3 = norm( A*(xHat - xLS))/norm(A*xLS); % need < 1
    fprintf('  contraction factor at iter %d is %.4f\n', i, err3 );
end
time_solve_IHS = toc;
fprintf('Took %.2f = %.2f + %.2f sec, err metrics %.2e and %.2e and %.2e\n\n', ...
    time_preprocess+time_solve_IHS, time_preprocess,time_solve_IHS, err1,err2,err3);

%% Try preconditioning
fprintf('== Computing thin QR on sketched data ==\n');
tic
[Q,R] = qr( SA, 0 ); % thin QR
time_QR = toc;

k1=cond( SA/R ); % unless we had precision issues, this ought to be 1
k2=cond( A/R );  % and this thing we *hope* is small
% Note: cond( A/R ) is a nicer way to write cond( A*inv(R) )

fprintf('QR on SA took %.2f sec, cond(SA*inv(R)) is %.2f, cond(A*inv(R)) is %.2f\n\n',...
    time_QR,k1,k2);
%% For reference, use LSQR to solve, without preconditioning
fprintf('== LSQR, for reference, with 100 iterations ==\n');
tol = 1e-8;
maxit = 1e2;
tic
xHat = lsqr(A,b,tol,maxit);
time_LSQR = toc;
err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);
fprintf('Took %.2f sec for LSQR, err metrics %.2e and %.2e and %.2e\n\n', ...
    time_LSQR, err1,err2,err3);


fprintf('== LSQR, for reference, with 500 iterations ==\n');
tol = 1e-8;
maxit = 5e2;
tic
xHat = lsqr(A,b,tol,maxit);
time_LSQR = toc;
err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);
fprintf('Took %.2f sec for LSQR, err metrics %.2e and %.2e and %.2e\n\n', ...
    time_LSQR, err1,err2,err3);

%% Now try LSQR with preconditioning
fprintf('== Preconditioned LSQR (a la BLENDENPIK/LSRN) ==\n');
tol = 1e-9;
maxit = 1e2;
tic
xHat = lsqr(A,b,tol,maxit,R);
time_LSQR_R = toc;
err1 = norm( A*xHat - b )/norm(A*xLS-b) - 1;
err2 = norm( xHat - xLS)/norm(xLS);
err3 = norm( A*(xHat - xLS))/norm(A*xLS);
fprintf('Took %.2f = %.2f + %.2f + %.2f sec for LSQR, err metrics %.2e and %.2e and %.2e\n\n', ...
    time_preprocess+time_QR+time_LSQR_R, time_preprocess,time_QR,time_LSQR_R, err1,err2,err3);
-------------------------------------------------------------------------------- /Demos/demo09_RandomizedKaczmarz.m: --------------------------------------------------------------------------------
%{
Demonstration of the basic Randomized Kaczmarz Algorithm
(cf. "A Randomized Kaczmarz Algorithm with Exponential Convergence"
by Strohmer and Vershynin, 2008
http://www.springerlink.com/index/10.1007/s00041-008-9030-4 )

For fancier versions, see:

- "Acceleration of randomized Kaczmarz method via the
  Johnson-Lindenstrauss Lemma" by Y. Eldar and D. Needell 2011
- "Paved with good intentions: analysis of a randomized block Kaczmarz
  method" by D. Needell and J. Tropp 2012
- "Stochastic gradient descent, weighted sampling, and the randomized
  Kaczmarz algorithm" by D. Needell, N. Srebro and R. Ward 2016

Code: Stephen Becker 2019
%}


rng(0);
M = 3e5;
N = 1e2;
A = randn(M,N);
x0 = randn(N,1);
b = A*x0; % no noise, since we're not doing least squares, we're solving a system

tic
xLS = A\b;
tm_LS = toc;
fprintf('Solving %d x %d system via Matlab classical method takes %.2f sec\n', ...
    M,N,tm_LS );

tic
[Q,R] = qr(A,0);
xLS2 = R\(Q'*b);
tm_LS2 = toc;
fprintf('... or via a thin QR w/o pivoting takes %.2f sec\n', tm_LS2 );
%%
tic
rowNorms = sum(A.^2,2);
% At = A'; % slow, but if I do this, then can accelerate iterations
tm_preprocess = toc;
% stem( rowNorms )
prob = rowNorms/sum(rowNorms);
%%
x = zeros(N,1);
maxIter = 1e2;
errFcn = @(x) norm(x - xLS );
errList = zeros(maxIter,1);
tic
for k = 1:maxIter
    % i = randsample(M,1,true,prob);
    % x = x + (b(i)-A(i,:)*x)/rowNorms(i) * A(i,:)';

    iList = randsample(M,500,true,prob);
    for ind = 1:500
        i = iList(ind);
        x = x + (b(i)-A(i,:)*x)/rowNorms(i) * A(i,:)';
        % x = x + (b(i)-At(:,i)'*x)/rowNorms(i) * At(:,i); % faster
    end

    errList(k) = errFcn(x);
    if errList(k) < 1e-13
        break
    end
end
tm_Kaczmarz = toc;
%%
fprintf('Randomized Kaczmarz took %.2f sec = %.2f + %.2f sec; final error %.2e\n',...
    tm_preprocess + tm_Kaczmarz, tm_preprocess, tm_Kaczmarz, errList(k) );
%%
semilogy( errList,'o-','linewidth',2 )
xlabel('Epochs'); ylabel('Error'); set(gca,'fontsize',18); grid on
%% For a fair comparison, try with LSQR
tic
[xHat,flag,relres,iter] = lsqr( A, b, 1e-13, 1e3 );
tm_CG = toc;
fprintf('LSQR took %.2f sec in %d iterations; final error %.2e\n',...
    tm_CG, iter, errFcn(xHat) );
-------------------------------------------------------------------------------- /Demos/demo10_l1_regression.m: --------------------------------------------------------------------------------
%{

(1) Motivate l1 or l_p (1<=p<=infty) regression

(2) Sketching approaches
    Ref: "The Fast Cauchy Transform..." by Clarkson et al. 2016
    in SIAM J Sci Comp, http://epubs.siam.org/doi/10.1137/140963698

    see also David Woodruff's 2014 monograph

Stephen Becker

%}

%% Why l1 regression? Let's do a 1D example
rng(0);
x = 2;
A = (1:6)';
[M,N] = size(A); % N = 1 since 1D example
z = .3*randn(M,1);
b = A*x + z;

b(6) = 1.2; % gross corruption, e.g., someone typed "1.2" instead of "12"

% Notation is funny: "x" is really like a slope,
% and "A" is really like "x"
figure(1); clf;
h=plot( A, b, 'd', 'DisplayName', 'Samples','markersize',10,...
    'markerfacecolor','b');
hold all
% plot( A, A*x, '-', 'DisplayName', 'True data','linewidth',2);

% Find line of best fit, in l2 sense
xLS = A\b;
plot( A, A*xLS, '--', 'DisplayName', 'l2 fit','linewidth',2);
cvx_begin quiet
    variable x1(N)
    minimize norm( A*x1 - b , 1 )
cvx_end
plot( A, A*x1, '--', 'DisplayName', 'l1 fit','linewidth',2);
legend('location','northwest');
set(gca,'fontsize',18);


%% Check basic Johnson-Lindenstrauss results: preserve distance in lp sense
rng(0);
% nPoints = 1e3;
M = 5e2; % dimension of each point
% make the data points, some of them sparse, some weird distributions, ...
A = [log(abs(gallery('binomial',M))), gallery('dramadah',M), ...
51 | gallery('cauchy',M), gallery('hanowa',M), gallery('lotkin',M) ]; 52 | nPoints = size(A,2); 53 | 54 | clf; cspy(A); title('Depiction of matrix "A"'); 55 | % normalize it 56 | nrms2_A = sqrt( sum(A.^2,1) ); 57 | nrms1_A = sum(abs(A),1); 58 | % A = bsxfun( @times, A, 1./nrms ); 59 | 60 | %% Take the l2 sketch 61 | m = round(.3*M); 62 | addpath ~/Repos/randomized-algorithm-class/Code/ 63 | %% Gaussian sketch 64 | S = randn(m,M)/sqrt(m); 65 | SA = S*A; 66 | %% FJLT sketch 67 | ind = randperm(M,m); 68 | SA = dct(spdiags(sign(randn(M,1)),0,M,M)*A); 69 | SA = sqrt(M/m)*SA(ind,:); 70 | %% Cauchy sketch 71 | % Same as student-t with 1 degree of freedom 72 | S = trnd(1,m,M); 73 | SA = 1/m*S*A; 74 | %% Check if we've preserved l1 and l2 norms 75 | nrms2 = sqrt( sum(SA.^2,1) ); 76 | nrms1 = sum( abs(SA),1); 77 | figure(1); clf; 78 | subplot(1,2,1); 79 | histogram( nrms2./nrms2_A,'Normalization','probability' ) 80 | xlim([0,2]); 81 | title('$\|Sx\|_2/\|x\|_2$','interpreter','latex','fontsize',20); 82 | 83 | subplot(1,2,2); 84 | histogram( nrms1./nrms1_A,'Normalization','probability' ) 85 | title('$\|Sx\|_1/\|x\|_1$','interpreter','latex','fontsize',20); 86 | 87 | %% Zoom in on histograms for the case of Cauchy sketch 88 | figure(1); clf; 89 | BMIN = .5; 90 | BMAX = 40; 91 | subplot(1,2,1); 92 | histogram( nrms2./nrms2_A,'BinLimits',[BMIN,BMAX] ,'Normalization','pdf') 93 | % xlim([0,2]); 94 | title('$\|Sx\|_2/\|x\|_2$','interpreter','latex','fontsize',20); 95 | 96 | subplot(1,2,2); 97 | histogram( nrms1./nrms1_A ,'BinLimits',[BMIN,BMAX],'Normalization','pdf') 98 | title('$\|Sx\|_1/\|x\|_1$','interpreter','latex','fontsize',20); 99 | 100 | 101 | 102 | %% Interpret another way: use p-stable to estimate p-norms 103 | innerProds = sqrt( abs(sum(SA,1)) ); % abs after sum 104 | 105 | figure(1); clf; 106 | BMIN = .5; 107 | BMAX = 10; 108 | histogram( innerProds./nrms1_A,'BinLimits',[BMIN,BMAX] ,'Normalization','pdf') 109 | % xlim([0,2]); 110 | title('$\sqrt{E|\langle x, s \rangle|^2} /\|x\|_1$','interpreter','latex','fontsize',20); 111 | 112 | 113 | 114 | 115 | 116 | 117 | %% Regression 118 | rng(0); 119 | M = 1e3; 120 | N = 1e2; 121 | A = rand(M,N); 122 | x0 = randn(N,1); 123 | b = A*x0 + randn(M,1); 124 | 125 | % Solve large problem for reference solution 126 | tic 127 | cvx_begin quiet 128 | variable x(N) 129 | minimize norm( A*x - b, 1 ) 130 | cvx_end 131 | toc 132 | xRef = x; 133 | %% Make well-conditioned basis 134 | rng(1); 135 | m = round(.5*M); 136 | for i = 1:2 137 | switch i 138 | case 1 139 | % Cauchy sketch: 140 | S = trnd(1,m,M)/sqrt(m); 141 | fprintf('\nUsing Cauchy sketch\n'); 142 | case 2 143 | % Gaussian sketch 144 | S = randn(m,M)/sqrt(m); 145 | fprintf('\nUsing Gaussian sketch\n'); 146 | end 147 | 148 | SA = S*A; 149 | 150 | 151 | [Q,R] = qr(SA,0); 152 | Q = A/R; 153 | % estimate l1 leverage scores 154 | levScores = sum( abs(Q), 2 ); 155 | % weighted sampling 156 | ind = randsample(M,round(.5*M), true,levScores ); 157 | 158 | % == Solve smaller problem 159 | tic 160 | cvx_begin quiet 161 | variable x(N) 162 | minimize norm( A(ind,:)*x - b(ind), 1 ) 163 | cvx_end 164 | toc 165 | er1=norm( x - xRef )/norm(xRef ); 166 | er2=norm( A*x - b, 1 )/norm( A*xRef - b, 1 ) - 1; 167 | fprintf('||x-xRef| is %.2e, ||Ax-b||_1/||AxRef-b||_1-1 is %.2e\n', er1,er2); 168 | 169 | end 170 | -------------------------------------------------------------------------------- /Demos/demo11_JamesSteinEstimator.m: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | 
James-Stein Estimate 4 | Proof that the MLE is not admissible (in dimensions p>=3 at least) 5 | 6 | see 7 | http://statweb.stanford.edu/~ckirby/brad/LSI/chapter1.pdf 8 | and 9 | http://statweb.stanford.edu/~ckirby/brad/other/CASI_Chap7_Nov2014.pdf 10 | for connection to Empirical Bayes... 11 | 12 | (it's also similar to the idea of control variates) 13 | 14 | %} 15 | 16 | p = 50; % dimension 17 | sigma = .3; 18 | 19 | nReps = 1e3; 20 | 21 | mu = zeros(p,1); 22 | v = .5*ones(p,1); % arbitrary fixed vector 23 | % v = .1*randn(p,1); 24 | 25 | sampleMeans = zeros(p,2); 26 | firstCoordinate = zeros(nReps,2); 27 | avgError = zeros(nReps,2); 28 | errFcn = @(xhat) norm(xhat-mu)^2; 29 | for r = 1:nReps 30 | 31 | y = mu + sigma*randn(p,1); 32 | 33 | % MLE is y 34 | sampleMeans(:,1) = sampleMeans(:,1) + y; 35 | firstCoordinate(r,1)= y(1); 36 | avgError(r,1) = errFcn(y); 37 | 38 | % James-Stein estimator 39 | xhat = (1 - (p-3)*sigma^2/( norm(y-v)^2 ) )*(y-v) + v; 40 | sampleMeans(:,2) = sampleMeans(:,2) + xhat; 41 | firstCoordinate(r,2)=xhat(1); 42 | avgError(r,2) = errFcn(xhat); 43 | 44 | end 45 | sampleMeans = sampleMeans/nReps; 46 | 47 | %% Analyze results 48 | figure(1); clf; 49 | boxplot( avgError,'Labels',{'MLE','James-Stein'} ) 50 | set(gca,'fontsize',18); 51 | title('Values of $\|\hat{x} - \mu\|_2^2$','interpreter','latex') 52 | 53 | %% Look at estimate of the first coordinate: is it biased? 54 | % (mu and v are all the same in all coordinates, so just pick the first 55 | % coordinate, since then it's easy to show graphically) 56 | figure(1); clf; 57 | boxplot( firstCoordinate,'Labels',{'MLE (unbiased)','James-Stein (biased!)'} ) 58 | set(gca,'fontsize',18); 59 | title('First coordinate of the estimate'); 60 | line([-.5,2.5],[0,0],'color','k','linestyle','--') -------------------------------------------------------------------------------- /Demos/demo12_CompressedSensing.m: -------------------------------------------------------------------------------- 1 | % demonstrate Compressed Sensing ideas 2 | % This code requires CVX (cvxr.org) 3 | 4 | rng(0); 5 | 6 | N = 100; % dimensionality of signal 7 | s = 5; % sparsity of signal 8 | 9 | x0 = zeros(N,1); 10 | x0( randperm(N,s) ) = rand(s,1); % random entries 11 | 12 | % Try this for different values of m. How low can you go? 13 | m = 4*s; 14 | % m = round( 2.5*s ); % theoretical lower limit is 2*s 15 | A = randn(m,N); % Sensing matrix 16 | 17 | % figure(1); clf; imagesc(A); axis image 18 | 19 | y = A*x0; 20 | 21 | cvx_begin quiet 22 | variable x1(N) 23 | minimize norm(x1,1) 24 | subject to 25 | A*x1 == y 26 | cvx_end 27 | 28 | x1( abs(x1) < 1e-9 ) = 0; 29 | 30 | x2 = pinv(A)*y; % least-squares solution 31 | 32 | %% Plot 33 | figure(1); clf; 34 | stem( find(x0), x0(find(x0)), 'd' , 'markersize',10); 35 | hold all 36 | stem( find(x1), x1(find(x1)), 'o','MarkerFaceColor','r'); 37 | stem( x2, '*' ); 38 | set(gca,'fontsize',16) 39 | legend('Original','l1','l2','location','best'); 40 | 41 | %% Can we do this with a combinatorial algorithm? No 42 | % Assuming we know s 43 | nchoosek(N,s) % # of permutations to try 44 | 45 | % Make a list of all permutations... or not. Pretty slow! 
46 | % This doesn't even account for the cost required per permutation 47 | tic; 48 | list = nchoosek( 1:N, 2 ); 49 | toc 50 | 51 | tic; 52 | list = nchoosek( 1:N, 3 ); 53 | toc 54 | 55 | tic; 56 | list = nchoosek( 1:N, 4 ); 57 | toc 58 | 59 | tic; 60 | list = nchoosek( 1:N, 5 ); 61 | toc -------------------------------------------------------------------------------- /Demos/demo13_EDM.mlx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Demos/demo13_EDM.mlx -------------------------------------------------------------------------------- /Demos/demo13_EDM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Demos/demo13_EDM.pdf -------------------------------------------------------------------------------- /Demos/demo14_MonteCarlo_and_improvements.m: -------------------------------------------------------------------------------- 1 | %{ 2 | Discusses Monte Carlo in the context of integration: 3 | 4 | - There are many ways to integrate functions 5 | 6 | - Deterministic "quadrature" rules are fancy Riemann Sums, and 7 | will work *very well* if the integrand is smooth 8 | They break down when the integrand is highly oscillatory, 9 | and/or for high-dimensional integrals. Special versions targeted 10 | for oscillatory integrals are the subject of current applied math 11 | research. 12 | 13 | - Monte Carlo integration interprets the integral as an expectation 14 | of a random variable, and draws samples to approximate the true mean 15 | with a sample mean. 16 | For a smooth function, Monte Carlo integration is a bad idea because 17 | classical quadrature rules are much, much better 18 | 19 | - Monte Carlo is slow/inaccurate, but the inaccuracy is independent 20 | of the dimension of the integral. So for large enough dimensions, 21 | it makes sense (while in large dimensions, making a deterministic 22 | grid is impossible since it will be too large) 23 | 24 | - Since Monte Carlo is useful sometimes, there are many known techniques 25 | to make it better. We examine two: 26 | -- Quasi Monte Carlo, which uses low-discrepancy sequences, and 27 | inherits some of the advantages and disadvantages from 28 | both Monte Carlo and grid/quadrature methods. 29 | Refs: 30 | - https://en.wikipedia.org/wiki/Low-discrepancy_sequence#Construction_of_low-discrepancy_sequences 31 | - "High-dimensional integration: The quasi-Monte Carlo way" by Dick, Kuo 32 | and Sloan (Acta Numerica, 2013) 33 | -- Control variates as a means of variance reduction 34 | Refs: 35 | - https://en.wikipedia.org/wiki/Control_variates 36 | 37 | Stephen Becker, University of Colorado, April 2019 38 | %} 39 | 40 | %% Integrate sin(x)/x from 0 to 1 (e.g. Si(1), Si is Sine Integral) 41 | %{ 42 | The sine integral, Si(z), is the integral of sin(x)/x from 0 to z 43 | where we define sin(0)/0 to be 0 (consistent with the limit) 44 | 45 | This integral is not known in closed form 46 | See https://en.wikipedia.org/wiki/Trigonometric_integral#Sine_integral 47 | 48 | How can we approximate it? There are specialized techniques that are 49 | faster and more accurate than what we will discuss here, but we'll 50 | treat it via the integral definition and try to numerically 51 | evaluate the integral.
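(Added note, standard facts for context: on n equal subintervals of width h, the composite midpoint and trapezoid rules used below have O(h^2) error and composite Simpson's rule has O(h^4) error for smooth integrands, while plain Monte Carlo with N samples has RMS error O(N^{-1/2}) regardless of dimension; that is why the quadrature rules win easily on this smooth 1D example.)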
52 | %} 53 | si = sinint(1); % get fairly accurate answer using Matlab's symbolic toolbox 54 | f = @(x) sinc(x/pi); % equivalent to sin(x)/x and f(0)=0 55 | N = 1e2+1; % keep it odd for my composite Simpson's to work 56 | xgrid = linspace(0,1,N); 57 | dx = xgrid(2)-xgrid(1); 58 | fx = f(xgrid); 59 | composite_mid = dx*sum(f(xgrid(2:end)-dx/2)); % open formula 60 | composite_trap = dx*( sum(fx) -fx(1)/2 - fx(end)/2 ); 61 | composite_simp = dx/3*( fx(1)+fx(end)+ 4*sum(fx(2:2:end-1)) + 2*sum(fx(3:2:end-1)) ); 62 | si - composite_mid 63 | si - composite_trap 64 | si - composite_simp 65 | 66 | %% 2a visualize discrepancy of random numbers on [0,1] 67 | 68 | N = 1e3; 69 | setA = sort(rand(N,1)); 70 | setB = [.5*setA(1:2:end); .5 + .5*setA(2:2:end)]; 71 | 72 | figure(1); clf; 73 | plot( setA, 'linewidth',2 ); hold all; plot( setB, 'linewidth',2 ); 74 | legend('uniform random','lower discrepancy'); 75 | line([0,N],[0,1],'linestyle','--','color','k'); 76 | %% more plots 77 | figure(1); clf; 78 | area( smooth(setA - linspace(0,1,N)') ); hold all 79 | ar=area( smooth(setB - linspace(0,1,N)') ); line([0,N],[0,0],'color','k'); 80 | ar.FaceAlpha = 0.5; ar.FaceColor = 'r'; 81 | legend('uniform random','lower discrepancy'); 82 | %% more plots 83 | clf; 84 | histogram( diff(setA) ); hold all 85 | histogram( diff(setB) ); legend('uniform random','lower discrepancy'); 86 | title('Separation distances in random "grid"') 87 | %% Try Monte Carlo evaluation of Si(1) 88 | N = 1e2; 89 | setA = sort(rand(N,1)); 90 | setB = [.5*setA(1:2:end); .5 + .5*setA(2:2:end)]; 91 | 92 | int_MonteCarlo = mean(f(setA)); 93 | int_QuasiMonteCarlo = mean( f(setB) ); 94 | 95 | % Add in control variate 96 | % Use sin(x)/x ~ 1 - x^2/6 (first part of Taylor series) 97 | g = @(x) 1 - x.^2/6; 98 | % The integral (or mean/expectation) of g over [0,1] is: 99 | int_g = 17/18; 100 | % si - int_g % already a fairly good approximation 101 | fx = f(setA); 102 | gx = g(setA); 103 | % Estimate covariance and variance of gx 104 | cv = cov(fx,gx); 105 | c = -cv(1,2)/cv(2,2); % estimated optimal control variate parameter 106 | int_ControlVariate = int_MonteCarlo + c*(mean(gx)-int_g); 107 | int_ControlVariate_quasi = int_QuasiMonteCarlo + c*(mean(g(setB))-int_g); 108 | fprintf('\nError is %10.3e for plain Monte Carlo\n', si - int_MonteCarlo ); 109 | fprintf('Error is %10.3e for Quasi Monte Carlo\n', si - int_QuasiMonteCarlo ); 110 | fprintf('Error is %10.3e for 2nd order Taylor Series\n', si - int_g ); 111 | fprintf('Error is %10.3e for Control-Variate Monte Carlo\n', si - int_ControlVariate ); 112 | fprintf('Error is %10.3e for Control-Variate Quasi Monte Carlo\n', si - int_ControlVariate_quasi ); 113 | fprintf('Error is %10.3e for quadrature (composite Trapezoidal Rule)\n',si - composite_trap); 114 | fprintf('Error is %10.3e for quadrature (composite Simpson''s Rule)\n',si - composite_simp); -------------------------------------------------------------------------------- /Demos/demo15_SGD.m: -------------------------------------------------------------------------------- 1 | %{ 2 | Stochastic Gradient Descent (SGD) and variants, 3 | demonstrated on the primal support vector machine (SVM) problem 4 | with MNIST data.
5 | 6 | Primal SVM: 7 | 8 | min_w ||w||^2/2 + C \sum_i^N hinge( y_i w'*x_i ) 9 | where 10 | hinge(a) = max( 0, 1 - a ) 11 | 12 | We will pick 2 classes from MNIST (not all 10) 13 | If we pick class 0 and class 1, it's very easy (accuracy is > 99% in 1 14 | step) 15 | Try harder classes, like 4 and 9 16 | 17 | Compare SGD with batch gradient descent, 18 | as well as 19 | - minibatch 20 | - SAGA (minibatch) 21 | - SVRG (minibatch) 22 | 23 | It's all quite messy, since stepsizes are a big issue (and other 24 | parameters) 25 | 26 | %} 27 | 28 | load('~/Google Drive/TeachingDocs/APPM5650_Fall21_Randomized/Code/mnist_data_all.mat'); 29 | percentCorrect = @(labels1,labels2) length(find(labels1==labels2))/length(labels1); 30 | 31 | %% In prep for SVM, reduce to just two labels 32 | A = 4; B = 9; % harder to distinguish 33 | % A = 0; B = 1; % very easy to distinguish 34 | 35 | indx = (Train_labels==A) | (Train_labels==B); 36 | % convert to -1, +1 labels 37 | Train_labels_2class = Train_labels( indx ); 38 | Train_labels_2class( Train_labels_2class==A ) = -1; 39 | Train_labels_2class( Train_labels_2class==B ) = +1; 40 | Train_2class = Train( indx, : ); 41 | 42 | indx = (Test_labels==A) | (Test_labels==B); 43 | Test_labels_2class = Test_labels( indx ); 44 | Test_labels_2class( Test_labels_2class==A ) = -1; 45 | Test_labels_2class( Test_labels_2class==B ) = +1; 46 | Test_2class = Test( indx, : ); 47 | 48 | clear Test Test_labels Train Train_labels 49 | 50 | hist(Train_labels_2class ) % make sure it looks OK, about equal 51 | %% Plot hinge loss 52 | x = linspace(-3,3,40); 53 | plot( x, max(0,1-x),'linewidth',2) 54 | line([-3,3],[0,0],'linestyle','--','color','k'); 55 | line([0,0],[-1,4],'linestyle','--','color','k'); 56 | ylim([-1,4]); title('Hinge loss'); 57 | 58 | %% Try SVM... but apply to just two classes 59 | % Pick the dataset 60 | X = Train_2class; 61 | X = [X,ones(size(X,1),1)]; % allow for an offset 62 | y = Train_labels_2class; % this is now -1, +1 63 | 64 | yX = bsxfun( @times, X, y ); 65 | [N,p] = size( X ); 66 | 67 | C1 = 1e-2; % constant for SVM model 68 | C2 = 1/N; % constant for SVM model 69 | 70 | maxIts = 5e2; 71 | minibatch_ratio = .1; % 10% sampling 72 | minibatch_n = round(minibatch_ratio*N); 73 | 74 | ALGONAMES = {'Gradient Descent','SGD fixed stepsize','SGD decaying stepsize',... 
75 | 'SGD minibatch','SAGA minibatch','SVRG'}; 76 | errList = zeros(length(ALGONAMES),maxIts,3); % 3 types of errors 77 | for ALGO = 1:length(ALGONAMES) 78 | fprintf('\nAlgorithm: %s\n', ALGONAMES{ALGO} ); 79 | 80 | w = zeros(p,1); % our variable 81 | switch ALGO 82 | case {1,2} 83 | decay_gamma = false; 84 | otherwise 85 | decay_gamma = true; 86 | end 87 | switch ALGO % choose learning rate 88 | case {1,4} 89 | gamma = 1e-5; % 1e-2 is too big 90 | case 5 % SAGA 91 | gamma = 1e-4; % do it with minibatch 92 | case 6 % SVRG 93 | gamma = 1e-6; 94 | otherwise 95 | gamma = 1e-5; % SGD needs smaller stepsize 96 | end 97 | 98 | for k = 1:maxIts 99 | 100 | % f(w) = C1 ||w||^2/2 + C2 ones(n,1)*hinge( diag(y)*X*w ) 101 | % where 102 | % hinge(a) = max( 0, 1 - a ) 103 | % so d(hinge)/da = { -1 (a <= 1); 0 (a > 1) } 104 | 105 | % for SGD without minibatch, let's take more steps 106 | extraStepsBase = 100; 107 | if ALGO == 2 || ALGO == 3 108 | extraSteps = extraStepsBase; 109 | else 110 | extraSteps = 1; 111 | end 112 | 113 | for steps = 1:extraSteps 114 | switch ALGO 115 | case 1 % deterministic gradient descent (full batch) 116 | a = yX*w; % helper variable 117 | grad = yX'*( -(a<=1) ); % full gradient step 118 | case {2,3} % SGD, single draw 119 | ind = randperm(N,1); 120 | a = yX(ind,:)*w; % helper variable 121 | grad = N*yX(ind,:)'*( -(a<=1) ); 122 | case {4} % minibatch 123 | ind = randperm(N,minibatch_n); 124 | a = yX(ind,:)*w; % helper variable 125 | grad = (N/minibatch_n)*yX(ind,:)'*( -(a<=1) ); 126 | case {5} % SAGA 127 | if k==1 128 | % First iteration is special: make full pass through 129 | % data 130 | a = yX*w; % helper variable 131 | grad = yX'*( -(a<=1) ); % full gradient step 132 | a_storage = a; % store this 133 | grad_storage= grad; 134 | else 135 | %ind = randperm(N,1); 136 | ind = randperm(N,minibatch_n); 137 | a = yX(ind,:)*w; % helper variable 138 | grad_ind_new = yX(ind,:)'*( -(a<=1) ); 139 | 140 | % Combine: 141 | grad_ind_old = yX(ind,:)'*( -(a_storage(ind)<=1) ); 142 | grad = N/minibatch_n*grad_ind_new - ... 143 | N/minibatch_n*grad_ind_old + grad_storage; 144 | % Update storage table: 145 | a_storage(ind) = a; 146 | grad_storage = grad_storage ... 147 | - grad_ind_old + grad_ind_new; 148 | end 149 | case 6 150 | % SVRG 151 | a = yX*w; % helper variable 152 | grad = yX'*( -(a<=1) ); % full gradient step 153 | % Make a bunch of micro steps now 154 | z = w; 155 | for kk = 1:50 156 | ind = randperm(N,round(N/minibatch_n)); 157 | a_z = yX(ind,:)*z; % helper variable 158 | grad_z = yX(ind,:)'*( -(a_z<=1) ); 159 | z = z - gamma*( C1*z + 1/minibatch_n*grad_z +C2*grad ); 160 | end 161 | w = z; 162 | 163 | end 164 | 165 | if decay_gamma && ~mod( k, 50*extraSteps ) 166 | gamma = gamma/2; 167 | end 168 | 169 | % Combine to get full gradient, take gradient descent step 170 | if ALGO ~= 6 % SVRG does its own update 171 | w = w - gamma*(C1*w + C2*grad ); 172 | end 173 | end % end extraSteps 174 | 175 | % Record metrics: 176 | % Cost function (expensive to calculate...
for academic purposes) 177 | Xw = X*w; 178 | f = C1*norm(w)^2/2 + C2*sum( max(0,1-y.*Xw) ); 179 | % Percent correct (pc) for test/train 180 | IDX_Train = sign( Xw ); % no need to find best permutation 181 | pc = percentCorrect(IDX_Train,y); 182 | IDX_Test = sign( Test_2class*w(1:end-1) + w(end) ); % allow offset 183 | pc_test = percentCorrect( IDX_Test, Test_labels_2class); % already -1, +1 184 | errList(ALGO,k,1) = f; 185 | errList(ALGO,k,2) = pc; 186 | errList(ALGO,k,3) = pc_test; 187 | if ~mod( k, 25 ) 188 | fprintf('Iter %3d, train accuracy %.2f%%, test accuracy %.2f%%, objective %.2f\n', ... 189 | k, pc*100, pc_test*100, f ); 190 | end 191 | end 192 | end 193 | %% Plot, x-axis is iteration (so misleading) 194 | figure(1); clf; 195 | % subplot(1,3,1) 196 | offset = min( min( errList(:,:,1) ) )-1e-5; 197 | semilogy( errList(1,:,1) - offset, 'linewidth',2 ) 198 | hold all 199 | % semilogy( errList(2,:,1) - offset, 'linewidth',2 ) 200 | % semilogy( errList(3,:,1) - offset, 'linewidth',2 ) 201 | semilogy( linspace(0,maxIts*extraStepsBase,maxIts), errList(2,:,1) - offset, 'linewidth',2 ) 202 | semilogy( linspace(0,maxIts*extraStepsBase,maxIts), errList(3,:,1) - offset, 'linewidth',2 ) 203 | 204 | semilogy( errList(4,:,1) - offset, 'linewidth',2 ) 205 | semilogy( errList(5,:,1) - offset, 'linewidth',2 ) 206 | semilogy( errList(6,:,1) - offset, 'linewidth',2 ) 207 | title('SVM Objective function - true value'); 208 | ylabel('SVM Objective function - true value'); 209 | xlabel('Iteration'); 210 | % xlim([0,maxIts]); 211 | legend( ALGONAMES ) 212 | %% Replot, with corrected x-axis (now epochs) 213 | figure(1); clf; 214 | % subplot(1,3,1) 215 | % plotFcn = @semilogy; 216 | plotFcn = @loglog; 217 | plotFcn( errList(1,:,1) - offset, 'linewidth',2 ) 218 | hold all 219 | plotFcn( linspace(0,maxIts/N*extraStepsBase,maxIts), errList(2,:,1) - offset, 'linewidth',2 ) 220 | plotFcn( linspace(0,maxIts/N*extraStepsBase,maxIts), errList(3,:,1) - offset, 'linewidth',2 ) 221 | plotFcn( linspace(0,maxIts/minibatch_n,maxIts), errList(4,:,1) - offset, 'linewidth',2 ) 222 | plotFcn( 1+linspace(0,maxIts/minibatch_n,maxIts), errList(5,:,1) - offset, 'linewidth',2 ) 223 | plotFcn( 1:2:(2*maxIts), errList(6,:,1) - offset, 'linewidth',2 ) 224 | title('SVM Objective function - true value'); 225 | ylabel('SVM Objective function - true value'); 226 | xlabel('Epoch'); 227 | legend( ALGONAMES ) 228 | % xlim([0,3]); 229 | 230 | %% Look at misclassification rate (x-axis is iteration, misleading) 231 | figure(1); clf; 232 | errMetric = 2; % train 233 | % errMetric = 3; % test 234 | % plotFcn = @semilogy; 235 | plotFcn = @plot; 236 | plotFcn( 1-errList(1,:,errMetric), 'linewidth',2 ) 237 | hold all 238 | plotFcn( linspace(0,maxIts*extraStepsBase,maxIts), 1-errList(2,:,errMetric), 'linewidth',2 ) 239 | plotFcn( linspace(0,maxIts*extraStepsBase,maxIts), 1-errList(3,:,errMetric), 'linewidth',2 ) 240 | plotFcn( 1-errList(4,:,errMetric), 'linewidth',2 ) 241 | plotFcn( 1-errList(5,:,errMetric), 'linewidth',2 ) 242 | plotFcn( 1-errList(6,:,errMetric), 'linewidth',2 ) 243 | title('Error, training data'); 244 | % title('Error, testing data'); 245 | ylabel('Misclassification rate'); 246 | legend( ALGONAMES ) 247 | xlabel('Iteration'); 248 | ylim([0,.15]); 249 | xlim([0,maxIts]); 250 | %% Look at misclassification rate, corrected axis 251 | figure(1); clf; 252 | % errMetric = 2; % train 253 | errMetric = 3;% test 254 | plotFcn = @loglog; 255 | plotFcn( 1-errList(1,:,errMetric), 'linewidth',2 ) 256 | hold all 257 | plotFcn(
linspace(0,maxIts/N*extraStepsBase,maxIts), 1-errList(2,:,errMetric), 'linewidth',2 ) 258 | plotFcn( linspace(0,maxIts/N*extraStepsBase,maxIts), 1-errList(3,:,errMetric), 'linewidth',2 ) 259 | plotFcn( linspace(0,maxIts/minibatch_n,maxIts), 1-errList(4,:,errMetric), 'linewidth',2 ) 260 | plotFcn( 1+linspace(0,maxIts/minibatch_n,maxIts), 1-errList(5,:,errMetric), 'linewidth',2 ) 261 | plotFcn( 1:2:(2*maxIts), 1-errList(6,:,errMetric), 'linewidth',2 ) 262 | if errMetric == 2 263 | title('Error, training data'); 264 | elseif errMetric == 3 265 | title('Error, testing data'); 266 | end 267 | ylabel('Misclassification rate'); 268 | legend( ALGONAMES ) 269 | xlabel('Epoch'); 270 | %% Visualize separating hyperplane 271 | clf; 272 | imagesc( reshape(w(1:end-1),28,28) ) 273 | -------------------------------------------------------------------------------- /Demos/demo16_LSH.m: -------------------------------------------------------------------------------- 1 | %{ 2 | Examples of Locality Sensitive Hashing (LSH): 3 | 4 | (1) MinHash for Jaccard Similarity 5 | (2) Euclidean Norm 6 | (3) SimHash for Cosine distance 7 | 8 | For MinHash, let's suppose we're motivated by hashing documents and then checking for their 9 | similarity, so we can check if there is plagiarism in a work 10 | See https://www.bowdoin.edu/dean-of-students/judicial-board/academic-honesty-and-plagiarism/examples.html 11 | for plagiarism examples 12 | 13 | Stephen Becker 14 | %} 15 | 16 | %% Plagiarism Demo: First, convert some sentences/documents to nicer form 17 | sentence = {}; 18 | sentence{1} = 'Only two years later, all these friendly Sioux were suddenly plunged into new conditions, including starvation, martial law on all their reservations, and constant urging by their friends and relations to join in warfare against the treacherous government that had kept faith with neither friend nor foe'; 19 | sentence{2} = 'Only two years later, all these nice Sioux were suddenly thrust into new types of conditions, including starvation, martial law on all their reservations, and constant urging by their friends and relations to join in warfare against the treacherous government that had kept faith with neither friend nor foe'; 20 | sentence{3} = 'In ages which have no record these islands were the home of millions of "Contrast the condition into which all these friendly Indians are suddenly plunged now, with their condition only two years previous: martial law now in force on all their reservations; themselves in danger of starvation, and constantly exposed to the influence of emissaries from their friends and relations, urging them to join in fighting this treacherous government that had kept faith with nobody--neither with friend nor with foe'; 21 | 22 | % Pre-process, and make k-shingles 23 | % (and usually you then hash the k-shingles down further) 24 | k = 9; 25 | 26 | % https://www.mathworks.com/matlabcentral/answers/3314-hash-function-for-matlab-struct 27 | Engine = java.security.MessageDigest.getInstance('MD5'); 28 | for i = 1:3 29 | s = sentence{i}; 30 | s = s( ~isspace( s ) & (s~=',') & (s~='"') ); % remove some characters 31 | vec = []; 32 | for start = 1:length(s)-k+1 33 | ss = s(start:start+k-1); % k-shingle 34 | % Now, hash it.
Use MD5 or SHA since Java does that for me 35 | % Engine = java.security.MessageDigest.getInstance('MD5'); 36 | Engine = java.security.MessageDigest.getInstance('SHA'); 37 | Engine.update(typecast(uint16(ss), 'uint8')); 38 | hash = Engine.digest; % 8 bits per (so 1 byte); keep a few of these 39 | hash = uint16(typecast( hash(1:2), 'uint8' )); % and remove signs 40 | % h = dec2bin( hash ); 41 | % h = uint16( bin2dec( h(:)' ) ); % we kept 2 bytes, so 16 bit 42 | % (Above line is slow, and it merges binary vectors in a funny way. 43 | % Better is this line below:) 44 | h = 2^8*hash(1) + hash(2) + 1; % make it 1-based; be careful to make sure everything is uint16 not uint8 or you have overflow! 45 | vec = [vec,h]; % append 46 | % sprintf('%.2x',double(typecast(hash, 'uint8'))) 47 | end 48 | sentence{i}=vec; 49 | end 50 | % intmax('uint16') % max is 2^16 = 65k 51 | %% 52 | JaccardSim = @(A,B) length(intersect(A,B))/length(union(A,B)); 53 | disp( JaccardSim( sentence{1}, sentence{2} ) ) 54 | disp( JaccardSim( sentence{1}, sentence{3} ) ) 55 | disp( JaccardSim( sentence{2}, sentence{3} ) ) 56 | % for i = 1:3, disp(length(sentence{i})); end 57 | %% Apply minhash (many of them), naive version 58 | % Need a universe of all possible entries 59 | % Either take union(...) or use max of uint... 60 | L = 10; % Number of hashes to draw (L=20 to visualize) 61 | % L = 1e3; % To check 62 | MinHashSignatures = zeros(3,L); 63 | for ell = 1:L 64 | P = randperm( intmax('uint16') ); % random permutation of 1, ..., 65k 65 | for i = 1:3 66 | MinHashSignatures(i,ell) = min(P(sentence{i})); 67 | end 68 | end 69 | if L <= 20 70 | disp(MinHashSignatures) 71 | end 72 | %% Check 73 | for i = 1:3 74 | for j = (i+1):3 75 | prob = sum( MinHashSignatures(i,:) == MinHashSignatures(j,:) )/L; 76 | fprintf('%d vs %d: JaccDiff is %.2f, %% hash collisions is %.2f\n', ... 77 | i,j,JaccardSim( sentence{i}, sentence{j} ), prob ); 78 | end 79 | end 80 | 81 | %% To tune definition of "neighbors", make bands 82 | % If documents match in *any* band, then declare them 83 | % a possible neighbor. 84 | rng(0); 85 | b = 20; % # of bands 86 | r = 5; % # hashes per band (if small, then more collisions) 87 | L = b*r; 88 | MinHashSignatures = zeros(3,b); 89 | for bi = 1:b 90 | temp = zeros(3,r); 91 | for ri = 1:r 92 | P = randperm( intmax('uint16') ); 93 | for i = 1:3 94 | temp(i,ri) = min(P(sentence{i})); 95 | end 96 | end 97 | % For this band, we have r LSH hashes. Combine these r LSH hashes 98 | % by... hashing them together! 99 | % (This last hash is not a LSH, it's a traditional one) 100 | for i = 1:3 101 | Engine = java.security.MessageDigest.getInstance('SHA'); 102 | Engine.update(typecast(uint16(temp(i,:)), 'uint8')); 103 | hash = Engine.digest; 104 | hash = uint16(typecast( hash(1:2), 'uint8' )); % remove signs 105 | % h = dec2bin( hash ); 106 | % h = uint16( bin2dec( h(:)' ) ); 107 | h = 2^8*hash(1) + hash(2) + 1; % make it 1-based 108 | MinHashSignatures(i,bi) = h; 109 | end 110 | end 111 | MinHashSignatures 112 | 113 | 114 | %% Try some other hashes, like Euclidean norm distance 115 | % Note: for this LSH, probability of collision isn't identically 116 | % proportional to the Euclidean distance, but it is a valid LSH 117 | addpath ~/Repos/randomized-algorithm-class/Code/ 118 | 119 | rng(0); 120 | p = 100; 121 | N = 10; 122 | X = randn(N/2,p); 123 | X = [ X; X + .1*randn(N/2,p) ]; % so some correlated rows 124 | 125 | Dist = pdist2_faster( X, X, 'sqeuclidean' ); 126 | 127 | % Now, let's hash these...
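% --- Added illustration (not part of the original demo) --- a single
% Euclidean-distance LSH function has the form
%     h(x) = floor( (v'*x + b)/a ),   v ~ N(0,I),  b ~ Unif[0,a],
% so nearby points land in the same bucket with higher probability.
% Minimal sketch; vTmp, aTmp, bTmp, hTmp are throwaway names:
vTmp = randn(p,1); aTmp = 5; bTmp = aTmp*rand(1);
hTmp = @(xRow) floor( (xRow*vTmp + bTmp)/aTmp );  % xRow is one row of X
disp( [hTmp(X(1,:)), hTmp(X(1+N/2,:)), hTmp(X(2,:))] ) % rows 1 and 1+N/2 are a near pair, so they usually agree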
128 | a = .1; 129 | 130 | % Now, combine via banded strategy 131 | rng(0); 132 | b = 20; % # of bands 133 | r = 5; % # hashes per band (if small, then more collisions) 134 | L = b*r; 135 | Signatures = zeros(N,b); 136 | for bi = 1:b 137 | temp = zeros(N,r); 138 | for ri = 1:r 139 | v = randn(p,1); v = v/norm(v); % random unit vector 140 | bb = a*rand(1); % random offset, uniform in [0,a] 141 | temp(:,ri) = floor( (X*v + bb)/a ); 142 | end 143 | % For this band, we have r LSH hashes. Combine these r LSH hashes 144 | % by... hashing them together! 145 | % (This last hash is not a LSH, it's a traditional one) 146 | for i = 1:N 147 | Engine = java.security.MessageDigest.getInstance('SHA'); 148 | Engine.update(typecast(uint16(temp(i,:)), 'uint8')); 149 | hash = Engine.digest; 150 | hash = uint16(typecast( hash(1:2), 'uint8' )); % remove signs 151 | % h = dec2bin( hash ); 152 | % h = uint16( bin2dec( h(:)' ) ); 153 | h = 2^8*hash(1) + hash(2) + 1; % make it 1-based 154 | Signatures(i,bi) = h; 155 | end 156 | end 157 | Signatures 158 | 159 | 160 | %% Try some other hashes, like SimHash for cosine distances 161 | % For this one, the chance of collision is a linear function of the distance 162 | 163 | rng(0); 164 | p = 100; 165 | % N = 10; 166 | N = 1e2; 167 | X = randn(N/2,p); 168 | X = [ X; X + .1*randn(N/2,p) ]; % so some correlated rows 169 | 170 | % Look at cosine distances between all the points in X 171 | nrms = sqrt( sum(X.^2,2) ); 172 | cosDist = real( acos( X*X'./( nrms*nrms' ) )); 173 | 174 | 175 | % Check if we have collisions at a rate proportional to cosDist: yes! 176 | r = 1e4; % repeat it a lot to collect statistics 177 | CollisionFrequency = zeros(N,N); 178 | temp = zeros(N,1); 179 | for ri = 1:r 180 | v = randn(p,1); 181 | temp = sign(X*v); 182 | % This will be slow... 183 | for i = 1:N 184 | simInd = find( temp == temp(i) ); 185 | CollisionFrequency(i,simInd) = CollisionFrequency(i,simInd) + 1; 186 | end 187 | end 188 | CollisionFrequency = CollisionFrequency/r; 189 | TrueFrequency = 1 - cosDist/pi; 190 | if N <= 10 191 | disp( CollisionFrequency ) 192 | disp( TrueFrequency ) 193 | disp( CollisionFrequency - TrueFrequency ) 194 | else 195 | [TrueFreq_sorted, sort_ind] = sort( TrueFrequency(:) ); 196 | figure(1); clf; 197 | scatter( TrueFreq_sorted(:), CollisionFrequency( sort_ind ),'r.' ); 198 | hold all 199 | line( [0,1],[0,1],'linestyle','--','color','k') 200 | xlabel('Predicted collision probability (1 - \theta/\pi)'); 201 | ylabel('Frequency of LSH collision'); 202 | end 203 | 204 | % We can also combine these in the same banding technique... -------------------------------------------------------------------------------- /Demos/demo17_kNN_via_LSH.m: -------------------------------------------------------------------------------- 1 | %{ 2 | unweighted k-Nearest Neighbors 3 | 4 | Note: everything would probably be faster if we stored the data 5 | where columns are new data points, not rows, but we're sticking 6 | with the row convention since it's more common (and Matlab uses it 7 | for their functions, even though it goes against their natural 8 | data structure). 9 | 10 | Compare also with Matlab's knnsearch implementation 11 | If dimension p < 10 then this can exploit a kd-tree at training time, 12 | but the complexity of that scales very poorly with dimension, 13 | so not applicable to MNIST without doing some dimensionality reduction.
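(Added note: with r hashes per band and b bands, a training point becomes
a candidate neighbor iff it matches the query on all r hashes of at least
one band. If a single hash collides with probability q, the candidate
probability is 1 - (1 - q^r)^b, the usual LSH S-curve: raising r filters
out far points, raising b recovers near ones.)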
14 | 15 | Stephen Becker 16 | %} 17 | clc 18 | 19 | addpath ~/Repos/randomized-algorithm-class/Code/ 20 | % Load MNIST data: 21 | addpath('~/Google Drive/TeachingDocs/APPM4720_5720_Spring19_Randomized/Code'); 22 | load mnist_data_all 23 | 24 | percentCorrect = @(labels1,labels2) length(find(labels1(:)==labels2(:)))/length(labels1(:)); 25 | 26 | rng(1); 27 | % Try with various sizes to get an idea how it scales... 28 | % test_subset = randsample( 1e4, 1e3 ); 29 | test_subset = randsample( 1e4, 16e2 ); 30 | 31 | TestPoints = Test(test_subset,:); 32 | TestLabels = Test_labels( test_subset ); 33 | K = 10; % # of nearest neighbors to use 34 | %% Try k-NN using true distances, so plain implementation 35 | 36 | fprintf('\n-- Vanilla k-NN\n  Finding pairwise distances\n'); 37 | t1 = tic; 38 | tic 39 | % ind = dsearchn( Train, TestPoints ); % Slow 40 | % D = pdist2( Train, TestPoints ); % Slow 41 | % for 1e3 test points, takes 34 sec with pdist, or 2.87 with pdist_faster 42 | D = pdist2_faster( Train, TestPoints ); 43 | toc 44 | fprintf('  Sorting those distances\n'); 45 | tic; 46 | [~,Ind] = sort(D); % per row, sort the columns 47 | toc 48 | 49 | fprintf('  Final processing\n'); % Find the labels of the neighbors 50 | labels = Train_labels( Ind(1:K,:) ); 51 | prediction = mode( labels, 1 ); 52 | pc = percentCorrect( prediction, TestLabels ); 53 | fprintf('  Standard k-NN has %.1f%% accuracy\n', pc*100 ); 54 | 55 | t_plain = toc(t1); 56 | 57 | 58 | 59 | %% Do with Matlab's knn 60 | % If p >= 10, it won't use a kd-tree (see KDTreeSearcher) 61 | % See "Classification Using Nearest Neighbors" help topic 62 | % Mdl = ExhaustiveSearcher( Train, 'Distance', 'seuclidean' ); 63 | % [ind2,dist_ind] = knnsearch(Mdl,TestPoints,'k',K); % gave NaNs 64 | if size(TestPoints,1) < 500 65 | fprintf('\n-- Vanilla k-NN via Matlab''s implementation\n'); 66 | t1 = tic; 67 | [idx,d] = knnsearch( Train, TestPoints, 'K', K ); 68 | toc(t1) 69 | 70 | labels = zeros(size(TestLabels)); 71 | t2=tic; 72 | for i = 1:size(TestPoints,1) 73 | labels(i) = mode(Train_labels( idx(i,:) )); 74 | end 75 | toc(t2) 76 | t_Matlab = toc(t1); 77 | pc_Matlab = percentCorrect( labels, TestLabels ); 78 | fprintf('  k-NN via Matlab has %.1f%% accuracy\n', pc_Matlab*100 ); 79 | else 80 | pc_Matlab = nan; 81 | t_Matlab = Inf; 82 | end 83 | 84 | 85 | %% Do it with LSH and bands 86 | % b = 1, r = 30 is very bad (very few neighbors, but lots of false 87 | % negatives); better to increase b and decrease r 88 | % (for Cosine vs Euclidean distances, parameters will vary 89 | % and for Euclidean distance, also work with "a" parameter) 90 | b = 15; % number of bands (decrease this to reduce # neighbors found) 91 | r = 3; % hashes per band (increase this to reduce # neighbors found) 92 | a = 5e2; % controls fineness; check length( unique( Train_hashed(:,1) ) ) 93 | 94 | t1 = tic; 95 | rng(1); 96 | p = size(Test,2); 97 | fprintf('\n-- k-NN via LSH\n'); 98 | tic 99 | 100 | COSINE_DISTANCE = false; 101 | neighborList = zeros( size(Train,1), length(TestLabels),'logical' ); 102 | 103 | for bi = 1:b 104 | if COSINE_DISTANCE 105 | % Cosine distance "SimHash" 106 | Omega = randn(p,r); 107 | LSH = @(X) sign((X-mean(X,2))*Omega); 108 | 109 | Train_hashed = LSH(Train); 110 | Test_hashed = LSH(TestPoints); 111 | 112 | innProd = Train_hashed*Test_hashed'; 113 | neighborList = neighborList | (innProd==r); % binary "or" 114 | else 115 | % Euclidean distance hash 116 | V = randn(p,r); 117 | bb = a*rand(1,r); 118 | LSH = @(X) floor( (X*V + bb )/a ); 119 | Train_hashed =
LSH(Train); 120 | Test_hashed = LSH(TestPoints); 121 | 122 | % Do group updates, assuming we only have a few hash values 123 | universe = unique( Test_hashed ); 124 | tempList = zeros( size( neighborList), 'uint8' ); 125 | % Avoid "unique" call by using big matrix 126 | for ri = 1:r 127 | for val_i = 1:length(universe) 128 | val = universe( val_i ); % bucket value 129 | ind_test = find( Test_hashed(:,ri) == val ); 130 | ind_train = find( Train_hashed(:,ri) == val ); 131 | tempList( ind_train, ind_test ) = tempList(ind_train, ind_test) + 1; 132 | end 133 | end 134 | % Need to hash all ri things together (need them *all* to agree) 135 | % or, check when tempList == r 136 | neighborList = neighborList | (tempList==r); 137 | 138 | end 139 | end 140 | toc 141 | 142 | fprintf(' Reduced # of neighbors to %.1f%%\n', 100*nnz(neighborList)/numel(neighborList) ); 143 | 144 | labels = zeros(size(TestLabels)); 145 | tic 146 | for i = 1:size(TestPoints,1) 147 | ind = find( neighborList(:,i) ); 148 | [~,ind2] = sort( pdist2_faster( Train(ind,:), TestPoints(i,:) ) ); 149 | KK = min( length(ind2),K ); 150 | labels(i) = mode(Train_labels( ind(ind2(1:KK)) )); 151 | end 152 | toc 153 | pc_LSH = percentCorrect( labels, TestLabels ); 154 | fprintf(' LSH k-NN has %.1f%% accuracy\n', pc_LSH*100 ); 155 | 156 | t_LSH = toc(t1); 157 | %% Overall 158 | fprintf('\n== SUMMARY ==\n %6d training points, %5.1f s via plain k-NN (%5.1f via Matlab''s), %5.1f s via LSH k-NN\n',... 159 | size(TestPoints,1), t_plain,t_Matlab, t_LSH ); 160 | fprintf('\tand respective accuracies: %.1f%%, %.1f%% and %.1f%%\n', pc*100, pc_Matlab*100, pc_LSH*100 ); 161 | 162 | -------------------------------------------------------------------------------- /Demos/demo18_names.m: -------------------------------------------------------------------------------- 1 | %{ 2 | With what frequency do names occur in the US? 3 | Use social security data 4 | 5 | From https://www.ssa.gov/oact/babynames/limits.html 6 | download https://www.ssa.gov/oact/babynames/names.zip 7 | (about 9 MB) 8 | 9 | Top 10 from 2017 are here: https://www.ssa.gov/oact/babynames/ 10 | 11 | 12 | This demo applies the CountMin sketch to estimate the frequency 13 | of occurrence of each name (using less memory than the straightforward 14 | data structure). 15 | Note: for both the "straightforward" data structure (which we call 16 | "fullData") as well as the sketch, we're really storing a hash table (I 17 | use the SHA hash, since it's easy to call via Matlab/Java), 18 | so I don't actually store the names themselves! But if you think of a 19 | name, we can then hash it, and check if it's in the table. 20 | 21 | Note: for the hash table, I use the first 2 bytes of the SHA hash, 22 | so about 65k unique buckets. There are at least 50,686 names in the 23 | database, so about 77% of buckets are occupied, so there are a lot 24 | of name collisions. This is bad! The fix is to use more bytes 25 | of the SHA hash, but I'm too lazy to implement that now. 26 | [Update: I fixed my laziness and used 2^20 buckets] 27 | 28 | Some of the hashes, the ones used in the sketch, only need pairwise 29 | independence, and you can do things faster than SHA and MD5 30 | e.g., if w is a prime number, then drawing a and b randomly 31 | from [0, w-1], the function h(x) = a*x + b (mod w) 32 | has the pairwise independence probability: the chance 33 | of two inputs colliding (over the randomness of choosing a and b) 34 | is 1/w. 
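For example (an illustration, not what this demo actually uses): take the
prime w = 2^16 + 1 = 65537 and draw a and b uniformly from {0, 1, ..., w-1};
then h(x) = mod(a*x + b, w) maps two distinct inputs to the same bucket
with probability 1/w over the random draw of (a,b). In MATLAB this would
look like
    w = 65537; aa = randi([0,w-1]); bb = randi([0,w-1]);
    h = @(x) mod( aa*double(x) + bb, w );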
35 | 36 | Stephen Becker, University of Colorado 37 | 38 | Reference: 39 | Graham Cormode, http://dimacs.rutgers.edu/~graham/pubs/html/TalkSimons13.html 40 | and his monograph: "Sketch techniques for approximate query processing" 41 | 2011 (http://www.cs.umass.edu/~mcgregor/711S12/sketches1.pdf) 42 | %} 43 | 44 | % nNameBuckets = intmax('uint16'); % too small 45 | % nNameBuckets = intmax('uint32'); % too big 46 | nNameBuckets = 2^20; 47 | fullData = zeros(nNameBuckets,1); 48 | fullDataNames = cell(nNameBuckets,1); 49 | if 2==exist('demo18_data.mat','file') %&& false 50 | load demo18_data 51 | fullData = full( fullDataSparse ); 52 | else 53 | tic 54 | fprintf('Reading in year '); 55 | for yr = 1880:2017 56 | fprintf('\b\b\b\b%d',yr); 57 | prfx = '~/Downloads/names'; 58 | filename = fullfile(prfx, sprintf('yob%d.txt',yr) ); 59 | fid = fopen(filename); 60 | data = textscan( fid, '%s%c%d','Delimiter',','); 61 | names = data{1}; % data{2} is gender, 'M' or 'F' 62 | occurences = data{3}; 63 | for line = 1:length(names) 64 | Engine = java.security.MessageDigest.getInstance('SHA'); 65 | name = lower( names{line} ); 66 | Engine.update(typecast(uint16(name), 'uint8')); 67 | hash = Engine.digest; % 8 bits per (so 1 byte); keep a few of these 68 | hash = uint32(typecast( hash(1:3), 'uint8' )); % and remove signs 69 | % h = dec2bin( hash ); 70 | % % hash is 0 to nNameBuckets-1, so need a +1 offset 71 | % h = uint16( bin2dec( h(:)' ) ) + 1; % NO, not quite 72 | % right... 73 | h = 2^16*hash(1) + 2^8*hash(2) + hash(3); 74 | h = mod( h, nNameBuckets ) + 1; 75 | 76 | fullData(h) = fullData(h) + occurences(line); 77 | 78 | % Also, add the name to the list of names 79 | if isempty( fullDataNames{h} ) 80 | fullDataNames{h} = name; 81 | elseif isempty( strfind(fullDataNames{h},name) ) 82 | fullDataNames{h} = [fullDataNames{h},',',name]; 83 | end 84 | 85 | 86 | end 87 | 88 | fclose(fid); 89 | end 90 | fprintf(' finished.\n'); % takes about 3.8 minutes 91 | toc 92 | fullDataSparse = sparse( fullData ); % compress from 8 MB to 1.4 MB; fullDataNames is 18 MB 93 | save demo18_data fullDataSparse fullDataNames % .mat file compresses it anyhow... 94 | end 95 | %% 96 | fprintf('Found at least %d distinct names (maybe more, since could be collisions)\n', nnz(fullData) ); 97 | fprintf('  And there were %.1f million people in the dataset\n', sum(fullData)/1e6 ); 98 | % Find collisions this way: 99 | fullDataCollisions = zeros(nNameBuckets,1); 100 | for j = 1:nNameBuckets 101 | if ~isempty( fullDataNames{j} ) 102 | str = fullDataNames{j}; 103 | fullDataCollisions(j) = length( strfind(str,',') ) + 1; 104 | end 105 | end 106 | numberUniqueNames = sum( fullDataCollisions ); 107 | fprintf('  Checking for collisions, we found exactly %d distinct names, so %d collisions\n', numberUniqueNames, numberUniqueNames-nnz(fullData) ); 108 | collisionIndex = find( fullDataCollisions > 1 ); 109 | fprintf('  For example, a few collisions:\n'); 110 | fullDataNames{ collisionIndex(1:2) } 111 | 112 | %% Warning... 113 | % Lots of bugs because of datatypes, e.g., 114 | % j = 981698; 115 | % typecast(j, 'uint8') 116 | % typecast( uint32(j), 'uint8' ) 117 | % The above two things are NOT the same!! 118 | 119 | % Most bugs are hopefully fixed! 120 | 121 | %% Try CountMin sketch 122 | % We could have applied this as we read in the data files, since 123 | % it is a linear sketch, so easy to update. But since we have 124 | % the full data anyhow, let's do it the easy way.
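% (Added summary of the standard CountMin guarantees, for reference:)
% C is a d-by-w array of counters. To add a count c for item j, set
%     C(k, h_k(j)) = C(k, h_k(j)) + c     for each row k = 1,...,d
% and to query item j, return min over k of C(k, h_k(j)). With
% nonnegative counts the query never underestimates, and one common
% parameterization, w ~ e/eps and d ~ ln(1/delta), keeps the
% overestimate below eps*(total count) with probability >= 1 - delta.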
125 | d = 7; % e.g., ceil( log2( 1/.01 ) ), so result holds with 99% chance; 126 | w = 2^8; % number of buckets per each hash 127 | C = zeros( d, w ); 128 | 129 | for j = find( fullData )' % only loop over non-empty ones rather than for j = 1:nNameBuckets 130 | Engine = java.security.MessageDigest.getInstance('SHA'); 131 | Engine.update(typecast(uint32(j), 'uint8')); % uint16(j) is a bug (if j is too big) 132 | L = typecast( Engine.digest, 'uint8' ); % make it non-negative 133 | for k = 1:d 134 | ell = L(k) + 1; % make it 1-based not 0-based indexing 135 | C( k, ell ) = C( k, ell ) + fullData( j ); 136 | end 137 | end 138 | 139 | %% And repeat, but with more buckets 140 | d2 = 7; 141 | w2 = 2^12; 142 | C2 = zeros( d2, w2 ); 143 | for j = find( fullData )' 144 | Engine = java.security.MessageDigest.getInstance('SHA'); 145 | Engine.update(typecast(uint32(j), 'uint8')); 146 | L = uint16(typecast( Engine.digest, 'uint8' )); 147 | for k = 1:d2 148 | ell = L(2*k-1)*2^8 + L(2*k); 149 | ell = mod( ell, w2 ) + 1; 150 | C2( k, ell ) = C2( k, ell ) + fullData( j ); 151 | end 152 | end 153 | %% Difference in sizes: 154 | fprintf('Full data has %d entries\n', length(fullData) ); 155 | fprintf(' CountMin structure has %d = %d x %d entries, so %.1fx compression\n', ... 156 | d*w, d, w, length(fullData)/(d*w) ); 157 | fprintf(' and more accurate CountMin structure has %d = %d x %d entries, so %.1fx compression\n', ... 158 | d*w2, d2, w2, length(fullData)/(d2*w2) ); 159 | 160 | % More accurate estimate of compression ratio 161 | fullDataSparse = sparse( fullData ); 162 | stat = whos('fullData'); b1 = stat.bytes; 163 | stat = whos('fullDataSparse'); b2 = stat.bytes; 164 | stat = whos('C'); b3 = stat.bytes; 165 | stat = whos('C2'); b4 = stat.bytes; 166 | kB = 1/1024; 167 | fprintf('Naive: %.1f kB, compressed naive: %.1f kB, CountMin: %.1f kB, CountMin v2: %.1f kB\n', ... 168 | b1*kB, b2*kB, b3*kB, b4*kB ); 169 | fprintf(' So compression ratios %.1fx, %.1fx, %.1fx, %.1fx (relative to naive)\n', ... 170 | b1/b1, b1/b2, b1/b3, b1/b4 ); 171 | fprintf(' ...compression ratios %.1fx, %.1fx, %.1fx, %.1fx (relative to compressed naive)\n', ... 
172 | b2/b1, b2/b2, b2/b3, b2/b4 ); 173 | %% Now, try it out 174 | name = 'james'; % Most popular name in database 175 | % name = 'alexander'; 176 | % name = 'samantha'; 177 | % name = 'john'; 178 | % name = 'hendrix'; % works 179 | % name = 'sophie'; 180 | % name = 'sophia'; 181 | % name = 'marta'; 182 | % name = 'abigayll'; 183 | % name = 'Isabella'; 184 | % name = 'ryan'; 185 | % name = 'padraig'; 186 | % name = 'kathryn'; % known collision 187 | % name = 'fortino'; 188 | % name = 'tomasz'; % known collision 189 | % name = 'stephen'; 190 | 191 | totalNames = sum( fullData ); 192 | %totalNames = sum( C(:) )/d; % equivalent 193 | % sum(C,2) - totalNames % sanity check for debugging purposes 194 | 195 | % Figure out the index j 196 | Engine = java.security.MessageDigest.getInstance('SHA'); 197 | Engine.update(typecast(uint16(lower(name)), 'uint8')); 198 | hash = Engine.digest; % 8 bits per (so 1 byte); keep a few of these 199 | hash = uint32(typecast( hash(1:3), 'uint8' )); % and remove signs 200 | j = mod( 2^16*hash(1) + 2^8*hash(2) + hash(3), nNameBuckets ) + 1; 201 | 202 | 203 | 204 | % And now try the CountMin sketch 205 | Engine = java.security.MessageDigest.getInstance('SHA'); 206 | Engine.update(typecast(uint32(j), 'uint8')); 207 | L = uint16(typecast( Engine.digest, 'uint8' )); 208 | c = Inf; 209 | c2 = Inf; 210 | for k = 1:d 211 | ell = L(k) + 1; % make it 1-based not 0-based indexing 212 | c = min( [c, C( k, ell )] ); 213 | end 214 | for k = 1:d2 215 | ell2 = L(2*k-1)*2^8 + L(2*k); 216 | ell2 = mod( ell2, w2 ) + 1; 217 | c2 = min( [c2, C2( k, ell2 )] ); 218 | end 219 | fprintf('\nName: %s\n', name); 220 | fprintf('True frequency is\t%.5f%%\n', 100*fullData(j)/totalNames ); 221 | fprintf('Estimated frequency is\t%.5f%% (with CountMin sketch)\n', 100*c/totalNames ); 222 | fprintf('Estimated frequency is\t%.5f%% (with larger CountMin sketch)\n', 100*c2/totalNames ); 223 | if fullDataCollisions(j) > 1 224 | fprintf(' Careful! There were other names with hash collisions: '); 225 | disp( fullDataNames{j} ); 226 | elseif fullDataCollisions(j) == 0 227 | fprintf(' Careful! This name was not in database, we''re getting only noise\n'); 228 | end -------------------------------------------------------------------------------- /Demos/demo19_AMS_sketch_vs_JL.m: -------------------------------------------------------------------------------- 1 | %{ 2 | Short demonstration of the AMS sketch 3 | AMS is named for the authors of this paper: 4 | "The space complexity of approximating the frequency moments" (N. Alon, Y. Matias, and M. Szegedy, STOC 1996) 5 | 6 | There have been many updates, improvements to this sketch over the years. 7 | I'm following *some* of the improvements, mainly as described 8 | in Cormode's 2013 lecture http://dimacs.rutgers.edu/~graham/pubs/html/TalkSimons13.html 9 | but see also Cormode's 2011 monograph for more precise statements, 10 | "Sketch techniques for approximate query processing" 11 | Foundations and Trends in Databases 12 | http://www.cs.umass.edu/~mcgregor/711S12/sketches1.pd 13 | 14 | 15 | Stephen Becker 16 | This code uses the AMS_sketch.m code, in the ../Code subdirectory 17 | on this same github repo. 
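(Added note on the estimator used below: each of the d blocks is an
independent count sketch C_k, and for a column x one computes
    est_k = ||C_k x||_2,   est = median over k of est_k.
Each block is an isometry in expectation, E||C_k x||_2^2 = ||x||_2^2,
and the median over d independent blocks concentrates the estimate;
the script also compares a mean-of-squares variant.)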
18 | %} 19 | rng(0); 20 | addpath ~/Repos/randomized-algorithm-class/Code/ 21 | 22 | p = 1e4; 23 | n = 1e2; 24 | w = 2^10; 25 | d = 7; 26 | 27 | X = randn(p,n); 28 | % Y = randn(p,n); 29 | % C = AMS_sketch( X, w, d ); % calls several count sketches 30 | % CY = AMS_sketch( Y, w, d ); 31 | % C2 = AMS_sketch( X - 3.14*Y, w, d ); 32 | % norm( (C-3.14*CY) - C2, 'fro' ) % confirm linearity of sketch 33 | 34 | if false 35 | addpath('~/Google Drive/TeachingDocs/APPM4720_5720_Spring19_Randomized/Code'); 36 | load mnist_data_all 37 | X = Train'; 38 | Xt = Train; 39 | d = 7; 40 | m = 20; 41 | n = size(X,2); 42 | end 43 | 44 | columnNorms = @(X) sqrt( sum(X.^2) ); 45 | %% 46 | saltSeed = 1; 47 | tic 48 | if n > 1e4 49 | C = AMS_sketch( Xt, w, d, 'transposedX', true ); % calls several count sketches 50 | else 51 | C = AMS_sketch( X, w, d ); % calls several count sketches 52 | end 53 | toc 54 | %% 55 | cNorms = zeros(d,n); 56 | for k = 1:d 57 | CC = C( (1+(k-1)*w):k*w, :); 58 | cNorms(k,:) = columnNorms( CC ); 59 | end 60 | cNormEstimate = median( cNorms, 1 ); 61 | cNormEstimate_variant = sqrt( mean( cNorms.^2, 1 ) ); % e.g., w<-- w*d, d<-- 1 62 | %% 63 | figure(1); clf; 64 | scatter( columnNorms(X), cNormEstimate, 'o' ) 65 | hold all 66 | scatter( columnNorms(X), cNormEstimate_variant, 'x' ) 67 | line( [96,104],[96,104] ); 68 | axis equal % make it dramatic! 69 | legend('Median of rows','Mean of rows'); 70 | %% 71 | figure(1); clf; 72 | histogram( cNormEstimate./columnNorms(X), 'binwidth',.01 ) 73 | hold all 74 | histogram( cNormEstimate_variant./columnNorms(X), 'binwidth',.01 ) 75 | legend('Median of rows','Mean of rows'); 76 | 77 | -------------------------------------------------------------------------------- /Demos/demo20_CoreSets_for_Kmeans.m: -------------------------------------------------------------------------------- 1 | %{ 2 | Following review paper of: 3 | "?Practical Coreset Constructions for Machine Learning" 4 | by Bachem, Lucic, Krause 2017; ?http://arxiv.org/abs/1703.06476 5 | %} 6 | 7 | % Needs pdist2_faster, kmeansPlusPlus, hungarian, bestMap, mnist_data_all.mat 8 | addpath ~/Repos/randomized-algorithm-class/Code/ 9 | addpath('~/Google Drive/TeachingDocs/APPM4720_5720_Spring19_Randomized/Code'); 10 | load mnist_data_all 11 | percentCorrect = @(labels1,labels2) length(find(labels1==labels2))/length(labels1); 12 | %% 13 | p = size(Train,2); 14 | K = 10; % ask for 10 labels 15 | 16 | ALGO_NAMES = {'Kmeans','Kmeans++','Kmeans-Coresets-uniform','Kmeans-Coresets'}; 17 | [TrainError,TestError,Timing] = deal(zeros(length(ALGO_NAMES),1)); 18 | 19 | ALGO = 1; 20 | tic 21 | [IDX_Train, ClusterCenters] = kmeans( Train, K ); 22 | Timing(ALGO) = toc; 23 | [~,IDX_Test ] = pdist2_faster(ClusterCenters,Test,'squaredeuclidean','smallest',1); 24 | IDX_Train_permuted = bestMap( Train_labels, IDX_Train ); 25 | IDX_Test_permuted = bestMap( Test_labels, IDX_Test ); 26 | TrainError(ALGO) = percentCorrect(IDX_Train_permuted,Train_labels); 27 | TestError(ALGO) = percentCorrect(IDX_Test_permuted,Test_labels); 28 | %% Use K-means++ 29 | ALGO = 2; 30 | tic 31 | ClusterCenters = kmeansPlusPlus( Train, K ); 32 | Timing(ALGO) = toc; 33 | [Dist_Kpp,IDX_Train_Kpp ] = pdist2_faster(ClusterCenters,Train,'squaredeuclidean','smallest',1); 34 | [~,IDX_Test_Kpp ] = pdist2_faster(ClusterCenters,Test,'squaredeuclidean','smallest',1); 35 | 36 | IDX_Train_permuted = bestMap( Train_labels, IDX_Train_Kpp ); 37 | IDX_Test_permuted = bestMap( Test_labels, IDX_Test_Kpp ); 38 | 39 | TrainError(ALGO) = 
percentCorrect(IDX_Train_permuted,Train_labels); 40 | TestError(ALGO) = percentCorrect(IDX_Test_permuted,Test_labels); 41 | 42 | %% Use K-means to get a core-set 43 | N = size( Train, 1 ); 44 | M = round( N/100 ); % size of the core-set 45 | % The naive/uniform coreset works fine if M is about N/10 46 | % but if we start sub-sampling further, e.g., N/100, 47 | % then the fancier weighted coreset starts to show improvement 48 | % (a worst-case improvement; on some random samples, it works fine) 49 | alpha = 16*(log(K)+2); 50 | clusterAvgDist = zeros(K,1); 51 | c = mean( Dist_Kpp ); 52 | weights = zeros(N,1); 53 | weights = weights + alpha*Dist_Kpp'/c; 54 | for k = 1:K 55 | ind = find( IDX_Train_Kpp == k ); 56 | clusterSize = length( ind ); 57 | ci = mean( Dist_Kpp( ind ) ); 58 | weights( ind ) = weights( ind ) + 2*alpha*ci/(c*clusterSize) + 4*N/clusterSize; 59 | end 60 | weights = weights/sum(weights); 61 | histogram( weights ); 62 | 63 | naive_coreset = randsample( N, M ); % uniform weights 64 | coreset = randsample( N, M, true, weights ); 65 | % Now, to really do core-sets, we also need to update the weights for each 66 | % entry that is sampled, e.g., before, it was implicitly 1/N 67 | % Now, it's not 1/M, but rather 1/(M*N*weights) 68 | % Not sure how to do that with Lloyd's algorithm short of writing 69 | % our own kmeans script, so just ignore... 70 | %% Now, re-run Kmeans on these sampled data 71 | 72 | ALGO = 3; 73 | tic 74 | [~, ClusterCenters] = kmeans( Train(naive_coreset,:), K ); 75 | Timing(ALGO) = toc; 76 | [~,IDX_Train ] = pdist2_faster(ClusterCenters,Train,'squaredeuclidean','smallest',1); 77 | [~,IDX_Test ] = pdist2_faster(ClusterCenters,Test,'squaredeuclidean','smallest',1); 78 | IDX_Train_permuted = bestMap( Train_labels, IDX_Train ); 79 | IDX_Test_permuted = bestMap( Test_labels, IDX_Test ); 80 | TrainError(ALGO) = percentCorrect(IDX_Train_permuted,Train_labels); 81 | TestError(ALGO) = percentCorrect(IDX_Test_permuted,Test_labels); 82 | 83 | ALGO = 4; 84 | tic 85 | [~, ClusterCenters] = kmeans( Train(coreset,:), K ); 86 | Timing(ALGO) = toc; 87 | [~,IDX_Train ] = pdist2_faster(ClusterCenters,Train,'squaredeuclidean','smallest',1); 88 | [~,IDX_Test ] = pdist2_faster(ClusterCenters,Test,'squaredeuclidean','smallest',1); 89 | IDX_Train_permuted = bestMap( Train_labels, IDX_Train ); 90 | IDX_Test_permuted = bestMap( Test_labels, IDX_Test ); 91 | TrainError(ALGO) = percentCorrect(IDX_Train_permuted,Train_labels); 92 | TestError(ALGO) = percentCorrect(IDX_Test_permuted,Test_labels); 93 | 94 | %% Print out results 95 | for ALGO = 1:4 96 | fprintf('Training error, %23s: %.2f\n', ALGO_NAMES{ALGO}, TrainError(ALGO) ); 97 | end 98 | fprintf('\n'); 99 | for ALGO = 1:4 100 | fprintf('Test error, %23s: %.2f\n', ALGO_NAMES{ALGO}, TestError(ALGO) ); 101 | end 102 | fprintf('\n'); 103 | for ALGO = 1:2 104 | fprintf('Timing, %23s: %.2f sec\n', ALGO_NAMES{ALGO}, Timing(ALGO) ); 105 | end 106 | for ALGO = 3:4 107 | fprintf('Timing, %23s: %.2f sec = %.2f + %.2f\n', ALGO_NAMES{ALGO},... 108 | Timing(2)+Timing(ALGO),Timing(ALGO),Timing(2) ); 109 | end 110 | fprintf('Coresets used M=%d (of %d possible, so %.1f%%) points\n', ... 111 | M, N, 100*M/N ); -------------------------------------------------------------------------------- /Demos/vignette-rsvd.jl: -------------------------------------------------------------------------------- 1 | #= 2 | Algorithm RSVD.
3 | given a matrix A \in \reals^{m \times n}, a target rank k and an oversampling 4 | parameter p (e.g., p = 10 is a good choice). 5 | stage A. Find an approximate range. 6 | 1. form an n \times (k+p) Gaussian random matrix G. 7 | 2. form the sample matrix Y = AG. 8 | 3. orthonormalize the columns of Y via a QR factorization. 9 | stage B. Form a specific factorization. 10 | 4. form the (k+p) \times n matrix B = Q'A. 11 | 5. form the SVD of the (small) matrix B as B = \hat{U} D V'. 12 | 6. form U = Q \hat{U}. 13 | return matrices U, D, and V as an approximate rank (k+p) SVD of A. 14 | =# 15 | using Random, LinearAlgebra, Plots, Statistics 16 | # ---------------------------------------------------------------------------- # 17 | function vignette_rsvd() 18 | rng = Random.seed!(2); # set seed for reproducibility 19 | n_sims = 10; # number of simulations 20 | n_subs = 25; # number of subsamples 21 | m = 2000; # rows of matrix 22 | n = 20*ceil(log(m)); # columns of matrix 23 | # n_sims = 50; n_subs = 25; m = 1000; n = 15*ceil(log(m)); # alt. run parameters 24 | # n_sims = 1; n_subs = 25; m = 5000; n = 15*ceil(log(m)); # alt. run parameters 25 | k = ceil(log(m)); # target rank 26 | p = max(ceil(log(m)), 10); # oversampling parameter 27 | n = Int16(n); k = Int16(k); p = Int16(p); # convert floats to integers 28 | # ---------------------------------------------------------------------------- # 29 | times = zeros(n_sims, 2); # bookkeeping for run times 30 | fro_mean = zeros(n_sims, n_subs); # bookkeeping for mean Frobenius norm 31 | op_mean = zeros(n_sims, n_subs); # bookkeeping for mean operator norm 32 | norm_bound = zeros(n_sims, 2); # bookkeeping for theoretical norm 33 | fro_lo = zeros(n_sims, 1); # bookkeeping for Frobenius norm bound 34 | op_lo = zeros(n_sims, 1); local U_A, D_A, V_A, U_B, D_B, V_B; # bookkeeping for operator norm bound; the local declaration hoists the SVD factors to function scope so they survive the loops below 35 | # ---------------------------------------------------------------------------- # 36 | for j in 1:n_sims 37 | # "data" matrix 38 | A = [2*ones(m,2)+rand(m,2) randn(m, k-2) 0.01*randn(m, n-k)]/sqrt(m); 39 | # NOTE: the matrix A has k columns that lead to "important" singular values. 40 | # The remaining n-k columns correspond to fast-decaying singular values. 41 | for i in 1:n_subs 42 | # stage A of randomized SVD 43 | G = randn(n, k+p); # Gaussian random matrix 44 | Y = A*G; # sample matrix 45 | F = qr(Y); Q = Matrix(F.Q); # orthonormalize Y 46 | # stage B of randomized SVD 47 | B = Q'*A; # form small matrix 48 | U_B, D_B, V_B = svd(B); # SVD of B 49 | U = Q*U_B; # rank k matrix 50 | # bookkeeping and comparisons 51 | U_A, D_A, V_A = svd(A); # SVD of A 52 | fro_mean[j,i] = norm(A - Q*Q'*A, 2); 53 | op_mean[j,i] = opnorm(A - Q*Q'*A); 54 | end # end of inner simulation loop (i.e., simulation for a fixed matrix A) 55 | fro_lo[j,1] = sum( D_A[k+1:min(m,n)].^2 )^0.5; 56 | op_lo[j,1] = D_A[k+1]; 57 | norm_bound[j, 1] = (1 + k / (p-1))^(0.5) * fro_lo[j,1]; # Frobenius 58 | norm_bound[j, 2] = (1 + sqrt(k / (p-1))) * op_lo[j,1] + # operator 59 | exp(1) * (sqrt(k+p) / p) * fro_lo[j,1]; 60 | end # end of outer simulation loop 61 | # ---------------------------------------------------------------------------- # 62 | # plot showing singular value decay 63 | p3 = plot(D_A, yscale = :log10, linecolor = :blue, 64 | marker = :circle, markercolor = :blue, label = "full", 65 | title = "sing. value decay (final sim.)"); 66 | p4 = plot(D_A[1:k+p], yscale = :log10, linecolor = :blue, 67 | marker = :circle, markercolor = :blue, label = "full", 68 | title = "sing.
value comparison (final sim.)") 69 | plot!(D_B, yscale = :log10, linecolor = :red, linestyle = :dash, 70 | marker = :x, markerstrokecolor = :red, label = "randomized"); 71 | 72 | # ---------------------------------------------------------------------------- # 73 | # plots of average Frobenius and operator norms vs. theoretical bounds 74 | p1 = plot(norm_bound[:,1],linecolor = :blue, marker = :circle, 75 | markercolor = :blue, label = "upper", 76 | title = "E-Y bounds: Frobenius"); 77 | plot!(mean(fro_mean, dims = 2),linecolor = :red, linestyle = :dash, 78 | marker = :x, markerstrokecolor = :red, label = "mean"); 79 | plot!(fro_lo, linecolor = :blue, linestyle = :dot, 80 | marker = :star8, markercolor = :blue, label = "lower", 81 | legend = :bottomright); 82 | p2 = plot(norm_bound[:,2],linecolor = :blue, marker = :circle, 83 | markercolor = :blue, label = "upper", 84 | title = "E-Y bounds: operator"); 85 | plot!(mean(op_mean, dims = 2),linecolor = :red, linestyle = :dash, 86 | marker = :x, markerstrokecolor = :red, label = "mean"); 87 | plot!(op_lo,linecolor = :blue, linestyle = :dot, 88 | marker = :star8, markercolor = :blue, label = "lower", 89 | legend = :bottomright); 90 | # return summary plot as "output" of the function 91 | plot(p3, p4, p1, p2, layout=(2,2)) 92 | end # end of function 93 | # ---------------------------------------------------------------------------- # 94 | vignette_rsvd() 95 | -------------------------------------------------------------------------------- /Demos/vignette_rsvd.m: -------------------------------------------------------------------------------- 1 | function vignette_rsvd 2 | %{ 3 | Vignette - randomized singular value decomposition (RSVD). 4 | APPM 4720/5720 Randomized Algorithms, Spring 2019 5 | 6 | Demo to illustrate a simple RSVD based on the two-stage approach of Martinsson. 7 | In particular, see section four of "Randomized Methods for Matrix Computations" 8 | by P.G. Martinsson as appearing in "The Mathematics of Data" for details and 9 | additional analysis. An updated version of the survey paper can be found at: 10 | https://arxiv.org/pdf/1607.01649.pdf. (Again, see section four, pages 8-9.) 11 | 12 | Algorithm RSVD. 13 | given a matrix A \in \reals^{m \times n}, a target rank k and an oversampling 14 | parameter p (e.g., p = 10 is a good choice). 15 | stage A. Find an approximate range. 16 | 1. form an n \times (k+p) Gaussian random matrix G. 17 | 2. form the sample matrix Y = AG. 18 | 3. orthonormalize the columns of Y via a QR factorization. 19 | stage B. Form a specific factorization. 20 | 4. form the (k+p) \times n matrix B = Q'A. 21 | 5. form the SVD of the (small) matrix B as B = \hat{U} D V'. 22 | 6. form U = Q \hat{U}. 23 | return matrices U, D, and V as an approximate rank (k+p) SVD of A. 24 | %} 25 | % ---------------------------------------------------------------------------- % 26 | rng(2); % set seed for reproducibility 27 | n_sims = 5; % number of simulations 28 | n_subs = 25; % number of subsamples 29 | m = 2000; % rows of matrix 30 | % n_sims = 50; n_subs = 25; m = 1000; % alt. run parameters 31 | % n_sims = 1; n_subs = 25; m = 5000; % alt. 
32 | n = 15*ceil(log(m));        % columns of matrix
33 | k = ceil(log(m));           % target rank
34 | p = max([ceil(log(m)) 10]); % oversampling parameter
35 | % ---------------------------------------------------------------------------- %
36 | times = zeros(n_sims, 2);         % bookkeeping for run times
37 | fro_mean = zeros(n_sims, n_subs); % bookkeeping for mean Frobenius norm
38 | op_mean = zeros(n_sims, n_subs);  % bookkeeping for mean operator norm
39 | norm_bound = zeros(n_sims, 2);    % bookkeeping for theoretical norm bounds
40 | fro_lo = zeros(n_sims, 1);        % bookkeeping for Frobenius norm lower bound
41 | op_lo = zeros(n_sims, 1);         % bookkeeping for operator norm lower bound
42 | % ---------------------------------------------------------------------------- %
43 | for j = 1:n_sims
44 |   A = [2+rand(m, 2) randn(m, k-2) 0.01*randn(m, n-k)]/sqrt(m); % "data" matrix
45 |   % NOTE: the matrix A has k columns that lead to "important" singular values.
46 |   % The remaining n-k columns correspond to fast-decaying singular values.
47 |   for i = 1:n_subs
48 |     % stage A of randomized SVD
49 |     G = randn(n, k+p);   % Gaussian random matrix
50 |     Y = A*G;             % sample matrix
51 |     [Q, ~] = qr(Y, 0);   % orthonormalize Y (economy-size QR)
52 |     % stage B of randomized SVD
53 |     B = Q'*A;            % form small matrix
54 |     tic; [U_B, D_B, V_B] = svd(B, 'econ'); times(j, 1) = toc; % SVD of B (only this small SVD is timed, not stages A+B; overwritten each subsample)
55 |     U = Q*U_B;           % left factor of the approximate rank-(k+p) SVD
56 |     % bookkeeping and comparisons
57 |     tic; [U_A, D_A, V_A] = svd(A, 'econ'); times(j, 2) = toc; % full SVD of A, for comparison
58 |     fro_mean(j,i) = norm(A - Q*Q'*A, 'fro');
59 |     op_mean(j,i) = norm(A - Q*Q'*A);
60 |   end % end of inner simulation loop (i.e., simulation for a fixed matrix A)
61 |   fro_lo(j, 1) = sum(diag(D_A(k+1:min([m,n]),k+1:min([m,n]))).^2)^0.5; % best rank-k error (Eckart-Young), Frobenius
62 |   op_lo(j, 1) = D_A(k+1,k+1);                                         % best rank-k error (Eckart-Young), operator
63 |   norm_bound(j, 1) = (1 + k / (p-1))^(0.5) * fro_lo(j, 1);        % Frobenius
64 |   norm_bound(j, 2) = (1 + sqrt(k / (p-1))) * op_lo(j, 1) + ...    % operator
65 |                      exp(1) * (sqrt(k+p) / p) * fro_lo(j, 1);
66 | end % end of outer simulation loop
67 | % ---------------------------------------------------------------------------- %
68 | figure; % plot showing singular value decay
69 | semilogy(diag(D_A), '-bo'), hold on, semilogy(diag(D_B), '--xr'), hold off;
70 | title('singular value comparison/decay (final simulation)');
71 | legend('full','randomized')
72 | % ---------------------------------------------------------------------------- %
73 | figure; subplot(2,1,1); % plots comparing computation times (note: the 'randomized' time is only the small SVD of B, not stages A+B)
74 | semilogy(times(:,1), '--xr'), hold on, semilogy(times(:,2), '-bo'), hold off;
75 | legend('randomized', 'full', 'Location', 'east')
76 | title('SVD timing comparison')
77 | subplot(2,1,2);
78 | plot(times(:,2) ./ times(:,1), '-ks');
79 | title('ratio of SVD times');
80 | legend('full-to-randomized', 'Location', 'southeast');
81 | % ---------------------------------------------------------------------------- %
82 | figure; % plots of average Frobenius and operator norms vs. theoretical bounds
83 | subplot(1,2,1);
84 | plot(norm_bound(:,1),'-ob'), hold on, plot(mean(fro_mean, 2),'--xr');
85 | plot(fro_lo, ':*b'); ylim([0 max(max(norm_bound))*1.1]); hold off;
86 | title('Eckart-Young bounds: Frobenius')
87 | legend('upper','mean','lower','Location','southoutside')
88 | subplot(1,2,2);
89 | plot(norm_bound(:,2),'-ob'), hold on, plot(mean(op_mean, 2),'--xr');
90 | plot(op_lo, ':*b'); ylim([0 max(max(norm_bound))*1.1]); hold off;
91 | title('E-Y bounds: operator')
92 | legend('upper','mean','lower','Location','southoutside')
93 | end % end of function
94 | % ---------------------------------------------------------------------------- %
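% For matrices whose singular values decay slowly, stage A above is commonly
% augmented with a few steps of power (subspace) iteration, i.e., working with
% (A*A')^q * A * G instead of A*G; see section four of Martinsson's survey.
% The local function below is an illustrative sketch only (its name and
% signature are placeholders, and the vignette above does not call it).
function Q = randRangePower(A, k, p, q)
% RANDRANGEPOWER  Orthonormal basis for an approximate range of A,
% using k+p samples and q steps of power iteration.
G = randn(size(A,2), k+p);  % Gaussian test matrix
Y = A*G;                    % sample matrix
for iter = 1:q
    [W, ~] = qr(Y, 0);      % re-orthonormalize to avoid loss of accuracy
    [W, ~] = qr(A'*W, 0);
    Y = A*W;
end
[Q, ~] = qr(Y, 0);          % basis for the range of (A*A')^q * A * G
end
% ---------------------------------------------------------------------------- %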
--------------------------------------------------------------------------------
/Handouts/Linear_algebra_notes_matrices.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Handouts/Linear_algebra_notes_matrices.pdf
--------------------------------------------------------------------------------
/Handouts/README.md:
--------------------------------------------------------------------------------
1 | # Handouts
2 |
3 | For APPM 5650 Randomized Algorithms, Fall 2021.
4 |
5 |
6 | - [Linear algebra facts, focusing on matrix decompositions](Linear_algebra_notes_matrices.pdf). The source code is on [overleaf](https://www.overleaf.com/read/yprqvktvsxgb). If you have something to add or fix, email Stephen.
7 |
--------------------------------------------------------------------------------
/Handouts/SamplingLecture_Sept29_2021.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Handouts/SamplingLecture_Sept29_2021.pdf
--------------------------------------------------------------------------------
/Homeworks/APPM5650Fall21_RandomizedAlgos_HW01.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW01.pdf
--------------------------------------------------------------------------------
/Homeworks/APPM5650Fall21_RandomizedAlgos_HW02.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW02.pdf
--------------------------------------------------------------------------------
/Homeworks/APPM5650Fall21_RandomizedAlgos_HW03.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW03.pdf
--------------------------------------------------------------------------------
/Homeworks/APPM5650Fall21_RandomizedAlgos_HW04.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW04.pdf
--------------------------------------------------------------------------------
/Homeworks/APPM5650Fall21_RandomizedAlgos_HW05.pdf:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW05.pdf -------------------------------------------------------------------------------- /Homeworks/APPM5650Fall21_RandomizedAlgos_HW06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW06.pdf -------------------------------------------------------------------------------- /Homeworks/APPM5650Fall21_RandomizedAlgos_HW07.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW07.pdf -------------------------------------------------------------------------------- /Homeworks/APPM5650Fall21_RandomizedAlgos_HW08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW08.pdf -------------------------------------------------------------------------------- /Homeworks/APPM5650Fall21_RandomizedAlgos_HW09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW09.pdf -------------------------------------------------------------------------------- /Homeworks/APPM5650Fall21_RandomizedAlgos_HW10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/APPM5650Fall21_RandomizedAlgos_HW10.pdf -------------------------------------------------------------------------------- /Homeworks/ProjectInformation.md: -------------------------------------------------------------------------------- 1 | # APPM 5650 Randomized Algorithms Final Project 2 | 3 | - The project is due at the beginning of class on Monday Dec 6, 2021. The last two days of class (Mon Dec 6 and Wed Dec 8) will be used for presentations 4 | - The project consists of a 10 minute presentation and a short paper. There is no strict length requirement for the paper, but aim for 4 to 6 pages (including figures). Paper lengths are a guideline, as I’m aware you can add figures, code, adjust white space and font to make it longer/shorter. 5 | - If you really wanted to, you could write a longer, much more detailed paper and skip the presentation 6 | - On Monday Dec 6, we will determine the presentation order. You must be ready to present that day in case you are chosen. 7 | - You are encouraged to form groups; group sizes can be 1 (single person), 2 or 3. No larger please. 8 | - By Friday Dec 3, email me to let me know your group, so I know how many groups there are; this will help me schedule the presentations. 9 | - The project is 25% of your overall class grade. It cannot be dropped. 
10 | - Deliverables:
11 |   - Written paper (typeset; Latex is suggested but not required), turned in via Canvas
12 |   - 10 minute talk
13 |   - Slides for the talk (turned in via Canvas)
14 | - More details:
15 |   - Class participation is extra important these days, as a sign of respect to your fellow students. You lose 10% of your project grade if you do not attend either of these last 2 days (unless you have a valid reason, like international travel, and contacted me about it beforehand).
16 |   - You also lose 10% of your project grade if you are not ready to present on the day when you're scheduled to speak.
17 | ## What is a valid project?
18 | - The project can be theoretical or computational (or both)
19 | - One option is a "traditional" class project, where you investigate an idea, and/or run simulations or do derivations or proofs, and/or connect several different ideas (e.g., create new ideas, though the originality/impact obviously does not have to rise to the level of a journal publication);
20 | - You're encouraged to pursue an area related to your research interests
21 | - Specific journal articles are a good starting point
22 |   - You can reproduce their results
23 |   - You can compare several methods
24 |   - You can apply a method to a new problem or area
25 |   - You can tweak a method
26 |   - You can redo theoretical derivations more clearly (or with more details)
27 |   - You can do a "book-report style" analysis of a paper, critically evaluating it (think of it as a peer review of an article)
28 | - Your results do not have to be novel; you do not need to write a journal-quality paper!
29 | - You'll want to relate your project to something you learned in the class (see Rubric item #2)
30 | - When in doubt about whether a project idea is valid, you can always email the instructor and ask!
31 | - You can see the project titles from previous years at the [Student projects](https://github.com/stephenbeckr/randomized-algorithm-class#student-projects) part of the main README file in the parent github directory.
32 | ## Rubric
33 | Because the type of report is a bit open-ended, the rubric below is necessarily a bit vague:
34 | 1. Valid/interesting project (25%), and point of project is clear. For example, for an independent investigation, the problem you are trying to solve is explained and motivated and non-trivial. For a book-report style project, presenting on a paper, you need some kind of thesis (e.g., “This paper shows the power of this approach...”) and not just a summary, and explain why you chose that paper. For reproducing the results of a paper (computationally, or analytically by going through a proof in extra detail), explain why you chose the paper, and why you are interested in their results (are they amazing results? do you distrust them? do they nicely illustrate concepts from class?).
35 |
36 | 2. Relate the project to a concept from class (25%). **Your project must include a paragraph describing how it involves concepts learned in class.**
37 |
38 | 3. Insightful discussion (25%). You should discuss/analyze your results, and/or validate a conclusion. For a paper review, you should discuss the strengths and weaknesses of the paper. For a project that involves generating your own results, the quality of the actual work is included in this category.
39 |
40 | 4. Professional communication (25%) of the written document and the oral presentation (and the slides). Well-organized and precise communication, grammatically correct writing, nicely formatted documents and figures.
Figures should be labeled appropriately.
41 |
42 | Here is a [more detailed rubric](ProjectRubric.pdf) that I will actually use for grading the projects.
43 |
--------------------------------------------------------------------------------
/Homeworks/ProjectRubric.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/Homeworks/ProjectRubric.pdf
--------------------------------------------------------------------------------
/Homeworks/README.md:
--------------------------------------------------------------------------------
1 | # Homeworks
2 |
3 | For APPM 5650 Randomized Algorithms, Fall 2021.
4 |
5 | HW solutions are on Canvas.
6 |
7 | - [HW 1](APPM5650Fall21_RandomizedAlgos_HW01.pdf), due Friday Aug 27, 2021 (topics: read some of Mahoney, Martinsson and Tropp, and Vershynin)
8 | - [HW 2](APPM5650Fall21_RandomizedAlgos_HW02.pdf), due Monday Sep 6, 2021 (topics: linear algebra, sparse matrices, Freivalds' algorithm, random orthogonal matrices). Turn this in via Gradescope since there's no class Monday (Labor Day)
9 | - [HW 3](APPM5650Fall21_RandomizedAlgos_HW03.pdf), due Monday Sep 13, 2021 (topics: linear algebra, basic probability, account on [CU's research computing](https://www.colorado.edu/rc))
10 | - [HW 4](APPM5650Fall21_RandomizedAlgos_HW04.pdf), due Monday Sep 20, 2021 (topics: large data files, random projections vs tSNE)
11 | - [HW 5](APPM5650Fall21_RandomizedAlgos_HW05.pdf), due Monday Sep 27, 2021 (topics: random projections)
12 | - [HW 6](APPM5650Fall21_RandomizedAlgos_HW06.pdf), due Monday Oct 4, 2021 (topics: different types of sampling without replacement)
13 | - [HW 7](APPM5650Fall21_RandomizedAlgos_HW07.pdf), due Monday Oct 11, 2021 (topics: least squares)
14 | - [HW 8](APPM5650Fall21_RandomizedAlgos_HW08.pdf), due Monday Oct 18, 2021 (topics: entry-wise sampling)
15 | - [HW 9](APPM5650Fall21_RandomizedAlgos_HW09.pdf), due Monday Oct 25, 2021 (topics: randomized K-means clustering)
16 | - [HW 10](APPM5650Fall21_RandomizedAlgos_HW10.pdf), due Monday Nov 1, 2021 (topics: randomized SVD). Last homework.
17 | - [Project information](ProjectInformation.md). The project is due Monday Dec 6, 2021, and we'll have presentations the last two days of class (Dec 6 and 8).
18 |
19 | # Turning in HW
20 | While the class meets in person, please turn in **hard copies** of your homework in class.
21 |
22 | If we end up meeting remotely due to COVID, then we will switch to Gradescope. As of now, we are *not using Gradescope*.
23 |
24 |
25 | ## FAQ for electronic submissions
26 |
27 | ### General
28 |
29 | **Gradescope** has a [submission guide](https://gradescope-static-assets.s3.amazonaws.com/help/submitting_hw_guide.pdf) that recommends software for your phone to take pictures of written homework and convert it to a PDF (your final submission to Gradescope must be a PDF).
30 |
31 | Note: the links in the PDFs will not work if you view the PDF on github, but if you open the PDF in its own tab, or download it, all the links should work.
32 |
33 | **Collaboration**: Collaboration with your fellow students is OK and in fact recommended, although direct copying is not allowed. Please write down the names of the students that you worked with.
34 |
35 | **Internet**: The internet is allowed for basic tasks (e.g., looking up definitions on Wikipedia), but it is
36 | not permissible to search for proofs, to post requests for help on forums such as [math.stackexchange.com](http://math.stackexchange.com/),
37 | or to look at solution manuals.
38 |
39 | #### Merging multiple PDF files
40 |
41 | **Mac** You can use the Preview software that comes with Mac, and drag-and-drop in the Thumbnail view, or follow these [instructions](https://support.apple.com/en-us/HT202945).
42 |
43 | **Linux** install `pdftk` (e.g., `apt-get install pdftk`), and then on the command line it's just `pdftk inputFile1.pdf inputFile2.pdf cat output outputFileName.pdf`. This works on Mac and Windows too (on Mac, the exact command line works; on Windows, I'm not sure).
44 |
45 | **Windows** there are [lists of free web- and desktop-based software](https://superuser.com/a/34294), but [PDFtk](https://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/) is one of the classic, respected tools (no viruses). I haven't used PDFtk on Windows, but the website claims they have a GUI; or if you don't like their GUI, try a [3rd party GUI that uses PDFtk](https://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/).
46 |
47 | ### Python
48 | For overall Python, and numpy in particular, Matlab users might like [NumPy for Matlab users](https://numpy.org/doc/stable/user/numpy-for-matlab-users.html).
49 |
50 | For **plotting** in Python using Matplotlib, try these [plotting cheatsheets](https://github.com/matplotlib/cheatsheets) and [controlling figure aesthetics](https://seaborn.pydata.org/tutorial/aesthetics.html) with seaborn.
51 |
52 |
53 | #### Jupyter
54 |
55 | Tips for exporting jupyter notebook code to a PDF:
56 |
57 | - You can try this [Notebook to PDF conversion website](https://htmtopdf.herokuapp.com/ipynbviewer/) that some of our students have had good luck with
58 |
59 | - Or try `nbconvert`, which requires [`pandoc`](https://pandoc.org/installing.html). You can do this on [Colab](https://colab.research.google.com/), following the [instructions here](https://stackoverflow.com/a/54191922) (but note that you may need to add a backslash before any white space when you run commands, e.g., change a command like
60 |
61 | `!cp drive/My Drive/Colab Notebooks/Untitled.ipynb ./`
62 | to
63 | ``!cp drive/My\ Drive/Colab\ Notebooks/Untitled.ipynb ./``
64 | )
65 |
66 | Note that if you include latex in the jupyter notebook, when you run `nbconvert`, you cannot have any whitespace near the `\$` symbols for math due to a requirement of `pandoc` (see [here](https://pandoc.org/MANUAL.html#extension-tex_math_dollars)). So, ``$ f(x) = 3x^2 $`` will not work, but `$f(x) = 3x^2$` will be OK.
67 |
68 | The downside of `nbconvert` is that images are saved as png, not pdf, so fonts don't come through, but that's not a big deal for homework.
69 |
70 | If you run jupyter locally, you might be able to run `nbconvert` without using the command line; go to "Download", then "PDF via LaTeX".
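For reference, a typical local invocation (assuming `jupyter` and a TeX distribution are installed; the notebook name here is just a placeholder) is `jupyter nbconvert --to pdf MyNotebook.ipynb`. If you don't have LaTeX installed, `jupyter nbconvert --to html MyNotebook.ipynb` followed by printing the HTML to PDF from a browser also works.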
71 |
72 | #### Python source code (not Jupyter)
73 | The *non-preferred* ways are (1) a screenshot of your editor (not so nice since it's an image, not text, but at least you get syntax color highlighting), and (2) export from a text editor to PDF (not so nice if you don't get syntax color highlighting).
74 |
75 | It's not nice to the graders to submit code without syntax color highlighting!
76 |
77 | Better ways: it depends on your system and editor, but there are many ways. For example, this [stackoverflow 'printing python code to PDF'](https://stackoverflow.com/q/20412038) offers several suggestions. Since I already use `vim`, and it's set up with syntax highlighting, I can follow [this answer](https://stackoverflow.com/a/20412421) and do `vim abc.py -c ":hardcopy > abc.ps" -c ":q"` followed by `ps2pdf abc.ps abc.pdf` -- no extra software needed!
78 |
79 |
80 | ### Matlab
81 |
82 | You can use the export notebook features in Matlab (it can handle latex) if you want; see the [Live-Editor](https://www.mathworks.com/products/matlab/live-editor.html); there are also claims on the internet that it's easy to get Jupyter to run with a Matlab kernel, so you could use Jupyter.
83 |
84 | To just export a figure, there are builtin methods, but one of the nicer ways is to use [export_fig](https://www.mathworks.com/matlabcentral/fileexchange/23629-export_fig), which works like `export_fig MyFileName -pdf -transparent` and makes a file `MyFileName.pdf` (note that PDF files for figures are preferred, since then the text is saved as a font and not bitmapped)
85 |
--------------------------------------------------------------------------------
/Homeworks/custom_headers.tex:
--------------------------------------------------------------------------------
1 | % Some commonly used latex settings. Stephen Becker, July 2013
2 | \pagestyle{plain}
3 | %--------------
4 | \newtheorem{theorem}{Theorem}[section]
5 | \newtheorem{lemma}[theorem]{Lemma}
6 | \newtheorem{corollary}[theorem]{Corollary}
7 | \newtheorem{proposition}[theorem]{Proposition}
8 | \newtheorem{definition}[theorem]{Definition}
9 | \newtheorem{conjecture}[theorem]{Conjecture}
10 | \newtheorem{problem}[theorem]{Problem}
11 | \newtheorem{fact}[theorem]{Fact} % added Jan 2014
12 | \newtheorem{assumption}[theorem]{Assumption} % added Jan 2014
13 | \newtheorem{remark}[theorem]{Remark}
14 | \newtheorem{remarks}[subsection]{Remarks}
15 | \newtheorem{example}[subsection]{Example}
16 | %\newtheorem{example}[theorem]{Example}
17 | %\floatname{algorithm}{Listing}
18 | %\numberwithin{equation}{section} % For now, commenting this out since I do NOT want eq numbers like (0.1)
19 | % Theorems
20 | %\newtheorem{theorem}{Theorem}
21 | %\newtheorem{lemma}{Lemma}
22 | %\newtheorem{remark}{Remark}
23 | %\newtheorem{corollary}{Corollary}%[section]
24 | %\newtheorem{proposition}{Proposition}%[section]
25 | %\newtheorem{definition}{Definition}%[section] % number this the same as theorem and lemma
26 |
27 |
28 | %% commenting
29 | % Affect margins:
30 | %\setlength{\marginparwidth}{1.2in}
31 | \setlength{\marginparwidth}{.8in}
32 | \let\oldmarginpar\marginpar
33 | \renewcommand\marginpar[1]{\-\oldmarginpar[\raggedleft\footnotesize #1]%
34 | {\raggedright\footnotesize #1}}
35 |
36 | % macros for the outline
37 | \newcommand{\todo}{{\bf \textcolor{red}{TODO} }}
38 | \newcommand{\TODO}[1]{{\bf TODO: #1}}
39 | \newcommand{\red}{\textcolor{red}}
40 | \newcommand{\note}[1]{{\bf [{\em Note:} #1]}}
41 |
42 | % Editing commands
43 | \newcommand{\fix}[1]{\textcolor{red}{#1}}
44 | \usepackage[normalem]{ulem} % for sout, not needed for final version
45 | \newcommand{\add}[1]{\textcolor{blue}{#1}}
46 | \newcommand{\new}[1]{\textcolor{blue}{#1}} % synonym
47 | \newcommand{\del}[1]{{\color{Bittersweet}\sout{#1}}}
48 | \newcommand{\remove}[1]{{\color{Bittersweet}\sout{#1}}} % synonym
49 |
50 | % Better, use e.g., (with comma)
51 | %\newcommand\eg{e.g.\xspace}
52 | %\newcommand\ie{i.e.\xspace}
53 |
54 |
55 |
56 |
57 |
58 | \newcommand{\Id}{\text{\em 
I}} 59 | \newcommand{\OpId}{\mathcal{I}} 60 | 61 | % -- Operators -- 62 | %First of all, one must of course recall that \operatorname and \DeclareMathOperator are provided by the amsopn package, which is automatically loaded by amsmath, but is also available standalone 63 | \DeclareMathOperator{\dom}{dom} 64 | \DeclareMathOperator{\vect}{vec} % vec(X) = X(:) in matlab notation 65 | \DeclareMathOperator{\VEC}{vec} % vec(X) = X(:) in matlab notation 66 | \DeclareMathOperator{\mat}{mat} % mat(x) = reshape(x,N,N) 67 | \DeclareMathOperator{\prox}{prox} 68 | \DeclareMathOperator{\tr}{trace} 69 | \DeclareMathOperator{\logdet}{log det} 70 | %\newcommand{\sgn}{\textrm{sgn}} 71 | %\newcommand{\sign}{\textrm{sgn}} or instead \operatorname 72 | \DeclareMathOperator{\shr}{shrink} 73 | \DeclareMathOperator{\shrink}{shrink} 74 | \DeclareMathOperator{\trunc}{trunc} 75 | \DeclareMathOperator{\range}{range} 76 | \DeclareMathOperator{\rank}{rank} 77 | \DeclareMathOperator{\diag}{diag} 78 | \DeclareMathOperator{\trace}{trace} 79 | \DeclareMathOperator{\supp}{supp} 80 | \DeclareMathOperator*{\argmax}{argmax} % puts subscripts in the right place 81 | \DeclareMathOperator*{\argmin}{argmin} 82 | \DeclareMathOperator*{\minimize}{minimize} 83 | \DeclareMathOperator*{\maximize}{maximize} 84 | % -- Misc -- 85 | \newcommand\thalf{{\textstyle\frac{1}{2}}} 86 | \newcommand{\eps}{\varepsilon} 87 | \newcommand{\e}{\mathrm{e}} 88 | \renewcommand{\i}{\imath} 89 | %\newcommand{\bmat}[1]{\begin{bmatrix} #1 \end{bmatrix}} 90 | \newcommand{\smax}{\sigma_{\max}} 91 | \newcommand{\smin}{\sigma_{\min}} 92 | %\newcommand{\T}{*} % (see also \transp, \adj below) 93 | \newcommand{\T}{T} % for the adjoint/transpose 94 | \newcommand{\transp}{T} 95 | \newcommand{\adj}{*} 96 | \newcommand{\psinv}{\dagger} 97 | % -- Mathbb -- 98 | \newcommand{\R}{\mathbb{R}} 99 | \newcommand{\RR}{\mathbb{R}} 100 | \newcommand{\Rn}{\R^{n}} 101 | \newcommand{\Rmn}{\R^{m \times n}} 102 | \newcommand{\Rnn}{\R^{n \times n}} 103 | \newcommand{\Rmm}{\R^{m \times m}} 104 | \newcommand{\C}{\mathbb{C}} 105 | \newcommand{\Z}{\mathbb{Z}} 106 | \newcommand{\HH}{\mathcal{H}} % for Hilbert space (\H already defined). 107 | \newcommand{\EE}{\operatorname{\mathbb{E}}} % for probability and expectations 108 | \newcommand{\E}{\operatorname{\mathbb{E}}} % is operatorname necessary? 109 | \renewcommand{\P}{\operatorname{\mathbb{P}}} % for probability 110 | % -- Mathcal -- 111 | \newcommand{\id}{\mathcal{I}} % identity operator 112 | \newcommand{\AAA}{\ensuremath{\mathcal{A}}} % generic linear operator 113 | \newcommand{\cA}{\ensuremath{\mathcal{A}}} % generic linear operator 114 | \newcommand{\K}{\ensuremath{\mathcal{K}}} % cone 115 | \newcommand{\cK}{\ensuremath{\mathcal{K}}} % cone 116 | \newcommand{\proj}{\ensuremath{\mathcal{P}}} % Projection 117 | \newcommand{\PP}{\operatorname{\mathcal{P}}} % for projections 118 | \newcommand{\lag}{\ensuremath{\mathcal{L}}} % Lagrangian 119 | \renewcommand{\L}{{\mathcal L}} 120 | \newcommand{\N}{{\mathcal{N}}} % for normal N(0,1) variables... 
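% Illustrative usage of the macros in this file (example only; the symbols
% S, A, b, x, y below are placeholders): a sketched least-squares problem can
% be typeset as
%   \hat{x} = \argmin_{x \in \Rn} \thalf \norm{S A x - S b}^2 ,
% and an expectation statement as \E \norm{S y}^2 = \norm{y}^2 .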
121 | \newcommand{\order}{\mathcal{O}} % big O notation 122 | % -- Text shortcuts -- 123 | \newcommand{\st}{\ensuremath{\;\text{such that}\;}} 124 | %\newcommand{\st}{\text{subject to}} 125 | \newcommand{\gs}{g_\text{sm}} % smooth part of dual objective 126 | 127 | 128 | % -- To get the ones vector to look nice (without using the bbold package) 129 | \newcommand{\bbfamily}{\fontencoding{U}\fontfamily{bbold}\selectfont} 130 | \newcommand{\textbb}[1]{{\bbfamily#1}} 131 | \DeclareMathAlphabet{\mathbbb}{U}{bbold}{m}{n} 132 | \newcommand{\ones}{\mathbbb 1} % ones vector 133 | 134 | % -- For := type stuff -- 135 | %\newcommand{\defeq}{\mathrel{\mathop:}=} % for definitions, e.g. z := y + 3 136 | %\newcommand{\defeq}{\triangleq} % another alternative 137 | %\newcommand{\defeq}{\equiv} % another alternative 138 | \newcommand{\defeq}{\stackrel{\text{\tiny def}}{=}} % another alternative 139 | %\newcommand{\defeq}{\stackrel{\text{\tiny def}}{\hbox{\equalsfill}}} % another alternative, doesn't work 140 | 141 | 142 | % -- Inner products and norms -- 143 | \newcommand{\<}{\langle} 144 | \renewcommand{\>}{\rangle} 145 | \newcommand{\restrict}[1]{\big\vert_{#1}} 146 | % If using < x | y > or { x | x < 0 } 147 | %http://tex.stackexchange.com/questions/498/mid-vertical-bar-vert-lvert-rvert-divides 148 | %use \mid not | (bar, bracket) for inner products and such. 149 | %\newcommand{\iprod}[2]{\left\langle #1 , #2 \right\rangle} 150 | \newcommand{\iprod}[2]{\left\langle #1,\,#2 \right\rangle} 151 | \newcommand{\iprodMed}[2]{\Bigl\langle #1 , #2 \Bigr\rangle} 152 | \newcommand{\scal}[2]{\left\langle{#1},\,{#2}\right\rangle} 153 | \newcommand{\norm}[1]{{\left\lVert{#1}\right\rVert}} 154 | \newcommand{\dist}[2]{\left\| #1 - #2 \right\|_2} 155 | \newcommand{\vectornormbig}[1]{\big\|#1\big\|} 156 | \newcommand{\vectornormmed}[1]{\big\|#1\big\|} 157 | 158 | 159 | % Linear algebra macros 160 | %\newcommand{\vct}[1]{\bm{#1}} 161 | %\newcommand{\mtx}[1]{\bm{#1}} 162 | \newcommand{\vct}[1]{{#1}} 163 | \newcommand{\mtx}[1]{{#1}} 164 | %\newcommand{\mtx}[1]{\mathsfsl{#1}} 165 | \renewcommand{\vec}[1]{{\boldsymbol{#1}}} 166 | 167 | 168 | 169 | % -- use amsthm instead -- 170 | %\def \endprf{\hfill {\vrule height6pt width6pt depth0pt}\medskip} 171 | %\newenvironment{proof}{\noindent {\bf Proof} }{\endprf\par} 172 | %\newcommand{\qed}{{\unskip\nobreak\hfil\penalty50\hskip2em\vadjust{} 173 | %\nobreak\hfil$\Box$\parfillskip=0pt\finalhyphendemerits=0\par}} 174 | 175 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Stephen Becker 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Randomized Algorithm Class, APPM/STAT 5650, Fall 2021
2 | Randomized algorithm class at CU Boulder, Fall 2021, [Professor Becker](http://amath.colorado.edu/faculty/becker/)
3 |
4 | NOTE: Fall 2021, this is APPM/STAT 5650. These course materials were created Spring 2019 when the same class was taught (also by Stephen Becker) under the special topics designation APPM 4720/5720; an earlier version of the special topics course was taught by Prof. Gunnar Martinsson.
5 |
6 | The course meets MWF 10:20 AM - 11:10 AM in ECCR 257, the "Newton Lab". The current plan is for the course to meet in person, assuming COVID-19 levels remain reasonable in Boulder.
7 |
8 | The actual topics we covered, and links to references, are on this [google sheet](https://docs.google.com/spreadsheets/d/1z2yT99o8nCiotU0OZbrmmk0kAjff5iUDhKo3fpRVORA/edit?usp=sharing). See below for a high-level list of what we covered. There was no single textbook for the class (and no standard set of notes).
9 |
10 | This git repo contains things like code demos used in class. Most of the code is in Matlab; if any students want to translate demos to other languages and then push them, just make a pull request.
11 | - [Demos](Demos/)
12 | - [Homeworks](Homeworks/) (homework solutions and code are on the private Canvas website)
13 | - [Syllabus](syllabus.md), which replaces the [OLD Class policies, etc., for Spring 2019](APPM4720_5720_Spr2019_Syllabus.pdf)
14 | - The syllabus has details on class policies, grading, textbooks and resources, topics covered, etc.
15 |
16 | Other material (grades, HW solutions) is in our LMS [Canvas](https://canvas.colorado.edu/courses/76997)
17 |
18 | # Student projects
19 | - Spring 2019. Here is a [PDF showing a brief summary of the end-of-semester student projects from Spring '19](SlideshowAllPresentations_4720Spr19_Randomized.pdf). 
If you're interested in the Shazam-like song matching algorithm (using locality sensitive hashing), their code is at [Padraig's github site](https://github.com/Lysandr/minHash_Shazam) 20 | 21 | [![image for spring 2019](SlideshowAllPresentations_4720Spr19_Randomized.jpeg)](SlideshowAllPresentations_4720Spr19_Randomized.pdf) 22 | 23 | - Fall 2021: 24 | 25 | [![image for fall 2021](SlideshowAllPresentations_5650_Fall21.jpg)](SlideshowAllPresentations_5650_Fall21.pdf) 26 | -------------------------------------------------------------------------------- /SlideshowAllPresentations_4720Spr19_Randomized.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/SlideshowAllPresentations_4720Spr19_Randomized.jpeg -------------------------------------------------------------------------------- /SlideshowAllPresentations_4720Spr19_Randomized.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/SlideshowAllPresentations_4720Spr19_Randomized.pdf -------------------------------------------------------------------------------- /SlideshowAllPresentations_5650_Fall21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/SlideshowAllPresentations_5650_Fall21.jpg -------------------------------------------------------------------------------- /SlideshowAllPresentations_5650_Fall21.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenbeckr/randomized-algorithm-class/ae755d31df9f7523ac513d0ce8a88aed8db6ae33/SlideshowAllPresentations_5650_Fall21.pdf --------------------------------------------------------------------------------