├── MinMaxSelection ├── Contents.txt ├── Internal_mxArray.h ├── Internal_mxArray_2010B.h ├── buildInternal_mxArrayDef.m ├── getmexopts.m ├── inplacecolumnmex.c ├── inplacecolumnmex.mexa64 ├── inplacecolumnmex.mexw64 ├── maxk.m ├── maxkmex.c ├── maxkmex.m ├── maxkmex.mexa64 ├── maxkmex.mexw64 ├── mink.m ├── minkmex.c ├── minkmex.m ├── minkmex.mexa64 ├── minkmex.mexw64 ├── minmax_install.m ├── minmaxk.m ├── releaseinplace.c ├── releaseinplace.mexa64 ├── releaseinplace.mexw64 └── testminmax.m ├── README.md ├── clusteredMP_simrank.m ├── metrics ├── eval_pres.m └── eval_recall.m ├── mp_simrank.m ├── pipelined_simrank.m └── utils ├── kmedioids.m └── norm_by_col.m /MinMaxSelection/Contents.txt: -------------------------------------------------------------------------------- 1 | Main Functions: 2 | mink.m -> Matlab file to look for k-smallest elements 3 | maxk.m -> Matlab file to look for k-largest elements 4 | 5 | Other files: 6 | minmaxk.m -> Common Matlab wrapper for mink.m and maxk.m 7 | minkmex.c -> Mex engine for mink.m 8 | minkmex.m -> Help file for Mex minkmex 9 | maxkmex.c -> Mex engine for maxk.m 10 | maxkmex.m -> Help file for Mex maxkmex 11 | buildInternal_mxArrayDef.m -> building the typedef for mxArray 12 | inplacecolumnmex.c -> Create inplace column of a full matrix 13 | releaseinplace.c -> release the data of the inplace array 14 | minmax_install.m -> Installation function (mex build) 15 | getmexopts.m -> Tool to get the current MEX setup 16 | testminmax.m -> Test program of Min/Max Selection Package 17 | Contents.txt -> This file 18 | Internal_mxArray_2010B.h -> Prototype file for R2010b 19 | 20 | Author Bruno Luong 21 | Contributor: Matt Fig, James Tursa 22 | Last update: 27-August-2011 23 | -------------------------------------------------------------------------------- /MinMaxSelection/Internal_mxArray.h: -------------------------------------------------------------------------------- 1 | /* 2 | Internal_mxArray.h 3 | Matlab version: 2010B 4 | */ 5 | 6 
| typedef struct { /* Overlay used to reach the fields of an mxArray directly: inplacecolumnmex.c casts an mxArray* to Internal_mxArray*. Presumably derived from "struct mxArray_tag" of matrix.h (see buildInternal_mxArrayDef.m) -- TODO confirm the layout matches the MATLAB release in use. */ 7 | void *reserved; 8 | int reserved1[2]; 9 | void *reserved2; 10 | size_t number_of_dims; 11 | unsigned int reserved3; 12 | struct { /* bit-fields below add up to 32 bits (12 x 1 + 4 + 8 + 8) */ 13 | unsigned int flag0 : 1; 14 | unsigned int flag1 : 1; 15 | unsigned int flag2 : 1; 16 | unsigned int flag3 : 1; 17 | unsigned int flag4 : 1; 18 | unsigned int flag5 : 1; 19 | unsigned int flag6 : 1; 20 | unsigned int flag7 : 1; 21 | unsigned int flag7a: 1; 22 | unsigned int flag8 : 1; 23 | unsigned int flag9 : 1; 24 | unsigned int flag10 : 1; 25 | unsigned int flag11 : 4; 26 | unsigned int flag12 : 8; 27 | unsigned int flag13 : 8; 28 | } flags; 29 | size_t reserved4[2]; 30 | union { 31 | struct { 32 | void *pdata; /* data pointer; inplacecolumnmex.c stores the address of column k of A here instead of calling mxSetPr */ 33 | void *pimag_data; /* presumably the imaginary-part pointer; not touched by the visible MEX code -- TODO confirm */ 34 | void *reserved5; 35 | size_t reserved6[3]; 36 | } number_array; 37 | } data; 38 | } Internal_mxArray; -------------------------------------------------------------------------------- /MinMaxSelection/Internal_mxArray_2010B.h: -------------------------------------------------------------------------------- 1 | /* 2 | Internal_mxArray_2010B.h (same content as Internal_mxArray.h) 3 | Matlab version: 2010B 4 | NOTE(review): defines the same typedef name as Internal_mxArray.h and has no include guard, so the two headers cannot be included together. */ 5 | 6 | typedef struct { /* Same member list as the typedef in Internal_mxArray.h */ 7 | void *reserved; 8 | int reserved1[2]; 9 | void *reserved2; 10 | size_t number_of_dims; 11 | unsigned int reserved3; 12 | struct { /* bit-fields below add up to 32 bits (12 x 1 + 4 + 8 + 8) */ 13 | unsigned int flag0 : 1; 14 | unsigned int flag1 : 1; 15 | unsigned int flag2 : 1; 16 | unsigned int flag3 : 1; 17 | unsigned int flag4 : 1; 18 | unsigned int flag5 : 1; 19 | unsigned int flag6 : 1; 20 | unsigned int flag7 : 1; 21 | unsigned int flag7a: 1; 22 | unsigned int flag8 : 1; 23 | unsigned int flag9 : 1; 24 | unsigned int flag10 : 1; 25 | unsigned int flag11 : 4; 26 | unsigned int flag12 : 8; 27 | unsigned int flag13 : 8; 28 | } flags; 29 | size_t reserved4[2]; 30 | union { 31 | struct { 32 | void *pdata; /* data pointer (the member written by inplacecolumnmex.c) */ 33 | void *pimag_data; 34 | void *reserved5; 35 | size_t reserved6[3]; 36 | } number_array; 37 | } data; 38 | } Internal_mxArray; --------------------------------------------------------------------------------
/MinMaxSelection/buildInternal_mxArrayDef.m:
--------------------------------------------------------------------------------
1 | function content = buildInternal_mxArrayDef(mxArraydefFilename)
2 | % function content = buildInternal_mxArrayDef(mxArraydefFilename)
3 | %
4 | % Build the typedef of the internal structure mxArray by extracting the
5 | % "struct mxArray_tag { ... }" definition from the MATLAB include file
6 | % MATRIX.H. This ensures the definition used is compatible with the
7 | % Matlab version.
8 | % The internal definition is used by the MEX files inplacecolumnmex and
9 | % releaseinplace
10 | %
11 | % INPUT:
12 | %   mxArraydefFilename: optional, name of the header file to create.
13 | %       When omitted nothing is written to disk.
14 | % OUTPUT:
15 | %   content: cell array of strings holding the extracted definition, one
16 | %       line per cell, rewritten as "typedef struct {...} Internal_mxArray"
17 | %
18 | % An error is thrown when matrix.h cannot be opened or parsed, or when
19 | % the output header file cannot be opened for writing.
20 | %
21 | % EXAMPLE USAGE:
22 | %   buildInternal_mxArrayDef('Internal_mxArray.h')
23 | %
24 | % Author: Bruno Luong
25 | %
26 | % History
27 | %   Original: 28-Jun-2009
28 |
29 | % Location of the header file matrix.h
30 | MLincludepath = [matlabroot() filesep 'extern' filesep 'include'];
31 | matrixhfile = 'matrix.h';
32 |
33 | fid = fopen([MLincludepath filesep matrixhfile]);
34 | if fid>0
35 |     % Read the whole header, one cell per line
36 |     c = textscan(fid, '%s', 'Delimiter', '\n', 'Whitespace', '');
37 |     try
38 |         fclose(fid);
39 |     end
40 |
41 |     content = c{1};
42 |
43 |     % Look for the line containing "struct mxArray_tag {"
44 |     idxmxArray_tag = strfind(content,'struct mxArray_tag {');
45 |     l1 = find(~cellfun('isempty',idxmxArray_tag),1,'first');
46 |     if isempty(l1)
47 |         error('Cannot parse matrix.h file');
48 |     end
49 |
50 |     % Modify the mxArray_tag to typedef definition
51 |     content{l1} = strrep(content{l1}, ...
52 |         'struct mxArray_tag', 'typedef struct');
53 |
54 |     % Loop on the lines and stop at the closing curly bracket that
55 |     % matches the opening one of "struct mxArray_tag { ... }"
56 |     l9 = 0;
57 |     ncurlybrackets = 0; % brackets seen so far (open + close)
58 |     nlevels = 0;        % current nesting depth
59 |     for l=l1:length(content)
60 |         thisline = content{l};
61 |         nopen = sum(thisline=='{');
62 |         nclose = sum(thisline=='}');
63 |         ncurlybrackets = ncurlybrackets + (nopen + nclose);
64 |         nlevels = nlevels + (nopen - nclose);
65 |         if ncurlybrackets>0 && nlevels==0
66 |             l9 = l;
67 |             % Modify the last line with the typedef name 'Internal_mxArray'
68 |             lastcurly = find(thisline=='}',1,'last');
69 |             content{l} = [thisline(1:lastcurly) ...
70 |                 ' Internal_mxArray' ...
71 |                 thisline(lastcurly+1:end)];
72 |             break;
73 |         end
74 |     end
75 |     if l9==0
76 |         error('Cannot parse matrix.h file');
77 |     end
78 |     % Here is the definition we are interested in
79 |     content = content(l1:l9);
80 |
81 |     if nargin>=1
82 |         thisfile = mfilename();
83 |         fid = fopen(mxArraydefFilename,'wt');
84 |         % Check the identifier BEFORE the first write, so that a failed
85 |         % fopen reports the intended message instead of an
86 |         % invalid-file-identifier error raised by fprintf
87 |         if fid>0
88 |             % Write a comment header. The variable parts are passed as
89 |             % arguments so that they are never interpreted as a format
90 |             fprintf(fid, '/* Built automatically by %s.m\n', thisfile);
91 |             fprintf(fid, '\tBuilt date: %s\n', datestr(now));
92 |             fprintf(fid, '\tMatlab version: %s\n', version('-release'));
93 |             fprintf(fid, '*/\n\n');
94 |
95 |             % Write the content to header file
96 |             for l=1:length(content)
97 |                 fprintf(fid, '%s\n', content{l});
98 |             end
99 |             try
100 |                 fclose(fid);
101 |             end
102 |         else
103 |             error('Cannot write the header file %s', mxArraydefFilename);
104 |         end
105 |     end
106 | else % fail to open matrix.h
107 |     error('Cannot find ML file');
108 | end
--------------------------------------------------------------------------------
/MinMaxSelection/getmexopts.m:
--------------------------------------------------------------------------------
1 | function res = getmexopts(Tag)
2 | % function res = getmexopts(Tag)
3 | % Get the MCC or MEX configuration
4 | % Author Bruno Luong
5 | % Last update: 29-Jun-2009
6 |
7 | if ispc()
8 |     optpath=prefdir;
9 |     optfile=[optpath filesep 'compopts.bat'];
10 |     mexoptfile=[optpath filesep 'mexopts.bat'];
11 | else
12 |     optpath=matlabroot;
13 |     optfile=[optpath '/bin/mbuildopts.sh'];
14 |
mexoptfile=[optpath '/bin/mexopts.sh']; % not sure correct path 15 | end 16 | 17 | % Try to get MEX option first 18 | fid=fopen(mexoptfile,'r'); 19 | if fid<=0 20 | % Next MCC options 21 | fid=fopen(optfile,'r'); 22 | end 23 | 24 | if fid>0 25 | iscompilerline=@(S) (strcmp(S,['set ' Tag])); 26 | C=textscan(fid,'%s %s', 'delimiter', '=', 'whitespace', ''); 27 | fclose(fid); 28 | cline=find(cellfun(iscompilerline,C{1})); 29 | if isempty(cline) 30 | error('getmexopt [Bruno]: cannot get Tag %s', Tag) 31 | end 32 | res=C{2}{cline}; 33 | root=regexprep(matlabroot,'\\','\\\\'); 34 | res = regexprep(res,'%MATLAB%',root); 35 | else 36 | error('getmexopts [Bruno]: cannot open comopts.bat file') 37 | end 38 | 39 | % Bruno -------------------------------------------------------------------------------- /MinMaxSelection/inplacecolumnmex.c: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * function B = inplacecolumnmex(A, k) 3 | * Return the inplace-column A(:,k) 4 | * Important notes: 5 | * - use MEX function releaseinplace(B) to release properly shared-data 6 | * pointer before clear/reuse B. 7 | * - All inplace variables shared data with A must be released before 8 | * the original array A is cleared/reused. 9 | * Thanks to James Tursa 10 | ************************************************************************/ 11 | #include "mex.h" 12 | #include "matrix.h" 13 | 14 | /* Uncomment this on older Matlab version where size_t has not been 15 | defined */ 16 | /* 17 | #define mwSize int 18 | #define size_t int 19 | */ 20 | 21 | /* The following file defines the internal representation of mxArray, 22 | * inspired from mxArray_tag declared in the header . 
23 |  * This file is built by calling the MATLAB function
24 |  * buildInternal_mxArrayDef.m */
25 | #include "Internal_mxArray.h"
26 |
27 | /* Gateway of inplacecolumnmex: B = inplacecolumnmex(A, k)
28 |  *
29 |  * Inputs (prhs):
30 |  *   A : full (non-sparse) double array with M rows and N columns
31 |  *   k : double scalar, one-based column number, 1 <= k <= N
32 |  * Output (plhs):
33 |  *   B : M-by-1 double array whose data pointer points inside the data
34 |  *       of A (no copy is made); see the important notes at the top of
35 |  *       this file about releasing B with releaseinplace.
36 |  *
37 |  * A MEX error is raised when the number or the type of the arguments
38 |  * is not valid, or when k is out of range.
39 |  */
40 | void mexFunction(int nlhs, mxArray *plhs[],
41 |                  int nrhs, const mxArray *prhs[]) {
42 |
43 |     mwSize k, N, M;
44 |     double kd;
45 |     double *Pr;
46 |
47 |     /* Check arguments */
48 |     if (nrhs!=2)
49 |         mexErrMsgTxt("INPLACECOLUMN: Two input arguments required.");
50 |
51 |     if (nlhs>1)
52 |         mexErrMsgTxt("INPLACECOLUMN: Too many output arguments.");
53 |
54 |     if (!mxIsNumeric(prhs[0]))
55 |         mexErrMsgTxt("INPLACECOLUMN: First input A argument must be numeric.");
56 |
57 |     /* The data of A is walked as double* with a stride of M elements and
58 |      * the output is created as mxDOUBLE_CLASS: any other class, or a
59 |      * sparse storage, would make the column pointer wrong or out of
60 |      * bounds */
61 |     if (!mxIsDouble(prhs[0]) || mxIsSparse(prhs[0]))
62 |         mexErrMsgTxt("INPLACECOLUMN: First input A must be a full double array.");
63 |
64 |     if (!mxIsNumeric(prhs[1]))
65 |         mexErrMsgTxt("INPLACECOLUMN: Second input K must be numeric.");
66 |
67 |     /* Get the size */
68 |     M = mxGetM(prhs[0]);
69 |     N = mxGetN(prhs[0]);
70 |
71 |     /* Get the column number k from the second input */
72 |     if (mxGetM(prhs[1])!=1 || mxGetN(prhs[1])!=1)
73 |         mexErrMsgTxt("INPLACECOLUMN: Second input K must be a scalar.");
74 |
75 |     if (mxGetClassID(prhs[1]) != mxDOUBLE_CLASS)
76 |         mexErrMsgTxt("INPLACECOLUMN: Second input K must be a double.");
77 |
78 |     /* Make sure k is valid. The test is done on the double value, before
79 |      * the conversion to the unsigned type mwSize: converting a negative,
80 |      * NaN or too large double is not defined in C. The test is written
81 |      * so that NaN fails it; a fractional k is truncated as before. */
82 |     kd = *mxGetPr(prhs[1]);
83 |     if (!(kd>=1.0 && kd<(double)N+1.0))
84 |         mexErrMsgTxt("INPLACECOLUMN: K is not valid.");
85 |     k = (mwSize)kd;
86 |
87 |     /* Create the Matrix result (first output) */
88 |     plhs[0] = mxCreateNumericMatrix(0, 0, mxDOUBLE_CLASS, mxREAL);
89 |     mxFree(mxGetPr(plhs[0])); /* Free the data, normally Pr is NULL and
90 |                                * this call does nothing */
91 |
92 |     /* Set the dimension as one column */
93 |     mxSetM(plhs[0], M);
94 |     mxSetN(plhs[0], 1);
95 |
96 |     /* Inplace data pointer of A */
97 |     Pr = mxGetPr(prhs[0]);
98 |     Pr += (k-1)*M; /* Point to the column #k */
99 |     /* Equivalent to doing this: mxSetPr(plhs[0], Pr);
100 |        but access directly to data pointer in order to by pass Matlab
101 |        checking */
102 |     ((Internal_mxArray*)(plhs[0]))->data.number_array.pdata = Pr;
103 |
104 |     return;
105 |
106 | } /* Gateway of INPLACECOLUMNMEX.c */
107 |
108 |
--------------------------------------------------------------------------------
/MinMaxSelection/inplacecolumnmex.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PreferredAI/mp-simrank/e10b2a7be9f3be8a4c250760c5f286116daf6ad3/MinMaxSelection/inplacecolumnmex.mexa64 -------------------------------------------------------------------------------- /MinMaxSelection/inplacecolumnmex.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PreferredAI/mp-simrank/e10b2a7be9f3be8a4c250760c5f286116daf6ad3/MinMaxSelection/inplacecolumnmex.mexw64 -------------------------------------------------------------------------------- /MinMaxSelection/maxk.m: -------------------------------------------------------------------------------- 1 | function varargout = maxk(varargin) 2 | % function res = MAXK(list, k) 3 | % 4 | % If LIST is a vector, MAXK returns in RES the K largest elements of LIST 5 | % RES is sorted in descending order 6 | % [res loc] = MAXK(...) 
7 | % Location of the largest elements: RES=LIST(LOC) 8 | % If list is a matrix, MAXK operates along the first dimension 9 | % Use MAXK(..., dim) to operate along the dimension dim 10 | % MAXK(..., dim, 'sorting', false) to disable the post-sorting step 11 | % (true by default) 12 | % 13 | % Author Bruno Luong 14 | % Contributor: Matt Fig 15 | % Last update: 07/April/2009 16 | % 10/Jan/2010: possibility to disable post-sorting step 17 | 18 | nout=cell(1,max(1,nargout)); 19 | [nout{:}] = minmaxk(@maxkmex, varargin{:}); 20 | varargout=nout; 21 | 22 | -------------------------------------------------------------------------------- /MinMaxSelection/maxkmex.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * function [res loc] = maxkmex(list, k) 3 | * Matlab C-Mex 4 | * Purpose: Same as MAXK, i.e., 5 | * Return in RES the K largest elements of 2D matrix 6 | * LOC is Location of the largest values 7 | * - For full matrix, LOC contains linear indexing of the matrix. 8 | * RES == LIST(LOC) 9 | * - For sparse, location is returned in subindexes form by calling 10 | * [RES I J] = minkmex(list, k) 11 | * RES == getsparse(list,I,J) 12 | * This MEX works on double array only, and output RES is unsorted column 13 | * Complexity O(n) where n is size of list 14 | * Note: Implementation of type "non-destructive", i.e., the original data 15 | * will not be effectively swapped, but we keep track of a table of 16 | * permutation indexes. 
17 | * Algorithm according to http://en.wikipedia.org/wiki/Selection_algorithm 18 | * Compilation: mex -O -v maxkmex.c 19 | * Author Bruno Luong 20 | * Last update: 10-Jan-2010 21 | * 16-August-2010: change type to mwSignedIndex and 22 | * check nansplit>=0 23 | * 27-Aug-2011: correct bug for sparse array 24 | * 26-Apr-2013: fix memset warning and remove C++ comment style 25 | *************************************************************************/ 26 | 27 | #include "mex.h" 28 | #include "matrix.h" 29 | 30 | /* Define correct type depending on platform */ 31 | #if defined(_MSC_VER) || defined(__BORLANDC__) 32 | typedef unsigned __int64 ulong64; 33 | #elif defined(_LCC) 34 | typedef long long long64; 35 | typedef unsigned long long ulong64; 36 | #else 37 | typedef unsigned long long ulong64; 38 | #endif 39 | 40 | /* Global variables, used to avoid stacking them during recusive call since 41 | they do not change */ 42 | mwIndex k; 43 | mwIndex *pos; 44 | double *list; 45 | 46 | #define MIDPOINT 0 47 | #define MEDIAN3 1 48 | #define MEDIANMEDIANS 2 49 | 50 | /* Pivot Strategy, use one of the above */ 51 | #define PIVOT MIDPOINT 52 | 53 | /*************************************************************************/ 54 | /*Find the index of the Median of the elements 55 | of array that occur at every "shift" positions.*/ 56 | mwIndex findMedianIndex(mwIndex left, mwIndex right, mwIndex shift) 57 | { 58 | mwIndex tmp, groups, k; 59 | double maxValue; 60 | mwIndex *pi, *pj, *pk, *pright, *pmaxIndex; 61 | 62 | groups = (right-left)/shift + 1; 63 | pk = pos + (k = left + (groups/2)*shift); 64 | pright = pos + right; 65 | for (pi=pos+left; pi<=pk; pi+= shift) 66 | { 67 | pmaxIndex = pi; 68 | maxValue = list[*pmaxIndex]; 69 | 70 | for (pj=pi; pj<=pright; pj+=shift) 71 | if (list[*pj]>maxValue) /* Comparison */ 72 | maxValue = list[*(pmaxIndex=pj)]; 73 | /* Swap pos[i] with pos[maxIndex] */ 74 | tmp = *pi; 75 | *pi = *pmaxIndex; 76 | *pmaxIndex = tmp; 77 | } 78 | 79 | return 
k; 80 | 81 | } /* findMedianIndex */ 82 | 83 | /*Computes the median of each group of 5 elements and stores 84 | it as the first element of the group (left). Recursively does this 85 | till there is only one group and hence only one Median */ 86 | mwIndex findMedianOfMedians(mwIndex left, mwIndex right) 87 | { 88 | mwIndex i, shift, step, tmp; 89 | mwIndex endIndex, medianIndex; 90 | 91 | if (left==right) return left; 92 | 93 | shift = 1; 94 | while (shift <= (right-left)) 95 | { 96 | step=shift*5; 97 | for (i=left; i<=right; i+=step) 98 | { 99 | if ((endIndex=i+step-1)>=right) 100 | endIndex=right; 101 | medianIndex = findMedianIndex(i, endIndex, shift); 102 | /* Swap pos[i] with pos[medianIndex] */ 103 | tmp = pos[i]; 104 | pos[i] = pos[medianIndex]; 105 | pos[medianIndex] = tmp; 106 | } 107 | shift = step; 108 | } 109 | return left; 110 | } /* findMedianOfMedians */ 111 | 112 | /*************************************************************************/ 113 | /*Computes the median of three points (left,right,and mid) */ 114 | mwIndex findMedianThree(mwIndex left, mwIndex right) 115 | { 116 | double vleft, vright, vmid; 117 | mwIndex mid; 118 | 119 | if (left==right) return left; 120 | 121 | vleft = list[pos[left]]; 122 | vright = list[pos[right]]; 123 | vmid = list[pos[mid = (left+right+1)/2]]; 124 | 125 | if (vleftvright) 128 | return right; 129 | else if (vmid=vright) */ 135 | 136 | if (vmid>vleft) 137 | return left; 138 | else if (vmid=pfirst && ISNAN(list[*pright])) 180 | pright--; 181 | return (pright-pos); 182 | } 183 | } /* for-loop */ 184 | } /* partNaN */ 185 | 186 | /*************************************************************************/ 187 | 188 | /* Partitioning the list around pivot pivotValue := l[pivotIndex]; 189 | After runing, at exit we obtain: 190 | l[left]...l[index-1] > pivotValue >= l[index] ... 
l[right] 191 | where l[i] := list[pos[i]] for all i */ 192 | mwIndex partition(mwIndex left, mwIndex right, mwIndex pivotIndex) { 193 | 194 | double pivotValue; 195 | mwIndex *pindex, *pi, *pright; 196 | mwIndex tmp; 197 | 198 | pright=pos+right; 199 | pindex=pos+pivotIndex; 200 | pivotValue = list[tmp = *pindex]; 201 | /* Swap pos[pivotIndex] with pos[right] */ 202 | *pindex = *pright; 203 | *pright = tmp; 204 | 205 | pindex=pos+left; 206 | for (pi=pindex; pi pivotValue) { 209 | /* if larger; Swap pos[index] with pos[i] */ 210 | tmp = *pi; 211 | *pi = *pindex; 212 | *(pindex++) = tmp; 213 | } 214 | 215 | /* Swap pos[index] with pos[right] */ 216 | tmp = *pindex; 217 | *pindex = *pright; 218 | *pright = tmp; 219 | 220 | return (pindex-pos); /* Pointer arithmetic */ 221 | } /* Partition */ 222 | 223 | /* Partitioning the list around pivot 0; 224 | * After runing, at exit we obtain: 225 | l[left]...l[index-1] > 0 >= l[index] ... l[right] 226 | where l[i] := list[pos[i]] for all i 227 | Note: at return, index might be larger than right (if all elements are 228 | strictly greater than zero) */ 229 | mwIndex part0(mwIndex left, mwIndex right) { 230 | 231 | mwIndex *pindex, *pi, *pright; 232 | mwIndex tmp; 233 | 234 | pright=pos+right; 235 | pindex=pos+left; 236 | for (pi=pindex; pi<=pright; pi++) 237 | /* Compare with pivotValue of zero */ 238 | if (list[*pi] > 0.0) { /* Compare */ 239 | /* if larger; Swap pos[index] with pos[i] */ 240 | tmp = *pi; 241 | *pi = *pindex; 242 | *(pindex++) = tmp; 243 | } 244 | 245 | return (pindex-pos); /* Pointer arithmetic */ 246 | } /* part0 */ 247 | 248 | /* Recursive engine (partial quicksort) */ 249 | void findFirstK(mwIndex left, mwIndex right) { 250 | 251 | mwIndex pivotIndex; 252 | 253 | if (right > left) { 254 | 255 | #if (PIVOT==MEDIANMEDIANS) 256 | pivotIndex = findMedianOfMedians(left, right); 257 | #elif (PIVOT==MEDIAN3) 258 | pivotIndex = findMedianThree(left, right); 259 | #else /* MIDPOINT */ 260 | pivotIndex = 
(left+right+1)/2; 261 | #endif 262 | 263 | pivotIndex = partition(left, right, pivotIndex); 264 | if (pivotIndex > k) 265 | findFirstK(left, pivotIndex-1); 266 | else if (pivotIndex < k) 267 | findFirstK(pivotIndex+1, right); 268 | } 269 | 270 | return; 271 | } /* findFirstK */ 272 | 273 | /* Create the result contains k largest values */ 274 | mxArray* MinMaxResult(mwIndex k, mwIndex p0, mwIndex nz, 275 | mwIndex kout) 276 | { 277 | mwIndex i; 278 | mwSize dims[2]; 279 | mxArray* Result; 280 | double *data; 281 | 282 | /* Create the Matrix result (first output) */ 283 | dims[0] = kout; dims[1] = 1; 284 | Result = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL); 285 | if (Result == NULL) 286 | mexErrMsgTxt("Out of memory."); 287 | data = mxGetPr(Result); 288 | /* copy positive part (p0) */ 289 | for (i=0; ikout-p0) 292 | nz = kout-p0; 293 | /* Fill nz zeros */ 294 | memset((void*)(data+p0), 0, sizeof(double)*nz); 295 | 296 | /* copy negative part (kout - (p0+nz)) */ 297 | for (i=p0+nz; ikout-p0) 321 | nz = kout-p0; 322 | /* Fill nz zeros */ 323 | memset((void*)(data+p0), 0, sizeof(double)*nz); 324 | 325 | /* index of negative part */ 326 | for (i=p0+nz; inzS) nz=(mwIndex)nzS; 359 | 360 | /* Get the sparse index pointers */ 361 | irs = mxGetIr(S); 362 | jcs = mxGetJc(S); 363 | 364 | /* i is index of I J */ 365 | i = 0; 366 | /* (ai,aj) current subindex of zero element */ 367 | ai = aj = 0; 368 | 369 | /* (bi,bj) current subindex of nonzero element */ 370 | if ((ib=0)kout-p0) 444 | nz = kout-p0; 445 | /* Find the place where zeros are hidden */ 446 | FindSPzero(S, nz, dataI+p0, dataJ+p0); 447 | 448 | /* index of positive part */ 449 | for (i=p0+nz; il) k=l; 516 | 517 | /* Clip kout */ 518 | if (kout>nelem) kout=nelem; 519 | 520 | /* Clean programming */ 521 | pos=NULL; 522 | 523 | /* Work for non-empty array */ 524 | if (l>0) { 525 | /* Vector of index */ 526 | pos = mxMalloc(sizeof(mwSize)*l); 527 | if (pos==NULL) 528 | mexErrMsgTxt("Out of memory."); 529 | /* 
Initialize the array of position (zero-based index) */ 530 | for (i=0; i=0) 536 | findFirstK(0, nansplit); 537 | 538 | /* Look for the split of positive/negative numbers */ 539 | if (sparseflag) { 540 | p0 = part0(0, k); /* number of strict negative elements */ 541 | if (p0 < k) /* There are at least two positive elements */ 542 | { 543 | /* Number of implicite zeros */ 544 | nz = nelem-l; 545 | if (nz) /* in case the positive set is unordered */ 546 | { 547 | k -= nz; 548 | findFirstK(p0, nansplit); 549 | k += nz; 550 | } 551 | } 552 | /* ++ to restore one-based Matlab index */ 553 | k++; 554 | } 555 | else 556 | /* ++ to Restore one-based Matlab index */ 557 | p0 = ++k; 558 | } /* if (l>0) */ 559 | else p0 = 0; 560 | 561 | /* Number of implicite zero in (sparse) */ 562 | nz = nelem-l; 563 | /* Create the Matrix result (first output) */ 564 | plhs[0] = MinMaxResult(k, p0, nz, kout); 565 | 566 | /* Create the Matrix position (second output) */ 567 | if (nlhs>=2) 568 | { 569 | if (sparseflag) 570 | SpLocResult(k, p0, nz, kout, prhs[0], &(plhs[1]), &(plhs[2])); 571 | else 572 | plhs[1] = LocResult(k, p0, nz, kout); 573 | } 574 | 575 | /* Free the array of position */ 576 | if (pos) mxFree(pos); 577 | pos = NULL; /* clean programming */ 578 | 579 | return; 580 | 581 | } /* Gateway of maxkmex.c */ 582 | 583 | 584 | -------------------------------------------------------------------------------- /MinMaxSelection/maxkmex.m: -------------------------------------------------------------------------------- 1 | % function [res loc] = maxkmex(list, k) 2 | % 3 | % Matlab C-Mex 4 | % Purpose: Same as MAXK, i.e., 5 | % Return in RES the K largest elements of LIST 6 | % LOC is Location of the largest values: RES=LIST(LOC) 7 | % This MEX works on double only, and output RES is unsorted 8 | % Algorithm according to http://en.wikipedia.org/wiki/Selection_algorithm 9 | % Compilation: mex -O -v maxkmex.c 10 | % Author Bruno Luong 11 | % Last update: 07/April/2009 12 | % 13 | 14 | 
error('Mex file not yet compiled. Action: mex -O -v maxkmex.c'); -------------------------------------------------------------------------------- /MinMaxSelection/maxkmex.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PreferredAI/mp-simrank/e10b2a7be9f3be8a4c250760c5f286116daf6ad3/MinMaxSelection/maxkmex.mexa64 -------------------------------------------------------------------------------- /MinMaxSelection/maxkmex.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PreferredAI/mp-simrank/e10b2a7be9f3be8a4c250760c5f286116daf6ad3/MinMaxSelection/maxkmex.mexw64 -------------------------------------------------------------------------------- /MinMaxSelection/mink.m: -------------------------------------------------------------------------------- 1 | function varargout = mink(varargin) 2 | % function res = MINK(list, k) 3 | % 4 | % If LIST is a vector, MINK returns in RES the K smallest elements of LIST 5 | % RES is sorted in ascending order 6 | % [res loc] = MINK(...) 
7 | % Location of the smallest elements: RES=LIST(LOC) 8 | % If list is a matrix, MINK operates along the first dimension 9 | % Use MINK(..., dim) to operate along the dimension dim 10 | % MINK(..., dim, 'sorting', false) to disable the post-sorting step 11 | % (true by default) 12 | % 13 | % Author Bruno Luong 14 | % Contributor: Matt Fig 15 | % Last update: 07/April/2009 16 | % 10/Jan/2010: possibility to disable post-sorting step 17 | 18 | 19 | nout=cell(1,max(1,nargout)); 20 | [nout{:}] = minmaxk(@minkmex, varargin{:}); 21 | varargout=nout; 22 | -------------------------------------------------------------------------------- /MinMaxSelection/minkmex.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * function [res loc] = minkmex(list, k) 3 | * Matlab C-Mex 4 | * Purpose: Same as MINK, i.e., 5 | * Return in RES the K smallest elements of 2D matrix 6 | * LOC is Location of the smallest values 7 | * - For full matrix, LOC contains linear indexing of the matrix. 8 | * RES == LIST(LOC) 9 | * - For sparse, location is returned in subindexes form by calling 10 | * [RES I J] = minkmex(list, k) 11 | * RES == getsparse(list,I,J) 12 | * This MEX works on double array only, and output RES is unsorted column 13 | * Complexity O(n) where n is size of list 14 | * Note: Implementation of type "non-destructive", i.e., the original data 15 | * will not be effectively swapped, but we keep track of a table of 16 | * permutation indexes. 
17 | * Algorithm according to http://en.wikipedia.org/wiki/Selection_algorithm 18 | * Compilation: mex -O -v minkmex.c 19 | * Author Bruno Luong 20 | * Last update: 10-Jan-2010 21 | * 16-August-2010: change type to mwSignedIndex and 22 | * check nansplit>=0 23 | * 27-Aug-2011: correct bug for sparse array 24 | * 26-Apr-2013: fix memset warning and remove C++ comment style 25 | *************************************************************************/ 26 | 27 | #include "mex.h" 28 | #include "matrix.h" 29 | 30 | /* Define correct type depending on platform */ 31 | #if defined(_MSC_VER) || defined(__BORLANDC__) 32 | typedef unsigned __int64 ulong64; 33 | #elif defined(_LCC) 34 | typedef long long long64; 35 | typedef unsigned long long ulong64; 36 | #else 37 | typedef unsigned long long ulong64; 38 | #endif 39 | 40 | /* Global variables, used to avoid stacking them during recusive call since 41 | they do not change */ 42 | mwIndex k; 43 | mwIndex *pos; 44 | double *list; 45 | 46 | #define MIDPOINT 0 47 | #define MEDIAN3 1 48 | #define MEDIANMEDIANS 2 49 | 50 | /* Pivot Strategy, use one of the above */ 51 | #define PIVOT MIDPOINT 52 | 53 | /*************************************************************************/ 54 | /*Find the index of the Median of the elements 55 | of array that occur at every "shift" positions.*/ 56 | mwSize findMedianIndex(mwSize left, mwSize right, mwSize shift) 57 | { 58 | mwSize tmp, groups, k; 59 | double minValue; 60 | mwSize *pi, *pj, *pk, *pright, *pminIndex; 61 | 62 | groups = (right-left)/shift + 1; 63 | pk = pos + (k = left + (groups/2)*shift); 64 | pright = pos + right; 65 | for (pi=pos+left; pi<=pk; pi+= shift) 66 | { 67 | pminIndex = pi; 68 | minValue = list[*pminIndex]; 69 | 70 | for (pj=pi; pj<=pright; pj+=shift) 71 | if (list[*pj]=right) 100 | endIndex=right; 101 | medianIndex = findMedianIndex(i, endIndex, shift); 102 | /* Swap pos[i] with pos[medianIndex] */ 103 | tmp = pos[i]; 104 | pos[i] = pos[medianIndex]; 105 | 
pos[medianIndex] = tmp; 106 | } 107 | shift = step; 108 | } 109 | return left; 110 | } /* findMedianOfMedians */ 111 | 112 | /*************************************************************************/ 113 | /*Computes the median of three points (left,right,and mid) */ 114 | mwIndex findMedianThree(mwIndex left, mwIndex right) 115 | { 116 | double vleft, vright, vmid; 117 | mwIndex mid; 118 | 119 | if (left==right) return left; 120 | 121 | vleft = list[pos[left]]; 122 | vright = list[pos[right]]; 123 | vmid = list[pos[mid = (left+right+1)/2]]; 124 | 125 | if (vleftvright) 128 | return right; 129 | else if (vmid=vright) */ 135 | 136 | if (vmid>vleft) 137 | return left; 138 | else if (vmid=pfirst && ISNAN(list[*pright])) 180 | pright--; 181 | return (pright-pos); 182 | } 183 | } /* for-loop */ 184 | } /* partNaN */ 185 | 186 | /*************************************************************************/ 187 | /* Partitioning the list around pivot pivotValue := l[pivotIndex]; 188 | After runing, at exit we obtain: 189 | l[left]...l[index-1] < pivotValue <= l[index] ... 
l[right] 190 | where l[i] := list[pos[i]] for all i */ 191 | mwSize partition(mwSize left, mwSize right, mwSize pivotIndex) { 192 | 193 | double pivotValue; 194 | mwSize *pindex, *pi, *pright; 195 | mwSize tmp; 196 | 197 | pright=pos+right; 198 | pindex=pos+pivotIndex; 199 | pivotValue = list[tmp = *pindex]; 200 | /* Swap pos[pivotIndex] with pos[right] */ 201 | *pindex = *pright; 202 | *pright = tmp; 203 | 204 | pindex=pos+left; 205 | for (pi=pindex; pi left) { 254 | 255 | #if (PIVOT==MEDIANMEDIANS) 256 | pivotIndex = findMedianOfMedians(left, right); 257 | #elif (PIVOT==MEDIAN3) 258 | pivotIndex = findMedianThree(left, right); 259 | #else /* MIDPOINT */ 260 | pivotIndex = (left+right+1)/2; 261 | #endif 262 | 263 | pivotIndex = partition(left, right, pivotIndex); 264 | if (pivotIndex > k) 265 | findFirstK(left, pivotIndex-1); 266 | else if (pivotIndex < k) 267 | findFirstK(pivotIndex+1, right); 268 | } 269 | 270 | return; 271 | } /* findFirstK */ 272 | 273 | /* Create the result contains k smallest values */ 274 | mxArray* MinMaxResult(mwIndex k, mwIndex p0, mwIndex nz, 275 | mwIndex kout) 276 | { 277 | mwIndex i; 278 | mwSize dims[2]; 279 | mxArray* Result; 280 | double *data; 281 | 282 | /* Create the Matrix result (first output) */ 283 | dims[0] = kout; dims[1] = 1; 284 | Result = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL); 285 | if (Result == NULL) 286 | mexErrMsgTxt("Out of memory."); 287 | data = mxGetPr(Result); 288 | /* copy negative part (p0) */ 289 | for (i=0; ikout-p0) 292 | nz = kout-p0; 293 | /* Fill nz zeros */ 294 | memset((void*)(data+p0), 0, sizeof(double)*nz); 295 | 296 | /* copy positive part (kout - (p0+nz)) */ 297 | for (i=p0+nz; ikout-p0) 321 | nz = kout-p0; 322 | /* Fill nz zeros */ 323 | memset((void*)(data+p0), 0, sizeof(double)*nz); 324 | 325 | /* index of positive part */ 326 | for (i=p0+nz; inzS) nz=(mwIndex)nzS; 359 | 360 | /* Get the sparse index pointers */ 361 | irs = mxGetIr(S); 362 | jcs = mxGetJc(S); 363 | 364 | /* i 
is index of I J */ 365 | i = 0; 366 | /* (ai,aj) current subindex of zero element */ 367 | ai = aj = 0; 368 | 369 | /* (bi,bj) current subindex of nonzero element */ 370 | if ((ib=0)kout-p0) 444 | nz = kout-p0; 445 | /* Find the place where zeros are hidden */ 446 | FindSPzero(S, nz, dataI+p0, dataJ+p0); 447 | 448 | /* index of positive part */ 449 | for (i=p0+nz; il) k=l; 516 | 517 | /* Clip kout */ 518 | if (kout>nelem) kout=nelem; 519 | 520 | /* Clean programming */ 521 | pos=NULL; 522 | 523 | /* Work for non-empty array */ 524 | if (l>0) { 525 | /* Vector of index */ 526 | pos = mxMalloc(sizeof(mwSize)*l); 527 | if (pos==NULL) 528 | mexErrMsgTxt("Out of memory."); 529 | /* Initialize the array of position (zero-based index) */ 530 | for (i=0; i=0) 536 | findFirstK(0, nansplit); 537 | 538 | /* Look for the split of positive/negative numbers */ 539 | if (sparseflag) { 540 | p0 = part0(0, k); /* number of strict negative elements */ 541 | if (p0 < k) /* There are at least two positive elements */ 542 | { 543 | /* Number of implicite zeros */ 544 | nz = nelem-l; 545 | if (nz) /* in case the positive set is unordered */ 546 | { 547 | k -= nz; 548 | findFirstK(p0, nansplit); 549 | k += nz; 550 | } 551 | } 552 | /* ++ to restore one-based Matlab index */ 553 | k++; 554 | } 555 | else 556 | /* ++ to Restore one-based Matlab index */ 557 | p0 = ++k; 558 | } /* if (l>0) */ 559 | else p0 = 0; 560 | 561 | /* Number of implicite zero in (sparse) */ 562 | nz = nelem-l; 563 | /* Create the Matrix result (first output) */ 564 | plhs[0] = MinMaxResult(k, p0, nz, kout); 565 | 566 | /* Create the Matrix position (second output) */ 567 | if (nlhs>=2) 568 | { 569 | if (sparseflag) 570 | SpLocResult(k, p0, nz, kout, prhs[0], &(plhs[1]), &(plhs[2])); 571 | else 572 | plhs[1] = LocResult(k, p0, nz, kout); 573 | } 574 | 575 | /* Free the array of position */ 576 | if (pos) mxFree(pos); 577 | pos = NULL; /* clean programming */ 578 | 579 | return; 580 | 581 | } /* Gateway of minkmex.c 
*/ 582 | 583 | 584 | -------------------------------------------------------------------------------- /MinMaxSelection/minkmex.m: -------------------------------------------------------------------------------- 1 | % function [res loc] = minkmex(list, k) 2 | % 3 | % Matlab C-Mex 4 | % Purpose: Same as MINK, i.e., 5 | % Return in RES the K smallest elements of LIST 6 | % LOC is Location of the smallest values: RES=LIST(LOC) 7 | % This MEX works on double only, and output RES is unsorted 8 | % Algorithm according to http://en.wikipedia.org/wiki/Selection_algorithm 9 | % Compilation: mex -O -v minkmex.c 10 | % Author Bruno Luong 11 | % Last update: 07/April/2009 12 | % 13 | 14 | error('Mex file not yet compiled. Action: mex -O -v minkmex.c') -------------------------------------------------------------------------------- /MinMaxSelection/minkmex.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PreferredAI/mp-simrank/e10b2a7be9f3be8a4c250760c5f286116daf6ad3/MinMaxSelection/minkmex.mexa64 -------------------------------------------------------------------------------- /MinMaxSelection/minkmex.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PreferredAI/mp-simrank/e10b2a7be9f3be8a4c250760c5f286116daf6ad3/MinMaxSelection/minkmex.mexw64 -------------------------------------------------------------------------------- /MinMaxSelection/minmax_install.m: -------------------------------------------------------------------------------- 1 | function minmax_install 2 | % function minmax_install 3 | % Installation by building the C-mex files for min/max selection package 4 | % 5 | % Author Bruno Luong 6 | % Last update: 29-Jun-2009 built inplace functions 7 | 8 | thisfile = mfilename('fullpath'); 9 | path = fileparts(thisfile); 10 | oldpath = cd(path); 11 | 12 | arch=computer('arch'); 13 | mexopts = {'-v' '-O' ['-' arch]}; 14 | % 64-bit 
platform 15 | if ~isempty(strfind(computer(),'64')) 16 | mexopts(end+1) = {'-largeArrayDims'}; 17 | end 18 | 19 | 20 | 21 | if ispc() && datenum(version('-date')) < datenum('January 11, 2014') 22 | compiler = getmexopts('COMPILER'); 23 | islcc = strcmpi(compiler,'lcc'); 24 | % Define the C-symbol for LCC compiler 25 | if islcc 26 | mexopts(end+1) = {'-D_LCC'}; 27 | end 28 | end 29 | 30 | % Internal representation of mxArray 31 | try 32 | buildInternal_mxArrayDef('Internal_mxArray.h'); 33 | catch 34 | if ispc() 35 | cpcmd = 'copy'; 36 | else 37 | cpcmd = ' cp'; 38 | end 39 | cmd = [cpcmd ' Internal_mxArray_2010B.h Internal_mxArray.h']; 40 | system(cmd); 41 | end 42 | 43 | % Inplace tool 44 | mex(mexopts{:},'inplacecolumnmex.c'); 45 | mex(mexopts{:},'releaseinplace.c'); 46 | 47 | % Mex MIN/MAX functions 48 | mex(mexopts{:},'minkmex.c'); 49 | mex(mexopts{:},'maxkmex.c'); 50 | 51 | cd(oldpath); 52 | 53 | end -------------------------------------------------------------------------------- /MinMaxSelection/minmaxk.m: -------------------------------------------------------------------------------- 1 | function [res loc] = minmaxk(mexfun, list, k, dim, varargin) 2 | % function [res loc] = minmaxk(mexfun, list, k, dim) 3 | % 4 | % Return in RES the K smallest/largest elements of LIST 5 | % RES is sorted in ascending/descending order 6 | % [res loc] = minmaxk(...) 
7 | % Location of the smallest/largest: RES=LIST(LOC) 8 | % [res loc] = minmaxk(..., dim) 9 | % specify the dimension to operate 10 | % [res loc] = minmaxk(..., dim, 'sorting', false) 11 | % to disable the post-sorting step (true by default) 12 | % 13 | % Author Bruno Luong 14 | % Contributor: Matt Fig (suggest return same loc as SORT for full blowed 15 | % result) 16 | % Last update: 24/May/2009: work on sparse matrix list 17 | % 28/Jun/2009: used inplace columns for full-array 18 | % 10/Aug/2009: releaseinplace is called to cleanup 19 | % if MEX fails 20 | % 10/Jan/2010: possibility to disable post-sorting step 21 | 22 | clist=class(list); 23 | % Mex functions requires input in double 24 | if ~strcmpi(clist,'double') 25 | list=double(list); 26 | end 27 | 28 | % Look for single selection value by default 29 | if nargin<3 || isempty(k) 30 | k=1; 31 | else 32 | k=double(k); 33 | end 34 | 35 | szlist = size(list); 36 | if nargin<4 37 | if isvector(list) && szlist(1)==1 38 | dim=2; 39 | else 40 | dim=1; 41 | end 42 | end 43 | 44 | if mod(length(varargin),2) 45 | error('MINMAXK: options must come as property/value pairs'); 46 | end 47 | 48 | postsorting = getoptionpair({'postsorting', 'sorting', 'sort'}, ... 49 | true, varargin); 50 | 51 | nd=ndims(list); 52 | if dim<1 || dim>nd 53 | error('MINMAXK: dim must be between 1 and ndims(LIST)=%d', nd); 54 | end 55 | 56 | % Will be used for sorting 57 | if isequal(mexfun,@minkmex) 58 | smode='ascend'; 59 | else 60 | smode='descend'; 61 | end 62 | 63 | % Do we need to get location? 
64 | getloc=nargout>=2; 65 | 66 | % Put operating dimension to the first 67 | list=shiftdim(list,dim-1); 68 | 69 | % operating length 70 | l=size(list,1); 71 | % Number of vectors 72 | szl=size(list); 73 | N=prod(szl(2:end)); 74 | 75 | szres=szl; 76 | k=min(k,l); 77 | szres(1)=k; 78 | res=zeros(szres,clist); % Allocate array having the same class with list 79 | if getloc 80 | loc=zeros(szres,'double'); 81 | end 82 | if k>=l % Matt Fig's suggestion 83 | res = list; 84 | if getloc 85 | repvec=size(loc); repvec(1)=1; 86 | loc = repmat((1:k)',repvec); 87 | end 88 | else 89 | try % use try/catch instead of onCleanup for better compatibility 90 | if getloc 91 | if issparse(list) 92 | for n=1:N 93 | [res(:,n) loc(:,n) dummy] = mexfun(list(:,n),k); %#ok 94 | end 95 | else 96 | for n=1:N 97 | cn = inplacecolumnmex(list,n); % inplace column 98 | [res(:,n) loc(:,n)] = mexfun(cn,k); 99 | releaseinplace(cn); 100 | %[res(:,n) loc(:,n)] = mexfun(list(:,n),k); 101 | end 102 | end 103 | else 104 | if issparse(list) 105 | for n=1:N 106 | res(:,n) = mexfun(list(:,n),k); 107 | end 108 | else 109 | for n=1:N 110 | cn = inplacecolumnmex(list,n); % inplace column 111 | res(:,n) = mexfun(cn,k); 112 | releaseinplace(cn); 113 | %res(:,n) = mexfun(list(:,n),k); 114 | end 115 | end 116 | end 117 | catch 118 | % If something is wrong 119 | % It crashes if cn is not released properly 120 | if exist('cn','var') && ~isempty(cn) 121 | releaseinplace(cn); 122 | end 123 | % rethrow the error (likely memory) 124 | rethrow(lasterror); 125 | end 126 | end 127 | 128 | % This is the post processing step of sorting the selection data 129 | % The purpose is to have a nicely formatted output, that's all 130 | 131 | if getloc 132 | if postsorting 133 | [res is] = sort(res,1,smode); 134 | j=(0:N-1)*k; 135 | % Use reshape instead of bsxfun for backward compatible 136 | if exist('bsxfun','builtin') 137 | is = bsxfun(@plus, reshape(is,[k N]), j); 138 | else 139 | is = reshape(is,[k N]) + repmat(j,[k 1]); 140 | 
end 141 | loc = reshape(loc(is),size(loc)); 142 | end 143 | % Put the operating dimension at the right place 144 | loc = shiftdim(loc,nd+1-dim); 145 | else 146 | if postsorting 147 | res = sort(res,1,smode); 148 | end 149 | end 150 | 151 | % Put the operating dimension at the right place 152 | res = shiftdim(res,nd+1-dim); 153 | 154 | end % minmaxk 155 | 156 | function val = getoptionpair(name, defaultval, vargin) 157 | % Get the value from property/value pairs 158 | val = defaultval; 159 | for k=1:2:length(vargin) 160 | if strmatch(vargin{k},name) 161 | val = vargin{k+1}; 162 | return 163 | end 164 | end 165 | end % getoptionpair -------------------------------------------------------------------------------- /MinMaxSelection/releaseinplace.c: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * function releaseinplace(b) 3 | * Release the data from an inplace column mxArray that was created 4 | * with the inplacecolumnmex function. 5 | * Author Bruno Luong 6 | * Last update: 27/June/2009 7 | ************************************************************************/ 8 | 9 | #include "mex.h" 10 | #include "matrix.h" 11 | 12 | /* Uncomment this on older Matlab version where size_t has not been 13 | defined */ 14 | /*#define size_t int*/ 15 | 16 | /* The following file defines the internal representation of mxArray, 17 | * inspired from mxArray_tag declared in the header . 
18 | * This file is built by calling the MATLAB function 19 | * buildInternal_mxArrayDef.m */ 20 | #include "Internal_mxArray.h" 21 | 22 | /* Gateway of releaseinplace */ 23 | void mexFunction(int nlhs, mxArray *plhs[], 24 | int nrhs, const mxArray *prhs[]) { 25 | 26 | /* Check arguments */ 27 | if (nrhs!=1) 28 | mexErrMsgTxt("RELEASEINPLACE: One input argument required."); 29 | 30 | if( nlhs != 0 ) { 31 | mexErrMsgTxt("RELEASEINPLACE: Zero output arguments required."); 32 | } 33 | 34 | mxSetM((mxArray *)prhs[0], 0); 35 | mxSetN((mxArray *)prhs[0], 0); 36 | /* Equivalent to doing this: mxSetPr(prhs[0], NULL); 37 | but access directly to data pointer in order to by pass Matlab 38 | checking - Thanks to James Tursa */ 39 | ((Internal_mxArray*)(prhs[0]))->data.number_array.pdata = NULL; 40 | ((Internal_mxArray*)(prhs[0]))->data.number_array.pimag_data = NULL; 41 | 42 | return; 43 | 44 | } /* Gateway of releaseinplace.c */ -------------------------------------------------------------------------------- /MinMaxSelection/releaseinplace.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PreferredAI/mp-simrank/e10b2a7be9f3be8a4c250760c5f286116daf6ad3/MinMaxSelection/releaseinplace.mexa64 -------------------------------------------------------------------------------- /MinMaxSelection/releaseinplace.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PreferredAI/mp-simrank/e10b2a7be9f3be8a4c250760c5f286116daf6ad3/MinMaxSelection/releaseinplace.mexw64 -------------------------------------------------------------------------------- /MinMaxSelection/testminmax.m: -------------------------------------------------------------------------------- 1 | % Author Bruno Luong 2 | % Last update: 07/April/2009 3 | % Script to tets min/max selection 4 | 5 | clear 6 | 7 | 8 | try 9 | minkmex(1,1); 10 | minkmex(1,1); 11 | catch 12 | minmax_install(); 
13 | end 14 | 15 | n=1e7; 16 | k=10; 17 | 18 | ntest=10; 19 | 20 | % Timing 21 | disp('Time the algorithms for few seconds...'); 22 | tmink=zeros(1,ntest); 23 | tmaxk=zeros(1,ntest); 24 | tsort=zeros(1,ntest); 25 | for i=1:ntest 26 | list=rand(1,n); 27 | 28 | tic 29 | mn=mink(list,k); 30 | tmink(i)=toc; 31 | 32 | tic 33 | mx=maxk(list,k); 34 | tmaxk(i)=toc; 35 | 36 | tic 37 | s=sort(list); 38 | smn=s(1:k); 39 | smx=s(end:-1:end-k+1); 40 | tsort(i)=toc; 41 | 42 | if ~isequal(mn,smn) || ~isequal(mx,smx) 43 | keyboard; 44 | end 45 | end 46 | 47 | fprintf('Timing mink: %f [s]\n',median(tmink)); 48 | fprintf('Timing maxk: %f [s]\n',median(tmaxk)); 49 | fprintf('Timing sort: %f [s]\n',median(tsort)); -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MP-SimRank: A Graph-Theoretic Similarity Measure 2 | This repository provides a Matlab implementation of MP-SimRank, a multiperspective graph-theoretic similarity measure. 3 | More details are provided in the following paper: 4 | 5 | **Multiperspective Graph-Theoretic Similarity Measure, by Dung D. Le and Hady W. 
Lauw, ACM Conference on Information and Knowledge Management (CIKM'18), Oct 2018.** 6 | 7 | If you need any information, please contact: ddle.2015@smu.edu.sg 8 | -------------------------------------------------------------------------------- /clusteredMP_simrank.m: -------------------------------------------------------------------------------- 1 | addpath(genpath(['MinMaxSelection'])); 2 | addpath(genpath(['utils'])); 3 | addpath(genpath(['metrics'])); 4 | 5 | 6 | %% create neighbor (label: 1) and non-neighbor (label: -1)graphs from pairs of objects 7 | data_names = {'animal'}; 8 | split_ratios = [0.7]; 9 | num_sample = 10; 10 | 11 | for dataset = data_names 12 | disp(strcat('--- dataset = ', dataset{1} , '-----')) 13 | for split_ratio = split_ratios 14 | disp(strcat('--- split-ratio = ', num2str(split_ratio) , '-----')) 15 | dataPath = strcat('data/', dataset{1}, '/', num2str(split_ratio), '/'); % dataset is a 1x1 cell here; index it so dataPath is a char path that load accepts 16 | 17 | maxIter = 20; 18 | C_1 = 0.8; 19 | C_2 = 0.85; 20 | 21 | for k = 1:num_sample 22 | %% ----- load the adj matrix and test data ----- 23 | load(strcat(dataPath, 'sample_', num2str(k))); 24 | 25 | %% ----- initialization ------ 26 | perSim = cell(num_user, 1); 27 | userSim = zeros(num_user); 28 | 29 | %% ---- similarity propagation ---- 30 | for u = 1 : num_user 31 | S = eye(num_item); 32 | 33 | W = full(trainAdj{u}); 34 | W = W - diag(diag(W)) + eye(num_item); 35 | 36 | % normalize each column of the adj matrix W 37 | W = norm_by_col(W); 38 | 39 | for iter = 1 : maxIter 40 | S = C_2 * W' * S * W; 41 | S = S - diag(diag(S)) + eye(num_item); 42 | S = S .* trainAdjZero{u} + full(trainAdjOne{u}) .* full(trainAdj{u}); 43 | end 44 | 45 | perSim{u} = S; 46 | end 47 | 48 | %% compute the distance matrix dist 49 | dist = zeros(num_user); 50 | for u = 1 : (num_user - 1) 51 | for u_ = (u + 1) : num_user 52 | froNorm = norm(perSim{u} - perSim{u_}, 'fro'); 53 | dist(u, u_) = froNorm; 54 | dist(u_, u) = froNorm; 55 | end 56 | end 57 | 58 | for num_cluster = 1:num_user 59 | 
fprintf('number of cluster: %d', num_cluster) 60 | [inds, ~] = kmedioids(dist, num_cluster); 61 | 62 | % create a merged graph for each cluster 63 | adjMat = cell(num_cluster, 1); 64 | 65 | for clusterId = 1:num_cluster 66 | W = zeros(num_item); 67 | members = find(inds == clusterId); 68 | 69 | for m = members 70 | W = W + trainAdj{m}; 71 | end 72 | W = (W > 0); 73 | adjMat{clusterId} = (W > 0); 74 | end 75 | 76 | %% Step I: ----- initialization ----- 77 | simCluster = eye(num_cluster); 78 | perClusterSim = cell(num_cluster, 1); 79 | for u = 1 : num_cluster % one identity matrix per cluster, matching cell(num_cluster, 1) above 80 | perClusterSim{u} = eye(num_item); 81 | end 82 | 83 | %% STEP II: ---- personalized simrank --- 84 | for t = 1 : maxIter 85 | perClusterSim_t = perClusterSim; 86 | simCluster_t = simCluster; 87 | 88 | % -- update the personalized similarities -- 89 | for u = 1 : num_cluster 90 | S = zeros(num_item); 91 | 92 | for u_ = 1 : num_cluster 93 | %W - adj matrix of u' 94 | W = full(adjMat{u_}); 95 | W = W - diag(diag(W)) + eye(num_item); 96 | 97 | % normalized column Adj matrix W 98 | W = norm_by_col(W); 99 | 100 | % update equation 101 | S = S + C_1/num_cluster * simCluster_t(u, u_) * C_2 * W' * perClusterSim_t{u_} * W; 102 | end 103 | 104 | % -- diagonal elements are 1s -- 105 | S = S - diag(diag(S)) + eye(num_item); 106 | 107 | % -- update the similarity matrix - 108 | perClusterSim{u} = S; 109 | end 110 | 111 | for u = 1 : (num_cluster - 1) 112 | for u_ = (u + 1) : num_cluster 113 | froNorm = norm(perClusterSim{u} - perClusterSim{u_}, 'fro')/num_item; 114 | simCluster(u, u_) = 1 - froNorm; 115 | simCluster(u_, u) = 1 - froNorm; 116 | end 117 | end 118 | end 119 | 120 | for clusterId = 1:num_cluster 121 | members = find(inds == clusterId); 122 | for m = members 123 | perSim{m} = perClusterSim{clusterId}; 124 | end 125 | end 126 | 127 | %%-- Evaluation--- 128 | recall = eval_recall(perSim, trainCluster, testCluster, num_user, num_item); 129 | pres = eval_pres(perSim, trainCluster, testCluster, num_user, num_item); 
130 | disp([recall, pres]) 131 | end 132 | end 133 | end 134 | end -------------------------------------------------------------------------------- /metrics/eval_pres.m: -------------------------------------------------------------------------------- 1 | function pres = eval_pres(perSim, trainCluster, testCluster, num_user, num_item) 2 | pres = 0; 3 | nz_num_user = num_user; 4 | for u = 1 : num_user 5 | s = perSim{u}; 6 | trCluster = trainCluster{u}; 7 | tCluster = testCluster{u}; 8 | 9 | u_pres = 0; 10 | count = 0; 11 | for gid = 1 : length(trCluster) 12 | trItems = trCluster{gid}; 13 | tItems = tCluster{gid}; 14 | ntItems = length(tItems); 15 | if (length(tItems) > 0 && (length(trItems) > 0)) %#ok 16 | count = count + 1; 17 | candidate = setdiff(1 : num_item, trItems); 18 | gsim = s(trItems, candidate); 19 | gsim_score = sum(gsim, 1); % explicit dim: with a single training item gsim is a row vector and sum(gsim) would collapse it to a scalar 20 | [~, topk] = maxk(gsim_score, length(candidate)); 21 | rankedCandidate = candidate(topk); 22 | position = find(ismember(rankedCandidate, tItems)); 23 | u_pres = u_pres + 1 - (sum(position)/ntItems - (ntItems + 1)/2)/(num_item - length(trItems)); 24 | end 25 | end 26 | 27 | if count ~= 0 28 | pres = pres + u_pres/count; 29 | else 30 | nz_num_user = nz_num_user - 1; 31 | end 32 | end 33 | pres = pres/nz_num_user; 34 | end -------------------------------------------------------------------------------- /metrics/eval_recall.m: -------------------------------------------------------------------------------- 1 | function recall = eval_recall(per_sim, trainCluster, testCluster, num_user, num_item) 2 | % Mean recall over users. For each group with non-empty train and test items, 3 | % candidates (items outside the training group) are scored by their summed 4 | % similarity to the training items, and recall is the fraction of test items 5 | % found among the length(tItems) best-scored candidates. Users with no such 6 | % group are left out of the average, as in eval_pres. 7 | recall = 0; 8 | nz_num_user = num_user; 9 | for u = 1 : num_user 10 | s = per_sim{u}; 11 | trCluster = trainCluster{u}; 12 | tCluster = testCluster{u}; 13 | 14 | u_recall = 0; 15 | count = 0; 16 | for gid = 1 : length(trCluster) 17 | trItems = trCluster{gid}; 18 | tItems = tCluster{gid}; 19 | %disp(length(tItems)) 20 | %disp(length(trItems)) 21 | if (length(tItems) > 0 && (length(trItems) > 0)) %#ok 22 | count = count + 1; 23 | candidate = setdiff(1 : num_item, trItems); 24 
| gsim = s(trItems, candidate); 25 | gsim_score = sum(gsim, 1); % explicit dim: with a single training item gsim is a row vector and sum(gsim) would collapse it to a scalar 26 | [~, topk] = maxk(gsim_score, length(tItems)); 27 | u_recall = u_recall + length(intersect(tItems, candidate(topk)))/length(tItems); 28 | end 29 | end 30 | 31 | % count == 0 would give 0/0 = NaN and turn the whole running sum into NaN 32 | if count ~= 0 33 | recall = recall + u_recall/count; 34 | else 35 | nz_num_user = nz_num_user - 1; 36 | end 37 | end 38 | recall = recall/nz_num_user; 39 | end -------------------------------------------------------------------------------- /mp_simrank.m: -------------------------------------------------------------------------------- 1 | addpath(genpath(['MinMaxSelection'])); 2 | addpath(genpath(['utils'])); 3 | addpath(genpath(['metrics'])); 4 | 5 | %% create neighbor (label: 1) and non-neighbor (label: -1)graphs from pairs of objects 6 | dataset = 'pcc'; 7 | split_ratios = [0.7]; 8 | num_sample = 10; 9 | 10 | %for dataset = data_names 11 | disp(strcat('--- dataset = ', dataset , '-----')) 12 | for split_ratio = split_ratios 13 | disp(strcat('--- split-ratio = ', num2str(split_ratio) , '-----')) 14 | %path_to_your_data 15 | dataPath = strcat('data/', dataset, '/', num2str(split_ratio), '/'); 16 | 17 | maxIter = 30; 18 | C_1 = 1; 19 | C_2 = 0.85; 20 | disp('persim: 1 - fro_norm') 21 | for k = 1 : num_sample 22 | %% ----- load the adj matrix and test data ----- 23 | load(strcat(dataPath, 'sample_', num2str(k))); 24 | 25 | %% Step I: ----- initialization ----- 26 | simUser = eye(num_user); 27 | perSim = cell(num_user, 1); 28 | for u = 1 : num_user 29 | perSim{u} = eye(num_item); 30 | end 31 | 32 | %% STEP II: ---- personalized simrank --- 33 | for t = 1 : maxIter 34 | 35 | perSim_t = perSim; 36 | simUser_t = simUser; 37 | 38 | % -- update the personalized similarities -- 39 | for u = 1 : num_user 40 | S = zeros(num_item); 41 | 42 | for u_ = 1 : num_user 43 | %W - adj matrix of u' 44 | W = full(trainAdj{u_}); 45 | W = W - diag(diag(W)) + eye(num_item); 46 | 47 | % normalized column Adj matrix W 48 | W = norm_by_col(W); 49 | 50 | % update equation 51 | S = S + C_1/num_user * simUser_t(u, u_) * C_2 * W' * perSim_t{u_} * W; 
52 | end 53 | 54 | % -- diagonal elements are 1s -- 55 | S = S - diag(diag(S)) + eye(num_item); 56 | S = S .* trainAdjZero{u} + full(trainAdjOne{u}) .* full(trainAdj{u}); 57 | 58 | % -- update the similarity matrix - 59 | perSim{u} = S; 60 | end 61 | %% --- convergence analysis 62 | D = 0; 63 | for u = 1:num_user 64 | D = D + norm(perSim{u} - perSim_t{u}, 'fro')/(num_item * num_user); 65 | end 66 | disp(D) 67 | 68 | %% --- update the user-user similarity matrix --- 69 | for u = 1 : (num_user - 1) 70 | for u_ = (u + 1) : num_user 71 | froNorm = norm(perSim{u} - perSim{u_}, 'fro')/num_item; 72 | 73 | simUser(u, u_) = 1 - froNorm; 74 | simUser(u_, u) = 1 - froNorm; 75 | end 76 | end 77 | end 78 | 79 | 80 | %% --- Evaluation --- 81 | recall = eval_recall(perSim, trainCluster, testCluster, num_user, num_item); 82 | pres = eval_pres(perSim, trainCluster, testCluster, num_user, num_item); 83 | disp([recall, pres]) 84 | 85 | end 86 | end 87 | %end 88 | -------------------------------------------------------------------------------- /pipelined_simrank.m: -------------------------------------------------------------------------------- 1 | addpath(genpath(['MinMaxSelection'])); 2 | addpath(genpath(['utils'])); 3 | addpath(genpath(['metrics'])); 4 | 5 | 6 | %% create neighbor (label: 1) and non-neighbor (label: -1)graphs from pairs of objects 7 | dataset = 'pcc'; 8 | split_ratios = [0.7]; 9 | num_sample = 10; 10 | 11 | disp('--- pipelined_simrank ---') 12 | %for dataset = data_names 13 | disp(strcat('--- dataset = ', dataset , '-----')) 14 | for split_ratio = split_ratios 15 | disp(strcat('--- split-ratio = ', num2str(split_ratio) , '-----')) 16 | dataPath = strcat('data/', dataset, '/', num2str(split_ratio), '/'); 17 | 18 | maxIter = 30; 19 | C = 1; 20 | C_2 = 0.85; 21 | 22 | for k = 1:num_sample 23 | %% ----- load the adj matrix and test data ----- 24 | load(strcat(dataPath, 'sample_', num2str(k))); 25 | num_pair = size(user_pairObj, 2); 26 | 27 | %% ----- initialization 
------ 28 | perSim = cell(num_user, 1); 29 | for u = 1 : num_user 30 | perSim{u} = eye(num_item); 31 | end 32 | 33 | simUser = eye(num_user); 34 | simPair = eye(num_pair); 35 | 36 | %% --- user similarity with bipartie simrank --- 37 | for iter = 1 : maxIter 38 | simUser_t = simUser; 39 | simPair_t = simPair; 40 | 41 | W = full(user_pairObj); 42 | cnorm_W = norm_by_col(W); 43 | rnorm_W = norm_by_col(W'); 44 | 45 | simUser = C * rnorm_W' * simPair_t * rnorm_W; 46 | simPair = C * cnorm_W' * simUser_t * cnorm_W; 47 | 48 | simUser = simUser - diag(diag(simUser)) + eye(num_user); 49 | simPair = simPair - diag(diag(simPair)) + eye(num_pair); 50 | end 51 | simUser = simUser - diag(diag(simUser)) + eye(num_user); 52 | 53 | %% --- similarity propagation ---- 54 | for iter = 1 : maxIter 55 | perSim_t = perSim; 56 | for u = 1 : num_user 57 | S = zeros(num_item); 58 | 59 | for u_ = 1 : num_user 60 | % W - adj matrix of u' 61 | W_2 = full(trainAdj{u_}); 62 | W_2 = W_2 - diag(diag(W_2)) + eye(num_item); 63 | W_2 = norm_by_col(W_2); 64 | 65 | % update equation 66 | S = S + 1/num_user * simUser(u, u_) * C_2 * W_2' * perSim_t{u_} * W_2; 67 | end 68 | 69 | % -- diagonal elements are 1s -- 70 | S = S - diag(diag(S)) + eye(num_item); 71 | S = S .* trainAdjZero{u} + full(trainAdjOne{u}) .* full(trainAdj{u}); 72 | 73 | % -- update the similarity matrix - 74 | perSim{u} = S; 75 | end 76 | 77 | D = 0; 78 | for u = 1:num_user 79 | D = D + norm(perSim{u} - perSim_t{u}, 'fro')/(num_item * num_user); 80 | end 81 | disp(D) 82 | end 83 | 84 | 85 | %% ---- evaluation ---- 86 | recall = eval_recall(perSim, trainCluster, testCluster, num_user, num_item); 87 | pres = eval_pres(perSim, trainCluster, testCluster, num_user, num_item); 88 | disp([recall, pres]); 89 | end 90 | end 91 | %end -------------------------------------------------------------------------------- /utils/kmedioids.m: -------------------------------------------------------------------------------- 1 | function [inds,cidx] = 
kmedioids(D,k) 2 | % [inds,cidx] = kmedioids(D,k) 3 | % 4 | % Performs k-mediods clustering; only requires a distance matrix D and 5 | % number of clusters k. Finds cluster assignments "inds" to minimize the 6 | % following cost function: 7 | 8 | % sum(D(inds==i,inds==i),2), summed over i=1:k 9 | 10 | % Determining cluster assignments and cluster centers are both done in an 11 | % efficient, vectorized way. Cluster assignment is O(nk) and cluster 12 | % centering is O(k*(max cluster size)^2) 13 | % 14 | % INPUTS 15 | % D: nxn all-pairs distance matrix 16 | % k: number of clusters 17 | % 18 | % OUTPUTS 19 | % inds: nx1 vector of assignments of each sample to a cluster id 20 | % cidx: kx1 vector of sample indices which make up the cluster centers 21 | % 22 | % DEMO 23 | % Run with no arguments for demo with 2d points sampled from 3 gaussians, 24 | % using the gmdistribution function from the stats toolbox 25 | 26 | % Written by Ben Sapp, September 2010 27 | % benjamin.sapp@gmail.com 28 | 29 | if nargin == 0 30 | demo(); 31 | return; 32 | end 33 | 34 | n = size(D,1); 35 | 36 | % randomly assign centers: 37 | cidx = randperm(n); 38 | cidx = sort(cidx(1:k)); 39 | 40 | iter = 0; 41 | while 1 42 | inds = assign_pts_to_clusters(D,cidx); 43 | [cidx,energy_next] = update_centers(D,inds,k); 44 | 45 | if iter>0 && energy_next == energy 46 | break; 47 | end 48 | energy = energy_next; 49 | 50 | % fprintf('iter: %04d, energy: %.02f\n',iter,energy) 51 | iter = iter+1; 52 | end 53 | 54 | function inds = assign_pts_to_clusters(D,cidx) 55 | S = D(cidx,:); 56 | [vals,inds] = min(S,[],1); 57 | 58 | function [cidx,energy] = update_centers(D,inds,k,pts) 59 | energy = nan(k,1); 60 | for i=1:k 61 | indsi = find(inds==i); 62 | [energy(i),minind] = min(sum(D(indsi,indsi),2)); 63 | cidx(i) = indsi(minind); 64 | end 65 | energy = sum(energy); 66 | 67 | function demo() 68 | % problem params 69 | k = 3; 70 | n = 2000; 71 | MU = [1 2;-1 -2; 3 0]; 72 | SIGMA = cat(3,[2 0;0 .5],[1 0;0 1], eye(2)); 
73 | p = ones(1,3)/3; 74 | obj = gmdistribution(MU,SIGMA,p); 75 | pts = random(obj,n)'; 76 | 77 | %form all-pairs distance matrix in an efficient way 78 | X = pts'; 79 | temp = sum(X.^2,2); 80 | X=sqrt(2)*X; 81 | D=-X*X'; 82 | D=bsxfun(@plus,D,temp); 83 | D=bsxfun(@plus,D,temp'); 84 | 85 | %run kmedioids 86 | [inds,cidx] = kmedioids(D,k); 87 | 88 | %display 89 | clf, hold on, axis square 90 | c = lines(k); 91 | for i=1:k 92 | ptsi = pts(:,inds==i); 93 | ctrpt = pts(:,cidx(i)); 94 | plot(ptsi(1,:),ptsi(2,:),'.','color',c(i,:)) 95 | plot(ctrpt(1),ctrpt(2),'kx','markersize',22,'linewidth',6) 96 | end 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /utils/norm_by_col.m: -------------------------------------------------------------------------------- 1 | function [W_norm] = norm_by_col(W) 2 | % Function normalizes input adjacency matrix by columns 3 | % Input 4 | % W - [n,n] - adjacency matrix 5 | % 6 | % Output 7 | % W_norm - [n,n] - adjacency matrix normalized by columns; columns whose sum is zero are left unchanged 8 | % 9 | % Author: Aleksandr Katrutsa 10 | % E-mail: aleksandr.katrutsa@phystech.edu 11 | % Date: 20.11.2014 12 | 13 | W_norm = W; 14 | col_sum = sum(W, 1); % explicit dim: sum(W) would collapse a single-row W to one scalar 15 | idx_nonzero_sum = find(col_sum); 16 | for i = idx_nonzero_sum 17 | W_norm(:, i) = W(:, i) / col_sum(1, i); 18 | end 19 | end 20 | --------------------------------------------------------------------------------