├── Exp-Amazon ├── .DS_Store ├── Largest_Experiments.jl ├── Medium_Experiments.jl ├── Output │ ├── Large_200_10000_1.0.mat │ ├── Medium_50_2000_1.0.mat │ ├── Medium_50_3000_1.0.mat │ ├── Smallest_10_200_0.001.mat │ ├── Smallest_10_200_0.01.mat │ ├── Smallest_10_200_0.1.mat │ ├── Smallest_10_200_1.0.mat │ ├── Smallest_10_200_10.0.mat │ ├── Smallest_10_300_0.001.mat │ ├── Smallest_10_300_0.01.mat │ ├── Smallest_10_300_0.1.mat │ ├── Smallest_10_300_1.0.mat │ ├── Smallest_10_300_10.0.mat │ ├── Smallest_10_500_0.001.mat │ ├── Smallest_10_500_0.01.mat │ ├── Smallest_10_500_0.1.mat │ ├── Smallest_10_500_1.0.mat │ ├── Smallest_10_500_10.0.mat │ ├── Smallest_5_200_0.001.mat │ ├── Smallest_5_200_0.01.mat │ ├── Smallest_5_200_0.1.mat │ ├── Smallest_5_200_1.0.mat │ ├── Smallest_5_200_10.0.mat │ ├── Smallest_5_300_0.001.mat │ ├── Smallest_5_300_0.01.mat │ ├── Smallest_5_300_0.1.mat │ ├── Smallest_5_300_1.0.mat │ ├── Smallest_5_300_10.0.mat │ ├── Smallest_5_500_0.001.mat │ ├── Smallest_5_500_0.01.mat │ ├── Smallest_5_500_0.1.mat │ ├── Smallest_5_500_1.0.mat │ └── Smallest_5_500_10.0.mat ├── Output_VaryDelta │ ├── .DS_Store │ ├── Label_12_10_200.mat │ ├── Label_15_200_10000.mat │ ├── Label_17_50_2000.mat │ ├── Label_18_50_2000.mat │ ├── Label_1_10_200.mat │ ├── Label_24_200_10000.mat │ ├── Label_25_200_10000.mat │ ├── Label_2_10_200.mat │ └── Label_3_10_200.mat ├── Output_VaryEps │ ├── .DS_Store │ ├── Label_12_10_300.mat │ ├── Label_18_10_300.mat │ ├── Label_1_10_300.mat │ ├── Label_2_10_300.mat │ └── Label_3_10_300.mat ├── Plots │ ├── .DS_Store │ ├── AmazonVaryDelta.pdf │ └── Smallest_VaryEps_seednum_10_grownum_300.pdf ├── Plots_Vary_Delta.jl ├── Plots_Vary_Epsilon.jl ├── Print_to_Table_F1_T_run.jl ├── Run_Experiments.jl ├── Smallest_Experiments.jl ├── VaryDelta_Experimentsl.jl └── VaryEpsilon_Experiments.jl ├── Exp-Stackoverflow ├── .DS_Store ├── Clique_Expansion_Stack.jl ├── Output_Stack │ └── Set45_1.0_5000.0.mat ├── Output_VaryDelta │ ├── .DS_Store │ ├── Cluster_22943_10000_100_1.0.mat │ ├── Cluster_25849_10000_100_1.0.mat │ ├── Cluster_27596_10000_100_1.0.mat │ ├── Cluster_28886_10000_100_1.0.mat │ ├── Cluster_28918_10000_100_1.0.mat │ ├── Cluster_29386_10000_100_1.0.mat │ ├── Cluster_43507_10000_100_1.0.mat │ └── Cluster_5713_10000_100_1.0.mat ├── Plots │ ├── .DS_Store │ └── StackDots_Wide.pdf ├── Plots_Stack_Experiments.jl ├── Plots_VaryDeltaStack.jl ├── Stack_LargeDelta_Experiments.jl └── Stack_LargeDelta_Table.jl ├── LICENSE ├── README.txt ├── data └── Amazon-Clusters.txt ├── include └── FlowSeed.jl └── src ├── .DS_Store ├── Helper_Functions.jl ├── HyperLocal.jl └── maxflow.jl /Exp-Amazon/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/.DS_Store -------------------------------------------------------------------------------- /Exp-Amazon/Largest_Experiments.jl: -------------------------------------------------------------------------------- 1 | labels = [15; 24] 2 | lnum = length(labels) 3 | 4 | # See outer parameters 5 | delta = 1.0 # all-or-nothing cut 6 | seednum = 200 7 | ntimes = 5 8 | epsis = [1.0] 9 | grownum = 10000 10 | 11 | for e = 1:length(epsis) 12 | 13 | # Output from HyperLocal 14 | hl_pr = zeros(lnum,ntimes) 15 | hl_re = zeros(lnum,ntimes) 16 | hl_f1 = zeros(lnum,ntimes) 17 | hl_time = zeros(lnum,ntimes) 18 | hl_size = zeros(lnum,ntimes) 19 | newS = zeros(lnum,ntimes) 20 | hl_cond = zeros(lnum,ntimes) 21 | 22 | # Output 
from first baseline 1 23 | b1_pr = zeros(lnum,ntimes) 24 | b1_re = zeros(lnum,ntimes) 25 | b1_f1 = zeros(lnum,ntimes) 26 | b1_cond = zeros(lnum,ntimes) 27 | 28 | # Output from baseline 2 29 | b2_pr = zeros(lnum,ntimes) 30 | b2_re = zeros(lnum,ntimes) 31 | b2_f1 = zeros(lnum,ntimes) 32 | b2_cond = zeros(lnum,ntimes) 33 | 34 | # Keep track of R 35 | r_pr = zeros(lnum,ntimes) 36 | r_re = zeros(lnum,ntimes) 37 | r_f1 = zeros(lnum,ntimes) 38 | r_cond = zeros(lnum,ntimes) 39 | 40 | # For each epsilon we store a different matrix of outputs 41 | epsilon = epsis[e] 42 | outputmat = "Output/Large_$seednum"*"_$grownum"*"_$epsilon.mat" 43 | println(outputmat) 44 | 45 | # For a fixed epsilon, seednum, and grownum, run experiments 46 | # on each cluster multiple times 47 | for lab = 1:length(labels) 48 | label = labels[lab] 49 | T = findall(x->x ==label,NodeLabels) 50 | nT = length(T) 51 | 52 | for index = 1:ntimes 53 | 54 | # Generate a new seed set 55 | p = randperm(nT) 56 | Rstart = T[p[1:seednum]] 57 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 58 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 59 | R = union(Rmore,Rstart) 60 | Rs = findall(x->in(x,Rstart),R) # Force seed nodes to be in output set 61 | prr, rer, f1r = PRF(T,R) 62 | r_pr[index] = prr 63 | r_re[index] = rer 64 | r_f1[index] = f1r 65 | condR, volR, cutR = tl_cond(H,R,d,1.0,volA,order) 66 | r_cond[index] = condR 67 | 68 | # Run HyperLocal 69 | s = time() 70 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 71 | hl_time[lab,index] = time()-s 72 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 73 | pr, re, f1 = PRF(T,S) 74 | hl_pr[lab,index] = pr 75 | hl_re[lab,index] = re 76 | hl_f1[lab,index] = f1 77 | hl_size[lab,index] = length(S) 78 | hl_cond[lab,index] = condS 79 | nS = length(setdiff(S,R)) 80 | newS[lab,index] = nS 81 | 82 | # First baseline 83 | kS = nT-length(Rstart) 84 | B1 = BestNeighbors(H,d,Rstart,OneHop,kS) 85 | pr1, re1, f11 = PRF(T,B1) 86 | b1_pr[lab,index] = pr1 87 | b1_re[lab,index] = re1 88 | b1_f1[lab,index] = f11 89 | cond, vol, cut = tl_cond(H,B1,d,1.0,volA,order) 90 | b1_cond[index] = cond 91 | 92 | 93 | # Baseline 2 94 | B2 = TopNeighbors(H,Rstart,OneHop,kS) 95 | pr2, re2, f12 = PRF(T,B2) 96 | b2_pr[lab,index] = pr2 97 | b2_re[lab,index] = re2 98 | b2_f1[lab,index] = f12 99 | cond, vol, cut = tl_cond(H,B2,d,1.0,volA,order) 100 | b2_cond[index] = cond 101 | 102 | println("$label ($nT): $f11 \t $f12 \t $f1 \t $nS") 103 | end 104 | 105 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 106 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond, 107 | "b1_pr"=>b1_pr, "b1_re"=>b1_re, "b1_f1"=>b1_f1,"b1_cond"=>b1_cond,"b2_cond"=>b2_cond, 108 | "r_pr"=>r_pr, "r_re"=>r_re, "r_f1"=>r_f1, "r_cond"=>r_cond, 109 | "b2_pr"=>b2_pr, "b2_re"=>b2_re, "b2_f1"=>b2_f1)) 110 | 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /Exp-Amazon/Medium_Experiments.jl: -------------------------------------------------------------------------------- 1 | labels = [17; 25] 2 | lnum = length(labels) 3 | 4 | # See outer parameters 5 | seednum = 50 6 | ntimes = 5 7 | epsis = [1.0] 8 | delta = 1.0 # stick with the all-or-nothing cut 9 | 10 | for grownum = [2000 3000] 11 | 12 | for e = 1:length(epsis) 13 | 14 | # Output from HyperLocal 15 | hl_pr = zeros(lnum,ntimes) 16 | hl_re = zeros(lnum,ntimes) 17 | hl_f1 = zeros(lnum,ntimes) 18 | hl_time = zeros(lnum,ntimes) 19 | hl_size = zeros(lnum,ntimes) 20 | newS = zeros(lnum,ntimes) 21 | 
hl_cond = zeros(lnum,ntimes) 22 | 23 | # Output from first baseline 1 24 | b1_pr = zeros(lnum,ntimes) 25 | b1_re = zeros(lnum,ntimes) 26 | b1_f1 = zeros(lnum,ntimes) 27 | b1_cond = zeros(lnum,ntimes) 28 | 29 | # Output from baseline 2 30 | b2_pr = zeros(lnum,ntimes) 31 | b2_re = zeros(lnum,ntimes) 32 | b2_f1 = zeros(lnum,ntimes) 33 | b2_cond = zeros(lnum,ntimes) 34 | 35 | # Keep track of R 36 | r_pr = zeros(ntimes) 37 | r_re = zeros(ntimes) 38 | r_f1 = zeros(ntimes) 39 | r_cond = zeros(ntimes) 40 | 41 | # For each epsilon we store a different matrix of outputs 42 | epsilon = epsis[e] 43 | outputmat = "Output/Medium_$seednum"*"_$grownum"*"_$epsilon.mat" 44 | println(outputmat) 45 | 46 | # For a fixed epsilon, seednum, and grownum, run experiments 47 | # on each cluster multiple times 48 | for lab = 1:length(labels) 49 | label = labels[lab] 50 | T = findall(x->x ==label,NodeLabels) 51 | nT = length(T) 52 | 53 | for index = 1:ntimes 54 | 55 | # Generate a new seed set 56 | p = randperm(nT) 57 | Rstart = T[p[1:seednum]] 58 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 59 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 60 | R = union(Rmore,Rstart) 61 | Rs = findall(x->in(x,Rstart),R) # Force seed nodes to be in output set 62 | prr, rer, f1r = PRF(T,R) 63 | r_pr[index] = prr 64 | r_re[index] = rer 65 | r_f1[index] = f1r 66 | condR, volR, cutR = tl_cond(H,R,d,1.0,volA,order) 67 | r_cond[index] = condR 68 | 69 | # Run HyperLocal 70 | s = time() 71 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 72 | hl_time[lab,index] = time()-s 73 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 74 | pr, re, f1 = PRF(T,S) 75 | hl_pr[lab,index] = pr 76 | hl_re[lab,index] = re 77 | hl_f1[lab,index] = f1 78 | hl_size[lab,index] = length(S) 79 | hl_cond[lab,index] = condS 80 | nS = length(setdiff(S,R)) 81 | newS[lab,index] = nS 82 | 83 | # First baseline 84 | kS = nT-length(Rstart) 85 | B1 = BestNeighbors(H,d,Rstart,OneHop,kS) 86 | pr1, re1, f11 = PRF(T,B1) 87 | b1_pr[lab,index] = pr1 88 | b1_re[lab,index] = re1 89 | b1_f1[lab,index] = f11 90 | cond, vol, cut = tl_cond(H,B1,d,1.0,volA,order) 91 | b1_cond[index] = cond 92 | 93 | # Baseline 2 94 | B2 = TopNeighbors(H,Rstart,OneHop,kS) 95 | pr2, re2, f12 = PRF(T,B2) 96 | b2_pr[lab,index] = pr2 97 | b2_re[lab,index] = re2 98 | b2_f1[lab,index] = f12 99 | cond, vol, cut = tl_cond(H,B2,d,1.0,volA,order) 100 | b2_cond[index] = cond 101 | 102 | println("$label ($nT): $f11 \t $f12 \t $f1 \t $nS") 103 | end 104 | 105 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 106 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond, 107 | "b1_pr"=>b1_pr, "b1_re"=>b1_re, "b1_f1"=>b1_f1,"b1_cond"=>b1_cond,"b2_cond"=>b2_cond, 108 | "r_pr"=>r_pr, "r_re"=>r_re, "r_f1"=>r_f1, "r_cond"=>r_cond, 109 | "b2_pr"=>b2_pr, "b2_re"=>b2_re, "b2_f1"=>b2_f1)) 110 | 111 | end 112 | end 113 | 114 | end 115 | -------------------------------------------------------------------------------- /Exp-Amazon/Output/Large_200_10000_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Large_200_10000_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Medium_50_2000_1.0.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Medium_50_2000_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Medium_50_3000_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Medium_50_3000_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_200_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_200_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_200_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_200_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_200_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_200_0.1.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_200_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_200_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_200_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_200_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_300_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_300_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_300_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_300_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_300_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_300_0.1.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_300_1.0.mat: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_300_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_300_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_300_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_500_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_500_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_500_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_500_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_500_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_500_0.1.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_500_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_500_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_500_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_500_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_200_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_200_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_200_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_200_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_200_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_200_0.1.mat -------------------------------------------------------------------------------- 
/Exp-Amazon/Output/Smallest_5_200_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_200_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_200_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_200_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_300_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_300_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_300_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_300_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_300_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_300_0.1.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_300_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_300_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_300_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_300_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_500_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_500_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_500_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_500_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_500_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_500_0.1.mat -------------------------------------------------------------------------------- 
/Exp-Amazon/Output/Smallest_5_500_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_500_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_500_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_500_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/.DS_Store -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_12_10_200.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_12_10_200.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_15_200_10000.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_15_200_10000.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_17_50_2000.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_17_50_2000.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_18_50_2000.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_18_50_2000.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_1_10_200.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_1_10_200.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_24_200_10000.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_24_200_10000.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_25_200_10000.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_25_200_10000.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_2_10_200.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_2_10_200.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_3_10_200.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_3_10_200.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/.DS_Store -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/Label_12_10_300.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/Label_12_10_300.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/Label_18_10_300.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/Label_18_10_300.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/Label_1_10_300.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/Label_1_10_300.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/Label_2_10_300.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/Label_2_10_300.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/Label_3_10_300.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/Label_3_10_300.mat -------------------------------------------------------------------------------- /Exp-Amazon/Plots/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Plots/.DS_Store -------------------------------------------------------------------------------- /Exp-Amazon/Plots/AmazonVaryDelta.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Plots/AmazonVaryDelta.pdf -------------------------------------------------------------------------------- /Exp-Amazon/Plots/Smallest_VaryEps_seednum_10_grownum_300.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Plots/Smallest_VaryEps_seednum_10_grownum_300.pdf -------------------------------------------------------------------------------- /Exp-Amazon/Plots_Vary_Delta.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | using Plots 4 | labels = [1; 2; 3; 12; 18; 17; 25; 15; 24] 5 | 6 | plot() 7 | s1 = 300 8 | s2 = 250 9 | ms = 4 10 | using LaTeXStrings 11 | x_label = L"\delta" 12 | y_label = "F1 Scores" 13 | deltas = 10 .^LinRange(0,3,10) 14 | for lab = 1:7 15 | label = labels[lab] 16 | 17 | if lab < 5 18 | grownum = 200 19 | seednum = 10 20 | color = :blue 21 | elseif lab < 7 22 | grownum = 2000 23 | seednum = 50 24 | color = :red 25 | else 26 | grownum = 10000 27 | seednum = 200 28 | color = :green 29 | end 30 | 31 | outputmat = "Output_VaryDelta/Label_$label"*"_$seednum"*"_$grownum.mat" 32 | mat = matread(outputmat) 33 | hl = mat["hl_f1"] 34 | plot!(deltas, hl, grid = false, #label = LabelNames[label], 35 | markerstrokewidth = 0, markershape = :circle, linewidth = 2, xaxis = :log10, 36 | size = (s1,s2), markersize = ms,xguidefont=font(18), legend = false, 37 | xlabel = x_label, ylabel = y_label, legendfont=font(7)) 38 | #color = color) 39 | 40 | end 41 | 42 | savefig("Plots/AmazonVaryDelta.pdf") 43 | -------------------------------------------------------------------------------- /Exp-Amazon/Plots_Vary_Epsilon.jl: -------------------------------------------------------------------------------- 1 | using MAT, Statistics  # Statistics provides mean, used below 2 | 3 | ## Colors 4 | C = [1 0 0; 5 | 0 .75 0; 6 | 0 0 1; 7 | 0 .5 .5; 8 | .5 0 .5; 9 | 1 .5 0; 10 | 0 0 0; 11 | .75 .75 0; 12 | 0.1 0.5 0.6; 13 | 0 .75 .75; 14 | .4 .5 .3] 15 | 16 | using Plots 17 | # Must be kept in this order, unless you are careful about what you do 18 | labels = [1; 2; 3; 12; 18; 17; 25; 15; 24] 19 | names = ["Amazon Fashion", "All Beauty", "Appliances", 20 | "Gift Cards", "Magazine Subscriptions", "Luxury Beauty", "Software", 21 | "Industrial and Scientific","Prime Pantry" ] 22 | sizes = [31; 85; 48; 148; 157; 1581; 802; 5334; 4970] 23 | lnum = length(labels) 24 | 25 | # See outer parameters 26 | seednum = 10 # 5 27 | grownum = 300 #[200; 300; 500] 28 | epsis = [10.0 1.0 0.1 0.01 0.001] 29 | 30 | plot() 31 | for lab = [1;3;4;5] # For each cluster 32 | label = labels[lab] 33 | outputmat = "Output_VaryEps/Label_$label"*"_$seednum"*"_$grownum.mat" 34 | 35 | mat = matread(outputmat) 36 | hl_time = mean(mat["hl_time"],dims = 2) 37 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 38 | b1 = round.(mat["b1_f1"],digits = 2) 39 | b2 = round.(mat["b2_f1"],digits = 2) 40 | r = round.(mat["r_f1"],digits = 2) 41 | re = round.(mat["r_re"],digits = 2) 42 | pr = round.(mat["r_pr"],digits = 2) 43 | nS = mat["newS"] 44 | 45 | lw = 3 46 | plot!(epsis',hl,linewidth = lw, grid = false, markershape =:circle, 47 | legend = :bottomleft, color = RGBA(C[lab,1],C[lab,2],C[lab,3],1), 48 | xaxis=:log10, label = "label $(label)") 49 | 50 | end 51 |
savefig("Plots/Smallest_VaryEps_seednum_$seednum"*"_grownum_$grownum.pdf") 52 | -------------------------------------------------------------------------------- /Exp-Amazon/Print_to_Table_F1_T_run.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Statistics 3 | 4 | # Labels for Amazon datasets 5 | labels = [1; 2; 3; 12; 18; 17; 25; 15; 24] 6 | names = ["Amazon Fashion", "All Beauty", "Appliances", 7 | "Gift Cards", "Magazine Subscriptions", "Luxury Beauty", "Software", 8 | "Industrial and Scientific","Prime Pantry" ] 9 | sizes = [31; 85; 48; 148; 157; 1581; 802; 5334; 4970] 10 | lnum = length(labels) 11 | 12 | # See outer parameters 13 | epsilon = 1.0 14 | s1 = 10 15 | s2 = 50 16 | s3 = 200 17 | g1 = 200 18 | g2 = 2000 19 | g3 = 10000 20 | outputmat = "Output/Smallest_$s1"*"_$g1"*"_$epsilon.mat" 21 | 22 | println("\nSeeds = ($s1, $s2, $s3), Grow by ($g1, $g2, $g3), Epsilon = $epsilon") 23 | mat = matread(outputmat) 24 | hl_time = round.(mean(mat["hl_time"],dims = 2),digits = 1) 25 | hl_size= round.(mean(mat["hl_size"],dims = 2),digits = 1) 26 | r = round.(mean(mat["r_f1"],dims = 2),digits = 2) 27 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 28 | b1 = round.(mean(mat["b1_f1"],dims = 2),digits = 2) 29 | b2 = round.(mean(mat["b2_f1"],dims = 2),digits = 2) 30 | nS = round.(mean(mat["newS"],dims = 2),digits = 2) 31 | 32 | for i = 1:size(hl_time,1) 33 | println(names[i]*" & $(sizes[i]) & $(hl_time[i])& $(hl[i]) & $(b1[i]) & $(b2[i]) & $(r[i]) \\\\") 34 | end 35 | 36 | ## Get medium clusters 37 | outputmat = "Output/Medium_$s2"*"_$g2"*"_$epsilon.mat" 38 | mat = matread(outputmat) 39 | hl_time = round.(mean(mat["hl_time"],dims = 2),digits = 1) 40 | hl_size= round.(mean(mat["hl_size"],dims = 2),digits = 1) 41 | nS = round.(mean(mat["newS"],dims = 2),digits = 2) 42 | r = round.(mean(mat["r_f1"],dims = 2),digits = 2) 43 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 44 | b1 = round.(mean(mat["b1_f1"],dims = 2),digits = 2) 45 | b2 = round.(mean(mat["b2_f1"],dims = 2),digits = 2) 46 | 47 | for i = 1:size(hl_time,1) 48 | println(names[i+5]*" & $(sizes[i+5]) & $(hl_time[i])& $(hl[i]) & $(b1[i]) & $(b2[i]) & $(r[i]) \\\\") 49 | end 50 | 51 | 52 | ## Get large clusters 53 | outputmat = "Output/Large_$s3"*"_$g3"*"_$epsilon.mat" 54 | mat = matread(outputmat) 55 | hl_time = round.(mean(mat["hl_time"],dims = 2),digits = 1) 56 | hl_size= round.(Int64,mean(mat["hl_size"],dims = 1)) 57 | r = round.(mean(mat["r_f1"],dims = 2),digits = 2) 58 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 59 | b1 = round.(mean(mat["b1_f1"],dims = 2),digits = 2) 60 | b2 = round.(mean(mat["b2_f1"],dims = 2),digits = 2) 61 | nS = round.(mean(mat["newS"],dims = 2),digits = 2) 62 | 63 | for i = 1:size(hl_time,1) 64 | println(names[i+7]*" & $(sizes[i+7]) & $(hl_time[i]) & $(hl[i]) & $(b1[i]) & $(b2[i]) & $(r[i]) \\\\") 65 | end 66 | -------------------------------------------------------------------------------- /Exp-Amazon/Run_Experiments.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | include("../src/HyperLocal.jl") 4 | 5 | ## Read in the matrix 6 | s = time() 7 | @time M = matread("../data/AmazonReview5core_H.mat") 8 | NodeLabels = vec(M["NodeLabels"]) 9 | NodeNames = M["NodeNames"] 10 | LabelNames = M["LabelNames"] 11 | H = M["H"] 12 | order = round.(Int64,vec(sum(H,dims=2))) 13 | 14 | ## Extract hyperedges with only one node 15 | e = findall(x->x>1,order) 16 | H = H[e,:] 17 | d = 
vec(sum(H,dims=1)) 18 | order = round.(Int64,vec(sum(H,dims=2))) 19 | volA = sum(d) 20 | m,n = size(H) 21 | Ht = sparse(H') 22 | toc = time()-s 23 | println("Done loading things into memory in $toc seconds.") 24 | 25 | ## Run several sets of experiments 26 | include("Smallest_Experiments.jl") 27 | include("Medium_Experiments.jl") 28 | include("Largest_Experiments.jl") 29 | -------------------------------------------------------------------------------- /Exp-Amazon/Smallest_Experiments.jl: -------------------------------------------------------------------------------- 1 | labels = [1; 2; 3; 12; 18] 2 | lnum = length(labels) 3 | 4 | # See outer parameters 5 | seednum = 10 6 | ntimes = 5 7 | epsis = [1.0] 8 | delta = 1.0 # stick with the all-or-nothing cut 9 | 10 | for grownum = [200; 300] 11 | 12 | for e = 1:length(epsis) 13 | 14 | # Output from HyperLocal 15 | hl_pr = zeros(lnum,ntimes) 16 | hl_re = zeros(lnum,ntimes) 17 | hl_f1 = zeros(lnum,ntimes) 18 | hl_time = zeros(lnum,ntimes) 19 | hl_size = zeros(lnum,ntimes) 20 | newS = zeros(lnum,ntimes) 21 | hl_cond = zeros(lnum,ntimes) 22 | 23 | # Output from first baseline 1 24 | b1_pr = zeros(lnum,ntimes) 25 | b1_re = zeros(lnum,ntimes) 26 | b1_f1 = zeros(lnum,ntimes) 27 | b1_cond = zeros(lnum,ntimes) 28 | 29 | # Output from baseline 2 30 | b2_pr = zeros(lnum,ntimes) 31 | b2_re = zeros(lnum,ntimes) 32 | b2_f1 = zeros(lnum,ntimes) 33 | b2_cond = zeros(lnum,ntimes) 34 | 35 | # Keep track of R 36 | r_pr = zeros(ntimes) 37 | r_re = zeros(ntimes) 38 | r_f1 = zeros(ntimes) 39 | r_cond = zeros(ntimes) 40 | 41 | # For each epsilon we store a different .mat file of outputs 42 | epsilon = epsis[e] 43 | outputmat = "Output/Smallest_$seednum"*"_$grownum"*"_$epsilon.mat" 44 | println(outputmat) 45 | 46 | # For a fixed epsilon, seednum, and grownum, run experiments 47 | # on each cluster multiple times 48 | for lab = 1:length(labels) 49 | label = labels[lab] 50 | T = findall(x->x ==label,NodeLabels) 51 | nT = length(T) 52 | 53 | for index = 1:ntimes 54 | 55 | # Generate a new seed set 56 | p = randperm(nT) 57 | Rstart = T[p[1:seednum]] 58 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 59 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 60 | R = union(Rmore,Rstart) 61 | Rs = findall(x->in(x,Rstart),R) # Force seed nodes to be in output set 62 | prr, rer, f1r = PRF(T,R) 63 | r_pr[index] = prr 64 | r_re[index] = rer 65 | r_f1[index] = f1r 66 | condR, volR, cutR = tl_cond(H,R,d,1.0,volA,order) 67 | r_cond[index] = condR 68 | 69 | # Run HyperLocal 70 | s = time() 71 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 72 | hl_time[lab,index] = time()-s 73 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 74 | pr, re, f1 = PRF(T,S) 75 | hl_pr[lab,index] = pr 76 | hl_re[lab,index] = re 77 | hl_f1[lab,index] = f1 78 | hl_size[lab,index] = length(S) 79 | hl_cond[lab,index] = condS 80 | nS = length(setdiff(S,R)) 81 | newS[lab,index] = nS 82 | 83 | # First baseline 84 | kS = nT-length(Rstart) 85 | B1 = BestNeighbors(H,d,Rstart,OneHop,kS) 86 | pr1, re1, f11 = PRF(T,B1) 87 | b1_pr[lab,index] = pr1 88 | b1_re[lab,index] = re1 89 | b1_f1[lab,index] = f11 90 | cond, vol, cut = tl_cond(H,B1,d,1.0,volA,order) 91 | b1_cond[index] = cond 92 | 93 | 94 | # Baseline 2 95 | B2 = TopNeighbors(H,Rstart,OneHop,kS) 96 | pr2, re2, f12 = PRF(T,B2) 97 | b2_pr[lab,index] = pr2 98 | b2_re[lab,index] = re2 99 | b2_f1[lab,index] = f12 100 | cond, vol, cut = tl_cond(H,B2,d,1.0,volA,order) 101 | b2_cond[index] = cond 102 | 103 | println("$label ($nT): $f11 \t $f12 \t $f1 \t $nS") 
104 | end 105 | 106 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 107 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond, 108 | "b1_pr"=>b1_pr, "b1_re"=>b1_re, "b1_f1"=>b1_f1,"b1_cond"=>b1_cond,"b2_cond"=>b2_cond, 109 | "r_pr"=>r_pr, "r_re"=>r_re, "r_f1"=>r_f1, "r_cond"=>r_cond, 110 | "b2_pr"=>b2_pr, "b2_re"=>b2_re, "b2_f1"=>b2_f1)) 111 | 112 | end 113 | end 114 | 115 | end 116 | -------------------------------------------------------------------------------- /Exp-Amazon/VaryDelta_Experimentsl.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | using Statistics 4 | include("../src/HyperLocal.jl") 5 | 6 | # Add the path to wherever the matrix is stored. Loading takes a long time. 7 | s = time() 8 | @time M = matread("../data/AmazonReview5core_H.mat") 9 | NodeLabels = vec(M["NodeLabels"]) 10 | NodeNames = M["NodeNames"] 11 | LabelNames = M["LabelNames"] 12 | H = M["H"] 13 | order = round.(Int64,vec(sum(H,dims=2))) 14 | 15 | ## Remove hyperedges with only one node 16 | e = findall(x->x>1,order) 17 | H = H[e,:] 18 | d = vec(sum(H,dims=1)) 19 | order = round.(Int64,vec(sum(H,dims=2))) 20 | volA = sum(d) 21 | m,n = size(H) 22 | Ht = sparse(H') 23 | toc = time()-s 24 | println("Done loading things into memory in $toc seconds.") 25 | 26 | labels = [1; 2; 3; 12; 18; 17; 25; 15; 24] # Labels for all clusters 27 | lnum = length(labels) 28 | 29 | # See outer parameters 30 | seednum = 10 31 | ntimes = 1 32 | epsilon = 1.0 33 | grownum = 200 # How much to grow seed set by using BestNeighbors 34 | deltas = 10 .^LinRange(0,3,10) 35 | enum = length(deltas) 36 | 37 | for lab = 1:9 38 | label = labels[lab] 39 | T = findall(x->x ==label,NodeLabels) 40 | nT = length(T) 41 | 42 | # Different seed set sizes depending on dataset 43 | if lab < 5 44 | grownum = 200 45 | seednum = 10 46 | elseif lab < 7 47 | grownum = 2000 48 | seednum = 50 49 | else 50 | grownum = 10000 51 | seednum = 200 52 | end 53 | 54 | # For each cluster we store a different .mat file of outputs 55 | outputmat = "Output_VaryDelta/Label_$label"*"_$seednum"*"_$grownum.mat" 56 | println(outputmat) 57 | 58 | # Output from HyperLocal 59 | hl_pr = zeros(enum) 60 | hl_re = zeros(enum) 61 | hl_f1 = zeros(enum) 62 | hl_time = zeros(enum) 63 | hl_size = zeros(enum) 64 | newS = zeros(enum) 65 | hl_cond = zeros(enum) 66 | 67 | # Generate a new seed set 68 | p = randperm(nT) 69 | Rstart = T[p[1:seednum]] 70 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 71 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 72 | R = union(Rmore,Rstart) 73 | 74 | # Force seed nodes to be contained in output set 75 | Rs = findall(x->in(x,Rstart),R) 76 | 77 | for e = 1:length(deltas) # Try each seed set with each delta 78 | 79 | delta = deltas[e] 80 | 81 | # Run HyperLocal 82 | s = time() 83 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 84 | hl_time[e] = time()-s 85 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 86 | pr, re, f1 = PRF(T,S) 87 | hl_pr[e] = pr 88 | hl_re[e] = re 89 | hl_f1[e] = f1 90 | hl_size[e] = length(S) 91 | hl_cond[e] = condS 92 | nS = length(setdiff(S,R)) 93 | newS[e] = nS 94 | 95 | println("$label ($nT): $f1 \t $nS \t $epsilon") 96 | end 97 | 98 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 99 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond)) 100 | 101 | end 102 | --------------------------------------------------------------------------------
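Note on running the sweep scripts: VaryEpsilon_Experiments.jl (next file) contains no using statements or data loading of its own. Like Smallest_Experiments.jl and Medium_Experiments.jl, it expects H, Ht, order, d, volA, and NodeLabels, plus the helper functions pulled in by src/HyperLocal.jl, to already be in scope. Below is a minimal driver sketch for it, following the same loading and preprocessing steps as Run_Experiments.jl; the data path and variable names come from that file, while the explicit "using SparseArrays" line is an added assumption to keep the sketch self-contained.

using MAT, Random, SparseArrays
include("../src/HyperLocal.jl")      # also brings in the helpers (get_immediate_neighbors, BestNeighbors, TopNeighbors, PRF, tl_cond)

M = matread("../data/AmazonReview5core_H.mat")   # hypergraph plus ground-truth cluster labels
NodeLabels = vec(M["NodeLabels"])
H = M["H"]
order = round.(Int64, vec(sum(H, dims=2)))

# Remove hyperedges with only one node, then recompute degrees, orders, and total volume
e = findall(x -> x > 1, order)
H = H[e, :]
d = vec(sum(H, dims=1))
order = round.(Int64, vec(sum(H, dims=2)))
volA = sum(d)
Ht = sparse(H')

include("VaryEpsilon_Experiments.jl")            # runs the epsilon sweep and writes Output_VaryEps/*.mat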
/Exp-Amazon/VaryEpsilon_Experiments.jl: -------------------------------------------------------------------------------- 1 | labels = [1; 2; 3; 12; 18] 2 | lnum = length(labels) 3 | 4 | # See outer parameters 5 | seednum = 10 6 | ntimes = 5 7 | epsis = [10.0 1.0 0.1 0.01 0.001] 8 | delta = 1.0 # stick with the all-or-nothing cut 9 | grownum = 300 10 | 11 | enum = length(epsis) 12 | 13 | for lab = 2 # For each cluster 14 | label = labels[lab] 15 | T = findall(x->x ==label,NodeLabels) 16 | nT = length(T) 17 | 18 | # For each epsilon we store a different .mat file of outputs 19 | 20 | outputmat = "Output_VaryEps/Label_$label"*"_$seednum"*"_$grownum.mat" 21 | println(outputmat) 22 | 23 | # Output from HyperLocal, multiple values of epsilon 24 | hl_pr = zeros(enum,ntimes) 25 | hl_re = zeros(enum,ntimes) 26 | hl_f1 = zeros(enum,ntimes) 27 | hl_time = zeros(enum,ntimes) 28 | hl_size = zeros(enum,ntimes) 29 | newS = zeros(enum,ntimes) 30 | hl_cond = zeros(enum,ntimes) 31 | 32 | # Output from first baseline 1 33 | b1_pr = zeros(ntimes) 34 | b1_re = zeros(ntimes) 35 | b1_f1 = zeros(ntimes) 36 | b1_cond = zeros(ntimes) 37 | 38 | # Keep track of R 39 | r_pr = zeros(ntimes) 40 | r_re = zeros(ntimes) 41 | r_f1 = zeros(ntimes) 42 | r_cond = zeros(ntimes) 43 | 44 | # Output from baseline 2 45 | b2_pr = zeros(ntimes) 46 | b2_re = zeros(ntimes) 47 | b2_f1 = zeros(ntimes) 48 | b2_cond = zeros(ntimes) 49 | 50 | for index = 1:ntimes # Try ntimes different seed sets 51 | 52 | # Generate a new seed set 53 | p = randperm(nT) 54 | Rstart = T[p[1:seednum]] 55 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 56 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 57 | R = union(Rmore,Rstart) 58 | # Force seed nodes to be in output set 59 | Rs = findall(x->in(x,Rstart),R) 60 | prr, rer, f1r = PRF(T,R) 61 | r_pr[index] = prr 62 | r_re[index] = rer 63 | r_f1[index] = f1r 64 | condR, volR, cutR = tl_cond(H,R,d,1.0,volA,order) 65 | r_cond[index] = condR 66 | 67 | # First baseline 68 | kS = nT-length(Rstart) 69 | B1 = BestNeighbors(H,d,Rstart,OneHop,kS) 70 | pr1, re1, f11 = PRF(T,B1) 71 | b1_pr[index] = pr1 72 | b1_re[index] = re1 73 | b1_f1[index] = f11 74 | 75 | # Baseline 2 76 | B2 = TopNeighbors(H,Rstart,OneHop,kS) 77 | pr2, re2, f12 = PRF(T,B2) 78 | b2_pr[index] = pr2 79 | b2_re[index] = re2 80 | b2_f1[index] = f12 81 | 82 | for e = 1:length(epsis) # Try each seed set with each epsilon 83 | 84 | epsilon = epsis[e] 85 | 86 | # Run HyperLocal 87 | s = time() 88 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 89 | hl_time[e,index] = time()-s 90 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 91 | pr, re, f1 = PRF(T,S) 92 | hl_pr[e,index] = pr 93 | hl_re[e,index] = re 94 | hl_f1[e,index] = f1 95 | hl_size[e,index] = length(S) 96 | hl_cond[e,index] = condS 97 | nS = length(setdiff(S,R)) 98 | newS[e,index] = nS 99 | 100 | println("$label ($nT): $f11 \t $f12 \t $f1 \t $nS \t $epsilon") 101 | end 102 | 103 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 104 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond, 105 | "b1_pr"=>b1_pr, "b1_re"=>b1_re, "b1_f1"=>b1_f1,"epsis"=>epsis, 106 | "r_pr"=>r_pr, "r_re"=>r_re, "r_f1"=>r_f1, "r_cond"=>r_cond, 107 | "b2_pr"=>b2_pr, "b2_re"=>b2_re, "b2_f1"=>b2_f1)) 108 | 109 | end 110 | end 111 | -------------------------------------------------------------------------------- /Exp-Stackoverflow/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/.DS_Store -------------------------------------------------------------------------------- /Exp-Stackoverflow/Clique_Expansion_Stack.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | using Statistics 4 | using StatsBase 5 | include("../src/HyperLocal.jl") 6 | 7 | @time M = matread("../data/stackoverflow_answer_H.mat") 8 | LabelMatrix = M["LabelMatrix"] 9 | LabelNames = M["LabelNames"] 10 | H = M["H"] 11 | order = round.(Int64,vec(sum(H,dims=2))) 12 | m,n = size(H) 13 | @show size(H) 14 | 15 | esmall = findall(x->x<50,order) 16 | @time Az = WeightedCliqueExpansion(H[esmall,:], order) 17 | 18 | matwrite("ZCE_Stack.mat", Dict("Az"=>Az)) 19 | 20 | @time As = SimpleCliqueExp(H[esmall,:]) 21 | 22 | matwrite("SCE_Stack.mat", Dict("As"=>As)) 23 | -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_Stack/Set45_1.0_5000.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_Stack/Set45_1.0_5000.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/.DS_Store -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_22943_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_22943_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_25849_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_25849_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_27596_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_27596_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_28886_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_28886_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_28918_10000_100_1.0.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_28918_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_29386_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_29386_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_43507_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_43507_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_5713_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_5713_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Plots/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Plots/.DS_Store -------------------------------------------------------------------------------- /Exp-Stackoverflow/Plots/StackDots_Wide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Plots/StackDots_Wide.pdf -------------------------------------------------------------------------------- /Exp-Stackoverflow/Plots_Stack_Experiments.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | include("../include/FlowSeed.jl") 4 | include("../src/HyperLocal.jl") 5 | 6 | ## Read in the matrix 7 | s = time() 8 | @time M = matread("../data/stackoverflow_answer_H.mat") 9 | LabelMatrix = M["LabelMatrix"] 10 | LabelNames = M["LabelNames"] 11 | MainLabels = M["MainLabels"] 12 | H = M["H"] 13 | order = round.(Int64,vec(sum(H,dims=2))) 14 | d = vec(sum(H,dims=1)) 15 | volA = sum(d) 16 | m,n = size(H) 17 | toc = time()-s 18 | println("Done loading things into memory in $toc seconds.") 19 | 20 | # Updated MainLabels be the set of topics between 2000-10000 21 | # (already stored in current MainLabels), to be only those with conductanc < .2 22 | Tconds = Vector{Float64}() 23 | Tsizes = Vector{Int64}() 24 | labels = Vector{Int64}() 25 | for lab = 1:length(MainLabels) 26 | label = MainLabels[lab] 27 | T = findnz(LabelMatrix[:,label])[1] 28 | nT = length(T) 29 | condT, volT, cutT = tl_cond(H,T,d,1.0,volA,order) 30 | if condT < .2 31 | # println("$nT \t $condT \t"*LabelNames[label]) 32 | push!(Tconds,condT) 33 | push!(labels,label) 34 | push!(Tsizes,nT) 35 | end 36 | end 37 | 38 | ## 39 | 40 | using Plots 41 | using Statistics 42 | ntimes = 1 43 | epsilon = 1.0 44 | delta = 5000.0 45 | outputmat = 
"Output_Stack/Set45_$(epsilon)_$(delta).mat" 46 | 47 | mat = matread(outputmat) 48 | hl_time = round.(mean(mat["hl_time"],dims = 2),digits = 2) 49 | sl_time = round.(mean(mat["sl_time"],dims = 2),digits = 2) 50 | zl_time = round.(mean(mat["zl_time"],dims = 2),digits = 2) 51 | hl_size= round.(mean(mat["hl_size"],dims = 2),digits = 2) 52 | r = round.(mean(mat["r_f1"],dims = 2),digits = 2) 53 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 54 | b1 = round.(mean(mat["b1_f1"],dims = 2),digits = 2) 55 | b2 = round.(mean(mat["b2_f1"],dims = 2),digits = 2) 56 | nS = round.(mean(mat["newS"],dims = 2),digits = 2) 57 | sl = round.(mean(mat["sl_f1"],dims = 2),digits = 2) 58 | zl = round.(mean(mat["zl_f1"],dims = 2),digits = 2) 59 | 60 | ln = LabelNames[labels] 61 | plot() 62 | p = (sortperm(vec(hl))) 63 | y1 = hl[p] 64 | yb1 = b1[p] 65 | yr = r[p] 66 | yb2 = b2[p] 67 | y2 = max.(yb1,yb2) 68 | y4 = max.(sl[p],zl[p]) 69 | x = 1:length(y1) 70 | xlabels = ln[p] 71 | 72 | ## Wide plot 73 | s1 = 750 74 | s2 = 400 75 | ms = 6 76 | stepy = 1 77 | scatter(x,y1,grid = false, markersize = ms, label = "HyperLocal", 78 | markerstrokewidth = 0, markershape = :circle, linewidth = 0, 79 | legend = :topleft, size = (s1, s2), ymirror = false, ylabel = "F1 Scores", 80 | xticks = (1:stepy:length(ln), "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t".*ln[1:stepy:length(ln)]),xrotation = 40, 81 | xtickfont=font(8), 82 | ytickfont=font(11), 83 | guidefont=font(12), 84 | titlefont=font(10), 85 | legendfont=font(9) 86 | ) 87 | scatter!(x,y2, markersize = ms,markerstrokewidth = 0,label = "TN/BN", 88 | markershape = :circle) 89 | scatter!(x,y4,markersize = ms,markerstrokewidth = 0, label = "FlowSeed", 90 | markershape = :circle) 91 | 92 | savefig("Plots/StackDots_Wide.pdf") 93 | -------------------------------------------------------------------------------- /Exp-Stackoverflow/Plots_VaryDeltaStack.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | deltas = 10 .^LinRange(0,3.7,10) 3 | seednum = 100 4 | epsilon = 1.0 5 | grownum = 10000 6 | 7 | # M = matread("../data/processed/stackoverflow_answer_H.mat") 8 | LabelMatrix = M["LabelMatrix"] 9 | LabelNames = M["LabelNames"] 10 | H = M["H"] 11 | order = round.(Int64,vec(sum(H,dims=2))) 12 | d = vec(sum(H,dims=1)) 13 | volA = sum(d) 14 | m,n = size(H) 15 | 16 | 17 | labels = [25849; 27596;28918;29386;43507] 18 | lnum = length(labels) 19 | v = [22943;28886;5713] 20 | 21 | 22 | ## 23 | using Plots 24 | plot() 25 | s1 = 300 26 | s2 = 250 27 | ms = 4 28 | lw = 2 29 | using LaTeXStrings 30 | x_label = L"\delta" 31 | y_label = "F1 Scores" 32 | for i = 1:3 33 | label = labels[i] 34 | outputmat = "Output_VaryDelta/Cluster_$(label)_$grownum"*"_$seednum"*"_$epsilon.mat" 35 | mat = matread(outputmat) 36 | hl_f1 = mat["hl_f1"] 37 | deltas = mat["deltas"] 38 | 39 | plot!(deltas, hl_f1[i,:],grid = false,label = LabelNames[label], 40 | markerstrokewidth = 0, markershape = :circle, linewidth = 2, xaxis = :log10, 41 | size = (s1,s2), markersize = ms, 42 | xlabel = x_label, ylabel = y_label, legend = :bottomright) 43 | end 44 | 45 | ## These were run and stored and slightly differently 46 | v = [22943;28886;5713] 47 | for i = 1:3 48 | label = v[i] 49 | outputmat = "Output_VaryDelta/Cluster_$(label)_$grownum"*"_$seednum"*"_$epsilon.mat" 50 | mat = matread(outputmat) 51 | hl_f1 = mat["hl_f1"] 52 | 53 | plot!(deltas, vec(hl_f1[:,1]),grid = false,label = LabelNames[label], 54 | markerstrokewidth = 0, markershape = :circle, linewidth = 2, xaxis = :log10, 55 | size 
= (s1,s2), markersize = ms,xguidefont=font(18),legend = false, 56 | xlabel = x_label, ylabel = y_label, legendfont=font(7)) 57 | 58 | end 59 | 60 | savefig("Plots/Stack_VaryDelta.pdf") 61 | -------------------------------------------------------------------------------- /Exp-Stackoverflow/Stack_LargeDelta_Experiments.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | include("../src/HyperLocal.jl") 4 | include("../include/FlowSeed.jl") 5 | 6 | ## Read in the matrix, if it isn't read in already 7 | 8 | s = time() 9 | @time M = matread("../data/stackoverflow_answer_H.mat") 10 | LabelMatrix = M["LabelMatrix"] 11 | LabelNames = M["LabelNames"] 12 | MainLabels = M["MainLabels"] 13 | H = M["H"] 14 | order = round.(Int64,vec(sum(H,dims=2))) 15 | d = vec(sum(H,dims=1)) 16 | volA = sum(d) 17 | m,n = size(H) 18 | Ht = sparse(H') 19 | toc = time()-s 20 | println("Done loading things into memory in $toc seconds.") 21 | 22 | ## 23 | Tconds = Vector{Float64}() 24 | Tsizes = Vector{Int64}() 25 | labels = Vector{Int64}() 26 | for lab = 1:length(MainLabels) 27 | label = MainLabels[lab] 28 | T = findnz(LabelMatrix[:,label])[1] 29 | nT = length(T) 30 | condT, volT, cutT = tl_cond(H,T,d,1.0,volA,order) 31 | if condT < .2 32 | println("$nT \t $condT \t"*LabelNames[label]) 33 | push!(Tconds,condT) 34 | push!(labels,label) 35 | push!(Tsizes,nT) 36 | end 37 | end 38 | 39 | ## 40 | lnum = length(labels) 41 | 42 | # See outer parameters 43 | ntimes = 1 44 | epsilon = 1.0 45 | delta = 5000.0 46 | # seednum = 100 47 | # grownum = 10000 48 | 49 | # Output from HyperLocal, large delta 50 | hl_pr = zeros(lnum,ntimes) 51 | hl_re = zeros(lnum,ntimes) 52 | hl_f1 = zeros(lnum,ntimes) 53 | hl_time = zeros(lnum,ntimes) 54 | hl_size = zeros(lnum,ntimes) 55 | newS = zeros(lnum,ntimes) 56 | hl_cond = zeros(lnum,ntimes) 57 | 58 | # Output from HyperLocal, delta = 1.0 59 | hl_pr1 = zeros(lnum,ntimes) 60 | hl_re1 = zeros(lnum,ntimes) 61 | hl_f11 = zeros(lnum,ntimes) 62 | hl_time1 = zeros(lnum,ntimes) 63 | hl_size1 = zeros(lnum,ntimes) 64 | newS1 = zeros(lnum,ntimes) 65 | hl_cond1 = zeros(lnum,ntimes) 66 | 67 | # Output from first baseline 1 68 | b1_pr = zeros(lnum,ntimes) 69 | b1_re = zeros(lnum,ntimes) 70 | b1_f1 = zeros(lnum,ntimes) 71 | b1_cond = zeros(lnum,ntimes) 72 | 73 | # Output from baseline 2 74 | b2_pr = zeros(lnum,ntimes) 75 | b2_re = zeros(lnum,ntimes) 76 | b2_f1 = zeros(lnum,ntimes) 77 | b2_cond = zeros(lnum,ntimes) 78 | 79 | # Keep track of R 80 | r_pr = zeros(lnum,ntimes) 81 | r_re = zeros(lnum,ntimes) 82 | r_f1 = zeros(lnum,ntimes) 83 | r_cond = zeros(lnum,ntimes) 84 | 85 | sl_pr = zeros(lnum,ntimes) 86 | sl_re = zeros(lnum,ntimes) 87 | sl_f1 =zeros(lnum,ntimes) 88 | sl_cond = zeros(lnum,ntimes) 89 | sl_size = zeros(lnum,ntimes) 90 | sl_time = zeros(lnum,ntimes) 91 | 92 | zl_pr = zeros(lnum,ntimes) 93 | zl_re = zeros(lnum,ntimes) 94 | zl_f1 = zeros(lnum,ntimes) 95 | zl_cond = zeros(lnum,ntimes) 96 | zl_size = zeros(lnum,ntimes) 97 | zl_time = zeros(lnum,ntimes) 98 | 99 | # For each epsilon we store a different matrix of outputs 100 | outputmat = "Output_Stack/Set45_$(epsilon)_$(delta).mat" 101 | println(outputmat) 102 | 103 | maz = matread("ZCE_Stack.mat") 104 | Az = maz["Az"] 105 | mas = matread("SCE_Stack.mat") 106 | As = mas["As"] 107 | 108 | ## For a fixed epsilon, seednum, and grownum, run experiments 109 | # on each cluster multiple times 110 | 111 | for lab = 1:length(labels) 112 | label = labels[lab] 113 | T = 
findnz(LabelMatrix[:,label])[1] 114 | nT = length(T) 115 | @show nT,label 116 | 117 | for index = 1:ntimes 118 | 119 | # Generate a new seed set 120 | seednum = round(Int64,nT/20) 121 | grownum = round(Int64,min(nT*2,n)) 122 | p = randperm(nT) 123 | Rstart = T[p[1:seednum]] 124 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 125 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 126 | R = union(Rmore,Rstart) 127 | Rs = findall(x->in(x,Rstart),R) # Force seed nodes to be in output set 128 | prr, rer, f1r = PRF(T,R) 129 | r_pr[lab,index] = prr 130 | r_re[lab,index] = rer 131 | r_f1[lab,index] = f1r 132 | condR, volR, cutR = tl_cond(H,R,d,1.0,volA,order) 133 | r_cond[lab,index] = condR 134 | 135 | # Run HyperLocal with delta = 1.0 136 | s = time() 137 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,1.0,Rs,true) 138 | hl_time1[lab,index] = time()-s 139 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 140 | pr, re, f1_d1 = PRF(T,S) 141 | hl_pr1[lab,index] = pr 142 | hl_re1[lab,index] = re 143 | hl_f11[lab,index] = f1_d1 144 | hl_size1[lab,index] = length(S) 145 | hl_cond1[lab,index] = condS 146 | nS = length(setdiff(S,R)) 147 | newS1[lab,index] = nS 148 | 149 | # Run HyperLocal with delta = 1000 150 | s = time() 151 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 152 | hl_time[lab,index] = time()-s 153 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 154 | pr, re, f1 = PRF(T,S) 155 | hl_pr[lab,index] = pr 156 | hl_re[lab,index] = re 157 | hl_f1[lab,index] = f1 158 | hl_size[lab,index] = length(S) 159 | hl_cond[lab,index] = condS 160 | nS = length(setdiff(S,R)) 161 | newS[lab,index] = nS 162 | 163 | # First baseline 164 | kS = nT-length(Rstart) 165 | B1 = BestNeighbors(H,d,Rstart,OneHop,kS) 166 | pr1, re1, f11 = PRF(T,B1) 167 | b1_pr[lab,index] = pr1 168 | b1_re[lab,index] = re1 169 | b1_f1[lab,index] = f11 170 | 171 | # Baseline 2 172 | B2 = TopNeighbors(H,Rstart,OneHop,kS) 173 | pr2, re2, f12 = PRF(T,B2) 174 | b2_pr[lab,index] = pr2 175 | b2_re[lab,index] = re2 176 | b2_f1[lab,index] = f12 177 | 178 | # Simple Clique Expansion 179 | nR = length(R) 180 | Rs_vec = zeros(length(R)) 181 | Rs_vec[Rs] .= 1 182 | starter = time() 183 | SL, lcond = FlowSeed(As,R,epsilon,zeros(nR),Rs_vec) 184 | sl_time[lab,index] = time()-starter 185 | pr3, re3, f13 = PRF(T,SL) 186 | condS, volS, cutS = tl_cond(H,SL,d,1.0,volA,order) 187 | sl_pr[lab,index] = pr3 188 | sl_re[lab,index] = re3 189 | sl_f1[lab,index] = f13 190 | sl_size[lab,index] = length(SL) 191 | sl_cond[lab,index] = condS 192 | 193 | # Zhou Clique Expansion + SimpleLocal 194 | starter = time() 195 | ZL, lcond = FlowSeed(Az,R,epsilon,zeros(nR),Rs_vec) 196 | zl_time[lab,index] = time()-starter 197 | pr4, re4, f14 = PRF(T,ZL) 198 | condS, volS, cutS = tl_cond(H,ZL,d,1.0,volA,order) 199 | zl_pr[lab,index] = pr4 200 | zl_re[lab,index] = re4 201 | zl_f1[lab,index] = f14 202 | zl_size[lab,index] = length(ZL) 203 | zl_cond[lab,index] = condS 204 | 205 | println("$label ($nT):\n $f1r \n $f11 \n $f12 \n $f1 \n $f1_d1\n \t $nS "*LabelNames[label]) 206 | end 207 | 208 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 209 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond, 210 | "hl_pr1"=>hl_pr1, "hl_re1"=>hl_re1, "hl_f11"=>hl_f11, "hl_cond1"=>hl_cond1, 211 | "sl_pr"=>sl_pr, "sl_re"=>sl_re, "sl_f1"=>sl_f1, "sl_cond"=>sl_cond, "sl_time"=>sl_time, 212 | "zl_pr"=>zl_pr, "zl_re"=>zl_re, "zl_f1"=>zl_f1, "zl_cond"=>zl_cond, "zl_time"=>zl_time, 213 | "sl_size"=>sl_size, "zl_size"=>zl_size, 214 | 
"b1_pr"=>b1_pr, "b1_re"=>b1_re, "b1_f1"=>b1_f1, 215 | "r_pr"=>r_pr, "r_re"=>r_re, "r_f1"=>r_f1, 216 | "b2_pr"=>b2_pr, "b2_re"=>b2_re, "b2_f1"=>b2_f1)) 217 | 218 | end 219 | -------------------------------------------------------------------------------- /Exp-Stackoverflow/Stack_LargeDelta_Table.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | include("../src/HyperLocal.jl") 4 | include("../include/FlowSeed.jl") 5 | # 6 | ## Read in the matrix, if it isn't read in already 7 | using SparseArrays 8 | s = time() 9 | @time M = matread("../data/processed/stackoverflow_answer_H.mat") 10 | LabelMatrix = M["LabelMatrix"] 11 | LabelNames = M["LabelNames"] 12 | MainLabels = M["MainLabels"] 13 | H = M["H"] 14 | order = round.(Int64,vec(sum(H,dims=2))) 15 | d = vec(sum(H,dims=1)) 16 | volA = sum(d) 17 | m,n = size(H) 18 | Ht = sparse(H') 19 | toc = time()-s 20 | println("Done loading things into memory in $toc seconds.") 21 | 22 | ## 23 | Tconds = Vector{Float64}() 24 | Tsizes = Vector{Int64}() 25 | labels = Vector{Int64}() 26 | for lab = 1:length(MainLabels) 27 | label = MainLabels[lab] 28 | T = findnz(LabelMatrix[:,label])[1] 29 | nT = length(T) 30 | condT, volT, cutT = tl_cond(H,T,d,1.0,volA,order) 31 | if condT < .2 32 | # println("$nT \t $condT \t"*LabelNames[label]) 33 | push!(Tconds,condT) 34 | push!(labels,label) 35 | push!(Tsizes,nT) 36 | end 37 | end 38 | 39 | using Statistics 40 | ntimes = 1 41 | epsilon = 1.0 42 | delta = 5000.0 43 | outputmat = "Output_Stack/Set45_$(epsilon)_$(delta).mat" 44 | 45 | mat = matread(outputmat) 46 | hl_time = round.(mean(mat["hl_time"],dims = 2),digits = 2) 47 | sl_time = round.(mean(mat["sl_time"],dims = 2),digits = 2) 48 | zl_time = round.(mean(mat["zl_time"],dims = 2),digits = 2) 49 | hl_size= round.(mean(mat["hl_size"],dims = 2),digits = 2) 50 | r = round.(mean(mat["r_f1"],dims = 2),digits = 2) 51 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 52 | b1 = round.(mean(mat["b1_f1"],dims = 2),digits = 2) 53 | b2 = round.(mean(mat["b2_f1"],dims = 2),digits = 2) 54 | nS = round.(mean(mat["newS"],dims = 2),digits = 2) 55 | sl = round.(mean(mat["sl_f1"],dims = 2),digits = 2) 56 | zl = round.(mean(mat["zl_f1"],dims = 2),digits = 2) 57 | 58 | for i = 1:length(labels) 59 | lab = labels[i] 60 | println(" $(hl[i]) \t $(sl[i]) \t $(zl[i]) \t $(b1[i]) \t $(b2[i])\t $(r[i]) \t $(hl_time[i]) \t $(sl_time[i]) \t $(zl_time[i])\t $(Tconds[i])"*"\t"*LabelNames[lab]) 61 | end 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Nate Veldt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | README 2 | 3 | Code for 4 | 5 | Minimizing Localized Ratio Cut Objectives in Hypergraphs 6 | Nate Veldt, Austin Benson, Jon Kleinberg 7 | KDD '20: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining 8 | 9 | Datasets are too large to include in a GitHub repository. Text versions of the datasets can be found at: 10 | 11 | https://www.cs.cornell.edu/~arb/data/amazon-reviews/ 12 | 13 | and 14 | 15 | https://www.cs.cornell.edu/~arb/data/stackoverflow-answers/ -------------------------------------------------------------------------------- /data/Amazon-Clusters.txt: -------------------------------------------------------------------------------- 1 | Cluster information for Amazon reviews hypergraph 2 | Conductance |T| Category 3 | Label 1 0.05526315789473684 31 Amazon_Fashion 4 | Label 2 0.11689899730985572 85 All_Beauty 5 | Label 3 0.18333333333333332 48 Appliances 6 | Label 4 0.09941355767439959 22931 Arts_Crafts_and_Sewing 7 | Label 5 0.07920483552884328 79437 Automotive 8 | Label 6 0.023304263291091188 704093 Books 9 | Label 7 0.04535683668942734 73713 CDs_and_Vinyl 10 | Label 8 0.10469066052559024 48186 Cell_Phones_and_Accessories 11 | Label 9 0.0569535873958006 376858 Clothing_Shoes_and_Jewelry 12 | Label 10 0.08635143506091265 11797 Digital_Music 13 | Label 11 0.06919062211927213 160052 Electronics 14 | Label 12 0.1315345699831366 148 Gift_Cards 15 | Label 13 0.0987035757730488 41320 Grocery_and_Gourmet_Food 16 | Label 14 0.0843442282774712 189172 Home_and_Kitchen 17 | Label 15 0.14216482677220024 5334 Industrial_and_Scientific 18 | Label 16 0.05826591377991949 98824 Kindle_Store 19 | Label 17 0.10889854659447136 1581 Luxury_Beauty 20 | Label 18 0.13249348392701998 157 Magazine_Subscriptions 21 | Label 19 0.05845032232905592 60175 Movies_and_TV 22 | Label 20 0.08932531461894376 10620 Musical_Instruments 23 | Label 21 0.12032337895363579 27965 Office_Products 24 | Label 22 0.11865049228984761 32918 Patio_Lawn_and_Garden 25 | Label 23 0.08520030256037095 42531 Pet_Supplies 26 | Label 24 0.09661968393434621 4970 Prime_Pantry 27 | Label 25 0.13708659429437012 802 Software 28 | Label 26 0.0881832010246336 104687 Sports_and_Outdoors 29 | Label 27 0.10269628837546725 73649 Tools_and_Home_Improvement 30 | Label 28 0.08729156649814647 78772 Toys_and_Games 31 | Label 29 0.08020151239913818 17408 Video_Games 32 | -------------------------------------------------------------------------------- /include/FlowSeed.jl: -------------------------------------------------------------------------------- 1 | # This is self-contained Julia code for FlowSeed, the flow-based 2 | # method for local clustering introduced in the paper: 3 | # 4 | # Flow-Based Local Graph Clustering with Better Seed Set Inclusion 5 | # Nate Veldt, Christine Klymko, and David Gleich 6 | # Proceedings of the 2019 SIAM 
International Conference on Data Mining 7 | # 8 | # ArXiv preprint: https://arxiv.org/abs/1811.12280 9 | # 10 | # The main subroutine is LocalPushRelabel. 11 | # Unlike previous local flow methods, this repeatedly updates Phase 1 of the 12 | # push-relabel maximum s-t flow algorithm. This phase returns a minimum s-t cut, 13 | # which is all we need for the algorithm. The push-relabel algorithm 14 | # is made efficient by a global relabeling heuristic. 15 | # 16 | # Previous flow-based methods repeatedly called a black-box min-cut 17 | # solver and didn't use warm starts. Here we use warm starts and call a 18 | # white-box subroutine that makes the code much faster in practice. 19 | 20 | using SparseArrays 21 | 22 | # This computes the precision, recall, and F1 score for a set Returned 23 | # compared against a Target set 24 | function PRF(Target,Returned) 25 | 26 | TruePos = intersect(Returned,Target) 27 | pr = length(TruePos)/length(Returned) 28 | re = length(TruePos)/length(Target) 29 | F1 = 2*(pr*re)/(pr+re) 30 | 31 | return pr, re, F1 32 | 33 | end 34 | 35 | # Starting from a set of seed nodes R, do a breadth first search to get 36 | # a k-hop neighborhood of R 37 | function neighborhood(A::SparseMatrixCSC,R::Array{Int64},k::Int64) 38 | 39 | rp = A.rowval 40 | ci = A.colptr 41 | n = size(A,1) 42 | 43 | eS = zeros(n) 44 | eS[R] .= 1 45 | 46 | # For node i, the neighbors of i are rp[ci[i]:ci[i+1]-1] 47 | for i = R 48 | neighbs = rp[ci[i]:ci[i+1]-1] 49 | eS[neighbs] .= 1 50 | end 51 | 52 | # This could be more efficient, but recursively calling won't take too long 53 | # as long as k isn't too large 54 | if k == 1 55 | return findall(x->x!=0,eS) 56 | else 57 | return neighborhood(A,findall(x->x!=0,eS),k-1) 58 | end 59 | 60 | end 61 | 62 | # For a set S in a graph with adjacency matrix A, return some information about 63 | # S including its conductance, number of interior edges, volume, and cut. 
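#
# (Added usage sketch for the function below; illustrative only, not part of
# the original code. Assuming A is a symmetric SparseMatrixCSC{Float64,Int64}
# adjacency matrix and S is a Vector{Int64} of node indices:
#
#   cut, vol, edges, cond = set_stats(A, S, 0.0)   # volA = 0.0 means "recompute vol(A)"
#
# returns the cut, volume, and interior edge weight of S, together with
# cond = cut / min(vol, volA - vol).)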
64 | function set_stats(A::SparseMatrixCSC{Float64,Int64}, 65 | S::Vector{Int64},volA::Float64) 66 | 67 | if volA == 0.0 68 | volA = sum(A.nzval) 69 | end 70 | 71 | if length(S) == size(A,1) 72 | # then we have an indicator vector 73 | S = findall(x->x!=0,eS) 74 | AS = A[:,S] 75 | else 76 | # then we have a subset 77 | @assert(minimum(S) >= 1) 78 | @assert(maximum(S) <= size(A,1)) 79 | AS = A[:,S] 80 | end 81 | 82 | vol = sum(AS.nzval); 83 | SAS = AS[S,:] 84 | edges = sum(SAS.nzval); 85 | cut = vol-edges 86 | 87 | cond = cut/minimum([vol,volA-vol]); 88 | 89 | return cut, vol, edges, cond 90 | 91 | end 92 | 93 | # Compute the s-t cut score corresponding to a set S, in an augmented graph 94 | # with source and sink node 95 | function cutval(A::SparseMatrixCSC{Float64,Int64},S::Vector{Int64}, 96 | R::Vector{Int64},d::Array{Float64,2},alpha::Float64,epsilon::Float64, 97 | volA::Float64,pR::Array{Float64},RinS::Array{Float64}) 98 | 99 | n = size(A,1) 100 | if volA == 0.0 101 | volA = sum(A.nzval) 102 | end 103 | 104 | strongR = R[findall(x->x!=0,RinS)] 105 | @assert(length(setdiff(strongR,S)) == 0) # S should contain strongR 106 | 107 | @assert(minimum(S) >= 1) 108 | @assert(maximum(S) <= size(A,1)) 109 | AS = A[:,S]; 110 | 111 | 112 | volS = sum(AS.nzval); 113 | SAS = AS[S,:] 114 | edges = sum(SAS.nzval); 115 | cutS = volS-edges 116 | 117 | volR = sum(d[R]) 118 | 119 | # penalty vector, should only be nonzero for R nodes 120 | penalty = zeros(n) 121 | penalty[R] = pR.*d[R] 122 | 123 | RS = intersect(R,S) 124 | volRS = sum(d[RS]) 125 | RnotinS = setdiff(R,RS) # the set of nodes in R that aren't in S 126 | pRnotinS = sum(penalty[RnotinS]) # the penalty for excluding R nodes from A 127 | 128 | cutScore = cutS - alpha*volRS + alpha*volR + alpha*epsilon*(volS-volRS) + alpha*pRnotinS 129 | 130 | @assert(cutScore >= 0) 131 | 132 | relcond = cutS/(volRS - epsilon*(volS-volRS) - pRnotinS) 133 | 134 | return relcond 135 | end 136 | 137 | # The main function, which minimizes a localized variant of conductance which 138 | # penalizes the exclusion of seed nodes from the output set. 139 | # 140 | # Parameters: 141 | # 142 | # A = adjacency matrix for a graph 143 | # 144 | # R = node indices for a seed set, 145 | # Rn = immediate neighbors of R 146 | # Rc = complement set of R 147 | # 148 | # epsilon = locality parameter 149 | # 150 | # pR = a length(R) vector with penalties on exluding seed nodes in R from 151 | # the output set. pR[i] is the penalty or excluding R[i] from the output 152 | # 153 | # RinS = a length(R) zero-one vector indicating which nodes in R are stricly 154 | # required to be in the output set 155 | # 156 | # relcondFlag = a boolean flag indicating whether to compute the relative 157 | # conductance score or the exact conductance score for each 158 | # intermediate improved set. Choosing false (i.e. updating with 159 | # exact conductance) will sometimes lead to fewer iterations and 160 | # lower conductance output, but will not actually minimize the 161 | # relative conductance or seed penalized conductance. 162 | # 163 | # localFlag = a boolean flag indicating whether or not to use the local 164 | # computations. If volR is large and epsilon is small, in some 165 | # cases it may be better for the subroutine to perform one 166 | # global caluculations that multiple "local" computations. 
167 | # 168 | # d = weighted degree vector of the graph 169 | # 170 | # 171 | # volA, volR = volumes of the entire graph and seed set respectively 172 | 173 | # FlowSeed with simplified parameters 174 | function FlowSeed(A::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}, 175 | epsilon::Float64,pR::Array{Float64},RinS::Array{Float64}, 176 | relcondFlag::Bool= true,localFlag::Bool=true) 177 | 178 | d = sum(A,dims = 2) 179 | volA = sum(A.nzval) 180 | volR = sum(d[R]) 181 | n = size(A,1) 182 | 183 | # Find one-hop neighbors of R, and get the complement set 184 | Rn = neighborhood(A,R,1) # get the immediate neighbors of R... 185 | Rn = setdiff(Rn,R) # ...but we exclude R itself 186 | inRc = ones(n) 187 | inRc[R] .= 0 188 | Rc = findall(x->x!=0,inRc) # complement of R 189 | 190 | if volA*epsilon/volR < 10 191 | localFlag = false 192 | end 193 | FlowSeed(A,R,Rn,Rc,epsilon,pR,RinS,d,volA,volR,relcondFlag,localFlag) 194 | 195 | end 196 | 197 | # More in depth parameters, in case one wants to run the method multiple times 198 | # and now always recompute Rn, Rc, volA, volR, and d each time 199 | function FlowSeed(A::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}, 200 | Rn::Vector{Int64},Rc::Vector{Int64},epsilon::Float64,pR::Array{Float64}, 201 | RinS::Array{Float64},d::Array{Float64},volA::Float64=0.0,volR::Float64=0.0, 202 | relcondFlag::Bool= true,localFlag::Bool=true) 203 | 204 | fR = volR/(volA - volR) 205 | if epsilon < fR 206 | println("Locality parameter epsilon was set to small. Setting it to lower bound of $fR. Computations will not be local.") 207 | epsilon = fR 208 | end 209 | 210 | n = size(A,1) 211 | 212 | if localFlag 213 | if volA*epsilon/volR < 10 214 | println("Note that vol(R)/epsilon = O(vol(G)). 215 | For these parameters \nit may be faster to run the algorithm 216 | without the locality setting.") 217 | end 218 | end 219 | 220 | # Call nodes that must be S the "strong seed nodes" 221 | localStrong = findall(x->x!=0,RinS) 222 | 223 | StrongSeeds = R[localStrong] 224 | numstrong = length(StrongSeeds) 225 | 226 | # If something is marked as a strong seed, put an infinite penalty 227 | # on excluding it from the output set 228 | pR[localStrong] .= Inf 229 | 230 | # Conductance of R 231 | Stats = set_stats(A,R,volA) 232 | alphaCurrent = Stats[4] 233 | # Conductance of R is same as localized seed penalized conductance of R 234 | # alpha2 = cutval(A,R,R,d,1.0,epsilon,volA,pR,RinS) 235 | # println("$alpha2, $alphaCurrent") 236 | 237 | 238 | println("\nEpsilon = $epsilon"); 239 | println("There are $numstrong strong seed nodes.") 240 | println("The full seed set has conductance $alphaCurrent "); 241 | println("-------------------------------------------------------") 242 | BestS = R 243 | alph0 = 2 244 | alphaBest = alphaCurrent 245 | 246 | source = zeros(n) 247 | sink = zeros(n) 248 | dr = d[R] 249 | drc = d[Rc] 250 | 251 | while alphaCurrent < alph0 252 | 253 | # Prepare source-side and sink-side edge weights for the augmented 254 | # local flow graph 255 | # Seed nodes have an edge to the source of the following weight 256 | source[R] = alphaCurrent*(pR .+ 1).*dr 257 | 258 | # Non-seed nodes have an edge to the sink 259 | sink[Rc] = alphaCurrent*epsilon*drc 260 | 261 | # Compute the new min s-t cut 262 | if localFlag 263 | # Do it by repeatedly solving smaller problems, starting 264 | # by looking at the immediate neighbors Rn 265 | S = LocalPushRelabel(A,R,source,sink,Rn) 266 | else 267 | # Run a single min-cut computation on the whole graph 268 | S = 
NonLocalPushRelabel(A,R,source,sink) 269 | end 270 | 271 | if length(S) > 0 && length(S) < n 272 | 273 | # Check stats for new set 274 | if relcondFlag 275 | alphaS = cutval(A,S,R,d,1.0,epsilon,volA,pR,RinS) 276 | else 277 | Stats = set_stats(A,S,volA) 278 | alphaS = Stats[4] 279 | end 280 | 281 | if alphaS < alphaCurrent 282 | numS = size(S,1) 283 | ra = round(alphaS,digits =4) 284 | println("Improvement found: R-Conductance = $ra, Size = $numS") 285 | BestS = S 286 | alphaBest = alphaS 287 | end 288 | 289 | else 290 | alphaS = alphaCurrent 291 | end 292 | 293 | alph0 = alphaCurrent 294 | alphaCurrent = alphaS 295 | 296 | end 297 | 298 | SL = BestS 299 | sizeSL = length(SL) 300 | cond = alphaBest 301 | println("------------------------------------------------------") 302 | println("Final Answer: Conductance = $cond, Size = $sizeSL ") 303 | 304 | return SL, cond 305 | end 306 | 307 | # LocalPushRelabel: computes the minimumn s-t cut for a flow graph in strongly-local 308 | # time. It repeatedly solves localized min-cut problems. 309 | # 310 | # Input Parameters: 311 | # 312 | # A = a symmetric matrix representing an undirected graph. It can be weighted. 313 | # 314 | # R = a list of nodes that share an edge with the source node 315 | # 316 | # sWeights and tWeight store the nonnegative weight of each node to the source 317 | # and sink. For node i, exactly one of sWeights[i] and tWeights[i] is nonzero 318 | # 319 | # Rn = a list of nodes not in R that neighbor a node in R 320 | function LocalPushRelabel(A::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}, 321 | sWeights::Array{Float64},tWeights::Array{Float64},Rn::Array{Int64}) 322 | 323 | timer = 0.0 324 | 325 | n = size(A,1) 326 | rp = A.rowval 327 | ci = A.colptr 328 | 329 | # Now we want to locally compute maximum flows 330 | # C = indices of "complete" nodes in the local graph L, which are nodes 331 | # whose degree in the local graph equals the degree in the global graph. 332 | # I = local indices of nodes that are in L, but not complete. These do 333 | # share edges with one another, but only with complete nodes. 334 | 335 | # Initialize the complete set to be the set of nodes adjacent to the source 336 | C_global = R 337 | I_global = Rn # everything else is incomplete 338 | Ac = A[C_global,:] # set of edges from the complete set to the rest of the graph 339 | 340 | # We will maintain a map from indices in a local subgraph, to global indices in A. 341 | # These don't include the sink node in the flow graph, we are considering 342 | # just a growing local subgraph of A 343 | Local2Global = [C_global; I_global] 344 | # Node i in the local graph corresponds to the node with index 345 | # Local2Glocal[i] in the global graph A 346 | 347 | # Number of nodes in the local graph 348 | Lsize = length(Local2Global) 349 | 350 | # Indices, in the local graph, of complete and incomplete nodes 351 | C_local = collect(1:length(R)) 352 | I_local = collect(length(R)+1:Lsize) 353 | numI = length(I_global) # number of incomplete nodes 354 | 355 | # Build the initial local graph 356 | 357 | AcToI = Ac[:,I_global] # edges between complete and incomplete nodes 358 | AcToc = Ac[:,C_global] # edges between complete nodes 359 | L = [AcToc AcToI; 360 | AcToI' spzeros(numI,numI)] # adjacency matrix for local graph 361 | 362 | # We distinguish between L the "local graph", and Lf, the "local flow graph" 363 | # which additionally contains the sink node t (as node 1). 
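# (Added note, inferred from the construction below: Lf has the block form
#
#    Lf = [ 0      tToL' ]
#         [ tToL     L   ]
#
# so node 1 of Lf plays the role of the sink, its edges carry the sink-side
# capacities tToL, and the remaining block is the local graph L itself.)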
364 | 365 | # In the local flow graph, each non-terminal node has either a source-side 366 | # or sink-side edge. 367 | tToL = reshape(tWeights[Local2Global],Lsize) 368 | sToL = reshape(sWeights[Local2Global],Lsize) 369 | 370 | # By adding the edges to the sink, 371 | # we transform the local graph L into the local flow graph Lf 372 | 373 | Lf = [spzeros(1,1) sparse(tToL'); 374 | sparse(tToL) L] 375 | 376 | # Initialize the flow matrix; allocate space for non-zero flow values 377 | nLf = size(Lf,1) 378 | F = SparseMatrixCSC(nLf,nLf,Lf.colptr,Lf.rowval,zeros(length(Lf.rowval))) 379 | # Find the minimum cut for Lf. 380 | # 381 | # The first node in Lf is the sink, so offset indices of R by 1. 382 | start = time() 383 | S_local,F,excess = Main_Push_Relabel_fs(Lf,F,collect(2:length(R)+1),[0; sToL]) 384 | timer += time()-start 385 | 386 | # F is a preflow that is returned. It is NOT the maximum flow for Lf. 387 | # S is the set of nodes in the min s-t cut of Lf. S_local are the local 388 | # indices in L, (not the indices in A or Lf) 389 | 390 | # We "expand" L around nodes in S that were previously "incomplete" 391 | E_local = setdiff(S_local,C_local) # Nodes to expand around 392 | E_global = Local2Global[E_local] # their global indices 393 | 394 | # Keep track of which nodes are in the local graph L 395 | inL = zeros(Bool,n) 396 | inL[Local2Global] .= true 397 | 398 | # As long as we have new nodes to expand around, we haven't yet found 399 | # the global minimum s-t cut, so we continue. 400 | while length(E_local) > 0 401 | 402 | # Update which nodes are complete and which are incomplete 403 | C_local = [C_local; E_local] 404 | C_global = Local2Global[C_local] 405 | 406 | # Take these away from I_local 407 | I_local = setdiff(I_local,E_local) 408 | I_global = Local2Global[I_local] 409 | 410 | # To complete nodes in E, first add all the possible edges in the 411 | # current local graph, so that they match the global graph edges 412 | # (This is one of the most expensive parts of the expansion) 413 | L[E_local,E_local] = A[E_global,E_global] 414 | L[E_local,I_local] = A[E_global,I_global] 415 | L[I_local,E_local] = L[E_local,I_local]' 416 | 417 | # Now we must expand the local graph so that NEW neighbors of E 418 | # are added to L 419 | Lnew = Vector{Int64}() 420 | for v = E_global 421 | # This extracts the neighbor list of node v from the 422 | # rowval and colptr vectors of the adjacency matrix 423 | Neighbs_of_v = rp[ci[v]:ci[v+1]-1] 424 | for nv = Neighbs_of_v 425 | if ~inL[nv] 426 | inL[nv] = true 427 | push!(Lnew,nv) 428 | end 429 | end 430 | end 431 | numNew = length(Lnew) 432 | 433 | # Store local indices for new nodes added to L 434 | Lnew_local = collect((Lsize+1):(Lsize+numNew)) 435 | 436 | # These are going to be "incomplete" nodes 437 | I_local = [I_local; Lnew_local] 438 | 439 | # Expand L by adding edges from the old local graph to Lnew. 440 | # Note that we don't include any edges between nodes in Lnew. 441 | P = A[Local2Global,Lnew] 442 | L = [L P; 443 | P' spzeros(numNew,numNew)] 444 | 445 | # Update the set of indices in L 446 | Local2Global = [Local2Global; Lnew] 447 | 448 | # excess stores the amount of "excess" flow after a flow computation. 449 | # 450 | # Extend the excess vector to accomodate the new size of L. 451 | # Since Lnew were not present in the last flow computation, they 452 | # have zero excess. 453 | excess = [excess; zeros(numNew)] 454 | 455 | # For the next local min-cut computation, we need to know which 456 | # nodes come with nonzero excess. 
These are "active" nodes. 457 | ExcessNodes = findall(x->x!=0,excess) 458 | 459 | # Update the capacity to the sink. 460 | tToL = [tToL; tWeights[Lnew]] 461 | # Now we construct a new local flow graph, and repeat 462 | 463 | Lf = [spzeros(1,1) sparse(tToL'); 464 | sparse(tToL) L] 465 | 466 | Fold = F # Old flow, saved as a warm start 467 | 468 | # Construct an initial flow F that includes the previous flow Fold 469 | # as a warm start. First, we allocate space for future 470 | # flow. 471 | # (This is one of the most expensive parts of the expansion) 472 | nLf = size(Lf,1) 473 | 474 | F = SparseMatrixCSC(nLf,nLf,Lf.colptr,Lf.rowval,zeros(length(Lf.rowval))) 475 | F[1:Lsize+1,1:Lsize+1] = Fold 476 | 477 | Lsize = size(L,1) 478 | 479 | # Compute min s-t cut for local flow graph and see if we need to expand 480 | S_local,F,excess = Main_Push_Relabel_fs(Lf,F,ExcessNodes,excess) 481 | 482 | E_local = setdiff(S_local,C_local) # the nodes that need completing 483 | E_global = Local2Global[E_local] # their global indices 484 | 485 | end 486 | 487 | # return the global indices of the minimum cut set 488 | return Local2Global[S_local] 489 | 490 | end 491 | 492 | # A non-local version of the min-cut code that works by calling the same 493 | # subroutine, but on the entire graph all at once 494 | function NonLocalPushRelabel(A::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}, 495 | sWeights::Array{Float64},tWeights::Array{Float64}) 496 | 497 | n = size(A,1) 498 | # Directly set up the flow matrix 499 | C = [spzeros(1,1) sparse(tWeights'); 500 | sparse(tWeights) A] 501 | 502 | # Allocate space for the flow we will calculate 503 | F = SparseMatrixCSC(n+1,n+1,C.colptr,C.rowval,zeros(length(C.rowval))) 504 | 505 | # R is the set of nodes with excess, and the excess 506 | # will come from source-side edges that are immediately saturated 507 | S, F, excess = Main_Push_Relabel_fs(C,F,R.+1,[0;sWeights]) 508 | 509 | # The returned F is a preflow, not the maximum flow. 510 | # We are only interested in the cut. 511 | 512 | return S 513 | end 514 | 515 | # Main_Push_Relabel_fs returns a preflow F and the min s-t cut set S for the 516 | # flow graph C. It does not solve the maximum s-t flow problem. 517 | # 518 | # C = the capacity matrix for the flow problem. 519 | # Node 1 is the sink, and there is no explicit representation of a source, 520 | # the preflow immediately pushes all flow from the source to create an 521 | # excess on nodes in the graph. 522 | # 523 | # F = an initial flow. It can be initialize to zero. 524 | # 525 | # ExcessNodes = the set of nodes which at the start begin with some positive excess 526 | # These can be thought of as nodes that are adjacenct to the implicit source node 527 | # and the edges from the source are flooded. Or they may represent nodes that 528 | # have a nonzero excess from the initial flow F. The indices given here 529 | # should account for the fact that node 1 is already reserved for the sink. 530 | # 531 | # excess = the vector of excess values at the start of the algorithm. If F = 0, 532 | # this is the vector of edge capacities from the implicit source to the graph. 
533 | # If F != 0, then it's the excess from a previous run of the algorithm 534 | function Main_Push_Relabel_fs(C::SparseMatrixCSC{Float64,Int64}, 535 | F::SparseMatrixCSC{Float64,Int64},ExcessNodes::Array{Int64},excess::Array{Float64}) 536 | 537 | # check excess node list 538 | # assert(countnz(excess) == length(ExcessNodes)) 539 | 540 | # here, n includes only one terminal node, the sink 541 | n = size(C,1) 542 | 543 | height = zeros(Int64,n) # label/height of each node 544 | inQ = zeros(Bool,n) # list whether or not nodes are in the queue 545 | 546 | # Store adjacency list. There are ways to update this if calling 547 | # this function multiple times for growing local graphs, but it 548 | # does not appear to be a bottleneck to simply recompute frequently 549 | Neighbs,d = ConstructAdj(C,n) 550 | 551 | # We will maintain a queue of active nodes. 552 | Queue = Vector{Int64}() 553 | # An actual queue implementation is available in the DataStructures.jl 554 | # Julia package. The performane is nearly identical (and in some cases 555 | # slightly slower), thus to minimize dependency on outside packages, we 556 | # just use a Vector. 557 | 558 | # All nodes with nonzero excess are the first to be processed 559 | for v = ExcessNodes 560 | push!(Queue,v) 561 | end 562 | inQ[ExcessNodes] .= true 563 | 564 | # count the number of nodes that have been relabeled 565 | relabelings::Int64 = 0 566 | 567 | height = relabeling_bfs_fs(C,F) # compute initial distance from sink 568 | 569 | # In the code and comments, height = distance from sink = label of node 570 | 571 | # Continue until the queue no longer contains any active nodes. 572 | while length(Queue) > 0 573 | 574 | u = pop!(Queue) # Select a new active node 575 | inQ[u] = false # It's no longer in the queue 576 | 577 | if height[u] < n # Check that the node is still active 578 | 579 | # discharge flow through node u 580 | relabelings += discharge_fs!(C,F,Queue,u,Neighbs[u],height,excess,n,d[u],inQ) 581 | 582 | # if u is still active, re-place it into the queue 583 | if excess[u] > 0 && height[u] < n 584 | prepend!(Queue,u) 585 | inQ[u] = true 586 | end 587 | 588 | end 589 | 590 | # Global relabeling heuristic for push-relabel algorithm. 591 | # This recomputes distances between nodes and the sink 592 | if relabelings == n 593 | relabelings = 0 594 | dist = relabeling_bfs_fs(C,F) 595 | height = dist 596 | end 597 | 598 | end 599 | 600 | # Compute final distances from sink using BFS. Anything with distance 601 | # n will be the cut set. 602 | finalHeight = relabeling_bfs_fs(C,F) 603 | S = Vector{Int64}() 604 | for i = 2:n 605 | if finalHeight[i] == n 606 | push!(S,i-1) 607 | end 608 | end 609 | 610 | excess[1] = 0.0 # ignore whatever excess there was at the sink. 611 | return S, F, excess 612 | 613 | end 614 | 615 | # Discharege operation: pushes flow away from node u across admissible edges. 616 | # If excess[u] > 0 but no admissible edges exist, we relabel u. 
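#
# (Added note: in the loop below an edge (u,v) is treated as admissible when
# height[u] > height[v] and there is residual capacity C[u,v] - F[u,v] > 0;
# with a valid labeling this matches the usual height[u] == height[v] + 1
# push-relabel condition. A hedged usage sketch, with arguments as built in
# Main_Push_Relabel_fs:
#
#   relabelings += discharge_fs!(C, F, Queue, u, Neighbs[u], height, excess, n, d[u], inQ)
#
# where the return value is 1 if u had to be relabeled and 0 otherwise.)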
617 | function discharge_fs!(C::SparseMatrixCSC{Float64,Int64},F::SparseMatrixCSC{Float64,Int64}, 618 | Queue::Vector{Int64},u::Int64,uNeighbs::Array{Int64},height::Array{Int64}, 619 | excess::Array{Float64},n::Int64,du::Int64,inQ::Array{Bool}) 620 | 621 | vLocal::Int64 = 1 622 | hu = height[u] 623 | relabeled = 0 624 | while excess[u] > 0 && vLocal <= du 625 | v = uNeighbs[vLocal] 626 | if hu > height[v] && C[u,v] - F[u,v] > 0 627 | pushflow_fs!(C,F,Queue,u,v,excess,height,inQ,n) 628 | vLocal += 1 629 | else 630 | vLocal += 1 631 | end 632 | end 633 | 634 | if vLocal > du 635 | relabeled = 1 636 | relabel_fs!(C,F,Queue,u,uNeighbs,height,du,n) 637 | end 638 | 639 | return relabeled 640 | end 641 | 642 | # Relabel sets the label/height of node u to be equal to the minimum label 643 | # such that an admissible edge exists. An edge (u,v) is admissible if 644 | # height[u] = height[v] + 1 645 | function relabel_fs!(C::SparseMatrixCSC{Float64,Int64},F::SparseMatrixCSC{Float64,Int64}, 646 | Queue::Vector{Int64},u::Int64,uNeighbs::Array{Int64},height::Array{Int64}, 647 | du::Int64,n::Int64) 648 | # find smallest new height making a push possible, 649 | # if such a push is possible at all 650 | 651 | min_height = Inf 652 | # search through the neighbors of u 653 | # and relabel so that height[u] = height[v] + 1 for some v in the neighborhood 654 | for vLocal = 1:du 655 | v = uNeighbs[vLocal] 656 | if C[u,v] - F[u,v] > 0 657 | min_height = min(min_height, height[v]) 658 | height[u] = min_height + 1 659 | end 660 | end 661 | 662 | 663 | end 664 | 665 | # Push flow from an active node u to a node v via an admissible edge (u,v) 666 | function pushflow_fs!(C::SparseMatrixCSC{Float64,Int64},F::SparseMatrixCSC{Float64,Int64}, 667 | Queue::Vector{Int},u::Int64,v::Int64,excess::Array{Float64},height::Array{Int64}, 668 | inQ::Array{Bool},n::Int64) 669 | 670 | send = min(excess[u], C[u,v] - F[u,v]) 671 | F[u,v] += send 672 | F[v,u] -= send 673 | excess[u] -= send 674 | excess[v] += send 675 | 676 | # If v isn't in the queue, isn't the sink, is active, add it to the Queue 677 | if ~inQ[v] && v > 1 && height[v] < n 678 | prepend!(Queue,v) 679 | inQ[v] = true 680 | end 681 | end 682 | 683 | # From the adjacency matrix, build an adjacency list for the graph 684 | function ConstructAdj(C::SparseMatrixCSC{Float64,Int64},n::Int64) 685 | 686 | rp = C.rowval 687 | ci = C.colptr 688 | Neighbs = Vector{Vector{Int64}}() 689 | d = zeros(Int64,n) 690 | for i = 1:n 691 | # chop up the rp vector and put it in Neighbs 692 | push!(Neighbs,rp[ci[i]:ci[i+1]-1]) 693 | d[i] = ci[i+1]-ci[i] 694 | end 695 | 696 | # d is the number of neighbors. This is the unweighted degree, 697 | # but note importantly that if the original graph is weighted this is 698 | # not the same as the degree vector d we will sometimes use 699 | return Neighbs, d 700 | 701 | end 702 | 703 | # Given initial capacity matrix C and flow matrix F, compute the distance 704 | # from each node to the sink via residual edges. Distance = n means there is no 705 | # path to the sink. Sink node is assumed to be node 1. 
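#
# (Added usage sketch, following how the routine is called elsewhere in this
# file: dist = relabeling_bfs_fs(C, F) gives BFS distances to the sink in the
# residual graph of C - F. Here dist[1] == 0 for the sink itself, and any node
# i with dist[i] == n has no residual path to the sink, which is how the final
# min-cut set S is extracted in Main_Push_Relabel_fs.)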
706 | function relabeling_bfs_fs(C::SparseMatrixCSC{Float64,Int64},F::SparseMatrixCSC{Float64,Int64}) 707 | 708 | # To avoid subtraction cancellation errors that may have ocurred when pushing 709 | # flow, when computing a bfs we round edges to zero if they are under 1e-8 710 | Cf = round.((C-F),digits =6) 711 | n = size(Cf,1) 712 | 713 | rp = Cf.colptr 714 | ci = Cf.rowval 715 | 716 | N=length(rp)-1 717 | 718 | d = n*ones(Int64,N) 719 | sq=zeros(Int64,N) 720 | sqt=0 721 | sqh=0 # search queue and search queue tail/head 722 | 723 | # start bfs at the sink, which is node 1 724 | u = 1 725 | sqt=sqt+1 726 | sq[sqt]=u 727 | d[u]=0 728 | while sqt-sqh>0 729 | sqh=sqh+1 730 | v=sq[sqh] # pop v off the head of the queue 731 | for ri=rp[v]:rp[v+1]-1 732 | w=ci[ri] 733 | if d[w] > n-1 734 | sqt=sqt+1 735 | sq[sqt]=w 736 | d[w]= d[v]+1 737 | end 738 | end 739 | end 740 | 741 | return d 742 | end 743 | -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/src/.DS_Store -------------------------------------------------------------------------------- /src/Helper_Functions.jl: -------------------------------------------------------------------------------- 1 | using SparseArrays 2 | using MAT 3 | using MatrixNetworks 4 | using LinearAlgebra 5 | 6 | # Controlled growth from a seed set R in a hypergraph. Look at the one hop 7 | # neighborhood, and order all of those nodes by how many hyperedges they are in 8 | # that have nodes from R. Order that last, and take the top k 9 | function TopNeighbors(H::SparseMatrixCSC{Float64,Int64},R::Vector{Int64},R1hop::Vector{Int64},k::Int64) 10 | 11 | if length(R1hop) > k 12 | # Get all edges touching R 13 | HR = H[:,R] 14 | rp = HR.rowval 15 | edges = unique(rp) 16 | 17 | # Consider how many touch the 1-hop neighborhood 18 | HL = H[edges,R1hop] 19 | 20 | # For each node in R1hop, compute the number of edges it has that touch R 21 | d2R = vec(sum(HL,dims=1)) 22 | 23 | # order = sortperm(d2R, rev=true) 24 | b = partialsortperm(d2R, 1:k, rev=true) 25 | Rmore = R1hop[b] 26 | else 27 | Rmore = R1hop 28 | end 29 | 30 | return union(R, Rmore) 31 | end 32 | 33 | 34 | # Controlled growth from a seed set R in a hypergraph. Look at the one hop 35 | # neighborhood, and order all of those nodes by what percent of their 36 | # edges touch R. 
Order that, and take the top k 37 | function BestNeighbors(H::SparseMatrixCSC{Float64,Int64},d::Vector{Float64},R::Vector{Int64},R1hop::Vector{Int64},k::Int64) 38 | 39 | if length(R1hop) > k 40 | # Get all edges touching R 41 | HR = H[:,R] 42 | rp = HR.rowval 43 | edges = unique(rp) 44 | 45 | # Consider how many touch the 1-hop neighborhood 46 | HL = H[edges,R1hop] 47 | 48 | # For each node in R1hop, compute the number of edges it has that touch R 49 | d1 = d[R1hop] 50 | d2 = vec(sum(HL,dims=1)) 51 | 52 | # order = sortperm(d2R, rev=true) 53 | b = partialsortperm(d2./d1, 1:k, rev=true) 54 | Rmore = R1hop[b] 55 | else 56 | Rmore = R1hop 57 | end 58 | return union(R, Rmore) 59 | end 60 | 61 | 62 | # Simple function for returning ALL the indices where we find a maximum 63 | function findallmax(v) 64 | 65 | l = length(v) 66 | m = minimum(v) 67 | M = maximum(v) 68 | if M == m 69 | return collect(1:l) 70 | else 71 | Inds = Vector{Int64}() 72 | a,b = findmax(v) 73 | while a == M 74 | push!(Inds,b) 75 | v[b] = m 76 | a,b = findmax(v) 77 | end 78 | return Inds 79 | end 80 | 81 | end 82 | 83 | 84 | ## Delta-Linear (tl = thresholded linear) conductance computation. 85 | # e.g. tl_cond(H,S,d,delta,volA,order) 86 | function tl_cond(H::SparseMatrixCSC,S::Vector{Int64},d::Vector{Float64},delta::Float64,volA::Float64,order::Vector{Int64}) 87 | 88 | if volA == 0.0 89 | volA = sum(d) 90 | end 91 | n = length(d) 92 | volS = sum(d[S]) 93 | cut = tl_cut(H,S,delta,order) 94 | 95 | cond = cut/min(volS, volA-volS) 96 | 97 | return cond, volS, cut 98 | 99 | end 100 | 101 | ## Delta-Linear (thresholded linear) normalized Cut computation. 102 | # e.g. tl_ncut(H,S,d,delta,volA,order) 103 | function tl_ncut(H::SparseMatrixCSC,S::Vector{Int64},d::Vector{Float64},delta::Float64,volA::Float64,order::Vector{Int64}) 104 | 105 | if volA == 0.0 106 | volA = sum(d) 107 | end 108 | n = length(d) 109 | volS = sum(d[S]) 110 | cut = tl_cut(H,S,delta,order) 111 | 112 | cond = cut/min(volS, volA-volS) 113 | ncut = cut/(volS) + cut/(volA-volS) 114 | 115 | # rncut = round(Int64,ncut) 116 | # rcut = round(Int64,cut) 117 | # rcond = round(cond,digits = 4) 118 | # rvol = round(Int64,volS) 119 | 120 | return cond, ncut, volS, cut 121 | 122 | end 123 | 124 | # Thresholded linear cut value for a set 125 | # calling e.g. tl_cut(H,S,delta,order) 126 | function tl_cut(H::SparseMatrixCSC{Float64,Int64}, S::Vector{Int64}, delta::Float64,order::Vector{Int64}) 127 | 128 | # Check the cut 129 | HS = H[:,S] 130 | sumHS = sum(HS,dims = 2) # Count number of S nodes in each hyperedge 131 | inds = findall(x->x>0,sumHS) # Extract hyperedges with > 0 nodes from S 132 | ES = sumHS[inds] 133 | verts = order[inds] # Get the size of these hyperedges 134 | 135 | # Find number of nodes on small side of cut 136 | SmallSide = round.(Int64,min.(ES, verts-ES)) 137 | # Compute the cardinality-based cut score 138 | cutval = 0.0 139 | for j = 1:length(SmallSide) 140 | sm = SmallSide[j] 141 | if sm > 0 142 | if sm < delta 143 | cutval += sm 144 | else 145 | cutval += delta 146 | end 147 | end 148 | end 149 | 150 | return cutval 151 | end 152 | 153 | 154 | # For a set S in a hypergraph, return the hypergraph local conductance 155 | # score with thresholded linear splitting penalty 156 | # e.g. 
hlc_tl(H,order,R,S,d,volA,epsilon,delta) 157 | function hlc_tl(H::SparseMatrixCSC{Float64,Int64},order::Vector{Int64},R::Vector{Int64}, 158 | S::Vector{Int64},d::Vector{Float64},volA::Float64,epsilon::Float64, 159 | delta::Float64,) 160 | 161 | volS = sum(d[S]) 162 | RnS = intersect(R,S) 163 | volRnS = sum(d[RnS]) 164 | cut = tl_cut(H,S,delta,order) 165 | 166 | lcond = cut/((1+epsilon)*volRnS - epsilon*volS) 167 | 168 | return lcond 169 | 170 | end 171 | 172 | # Expand a hypergraph using the thresholded linear splitting function. 173 | # 174 | # Hyperedges = Hyperedge list 175 | # delta = TL splitting function parameter 176 | function tl_expansion(Hyperedges::Vector{Vector{Int64}}, order::Vector{Int64}, delta::Float64,n::Int64) 177 | 178 | BigEdges = length(findall(x->x>3,order)) 179 | N = n + 2*BigEdges 180 | 181 | ## Build the adjacency matrix 182 | ap = n+1 # "auxiliary node pointer", points to next "free" aux node 183 | 184 | # Build the sparse matrix 185 | U = Vector{Int64}() 186 | V = Vector{Int64}() 187 | vals = Vector{Float64}() 188 | 189 | for edge = Hyperedges 190 | nv = length(edge) 191 | if nv == 2 192 | i = edge[1]; j = edge[2] 193 | #A[i,j] += 1; A[j,i] += 1 194 | push!(U,i); push!(V,j); push!(vals,1) 195 | push!(U,j); push!(V,i); push!(vals,1) 196 | elseif nv == 3 197 | i = edge[1]; j = edge[2]; k = edge[3] 198 | # A[i,j] += 1/2; A[j,i] += 1/2 199 | # A[k,j] += 1/2; A[j,k] += 1/2 200 | # A[k,i] += 1/2; A[i,k] += 1/2 201 | push!(U,i); push!(V,j); push!(vals,1/2) 202 | push!(U,j); push!(V,i); push!(vals,1/2) 203 | push!(U,i); push!(V,k); push!(vals,1/2) 204 | push!(U,k); push!(V,i); push!(vals,1/2) 205 | push!(U,j); push!(V,k); push!(vals,1/2) 206 | push!(U,k); push!(V,j); push!(vals,1/2) 207 | else 208 | # We need to add auxiliary vertices 209 | for i = edge 210 | # A[i,auxpointer] = 1 211 | # A[auxpointer+1,i] = 1 212 | # A[auxpointer,auxpointer+1] = w2 213 | push!(U,i); push!(V,ap); push!(vals,1) 214 | push!(U,ap+1); push!(V,i); push!(vals,1) 215 | end 216 | push!(U,ap); push!(V,ap+1); push!(vals,delta) 217 | ap += 2 218 | end 219 | 220 | end 221 | @show maximum(U), maximum(V), N 222 | A = sparse(U,V,vals,N,N) 223 | return A 224 | end 225 | 226 | # Given an incidence matrix for a hypergraph and its transpose (having both 227 | # handy makes different parts of the code faster), and a set of nodes R, 228 | # return the immediate neighbors of R that don't include R itself 229 | function get_immediate_neighbors(H::SparseMatrixCSC{Float64,Int64}, 230 | Ht::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}) 231 | 232 | Hr = H[:,R] 233 | rp_r = Hr.rowval 234 | R_edges = unique(rp_r) 235 | 236 | He = Ht[:,R_edges] 237 | rp_e = He.rowval 238 | Rneighbs = unique(rp_e) 239 | Rn = setdiff(Rneighbs,R) 240 | 241 | return Rn 242 | 243 | end 244 | 245 | function neighborhood(H::SparseMatrixCSC{Float64,Int64}, 246 | Ht::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}) 247 | Hr = H[:,R] 248 | rp_r = Hr.rowval 249 | R_edges = unique(rp_r) 250 | 251 | He = Ht[:,R_edges] 252 | rp_e = He.rowval 253 | Rn = unique(rp_e) 254 | 255 | return Rn 256 | end 257 | 258 | function neighborlist(H::SparseMatrixCSC{Float64,Int64}, 259 | Ht::SparseMatrixCSC{Float64,Int64}) 260 | 261 | Neighbs = Dict() 262 | n = size(H,2) 263 | t1 = 0 264 | t2 = 0 265 | t3 = 0 266 | for i = 1:n 267 | # s = time() 268 | ivec = H[:,i] 269 | #n_edges = findnz(ivec)[1] 270 | n_edges = ivec.nzind # get neighboring edges 271 | # t1 += time()-s 272 | 273 | # s = time() 274 | He = Ht[:,n_edges] # nodes touching those edges 275 | rp_e = 
He.rowval 276 | neighbs_i = unique(rp_e) 277 | # t2 += time()-s 278 | 279 | # s = time() 280 | push!(Neighbs,neighbs_i) 281 | # t3 += time()-s 282 | end 283 | # @show t1, t2, t3 284 | return Neighbs 285 | end 286 | 287 | # Expand a hypergraph using the thresholded linear splitting function. 288 | # 289 | # H = |E| x |V| binary incidence matrix for the hypergraph 290 | # delta = TL splitting function parameter 291 | function tl_expansion_inc(H::SparseMatrixCSC{Float64,Int64}, order::Vector{Int64}, delta::Float64) 292 | 293 | n = size(H,2) 294 | BigEdges = length(findall(x->x>3,order)) 295 | N = n + 2*BigEdges 296 | 297 | Hyperedges = incidence2elist(H) 298 | 299 | ## Build the adjacency matrix 300 | ap = n+1 # "auxiliary node pointer", points to next "free" aux node 301 | 302 | # Build the sparse matrix 303 | U = Vector{Int64}() 304 | V = Vector{Int64}() 305 | vals = Vector{Float64}() 306 | 307 | for ee = 1:length(Hyperedges) 308 | edge = Hyperedges[ee] 309 | nv = length(edge) 310 | # if order[ee] != nv 311 | # @show ee, nv, order[ee], edge 312 | # end 313 | if nv == 1 314 | # ignore 315 | # println("This") 316 | elseif nv == 2 317 | i = edge[1]; j = edge[2] 318 | #A[i,j] += 1; A[j,i] += 1 319 | push!(U,i); push!(V,j); push!(vals,1) 320 | push!(U,j); push!(V,i); push!(vals,1) 321 | elseif nv == 3 322 | i = edge[1]; j = edge[2]; k = edge[3] 323 | # A[i,j] += 1/2; A[j,i] += 1/2 324 | # A[k,j] += 1/2; A[j,k] += 1/2 325 | # A[k,i] += 1/2; A[i,k] += 1/2 326 | push!(U,i); push!(V,j); push!(vals,1/2) 327 | push!(U,j); push!(V,i); push!(vals,1/2) 328 | push!(U,i); push!(V,k); push!(vals,1/2) 329 | push!(U,k); push!(V,i); push!(vals,1/2) 330 | push!(U,j); push!(V,k); push!(vals,1/2) 331 | push!(U,k); push!(V,j); push!(vals,1/2) 332 | else 333 | # We need to add auxiliary vertices 334 | for i = edge 335 | # A[i,auxpointer] = 1 336 | # A[auxpointer+1,i] = 1 337 | # A[auxpointer,auxpointer+1] = delta 338 | push!(U,i); push!(V,ap); push!(vals,1) 339 | push!(U,ap+1); push!(V,i); push!(vals,1) 340 | end 341 | push!(U,ap); push!(V,ap+1); push!(vals,delta) 342 | ap += 2 343 | end 344 | 345 | end 346 | # @show maximum(U), maximum(V), length(U), length(V), N, ap 347 | A = sparse(U,V,vals,N,N) 348 | return A 349 | end 350 | 351 | # Convert a hyperedge list to a hypergraph binary incidence matrix 352 | function incidence2elist(Hin::SparseMatrixCSC{Float64,Int64}) 353 | 354 | H = sparse(Hin') 355 | rp = H.rowval 356 | ci = H.colptr 357 | Hyperedges = Vector{Vector{Int64}}() 358 | n,m = size(H) 359 | 360 | for i = 1:m 361 | startedge = ci[i] 362 | endedge = ci[i+1]-1 363 | edge = rp[startedge:endedge] 364 | push!(Hyperedges,edge) 365 | end 366 | return Hyperedges 367 | end 368 | 369 | 370 | # Take a list of hyperedges and turn it into a hyperedge incidence matrix 371 | # H. N is the number of nodes in the hypergraph. 
372 | # H(e,u) = 1 iff node u is in hyperedge e 373 | function elist2incidence(Hyperedges::Vector{Vector{Int64}}, N::Int64) 374 | U = Vector{Int64}() 375 | E = Vector{Int64}() 376 | M = length(Hyperedges) 377 | for enum = 1:length(Hyperedges) 378 | e = Hyperedges[enum] 379 | for node in e 380 | push!(U,node) 381 | push!(E,enum) 382 | end 383 | end 384 | 385 | H = sparse(E,U,ones(length(U)),M,N) 386 | return H 387 | end 388 | 389 | 390 | # This computes the precision, recall, and F1 score for a set Returned 391 | # compared against a Target set 392 | function PRF(Target,Returned) 393 | 394 | if length(Returned) == 0 395 | pr = 0; re = 0; F1 = 0 396 | else 397 | TruePos = intersect(Returned,Target) 398 | pr = length(TruePos)/length(Returned) 399 | re = length(TruePos)/length(Target) 400 | F1 = 2*(pr*re)/(pr+re) 401 | 402 | if length(TruePos) == 0 403 | F1 = 0 404 | end 405 | end 406 | 407 | return pr, re, F1 408 | 409 | end 410 | 411 | 412 | 413 | ## Given a binary incidence matrix H for a hypergraph, find the one-hop 414 | # neighborhood of a set of nodes S 415 | function hyper_neighborhood(H::SparseMatrixCSC{Float64,Int64},S::Vector{Int64}) 416 | 417 | A = H'*H 418 | n = size(A,1) 419 | for i = 1:n 420 | A[i,i] = 0 421 | end 422 | dropzeros!(A) 423 | return neighborhood(A,S,1) 424 | 425 | end 426 | 427 | ## Given a binary incidence matrix H for a hypergraph, find the one-hop 428 | # neighborhood of a set of nodes S, when considering only hyperedges with 429 | # a maximum number of M nodes 430 | function hyper_neighborhood(H::SparseMatrixCSC{Float64,Int64},S::Vector{Int64},order::Vector{Int64},M::Int64) 431 | 432 | good = findall(x->x<=M,order) 433 | H = H[good,:] 434 | 435 | ## 436 | A = H'*H 437 | n = size(A,1) 438 | for i = 1:n 439 | A[i,i] = 0 440 | end 441 | dropzeros!(A) 442 | return neighborhood(A,S,1) 443 | 444 | end 445 | 446 | ## Simple Clique Expansion 447 | # A[i,j] = number of hyperedges nodes i and j share 448 | function SimpleCliqueExp(H::SparseMatrixCSC{Float64,Int64}) 449 | 450 | A = H'*H 451 | for i = 1:size(A,1) 452 | A[i,i] = 0.0 453 | end 454 | dropzeros!(A) 455 | return A 456 | end 457 | 458 | ## Weighted Clique Expansion 459 | # When performing the clique expansion, for each hyperedge expanded into a 460 | # clique, multiply each edge in the expansion by 1/order(e) 461 | function WeightedCliqueExpansion(H::SparseMatrixCSC{Float64,Int64}, order::Vector{Int64}) 462 | 463 | m,n = size(H) 464 | I = Vector{Int64}() 465 | J = Vector{Int64}() 466 | vals = Vector{Float64}() 467 | Hyperedges = incidence2elist(H) 468 | for e = 1:m 469 | Edge = Hyperedges[e] 470 | Ord = order[e] 471 | for ii = 1:length(Edge) 472 | for jj = ii+1:length(Edge) 473 | i = Edge[ii] 474 | j = Edge[jj] 475 | push!(I,i); push!(J,j); push!(vals,1/Ord) 476 | end 477 | end 478 | if mod(e,10000)==0 479 | println("$e") 480 | end 481 | end 482 | 483 | A = sparse(I,J,vals,n,n) 484 | A = sparse(A+A') 485 | return A 486 | end 487 | -------------------------------------------------------------------------------- /src/HyperLocal.jl: -------------------------------------------------------------------------------- 1 | # Strongly-local code for minimizing the HCL objective 2 | # Implemented with the thresholded linear hyperedge splitting penalty. 3 | 4 | include("Helper_Functions.jl") 5 | include("maxflow.jl") 6 | 7 | """ 8 | HyperLocal: minimizes HLC with the thresholded-linear (TL) hypergraph cut 9 | function, with parameter delta. In other words, the splitting function 10 | penalty is min { |S| , |e - S|, delta}. 
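(Added clarification: |S| and |e - S| above refer to the number of nodes of a
hyperedge e that fall on each side of the cut. With delta = 1.0 every cut
hyperedge therefore pays exactly 1, whereas a large threshold such as the
delta = 5000.0 used in the Stack Overflow experiments effectively leaves the
linear penalty min { |S| , |e - S| } in place.)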
11 | Strongly-local time 12 | 13 | H: Binary indicence matrix for hypergraph 14 | Hyperedges: A list of hyperedges defining the hypergraph 15 | order: Order (number of nodes) in each hyperedge 16 | d: Degree vector, d[v] = number of hyperedges a node is in 17 | R: Set of nodes in seed/reference set 18 | epsilon: Locality parameter, must exceed vol(R)/vol(bar{R}) 19 | delta: Threshold cut penalty. 20 | """ 21 | function HyperLocal(H::SparseMatrixCSC{Float64,Int64},Ht::SparseMatrixCSC{Float64,Int64}, 22 | order::Vector{Int64},d::Vector{Float64},R::Vector{Int64}, 23 | epsilon::Float64, delta::Float64,Rs_local::Vector{Int64},localflag::Bool=true) 24 | 25 | m,n = size(H) 26 | 27 | volA = sum(d) 28 | volR = sum(d[R]) 29 | # @assert(volR <= volA/2) 30 | Rstrong = R[Rs_local] 31 | # Check Locality Parameter 32 | fR = volR/(volA - volR) 33 | @show fR, volR, volA 34 | if epsilon < fR 35 | println("Locality parameter epsilon was set too small. 36 | Setting it to lower bound of $fR. Computations will not be local.") 37 | epsilon = fR 38 | localflag = false 39 | end 40 | A = 0; N = 0; 41 | if localflag 42 | 43 | if volA*epsilon/volR < 10 44 | println("Note that vol(R)/epsilon = O(vol(G)). 45 | For these parameters \nit may be faster to run the algorithm 46 | without the locality setting.") 47 | end 48 | 49 | else 50 | A = tl_expansion_inc(H,order,delta) 51 | N = round(Int64,size(A,1)) 52 | end 53 | 54 | # Store useful sets 55 | # Rn = hyper_neighborhood(H,R) # get the immediate neighbors of R... 56 | # Rn = setdiff(Rn,R) # ...but we exclude R itself 57 | Rn = get_immediate_neighbors(H,Ht,R) 58 | Rc = setdiff(1:n,R) # Complement set of R 59 | nR = length(R) 60 | 61 | condR,volR, cutR = tl_cond(H,R,d,delta,volA,order) 62 | 63 | println("\nRunning HyperLocal") 64 | println("----------------------------------------") 65 | println("Epsilon = $epsilon \t Delta = $delta") 66 | println("|R| = $nR, cond(R) = $condR") 67 | println("-------------------------------------------------------------------------") 68 | 69 | S_best = R 70 | a_best = condR 71 | a_old = condR 72 | still_improving = true 73 | Iteration = 1 74 | while still_improving 75 | 76 | still_improving = false 77 | 78 | stepstart = time() 79 | if localflag 80 | S_new = HyperLocal_Step(H,Ht,order,R,Rn,a_best,epsilon,delta,d,Rs_local) 81 | else 82 | S_new = HLC_Step(A,R,Rc,a_best,epsilon,N,d,n,Rs_local) 83 | end 84 | stime = round(time()-stepstart,digits=1) 85 | 86 | a_new = hlc_tl(H,order,R,S_new,d,volA,epsilon,delta) 87 | 88 | if a_new < a_old 89 | still_improving = true 90 | S_best = S_new 91 | nS = length(S_best) 92 | a_old = a_new 93 | a_best = a_new 94 | println("Iter $Iteration: |S| = $nS, lcond(S) = $a_new, min-cut took $stime seconds") 95 | else 96 | println("Iter $Iteration: Algorithm converged. 
Last min-cut took $stime sec") 97 | println("-------------------------------------------------------------------------") 98 | end 99 | Iteration += 1 100 | end 101 | 102 | return S_best, a_best 103 | end 104 | 105 | 106 | # A non-local version of the min-cut code that works by calling the same 107 | # subroutine, but on the entire graph all at once 108 | function HLC_Step(A::SparseMatrixCSC{Float64,Int64},R::Vector{Int64},Rbar::Vector{Int64}, 109 | alpha::Float64, epsilon::Float64, N::Int64, d::Vector{Float64},n::Int64,Rs_local::Vector{Int64}) 110 | 111 | Rstrong = R[Rs_local] 112 | # Directly set up the flow matrix 113 | sVec = zeros(N) 114 | tVec = zeros(N) 115 | sVec[R] .= alpha*d[R] 116 | sVec[Rstrong] .= N^2 117 | tVec[Rbar] .= alpha*epsilon*d[Rbar] 118 | F = maxflow(A,sVec,tVec,0) 119 | Src = source_nodes_min(F)[2:end].-1 120 | S = intersect(1:n,Src) 121 | 122 | return S 123 | end 124 | 125 | # Strongly-local subroutine for computing a minimum s-t cut 126 | # This uses the thresholded linear splitting function for each hyperegde 127 | function HyperLocal_Step(H::SparseMatrixCSC{Float64,Int64},Ht::SparseMatrixCSC{Float64,Int64}, 128 | order::Vector{Int64}, R::Vector{Int64},Rn::Vector{Int64},alpha::Float64, 129 | epsilon::Float64,delta::Float64,d::Vector{Float64},Rs_local::Vector{Int64}) 130 | 131 | # Map from local node indices to global node indices 132 | Local2Global = [R; Rn] 133 | 134 | n = length(d) 135 | 136 | # Keep track of which nodes are in the local hypergraph L 137 | inL = zeros(Bool,n) 138 | inL[Local2Global] .= true 139 | 140 | # Number of nodes in the local graph 141 | Lsize = length(Local2Global) 142 | 143 | # Complete nodes = nodes whose hyperedge set in the local hypergraph 144 | # is the same as their global hyperedge set 145 | # Incomplete nodes = everything else in the local hypergraph 146 | # (must be a neighbor of a complete node) 147 | # 148 | # Initialize the complete set to be R 149 | # Incomplete set is R-complement 150 | C_global = R 151 | I_global = Rn 152 | 153 | # Indices, in the local graph, of complete and incomplete nodes 154 | C_local = collect(1:length(R)) 155 | I_local = collect(length(R)+1:Lsize) 156 | R_local = collect(1:length(R)) 157 | Rstrong_local = R_local[Rs_local] 158 | 159 | # Get the set of hyperedges to expand around. 160 | # At first this is every hyperedge that touches 161 | # a node from R. 162 | Hc = H[:,C_global] 163 | rp_c = Hc.rowval 164 | # ci_c = Hc.colptr 165 | L_edges = unique(rp_c) 166 | 167 | # Binary indicence matrix for the local hypergraph (without terminal edges) 168 | HL = H[L_edges,Local2Global] 169 | order_L = order[L_edges] 170 | 171 | # Expand into a directed graph 172 | A_L = tl_expansion_inc(HL,order_L,delta) 173 | N_L = size(A_L,1) # includes auxiliary nodes 174 | n_L = length(Local2Global) # number of non-auxiliary nodes in A_L 175 | 176 | # Find the first mincut, which can be done by calling HLC_Step 177 | # with localized objects 178 | S_local = HLC_Step(A_L,C_local,I_local,alpha,epsilon,N_L,d[Local2Global],n_L,Rstrong_local) 179 | 180 | # Find nodes to "expand" around: 181 | # any nodes in the cut set tha are "incomplete" still 182 | E_local = intersect(S_local,I_local) 183 | E_global = Local2Global[E_local] 184 | 185 | # ne = length(E_global) 186 | # println("There are $ne new nodes to expand on") 187 | 188 | # As long as we have new nodes to expand around, we haven't yet found 189 | # the global minimum s-t cut, so we continue. 
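    # (Added summary of the expansion loop below: each pass (i) promotes the
    # newly cut incomplete nodes E to the complete set, (ii) pulls their unseen
    # neighbors into the local hypergraph as new incomplete nodes, (iii) rebuilds
    # the thresholded-linear expansion A_L and re-solves the localized min-cut
    # via HLC_Step. The loop exits once the returned cut set contains no
    # incomplete nodes, at which point it is the global minimum s-t cut.)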
190 | while length(E_local) > 0 191 | 192 | # Update which nodes are complete and which are incomplete 193 | C_local = [C_local; E_local] 194 | C_global = Local2Global[C_local] 195 | 196 | # Take these away from I_local 197 | I_local = setdiff(I_local,E_local) 198 | 199 | # Bring in neighbors of the newly completed nodes that are not yet in L 200 | Nbs_of_E = get_immediate_neighbors(H,Ht,E_global) 201 | Lnew = setdiff(Nbs_of_E,Local2Global) 202 | numNew = length(Lnew) 203 | # Update the set of indices in L 204 | Local2Global = [Local2Global; Lnew] 205 | 206 | # Store local indices for new nodes added to L 207 | Lnew_local = collect((Lsize+1):(Lsize+numNew)) 208 | Lsize = length(Local2Global) 209 | 210 | # These are going to be "incomplete" nodes 211 | I_local = [I_local; Lnew_local] 212 | I_global = Local2Global[I_local] 213 | 214 | # Now that we have a new set of complete and incomplete nodes, 215 | # we do the same thing over again to find a localized min-cut 216 | Hc = H[:,C_global] 217 | rp_c = Hc.rowval 218 | # ci_c = Hc.colptr 219 | L_edges = unique(rp_c) 220 | 221 | # Binary incidence matrix for the local hypergraph (without terminal edges) 222 | HL = H[L_edges,Local2Global] 223 | order_L = order[L_edges] 224 | 225 | # Expand into a directed graph 226 | A_L = tl_expansion_inc(HL,order_L,delta) 227 | N_L = size(A_L,1) # includes auxiliary nodes 228 | n_L = length(Local2Global) # number of non-auxiliary nodes in A_L 229 | 230 | # Find the next mincut by calling HLC_Step 231 | # with localized objects 232 | R_bar_l = setdiff(1:n_L,R_local) 233 | S_local = HLC_Step(A_L,R_local,R_bar_l,alpha,epsilon,N_L,d[Local2Global],n_L,Rstrong_local) 234 | 235 | # Find nodes to "expand" around: 236 | # any nodes in the cut set that are "incomplete" still 237 | E_local = intersect(S_local,I_local) 238 | E_global = Local2Global[E_local] 239 | # ne = length(E_global) 240 | # println("There are $ne new nodes to expand on") 241 | end 242 | 243 | return Local2Global[S_local] 244 | end 245 | -------------------------------------------------------------------------------- /src/maxflow.jl: -------------------------------------------------------------------------------- 1 | using MatrixNetworks 2 | using SparseArrays 3 | 4 | # Push Relabel solver for maximum s-t flow, minimum s-t cut problems 5 | 6 | mutable struct stFlow 7 | flowvalue::Float64 # gives you the max-flow value 8 | cutvalue::Float64 # gives min-cut value, which should equal flowvalue, 9 | # but may differ by a small tolerance value. 10 | source_nodes::Vector{Int64} # give the indices of the nodes attached to the source 11 | C::SparseMatrixCSC # gives the original capacity matrix 12 | F::SparseMatrixCSC # gives the values of the flows on each edge 13 | s::Int64 # index of source node 14 | t::Int64 # index of sink node 15 | end 16 | 17 | """ 18 | maxflow 19 | 20 | Given a sparse matrix B representing a weighted and possibly directed graph, 21 | a source node s, and a sink node t, return the maximum s-t flow. 22 | 23 | flowtol = tolerance parameter for whether there is still capacity available on 24 | an edge. Helps avoid rounding errors. Default is 1e-6. 25 | 26 | Returns F, which is of type stFlow. 27 | """ 28 | function maxflow(B::Union{SparseMatrixCSC,MatrixNetwork},s::Int,t::Int, flowtol::Union{Float64,Int}= 1e-6) 29 | 30 | if flowtol >= .1 31 | println("flowtol is a tolerance parameter for rounding small residual capacity edges to zero, and should be much smaller than $flowtol. 
Changing it to default value 1e-6") 32 | flowtol = 1e-6 33 | end 34 | 35 | # The code actually assumes a SparseMatrixCSC input 36 | if typeof(B) <: SparseMatrixCSC 37 | else 38 | B = sparse(B) 39 | end 40 | 41 | N = size(B,1) 42 | 43 | # Extract weights from source s to non-terminal nodes, 44 | # and from non-terminal nodes to sink node t 45 | sWeights = Array(B[s,:]) 46 | tWeights = Array(B[:,t]) 47 | NonTerminal = setdiff(collect(1:N),[s t]) 48 | 49 | sWeights = sWeights[NonTerminal] 50 | tWeights = tWeights[NonTerminal] 51 | 52 | # Extract the edges between non-terminal nodes 53 | A = B[NonTerminal,NonTerminal] 54 | 55 | # A = the matrix of capacities for all nodes EXCEPT the source and sink 56 | # sWeights = a vector of weights for edges from source to non-terminal nodes 57 | # tWeights = vector of weights from non-terminal nodes to the sink node t. 58 | 59 | # This is the map from the original node indices to the rearranged 60 | # version in which the source is the first node and the sink is the last 61 | Map = [s; NonTerminal; t] 62 | 63 | # Directly set up the flow matrix 64 | C = [spzeros(1,1) sparse(sWeights') spzeros(1,1); 65 | sparse(sWeights) A sparse(tWeights); 66 | spzeros(1,1) sparse(tWeights') spzeros(1,1)] 67 | 68 | # Allocate space for the flow we will calculate 69 | # In a flow problem, we will eventually need to send flow the reverse 70 | # direction, so it's important to allocate space for F[i,j] if C[j,i] is an 71 | # edge, even if C[i,j] is not directed 72 | Cundir = C+C' 73 | F = SparseMatrixCSC(N,N,Cundir.colptr,Cundir.rowval,zeros(length(Cundir.rowval))) 74 | ExcessNodes = vec(round.(Int64,findall(x->x!=0,sWeights).+1)) 75 | 76 | # Initialize the Preflow and the excess vector 77 | for v = ExcessNodes 78 | F[1,v] = C[1,v] 79 | F[v,1] = -C[1,v] 80 | end 81 | excess = [0;sWeights;0] 82 | source_nodes, FlowMat, value = Main_Push_Relabel(C,F,ExcessNodes,excess,flowtol) 83 | 84 | smap = sortperm(Map) 85 | F = stFlow(value, value, sort(Map[source_nodes]),C[smap,smap],FlowMat[smap,smap],s,t) 86 | return F 87 | end 88 | 89 | """ 90 | This maxflow code assumes that A represents the adjacencies between 91 | non-terminal nodes. Edges adjacent to source node s and sink node t 92 | are given by vectors svec and tvec. 93 | 94 | This code sets s as the first node, and t as the last node. 95 | """ 96 | function maxflow(A::Union{SparseMatrixCSC,MatrixNetwork},svec::Vector{Float64},tvec::Vector{Float64}, flowtol::Union{Float64,Int}= 1e-6) 97 | 98 | if flowtol >= .1 99 | println("flowtol is a tolerance parameter for rounding small residual capacity edges to zero, and should be much smaller than $flowtol. Changing it to default value 1e-6") 100 | flowtol = 1e-6 101 | end 102 | if typeof(A) <: SparseMatrixCSC 103 | else 104 | A = sparse(A) 105 | end 106 | 107 | 108 | # Directly set up the flow matrix 109 | C = [spzeros(1,1) sparse(svec') spzeros(1,1); 110 | sparse(svec) A sparse(tvec); 111 | spzeros(1,1) sparse(tvec') spzeros(1,1)] 112 | 113 | N = size(C,1) 114 | 115 | # Allocate space for the flow we will calculate 116 | # In a flow problem, we will eventually need to send flow the reverse 117 | # direction, so it's important to allocate space for F[i,j] if C[j,i] is an 118 | # edge, even if C[i,j] is not directed. 
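# Note on the preflow setup below: the symmetrized pattern C + C' gives F a stored
# entry for both directions of every edge, the loop saturates each edge leaving the
# source (F[1,v] = C[1,v], with -C[1,v] recorded on the reverse edge), and
# excess = [0;svec;0] starts each source neighbor with excess equal to that capacity.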
119 | Cundir = C+C' 120 | F = SparseMatrixCSC(N,N,Cundir.colptr,Cundir.rowval,zeros(length(Cundir.rowval))) 121 | ExcessNodes = vec(round.(Int64,findall(x->x!=0,svec).+1)) 122 | 123 | # Initialize the Preflow and the excess vector 124 | for v = ExcessNodes 125 | F[1,v] = C[1,v] 126 | F[v,1] = -C[1,v] 127 | end 128 | excess = [0;svec;0] 129 | source_nodes, FlowMat, value = Main_Push_Relabel(C,F,ExcessNodes,excess,flowtol) 130 | 131 | F = stFlow(value,value,source_nodes,C,FlowMat,1,N) 132 | end 133 | 134 | maxflow(A::Union{SparseMatrixCSC,MatrixNetwork},svec::Vector{Int64},tvec::Vector{Int64},flowtol::Union{Float64,Int}= 1e-6) = 135 | maxflow(A,float(svec),float(tvec),flowtol) 136 | 137 | 138 | flow(F::stFlow) = 139 | F.flowvalue 140 | 141 | """ 142 | Given a flow, stored in an stFlow object, return the set of nodes attached to 143 | the source 144 | """ 145 | function source_nodes(F::stFlow,flowtol::Union{Float64,Int}= 1e-6) 146 | # Run a bfs from the sink node. Anything with distance 147 | # n is disconnected from the sink. Thus it's part of the minimum cut set 148 | n = size(F.C,2) 149 | finalHeight = relabeling_bfs(F.C,F.F,flowtol, F.t) 150 | S = Vector{Int64}() 151 | for i = 1:n 152 | if finalHeight[i] == n 153 | push!(S,i) 154 | end 155 | end 156 | 157 | # Sanity checks: source node is on source side, sink node is on sink side 158 | @assert(~in(F.t,S)) 159 | @assert(in(F.s,S)) 160 | 161 | return S 162 | end 163 | 164 | # Get the smallest source-side set 165 | function source_nodes_min(F::stFlow,flowtol::Union{Float64,Int}= 1e-6) 166 | # Run a bfs from the source node. Anything with distance 167 | # 0 296 | 297 | u = pop!(Queue) # Select a new active node 298 | 299 | inQ[u] = false # Take it out of the queue 300 | 301 | # discharge flow through node u 302 | relabelings += discharge!(C,F,Queue,u,Neighbs[u],height,excess,n,d[u],inQ,flowtol) 303 | 304 | # if u is still active, put it back into the queue 305 | if excess[u] > flowtol 306 | prepend!(Queue,u) 307 | inQ[u] = true 308 | end 309 | 310 | # Global relabeling heuristic for push-relabel algorithm. 311 | # This periodically recomputes distances between nodes and the sink 312 | if relabelings == n 313 | relabelings = 0 314 | dist = relabeling_bfs(C,F,flowtol) 315 | height = dist 316 | end 317 | 318 | end 319 | 320 | # Compute final distances from sink using BFS. Anything with distance 321 | # n is disconnected from the sink. Thus it's part of the minimum cut set 322 | finalHeight = relabeling_bfs(C,F,flowtol,n) 323 | S = Vector{Int64}() 324 | push!(S,1) # Include the source node 325 | for i = 2:n 326 | if finalHeight[i] == n 327 | push!(S,i) 328 | end 329 | end 330 | 331 | mflow = excess[n] # the excess at the sink equals the maximum flow value 332 | 333 | return S, F, mflow 334 | 335 | end 336 | 337 | # Discharge operation: pushes flow away from node u across admissible edges. 338 | # If excess[u] > 0 but no admissible edges exist, we relabel u. 339 | function discharge!(C::SparseMatrixCSC,F::SparseMatrixCSC, 340 | Queue::Vector{Int64},u::Int64,uNeighbs::Array{Int64},height::Array{Int64}, 341 | excess::Array{Float64},n::Int64,du::Int64,inQ::Array{Bool}, 342 | flowtol::Union{Float64,Int}= 1e-6) 343 | 344 | vLocal::Int64 = 1 # Start at the first neighbor of node u 345 | hu = height[u] 346 | relabeled = 0 347 | 348 | # As long as there is excess at node u and there is another neighbor to explore... 
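# In the loop below, an edge (u,v) counts as admissible when it still has residual
# capacity (C[u,v] - F[u,v] > flowtol) and height[u] > height[v]; under the standard
# push-relabel invariant (height[u] <= height[v] + 1 on residual edges) this matches
# the usual admissibility condition height[u] == height[v] + 1.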
349 | while excess[u] > flowtol && vLocal <= du 350 | 351 | # ...grab the next neighbor of node u 352 | v = uNeighbs[vLocal] 353 | 354 | # ... if edge (u,v) is admissible, push more flow. 355 | # Otherwise, move to the next neighbor of u 356 | if hu > height[v] && C[u,v] - F[u,v] > flowtol 357 | pushflow!(C,F,Queue,u,v,excess,height,inQ,n) 358 | vLocal += 1 359 | else 360 | vLocal += 1 361 | end 362 | end 363 | 364 | # if we needed to visit every neighbor of u, we must relabel u, 365 | # so that at least one admissible edge is created 366 | if vLocal > du 367 | relabeled = 1 368 | relabel!(C,F,Queue,u,uNeighbs,height,du,n,flowtol) 369 | end 370 | 371 | return relabeled 372 | end 373 | 374 | # Relabel sets the label/height of node u to be equal to the minimum label 375 | # such that an admissible edge exists. An edge (u,v) is admissible if 376 | # height[u] = height[v] + 1 377 | function relabel!(C::SparseMatrixCSC,F::SparseMatrixCSC, 378 | Queue::Vector{Int64},u::Int64,uNeighbs::Array{Int64},height::Array{Int64}, 379 | du::Int64,n::Int64,flowtol::Union{Float64,Int}= 1e-6) 380 | # find smallest new height making a push possible, if such a push is possible 381 | 382 | min_height = Inf 383 | # search through the neighbors of u 384 | # and relabel so that height[u] = height[v] + 1 for some v in the neighborhood 385 | for vLocal = 1:du 386 | v = uNeighbs[vLocal] 387 | if C[u,v] - F[u,v] > flowtol 388 | min_height = min(min_height, height[v]) 389 | height[u] = min_height + 1 390 | end 391 | end 392 | 393 | end 394 | 395 | # Push flow from an active node u to a node v via an admissible edge (u,v) 396 | function pushflow!(C::SparseMatrixCSC,F::SparseMatrixCSC, 397 | Queue::Vector{Int},u::Int64,v::Int64,excess::Array{Float64},height::Array{Int64}, 398 | inQ::Array{Bool},n::Int64) 399 | 400 | send = min(excess[u], C[u,v] - F[u,v]) 401 | F[u,v] += send 402 | F[v,u] -= send 403 | excess[u] -= send 404 | excess[v] += send 405 | 406 | # If v isn't in the queue, isn't the sink, isn't the source, 407 | # and is active, then add it to the Queue 408 | if ~inQ[v] && v < n && v > 1 409 | prepend!(Queue,v) 410 | inQ[v] = true 411 | end 412 | end 413 | 414 | # From the adjacency matrix, build an adjacency list for the graph 415 | function ConstructAdj(C::SparseMatrixCSC,n::Int64) 416 | rp = C.rowval 417 | ci = C.colptr 418 | Neighbs = Vector{Vector{Int64}}() 419 | d = zeros(Int64,n) 420 | for i = 1:n 421 | # chop up the rp vector and put it in Neighbs 422 | push!(Neighbs,rp[ci[i]:ci[i+1]-1]) 423 | d[i] = ci[i+1]-ci[i] 424 | end 425 | 426 | # d is the number of neighbors. This is the unweighted degree, 427 | # but note importantly that if the original graph is weighted this is 428 | # not the same as the degree vector d we will sometimes use 429 | return Neighbs, d 430 | 431 | end 432 | 433 | # Given initial capacity matrix C and flow matrix F, compute the distance 434 | # from each node to the specified "start" node. 435 | # Start defaults to node n, which is assumed to be the sink node 436 | function relabeling_bfs(C::SparseMatrixCSC,F::SparseMatrixCSC,flowtol::Union{Float64,Int}=1e-6,start::Int64=0) 437 | 438 | if flowtol >= .1 439 | println("flowtol is a tolerance parameter for rounding small residual capacity edges to zero, and should be much smaller than $flowtol. 
Changing it to default value 1e-6") 440 | flowtol = 1e-6 441 | end 442 | 443 | # To avoid subtraction cancellation errors that may have occurred when pushing 444 | # flow, round residual edges to zero if they fall under a certain tolerance 445 | # when computing the bfs 446 | Cf = C-F 447 | Cf = Cf.*(Cf.>flowtol) 448 | n = size(Cf,1) 449 | 450 | if start == 0 451 | start = n 452 | end 453 | 454 | rp = Cf.colptr 455 | ci = Cf.rowval 456 | 457 | N=length(rp)-1 458 | 459 | d = n*ones(Int64,N) 460 | sq=zeros(Int64,N) 461 | sqt=0 462 | sqh=0 # search queue and search queue tail/head 463 | 464 | # start bfs at the node "start" 465 | u = start 466 | sqt=sqt+1 467 | sq[sqt]=u 468 | d[u]=0 469 | while sqt-sqh>0 470 | sqh=sqh+1 471 | v=sq[sqh] # pop v off the head of the queue 472 | for ri=rp[v]:rp[v+1]-1 473 | w=ci[ri] 474 | if d[w] > n-1 475 | sqt=sqt+1 476 | sq[sqt]=w 477 | d[w]= d[v]+1 478 | end 479 | end 480 | end 481 | 482 | return d 483 | end 484 | --------------------------------------------------------------------------------
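A minimal usage sketch for the flow solver above (not part of the repository; the toy capacity matrix, the source/sink weight vectors, and the include path are illustrative assumptions). It mirrors how HLC_Step calls the vector interface of maxflow and recovers the source-side set:

using SparseArrays
include("maxflow.jl")                  # assumption: run from the src/ directory

# Toy instance: 3 non-terminal nodes on a path, source attached to node 1, sink to node 3
A    = sparse([0.0 1.0 0.0; 1.0 0.0 1.0; 0.0 1.0 0.0])   # capacities between non-terminal nodes
svec = [2.0; 0.0; 0.0]                 # source -> node capacities
tvec = [0.0; 0.0; 2.0]                 # node -> sink capacities

F = maxflow(A, svec, tvec)             # stFlow with the source at index 1 and the sink at index N
@show flow(F)                          # maximum s-t flow value
S = source_nodes_min(F)[2:end] .- 1    # smallest source-side set, shifted back to 1:3 indexing
@show S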