├── Exp-Amazon ├── .DS_Store ├── Largest_Experiments.jl ├── Medium_Experiments.jl ├── Output │ ├── Large_200_10000_1.0.mat │ ├── Medium_50_2000_1.0.mat │ ├── Medium_50_3000_1.0.mat │ ├── Smallest_10_200_0.001.mat │ ├── Smallest_10_200_0.01.mat │ ├── Smallest_10_200_0.1.mat │ ├── Smallest_10_200_1.0.mat │ ├── Smallest_10_200_10.0.mat │ ├── Smallest_10_300_0.001.mat │ ├── Smallest_10_300_0.01.mat │ ├── Smallest_10_300_0.1.mat │ ├── Smallest_10_300_1.0.mat │ ├── Smallest_10_300_10.0.mat │ ├── Smallest_10_500_0.001.mat │ ├── Smallest_10_500_0.01.mat │ ├── Smallest_10_500_0.1.mat │ ├── Smallest_10_500_1.0.mat │ ├── Smallest_10_500_10.0.mat │ ├── Smallest_5_200_0.001.mat │ ├── Smallest_5_200_0.01.mat │ ├── Smallest_5_200_0.1.mat │ ├── Smallest_5_200_1.0.mat │ ├── Smallest_5_200_10.0.mat │ ├── Smallest_5_300_0.001.mat │ ├── Smallest_5_300_0.01.mat │ ├── Smallest_5_300_0.1.mat │ ├── Smallest_5_300_1.0.mat │ ├── Smallest_5_300_10.0.mat │ ├── Smallest_5_500_0.001.mat │ ├── Smallest_5_500_0.01.mat │ ├── Smallest_5_500_0.1.mat │ ├── Smallest_5_500_1.0.mat │ └── Smallest_5_500_10.0.mat ├── Output_VaryDelta │ ├── .DS_Store │ ├── Label_12_10_200.mat │ ├── Label_15_200_10000.mat │ ├── Label_17_50_2000.mat │ ├── Label_18_50_2000.mat │ ├── Label_1_10_200.mat │ ├── Label_24_200_10000.mat │ ├── Label_25_200_10000.mat │ ├── Label_2_10_200.mat │ └── Label_3_10_200.mat ├── Output_VaryEps │ ├── .DS_Store │ ├── Label_12_10_300.mat │ ├── Label_18_10_300.mat │ ├── Label_1_10_300.mat │ ├── Label_2_10_300.mat │ └── Label_3_10_300.mat ├── Plots │ ├── .DS_Store │ ├── AmazonVaryDelta.pdf │ └── Smallest_VaryEps_seednum_10_grownum_300.pdf ├── Plots_Vary_Delta.jl ├── Plots_Vary_Epsilon.jl ├── Print_to_Table_F1_T_run.jl ├── Run_Experiments.jl ├── Smallest_Experiments.jl ├── VaryDelta_Experimentsl.jl └── VaryEpsilon_Experiments.jl ├── Exp-Stackoverflow ├── .DS_Store ├── Clique_Expansion_Stack.jl ├── Output_Stack │ └── Set45_1.0_5000.0.mat ├── Output_VaryDelta │ ├── .DS_Store │ ├── Cluster_22943_10000_100_1.0.mat │ ├── Cluster_25849_10000_100_1.0.mat │ ├── Cluster_27596_10000_100_1.0.mat │ ├── Cluster_28886_10000_100_1.0.mat │ ├── Cluster_28918_10000_100_1.0.mat │ ├── Cluster_29386_10000_100_1.0.mat │ ├── Cluster_43507_10000_100_1.0.mat │ └── Cluster_5713_10000_100_1.0.mat ├── Plots │ ├── .DS_Store │ └── StackDots_Wide.pdf ├── Plots_Stack_Experiments.jl ├── Plots_VaryDeltaStack.jl ├── Stack_LargeDelta_Experiments.jl └── Stack_LargeDelta_Table.jl ├── LICENSE ├── README.txt ├── data └── Amazon-Clusters.txt ├── include └── FlowSeed.jl └── src ├── .DS_Store ├── Helper_Functions.jl ├── HyperLocal.jl └── maxflow.jl /Exp-Amazon/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/.DS_Store -------------------------------------------------------------------------------- /Exp-Amazon/Largest_Experiments.jl: -------------------------------------------------------------------------------- 1 | labels = [15; 24] 2 | lnum = length(labels) 3 | 4 | # See outer parameters 5 | delta = 1.0 # all-or-nothing cut 6 | seednum = 200 7 | ntimes = 5 8 | epsis = [1.0] 9 | grownum = 10000 10 | 11 | for e = 1:length(epsis) 12 | 13 | # Output from HyperLocal 14 | hl_pr = zeros(lnum,ntimes) 15 | hl_re = zeros(lnum,ntimes) 16 | hl_f1 = zeros(lnum,ntimes) 17 | hl_time = zeros(lnum,ntimes) 18 | hl_size = zeros(lnum,ntimes) 19 | newS = zeros(lnum,ntimes) 20 | hl_cond = zeros(lnum,ntimes) 21 | 22 | # Output 
from first baseline 1 23 | b1_pr = zeros(lnum,ntimes) 24 | b1_re = zeros(lnum,ntimes) 25 | b1_f1 = zeros(lnum,ntimes) 26 | b1_cond = zeros(lnum,ntimes) 27 | 28 | # Output from baseline 2 29 | b2_pr = zeros(lnum,ntimes) 30 | b2_re = zeros(lnum,ntimes) 31 | b2_f1 = zeros(lnum,ntimes) 32 | b2_cond = zeros(lnum,ntimes) 33 | 34 | # Keep track of R 35 | r_pr = zeros(lnum,ntimes) 36 | r_re = zeros(lnum,ntimes) 37 | r_f1 = zeros(lnum,ntimes) 38 | r_cond = zeros(lnum,ntimes) 39 | 40 | # For each epsilon we store a different matrix of outputs 41 | epsilon = epsis[e] 42 | outputmat = "Output/Large_$seednum"*"_$grownum"*"_$epsilon.mat" 43 | println(outputmat) 44 | 45 | # For a fixed epsilon, seednum, and grownum, run experiments 46 | # on each cluster multiple times 47 | for lab = 1:length(labels) 48 | label = labels[lab] 49 | T = findall(x->x ==label,NodeLabels) 50 | nT = length(T) 51 | 52 | for index = 1:ntimes 53 | 54 | # Generate a new seed set 55 | p = randperm(nT) 56 | Rstart = T[p[1:seednum]] 57 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 58 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 59 | R = union(Rmore,Rstart) 60 | Rs = findall(x->in(x,Rstart),R) # Force seed nodes to be in output set 61 | prr, rer, f1r = PRF(T,R) 62 | r_pr[index] = prr 63 | r_re[index] = rer 64 | r_f1[index] = f1r 65 | condR, volR, cutR = tl_cond(H,R,d,1.0,volA,order) 66 | r_cond[index] = condR 67 | 68 | # Run HyperLocal 69 | s = time() 70 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 71 | hl_time[lab,index] = time()-s 72 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 73 | pr, re, f1 = PRF(T,S) 74 | hl_pr[lab,index] = pr 75 | hl_re[lab,index] = re 76 | hl_f1[lab,index] = f1 77 | hl_size[lab,index] = length(S) 78 | hl_cond[lab,index] = condS 79 | nS = length(setdiff(S,R)) 80 | newS[lab,index] = nS 81 | 82 | # First baseline 83 | kS = nT-length(Rstart) 84 | B1 = BestNeighbors(H,d,Rstart,OneHop,kS) 85 | pr1, re1, f11 = PRF(T,B1) 86 | b1_pr[lab,index] = pr1 87 | b1_re[lab,index] = re1 88 | b1_f1[lab,index] = f11 89 | cond, vol, cut = tl_cond(H,B1,d,1.0,volA,order) 90 | b1_cond[index] = cond 91 | 92 | 93 | # Baseline 2 94 | B2 = TopNeighbors(H,Rstart,OneHop,kS) 95 | pr2, re2, f12 = PRF(T,B2) 96 | b2_pr[lab,index] = pr2 97 | b2_re[lab,index] = re2 98 | b2_f1[lab,index] = f12 99 | cond, vol, cut = tl_cond(H,B2,d,1.0,volA,order) 100 | b2_cond[index] = cond 101 | 102 | println("$label ($nT): $f11 \t $f12 \t $f1 \t $nS") 103 | end 104 | 105 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 106 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond, 107 | "b1_pr"=>b1_pr, "b1_re"=>b1_re, "b1_f1"=>b1_f1,"b1_cond"=>b1_cond,"b2_cond"=>b2_cond, 108 | "r_pr"=>r_pr, "r_re"=>r_re, "r_f1"=>r_f1, "r_cond"=>r_cond, 109 | "b2_pr"=>b2_pr, "b2_re"=>b2_re, "b2_f1"=>b2_f1)) 110 | 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /Exp-Amazon/Medium_Experiments.jl: -------------------------------------------------------------------------------- 1 | labels = [17; 25] 2 | lnum = length(labels) 3 | 4 | # See outer parameters 5 | seednum = 50 6 | ntimes = 5 7 | epsis = [1.0] 8 | delta = 1.0 # stick with the all-or-nothing cut 9 | 10 | for grownum = [2000 3000] 11 | 12 | for e = 1:length(epsis) 13 | 14 | # Output from HyperLocal 15 | hl_pr = zeros(lnum,ntimes) 16 | hl_re = zeros(lnum,ntimes) 17 | hl_f1 = zeros(lnum,ntimes) 18 | hl_time = zeros(lnum,ntimes) 19 | hl_size = zeros(lnum,ntimes) 20 | newS = zeros(lnum,ntimes) 21 | 
hl_cond = zeros(lnum,ntimes) 22 | 23 | # Output from first baseline 1 24 | b1_pr = zeros(lnum,ntimes) 25 | b1_re = zeros(lnum,ntimes) 26 | b1_f1 = zeros(lnum,ntimes) 27 | b1_cond = zeros(lnum,ntimes) 28 | 29 | # Output from baseline 2 30 | b2_pr = zeros(lnum,ntimes) 31 | b2_re = zeros(lnum,ntimes) 32 | b2_f1 = zeros(lnum,ntimes) 33 | b2_cond = zeros(lnum,ntimes) 34 | 35 | # Keep track of R 36 | r_pr = zeros(ntimes) 37 | r_re = zeros(ntimes) 38 | r_f1 = zeros(ntimes) 39 | r_cond = zeros(ntimes) 40 | 41 | # For each epsilon we store a different matrix of outputs 42 | epsilon = epsis[e] 43 | outputmat = "Output/Medium_$seednum"*"_$grownum"*"_$epsilon.mat" 44 | println(outputmat) 45 | 46 | # For a fixed epsilon, seednum, and grownum, run experiments 47 | # on each cluster multiple times 48 | for lab = 1:length(labels) 49 | label = labels[lab] 50 | T = findall(x->x ==label,NodeLabels) 51 | nT = length(T) 52 | 53 | for index = 1:ntimes 54 | 55 | # Generate a new seed set 56 | p = randperm(nT) 57 | Rstart = T[p[1:seednum]] 58 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 59 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 60 | R = union(Rmore,Rstart) 61 | Rs = findall(x->in(x,Rstart),R) # Force seed nodes to be in output set 62 | prr, rer, f1r = PRF(T,R) 63 | r_pr[index] = prr 64 | r_re[index] = rer 65 | r_f1[index] = f1r 66 | condR, volR, cutR = tl_cond(H,R,d,1.0,volA,order) 67 | r_cond[index] = condR 68 | 69 | # Run HyperLocal 70 | s = time() 71 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 72 | hl_time[lab,index] = time()-s 73 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 74 | pr, re, f1 = PRF(T,S) 75 | hl_pr[lab,index] = pr 76 | hl_re[lab,index] = re 77 | hl_f1[lab,index] = f1 78 | hl_size[lab,index] = length(S) 79 | hl_cond[lab,index] = condS 80 | nS = length(setdiff(S,R)) 81 | newS[lab,index] = nS 82 | 83 | # First baseline 84 | kS = nT-length(Rstart) 85 | B1 = BestNeighbors(H,d,Rstart,OneHop,kS) 86 | pr1, re1, f11 = PRF(T,B1) 87 | b1_pr[lab,index] = pr1 88 | b1_re[lab,index] = re1 89 | b1_f1[lab,index] = f11 90 | cond, vol, cut = tl_cond(H,B1,d,1.0,volA,order) 91 | b1_cond[index] = cond 92 | 93 | # Baseline 2 94 | B2 = TopNeighbors(H,Rstart,OneHop,kS) 95 | pr2, re2, f12 = PRF(T,B2) 96 | b2_pr[lab,index] = pr2 97 | b2_re[lab,index] = re2 98 | b2_f1[lab,index] = f12 99 | cond, vol, cut = tl_cond(H,B2,d,1.0,volA,order) 100 | b2_cond[index] = cond 101 | 102 | println("$label ($nT): $f11 \t $f12 \t $f1 \t $nS") 103 | end 104 | 105 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 106 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond, 107 | "b1_pr"=>b1_pr, "b1_re"=>b1_re, "b1_f1"=>b1_f1,"b1_cond"=>b1_cond,"b2_cond"=>b2_cond, 108 | "r_pr"=>r_pr, "r_re"=>r_re, "r_f1"=>r_f1, "r_cond"=>r_cond, 109 | "b2_pr"=>b2_pr, "b2_re"=>b2_re, "b2_f1"=>b2_f1)) 110 | 111 | end 112 | end 113 | 114 | end 115 | -------------------------------------------------------------------------------- /Exp-Amazon/Output/Large_200_10000_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Large_200_10000_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Medium_50_2000_1.0.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Medium_50_2000_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Medium_50_3000_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Medium_50_3000_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_200_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_200_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_200_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_200_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_200_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_200_0.1.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_200_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_200_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_200_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_200_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_300_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_300_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_300_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_300_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_300_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_300_0.1.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_300_1.0.mat: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_300_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_300_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_300_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_500_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_500_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_500_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_500_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_500_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_500_0.1.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_500_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_500_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_10_500_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_10_500_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_200_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_200_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_200_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_200_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_200_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_200_0.1.mat -------------------------------------------------------------------------------- 
/Exp-Amazon/Output/Smallest_5_200_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_200_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_200_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_200_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_300_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_300_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_300_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_300_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_300_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_300_0.1.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_300_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_300_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_300_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_300_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_500_0.001.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_500_0.001.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_500_0.01.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_500_0.01.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_500_0.1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_500_0.1.mat -------------------------------------------------------------------------------- 
/Exp-Amazon/Output/Smallest_5_500_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_500_1.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output/Smallest_5_500_10.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output/Smallest_5_500_10.0.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/.DS_Store -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_12_10_200.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_12_10_200.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_15_200_10000.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_15_200_10000.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_17_50_2000.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_17_50_2000.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_18_50_2000.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_18_50_2000.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_1_10_200.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_1_10_200.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_24_200_10000.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_24_200_10000.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_25_200_10000.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_25_200_10000.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_2_10_200.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_2_10_200.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryDelta/Label_3_10_200.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryDelta/Label_3_10_200.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/.DS_Store -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/Label_12_10_300.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/Label_12_10_300.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/Label_18_10_300.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/Label_18_10_300.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/Label_1_10_300.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/Label_1_10_300.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/Label_2_10_300.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/Label_2_10_300.mat -------------------------------------------------------------------------------- /Exp-Amazon/Output_VaryEps/Label_3_10_300.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Output_VaryEps/Label_3_10_300.mat -------------------------------------------------------------------------------- /Exp-Amazon/Plots/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Plots/.DS_Store -------------------------------------------------------------------------------- /Exp-Amazon/Plots/AmazonVaryDelta.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Plots/AmazonVaryDelta.pdf -------------------------------------------------------------------------------- /Exp-Amazon/Plots/Smallest_VaryEps_seednum_10_grownum_300.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Amazon/Plots/Smallest_VaryEps_seednum_10_grownum_300.pdf -------------------------------------------------------------------------------- /Exp-Amazon/Plots_Vary_Delta.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | using Plots 4 | labels = [1; 2; 3; 12; 18; 17; 25; 15; 24] 5 | 6 | plot() 7 | s1 = 300 8 | s2 = 250 9 | ms = 4 10 | using LaTeXStrings 11 | x_label = L"\delta" 12 | y_label = "F1 Scores" 13 | deltas = 10 .^LinRange(0,3,10) 14 | for lab = 1:7 15 | label = labels[lab] 16 | 17 | if lab < 5 18 | grownum = 200 19 | seednum = 10 20 | color = :blue 21 | elseif lab < 7 22 | grownum = 2000 23 | seednum = 50 24 | color = :red 25 | else 26 | grownum = 10000 27 | seednum = 200 28 | color = :green 29 | end 30 | 31 | outputmat = "Output_VaryDelta/Label_$label"*"_$seednum"*"_$grownum.mat" 32 | mat = matread(outputmat) 33 | hl = mat["hl_f1"] 34 | plot!(deltas, hl, grid = false, #label = LabelNames[label], 35 | markerstrokewidth = 0, markershape = :circle, linewidth = 2, xaxis = :log10, 36 | size = (s1,s2), markersize = ms,xguidefont=font(18), legend = false, 37 | xlabel = x_label, ylabel = y_label, legendfont=font(7)) 38 | #color = color) 39 | 40 | end 41 | 42 | savefig("Plots/AmazonVaryDelta.pdf") 43 | -------------------------------------------------------------------------------- /Exp-Amazon/Plots_Vary_Epsilon.jl: -------------------------------------------------------------------------------- 1 | using MAT, Statistics  # Statistics provides mean, used below 2 | 3 | ## Colors 4 | C = [1 0 0; 5 | 0 .75 0; 6 | 0 0 1; 7 | 0 .5 .5; 8 | .5 0 .5; 9 | 1 .5 0; 10 | 0 0 0; 11 | .75 .75 0; 12 | 0.1 0.5 0.6; 13 | 0 .75 .75; 14 | .4 .5 .3] 15 | 16 | using Plots 17 | # Must be kept in this order, unless you are careful about what you do 18 | labels = [1; 2; 3; 12; 18; 17; 25; 15; 24] 19 | names = ["Amazon Fashion", "All Beauty", "Appliances", 20 | "Gift Cards", "Magazine Subscriptions", "Luxury Beauty", "Software", 21 | "Industrial and Scientific","Prime Pantry" ] 22 | sizes = [31; 85; 48; 148; 157; 1581; 802; 5334; 4970] 23 | lnum = length(labels) 24 | 25 | # See outer parameters 26 | seednum = 10 # 5 27 | grownum = 300 #[200; 300; 500] 28 | epsis = [10.0 1.0 0.1 0.01 0.001] 29 | 30 | plot() 31 | for lab = [1;3;4;5] # For each cluster 32 | label = labels[lab] 33 | outputmat = "Output_VaryEps/Label_$label"*"_$seednum"*"_$grownum.mat" 34 | 35 | mat = matread(outputmat) 36 | hl_time = mean(mat["hl_time"],dims = 2) 37 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 38 | b1 = round.(mat["b1_f1"],digits = 2) 39 | b2 = round.(mat["b2_f1"],digits = 2) 40 | r = round.(mat["r_f1"],digits = 2) 41 | re = round.(mat["r_re"],digits = 2) 42 | pr = round.(mat["r_pr"],digits = 2) 43 | nS = mat["newS"] 44 | 45 | lw = 3 46 | plot!(epsis',hl,linewidth = lw, grid = false, markershape =:circle, 47 | legend = :bottomleft, color = RGBA(C[lab,1],C[lab,2],C[lab,3],1), 48 | xaxis=:log10, label = "label $(label)") 49 | 50 | end 51 |
savefig("Plots/Smallest_VaryEps_seednum_$seednum"*"_grownum_$grownum.pdf") 52 | -------------------------------------------------------------------------------- /Exp-Amazon/Print_to_Table_F1_T_run.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Statistics 3 | 4 | # Labels for Amazon datasets 5 | labels = [1; 2; 3; 12; 18; 17; 25; 15; 24] 6 | names = ["Amazon Fashion", "All Beauty", "Appliances", 7 | "Gift Cards", "Magazine Subscriptions", "Luxury Beauty", "Software", 8 | "Industrial and Scientific","Prime Pantry" ] 9 | sizes = [31; 85; 48; 148; 157; 1581; 802; 5334; 4970] 10 | lnum = length(labels) 11 | 12 | # See outer parameters 13 | epsilon = 1.0 14 | s1 = 10 15 | s2 = 50 16 | s3 = 200 17 | g1 = 200 18 | g2 = 2000 19 | g3 = 10000 20 | outputmat = "Output/Smallest_$s1"*"_$g1"*"_$epsilon.mat" 21 | 22 | println("\nSeeds = ($s1, $s2, $s3), Grow by ($g1, $g2, $g3), Epsilon = $epsilon") 23 | mat = matread(outputmat) 24 | hl_time = round.(mean(mat["hl_time"],dims = 2),digits = 1) 25 | hl_size= round.(mean(mat["hl_size"],dims = 2),digits = 1) 26 | r = round.(mean(mat["r_f1"],dims = 2),digits = 2) 27 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 28 | b1 = round.(mean(mat["b1_f1"],dims = 2),digits = 2) 29 | b2 = round.(mean(mat["b2_f1"],dims = 2),digits = 2) 30 | nS = round.(mean(mat["newS"],dims = 2),digits = 2) 31 | 32 | for i = 1:size(hl_time,1) 33 | println(names[i]*" & $(sizes[i]) & $(hl_time[i])& $(hl[i]) & $(b1[i]) & $(b2[i]) & $(r[i]) \\\\") 34 | end 35 | 36 | ## Get medium clusters 37 | outputmat = "Output/Medium_$s2"*"_$g2"*"_$epsilon.mat" 38 | mat = matread(outputmat) 39 | hl_time = round.(mean(mat["hl_time"],dims = 2),digits = 1) 40 | hl_size= round.(mean(mat["hl_size"],dims = 2),digits = 1) 41 | nS = round.(mean(mat["newS"],dims = 2),digits = 2) 42 | r = round.(mean(mat["r_f1"],dims = 2),digits = 2) 43 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 44 | b1 = round.(mean(mat["b1_f1"],dims = 2),digits = 2) 45 | b2 = round.(mean(mat["b2_f1"],dims = 2),digits = 2) 46 | 47 | for i = 1:size(hl_time,1) 48 | println(names[i+5]*" & $(sizes[i+5]) & $(hl_time[i])& $(hl[i]) & $(b1[i]) & $(b2[i]) & $(r[i]) \\\\") 49 | end 50 | 51 | 52 | ## Get large clusters 53 | outputmat = "Output/Large_$s3"*"_$g3"*"_$epsilon.mat" 54 | mat = matread(outputmat) 55 | hl_time = round.(mean(mat["hl_time"],dims = 2),digits = 1) 56 | hl_size= round.(Int64,mean(mat["hl_size"],dims = 1)) 57 | r = round.(mean(mat["r_f1"],dims = 2),digits = 2) 58 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 59 | b1 = round.(mean(mat["b1_f1"],dims = 2),digits = 2) 60 | b2 = round.(mean(mat["b2_f1"],dims = 2),digits = 2) 61 | nS = round.(mean(mat["newS"],dims = 2),digits = 2) 62 | 63 | for i = 1:size(hl_time,1) 64 | println(names[i+7]*" & $(sizes[i+7]) & $(hl_time[i]) & $(hl[i]) & $(b1[i]) & $(b2[i]) & $(r[i]) \\\\") 65 | end 66 | -------------------------------------------------------------------------------- /Exp-Amazon/Run_Experiments.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | include("../src/HyperLocal.jl") 4 | 5 | ## Read in the matrix 6 | s = time() 7 | @time M = matread("../data/AmazonReview5core_H.mat") 8 | NodeLabels = vec(M["NodeLabels"]) 9 | NodeNames = M["NodeNames"] 10 | LabelNames = M["LabelNames"] 11 | H = M["H"] 12 | order = round.(Int64,vec(sum(H,dims=2))) 13 | 14 | ## Extract hyperedges with only one node 15 | e = findall(x->x>1,order) 16 | H = H[e,:] 17 | d = 
vec(sum(H,dims=1)) 18 | order = round.(Int64,vec(sum(H,dims=2))) 19 | volA = sum(d) 20 | m,n = size(H) 21 | Ht = sparse(H') 22 | toc = time()-s 23 | println("Done loading things into memory in $toc seconds.") 24 | 25 | ## Run several sets of experiments 26 | include("Smallest_Experiments.jl") 27 | include("Medium_Experiments.jl") 28 | include("Largest_Experiments.jl") 29 | -------------------------------------------------------------------------------- /Exp-Amazon/Smallest_Experiments.jl: -------------------------------------------------------------------------------- 1 | labels = [1; 2; 3; 12; 18] 2 | lnum = length(labels) 3 | 4 | # See outer parameters 5 | seednum = 10 6 | ntimes = 5 7 | epsis = [1.0] 8 | delta = 1.0 # stick with the all-or-nothing cut 9 | 10 | for grownum = [200; 300] 11 | 12 | for e = 1:length(epsis) 13 | 14 | # Output from HyperLocal 15 | hl_pr = zeros(lnum,ntimes) 16 | hl_re = zeros(lnum,ntimes) 17 | hl_f1 = zeros(lnum,ntimes) 18 | hl_time = zeros(lnum,ntimes) 19 | hl_size = zeros(lnum,ntimes) 20 | newS = zeros(lnum,ntimes) 21 | hl_cond = zeros(lnum,ntimes) 22 | 23 | # Output from first baseline 1 24 | b1_pr = zeros(lnum,ntimes) 25 | b1_re = zeros(lnum,ntimes) 26 | b1_f1 = zeros(lnum,ntimes) 27 | b1_cond = zeros(lnum,ntimes) 28 | 29 | # Output from baseline 2 30 | b2_pr = zeros(lnum,ntimes) 31 | b2_re = zeros(lnum,ntimes) 32 | b2_f1 = zeros(lnum,ntimes) 33 | b2_cond = zeros(lnum,ntimes) 34 | 35 | # Keep track of R 36 | r_pr = zeros(ntimes) 37 | r_re = zeros(ntimes) 38 | r_f1 = zeros(ntimes) 39 | r_cond = zeros(ntimes) 40 | 41 | # For each epsilon we store a different .mat file of outputs 42 | epsilon = epsis[e] 43 | outputmat = "Output/Smallest_$seednum"*"_$grownum"*"_$epsilon.mat" 44 | println(outputmat) 45 | 46 | # For a fixed epsilon, seednum, and grownum, run experiments 47 | # on each cluster multiple times 48 | for lab = 1:length(labels) 49 | label = labels[lab] 50 | T = findall(x->x ==label,NodeLabels) 51 | nT = length(T) 52 | 53 | for index = 1:ntimes 54 | 55 | # Generate a new seed set 56 | p = randperm(nT) 57 | Rstart = T[p[1:seednum]] 58 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 59 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 60 | R = union(Rmore,Rstart) 61 | Rs = findall(x->in(x,Rstart),R) # Force seed nodes to be in output set 62 | prr, rer, f1r = PRF(T,R) 63 | r_pr[index] = prr 64 | r_re[index] = rer 65 | r_f1[index] = f1r 66 | condR, volR, cutR = tl_cond(H,R,d,1.0,volA,order) 67 | r_cond[index] = condR 68 | 69 | # Run HyperLocal 70 | s = time() 71 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 72 | hl_time[lab,index] = time()-s 73 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 74 | pr, re, f1 = PRF(T,S) 75 | hl_pr[lab,index] = pr 76 | hl_re[lab,index] = re 77 | hl_f1[lab,index] = f1 78 | hl_size[lab,index] = length(S) 79 | hl_cond[lab,index] = condS 80 | nS = length(setdiff(S,R)) 81 | newS[lab,index] = nS 82 | 83 | # First baseline 84 | kS = nT-length(Rstart) 85 | B1 = BestNeighbors(H,d,Rstart,OneHop,kS) 86 | pr1, re1, f11 = PRF(T,B1) 87 | b1_pr[lab,index] = pr1 88 | b1_re[lab,index] = re1 89 | b1_f1[lab,index] = f11 90 | cond, vol, cut = tl_cond(H,B1,d,1.0,volA,order) 91 | b1_cond[index] = cond 92 | 93 | 94 | # Baseline 2 95 | B2 = TopNeighbors(H,Rstart,OneHop,kS) 96 | pr2, re2, f12 = PRF(T,B2) 97 | b2_pr[lab,index] = pr2 98 | b2_re[lab,index] = re2 99 | b2_f1[lab,index] = f12 100 | cond, vol, cut = tl_cond(H,B2,d,1.0,volA,order) 101 | b2_cond[index] = cond 102 | 103 | println("$label ($nT): $f11 \t $f12 \t $f1 \t $nS") 
104 | end 105 | 106 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 107 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond, 108 | "b1_pr"=>b1_pr, "b1_re"=>b1_re, "b1_f1"=>b1_f1,"b1_cond"=>b1_cond,"b2_cond"=>b2_cond, 109 | "r_pr"=>r_pr, "r_re"=>r_re, "r_f1"=>r_f1, "r_cond"=>r_cond, 110 | "b2_pr"=>b2_pr, "b2_re"=>b2_re, "b2_f1"=>b2_f1)) 111 | 112 | end 113 | end 114 | 115 | end 116 | -------------------------------------------------------------------------------- /Exp-Amazon/VaryDelta_Experimentsl.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | using Statistics 4 | include("../src/HyperLocal.jl") 5 | 6 | # Add the path to wherever the matrix is stored. Loading takes a long time. 7 | s = time() 8 | @time M = matread("../data/AmazonReview5core_H.mat") 9 | NodeLabels = vec(M["NodeLabels"]) 10 | NodeNames = M["NodeNames"] 11 | LabelNames = M["LabelNames"] 12 | H = M["H"] 13 | order = round.(Int64,vec(sum(H,dims=2))) 14 | 15 | ## Remove hyperedges with only one node 16 | e = findall(x->x>1,order) 17 | H = H[e,:] 18 | d = vec(sum(H,dims=1)) 19 | order = round.(Int64,vec(sum(H,dims=2))) 20 | volA = sum(d) 21 | m,n = size(H) 22 | Ht = sparse(H') 23 | toc = time()-s 24 | println("Done loading things into memory in $toc seconds.") 25 | 26 | labels = [1; 2; 3; 12; 18; 17; 25; 15; 24] # Labels for all clusters 27 | lnum = length(labels) 28 | 29 | # See outer parameters 30 | seednum = 10 31 | ntimes = 1 32 | epsilon = 1.0 33 | grownum = 200 # How much to grow seed set by using BestNeighbors 34 | deltas = 10 .^LinRange(0,3,10) 35 | enum = length(deltas) 36 | 37 | for lab = 1:9 38 | label = labels[lab] 39 | T = findall(x->x ==label,NodeLabels) 40 | nT = length(T) 41 | 42 | # Different seed set sizes depending on dataset 43 | if lab < 5 44 | grownum = 200 45 | seednum = 10 46 | elseif lab < 7 47 | grownum = 2000 48 | seednum = 50 49 | else 50 | grownum = 10000 51 | seednum = 200 52 | end 53 | 54 | # For each cluster we store a different .mat file of outputs 55 | outputmat = "Output_VaryDelta/Label_$label"*"_$seednum"*"_$grownum.mat" 56 | println(outputmat) 57 | 58 | # Output from HyperLocal 59 | hl_pr = zeros(enum) 60 | hl_re = zeros(enum) 61 | hl_f1 = zeros(enum) 62 | hl_time = zeros(enum) 63 | hl_size = zeros(enum) 64 | newS = zeros(enum) 65 | hl_cond = zeros(enum) 66 | 67 | # Generate a new seed set 68 | p = randperm(nT) 69 | Rstart = T[p[1:seednum]] 70 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 71 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 72 | R = union(Rmore,Rstart) 73 | 74 | # Force seed nodes to be contained in output set 75 | Rs = findall(x->in(x,Rstart),R) 76 | 77 | for e = 1:length(deltas) # Try each seed set with each delta 78 | 79 | delta = deltas[e] 80 | 81 | # Run HyperLocal 82 | s = time() 83 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 84 | hl_time[e] = time()-s 85 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 86 | pr, re, f1 = PRF(T,S) 87 | hl_pr[e] = pr 88 | hl_re[e] = re 89 | hl_f1[e] = f1 90 | hl_size[e] = length(S) 91 | hl_cond[e] = condS 92 | nS = length(setdiff(S,R)) 93 | newS[e] = nS 94 | 95 | println("$label ($nT): $f1 \t $nS \t $epsilon") 96 | end 97 | 98 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 99 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond)) 100 | 101 | end 102 | --------------------------------------------------------------------------------
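Note on running the sweep scripts: VaryEpsilon_Experiments.jl (next file) contains no using statements or data loading of its own. Like Smallest_Experiments.jl and Medium_Experiments.jl, it expects H, Ht, order, d, volA, and NodeLabels, plus the helper functions pulled in by src/HyperLocal.jl, to already be in scope. Below is a minimal driver sketch for it, following the same loading and preprocessing steps as Run_Experiments.jl; the data path and variable names come from that file, while the explicit "using SparseArrays" line is an added assumption to keep the sketch self-contained.

using MAT, Random, SparseArrays
include("../src/HyperLocal.jl")      # also brings in the helpers (get_immediate_neighbors, BestNeighbors, TopNeighbors, PRF, tl_cond)

M = matread("../data/AmazonReview5core_H.mat")   # hypergraph plus ground-truth cluster labels
NodeLabels = vec(M["NodeLabels"])
H = M["H"]
order = round.(Int64, vec(sum(H, dims=2)))

# Remove hyperedges with only one node, then recompute degrees, orders, and total volume
e = findall(x -> x > 1, order)
H = H[e, :]
d = vec(sum(H, dims=1))
order = round.(Int64, vec(sum(H, dims=2)))
volA = sum(d)
Ht = sparse(H')

include("VaryEpsilon_Experiments.jl")            # runs the epsilon sweep and writes Output_VaryEps/*.mat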
/Exp-Amazon/VaryEpsilon_Experiments.jl: -------------------------------------------------------------------------------- 1 | labels = [1; 2; 3; 12; 18] 2 | lnum = length(labels) 3 | 4 | # See outer parameters 5 | seednum = 10 6 | ntimes = 5 7 | epsis = [10.0 1.0 0.1 0.01 0.001] 8 | delta = 1.0 # stick with the all-or-nothing cut 9 | grownum = 300 10 | 11 | enum = length(epsis) 12 | 13 | for lab = 2 # For each cluster 14 | label = labels[lab] 15 | T = findall(x->x ==label,NodeLabels) 16 | nT = length(T) 17 | 18 | # For each epsilon we store a different .mat file of outputs 19 | 20 | outputmat = "Output_VaryEps/Label_$label"*"_$seednum"*"_$grownum.mat" 21 | println(outputmat) 22 | 23 | # Output from HyperLocal, multiple values of epsilon 24 | hl_pr = zeros(enum,ntimes) 25 | hl_re = zeros(enum,ntimes) 26 | hl_f1 = zeros(enum,ntimes) 27 | hl_time = zeros(enum,ntimes) 28 | hl_size = zeros(enum,ntimes) 29 | newS = zeros(enum,ntimes) 30 | hl_cond = zeros(enum,ntimes) 31 | 32 | # Output from first baseline 1 33 | b1_pr = zeros(ntimes) 34 | b1_re = zeros(ntimes) 35 | b1_f1 = zeros(ntimes) 36 | b1_cond = zeros(ntimes) 37 | 38 | # Keep track of R 39 | r_pr = zeros(ntimes) 40 | r_re = zeros(ntimes) 41 | r_f1 = zeros(ntimes) 42 | r_cond = zeros(ntimes) 43 | 44 | # Output from baseline 2 45 | b2_pr = zeros(ntimes) 46 | b2_re = zeros(ntimes) 47 | b2_f1 = zeros(ntimes) 48 | b2_cond = zeros(ntimes) 49 | 50 | for index = 1:ntimes # Try ntimes different seed sets 51 | 52 | # Generate a new seed set 53 | p = randperm(nT) 54 | Rstart = T[p[1:seednum]] 55 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 56 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 57 | R = union(Rmore,Rstart) 58 | # Force seed nodes to be in output set 59 | Rs = findall(x->in(x,Rstart),R) 60 | prr, rer, f1r = PRF(T,R) 61 | r_pr[index] = prr 62 | r_re[index] = rer 63 | r_f1[index] = f1r 64 | condR, volR, cutR = tl_cond(H,R,d,1.0,volA,order) 65 | r_cond[index] = condR 66 | 67 | # First baseline 68 | kS = nT-length(Rstart) 69 | B1 = BestNeighbors(H,d,Rstart,OneHop,kS) 70 | pr1, re1, f11 = PRF(T,B1) 71 | b1_pr[index] = pr1 72 | b1_re[index] = re1 73 | b1_f1[index] = f11 74 | 75 | # Baseline 2 76 | B2 = TopNeighbors(H,Rstart,OneHop,kS) 77 | pr2, re2, f12 = PRF(T,B2) 78 | b2_pr[index] = pr2 79 | b2_re[index] = re2 80 | b2_f1[index] = f12 81 | 82 | for e = 1:length(epsis) # Try each seed set with each epsilon 83 | 84 | epsilon = epsis[e] 85 | 86 | # Run HyperLocal 87 | s = time() 88 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 89 | hl_time[e,index] = time()-s 90 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 91 | pr, re, f1 = PRF(T,S) 92 | hl_pr[e,index] = pr 93 | hl_re[e,index] = re 94 | hl_f1[e,index] = f1 95 | hl_size[e,index] = length(S) 96 | hl_cond[e,index] = condS 97 | nS = length(setdiff(S,R)) 98 | newS[e,index] = nS 99 | 100 | println("$label ($nT): $f11 \t $f12 \t $f1 \t $nS \t $epsilon") 101 | end 102 | 103 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 104 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond, 105 | "b1_pr"=>b1_pr, "b1_re"=>b1_re, "b1_f1"=>b1_f1,"epsis"=>epsis, 106 | "r_pr"=>r_pr, "r_re"=>r_re, "r_f1"=>r_f1, "r_cond"=>r_cond, 107 | "b2_pr"=>b2_pr, "b2_re"=>b2_re, "b2_f1"=>b2_f1)) 108 | 109 | end 110 | end 111 | -------------------------------------------------------------------------------- /Exp-Stackoverflow/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/.DS_Store -------------------------------------------------------------------------------- /Exp-Stackoverflow/Clique_Expansion_Stack.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | using Statistics 4 | using StatsBase 5 | include("../src/HyperLocal.jl") 6 | 7 | @time M = matread("../data/stackoverflow_answer_H.mat") 8 | LabelMatrix = M["LabelMatrix"] 9 | LabelNames = M["LabelNames"] 10 | H = M["H"] 11 | order = round.(Int64,vec(sum(H,dims=2))) 12 | m,n = size(H) 13 | @show size(H) 14 | 15 | esmall = findall(x->x<50,order) 16 | @time Az = WeightedCliqueExpansion(H[esmall,:], order) 17 | 18 | matwrite("ZCE_Stack.mat", Dict("Az"=>Az)) 19 | 20 | @time As = SimpleCliqueExp(H[esmall,:]) 21 | 22 | matwrite("SCE_Stack.mat", Dict("As"=>As)) 23 | -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_Stack/Set45_1.0_5000.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_Stack/Set45_1.0_5000.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/.DS_Store -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_22943_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_22943_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_25849_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_25849_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_27596_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_27596_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_28886_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_28886_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_28918_10000_100_1.0.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_28918_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_29386_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_29386_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_43507_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_43507_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Output_VaryDelta/Cluster_5713_10000_100_1.0.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Output_VaryDelta/Cluster_5713_10000_100_1.0.mat -------------------------------------------------------------------------------- /Exp-Stackoverflow/Plots/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Plots/.DS_Store -------------------------------------------------------------------------------- /Exp-Stackoverflow/Plots/StackDots_Wide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/Exp-Stackoverflow/Plots/StackDots_Wide.pdf -------------------------------------------------------------------------------- /Exp-Stackoverflow/Plots_Stack_Experiments.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | include("../include/FlowSeed.jl") 4 | include("../src/HyperLocal.jl") 5 | 6 | ## Read in the matrix 7 | s = time() 8 | @time M = matread("../data/stackoverflow_answer_H.mat") 9 | LabelMatrix = M["LabelMatrix"] 10 | LabelNames = M["LabelNames"] 11 | MainLabels = M["MainLabels"] 12 | H = M["H"] 13 | order = round.(Int64,vec(sum(H,dims=2))) 14 | d = vec(sum(H,dims=1)) 15 | volA = sum(d) 16 | m,n = size(H) 17 | toc = time()-s 18 | println("Done loading things into memory in $toc seconds.") 19 | 20 | # Updated MainLabels be the set of topics between 2000-10000 21 | # (already stored in current MainLabels), to be only those with conductanc < .2 22 | Tconds = Vector{Float64}() 23 | Tsizes = Vector{Int64}() 24 | labels = Vector{Int64}() 25 | for lab = 1:length(MainLabels) 26 | label = MainLabels[lab] 27 | T = findnz(LabelMatrix[:,label])[1] 28 | nT = length(T) 29 | condT, volT, cutT = tl_cond(H,T,d,1.0,volA,order) 30 | if condT < .2 31 | # println("$nT \t $condT \t"*LabelNames[label]) 32 | push!(Tconds,condT) 33 | push!(labels,label) 34 | push!(Tsizes,nT) 35 | end 36 | end 37 | 38 | ## 39 | 40 | using Plots 41 | using Statistics 42 | ntimes = 1 43 | epsilon = 1.0 44 | delta = 5000.0 45 | outputmat = 
"Output_Stack/Set45_$(epsilon)_$(delta).mat" 46 | 47 | mat = matread(outputmat) 48 | hl_time = round.(mean(mat["hl_time"],dims = 2),digits = 2) 49 | sl_time = round.(mean(mat["sl_time"],dims = 2),digits = 2) 50 | zl_time = round.(mean(mat["zl_time"],dims = 2),digits = 2) 51 | hl_size= round.(mean(mat["hl_size"],dims = 2),digits = 2) 52 | r = round.(mean(mat["r_f1"],dims = 2),digits = 2) 53 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 54 | b1 = round.(mean(mat["b1_f1"],dims = 2),digits = 2) 55 | b2 = round.(mean(mat["b2_f1"],dims = 2),digits = 2) 56 | nS = round.(mean(mat["newS"],dims = 2),digits = 2) 57 | sl = round.(mean(mat["sl_f1"],dims = 2),digits = 2) 58 | zl = round.(mean(mat["zl_f1"],dims = 2),digits = 2) 59 | 60 | ln = LabelNames[labels] 61 | plot() 62 | p = (sortperm(vec(hl))) 63 | y1 = hl[p] 64 | yb1 = b1[p] 65 | yr = r[p] 66 | yb2 = b2[p] 67 | y2 = max.(yb1,yb2) 68 | y4 = max.(sl[p],zl[p]) 69 | x = 1:length(y1) 70 | xlabels = ln[p] 71 | 72 | ## Wide plot 73 | s1 = 750 74 | s2 = 400 75 | ms = 6 76 | stepy = 1 77 | scatter(x,y1,grid = false, markersize = ms, label = "HyperLocal", 78 | markerstrokewidth = 0, markershape = :circle, linewidth = 0, 79 | legend = :topleft, size = (s1, s2), ymirror = false, ylabel = "F1 Scores", 80 | xticks = (1:stepy:length(ln), "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t".*ln[1:stepy:length(ln)]),xrotation = 40, 81 | xtickfont=font(8), 82 | ytickfont=font(11), 83 | guidefont=font(12), 84 | titlefont=font(10), 85 | legendfont=font(9) 86 | ) 87 | scatter!(x,y2, markersize = ms,markerstrokewidth = 0,label = "TN/BN", 88 | markershape = :circle) 89 | scatter!(x,y4,markersize = ms,markerstrokewidth = 0, label = "FlowSeed", 90 | markershape = :circle) 91 | 92 | savefig("Plots/StackDots_Wide.pdf") 93 | -------------------------------------------------------------------------------- /Exp-Stackoverflow/Plots_VaryDeltaStack.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | deltas = 10 .^LinRange(0,3.7,10) 3 | seednum = 100 4 | epsilon = 1.0 5 | grownum = 10000 6 | 7 | # M = matread("../data/processed/stackoverflow_answer_H.mat") 8 | LabelMatrix = M["LabelMatrix"] 9 | LabelNames = M["LabelNames"] 10 | H = M["H"] 11 | order = round.(Int64,vec(sum(H,dims=2))) 12 | d = vec(sum(H,dims=1)) 13 | volA = sum(d) 14 | m,n = size(H) 15 | 16 | 17 | labels = [25849; 27596;28918;29386;43507] 18 | lnum = length(labels) 19 | v = [22943;28886;5713] 20 | 21 | 22 | ## 23 | using Plots 24 | plot() 25 | s1 = 300 26 | s2 = 250 27 | ms = 4 28 | lw = 2 29 | using LaTeXStrings 30 | x_label = L"\delta" 31 | y_label = "F1 Scores" 32 | for i = 1:3 33 | label = labels[i] 34 | outputmat = "Output_VaryDelta/Cluster_$(label)_$grownum"*"_$seednum"*"_$epsilon.mat" 35 | mat = matread(outputmat) 36 | hl_f1 = mat["hl_f1"] 37 | deltas = mat["deltas"] 38 | 39 | plot!(deltas, hl_f1[i,:],grid = false,label = LabelNames[label], 40 | markerstrokewidth = 0, markershape = :circle, linewidth = 2, xaxis = :log10, 41 | size = (s1,s2), markersize = ms, 42 | xlabel = x_label, ylabel = y_label, legend = :bottomright) 43 | end 44 | 45 | ## These were run and stored and slightly differently 46 | v = [22943;28886;5713] 47 | for i = 1:3 48 | label = v[i] 49 | outputmat = "Output_VaryDelta/Cluster_$(label)_$grownum"*"_$seednum"*"_$epsilon.mat" 50 | mat = matread(outputmat) 51 | hl_f1 = mat["hl_f1"] 52 | 53 | plot!(deltas, vec(hl_f1[:,1]),grid = false,label = LabelNames[label], 54 | markerstrokewidth = 0, markershape = :circle, linewidth = 2, xaxis = :log10, 55 | size 
= (s1,s2), markersize = ms,xguidefont=font(18),legend = false, 56 | xlabel = x_label, ylabel = y_label, legendfont=font(7)) 57 | 58 | end 59 | 60 | savefig("Plots/Stack_VaryDelta.pdf") 61 | -------------------------------------------------------------------------------- /Exp-Stackoverflow/Stack_LargeDelta_Experiments.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | include("../src/HyperLocal.jl") 4 | include("../include/FlowSeed.jl") 5 | 6 | ## Read in the matrix, if it isn't read in already 7 | 8 | s = time() 9 | @time M = matread("../data/stackoverflow_answer_H.mat") 10 | LabelMatrix = M["LabelMatrix"] 11 | LabelNames = M["LabelNames"] 12 | MainLabels = M["MainLabels"] 13 | H = M["H"] 14 | order = round.(Int64,vec(sum(H,dims=2))) 15 | d = vec(sum(H,dims=1)) 16 | volA = sum(d) 17 | m,n = size(H) 18 | Ht = sparse(H') 19 | toc = time()-s 20 | println("Done loading things into memory in $toc seconds.") 21 | 22 | ## 23 | Tconds = Vector{Float64}() 24 | Tsizes = Vector{Int64}() 25 | labels = Vector{Int64}() 26 | for lab = 1:length(MainLabels) 27 | label = MainLabels[lab] 28 | T = findnz(LabelMatrix[:,label])[1] 29 | nT = length(T) 30 | condT, volT, cutT = tl_cond(H,T,d,1.0,volA,order) 31 | if condT < .2 32 | println("$nT \t $condT \t"*LabelNames[label]) 33 | push!(Tconds,condT) 34 | push!(labels,label) 35 | push!(Tsizes,nT) 36 | end 37 | end 38 | 39 | ## 40 | lnum = length(labels) 41 | 42 | # See outer parameters 43 | ntimes = 1 44 | epsilon = 1.0 45 | delta = 5000.0 46 | # seednum = 100 47 | # grownum = 10000 48 | 49 | # Output from HyperLocal, large delta 50 | hl_pr = zeros(lnum,ntimes) 51 | hl_re = zeros(lnum,ntimes) 52 | hl_f1 = zeros(lnum,ntimes) 53 | hl_time = zeros(lnum,ntimes) 54 | hl_size = zeros(lnum,ntimes) 55 | newS = zeros(lnum,ntimes) 56 | hl_cond = zeros(lnum,ntimes) 57 | 58 | # Output from HyperLocal, delta = 1.0 59 | hl_pr1 = zeros(lnum,ntimes) 60 | hl_re1 = zeros(lnum,ntimes) 61 | hl_f11 = zeros(lnum,ntimes) 62 | hl_time1 = zeros(lnum,ntimes) 63 | hl_size1 = zeros(lnum,ntimes) 64 | newS1 = zeros(lnum,ntimes) 65 | hl_cond1 = zeros(lnum,ntimes) 66 | 67 | # Output from first baseline 1 68 | b1_pr = zeros(lnum,ntimes) 69 | b1_re = zeros(lnum,ntimes) 70 | b1_f1 = zeros(lnum,ntimes) 71 | b1_cond = zeros(lnum,ntimes) 72 | 73 | # Output from baseline 2 74 | b2_pr = zeros(lnum,ntimes) 75 | b2_re = zeros(lnum,ntimes) 76 | b2_f1 = zeros(lnum,ntimes) 77 | b2_cond = zeros(lnum,ntimes) 78 | 79 | # Keep track of R 80 | r_pr = zeros(lnum,ntimes) 81 | r_re = zeros(lnum,ntimes) 82 | r_f1 = zeros(lnum,ntimes) 83 | r_cond = zeros(lnum,ntimes) 84 | 85 | sl_pr = zeros(lnum,ntimes) 86 | sl_re = zeros(lnum,ntimes) 87 | sl_f1 =zeros(lnum,ntimes) 88 | sl_cond = zeros(lnum,ntimes) 89 | sl_size = zeros(lnum,ntimes) 90 | sl_time = zeros(lnum,ntimes) 91 | 92 | zl_pr = zeros(lnum,ntimes) 93 | zl_re = zeros(lnum,ntimes) 94 | zl_f1 = zeros(lnum,ntimes) 95 | zl_cond = zeros(lnum,ntimes) 96 | zl_size = zeros(lnum,ntimes) 97 | zl_time = zeros(lnum,ntimes) 98 | 99 | # For each epsilon we store a different matrix of outputs 100 | outputmat = "Output_Stack/Set45_$(epsilon)_$(delta).mat" 101 | println(outputmat) 102 | 103 | maz = matread("ZCE_Stack.mat") 104 | Az = maz["Az"] 105 | mas = matread("SCE_Stack.mat") 106 | As = mas["As"] 107 | 108 | ## For a fixed epsilon, seednum, and grownum, run experiments 109 | # on each cluster multiple times 110 | 111 | for lab = 1:length(labels) 112 | label = labels[lab] 113 | T = 
findnz(LabelMatrix[:,label])[1] 114 | nT = length(T) 115 | @show nT,label 116 | 117 | for index = 1:ntimes 118 | 119 | # Generate a new seed set 120 | seednum = round(Int64,nT/20) 121 | grownum = round(Int64,min(nT*2,n)) 122 | p = randperm(nT) 123 | Rstart = T[p[1:seednum]] 124 | OneHop = get_immediate_neighbors(H,Ht,Rstart) 125 | Rmore = BestNeighbors(H,d,Rstart,OneHop,grownum) 126 | R = union(Rmore,Rstart) 127 | Rs = findall(x->in(x,Rstart),R) # Force seed nodes to be in output set 128 | prr, rer, f1r = PRF(T,R) 129 | r_pr[lab,index] = prr 130 | r_re[lab,index] = rer 131 | r_f1[lab,index] = f1r 132 | condR, volR, cutR = tl_cond(H,R,d,1.0,volA,order) 133 | r_cond[lab,index] = condR 134 | 135 | # Run HyperLocal with delta = 1.0 136 | s = time() 137 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,1.0,Rs,true) 138 | hl_time1[lab,index] = time()-s 139 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 140 | pr, re, f1_d1 = PRF(T,S) 141 | hl_pr1[lab,index] = pr 142 | hl_re1[lab,index] = re 143 | hl_f11[lab,index] = f1_d1 144 | hl_size1[lab,index] = length(S) 145 | hl_cond1[lab,index] = condS 146 | nS = length(setdiff(S,R)) 147 | newS1[lab,index] = nS 148 | 149 | # Run HyperLocal with delta = 1000 150 | s = time() 151 | S, lcond = HyperLocal(H,Ht,order,d,R,epsilon,delta,Rs,true) 152 | hl_time[lab,index] = time()-s 153 | condS, volS, cutS = tl_cond(H,S,d,1.0,volA,order) 154 | pr, re, f1 = PRF(T,S) 155 | hl_pr[lab,index] = pr 156 | hl_re[lab,index] = re 157 | hl_f1[lab,index] = f1 158 | hl_size[lab,index] = length(S) 159 | hl_cond[lab,index] = condS 160 | nS = length(setdiff(S,R)) 161 | newS[lab,index] = nS 162 | 163 | # First baseline 164 | kS = nT-length(Rstart) 165 | B1 = BestNeighbors(H,d,Rstart,OneHop,kS) 166 | pr1, re1, f11 = PRF(T,B1) 167 | b1_pr[lab,index] = pr1 168 | b1_re[lab,index] = re1 169 | b1_f1[lab,index] = f11 170 | 171 | # Baseline 2 172 | B2 = TopNeighbors(H,Rstart,OneHop,kS) 173 | pr2, re2, f12 = PRF(T,B2) 174 | b2_pr[lab,index] = pr2 175 | b2_re[lab,index] = re2 176 | b2_f1[lab,index] = f12 177 | 178 | # Simple Clique Expansion 179 | nR = length(R) 180 | Rs_vec = zeros(length(R)) 181 | Rs_vec[Rs] .= 1 182 | starter = time() 183 | SL, lcond = FlowSeed(As,R,epsilon,zeros(nR),Rs_vec) 184 | sl_time[lab,index] = time()-starter 185 | pr3, re3, f13 = PRF(T,SL) 186 | condS, volS, cutS = tl_cond(H,SL,d,1.0,volA,order) 187 | sl_pr[lab,index] = pr3 188 | sl_re[lab,index] = re3 189 | sl_f1[lab,index] = f13 190 | sl_size[lab,index] = length(SL) 191 | sl_cond[lab,index] = condS 192 | 193 | # Zhou Clique Expansion + SimpleLocal 194 | starter = time() 195 | ZL, lcond = FlowSeed(Az,R,epsilon,zeros(nR),Rs_vec) 196 | zl_time[lab,index] = time()-starter 197 | pr4, re4, f14 = PRF(T,ZL) 198 | condS, volS, cutS = tl_cond(H,ZL,d,1.0,volA,order) 199 | zl_pr[lab,index] = pr4 200 | zl_re[lab,index] = re4 201 | zl_f1[lab,index] = f14 202 | zl_size[lab,index] = length(ZL) 203 | zl_cond[lab,index] = condS 204 | 205 | println("$label ($nT):\n $f1r \n $f11 \n $f12 \n $f1 \n $f1_d1\n \t $nS "*LabelNames[label]) 206 | end 207 | 208 | matwrite(outputmat, Dict("hl_size"=>hl_size, "newS"=>newS, "hl_time"=>hl_time, 209 | "hl_pr"=>hl_pr, "hl_re"=>hl_re, "hl_f1"=>hl_f1, "hl_cond"=>hl_cond, 210 | "hl_pr1"=>hl_pr1, "hl_re1"=>hl_re1, "hl_f11"=>hl_f11, "hl_cond1"=>hl_cond1, 211 | "sl_pr"=>sl_pr, "sl_re"=>sl_re, "sl_f1"=>sl_f1, "sl_cond"=>sl_cond, "sl_time"=>sl_time, 212 | "zl_pr"=>zl_pr, "zl_re"=>zl_re, "zl_f1"=>zl_f1, "zl_cond"=>zl_cond, "zl_time"=>zl_time, 213 | "sl_size"=>sl_size, "zl_size"=>zl_size, 214 | 
"b1_pr"=>b1_pr, "b1_re"=>b1_re, "b1_f1"=>b1_f1, 215 | "r_pr"=>r_pr, "r_re"=>r_re, "r_f1"=>r_f1, 216 | "b2_pr"=>b2_pr, "b2_re"=>b2_re, "b2_f1"=>b2_f1)) 217 | 218 | end 219 | -------------------------------------------------------------------------------- /Exp-Stackoverflow/Stack_LargeDelta_Table.jl: -------------------------------------------------------------------------------- 1 | using MAT 2 | using Random 3 | include("../src/HyperLocal.jl") 4 | include("../include/FlowSeed.jl") 5 | # 6 | ## Read in the matrix, if it isn't read in already 7 | using SparseArrays 8 | s = time() 9 | @time M = matread("../data/processed/stackoverflow_answer_H.mat") 10 | LabelMatrix = M["LabelMatrix"] 11 | LabelNames = M["LabelNames"] 12 | MainLabels = M["MainLabels"] 13 | H = M["H"] 14 | order = round.(Int64,vec(sum(H,dims=2))) 15 | d = vec(sum(H,dims=1)) 16 | volA = sum(d) 17 | m,n = size(H) 18 | Ht = sparse(H') 19 | toc = time()-s 20 | println("Done loading things into memory in $toc seconds.") 21 | 22 | ## 23 | Tconds = Vector{Float64}() 24 | Tsizes = Vector{Int64}() 25 | labels = Vector{Int64}() 26 | for lab = 1:length(MainLabels) 27 | label = MainLabels[lab] 28 | T = findnz(LabelMatrix[:,label])[1] 29 | nT = length(T) 30 | condT, volT, cutT = tl_cond(H,T,d,1.0,volA,order) 31 | if condT < .2 32 | # println("$nT \t $condT \t"*LabelNames[label]) 33 | push!(Tconds,condT) 34 | push!(labels,label) 35 | push!(Tsizes,nT) 36 | end 37 | end 38 | 39 | using Statistics 40 | ntimes = 1 41 | epsilon = 1.0 42 | delta = 5000.0 43 | outputmat = "Output_Stack/Set45_$(epsilon)_$(delta).mat" 44 | 45 | mat = matread(outputmat) 46 | hl_time = round.(mean(mat["hl_time"],dims = 2),digits = 2) 47 | sl_time = round.(mean(mat["sl_time"],dims = 2),digits = 2) 48 | zl_time = round.(mean(mat["zl_time"],dims = 2),digits = 2) 49 | hl_size= round.(mean(mat["hl_size"],dims = 2),digits = 2) 50 | r = round.(mean(mat["r_f1"],dims = 2),digits = 2) 51 | hl = round.(mean(mat["hl_f1"],dims = 2),digits = 2) 52 | b1 = round.(mean(mat["b1_f1"],dims = 2),digits = 2) 53 | b2 = round.(mean(mat["b2_f1"],dims = 2),digits = 2) 54 | nS = round.(mean(mat["newS"],dims = 2),digits = 2) 55 | sl = round.(mean(mat["sl_f1"],dims = 2),digits = 2) 56 | zl = round.(mean(mat["zl_f1"],dims = 2),digits = 2) 57 | 58 | for i = 1:length(labels) 59 | lab = labels[i] 60 | println(" $(hl[i]) \t $(sl[i]) \t $(zl[i]) \t $(b1[i]) \t $(b2[i])\t $(r[i]) \t $(hl_time[i]) \t $(sl_time[i]) \t $(zl_time[i])\t $(Tconds[i])"*"\t"*LabelNames[lab]) 61 | end 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Nate Veldt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | README 2 | 3 | Code for 4 | 5 | Minimizing Localized Ratio Cut Objectives in Hypergraphs 6 | Nate Veldt, Austin Benson, Jon Kleinberg 7 | KDD '20: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining 8 | 9 | Datasets are too large to include in a GitHub repository. Text versions of the datasets can be found at: 10 | 11 | https://www.cs.cornell.edu/~arb/data/amazon-reviews/ 12 | 13 | and 14 | 15 | https://www.cs.cornell.edu/~arb/data/stackoverflow-answers/ -------------------------------------------------------------------------------- /data/Amazon-Clusters.txt: -------------------------------------------------------------------------------- 1 | Cluster information for Amazon reviews hypergraph 2 | Conductance |T| Category 3 | Label 1 0.05526315789473684 31 Amazon_Fashion 4 | Label 2 0.11689899730985572 85 All_Beauty 5 | Label 3 0.18333333333333332 48 Appliances 6 | Label 4 0.09941355767439959 22931 Arts_Crafts_and_Sewing 7 | Label 5 0.07920483552884328 79437 Automotive 8 | Label 6 0.023304263291091188 704093 Books 9 | Label 7 0.04535683668942734 73713 CDs_and_Vinyl 10 | Label 8 0.10469066052559024 48186 Cell_Phones_and_Accessories 11 | Label 9 0.0569535873958006 376858 Clothing_Shoes_and_Jewelry 12 | Label 10 0.08635143506091265 11797 Digital_Music 13 | Label 11 0.06919062211927213 160052 Electronics 14 | Label 12 0.1315345699831366 148 Gift_Cards 15 | Label 13 0.0987035757730488 41320 Grocery_and_Gourmet_Food 16 | Label 14 0.0843442282774712 189172 Home_and_Kitchen 17 | Label 15 0.14216482677220024 5334 Industrial_and_Scientific 18 | Label 16 0.05826591377991949 98824 Kindle_Store 19 | Label 17 0.10889854659447136 1581 Luxury_Beauty 20 | Label 18 0.13249348392701998 157 Magazine_Subscriptions 21 | Label 19 0.05845032232905592 60175 Movies_and_TV 22 | Label 20 0.08932531461894376 10620 Musical_Instruments 23 | Label 21 0.12032337895363579 27965 Office_Products 24 | Label 22 0.11865049228984761 32918 Patio_Lawn_and_Garden 25 | Label 23 0.08520030256037095 42531 Pet_Supplies 26 | Label 24 0.09661968393434621 4970 Prime_Pantry 27 | Label 25 0.13708659429437012 802 Software 28 | Label 26 0.0881832010246336 104687 Sports_and_Outdoors 29 | Label 27 0.10269628837546725 73649 Tools_and_Home_Improvement 30 | Label 28 0.08729156649814647 78772 Toys_and_Games 31 | Label 29 0.08020151239913818 17408 Video_Games 32 | -------------------------------------------------------------------------------- /include/FlowSeed.jl: -------------------------------------------------------------------------------- 1 | # This is self-contained Julia code for FlowSeed, the flow-based 2 | # method for local clustering introduced in the paper: 3 | # 4 | # Flow-Based Local Graph Clustering with Better Seed Set Inclusion 5 | # Nate Veldt, Christine Klymko, and David Gleich 6 | # Proceedings of the 2019 SIAM 
International Conference on Data Mining 7 | # 8 | # ArXiv preprint: https://arxiv.org/abs/1811.12280 9 | # 10 | # The main subroutine is LocalPushRelabel. 11 | # Unlike previous local flow methods, this repeatedly updates Phase 1 of the 12 | # push-relabel maximum s-t flow algorithm. This phase returns a minimum s-t cut, 13 | # which is all we need for the algorithm. The push-relabel algorithm 14 | # is made efficient by a global relabeling heuristic. 15 | # 16 | # Previous flow-based methods repeatedly called a black-box min-cut 17 | # solver and didn't use warm starts. Here we use warm starts and call a 18 | # white-box subroutine that makes the code much faster in practice. 19 | 20 | using SparseArrays 21 | 22 | # This computes the precision, recall, and F1 score for a set Returned 23 | # compared against a Target set 24 | function PRF(Target,Returned) 25 | 26 | TruePos = intersect(Returned,Target) 27 | pr = length(TruePos)/length(Returned) 28 | re = length(TruePos)/length(Target) 29 | F1 = 2*(pr*re)/(pr+re) 30 | 31 | return pr, re, F1 32 | 33 | end 34 | 35 | # Starting from a set of seed nodes R, do a breadth first search to get 36 | # a k-hop neighborhood of R 37 | function neighborhood(A::SparseMatrixCSC,R::Array{Int64},k::Int64) 38 | 39 | rp = A.rowval 40 | ci = A.colptr 41 | n = size(A,1) 42 | 43 | eS = zeros(n) 44 | eS[R] .= 1 45 | 46 | # For node i, the neighbors of i are rp[ci[i]:ci[i+1]-1] 47 | for i = R 48 | neighbs = rp[ci[i]:ci[i+1]-1] 49 | eS[neighbs] .= 1 50 | end 51 | 52 | # This could be more efficient, but recursively calling won't take too long 53 | # as long as k isn't too large 54 | if k == 1 55 | return findall(x->x!=0,eS) 56 | else 57 | return neighborhood(A,findall(x->x!=0,eS),k-1) 58 | end 59 | 60 | end 61 | 62 | # For a set S in a graph with adjacency matrix A, return some information about 63 | # S including its conductance, number of interior edges, volume, and cut. 
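#
# (Added usage sketch for the function below; illustrative only, not part of
# the original code. Assuming A is a symmetric SparseMatrixCSC{Float64,Int64}
# adjacency matrix and S is a Vector{Int64} of node indices:
#
#   cut, vol, edges, cond = set_stats(A, S, 0.0)   # volA = 0.0 means "recompute vol(A)"
#
# returns the cut, volume, and interior edge weight of S, together with
# cond = cut / min(vol, volA - vol).)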
64 | function set_stats(A::SparseMatrixCSC{Float64,Int64}, 65 | S::Vector{Int64},volA::Float64) 66 | 67 | if volA == 0.0 68 | volA = sum(A.nzval) 69 | end 70 | 71 | if length(S) == size(A,1) 72 | # then we have an indicator vector 73 | S = findall(x->x!=0,eS) 74 | AS = A[:,S] 75 | else 76 | # then we have a subset 77 | @assert(minimum(S) >= 1) 78 | @assert(maximum(S) <= size(A,1)) 79 | AS = A[:,S] 80 | end 81 | 82 | vol = sum(AS.nzval); 83 | SAS = AS[S,:] 84 | edges = sum(SAS.nzval); 85 | cut = vol-edges 86 | 87 | cond = cut/minimum([vol,volA-vol]); 88 | 89 | return cut, vol, edges, cond 90 | 91 | end 92 | 93 | # Compute the s-t cut score corresponding to a set S, in an augmented graph 94 | # with source and sink node 95 | function cutval(A::SparseMatrixCSC{Float64,Int64},S::Vector{Int64}, 96 | R::Vector{Int64},d::Array{Float64,2},alpha::Float64,epsilon::Float64, 97 | volA::Float64,pR::Array{Float64},RinS::Array{Float64}) 98 | 99 | n = size(A,1) 100 | if volA == 0.0 101 | volA = sum(A.nzval) 102 | end 103 | 104 | strongR = R[findall(x->x!=0,RinS)] 105 | @assert(length(setdiff(strongR,S)) == 0) # S should contain strongR 106 | 107 | @assert(minimum(S) >= 1) 108 | @assert(maximum(S) <= size(A,1)) 109 | AS = A[:,S]; 110 | 111 | 112 | volS = sum(AS.nzval); 113 | SAS = AS[S,:] 114 | edges = sum(SAS.nzval); 115 | cutS = volS-edges 116 | 117 | volR = sum(d[R]) 118 | 119 | # penalty vector, should only be nonzero for R nodes 120 | penalty = zeros(n) 121 | penalty[R] = pR.*d[R] 122 | 123 | RS = intersect(R,S) 124 | volRS = sum(d[RS]) 125 | RnotinS = setdiff(R,RS) # the set of nodes in R that aren't in S 126 | pRnotinS = sum(penalty[RnotinS]) # the penalty for excluding R nodes from A 127 | 128 | cutScore = cutS - alpha*volRS + alpha*volR + alpha*epsilon*(volS-volRS) + alpha*pRnotinS 129 | 130 | @assert(cutScore >= 0) 131 | 132 | relcond = cutS/(volRS - epsilon*(volS-volRS) - pRnotinS) 133 | 134 | return relcond 135 | end 136 | 137 | # The main function, which minimizes a localized variant of conductance which 138 | # penalizes the exclusion of seed nodes from the output set. 139 | # 140 | # Parameters: 141 | # 142 | # A = adjacency matrix for a graph 143 | # 144 | # R = node indices for a seed set, 145 | # Rn = immediate neighbors of R 146 | # Rc = complement set of R 147 | # 148 | # epsilon = locality parameter 149 | # 150 | # pR = a length(R) vector with penalties on exluding seed nodes in R from 151 | # the output set. pR[i] is the penalty or excluding R[i] from the output 152 | # 153 | # RinS = a length(R) zero-one vector indicating which nodes in R are stricly 154 | # required to be in the output set 155 | # 156 | # relcondFlag = a boolean flag indicating whether to compute the relative 157 | # conductance score or the exact conductance score for each 158 | # intermediate improved set. Choosing false (i.e. updating with 159 | # exact conductance) will sometimes lead to fewer iterations and 160 | # lower conductance output, but will not actually minimize the 161 | # relative conductance or seed penalized conductance. 162 | # 163 | # localFlag = a boolean flag indicating whether or not to use the local 164 | # computations. If volR is large and epsilon is small, in some 165 | # cases it may be better for the subroutine to perform one 166 | # global caluculations that multiple "local" computations. 
167 | # 168 | # d = weighted degree vector of the graph 169 | # 170 | # 171 | # volA, volR = volumes of the entire graph and seed set respectively 172 | 173 | # FlowSeed with simplified parameters 174 | function FlowSeed(A::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}, 175 | epsilon::Float64,pR::Array{Float64},RinS::Array{Float64}, 176 | relcondFlag::Bool= true,localFlag::Bool=true) 177 | 178 | d = sum(A,dims = 2) 179 | volA = sum(A.nzval) 180 | volR = sum(d[R]) 181 | n = size(A,1) 182 | 183 | # Find one-hop neighbors of R, and get the complement set 184 | Rn = neighborhood(A,R,1) # get the immediate neighbors of R... 185 | Rn = setdiff(Rn,R) # ...but we exclude R itself 186 | inRc = ones(n) 187 | inRc[R] .= 0 188 | Rc = findall(x->x!=0,inRc) # complement of R 189 | 190 | if volA*epsilon/volR < 10 191 | localFlag = false 192 | end 193 | FlowSeed(A,R,Rn,Rc,epsilon,pR,RinS,d,volA,volR,relcondFlag,localFlag) 194 | 195 | end 196 | 197 | # More in depth parameters, in case one wants to run the method multiple times 198 | # and now always recompute Rn, Rc, volA, volR, and d each time 199 | function FlowSeed(A::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}, 200 | Rn::Vector{Int64},Rc::Vector{Int64},epsilon::Float64,pR::Array{Float64}, 201 | RinS::Array{Float64},d::Array{Float64},volA::Float64=0.0,volR::Float64=0.0, 202 | relcondFlag::Bool= true,localFlag::Bool=true) 203 | 204 | fR = volR/(volA - volR) 205 | if epsilon < fR 206 | println("Locality parameter epsilon was set to small. Setting it to lower bound of $fR. Computations will not be local.") 207 | epsilon = fR 208 | end 209 | 210 | n = size(A,1) 211 | 212 | if localFlag 213 | if volA*epsilon/volR < 10 214 | println("Note that vol(R)/epsilon = O(vol(G)). 215 | For these parameters \nit may be faster to run the algorithm 216 | without the locality setting.") 217 | end 218 | end 219 | 220 | # Call nodes that must be S the "strong seed nodes" 221 | localStrong = findall(x->x!=0,RinS) 222 | 223 | StrongSeeds = R[localStrong] 224 | numstrong = length(StrongSeeds) 225 | 226 | # If something is marked as a strong seed, put an infinite penalty 227 | # on excluding it from the output set 228 | pR[localStrong] .= Inf 229 | 230 | # Conductance of R 231 | Stats = set_stats(A,R,volA) 232 | alphaCurrent = Stats[4] 233 | # Conductance of R is same as localized seed penalized conductance of R 234 | # alpha2 = cutval(A,R,R,d,1.0,epsilon,volA,pR,RinS) 235 | # println("$alpha2, $alphaCurrent") 236 | 237 | 238 | println("\nEpsilon = $epsilon"); 239 | println("There are $numstrong strong seed nodes.") 240 | println("The full seed set has conductance $alphaCurrent "); 241 | println("-------------------------------------------------------") 242 | BestS = R 243 | alph0 = 2 244 | alphaBest = alphaCurrent 245 | 246 | source = zeros(n) 247 | sink = zeros(n) 248 | dr = d[R] 249 | drc = d[Rc] 250 | 251 | while alphaCurrent < alph0 252 | 253 | # Prepare source-side and sink-side edge weights for the augmented 254 | # local flow graph 255 | # Seed nodes have an edge to the source of the following weight 256 | source[R] = alphaCurrent*(pR .+ 1).*dr 257 | 258 | # Non-seed nodes have an edge to the sink 259 | sink[Rc] = alphaCurrent*epsilon*drc 260 | 261 | # Compute the new min s-t cut 262 | if localFlag 263 | # Do it by repeatedly solving smaller problems, starting 264 | # by looking at the immediate neighbors Rn 265 | S = LocalPushRelabel(A,R,source,sink,Rn) 266 | else 267 | # Run a single min-cut computation on the whole graph 268 | S = 
NonLocalPushRelabel(A,R,source,sink) 269 | end 270 | 271 | if length(S) > 0 && length(S) < n 272 | 273 | # Check stats for new set 274 | if relcondFlag 275 | alphaS = cutval(A,S,R,d,1.0,epsilon,volA,pR,RinS) 276 | else 277 | Stats = set_stats(A,S,volA) 278 | alphaS = Stats[4] 279 | end 280 | 281 | if alphaS < alphaCurrent 282 | numS = size(S,1) 283 | ra = round(alphaS,digits =4) 284 | println("Improvement found: R-Conductance = $ra, Size = $numS") 285 | BestS = S 286 | alphaBest = alphaS 287 | end 288 | 289 | else 290 | alphaS = alphaCurrent 291 | end 292 | 293 | alph0 = alphaCurrent 294 | alphaCurrent = alphaS 295 | 296 | end 297 | 298 | SL = BestS 299 | sizeSL = length(SL) 300 | cond = alphaBest 301 | println("------------------------------------------------------") 302 | println("Final Answer: Conductance = $cond, Size = $sizeSL ") 303 | 304 | return SL, cond 305 | end 306 | 307 | # LocalPushRelabel: computes the minimumn s-t cut for a flow graph in strongly-local 308 | # time. It repeatedly solves localized min-cut problems. 309 | # 310 | # Input Parameters: 311 | # 312 | # A = a symmetric matrix representing an undirected graph. It can be weighted. 313 | # 314 | # R = a list of nodes that share an edge with the source node 315 | # 316 | # sWeights and tWeight store the nonnegative weight of each node to the source 317 | # and sink. For node i, exactly one of sWeights[i] and tWeights[i] is nonzero 318 | # 319 | # Rn = a list of nodes not in R that neighbor a node in R 320 | function LocalPushRelabel(A::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}, 321 | sWeights::Array{Float64},tWeights::Array{Float64},Rn::Array{Int64}) 322 | 323 | timer = 0.0 324 | 325 | n = size(A,1) 326 | rp = A.rowval 327 | ci = A.colptr 328 | 329 | # Now we want to locally compute maximum flows 330 | # C = indices of "complete" nodes in the local graph L, which are nodes 331 | # whose degree in the local graph equals the degree in the global graph. 332 | # I = local indices of nodes that are in L, but not complete. These do 333 | # share edges with one another, but only with complete nodes. 334 | 335 | # Initialize the complete set to be the set of nodes adjacent to the source 336 | C_global = R 337 | I_global = Rn # everything else is incomplete 338 | Ac = A[C_global,:] # set of edges from the complete set to the rest of the graph 339 | 340 | # We will maintain a map from indices in a local subgraph, to global indices in A. 341 | # These don't include the sink node in the flow graph, we are considering 342 | # just a growing local subgraph of A 343 | Local2Global = [C_global; I_global] 344 | # Node i in the local graph corresponds to the node with index 345 | # Local2Glocal[i] in the global graph A 346 | 347 | # Number of nodes in the local graph 348 | Lsize = length(Local2Global) 349 | 350 | # Indices, in the local graph, of complete and incomplete nodes 351 | C_local = collect(1:length(R)) 352 | I_local = collect(length(R)+1:Lsize) 353 | numI = length(I_global) # number of incomplete nodes 354 | 355 | # Build the initial local graph 356 | 357 | AcToI = Ac[:,I_global] # edges between complete and incomplete nodes 358 | AcToc = Ac[:,C_global] # edges between complete nodes 359 | L = [AcToc AcToI; 360 | AcToI' spzeros(numI,numI)] # adjacency matrix for local graph 361 | 362 | # We distinguish between L the "local graph", and Lf, the "local flow graph" 363 | # which additionally contains the sink node t (as node 1). 
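# (Added note, inferred from the construction below: Lf has the block form
#
#    Lf = [ 0      tToL' ]
#         [ tToL     L   ]
#
# so node 1 of Lf plays the role of the sink, its edges carry the sink-side
# capacities tToL, and the remaining block is the local graph L itself.)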
364 | 365 | # In the local flow graph, each non-terminal node has either a source-side 366 | # or sink-side edge. 367 | tToL = reshape(tWeights[Local2Global],Lsize) 368 | sToL = reshape(sWeights[Local2Global],Lsize) 369 | 370 | # By adding the edges to the sink, 371 | # we transform the local graph L into the local flow graph Lf 372 | 373 | Lf = [spzeros(1,1) sparse(tToL'); 374 | sparse(tToL) L] 375 | 376 | # Initialize the flow matrix; allocate space for non-zero flow values 377 | nLf = size(Lf,1) 378 | F = SparseMatrixCSC(nLf,nLf,Lf.colptr,Lf.rowval,zeros(length(Lf.rowval))) 379 | # Find the minimum cut for Lf. 380 | # 381 | # The first node in Lf is the sink, so offset indices of R by 1. 382 | start = time() 383 | S_local,F,excess = Main_Push_Relabel_fs(Lf,F,collect(2:length(R)+1),[0; sToL]) 384 | timer += time()-start 385 | 386 | # F is a preflow that is returned. It is NOT the maximum flow for Lf. 387 | # S is the set of nodes in the min s-t cut of Lf. S_local are the local 388 | # indices in L, (not the indices in A or Lf) 389 | 390 | # We "expand" L around nodes in S that were previously "incomplete" 391 | E_local = setdiff(S_local,C_local) # Nodes to expand around 392 | E_global = Local2Global[E_local] # their global indices 393 | 394 | # Keep track of which nodes are in the local graph L 395 | inL = zeros(Bool,n) 396 | inL[Local2Global] .= true 397 | 398 | # As long as we have new nodes to expand around, we haven't yet found 399 | # the global minimum s-t cut, so we continue. 400 | while length(E_local) > 0 401 | 402 | # Update which nodes are complete and which are incomplete 403 | C_local = [C_local; E_local] 404 | C_global = Local2Global[C_local] 405 | 406 | # Take these away from I_local 407 | I_local = setdiff(I_local,E_local) 408 | I_global = Local2Global[I_local] 409 | 410 | # To complete nodes in E, first add all the possible edges in the 411 | # current local graph, so that they match the global graph edges 412 | # (This is one of the most expensive parts of the expansion) 413 | L[E_local,E_local] = A[E_global,E_global] 414 | L[E_local,I_local] = A[E_global,I_global] 415 | L[I_local,E_local] = L[E_local,I_local]' 416 | 417 | # Now we must expand the local graph so that NEW neighbors of E 418 | # are added to L 419 | Lnew = Vector{Int64}() 420 | for v = E_global 421 | # This extracts the neighbor list of node v from the 422 | # rowval and colptr vectors of the adjacency matrix 423 | Neighbs_of_v = rp[ci[v]:ci[v+1]-1] 424 | for nv = Neighbs_of_v 425 | if ~inL[nv] 426 | inL[nv] = true 427 | push!(Lnew,nv) 428 | end 429 | end 430 | end 431 | numNew = length(Lnew) 432 | 433 | # Store local indices for new nodes added to L 434 | Lnew_local = collect((Lsize+1):(Lsize+numNew)) 435 | 436 | # These are going to be "incomplete" nodes 437 | I_local = [I_local; Lnew_local] 438 | 439 | # Expand L by adding edges from the old local graph to Lnew. 440 | # Note that we don't include any edges between nodes in Lnew. 441 | P = A[Local2Global,Lnew] 442 | L = [L P; 443 | P' spzeros(numNew,numNew)] 444 | 445 | # Update the set of indices in L 446 | Local2Global = [Local2Global; Lnew] 447 | 448 | # excess stores the amount of "excess" flow after a flow computation. 449 | # 450 | # Extend the excess vector to accomodate the new size of L. 451 | # Since Lnew were not present in the last flow computation, they 452 | # have zero excess. 453 | excess = [excess; zeros(numNew)] 454 | 455 | # For the next local min-cut computation, we need to know which 456 | # nodes come with nonzero excess. 
These are "active" nodes. 457 | ExcessNodes = findall(x->x!=0,excess) 458 | 459 | # Update the capacity to the sink. 460 | tToL = [tToL; tWeights[Lnew]] 461 | # Now we construct a new local flow graph, and repeat 462 | 463 | Lf = [spzeros(1,1) sparse(tToL'); 464 | sparse(tToL) L] 465 | 466 | Fold = F # Old flow, saved as a warm start 467 | 468 | # Construct an initial flow F that includes the previous flow Fold 469 | # as a warm start. First, we allocate space for future 470 | # flow. 471 | # (This is one of the most expensive parts of the expansion) 472 | nLf = size(Lf,1) 473 | 474 | F = SparseMatrixCSC(nLf,nLf,Lf.colptr,Lf.rowval,zeros(length(Lf.rowval))) 475 | F[1:Lsize+1,1:Lsize+1] = Fold 476 | 477 | Lsize = size(L,1) 478 | 479 | # Compute min s-t cut for local flow graph and see if we need to expand 480 | S_local,F,excess = Main_Push_Relabel_fs(Lf,F,ExcessNodes,excess) 481 | 482 | E_local = setdiff(S_local,C_local) # the nodes that need completing 483 | E_global = Local2Global[E_local] # their global indices 484 | 485 | end 486 | 487 | # return the global indices of the minimum cut set 488 | return Local2Global[S_local] 489 | 490 | end 491 | 492 | # A non-local version of the min-cut code that works by calling the same 493 | # subroutine, but on the entire graph all at once 494 | function NonLocalPushRelabel(A::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}, 495 | sWeights::Array{Float64},tWeights::Array{Float64}) 496 | 497 | n = size(A,1) 498 | # Directly set up the flow matrix 499 | C = [spzeros(1,1) sparse(tWeights'); 500 | sparse(tWeights) A] 501 | 502 | # Allocate space for the flow we will calculate 503 | F = SparseMatrixCSC(n+1,n+1,C.colptr,C.rowval,zeros(length(C.rowval))) 504 | 505 | # R is the set of nodes with excess, and the excess 506 | # will come from source-side edges that are immediately saturated 507 | S, F, excess = Main_Push_Relabel_fs(C,F,R.+1,[0;sWeights]) 508 | 509 | # The returned F is a preflow, not the maximum flow. 510 | # We are only interested in the cut. 511 | 512 | return S 513 | end 514 | 515 | # Main_Push_Relabel_fs returns a preflow F and the min s-t cut set S for the 516 | # flow graph C. It does not solve the maximum s-t flow problem. 517 | # 518 | # C = the capacity matrix for the flow problem. 519 | # Node 1 is the sink, and there is no explicit representation of a source, 520 | # the preflow immediately pushes all flow from the source to create an 521 | # excess on nodes in the graph. 522 | # 523 | # F = an initial flow. It can be initialize to zero. 524 | # 525 | # ExcessNodes = the set of nodes which at the start begin with some positive excess 526 | # These can be thought of as nodes that are adjacenct to the implicit source node 527 | # and the edges from the source are flooded. Or they may represent nodes that 528 | # have a nonzero excess from the initial flow F. The indices given here 529 | # should account for the fact that node 1 is already reserved for the sink. 530 | # 531 | # excess = the vector of excess values at the start of the algorithm. If F = 0, 532 | # this is the vector of edge capacities from the implicit source to the graph. 
533 | # If F != 0, then it's the excess from a previous run of the algorithm 534 | function Main_Push_Relabel_fs(C::SparseMatrixCSC{Float64,Int64}, 535 | F::SparseMatrixCSC{Float64,Int64},ExcessNodes::Array{Int64},excess::Array{Float64}) 536 | 537 | # check excess node list 538 | # assert(countnz(excess) == length(ExcessNodes)) 539 | 540 | # here, n includes only one terminal node, the sink 541 | n = size(C,1) 542 | 543 | height = zeros(Int64,n) # label/height of each node 544 | inQ = zeros(Bool,n) # list whether or not nodes are in the queue 545 | 546 | # Store adjacency list. There are ways to update this if calling 547 | # this function multiple times for growing local graphs, but it 548 | # does not appear to be a bottleneck to simply recompute frequently 549 | Neighbs,d = ConstructAdj(C,n) 550 | 551 | # We will maintain a queue of active nodes. 552 | Queue = Vector{Int64}() 553 | # An actual queue implementation is available in the DataStructures.jl 554 | # Julia package. The performane is nearly identical (and in some cases 555 | # slightly slower), thus to minimize dependency on outside packages, we 556 | # just use a Vector. 557 | 558 | # All nodes with nonzero excess are the first to be processed 559 | for v = ExcessNodes 560 | push!(Queue,v) 561 | end 562 | inQ[ExcessNodes] .= true 563 | 564 | # count the number of nodes that have been relabeled 565 | relabelings::Int64 = 0 566 | 567 | height = relabeling_bfs_fs(C,F) # compute initial distance from sink 568 | 569 | # In the code and comments, height = distance from sink = label of node 570 | 571 | # Continue until the queue no longer contains any active nodes. 572 | while length(Queue) > 0 573 | 574 | u = pop!(Queue) # Select a new active node 575 | inQ[u] = false # It's no longer in the queue 576 | 577 | if height[u] < n # Check that the node is still active 578 | 579 | # discharge flow through node u 580 | relabelings += discharge_fs!(C,F,Queue,u,Neighbs[u],height,excess,n,d[u],inQ) 581 | 582 | # if u is still active, re-place it into the queue 583 | if excess[u] > 0 && height[u] < n 584 | prepend!(Queue,u) 585 | inQ[u] = true 586 | end 587 | 588 | end 589 | 590 | # Global relabeling heuristic for push-relabel algorithm. 591 | # This recomputes distances between nodes and the sink 592 | if relabelings == n 593 | relabelings = 0 594 | dist = relabeling_bfs_fs(C,F) 595 | height = dist 596 | end 597 | 598 | end 599 | 600 | # Compute final distances from sink using BFS. Anything with distance 601 | # n will be the cut set. 602 | finalHeight = relabeling_bfs_fs(C,F) 603 | S = Vector{Int64}() 604 | for i = 2:n 605 | if finalHeight[i] == n 606 | push!(S,i-1) 607 | end 608 | end 609 | 610 | excess[1] = 0.0 # ignore whatever excess there was at the sink. 611 | return S, F, excess 612 | 613 | end 614 | 615 | # Discharege operation: pushes flow away from node u across admissible edges. 616 | # If excess[u] > 0 but no admissible edges exist, we relabel u. 
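#
# (Added note: in the loop below an edge (u,v) is treated as admissible when
# height[u] > height[v] and there is residual capacity C[u,v] - F[u,v] > 0;
# with a valid labeling this matches the usual height[u] == height[v] + 1
# push-relabel condition. A hedged usage sketch, with arguments as built in
# Main_Push_Relabel_fs:
#
#   relabelings += discharge_fs!(C, F, Queue, u, Neighbs[u], height, excess, n, d[u], inQ)
#
# where the return value is 1 if u had to be relabeled and 0 otherwise.)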
617 | function discharge_fs!(C::SparseMatrixCSC{Float64,Int64},F::SparseMatrixCSC{Float64,Int64}, 618 | Queue::Vector{Int64},u::Int64,uNeighbs::Array{Int64},height::Array{Int64}, 619 | excess::Array{Float64},n::Int64,du::Int64,inQ::Array{Bool}) 620 | 621 | vLocal::Int64 = 1 622 | hu = height[u] 623 | relabeled = 0 624 | while excess[u] > 0 && vLocal <= du 625 | v = uNeighbs[vLocal] 626 | if hu > height[v] && C[u,v] - F[u,v] > 0 627 | pushflow_fs!(C,F,Queue,u,v,excess,height,inQ,n) 628 | vLocal += 1 629 | else 630 | vLocal += 1 631 | end 632 | end 633 | 634 | if vLocal > du 635 | relabeled = 1 636 | relabel_fs!(C,F,Queue,u,uNeighbs,height,du,n) 637 | end 638 | 639 | return relabeled 640 | end 641 | 642 | # Relabel sets the label/height of node u to be equal to the minimum label 643 | # such that an admissible edge exists. An edge (u,v) is admissible if 644 | # height[u] = height[v] + 1 645 | function relabel_fs!(C::SparseMatrixCSC{Float64,Int64},F::SparseMatrixCSC{Float64,Int64}, 646 | Queue::Vector{Int64},u::Int64,uNeighbs::Array{Int64},height::Array{Int64}, 647 | du::Int64,n::Int64) 648 | # find smallest new height making a push possible, 649 | # if such a push is possible at all 650 | 651 | min_height = Inf 652 | # search through the neighbors of u 653 | # and relabel so that height[u] = height[v] + 1 for some v in the neighborhood 654 | for vLocal = 1:du 655 | v = uNeighbs[vLocal] 656 | if C[u,v] - F[u,v] > 0 657 | min_height = min(min_height, height[v]) 658 | height[u] = min_height + 1 659 | end 660 | end 661 | 662 | 663 | end 664 | 665 | # Push flow from an active node u to a node v via an admissible edge (u,v) 666 | function pushflow_fs!(C::SparseMatrixCSC{Float64,Int64},F::SparseMatrixCSC{Float64,Int64}, 667 | Queue::Vector{Int},u::Int64,v::Int64,excess::Array{Float64},height::Array{Int64}, 668 | inQ::Array{Bool},n::Int64) 669 | 670 | send = min(excess[u], C[u,v] - F[u,v]) 671 | F[u,v] += send 672 | F[v,u] -= send 673 | excess[u] -= send 674 | excess[v] += send 675 | 676 | # If v isn't in the queue, isn't the sink, is active, add it to the Queue 677 | if ~inQ[v] && v > 1 && height[v] < n 678 | prepend!(Queue,v) 679 | inQ[v] = true 680 | end 681 | end 682 | 683 | # From the adjacency matrix, build an adjacency list for the graph 684 | function ConstructAdj(C::SparseMatrixCSC{Float64,Int64},n::Int64) 685 | 686 | rp = C.rowval 687 | ci = C.colptr 688 | Neighbs = Vector{Vector{Int64}}() 689 | d = zeros(Int64,n) 690 | for i = 1:n 691 | # chop up the rp vector and put it in Neighbs 692 | push!(Neighbs,rp[ci[i]:ci[i+1]-1]) 693 | d[i] = ci[i+1]-ci[i] 694 | end 695 | 696 | # d is the number of neighbors. This is the unweighted degree, 697 | # but note importantly that if the original graph is weighted this is 698 | # not the same as the degree vector d we will sometimes use 699 | return Neighbs, d 700 | 701 | end 702 | 703 | # Given initial capacity matrix C and flow matrix F, compute the distance 704 | # from each node to the sink via residual edges. Distance = n means there is no 705 | # path to the sink. Sink node is assumed to be node 1. 
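#
# (Added usage sketch, following how the routine is called elsewhere in this
# file: dist = relabeling_bfs_fs(C, F) gives BFS distances to the sink in the
# residual graph of C - F. Here dist[1] == 0 for the sink itself, and any node
# i with dist[i] == n has no residual path to the sink, which is how the final
# min-cut set S is extracted in Main_Push_Relabel_fs.)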
706 | function relabeling_bfs_fs(C::SparseMatrixCSC{Float64,Int64},F::SparseMatrixCSC{Float64,Int64}) 707 | 708 | # To avoid subtraction cancellation errors that may have ocurred when pushing 709 | # flow, when computing a bfs we round edges to zero if they are under 1e-8 710 | Cf = round.((C-F),digits =6) 711 | n = size(Cf,1) 712 | 713 | rp = Cf.colptr 714 | ci = Cf.rowval 715 | 716 | N=length(rp)-1 717 | 718 | d = n*ones(Int64,N) 719 | sq=zeros(Int64,N) 720 | sqt=0 721 | sqh=0 # search queue and search queue tail/head 722 | 723 | # start bfs at the sink, which is node 1 724 | u = 1 725 | sqt=sqt+1 726 | sq[sqt]=u 727 | d[u]=0 728 | while sqt-sqh>0 729 | sqh=sqh+1 730 | v=sq[sqh] # pop v off the head of the queue 731 | for ri=rp[v]:rp[v+1]-1 732 | w=ci[ri] 733 | if d[w] > n-1 734 | sqt=sqt+1 735 | sq[sqt]=w 736 | d[w]= d[v]+1 737 | end 738 | end 739 | end 740 | 741 | return d 742 | end 743 | -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nveldt/HypergraphFlowClustering/b1213c63c75ffd2ea065afe222ff282cdabc18d1/src/.DS_Store -------------------------------------------------------------------------------- /src/Helper_Functions.jl: -------------------------------------------------------------------------------- 1 | using SparseArrays 2 | using MAT 3 | using MatrixNetworks 4 | using LinearAlgebra 5 | 6 | # Controlled growth from a seed set R in a hypergraph. Look at the one hop 7 | # neighborhood, and order all of those nodes by how many hyperedges they are in 8 | # that have nodes from R. Order that last, and take the top k 9 | function TopNeighbors(H::SparseMatrixCSC{Float64,Int64},R::Vector{Int64},R1hop::Vector{Int64},k::Int64) 10 | 11 | if length(R1hop) > k 12 | # Get all edges touching R 13 | HR = H[:,R] 14 | rp = HR.rowval 15 | edges = unique(rp) 16 | 17 | # Consider how many touch the 1-hop neighborhood 18 | HL = H[edges,R1hop] 19 | 20 | # For each node in R1hop, compute the number of edges it has that touch R 21 | d2R = vec(sum(HL,dims=1)) 22 | 23 | # order = sortperm(d2R, rev=true) 24 | b = partialsortperm(d2R, 1:k, rev=true) 25 | Rmore = R1hop[b] 26 | else 27 | Rmore = R1hop 28 | end 29 | 30 | return union(R, Rmore) 31 | end 32 | 33 | 34 | # Controlled growth from a seed set R in a hypergraph. Look at the one hop 35 | # neighborhood, and order all of those nodes by what percent of their 36 | # edges touch R. 
Order that, and take the top k 37 | function BestNeighbors(H::SparseMatrixCSC{Float64,Int64},d::Vector{Float64},R::Vector{Int64},R1hop::Vector{Int64},k::Int64) 38 | 39 | if length(R1hop) > k 40 | # Get all edges touching R 41 | HR = H[:,R] 42 | rp = HR.rowval 43 | edges = unique(rp) 44 | 45 | # Consider how many touch the 1-hop neighborhood 46 | HL = H[edges,R1hop] 47 | 48 | # For each node in R1hop, compute the number of edges it has that touch R 49 | d1 = d[R1hop] 50 | d2 = vec(sum(HL,dims=1)) 51 | 52 | # order = sortperm(d2R, rev=true) 53 | b = partialsortperm(d2./d1, 1:k, rev=true) 54 | Rmore = R1hop[b] 55 | else 56 | Rmore = R1hop 57 | end 58 | return union(R, Rmore) 59 | end 60 | 61 | 62 | # Simple function for returning ALL the indices where we find a maximum 63 | function findallmax(v) 64 | 65 | l = length(v) 66 | m = minimum(v) 67 | M = maximum(v) 68 | if M == m 69 | return collect(1:l) 70 | else 71 | Inds = Vector{Int64}() 72 | a,b = findmax(v) 73 | while a == M 74 | push!(Inds,b) 75 | v[b] = m 76 | a,b = findmax(v) 77 | end 78 | return Inds 79 | end 80 | 81 | end 82 | 83 | 84 | ## Delta-Linear (tl = thresholded linear) conductance computation. 85 | # e.g. tl_cond(H,S,d,delta,volA,order) 86 | function tl_cond(H::SparseMatrixCSC,S::Vector{Int64},d::Vector{Float64},delta::Float64,volA::Float64,order::Vector{Int64}) 87 | 88 | if volA == 0.0 89 | volA = sum(d) 90 | end 91 | n = length(d) 92 | volS = sum(d[S]) 93 | cut = tl_cut(H,S,delta,order) 94 | 95 | cond = cut/min(volS, volA-volS) 96 | 97 | return cond, volS, cut 98 | 99 | end 100 | 101 | ## Delta-Linear (thresholded linear) normalized Cut computation. 102 | # e.g. tl_ncut(H,S,d,delta,volA,order) 103 | function tl_ncut(H::SparseMatrixCSC,S::Vector{Int64},d::Vector{Float64},delta::Float64,volA::Float64,order::Vector{Int64}) 104 | 105 | if volA == 0.0 106 | volA = sum(d) 107 | end 108 | n = length(d) 109 | volS = sum(d[S]) 110 | cut = tl_cut(H,S,delta,order) 111 | 112 | cond = cut/min(volS, volA-volS) 113 | ncut = cut/(volS) + cut/(volA-volS) 114 | 115 | # rncut = round(Int64,ncut) 116 | # rcut = round(Int64,cut) 117 | # rcond = round(cond,digits = 4) 118 | # rvol = round(Int64,volS) 119 | 120 | return cond, ncut, volS, cut 121 | 122 | end 123 | 124 | # Thresholded linear cut value for a set 125 | # calling e.g. tl_cut(H,S,delta,order) 126 | function tl_cut(H::SparseMatrixCSC{Float64,Int64}, S::Vector{Int64}, delta::Float64,order::Vector{Int64}) 127 | 128 | # Check the cut 129 | HS = H[:,S] 130 | sumHS = sum(HS,dims = 2) # Count number of S nodes in each hyperedge 131 | inds = findall(x->x>0,sumHS) # Extract hyperedges with > 0 nodes from S 132 | ES = sumHS[inds] 133 | verts = order[inds] # Get the size of these hyperedges 134 | 135 | # Find number of nodes on small side of cut 136 | SmallSide = round.(Int64,min.(ES, verts-ES)) 137 | # Compute the cardinality-based cut score 138 | cutval = 0.0 139 | for j = 1:length(SmallSide) 140 | sm = SmallSide[j] 141 | if sm > 0 142 | if sm < delta 143 | cutval += sm 144 | else 145 | cutval += delta 146 | end 147 | end 148 | end 149 | 150 | return cutval 151 | end 152 | 153 | 154 | # For a set S in a hypergraph, return the hypergraph local conductance 155 | # score with thresholded linear splitting penalty 156 | # e.g. 
hlc_tl(H,order,R,S,d,volA,epsilon,delta) 157 | function hlc_tl(H::SparseMatrixCSC{Float64,Int64},order::Vector{Int64},R::Vector{Int64}, 158 | S::Vector{Int64},d::Vector{Float64},volA::Float64,epsilon::Float64, 159 | delta::Float64,) 160 | 161 | volS = sum(d[S]) 162 | RnS = intersect(R,S) 163 | volRnS = sum(d[RnS]) 164 | cut = tl_cut(H,S,delta,order) 165 | 166 | lcond = cut/((1+epsilon)*volRnS - epsilon*volS) 167 | 168 | return lcond 169 | 170 | end 171 | 172 | # Expand a hypergraph using the thresholded linear splitting function. 173 | # 174 | # Hyperedges = Hyperedge list 175 | # delta = TL splitting function parameter 176 | function tl_expansion(Hyperedges::Vector{Vector{Int64}}, order::Vector{Int64}, delta::Float64,n::Int64) 177 | 178 | BigEdges = length(findall(x->x>3,order)) 179 | N = n + 2*BigEdges 180 | 181 | ## Build the adjacency matrix 182 | ap = n+1 # "auxiliary node pointer", points to next "free" aux node 183 | 184 | # Build the sparse matrix 185 | U = Vector{Int64}() 186 | V = Vector{Int64}() 187 | vals = Vector{Float64}() 188 | 189 | for edge = Hyperedges 190 | nv = length(edge) 191 | if nv == 2 192 | i = edge[1]; j = edge[2] 193 | #A[i,j] += 1; A[j,i] += 1 194 | push!(U,i); push!(V,j); push!(vals,1) 195 | push!(U,j); push!(V,i); push!(vals,1) 196 | elseif nv == 3 197 | i = edge[1]; j = edge[2]; k = edge[3] 198 | # A[i,j] += 1/2; A[j,i] += 1/2 199 | # A[k,j] += 1/2; A[j,k] += 1/2 200 | # A[k,i] += 1/2; A[i,k] += 1/2 201 | push!(U,i); push!(V,j); push!(vals,1/2) 202 | push!(U,j); push!(V,i); push!(vals,1/2) 203 | push!(U,i); push!(V,k); push!(vals,1/2) 204 | push!(U,k); push!(V,i); push!(vals,1/2) 205 | push!(U,j); push!(V,k); push!(vals,1/2) 206 | push!(U,k); push!(V,j); push!(vals,1/2) 207 | else 208 | # We need to add auxiliary vertices 209 | for i = edge 210 | # A[i,auxpointer] = 1 211 | # A[auxpointer+1,i] = 1 212 | # A[auxpointer,auxpointer+1] = w2 213 | push!(U,i); push!(V,ap); push!(vals,1) 214 | push!(U,ap+1); push!(V,i); push!(vals,1) 215 | end 216 | push!(U,ap); push!(V,ap+1); push!(vals,delta) 217 | ap += 2 218 | end 219 | 220 | end 221 | @show maximum(U), maximum(V), N 222 | A = sparse(U,V,vals,N,N) 223 | return A 224 | end 225 | 226 | # Given an incidence matrix for a hypergraph and its transpose (having both 227 | # handy makes different parts of the code faster), and a set of nodes R, 228 | # return the immediate neighbors of R that don't include R itself 229 | function get_immediate_neighbors(H::SparseMatrixCSC{Float64,Int64}, 230 | Ht::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}) 231 | 232 | Hr = H[:,R] 233 | rp_r = Hr.rowval 234 | R_edges = unique(rp_r) 235 | 236 | He = Ht[:,R_edges] 237 | rp_e = He.rowval 238 | Rneighbs = unique(rp_e) 239 | Rn = setdiff(Rneighbs,R) 240 | 241 | return Rn 242 | 243 | end 244 | 245 | function neighborhood(H::SparseMatrixCSC{Float64,Int64}, 246 | Ht::SparseMatrixCSC{Float64,Int64},R::Vector{Int64}) 247 | Hr = H[:,R] 248 | rp_r = Hr.rowval 249 | R_edges = unique(rp_r) 250 | 251 | He = Ht[:,R_edges] 252 | rp_e = He.rowval 253 | Rn = unique(rp_e) 254 | 255 | return Rn 256 | end 257 | 258 | function neighborlist(H::SparseMatrixCSC{Float64,Int64}, 259 | Ht::SparseMatrixCSC{Float64,Int64}) 260 | 261 | Neighbs = Dict() 262 | n = size(H,2) 263 | t1 = 0 264 | t2 = 0 265 | t3 = 0 266 | for i = 1:n 267 | # s = time() 268 | ivec = H[:,i] 269 | #n_edges = findnz(ivec)[1] 270 | n_edges = ivec.nzind # get neighboring edges 271 | # t1 += time()-s 272 | 273 | # s = time() 274 | He = Ht[:,n_edges] # nodes touching those edges 275 | rp_e = 
He.rowval 276 | neighbs_i = unique(rp_e) 277 | # t2 += time()-s 278 | 279 | # s = time() 280 | push!(Neighbs,neighbs_i) 281 | # t3 += time()-s 282 | end 283 | # @show t1, t2, t3 284 | return Neighbs 285 | end 286 | 287 | # Expand a hypergraph using the thresholded linear splitting function. 288 | # 289 | # H = |E| x |V| binary incidence matrix for the hypergraph 290 | # delta = TL splitting function parameter 291 | function tl_expansion_inc(H::SparseMatrixCSC{Float64,Int64}, order::Vector{Int64}, delta::Float64) 292 | 293 | n = size(H,2) 294 | BigEdges = length(findall(x->x>3,order)) 295 | N = n + 2*BigEdges 296 | 297 | Hyperedges = incidence2elist(H) 298 | 299 | ## Build the adjacency matrix 300 | ap = n+1 # "auxiliary node pointer", points to next "free" aux node 301 | 302 | # Build the sparse matrix 303 | U = Vector{Int64}() 304 | V = Vector{Int64}() 305 | vals = Vector{Float64}() 306 | 307 | for ee = 1:length(Hyperedges) 308 | edge = Hyperedges[ee] 309 | nv = length(edge) 310 | # if order[ee] != nv 311 | # @show ee, nv, order[ee], edge 312 | # end 313 | if nv == 1 314 | # ignore 315 | # println("This") 316 | elseif nv == 2 317 | i = edge[1]; j = edge[2] 318 | #A[i,j] += 1; A[j,i] += 1 319 | push!(U,i); push!(V,j); push!(vals,1) 320 | push!(U,j); push!(V,i); push!(vals,1) 321 | elseif nv == 3 322 | i = edge[1]; j = edge[2]; k = edge[3] 323 | # A[i,j] += 1/2; A[j,i] += 1/2 324 | # A[k,j] += 1/2; A[j,k] += 1/2 325 | # A[k,i] += 1/2; A[i,k] += 1/2 326 | push!(U,i); push!(V,j); push!(vals,1/2) 327 | push!(U,j); push!(V,i); push!(vals,1/2) 328 | push!(U,i); push!(V,k); push!(vals,1/2) 329 | push!(U,k); push!(V,i); push!(vals,1/2) 330 | push!(U,j); push!(V,k); push!(vals,1/2) 331 | push!(U,k); push!(V,j); push!(vals,1/2) 332 | else 333 | # We need to add auxiliary vertices 334 | for i = edge 335 | # A[i,auxpointer] = 1 336 | # A[auxpointer+1,i] = 1 337 | # A[auxpointer,auxpointer+1] = delta 338 | push!(U,i); push!(V,ap); push!(vals,1) 339 | push!(U,ap+1); push!(V,i); push!(vals,1) 340 | end 341 | push!(U,ap); push!(V,ap+1); push!(vals,delta) 342 | ap += 2 343 | end 344 | 345 | end 346 | # @show maximum(U), maximum(V), length(U), length(V), N, ap 347 | A = sparse(U,V,vals,N,N) 348 | return A 349 | end 350 | 351 | # Convert a hyperedge list to a hypergraph binary incidence matrix 352 | function incidence2elist(Hin::SparseMatrixCSC{Float64,Int64}) 353 | 354 | H = sparse(Hin') 355 | rp = H.rowval 356 | ci = H.colptr 357 | Hyperedges = Vector{Vector{Int64}}() 358 | n,m = size(H) 359 | 360 | for i = 1:m 361 | startedge = ci[i] 362 | endedge = ci[i+1]-1 363 | edge = rp[startedge:endedge] 364 | push!(Hyperedges,edge) 365 | end 366 | return Hyperedges 367 | end 368 | 369 | 370 | # Take a list of hyperedges and turn it into a hyperedge incidence matrix 371 | # H. N is the number of nodes in the hypergraph. 
372 | # H(e,u) = 1 iff node u is in hyperedge e 373 | function elist2incidence(Hyperedges::Vector{Vector{Int64}}, N::Int64) 374 | U = Vector{Int64}() 375 | E = Vector{Int64}() 376 | M = length(Hyperedges) 377 | for enum = 1:length(Hyperedges) 378 | e = Hyperedges[enum] 379 | for node in e 380 | push!(U,node) 381 | push!(E,enum) 382 | end 383 | end 384 | 385 | H = sparse(E,U,ones(length(U)),M,N) 386 | return H 387 | end 388 | 389 | 390 | # This computes the precision, recall, and F1 score for a set Returned 391 | # compared against a Target set 392 | function PRF(Target,Returned) 393 | 394 | if length(Returned) == 0 395 | pr = 0; re = 0; F1 = 0 396 | else 397 | TruePos = intersect(Returned,Target) 398 | pr = length(TruePos)/length(Returned) 399 | re = length(TruePos)/length(Target) 400 | F1 = 2*(pr*re)/(pr+re) 401 | 402 | if length(TruePos) == 0 403 | F1 = 0 404 | end 405 | end 406 | 407 | return pr, re, F1 408 | 409 | end 410 | 411 | 412 | 413 | ## Given a binary incidence matrix H for a hypergraph, find the one-hop 414 | # neighborhood of a set of nodes S 415 | function hyper_neighborhood(H::SparseMatrixCSC{Float64,Int64},S::Vector{Int64}) 416 | 417 | A = H'*H 418 | n = size(A,1) 419 | for i = 1:n 420 | A[i,i] = 0 421 | end 422 | dropzeros!(A) 423 | return neighborhood(A,S,1) 424 | 425 | end 426 | 427 | ## Given a binary incidence matrix H for a hypergraph, find the one-hop 428 | # neighborhood of a set of nodes S, when considering only hyperedges with 429 | # a maximum number of M nodes 430 | function hyper_neighborhood(H::SparseMatrixCSC{Float64,Int64},S::Vector{Int64},order::Vector{Int64},M::Int64) 431 | 432 | good = findall(x->x<=M,order) 433 | H = H[good,:] 434 | 435 | ## 436 | A = H'*H 437 | n = size(A,1) 438 | for i = 1:n 439 | A[i,i] = 0 440 | end 441 | dropzeros!(A) 442 | return neighborhood(A,S,1) 443 | 444 | end 445 | 446 | ## Simple Clique Expansion 447 | # A[i,j] = number of hyperedges nodes i and j share 448 | function SimpleCliqueExp(H::SparseMatrixCSC{Float64,Int64}) 449 | 450 | A = H'*H 451 | for i = 1:size(A,1) 452 | A[i,i] = 0.0 453 | end 454 | dropzeros!(A) 455 | return A 456 | end 457 | 458 | ## Weighted Clique Expansion 459 | # When performing the clique expansion, for each hyperedge expanded into a 460 | # clique, multiply each edge in the expansion by 1/order(e) 461 | function WeightedCliqueExpansion(H::SparseMatrixCSC{Float64,Int64}, order::Vector{Int64}) 462 | 463 | m,n = size(H) 464 | I = Vector{Int64}() 465 | J = Vector{Int64}() 466 | vals = Vector{Float64}() 467 | Hyperedges = incidence2elist(H) 468 | for e = 1:m 469 | Edge = Hyperedges[e] 470 | Ord = order[e] 471 | for ii = 1:length(Edge) 472 | for jj = ii+1:length(Edge) 473 | i = Edge[ii] 474 | j = Edge[jj] 475 | push!(I,i); push!(J,j); push!(vals,1/Ord) 476 | end 477 | end 478 | if mod(e,10000)==0 479 | println("$e") 480 | end 481 | end 482 | 483 | A = sparse(I,J,vals,n,n) 484 | A = sparse(A+A') 485 | return A 486 | end 487 | -------------------------------------------------------------------------------- /src/HyperLocal.jl: -------------------------------------------------------------------------------- 1 | # Strongly-local code for minimizing the HCL objective 2 | # Implemented with the thresholded linear hyperedge splitting penalty. 3 | 4 | include("Helper_Functions.jl") 5 | include("maxflow.jl") 6 | 7 | """ 8 | HyperLocal: minimizes HLC with the thresholded-linear (TL) hypergraph cut 9 | function, with parameter delta. In other words, the splitting function 10 | penalty is min { |S| , |e - S|, delta}. 
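(Added clarification: |S| and |e - S| above refer to the number of nodes of a
hyperedge e that fall on each side of the cut. With delta = 1.0 every cut
hyperedge therefore pays exactly 1, whereas a large threshold such as the
delta = 5000.0 used in the Stack Overflow experiments effectively leaves the
linear penalty min { |S| , |e - S| } in place.)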
11 | Strongly-local time 12 | 13 | H: Binary indicence matrix for hypergraph 14 | Hyperedges: A list of hyperedges defining the hypergraph 15 | order: Order (number of nodes) in each hyperedge 16 | d: Degree vector, d[v] = number of hyperedges a node is in 17 | R: Set of nodes in seed/reference set 18 | epsilon: Locality parameter, must exceed vol(R)/vol(bar{R}) 19 | delta: Threshold cut penalty. 20 | """ 21 | function HyperLocal(H::SparseMatrixCSC{Float64,Int64},Ht::SparseMatrixCSC{Float64,Int64}, 22 | order::Vector{Int64},d::Vector{Float64},R::Vector{Int64}, 23 | epsilon::Float64, delta::Float64,Rs_local::Vector{Int64},localflag::Bool=true) 24 | 25 | m,n = size(H) 26 | 27 | volA = sum(d) 28 | volR = sum(d[R]) 29 | # @assert(volR <= volA/2) 30 | Rstrong = R[Rs_local] 31 | # Check Locality Parameter 32 | fR = volR/(volA - volR) 33 | @show fR, volR, volA 34 | if epsilon < fR 35 | println("Locality parameter epsilon was set too small. 36 | Setting it to lower bound of $fR. Computations will not be local.") 37 | epsilon = fR 38 | localflag = false 39 | end 40 | A = 0; N = 0; 41 | if localflag 42 | 43 | if volA*epsilon/volR < 10 44 | println("Note that vol(R)/epsilon = O(vol(G)). 45 | For these parameters \nit may be faster to run the algorithm 46 | without the locality setting.") 47 | end 48 | 49 | else 50 | A = tl_expansion_inc(H,order,delta) 51 | N = round(Int64,size(A,1)) 52 | end 53 | 54 | # Store useful sets 55 | # Rn = hyper_neighborhood(H,R) # get the immediate neighbors of R... 56 | # Rn = setdiff(Rn,R) # ...but we exclude R itself 57 | Rn = get_immediate_neighbors(H,Ht,R) 58 | Rc = setdiff(1:n,R) # Complement set of R 59 | nR = length(R) 60 | 61 | condR,volR, cutR = tl_cond(H,R,d,delta,volA,order) 62 | 63 | println("\nRunning HyperLocal") 64 | println("----------------------------------------") 65 | println("Epsilon = $epsilon \t Delta = $delta") 66 | println("|R| = $nR, cond(R) = $condR") 67 | println("-------------------------------------------------------------------------") 68 | 69 | S_best = R 70 | a_best = condR 71 | a_old = condR 72 | still_improving = true 73 | Iteration = 1 74 | while still_improving 75 | 76 | still_improving = false 77 | 78 | stepstart = time() 79 | if localflag 80 | S_new = HyperLocal_Step(H,Ht,order,R,Rn,a_best,epsilon,delta,d,Rs_local) 81 | else 82 | S_new = HLC_Step(A,R,Rc,a_best,epsilon,N,d,n,Rs_local) 83 | end 84 | stime = round(time()-stepstart,digits=1) 85 | 86 | a_new = hlc_tl(H,order,R,S_new,d,volA,epsilon,delta) 87 | 88 | if a_new < a_old 89 | still_improving = true 90 | S_best = S_new 91 | nS = length(S_best) 92 | a_old = a_new 93 | a_best = a_new 94 | println("Iter $Iteration: |S| = $nS, lcond(S) = $a_new, min-cut took $stime seconds") 95 | else 96 | println("Iter $Iteration: Algorithm converged. 
Last min-cut took $stime sec") 97 | println("-------------------------------------------------------------------------") 98 | end 99 | Iteration += 1 100 | end 101 | 102 | return S_best, a_best 103 | end 104 | 105 | 106 | # A non-local version of the min-cut code that works by calling the same 107 | # subroutine, but on the entire graph all at once 108 | function HLC_Step(A::SparseMatrixCSC{Float64,Int64},R::Vector{Int64},Rbar::Vector{Int64}, 109 | alpha::Float64, epsilon::Float64, N::Int64, d::Vector{Float64},n::Int64,Rs_local::Vector{Int64}) 110 | 111 | Rstrong = R[Rs_local] 112 | # Directly set up the flow matrix 113 | sVec = zeros(N) 114 | tVec = zeros(N) 115 | sVec[R] .= alpha*d[R] 116 | sVec[Rstrong] .= N^2 117 | tVec[Rbar] .= alpha*epsilon*d[Rbar] 118 | F = maxflow(A,sVec,tVec,0) 119 | Src = source_nodes_min(F)[2:end].-1 120 | S = intersect(1:n,Src) 121 | 122 | return S 123 | end 124 | 125 | # Strongly-local subroutine for computing a minimum s-t cut 126 | # This uses the thresholded linear splitting function for each hyperegde 127 | function HyperLocal_Step(H::SparseMatrixCSC{Float64,Int64},Ht::SparseMatrixCSC{Float64,Int64}, 128 | order::Vector{Int64}, R::Vector{Int64},Rn::Vector{Int64},alpha::Float64, 129 | epsilon::Float64,delta::Float64,d::Vector{Float64},Rs_local::Vector{Int64}) 130 | 131 | # Map from local node indices to global node indices 132 | Local2Global = [R; Rn] 133 | 134 | n = length(d) 135 | 136 | # Keep track of which nodes are in the local hypergraph L 137 | inL = zeros(Bool,n) 138 | inL[Local2Global] .= true 139 | 140 | # Number of nodes in the local graph 141 | Lsize = length(Local2Global) 142 | 143 | # Complete nodes = nodes whose hyperedge set in the local hypergraph 144 | # is the same as their global hyperedge set 145 | # Incomplete nodes = everything else in the local hypergraph 146 | # (must be a neighbor of a complete node) 147 | # 148 | # Initialize the complete set to be R 149 | # Incomplete set is R-complement 150 | C_global = R 151 | I_global = Rn 152 | 153 | # Indices, in the local graph, of complete and incomplete nodes 154 | C_local = collect(1:length(R)) 155 | I_local = collect(length(R)+1:Lsize) 156 | R_local = collect(1:length(R)) 157 | Rstrong_local = R_local[Rs_local] 158 | 159 | # Get the set of hyperedges to expand around. 160 | # At first this is every hyperedge that touches 161 | # a node from R. 162 | Hc = H[:,C_global] 163 | rp_c = Hc.rowval 164 | # ci_c = Hc.colptr 165 | L_edges = unique(rp_c) 166 | 167 | # Binary indicence matrix for the local hypergraph (without terminal edges) 168 | HL = H[L_edges,Local2Global] 169 | order_L = order[L_edges] 170 | 171 | # Expand into a directed graph 172 | A_L = tl_expansion_inc(HL,order_L,delta) 173 | N_L = size(A_L,1) # includes auxiliary nodes 174 | n_L = length(Local2Global) # number of non-auxiliary nodes in A_L 175 | 176 | # Find the first mincut, which can be done by calling HLC_Step 177 | # with localized objects 178 | S_local = HLC_Step(A_L,C_local,I_local,alpha,epsilon,N_L,d[Local2Global],n_L,Rstrong_local) 179 | 180 | # Find nodes to "expand" around: 181 | # any nodes in the cut set tha are "incomplete" still 182 | E_local = intersect(S_local,I_local) 183 | E_global = Local2Global[E_local] 184 | 185 | # ne = length(E_global) 186 | # println("There are $ne new nodes to expand on") 187 | 188 | # As long as we have new nodes to expand around, we haven't yet found 189 | # the global minimum s-t cut, so we continue. 
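    # (Added summary of the expansion loop below: each pass (i) promotes the
    # newly cut incomplete nodes E to the complete set, (ii) pulls their unseen
    # neighbors into the local hypergraph as new incomplete nodes, (iii) rebuilds
    # the thresholded-linear expansion A_L and re-solves the localized min-cut
    # via HLC_Step. The loop exits once the returned cut set contains no
    # incomplete nodes, at which point it is the global minimum s-t cut.)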
190 | while length(E_local) > 0 191 | 192 | # Update which nodes are complete and which are incomplete 193 | C_local = [C_local; E_local] 194 | C_global = Local2Global[C_local] 195 | 196 | # Take these away from I_local 197 | I_local = setdiff(I_local,E_local) 198 | 199 | # Bring in neighbors of the newly completed nodes that are not yet in L 200 | Nbs_of_E = get_immediate_neighbors(H,Ht,E_global) 201 | Lnew = setdiff(Nbs_of_E,Local2Global) 202 | numNew = length(Lnew) 203 | # Update the set of indices in L 204 | Local2Global = [Local2Global; Lnew] 205 | 206 | # Store local indices for new nodes added to L 207 | Lnew_local = collect((Lsize+1):(Lsize+numNew)) 208 | Lsize = length(Local2Global) 209 | 210 | # These are going to be "incomplete" nodes 211 | I_local = [I_local; Lnew_local] 212 | I_global = Local2Global[I_local] 213 | 214 | # Now that we have a new set of complete and incomplete nodes, 215 | # we do the same thing over again to find a localized min-cut 216 | Hc = H[:,C_global] 217 | rp_c = Hc.rowval 218 | # ci_c = Hc.colptr 219 | L_edges = unique(rp_c) 220 | 221 | # Binary incidence matrix for the local hypergraph (without terminal edges) 222 | HL = H[L_edges,Local2Global] 223 | order_L = order[L_edges] 224 | 225 | # Expand into a directed graph 226 | A_L = tl_expansion_inc(HL,order_L,delta) 227 | N_L = size(A_L,1) # includes auxiliary nodes 228 | n_L = length(Local2Global) # number of non-auxiliary nodes in A_L 229 | 230 | # Find the next mincut by calling HLC_Step 231 | # with localized objects 232 | R_bar_l = setdiff(1:n_L,R_local) 233 | S_local = HLC_Step(A_L,R_local,R_bar_l,alpha,epsilon,N_L,d[Local2Global],n_L,Rstrong_local) 234 | 235 | # Find nodes to "expand" around: 236 | # any nodes in the cut set that are "incomplete" still 237 | E_local = intersect(S_local,I_local) 238 | E_global = Local2Global[E_local] 239 | # ne = length(E_global) 240 | # println("There are $ne new nodes to expand on") 241 | end 242 | 243 | return Local2Global[S_local] 244 | end 245 | -------------------------------------------------------------------------------- /src/maxflow.jl: -------------------------------------------------------------------------------- 1 | using MatrixNetworks 2 | using SparseArrays 3 | 4 | # Push Relabel solver for maximum s-t flow, minimum s-t cut problems 5 | 6 | mutable struct stFlow 7 | flowvalue::Float64 # gives you the max-flow value 8 | cutvalue::Float64 # gives min-cut value, which should equal flowvalue, 9 | # but may differ by a small tolerance value. 10 | source_nodes::Vector{Int64} # give the indices of the nodes attached to the source 11 | C::SparseMatrixCSC # gives the original capacity matrix 12 | F::SparseMatrixCSC # gives the values of the flows on each edge 13 | s::Int64 # index of source node 14 | t::Int64 # index of sink node 15 | end 16 | 17 | """ 18 | maxflow 19 | 20 | Given a sparse matrix B representing a weighted and possibly directed graph, 21 | a source node s, and a sink node t, return the maximum s-t flow. 22 | 23 | flowtol = tolerance parameter for whether there is still capacity available on 24 | an edge. Helps avoid rounding errors. Default is 1e-6. 25 | 26 | Returns F, which is of type stFlow. 27 | """ 28 | function maxflow(B::Union{SparseMatrixCSC,MatrixNetwork},s::Int,t::Int, flowtol::Union{Float64,Int}= 1e-6) 29 | 30 | if flowtol >= .1 31 | println("flowtol is a tolerance parameter for rounding small residual capacity edges to zero, and should be much smaller than $flowtol. 
Changing it to default value 1e-6") 32 | flowtol = 1e-6 33 | end 34 | 35 | # The code actually assumes a SparseMatrixCSC input 36 | if typeof(B) <: SparseMatrixCSC 37 | else 38 | B = sparse(B) 39 | end 40 | 41 | N = size(B,1) 42 | 43 | # Extract weights from source s to non-terminal nodes, 44 | # and from non-terminal nodes to sink node t 45 | sWeights = Array(B[s,:]) 46 | tWeights = Array(B[:,t]) 47 | NonTerminal = setdiff(collect(1:N),[s t]) 48 | 49 | sWeights = sWeights[NonTerminal] 50 | tWeights = tWeights[NonTerminal] 51 | 52 | # Extract the edges between non-terminal nodes 53 | A = B[NonTerminal,NonTerminal] 54 | 55 | # A = the matrix of capacities for all nodes EXCEPT the source and sink 56 | # sWeights = a vector of weights for edges from source to non-terminal nodes 57 | # tWeights = vector of weights from non-terminal nodes to the sink node t. 58 | 59 | # This is the map from the original node indices to the rearranged 60 | # version in which the source is the first node and the sink is the last 61 | Map = [s; NonTerminal; t] 62 | 63 | # Directly set up the flow matrix 64 | C = [spzeros(1,1) sparse(sWeights') spzeros(1,1); 65 | sparse(sWeights) A sparse(tWeights); 66 | spzeros(1,1) sparse(tWeights') spzeros(1,1)] 67 | 68 | # Allocate space for the flow we will calculate 69 | # In a flow problem, we will eventually need to send flow the reverse 70 | # direction, so it's important to allocate space for F[i,j] if C[j,i] is an 71 | # edge, even if C[i,j] is not directed 72 | Cundir = C+C' 73 | F = SparseMatrixCSC(N,N,Cundir.colptr,Cundir.rowval,zeros(length(Cundir.rowval))) 74 | ExcessNodes = vec(round.(Int64,findall(x->x!=0,sWeights).+1)) 75 | 76 | # Initialize the Preflow and the excess vector 77 | for v = ExcessNodes 78 | F[1,v] = C[1,v] 79 | F[v,1] = -C[1,v] 80 | end 81 | excess = [0;sWeights;0] 82 | source_nodes, FlowMat, value = Main_Push_Relabel(C,F,ExcessNodes,excess,flowtol) 83 | 84 | smap = sortperm(Map) 85 | F = stFlow(value, value, sort(Map[source_nodes]),C[smap,smap],FlowMat[smap,smap],s,t) 86 | return F 87 | end 88 | 89 | """ 90 | This maxflow code assumes that A represents the adjacencies between 91 | non-terminal nodes. Edges adjacent to source node s and sink node t 92 | are given by vectors svec and tvec. 93 | 94 | This code sets s as the first node, and t as the last node. 95 | """ 96 | function maxflow(A::Union{SparseMatrixCSC,MatrixNetwork},svec::Vector{Float64},tvec::Vector{Float64}, flowtol::Union{Float64,Int}= 1e-6) 97 | 98 | if flowtol >= .1 99 | println("flowtol is a tolerance parameter for rounding small residual capacity edges to zero, and should be much smaller than $flowtol. Changing it to default value 1e-6") 100 | flowtol = 1e-6 101 | end 102 | if typeof(A) <: SparseMatrixCSC 103 | else 104 | A = sparse(A) 105 | end 106 | 107 | 108 | # Directly set up the flow matrix 109 | C = [spzeros(1,1) sparse(svec') spzeros(1,1); 110 | sparse(svec) A sparse(tvec); 111 | spzeros(1,1) sparse(tvec') spzeros(1,1)] 112 | 113 | N = size(C,1) 114 | 115 | # Allocate space for the flow we will calculate 116 | # In a flow problem, we will eventually need to send flow the reverse 117 | # direction, so it's important to allocate space for F[i,j] if C[j,i] is an 118 | # edge, even if C[i,j] is not directed. 
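# Note on the preflow setup below: the symmetrized pattern C + C' gives F a stored
# entry for both directions of every edge, the loop saturates each edge leaving the
# source (F[1,v] = C[1,v], with -C[1,v] recorded on the reverse edge), and
# excess = [0;svec;0] starts each source neighbor with excess equal to that capacity.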
119 | Cundir = C+C' 120 | F = SparseMatrixCSC(N,N,Cundir.colptr,Cundir.rowval,zeros(length(Cundir.rowval))) 121 | ExcessNodes = vec(round.(Int64,findall(x->x!=0,svec).+1)) 122 | 123 | # Initialize the Preflow and the excess vector 124 | for v = ExcessNodes 125 | F[1,v] = C[1,v] 126 | F[v,1] = -C[1,v] 127 | end 128 | excess = [0;svec;0] 129 | source_nodes, FlowMat, value = Main_Push_Relabel(C,F,ExcessNodes,excess,flowtol) 130 | 131 | F = stFlow(value,value,source_nodes,C,FlowMat,1,N) 132 | end 133 | 134 | maxflow(A::Union{SparseMatrixCSC,MatrixNetwork},svec::Vector{Int64},tvec::Vector{Int64},flowtol::Union{Float64,Int}= 1e-6) = 135 | maxflow(A,float(svec),float(tvec),flowtol) 136 | 137 | 138 | flow(F::stFlow) = 139 | F.flowvalue 140 | 141 | """ 142 | Given a flow, stored in an stFlow object, return the set of nodes attached to 143 | the source 144 | """ 145 | function source_nodes(F::stFlow,flowtol::Union{Float64,Int}= 1e-6) 146 | # Run a bfs from the sink node. Anything with distance 147 | # n is disconnected from the sink. Thus it's part of the minimum cut set 148 | n = size(F.C,2) 149 | finalHeight = relabeling_bfs(F.C,F.F,flowtol, F.t) 150 | S = Vector{Int64}() 151 | for i = 1:n 152 | if finalHeight[i] == n 153 | push!(S,i) 154 | end 155 | end 156 | 157 | # Sanity checks: source node is on source side, sink node is on sink side 158 | @assert(~in(F.t,S)) 159 | @assert(in(F.s,S)) 160 | 161 | return S 162 | end 163 | 164 | # Get the smallest source-side set 165 | function source_nodes_min(F::stFlow,flowtol::Union{Float64,Int}= 1e-6) 166 | # Run a bfs from the source node. Anything with distance 167 | # 0 296 | 297 | u = pop!(Queue) # Select a new active node 298 | 299 | inQ[u] = false # Take it out of the queue 300 | 301 | # discharge flow through node u 302 | relabelings += discharge!(C,F,Queue,u,Neighbs[u],height,excess,n,d[u],inQ,flowtol) 303 | 304 | # if u is still active, put it back into the queue 305 | if excess[u] > flowtol 306 | prepend!(Queue,u) 307 | inQ[u] = true 308 | end 309 | 310 | # Global relabeling heuristic for push-relabel algorithm. 311 | # This periodically recomputes distances between nodes and the sink 312 | if relabelings == n 313 | relabelings = 0 314 | dist = relabeling_bfs(C,F,flowtol) 315 | height = dist 316 | end 317 | 318 | end 319 | 320 | # Compute final distances from sink using BFS. Anything with distance 321 | # n is disconnected from the sink. Thus it's part of the minimum cut set 322 | finalHeight = relabeling_bfs(C,F,flowtol,n) 323 | S = Vector{Int64}() 324 | push!(S,1) # Include the source node 325 | for i = 2:n 326 | if finalHeight[i] == n 327 | push!(S,i) 328 | end 329 | end 330 | 331 | mflow = excess[n] # the excess at the sink equals the maximum flow value 332 | 333 | return S, F, mflow 334 | 335 | end 336 | 337 | # Discharge operation: pushes flow away from node u across admissible edges. 338 | # If excess[u] > 0 but no admissible edges exist, we relabel u. 339 | function discharge!(C::SparseMatrixCSC,F::SparseMatrixCSC, 340 | Queue::Vector{Int64},u::Int64,uNeighbs::Array{Int64},height::Array{Int64}, 341 | excess::Array{Float64},n::Int64,du::Int64,inQ::Array{Bool}, 342 | flowtol::Union{Float64,Int}= 1e-6) 343 | 344 | vLocal::Int64 = 1 # Start at the first neighbor of node u 345 | hu = height[u] 346 | relabeled = 0 347 | 348 | # As long as there is excess at node u and there is another neighbor to explore... 
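# In the loop below, an edge (u,v) counts as admissible when it still has residual
# capacity (C[u,v] - F[u,v] > flowtol) and height[u] > height[v]; under the standard
# push-relabel invariant (height[u] <= height[v] + 1 on residual edges) this matches
# the usual admissibility condition height[u] == height[v] + 1.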
349 | while excess[u] > flowtol && vLocal <= du 350 | 351 | # ...grab the next neighbor of node u 352 | v = uNeighbs[vLocal] 353 | 354 | # ... if edge (u,v) is admissible, push more flow. 355 | # Otherwise, move to the next neighbor of u 356 | if hu > height[v] && C[u,v] - F[u,v] > flowtol 357 | pushflow!(C,F,Queue,u,v,excess,height,inQ,n) 358 | vLocal += 1 359 | else 360 | vLocal += 1 361 | end 362 | end 363 | 364 | # if we needed to visit every neighbor of u, we must relabel u, 365 | # so that at least one admissible edge is created 366 | if vLocal > du 367 | relabeled = 1 368 | relabel!(C,F,Queue,u,uNeighbs,height,du,n,flowtol) 369 | end 370 | 371 | return relabeled 372 | end 373 | 374 | # Relabel sets the label/height of node u to be equal to the minimum label 375 | # such that an admissible edge exists. An edge (u,v) is admissible if 376 | # height[u] = height[v] + 1 377 | function relabel!(C::SparseMatrixCSC,F::SparseMatrixCSC, 378 | Queue::Vector{Int64},u::Int64,uNeighbs::Array{Int64},height::Array{Int64}, 379 | du::Int64,n::Int64,flowtol::Union{Float64,Int}= 1e-6) 380 | # find smallest new height making a push possible, if such a push is possible 381 | 382 | min_height = Inf 383 | # search through the neighbors of u 384 | # and relabel so that height[u] = height[v] + 1 for some v in the neighborhood 385 | for vLocal = 1:du 386 | v = uNeighbs[vLocal] 387 | if C[u,v] - F[u,v] > flowtol 388 | min_height = min(min_height, height[v]) 389 | height[u] = min_height + 1 390 | end 391 | end 392 | 393 | end 394 | 395 | # Push flow from an active node u to a node v via an admissible edge (u,v) 396 | function pushflow!(C::SparseMatrixCSC,F::SparseMatrixCSC, 397 | Queue::Vector{Int},u::Int64,v::Int64,excess::Array{Float64},height::Array{Int64}, 398 | inQ::Array{Bool},n::Int64) 399 | 400 | send = min(excess[u], C[u,v] - F[u,v]) 401 | F[u,v] += send 402 | F[v,u] -= send 403 | excess[u] -= send 404 | excess[v] += send 405 | 406 | # If v isn't in the queue, isn't the sink, isn't the source, 407 | # and is active, then add it to the Queue 408 | if ~inQ[v] && v < n && v > 1 409 | prepend!(Queue,v) 410 | inQ[v] = true 411 | end 412 | end 413 | 414 | # From the adjacency matrix, build an adjacency list for the graph 415 | function ConstructAdj(C::SparseMatrixCSC,n::Int64) 416 | rp = C.rowval 417 | ci = C.colptr 418 | Neighbs = Vector{Vector{Int64}}() 419 | d = zeros(Int64,n) 420 | for i = 1:n 421 | # chop up the rp vector and put it in Neighbs 422 | push!(Neighbs,rp[ci[i]:ci[i+1]-1]) 423 | d[i] = ci[i+1]-ci[i] 424 | end 425 | 426 | # d is the number of neighbors. This is the unweighted degree, 427 | # but note importantly that if the original graph is weighted this is 428 | # not the same as the degree vector d we will sometimes use 429 | return Neighbs, d 430 | 431 | end 432 | 433 | # Given initial capacity matrix C and flow matrix F, compute the distance 434 | # from each node to the specified "start" node. 435 | # Start defaults to node n, which is assumed to be the sink node 436 | function relabeling_bfs(C::SparseMatrixCSC,F::SparseMatrixCSC,flowtol::Union{Float64,Int}=1e-6,start::Int64=0) 437 | 438 | if flowtol >= .1 439 | println("flowtol is a tolerance parameter for rounding small residual capacity edges to zero, and should be much smaller than $flowtol. 
Changing it to default value 1e-6") 440 | flowtol = 1e-6 441 | end 442 | 443 | # To avoid subtraction cancellation errors that may have occurred when pushing 444 | # flow, round residual edges to zero if they fall under a certain tolerance 445 | # when computing the bfs 446 | Cf = C-F 447 | Cf = Cf.*(Cf.>flowtol) 448 | n = size(Cf,1) 449 | 450 | if start == 0 451 | start = n 452 | end 453 | 454 | rp = Cf.colptr 455 | ci = Cf.rowval 456 | 457 | N=length(rp)-1 458 | 459 | d = n*ones(Int64,N) 460 | sq=zeros(Int64,N) 461 | sqt=0 462 | sqh=0 # search queue and search queue tail/head 463 | 464 | # start bfs at the node "start" 465 | u = start 466 | sqt=sqt+1 467 | sq[sqt]=u 468 | d[u]=0 469 | while sqt-sqh>0 470 | sqh=sqh+1 471 | v=sq[sqh] # pop v off the head of the queue 472 | for ri=rp[v]:rp[v+1]-1 473 | w=ci[ri] 474 | if d[w] > n-1 475 | sqt=sqt+1 476 | sq[sqt]=w 477 | d[w]= d[v]+1 478 | end 479 | end 480 | end 481 | 482 | return d 483 | end 484 | --------------------------------------------------------------------------------
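A minimal usage sketch for the flow solver above (not part of the repository; the toy capacity matrix, the source/sink weight vectors, and the include path are illustrative assumptions). It mirrors how HLC_Step calls the vector interface of maxflow and recovers the source-side set:

using SparseArrays
include("maxflow.jl")                  # assumption: run from the src/ directory

# Toy instance: 3 non-terminal nodes on a path, source attached to node 1, sink to node 3
A    = sparse([0.0 1.0 0.0; 1.0 0.0 1.0; 0.0 1.0 0.0])   # capacities between non-terminal nodes
svec = [2.0; 0.0; 0.0]                 # source -> node capacities
tvec = [0.0; 0.0; 2.0]                 # node -> sink capacities

F = maxflow(A, svec, tvec)             # stFlow with the source at index 1 and the sink at index N
@show flow(F)                          # maximum s-t flow value
S = source_nodes_min(F)[2:end] .- 1    # smallest source-side set, shifted back to 1:3 indexing
@show S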