├── test
├── .gitignore
├── Makefile
├── main.stu
├── test_compute_map.m
├── test_eigl.m
├── test_eigl2.m
├── octave
├── test_decompose_dense.m
├── test_decompose.m
├── test_statistic_squares.m
└── test_connect.m
├── m
├── konect_statistic_diameff50.m
├── konect_statistic_diameff90.m
├── konect_statistic_nonbipal.m
├── konect_statistic_ifub0.m
├── konect_statistic_oddcycles.m
├── konect_statistic_cocorel.m
├── konect_statistic_avgmult.m
├── konect_statistic_fconflict.m
├── konect_statistic_format.m
├── konect_fromfieldname.m
├── konect_statistic_weights.m
├── konect_statistic_diam.m
├── konect_jain.m
├── konect_statistic_meandist.m
├── load_eig.m
├── konect_rmse.m
├── konect_ap_sorted.m
├── konect_statistic_prefatt.m
├── konect_usingoctave.m
├── konect_statistic_mediandist.m
├── konect_order_dedicom.m
├── konect_diammedian.m
├── konect_statistic_twostars_normuni.m
├── konect_xpinv.m
├── konect_statistic_cluscoasym.m
├── konect_normalize_additively.m
├── konect_normalized_entropy.m
├── konect_statistic_clusco_normuni.m
├── konect_join.m
├── konect_statistic_mediandegree.m
├── konect_statistic_volume_normuni.m
├── konect_statistic_weight.m
├── konect_tofieldname.m
├── konect_statistic_clusco.m
├── konect_statistic_alconn.m
├── konect_dentropy2.m
├── konect_signx.m
├── konect_absx.m
├── konect_statistic_lines.m
├── konect_imageubu_complex.m
├── konect_connect_back.m
├── konect_denormalize_additively.m
├── konect_statistic_separationl.m
├── konect_statistic_separation.m
├── konect_normalize_rows.m
├── konect_statistic_snorm.m
├── konect_fromto.m
├── konect_statistic_degone.m
├── konect_print_bitmap.m
├── konect_connect_matrix_strong.m
├── konect_dentropy.m
├── private
│ ├── conjx.m
│ ├── konect_spectral_distribution_plain2.m
│ └── konect_spectral_distribution_plain.m
├── konect_statistic_loops.m
├── konect_own.m
├── konect_statistic_seidelnorm.m
├── konect_statistic_triangles_normuni.m
├── konect_statistic_asymmetry.m
├── konect_xinv.m
├── konect_connect_strong.m
├── konect_significance.m
├── konect_statistic_conflictn.m
├── konect_connect_matrix_square.m
├── konect_posnegcolormap.m
├── konect_statistic_negativity.m
├── konect_statistic_opnorm.m
├── konect_connect_matrix_bipartite.m
├── konect_statistic_controllabilityn.m
├── konect_colors_letter.m
├── konect_statistic_network_rank_sq.m
├── konect_gini_direct.m
├── konect_statistic_uniquevolume.m
├── konect_statistic_alcon.m
├── konect_connect_bipartite.m
├── konect_power_law_flat_vector.m
├── konect_statistic_controllability.m
├── konect_statistic_maxdiag.m
├── konect_statistic_dentropy.m
├── konect_statistic_dentropy2.m
├── konect_statistic_jain.m
├── konect_gini.m
├── konect_first_index.m
├── konect_statistic_dentropyn.m
├── konect_statistic_patest.m
├── konect_ap.m
├── konect_statistic_triangles_norm.m
├── konect_statistic_cocos.m
├── konect_connect_square.m
├── konect_diameff.m
├── konect_statistic_diameter.m
├── konect_statistic_size.m
├── konect_statistic_gini.m
├── konect_connect_square_nobgl.m
├── konect_statistic_coco.m
├── konect_statistic_volume.m
├── konect_auc.m
├── konect_imageubu.m
├── konect_statistic_dconflict.m
├── konect_diammean.m
├── konect_significance_legend.m
├── konect_svdn.m
├── konect_statistic_fourstars.m
├── konect_controllability.m
├── konect_statistic_threestars.m
├── konect_statistic_maxdegree.m
├── konect_statistic_sconflict.m
├── konect_roc_curve.m
├── konect_statistic_bip.m
├── konect_statistic_lconflict.m
├── konect_statistic_reciprocity.m
├── konect_statistic_power2.m
├── konect_power_law_flat.m
├── konect_predict_neib3.m
├── konect_network_rank_abs.m
├── konect_statistic_avgdegreeasym.m
├── konect_predict_cosine.m
├── konect_significance_image.m
├── konect_statistic_nonbip.m
├── konect_statistic_assortativity.m
├── konect_normalize_matrix.m
├── konect_connect_bipartite_nobgl.m
├── konect_decomposition_takane.m
├── konect_pagerank.m
├── konect_statistic_inoutassort.m
├── konect_spconvert.m
├── konect_statistic_tconflict.m
├── konect_statistic_nonbipn.m
├── konect_statistic_own.m
├── konect_data_weights.m
├── konect_statistic_power.m
├── konect_map.m
├── konect_statistic.m
├── konect_pa.m
├── konect_decomposition_dedicom3.m
├── konect_styles_statistic.m
├── konect_decomposition_lap.m
├── konect_significance_plot.m
├── konect_statistic_anticonflict.m
├── konect_mauc.m
├── konect_statistic_tour4.m
├── konect_contains_triangle.m
├── konect_statistic_fill.m
├── konect_eign.m
├── konect_significance_legend_bw.m
├── konect_statistic_power3.m
├── konect_statistic_triangles.m
├── konect_eigskew.m
├── konect_significance_plot_bw.m
├── konect_statistic_twostars.m
├── konect_spectral_distribution.m
├── konect_power_law_range.m
├── konect_statistic_cluscoall.m
├── konect_data_tag.m
├── konect_statistic_conflict.m
├── konect_statistic_avgdegree.m
├── konect_print.m
├── konect_statistic_squares.m
├── konect_hopdistr_ex.m
├── konect_decomposition_stoch1.m
├── konect_consts.m
├── konect_clusco_approx.m
├── konect_clusco_simple.m
├── @konect_timer
│ └── konect_timer.m
├── konect_pa_full.m
├── konect_hopdistr.m
├── konect_effective_diameter.m
├── konect_clusco.m
└── konect_statistic_derived.m
├── TODO
├── FAQ
└── README
/test/.gitignore:
--------------------------------------------------------------------------------
1 | error.log
2 |
--------------------------------------------------------------------------------
/m/konect_statistic_diameff50.m:
--------------------------------------------------------------------------------
1 | % GROUP:
2 |
3 |
--------------------------------------------------------------------------------
/m/konect_statistic_diameff90.m:
--------------------------------------------------------------------------------
1 | % GROUP:
2 |
3 |
--------------------------------------------------------------------------------
/m/konect_statistic_nonbipal.m:
--------------------------------------------------------------------------------
1 | % GROUP: square
2 |
--------------------------------------------------------------------------------
/m/konect_statistic_ifub0.m:
--------------------------------------------------------------------------------
1 |
2 |
3 | % GROUP: square
4 |
--------------------------------------------------------------------------------
/m/konect_statistic_oddcycles.m:
--------------------------------------------------------------------------------
1 |
2 | % GROUP: square
3 |
4 |
--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
1 | TODOs for the KONECT Toolbox are usually kept track of within
2 | konect-analysis/TODO.
3 |
--------------------------------------------------------------------------------
/m/konect_statistic_cocorel.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the size of the relative largest connected component [cocorel].
3 | %
4 | % This is the same as [coco+2].
5 | %
6 |
7 |
--------------------------------------------------------------------------------
/m/konect_statistic_avgmult.m:
--------------------------------------------------------------------------------
1 | %
2 | % Average multiplicity.
3 | %
4 | % GROUP: multi
5 | %
6 |
7 | function values = konect_statistic_avgmult(A, format, weights)
8 | % Placeholder: always fails the assertion below; VALUES is never assigned.
9 | assert(0); % Not implemented
10 |
--------------------------------------------------------------------------------
/m/konect_statistic_fconflict.m:
--------------------------------------------------------------------------------
1 | % GROUP: negative
2 |
3 | function [value] = konect_statistic_fconflict(A, format, weights)
4 |
5 | % Evaluate the full [conflict] statistic and keep only its second entry.
6 | all_conflict = konect_statistic_conflict(A, format, weights);
7 | value = all_conflict(2);
8 |
--------------------------------------------------------------------------------
/m/konect_statistic_format.m:
--------------------------------------------------------------------------------
1 | %
2 | % The "format" as a statistic, i.e., the numerical ID of the format.
3 | %
4 |
5 | function values = konect_statistic_format(A, format, weights)
6 | % Only FORMAT is read; A and WEIGHTS are accepted but not used.
7 | values(1) = format;
8 |
9 |
--------------------------------------------------------------------------------
/m/konect_fromfieldname.m:
--------------------------------------------------------------------------------
1 | %
2 | % Convert a field name back to a string. Used in conjunction with
3 | % konect_tofieldname().
4 | %
5 |
6 | function ret = konect_fromfieldname(s)
7 |
8 | % Every underscore in the field name becomes a dash.
9 | underscore = '_';
10 | dash = '-';
11 | ret = regexprep(s, underscore, dash);
12 |
--------------------------------------------------------------------------------
/m/konect_statistic_weights.m:
--------------------------------------------------------------------------------
1 | %
2 | % The "weights" as a statistic, i.e., the numerical ID of the weights.
3 | %
4 |
5 | function values = konect_statistic_weights(A, format, weights)
6 | % Only WEIGHTS is read; A and FORMAT are accepted but not used.
7 | values(1) = weights;
8 |
9 |
--------------------------------------------------------------------------------
/m/konect_statistic_diam.m:
--------------------------------------------------------------------------------
1 |
2 | function values = konect_statistic_diam(A, format, weights)
3 |
4 | % Hop distribution of the network; WEIGHTS is not used.
5 | hop_distribution = konect_hopdistr(A, format, [], 1);
6 |
7 | % The statistic is the length of the hop distribution vector.
8 | % NOTE(review): the original comment states that for a disconnected
9 | % network the number of nodes is returned; that depends on what
10 | % konect_hopdistr() returns in that case -- confirm.
11 | values = length(hop_distribution);
12 |
--------------------------------------------------------------------------------
/FAQ:
--------------------------------------------------------------------------------
1 | ______________________________________________________________
2 | I get "Undefined function or variable 'components'"
3 |
4 | -> You need to install the Boost Graph Library (BGL), which the KONECT
5 | Toolbox is using for certain functions. See the file 'README'.
6 |
7 |
--------------------------------------------------------------------------------
/m/konect_jain.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute Jain's fairness index.
3 | %
4 | % RESULT
5 | % value Jain's fairness index
6 | %
7 | % PARAMETERS
8 | % x Vector of values
9 | %
10 |
11 | function [value] = konect_jain(x)
12 |
13 | % (sum x)^2 / (n * sum x^2), evaluated in the same order as written.
14 | total = sum(x);
15 | sum_of_squares = sum(x .^ 2);
16 | value = total^2 / length(x) / sum_of_squares;
17 |
--------------------------------------------------------------------------------
/m/konect_statistic_meandist.m:
--------------------------------------------------------------------------------
1 | %
2 | % The average distance between nodes of the graph.
3 | %
4 |
5 | function values = konect_statistic_meandist(A, format, weights)
6 |
7 | % Hop distribution first, then its mean via konect_diammean().
8 | node_count = size(A,1);
9 | hop_distribution = konect_hopdistr(A, format, [], 1);
10 | values = konect_diammean(hop_distribution, node_count, 1);
11 |
--------------------------------------------------------------------------------
/test/Makefile:
--------------------------------------------------------------------------------
1 |
2 | DEPRECATED
3 |
4 | test:
5 |
6 |
7 |
8 | # octave: ../../analysis/octave
9 | # cp ../../analysis/octave .
10 |
11 | matlab: ../../analysis/matlab
12 | cp ../../analysis/matlab .
13 |
14 | test: matlab
15 | ./matlab test_decompose
16 |
17 |
--------------------------------------------------------------------------------
/m/load_eig.m:
--------------------------------------------------------------------------------
1 |
2 | function dd = load_eig()
3 | % Name of the log file, read from the LOGFILE environment variable (echoed).
4 | logfile = getenv('LOGFILE')
5 | % Run the shell command 'sh/save_diag <logfile>' and capture its output.
6 | [status, dd_text] = unix(['sh/save_diag ' logfile]);
7 | % Abort unless the command exited with status 0.
8 | assert(status == 0);
9 | % Echo the raw command output (statement has no semicolon).
10 | dd_text
11 | % Replace newlines by spaces (result echoed).
12 | dd_text = regexprep(dd_text, '\n', ' ')
13 | % Parse the text as floating-point numbers; this is the return value (echoed).
14 | dd = sscanf(dd_text, '%f')
15 | % NOTE(review): LOGFILE is pasted unquoted into the shell command -- confirm it is trusted.
16 |
--------------------------------------------------------------------------------
/m/konect_rmse.m:
--------------------------------------------------------------------------------
1 | %
2 | % The root-mean-square error.
3 | %
4 | % RESULT
5 | % ret The root-mean-square error
6 | %
7 | % PARAMETERS
8 | % target, prediction Actual and predicted values
9 | %
10 |
11 | function [ret] = konect_rmse(target, prediction)
12 |
13 | % Square root of the mean squared difference.
14 | residual = target - prediction;
15 | mean_square = mean(residual .^ 2);
16 | ret = mean_square ^ .5;
17 |
--------------------------------------------------------------------------------
/m/konect_ap_sorted.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the average precision, given only the 0/1 vector sorted by
3 | % scores, which don't have to be given.
4 | %
5 | % PARAMETERS
6 | % t (e*1) The 0/1 vector, ranked by descending scores
7 | %
8 |
9 | function ap = konect_ap_sorted(t)
10 |
11 | % Precision at each hit: (number of hits so far) / (rank of the hit).
12 | hit_ranks = find(t);
13 | hit_counts = (1:sum(t))';
14 | ap = mean(hit_counts ./ hit_ranks);
15 |
--------------------------------------------------------------------------------
/m/konect_statistic_prefatt.m:
--------------------------------------------------------------------------------
1 | %
2 | % The preferential attachment exponent. This statistic depends on
3 | % the temporal evolution of a network and thus cannot be computed in
4 | % this file. This file only exists to document the TIME group.
5 | %
6 | % GROUP: time
7 | %
8 |
9 | error('Cannot be computed in this way');
10 |
--------------------------------------------------------------------------------
/m/konect_usingoctave.m:
--------------------------------------------------------------------------------
1 | %
2 | % Determine whether GNU Octave is running.
3 | %
4 | % RESULTS
5 | % ret 1 when GNU Octave is running; 0 otherwise
6 | %
7 |
8 | function ret = konect_usingoctave ()
9 |
10 | % The check is done once and cached in a persistent variable.
11 | persistent cached_flag
12 |
13 | if (isempty (cached_flag))
14 |     cached_flag = exist('OCTAVE_VERSION') ~= 0;
15 | end
16 |
17 | ret = cached_flag;
18 |
--------------------------------------------------------------------------------
/m/konect_statistic_mediandist.m:
--------------------------------------------------------------------------------
1 | %
2 | % The median distance between nodes in the graph. This is related to
3 | % the 50-percentile effective diameter.
4 | %
5 |
6 | function values = konect_statistic_mediandist(A, format, weights)
7 |
8 | % Hop distribution first, then its median via konect_diammedian().
9 | node_count = size(A, 1);
10 | hop_distribution = konect_hopdistr(A, format, [], 1);
11 | values = konect_diammedian(hop_distribution, node_count);
12 |
--------------------------------------------------------------------------------
/m/konect_order_dedicom.m:
--------------------------------------------------------------------------------
1 | %
2 | % Reorder a DEDICOM, in a way that important latent dimensions come first.
3 | %
4 | % PARAMETERS
5 | % U,D
6 | %
7 | % RESULT
8 | % U,D
9 | %
10 |
11 | function [U D] = konect_order_dedicom(U, D)
12 |
13 | % Importance of each latent dimension: diagonal of D*D' + D'*D.
14 | importance = diag(D * D' + D' * D);
15 |
16 | % Sorting the negated magnitudes ascending gives a descending order.
17 | [sorted_unused, order] = sort(-abs(importance));
18 |
19 | % Apply the same permutation to the columns of U and to both sides of D.
20 | D = D(order, order);
21 | U = U(:, order);
22 |
--------------------------------------------------------------------------------
/m/konect_diammedian.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the median distance using the (0-based) vector d.
3 | %
4 | % PARAMETERS
5 | % d (1*(diameter+1)) Vector d
6 | % n Number of nodes
7 | %
8 | % RESULT
9 | % ret The median distance
10 | %
11 |
12 | function ret = konect_diammedian(d, n)
13 |
14 | % D must have exactly one row; N is accepted but not used.
15 | assert(size(d,1) == 1);
16 |
17 | % Round the 50-percent effective diameter down to an integer.
18 | half_effective = konect_diameff(d, 0.5);
19 | ret = floor(half_effective);
20 |
--------------------------------------------------------------------------------
/m/konect_statistic_twostars_normuni.m:
--------------------------------------------------------------------------------
1 |
2 |
3 | function values = konect_statistic_twostars_norm2(A, format, weight)
4 |
5 | % NOTE(review): the declared name differs from the file name
6 | % (konect_statistic_twostars_normuni.m) -- confirm which is intended.
7 |
8 | % Observed two-star count and network size.
9 | twostars = konect_statistic_twostars(A, format, weight);
10 | n = konect_statistic_size(A, format, weight);
11 |
12 | % Constants used for centering and scaling.
13 | center = n*(n-1)*(n-2)/8;
14 | spread_square = (1/8)*n^4 - (19/32)*n^3 + (29/32)*n^2 - (7/16)*n;
15 |
16 | values = (twostars - center) / sqrt(spread_square);
17 |
--------------------------------------------------------------------------------
/m/konect_xpinv.m:
--------------------------------------------------------------------------------
1 | %
2 | % Scalable pseudoinverse. In particular, this can be applied to n*r
3 | % matrices, with n>>r, in which case pinv() will take too much memory.
4 | %
5 | % PARAMETERS
6 | % a Matrix to pseudoinvert
7 | %
8 | % RESULT
9 | % a_i Pseudoinverse
10 | %
11 |
12 | function a_i = konect_xpinv(a)
13 |
14 | % Economy-size SVD, then recombine with konect_xinv() of the middle factor.
15 | [left, middle, right] = svd(a, 'econ');
16 | middle_inverse = konect_xinv(middle);
17 | a_i = right * middle_inverse * left';
18 |
--------------------------------------------------------------------------------
/test/main.stu:
--------------------------------------------------------------------------------
1 | #
2 | # This is a Stu file, instead of a Makefile.
3 | # Get Stu at https://github.com/kunegis/stu
4 | #
5 |
6 | % version 1.9
7 |
8 | @all: @test;
9 |
10 | @test: @test.decompose @test.compute_map;
11 |
12 | matlab: ../../konect-analysis/matlab {
13 | cp ../../konect-analysis/matlab .
14 | }
15 |
16 | @test.$NAME: matlab
17 | {
18 | ./matlab test_$NAME.m
19 | }
20 |
21 |
--------------------------------------------------------------------------------
/m/konect_statistic_cluscoasym.m:
--------------------------------------------------------------------------------
1 | %
2 | % Directed clustering coefficient.
3 | %
4 | % GROUP: asym
5 | %
6 |
7 | function values = konect_statistic_cluscoasym(A, format, weights)
8 |
9 | consts = konect_consts();
10 |
11 | % Only the ASYM format is accepted.
12 | if format ~= consts.ASYM
13 |     error('***');
14 | end
15 |
16 | % Work on the 0/1 pattern of nonzero entries.
17 | pattern = (A ~= 0);
18 |
19 | % The second output of konect_clusco() is the reported value.
20 | [out1, out2, out3] = konect_clusco(pattern);
21 | values(1) = out2;
22 |
--------------------------------------------------------------------------------
/m/konect_normalize_additively.m:
--------------------------------------------------------------------------------
1 | %
2 | % Normalize a dataset additively, given row and column means.
3 | %
4 | % PARAMETERS
5 | % T (r*3) Data matrix
6 | % means Normalization parameters or []
7 | %
8 | % RESULT
9 | % T (r*3) Updated data matrix
10 | %
11 |
12 | function T = konect_normalize_additively(T, means)
13 |
14 | % Nothing to do when no row means are given.
15 | if ~isempty(means.U)
16 |     row_offset = means.U(T(:,1));
17 |     column_offset = means.V(T(:,2));
18 |     T(:,3) = T(:,3) - row_offset - column_offset;
19 | end
20 |
--------------------------------------------------------------------------------
/m/konect_normalized_entropy.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the entropy of a vector, after normalizing it to sum to one.
3 | %
4 | % Entries equal to zero contribute nothing to the entropy, following
5 | % the convention 0 * log(0) = 0.
6 | %
7 | % RESULT
8 | %	value	Entropy in nats
9 | %
10 | % PARAMETERS
11 | %	values	Distribution values; nonnegative; need not sum to one
12 | %
13 |
14 | function value = konect_normalized_entropy(values)
15 |
16 | % Normalize to a probability distribution.
17 | values = values ./ sum(values);
18 |
19 | % Per-entry terms p * log(p).  For p == 0 this evaluates to
20 | % 0 * (-Inf) = NaN, which would make the whole sum NaN; replace those
21 | % terms by their limit value 0.
22 | terms = values .* log(values);
23 | terms(values == 0) = 0;
24 |
25 | value = sum(- terms);
26 |
27 |
--------------------------------------------------------------------------------
/m/konect_statistic_clusco_normuni.m:
--------------------------------------------------------------------------------
1 | %
2 | % Normalized clustering coefficient, such that for large n, the
3 | % distribution of values over all n-graphs is independent of n.
4 | %
5 |
6 | function values = konect_statistic_clusco_norm(A, format, weights)
7 |
8 | % NOTE(review): the declared name differs from the file name
9 | % (konect_statistic_clusco_normuni.m), and konect_statistic_clusco2 is
10 | % not among the files listed next to this one -- confirm both.
11 |
12 | % First entry of the clustering coefficient statistic.
13 | all_values = konect_statistic_clusco2(A, format, weights);
14 | clusco = all_values(1);
15 |
16 | % Center at 0.5 and scale by sqrt(n * (n-1) / 2).
17 | n = size(A,1);
18 | scale = sqrt(n * (n-1) / 2);
19 | values = (clusco - 0.5) * scale;
20 |
21 |
--------------------------------------------------------------------------------
/test/test_compute_map.m:
--------------------------------------------------------------------------------
1 | %
2 | % Test the MAP computation.
3 | %
4 |
5 | cd ..
6 | % Each row below: columns 1-3 are the at_test part, column 4 the prediction.
7 | % [ at_test prediction ]
8 | data = [
9 | 1 2 1 .8; % 1: 1/2
10 | 1 3 0 .9;
11 | 2 1 1 .9; % 2: 1
12 | 2 3 1 .9;
13 | 2 4 0 .7;
14 | 3 1 0 .9; % 3: 1/3
15 | 3 2 0 .8;
16 | 3 4 1 .7
17 | ];
18 | % konect_map() receives the prediction column first, then columns 1-3.
19 | precision = konect_map(data(:,4), data(:,1:3))
20 | % Expected: the mean of the three per-group values annotated above.
21 | if abs(precision - (1/2 + 1 + 1/3)/3) > 1e-10
22 | error
23 | end
24 |
--------------------------------------------------------------------------------
/m/konect_join.m:
--------------------------------------------------------------------------------
1 | %
2 | % Join lines of a r*(2 or 3) matrix with same first and second values.
3 | %
4 | % The result always has three columns (row, column, value), even for r*2 input.
5 | %
6 | % PARAMETERS
7 | % T (r*2 or r*3) Sparse entries with duplicates
8 | %
9 | % RESULT
10 | % T (r*3) Sparse entries without duplicates
11 | %
12 |
13 | function T = konect_join(T)
14 |
15 | % Convert to a matrix with konect_spconvert(), then read the nonzero
16 | % entries back as (row, column, value) triples.
17 | joined = konect_spconvert(T);
18 | [rows, columns, entries] = find(joined);
19 |
20 | T = [rows columns entries];
21 |
--------------------------------------------------------------------------------
/m/konect_statistic_mediandegree.m:
--------------------------------------------------------------------------------
1 | %
2 | % The median degree.
3 | %
4 | % values
5 | % [1] Median degree
6 | % [2] Left median degree (BIP)
7 | % [3] Right median degree (BIP)
8 | % [4] Median outdegree (ASYM)
9 | % [5] Median indegree (ASYM)
10 | %
11 | % GROUP+2: bip
12 | % GROUP+3: bip
13 | % GROUP+4: asym
14 | % GROUP+5: asym
15 | %
16 |
17 | error % Not implemented in Matlab; only in C in konect-analysis/
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/m/konect_statistic_volume_normuni.m:
--------------------------------------------------------------------------------
1 | %
2 | % Normalized number of unique edges, such that the value has standard
3 | % normal distribution in uniform graphs.
4 | %
5 |
6 | function values = konect_statistic_volume_norm(A, format, weights)
7 |
8 | % NOTE(review): the declared name differs from the file name
9 | % (konect_statistic_volume_normuni.m) -- confirm which is intended.
10 |
11 | % First entry of the unique volume statistic.
12 | unique_volume = konect_statistic_uniquevolume(A, format, weights);
13 | m = unique_volume(1);
14 |
15 | % Edge probability 0.5 over the n*(n-1)/2 node pairs.
16 | n = size(A,1);
17 | p = 0.5;
18 | expected = p * (1/2) * n * (n-1);
19 | variance = p * (1-p) * (1/2) * n * (n-1);
20 |
21 | values = (m - expected) / sqrt(variance);
22 |
--------------------------------------------------------------------------------
/m/konect_statistic_weight.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the weight of a network, i.e., the sum of absolute edge
3 | % weights.  For unweighted networks, this equals the volume.
4 | %
5 | % PARAMETERS
6 | %	A	Matrix whose absolute entries are summed
7 | %	format	(unused)
8 | %	weights	(unused)
9 | %
10 | % RESULT
11 | %	value	The weight value, as a full (non-sparse) scalar
12 | %
13 |
14 | function value = konect_statistic_weight(A, format, weights)
15 |
16 | % konect_absx() is used instead of abs() so that logical matrices are
17 | % accepted too.  A leftover debugging statement that printed size(value)
18 | % on every call has been removed.
19 | value = full(sum(sum(konect_absx(A))));
20 |
--------------------------------------------------------------------------------
/m/konect_tofieldname.m:
--------------------------------------------------------------------------------
1 | %
2 | % Convert a string to a valid field name.  This is used whenever
3 | % structs are used in a dynamic way, using generic strings as keys,
4 | % for instance for methods, submethods, decompositions, etc.
5 | %
6 | % Each '-' becomes '_' and each '+' becomes '__'.
7 | %
8 | % The opposite of konect_fromfieldname(), when S does not contain
9 | % '+'.
10 | %
11 | % PARAMETERS
12 | %	s	String to convert
13 | %
14 | % RESULT
15 | %	ret	String with '-' and '+' replaced
16 | %
17 |
18 | function ret = konect_tofieldname(s)
19 |
20 | ret = regexprep(s, '-', '_');
21 |
22 | % Chain on RET, not on S, so the replacement above is not discarded.
23 | % The plus sign is a regular expression quantifier and must be escaped
24 | % to be matched literally.
25 | ret = regexprep(ret, '\+', '__');
26 |
--------------------------------------------------------------------------------
/m/konect_statistic_clusco.m:
--------------------------------------------------------------------------------
1 | %
2 | % Only the clustering coefficient, without extra variants.
3 | %
4 | % GROUP: square
5 | %
6 |
7 | function values = konect_statistic_clusco(A, format, weights)
8 |
9 | consts = konect_consts();
10 |
11 | % Bipartite input is rejected.
12 | if format == consts.BIP
13 |     error('*** Clustering coefficient is trivially zero for bipartite networks');
14 | end
15 |
16 | % 0/1 pattern in which an entry is set when either direction is nonzero.
17 | pattern = (A ~= 0);
18 | undirected = pattern | pattern';
19 |
20 | % The second output of konect_clusco() is the reported value.
21 | [out1, out2, out3] = konect_clusco(undirected);
22 | values(1) = out2;
23 |
--------------------------------------------------------------------------------
/m/konect_statistic_alconn.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the normalized algebraic connectivity [alconn].
3 | %
4 | % PARAMETERS
5 | %	a	Adjacency or biadjacency matrix
6 | %	format
7 | %	weights
8 | %
9 | % RESULT
10 | %	value	The normalized algebraic connectivity, i.e., the second
11 | %		diagonal entry of the 'lap-n' decomposition; NaN when
12 | %		fewer than two values were returned
13 | %
14 |
15 | function value = konect_statistic_alconn(a, format, weights)
16 |
17 | opts.disp = 2;
18 |
19 | [u d] = konect_decomposition('lap-n', a, 2, format, weights, opts);
20 |
21 | % Same convention as konect_statistic_separationl(): return NaN instead
22 | % of failing with an index error when the decomposition is too small.
23 | if size(d,1) < 2
24 |     value = NaN;
25 |     return;
26 | end
27 |
28 | % Terminated with a semicolon so the value is not echoed on every call.
29 | value = d(2,2);
30 |
31 |
--------------------------------------------------------------------------------
/m/konect_dentropy2.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the degree distribution entropy [dentropy2].
3 | %
4 | % RESULT
5 | % ret The degree distribution entropy in nats
6 | %
7 | % PARAMETERS
8 | % d Column vector of degrees
9 | %
10 |
11 | function ret = konect_dentropy2(d)
12 |
13 | % Sort the degrees; the last one is the largest.
14 | sorted_degrees = sort(full(d));
15 | largest = sorted_degrees(end);
16 |
17 | % Count how often each value 0, 1, ..., largest occurs.  The first
18 | % bin (value 0) is stated to be always zero, which is no problem.
19 | frequencies = histc(sorted_degrees, 0 : largest);
20 |
21 | ret = konect_dentropy(frequencies);
22 |
--------------------------------------------------------------------------------
/test/test_eigl.m:
--------------------------------------------------------------------------------
1 | cd ..
2 | % NOTE(review): constants(), connect_matrix_square(), prepare_matrix_2() and eigl() are not among the konect_* files listed in m/ -- confirm where they come from.
3 | addpath ../analysis/lib/matlab_bgl/
4 |
5 | consts = constants();
6 |
7 | opts.disp = 2;
8 | % Random sparse n*n matrix with density d/n, reduced to its 0/1 pattern.
9 | n = 2e4;
10 | d = 5;
11 |
12 | a = sprand(n, n, d/n);
13 | a = a~=0;
14 | % Reduce the matrix with connect_matrix_square() and build the 'lap' matrix.
15 | [a cc n] = connect_matrix_square(a);
16 | l = prepare_matrix_2('lap', a, consts.SYM, consts.UNWEIGHTED);
17 |
18 | % Run eigl() twice; the calls differ only in the last argument (1 vs. 2).
19 | [u1 d1] = eigl(l, 5, opts, 1);
20 |
21 | [u2 d2] = eigl(l, 5, opts, 2);
22 | % Print both diagonals and the product u1' * u2 for manual comparison.
23 | diag(d1)'
24 | diag(d2)'
25 | cross = u1' * u2
26 |
27 |
--------------------------------------------------------------------------------
/m/konect_signx.m:
--------------------------------------------------------------------------------
1 | %
2 | % Wrapper for sign() that also accepts sparse logical matrices and just
3 | % returns them, as opposed to sign() which does not work for logical
4 | % matrices.
5 | %
6 | % RESULT
7 | % ret Sign of the argument matrix
8 | %
9 | % PARAMETERS
10 | % A Matrix of which the sign is to be computed;
11 | % may be a logical matrix
12 | %
13 |
14 | function ret = konect_signx(A)
15 |
16 | % Logical input is passed through unchanged; anything else goes to sign().
17 | ret = A;
18 |
19 | if ~islogical(A)
20 |     ret = sign(A);
21 | end
22 |
--------------------------------------------------------------------------------
/m/konect_absx.m:
--------------------------------------------------------------------------------
1 | %
2 | % Wrapper for abs() that also accepts sparse logical matrices and just
3 | % returns them, as opposed to abs() which does not work for logical
4 | % matrices.
5 | %
6 | % RESULT
7 | % ret Absolute value of the argument matrix
8 | %
9 | % PARAMETERS
10 | % A Matrix of which the absolute value is to be computed;
11 | % may be a logical matrix
12 | %
13 |
14 | function ret = konect_absx(A)
15 |
16 | % Logical input is passed through unchanged; anything else goes to abs().
17 | ret = A;
18 |
19 | if ~islogical(A)
20 |     ret = abs(A);
21 | end
22 |
--------------------------------------------------------------------------------
/m/konect_statistic_lines.m:
--------------------------------------------------------------------------------
1 | %
2 | % The [lines] statistic in KONECT is not a mathematically meaningful
3 | % measure, but instead refers to a trivial way to assess the "size" of a
4 | % network. It equals the number of lines in the dataset file. It
5 | % cannot be computed in Matlab (hence this empty file). This file must
6 | % exist to declare to KONECT that the statistic exists, and to declare
7 | % its group (the absence of a "GROUP:" declaration here means that the
8 | % statistic applies to all networks.)
9 | %
10 |
--------------------------------------------------------------------------------
/m/konect_imageubu_complex.m:
--------------------------------------------------------------------------------
1 | %
2 | % Plot a Delta matrix in analogy for konect_imageubu(), but in which
3 | % Delta may be complex.
4 | %
5 |
6 | function konect_imageubu_complex(Delta)
7 |
8 | % Scale so that the largest modulus becomes one.
9 | Delta = Delta / max(max(abs(Delta)));
10 |
11 | % Per entry: phase -> hue, modulus -> brightness, saturation fixed at one.
12 | for row = 1 : size(Delta,1)
13 |     for col = 1 : size(Delta,2)
14 |         entry = Delta(row,col);
15 |         hue = angle(entry) / (2*pi) + 0.5;
16 |         brightness = abs(entry);
17 |         rgb(row,col,1:3) = hsv2rgb([hue 1 brightness]);
18 |     end
19 | end
20 |
21 | image(rgb);
22 | axis square;
23 |
--------------------------------------------------------------------------------
/m/konect_connect_back.m:
--------------------------------------------------------------------------------
1 | %
2 | % Maps a reduced matrix decomposition back to a full matrix
3 | % decomposition. Used in conjunction with konect_connect_matrix(). Nodes that
4 | % were not in the subset get eigenvector entries of zero; this is
5 | % compatible with all matrix decompositions.
6 | %
7 | % PARAMETERS
8 | % cc 0/1 vector denoting the extracted subset of vertices
9 | % U Subset eigenvector matrix
10 | %
11 |
12 | function [ret] = konect_connect_back(cc, U)
13 |
14 | % Rows of the full result that belong to the extracted subset.
15 | subset_rows = find(cc);
16 |
17 | % All other rows stay zero.
18 | ret = zeros(size(cc, 1), size(U, 2));
19 | ret(subset_rows, :) = U;
20 |
--------------------------------------------------------------------------------
/m/konect_denormalize_additively.m:
--------------------------------------------------------------------------------
1 | %
2 | % Denormalize a prediction vector additively.
3 | %
4 | % Only the first two columns of T are used.
5 | %
6 | % RESULT
7 | % prediction (e*1) Denormalized prediction values
8 | %
9 | % PARAMETERS
10 | % T (e*[2+k]) Row and column indexes
11 | % prediction (e*1) Prediction vector
12 | % means
13 | %
14 |
15 | function prediction = konect_denormalize_additively(T, prediction, means)
16 |
17 | % Nothing to do when no row means are given.
18 | if ~isempty(means.U)
19 |     row_offset = means.U(T(:,1));
20 |     column_offset = means.V(T(:,2));
21 |     prediction = prediction + row_offset + column_offset;
22 | end
23 |
24 |
--------------------------------------------------------------------------------
/m/konect_statistic_separationl.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the Laplacian eigenvalue separation [separationl].
3 | %
4 | % RESULT
5 | % value lambda_3[L] / lambda_2[L]
6 | %
7 | % PARAMETERS
8 | % A Adjacency or biadjacency matrix
9 | % format
10 | % weights
11 | %
12 |
13 | function value = konect_statistic_separationl(A, format, weights)
14 |
15 | opts = struct('disp', 2);
16 |
17 | % Request three values of the 'lap' decomposition.
18 | [vectors, spectrum] = konect_decomposition('lap', A, 3, format, weights, opts);
19 |
20 | % Ratio of the third to the second diagonal entry; NaN when fewer
21 | % than three were returned.
22 | if size(spectrum,1) >= 3
23 |     value = spectrum(3,3) / spectrum(2,2);
24 | else
25 |     value = NaN;
26 | end
27 |
28 |
--------------------------------------------------------------------------------
/m/konect_statistic_separation.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the spectral separation [separation].
3 | %
4 | % PARAMETERS
5 | % A Adjacency / biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULTS
10 | % values Spectral separation
11 | % [1] | lambda1/lambda2 |
12 | % [2] | lambda2/lambda1 |
13 | %
14 |
15 | function values = konect_statistic_separation(A, format, weights)
16 |
17 | opts = struct('disp', 2);
18 |
19 | % Request two values of the 'sym' decomposition.
20 | [vectors, spectrum] = konect_decomposition('sym', A, 2, format, weights, opts);
21 |
22 | first = spectrum(1,1);
23 | second = spectrum(2,2);
24 |
25 | % Both ratios, in absolute value, as a column vector.
26 | values = [ abs(first / second); abs(second / first) ];
27 |
--------------------------------------------------------------------------------
/m/konect_normalize_rows.m:
--------------------------------------------------------------------------------
1 | %
2 | % Normalize the rows of a decomposition multiplicatively, such that each
3 | % row of U and V has norm 1. This is typically used with Laplacian
4 | % matrices. For a full orthogonal decomposition, this is a no-op.
5 | %
6 | % PARAMETERS
7 | % U (n*r) Matrix of eigenvectors
8 | %
9 | % RESULT
10 | % U (n*r) Normalized matrix
11 | %
12 |
13 | function [U] = konect_normalize_rows(U)
14 |
15 | [row_count, column_count] = size(U);
16 |
17 | % Inverse Euclidean norm of every row.
18 | inverse_norm = sum(conj(U) .* U, 2) .^ -0.5;
19 |
20 | % All-zero rows give Inf; use zero instead, so they stay zero.
21 | inverse_norm(isinf(inverse_norm)) = 0;
22 |
23 | % Scale the rows by multiplying with a sparse diagonal matrix.
24 | scaling = spdiags(inverse_norm, [0], row_count, row_count);
25 | U = scaling * U;
26 |
27 |
--------------------------------------------------------------------------------
/m/konect_statistic_snorm.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the spectral norm [snorm].
3 | %
4 | % PARAMETERS
5 | % a Adjacency/biadjacency matrix
6 | % format
7 | % weights
8 | % opts (optional) Options passed to eigs/svds
9 | %
10 | % RESULT
11 | % values The spectral norm
12 | %
13 |
14 | function values = konect_statistic_snorm(A, format, weights, opts)
15 |
16 | % Default options when the caller passes none.
17 | if ~exist('opts', 'var')
18 |     opts = struct('disp', 2);
19 | end
20 |
21 | % Request a single value of the 'sym' decomposition.
22 | [vectors, spectrum] = konect_decomposition('sym', A, 1, format, weights, opts);
23 |
24 | values(1) = abs(spectrum(1,1));
25 |
--------------------------------------------------------------------------------
/m/konect_fromto.m:
--------------------------------------------------------------------------------
1 | %
2 | % Split a large range of numbers into chunks. Used when using
3 | % vector/matrix operations on the whole data would use too much memory.
4 | %
5 | % PARAMETERS
6 | % a First index in desired range
7 | % b Last index in desired range
8 | % n Size of one block
9 | %
10 | % RESULT
11 | % k Number of chunks
12 | % from (k) Indexes of first item in each chunk
13 | % to (k) Indexes of last item in each chunk
14 | %
15 |
16 | function [k from to] = konect_fromto(a, b, n)
17 |
18 | % First index of every chunk
19 | from = a:n:b;
20 |
21 | if isempty(from)
22 |     % Empty range (e.g. a > b): return no chunks at all, so that FROM
23 |     % and TO always have the same length K
24 |     to = from;
25 | else
26 |     % Each chunk ends right before the next one starts; the last chunk
27 |     % ends at B
28 |     to = [(from(2:end)-1) b];
29 | end
30 |
31 | k = size(from,2);
32 |
--------------------------------------------------------------------------------
/m/konect_statistic_degone.m:
--------------------------------------------------------------------------------
1 | %
2 | % The proportion of nodes with degree one.
3 | %
4 |
5 | function values = konect_statistic_degone(A, format, weights, opts)
6 |
7 | consts = konect_consts();
8 |
9 | if format == consts.SYM || format == consts.ASYM
10 |
11 |     n = size(A, 1);
12 |     assert(size(A, 2) == n);
13 |
14 |     % Combine A with its transpose so that edges in both directions
15 |     % count towards a node's neighborhood
16 |     adjacent = (A ~= 0) | (A' ~= 0);
17 |
18 |     % Proportion of nodes that have exactly one neighbor
19 |     values = [ sum(sum(double(adjacent)) == 1) / n ];
20 |
21 | elseif format == consts.BIP
22 |
23 |     % Column sums and row sums give the degrees of the two node sets;
24 |     % the denominator is the total number of nodes on both sides
25 |     A = double(A ~= 0);
26 |     values = [ (sum(sum(A, 1) == 1) + sum(sum(A, 2) == 1)) / sum(size(A)) ];
27 |
28 | else
29 |     % Unknown format
30 |     assert(0);
31 | end
32 |
--------------------------------------------------------------------------------
/m/konect_print_bitmap.m:
--------------------------------------------------------------------------------
1 | %
2 | % Print a PNG. This is used when the amount of elements in the plot is
3 | % O(n) or larger.
4 | %
5 |
6 | function konect_print_bitmap(filename)
7 |
8 | % Determines the size of the bitmap
9 | % Larger values seem to produce blank or clipped pages in Matlab
10 | factor = 5;
11 |
12 | try
13 |
14 |     % Enlarge the paper extent (width and height) of the current figure
15 |     pp = get(gcf, 'PaperPosition');
16 |     pp(3:4) = factor * pp(3:4);
17 |     set(gcf,'PaperUnits','inches','PaperPosition', pp);
18 |     print(filename, '-dpng');
19 |
20 | catch err
21 |
22 |     % Do not leave a partial image behind; only delete what exists
23 |     if exist(filename, 'file')
24 |         delete(filename);
25 |     end
26 |
27 |     % Report the failure instead of dropping it silently
28 |     warning('konect:print_bitmap', 'Could not print %s: %s', filename, err.message);
29 |
30 | end
31 |
32 | % All figures are closed, whether or not printing succeeded
33 | close all;
34 |
--------------------------------------------------------------------------------
/m/konect_connect_matrix_strong.m:
--------------------------------------------------------------------------------
1 | %
2 | % Extract the largest strongly connected component from an adjacency
3 | % matrix. The original matrix can be recovered using konect_connect_back().
4 | %
5 | % RESULT
6 | % As Adjacency matrix of largest strongly connected component
7 | % cc 0/1 vector of chosen vertices
8 | % n Number of vertices in the found component
9 | %
10 | % PARAMETERS
11 | % A Adjacency matrix
12 | %
13 |
14 | function [As cc n] = konect_connect_matrix_strong(A)
15 |
16 | % 0/1 membership vector returned by konect_connect_strong()
17 | cc = konect_connect_strong(A);
18 |
19 | % Restrict rows and columns to the selected vertices
20 | selected = find(cc);
21 | As = A(selected, selected);
22 |
23 | % Size of the extracted submatrix
24 | n = size(As, 1);
25 |
--------------------------------------------------------------------------------
/m/konect_dentropy.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the edge distribution entropy "dentropy"/"dentropyn".
3 | %
4 | % RESULTS
5 | % ret The edge distribution entropy in nats
6 | %
7 | % PARAMETER
8 | % d Column vector of degrees
9 | % type (optional) Type: 'a'/'n' corresponding to [dentropy] and [dentropyn]. Default is 'a'.
10 | %
11 |
12 | function ret = konect_dentropy(d, type)
13 |
14 | % Plain (non-normalized) entropy unless a type was given
15 | if ~exist('type', 'var')
16 |     type = 'a';
17 | end
18 |
19 | % Keep only the nonzero degrees and turn them into shares that sum to one
20 | share = full(d(d ~= 0));
21 | share = share / sum(share);
22 |
23 | % Entropy using the natural logarithm
24 | ret = - sum(share .* log(share));
25 |
26 | % Type 'n': divide by the logarithm of the number of nonzero degrees
27 | if strcmp(type, 'n')
28 |     ret = ret / log(numel(share));
29 | end
30 |
--------------------------------------------------------------------------------
/m/private/conjx.m:
--------------------------------------------------------------------------------
1 | %
2 | % Replacement for conj() that also works with logical matrices, which
3 | % conj() does not.
4 | %
5 | % RESULT
6 | % B Complex conjugate of the parameter
7 | %
8 | % PARAMETERS
9 | % A Input matrix of which to compute the complex
10 | % conjugate; may be logical
11 | %
12 | % ABOUT
13 | % This file is part of the KONECT Matlab Toolbox version 0.3.
14 | % konect.cc
15 | % (c) Jerome Kunegis 2017; this is Free Software released under
16 | % the GPLv3, see COPYING.
17 | %
18 |
19 | function B = conjx(A)
20 |
21 | % Start from the input itself; logical arrays are returned unchanged
22 | B = A;
23 |
24 | % Everything else goes through the regular conj()
25 | if ~islogical(A)
26 |     B = conj(A);
27 | end
28 |
--------------------------------------------------------------------------------
/m/konect_statistic_loops.m:
--------------------------------------------------------------------------------
1 | %
2 | % Determine the number of loops in a network.
3 | %
4 | % PARAMETERS
5 | % A Adjacency or biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values Column vector of values
11 | % [1] Number of loops
12 | %
13 | % GROUP: square
14 | %
15 |
16 | function values = konect_statistic_loops(A, format, weights)
17 |
18 | consts = konect_consts();
19 |
20 | % Only square, non-bipartite matrices are accepted
21 | assert(format ~= consts.BIP);
22 | assert(size(A,1) == size(A,2));
23 |
24 | is_positive = (weights == consts.POSITIVE);
25 |
26 | if is_positive
27 |     % Sum of the diagonal entries
28 |     loops = trace(A);
29 | else
30 |     % Number of nonzero diagonal entries
31 |     loops = sum(diag(A) ~= 0);
32 | end
33 |
34 | values(1) = loops;
35 |
--------------------------------------------------------------------------------
/m/konect_own.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the Balanced Inequality ratio as defined in [1].
3 | %
4 | % [1] Fairness on the Web: Alternatives to the Power Law, Jérôme
5 | % Kunegis and Julia Preusse, Proc. Web Science Conf., 2012,
6 | % pp. 175--184.
7 | %
8 | % RESULT
9 | % own The balanced inequality ratio
10 | %
11 | % PARAMETERS
12 | % p (e*1) Indexes
13 | % q (e*1) Multiplicities or [] for uniform weights
14 | %
15 |
16 | function [own] = konect_own(p, q)
17 |
18 | % Only the Lorenz curve coordinates are used below; the Gini
19 | % coefficient itself is ignored
20 | [gini r_x r_y] = konect_gini(p, q);
21 |
22 | % Difference between the reversed X values and the Y values
23 | gap = flipud(r_x) - r_y;
24 |
25 | % Last position at which that difference is still positive
26 | last_positive = max(find(gap > 0));
27 |
28 | % The result is the Y value at that position
29 | own = r_y(last_positive);
30 |
--------------------------------------------------------------------------------
/m/konect_statistic_seidelnorm.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the "Seidel norm", i.e., the largest absolute value of the
3 | % Seidel adjacency matrix.
4 | %
5 | % PARAMETERS
6 | % a Adjacency/biadjacency matrix
7 | % format
8 | % weights
9 | % opts (optional) Options passed to eigs/svds
10 | %
11 | % RESULT
12 | % values The Seidel norm
13 | %
14 |
15 | function values = konect_statistic_seidelnorm(A, format, weights, opts)
16 |
17 | % Fall back to default solver options when the caller gave none
18 | if ~exist('opts', 'var')
19 |     opts = struct('disp', 2);
20 | end
21 |
22 | % A single eigenpair of the 'seidel' decomposition is requested
23 | [U, D] = konect_decomposition('seidel', A, 1, format, weights, opts);
24 |
25 | % The statistic is the magnitude of the first eigenvalue
26 | values = abs(D(1,1));
27 |
--------------------------------------------------------------------------------
/m/konect_statistic_triangles_normuni.m:
--------------------------------------------------------------------------------
1 | %
2 | % Number of triangles, normalized such that for any n, in the uniform
3 | % distribution, the statistic values have the same distribution.
4 | %
5 | % This is the correct method as derived by a newer estimate of the
6 | % variance.
7 | %
8 |
9 | function values = konect_statistic_triangles_normuni(A, format, weights)
10 |
11 | % Raw statistics that the normalization is built from
12 | n = konect_statistic_size(A, format, weights);
13 | t = konect_statistic_triangles(A, format, weights);
14 |
15 | % Center and scale used for the normalization, as polynomials in n
16 | mu = (1/48) * n * (n-1) * (n-2);
17 | variance = n^4 / 128 - (11 / 384) * n^3 + n^2 / 32 - n / 96;
18 | sigma = sqrt(variance);
19 |
20 | values = (t - mu) / sigma;
21 |
--------------------------------------------------------------------------------
/test/test_eigl2.m:
--------------------------------------------------------------------------------
1 |
2 | % Check that two methods of eigl() return the same eigenvalues for the
3 | % Laplacian of a random graph.
4 |
5 | addpath ../
6 |
7 | n = 408; % number of nodes
8 | d = 10; % d/n is the density passed to sprand()
9 | r = 7; % number of eigenpairs requested
10 |
11 | % Random symmetric matrix plus a rank-one term, reduced to its 0/1 pattern
12 | A = sprand(n, n, d/n);
13 | A = A+A';
14 | u = sprand(n, 1, d/n);
15 | A = A + 100 * u * u';
16 | A = A ~= 0;
17 |
18 | % Laplacian: diagonal matrix of the column sums, minus A
19 | L = spdiags(sum(A)', 0, n, n) - A;
20 |
21 | opts.disp = 2;
22 |
23 | % Method 'sa' (last argument 0)
24 | tic
25 | [U0 D0] = eigl(L, r, opts, 0);
26 | time_0 = toc
27 |
28 | % Without inverse iteration (last argument 2)
29 | tic
30 | [U2 D2] = eigl(L, r, opts, 2);
31 | time_2 = toc
32 |
33 | % Show the eigenvalues and timings of both runs
34 | diag(D0)'
35 | diag(D2)'
36 | time_0
37 | time_2
38 |
39 | % Both methods must agree
40 | if norm(D0 - D2) > 1e-10
41 |     error('***');
42 | end
43 |
--------------------------------------------------------------------------------
/m/konect_statistic_asymmetry.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the spectral asymmetry, i.e. the smallest eigenvalue of the
3 | % Hermitian Laplacian. This is computed for the largest weakly
4 | % connected component.
5 | %
6 | % PARAMETERS
7 | % A Adjacency matrix of a directed graph
8 | % format
9 | % weights
10 | %
11 | % RESULT
12 | % value The algebraic asymmetry
13 | %
14 |
15 | function value = konect_statistic_asymmetry(A, format, weights)
16 |
17 | opts = struct('disp', 2);
18 |
19 | % Reduce the matrix with konect_connect_matrix_square()
20 | component = konect_connect_matrix_square(A);
21 |
22 | % 'lapherm' matrix of the reduced network
23 | L = konect_matrix('lapherm', component, format, weights);
24 |
25 | % A single eigenpair is requested; keep the real part of its eigenvalue
26 | [U, D] = konect_eigl(L, 1, opts);
27 | value = real(D(1,1));
28 |
--------------------------------------------------------------------------------
/m/konect_xinv.m:
--------------------------------------------------------------------------------
1 | %
2 | % Numerically stable pseudoinverse of a diagonal matrix. This will
3 | % pseudoinvert each diagonal element separately.
4 | %
5 | % PARAMETERS
6 | % d Diagonal matrix to pseudoinvert
7 | %
8 | % RESULT
9 | % d_i Pseudoinverse of D
10 | %
11 | % USAGE
12 | %
13 | % Typically, this is used to compute the pseudoinverse of a n*k matrix,
14 | % where n is large and k is small, in the following way:
15 | %
16 | % [u d v] = svd(a, 'econ');
17 | % a_i = v * konect_xinv(d) * u';
18 | %
19 |
20 | function d_i = konect_xinv(d)
21 |
22 | % Reciprocal of every diagonal entry
23 | reciprocal = diag(d) .^ -1;
24 |
25 | % Entries that could not be inverted (Inf or NaN) are mapped to zero
26 | reciprocal(~isfinite(reciprocal)) = 0;
27 |
28 | % Back to a diagonal matrix
29 | d_i = diag(reciprocal);
30 |
--------------------------------------------------------------------------------
/m/konect_connect_strong.m:
--------------------------------------------------------------------------------
1 | %
2 | % Find the largest strongly connected component of a directed
3 | % graph. Edge weights are ignored.
4 | %
5 | % PARAMETERS
6 | % A (n*n) Square asymmetric adjacency matrix
7 | %
8 | % RESULT
9 | % cc (n*1) 0/1 vector of vertices in the connected component
10 | %
11 |
12 | function [cc] = konect_connect_strong(A)
13 |
14 | if ~konect_usingoctave()
15 |
16 |     % Edge weights are ignored
17 |     A = (A ~= 0);
18 |
19 |     % Component label of every vertex, and the size of every component
20 |     [ci sizes] = components(A, 'full2sparse', 1);
21 |
22 |     % Label of the largest component: last entry after an ascending sort
23 |     [sorted_sizes order] = sort(sizes);
24 |     largest = order(end);
25 |
26 |     % 0/1 membership vector
27 |     cc = zeros(size(A,1), 1);
28 |     cc(ci == largest) = 1;
29 |
30 | else
31 |
32 |     % Alternative implementation used under Octave
33 |     cc = konect_connect_strong_nobgl(A);
34 |
35 | end
--------------------------------------------------------------------------------
/m/konect_significance.m:
--------------------------------------------------------------------------------
1 | %
2 | % Statistical significance using a t-test. Return the probability that
3 | % U > V could have arisen from equal distributions. The sign of the
4 | % result indicates the direction of the difference.
5 | %
6 | % RESULT
7 | % p Probability
8 | %
9 | % PARAMETERS
10 | % u,v (n*1) Vectors to compare
11 | %
12 |
13 | function [p] = konect_significance(u, v)
14 |
15 | % Only the p-value of the t-test is used
16 | [h, p_value] = ttest(u, v);
17 |
18 | p = 1 - p_value;
19 |
20 | % Identical inputs give zero
21 | if sum(u ~= v) == 0
22 |     p = 0;
23 | end
24 |
25 | % Negative sign when the mean of the finite differences is negative
26 | delta = u - v;
27 | delta = delta(isfinite(delta));
28 | if mean(delta) < 0
29 |     p = -p;
30 | end
31 |
32 | % An undefined result counts as zero
33 | if isnan(p)
34 |     p = 0;
35 | end
36 |
--------------------------------------------------------------------------------
/m/konect_statistic_conflictn.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the normalized algebraic conflict [conflictn]. This is
3 | % related to the spectrum of the normalized Laplacian, and not just a
4 | % normalization of the regular conflict, which is related to the
5 | % spectrum of the regular Laplacian.
6 | %
7 | % PARAMETERS
8 | % A Adjacency or biadjacency matrix
9 | % format
10 | % weight
11 | %
12 | % RESULT
13 | % values Normalized algebraic conflict; 0 for unweighted networks
14 | %
15 |
16 | function values = konect_statistic_conflictn(A, format, weights)
17 |
18 | opts = struct('disp', 2);
19 |
20 | % A single eigenpair of the 'sym-n' decomposition is requested
21 | [U, D] = konect_decomposition('sym-n', A, 1, format, weights, opts);
22 |
23 | % One minus the first eigenvalue
24 | first_eigenvalue = D(1,1);
25 | values = 1 - first_eigenvalue;
26 |
--------------------------------------------------------------------------------
/m/konect_connect_matrix_square.m:
--------------------------------------------------------------------------------
1 | %
2 | % Extract the largest connected component from the adjacency matrix
3 | % of a unipartite graph. The original matrix can be recovered using
4 | % konect_connect_back().
5 | %
6 | % PARAMETERS
7 | % A (nx*nx) Adjacency matrix, need not be symmetric;
8 | % interpreted as an undirected graph
9 | %
10 | % RESULT
11 | % As (n*n) Subset of adjacency matrix corresponding to the
12 | % chosen set of vertices
13 | % cc (nx*1) 0/1 vector of chosen vertices
14 | % n Number of vertices in the found component
15 | %
16 |
17 | function [As cc n] = konect_connect_matrix_square(A)
18 |
19 | % Vertex selection returned by konect_connect_square()
20 | cc = konect_connect_square(A);
21 |
22 | % The same selection is applied to rows and columns
23 | chosen = cc;
24 | As = A(chosen, chosen);
25 |
26 | % Size of the extracted submatrix
27 | n = size(As, 1);
28 |
--------------------------------------------------------------------------------
/m/konect_posnegcolormap.m:
--------------------------------------------------------------------------------
1 | %
2 | % From a matrix of numbers to be displayed using imagesc(), generate
3 | % a colormap such that white=0, green>0 and red<0.
4 | %
5 |
6 | function [colormap_a] = konect_posnegcolormap(matrix)
7 |
8 | % Exponent applied to the normalized values
9 | gam = 2;
10 |
11 | % Step between two consecutive colormap entries, in normalized units
12 | granularity = 0.01;
13 |
14 | p_min = min(min(matrix));
15 | p_max = max(max(matrix));
16 |
17 | % Scale such that the largest absolute value becomes 1
18 | to_one = max(-p_min, p_max);
19 | if to_one == 0
20 |     % All-zero matrix: avoid 0/0; the colormap is then one white entry
21 |     to_one = 1;
22 | end
23 | p_min = p_min / to_one;
24 | p_max = p_max / to_one;
25 |
26 | % Sample points of the negative and positive halves (column vectors)
27 | negative = (p_min:granularity:0)';
28 | positive = (granularity:granularity:p_max)';
29 |
30 | % Negative values fade from white to red, positive ones from white to
31 | % green; every row is an RGB triple
32 | negative_rows = (-negative) .^ gam * [0 -1 -1] + ones(length(negative),1) * [1 1 1];
33 | positive_rows = positive .^ gam * [-1 0 -1] + ones(length(positive),1) * [1 1 1];
34 |
35 | colormap_a = [ negative_rows ; positive_rows ];
36 |
--------------------------------------------------------------------------------
/m/konect_statistic_negativity.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the negativity of a signed or weighted network, i.e., the
3 | % proportion of negatively weighted edges.
4 | %
5 | % PARAMETERS
6 | % A Adjacency or biadjacency matrix
7 | % format
8 | % weights
9 | %
10 | % RESULT
11 | % value The negativity (in the range [0,1])
12 | %
13 | % GROUP: negative
14 | %
15 |
16 | function value = konect_statistic_negativity(A, format, weights)
17 |
18 | consts = konect_consts();
19 |
20 | % Restricted to the signed and weighted weight types
21 | accepted = [ consts.SIGNED, consts.MULTISIGNED, ...
22 |              consts.WEIGHTED, consts.MULTIWEIGHTED ];
23 | assert(any(weights == accepted));
24 |
25 | % Share of the nonzero entries that are negative
26 | value = nnz(A < 0) / nnz(A);
27 |
--------------------------------------------------------------------------------
/m/konect_statistic_opnorm.m:
--------------------------------------------------------------------------------
1 | %
2 | % The largest singular value of a directed graph's asymmetric square
3 | % adjacency matrix. This is also known as the Ky Fan 1-norm or the
4 | % operator 2-norm.
5 | %
6 | % GROUP: asym
7 | %
8 |
9 | function values = konect_statistic_opnorm(A, format, weights, opts)
10 |
11 | consts = konect_consts();
12 |
13 | if format ~= consts.ASYM
14 |     error('*** Error: [opnorm] expected format to be ASYM');
15 | end
16 |
17 | % Default solver options when the caller passed none
18 | if ~exist('opts', 'var'),
19 |     opts = struct();
20 |     opts.disp = 2;
21 | end
22 |
23 | % Set unconditionally, i.e. also when the caller supplied a tolerance
24 | opts.tol = 1e-7; % (default is 1e-14 in Matlab)
25 |
26 | % Only the first singular triple of the 'svd' decomposition is requested
27 | [U D V] = konect_decomposition('svd', A, 1, format, weights, opts);
28 |
29 | % Terminated with a semicolon so the value is not echoed to the console
30 | values(1) = D(1,1);
31 | assert(values(1) >= 0);
32 |
--------------------------------------------------------------------------------
/m/konect_connect_matrix_bipartite.m:
--------------------------------------------------------------------------------
1 | %
2 | % Extract the largest connected component from a biadjacency matrix.
3 | % The original matrix can be recovered using konect_connect_back().
4 | %
5 | % RESULT
6 | % Bs (n1*n2) Biadjacency matrix of largest connected component
7 | % cc1, cc2 (nx1, nx2) 0/1 vector of chosen left/right subset
8 | % n Number of nodes in the found component (n = n1 + n2)
9 | % n1 Number of left nodes in the found component
10 | % n2 Number of right nodes in the found component
11 | %
12 | % PARAMETERS
13 | % B (nx1*nx2) Biadjacency matrix
14 | %
15 |
16 | function [Bs cc1 cc2 n n1 n2] = konect_connect_matrix_bipartite(B)
17 |
18 | % Left and right vertex selections from konect_connect_bipartite()
19 | [cc1 cc2] = konect_connect_bipartite(B);
20 |
21 | % Rows are restricted to the left selection, columns to the right one
22 | Bs = B(cc1, cc2);
23 |
24 | % Node counts on each side, and in total
25 | n1 = size(Bs, 1);
26 | n2 = size(Bs, 2);
27 | n = n1 + n2;
28 |
--------------------------------------------------------------------------------
/m/konect_statistic_controllabilityn.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the relative controllability [controllabilityn].
3 | %
4 | % PARAMETERS
5 | % A Adjacency / biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values The values as a column vector
11 | % [1] The relative controllability (C / |V|)
12 | %
13 |
14 | function values = konect_statistic_controllabilityn(A, format, weights)
15 |
16 | consts = konect_consts();
17 |
18 | % Edge weights are ignored
19 | A = A ~= 0;
20 |
21 | max_dir_matching = konect_controllability(A, format);
22 |
23 | % Number of nodes |V|, not counting isolated nodes
24 | if format == consts.BIP
25 |     n = sum(sum(A, 2) ~= 0) + sum(sum(A, 1) ~= 0);
26 | else
27 |     n = sum(sum(A | A', 2) ~= 0);
28 | end
29 |
30 | % Terminated with a semicolon so the value is not echoed to the console
31 | values = [ (n - max_dir_matching) / n ];
32 |
--------------------------------------------------------------------------------
/m/konect_colors_letter.m:
--------------------------------------------------------------------------------
1 | %
2 | % Colors used in various plots.
3 | %
4 | % Each color represents the "side" from which an analysis is done. This
5 | % is used in many different analyses, so the meaning depends on the
6 | % analysis, but the overall structure is the same, so we use consistent
7 | % colors.
8 | %
9 | % See the comments below for uses.
10 | %
11 | function colors = konect_colors_letter()
12 |
13 | % Field names, in the order in which they are added to the struct:
14 | %   a       All nodes
15 | %   u       Only left nodes (BIP) / only outlinks (ASYM)
16 | %   v       Only right nodes (BIP) / only inlinks (ASYM)
17 | %   weight  Edge weight distribution
18 | %   b       Out/in-link comparison
19 | names = {'a', 'u', 'v', 'weight', 'b'};
20 |
21 | % One RGB triple per field, in the same order
22 | rgb = [ 0   0   1 ; ...
23 |         1   0   0 ; ...
24 |         0   0.9 0 ; ...
25 |         0.7 0.5 0 ; ...
26 |         0.5 0.8 0 ];
27 |
28 | for i = 1:numel(names)
29 |     colors.(names{i}) = rgb(i, :);
30 | end
31 |
--------------------------------------------------------------------------------
/m/konect_statistic_network_rank_sq.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the squared network rank.
3 | %
4 | % PARAMETERS
5 | % a Adjacency or biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % value
11 | %
12 |
13 | function value = konect_statistic_network_rank_sq(a, format, weights)
14 |
15 | consts = konect_consts();
16 |
17 | opts = struct('disp', 2);
18 |
19 | % norm() does not accept logical matrices
20 | if islogical(a)
21 |     a = double(a);
22 | end
23 |
24 | % Factor of 2 because we always double the effective number of edges
25 | % later on -- either by taking the SVD, or by symmetrization.
26 | doubled_frobenius_sq = 2 * norm(a, 'fro')^2;
27 |
28 | % One singular value of a (BIP), otherwise one eigenvalue of a + a'
29 | if format == consts.BIP
30 |     leading = svds(a, 1, 'L', opts);
31 | else
32 |     leading = eigs(a + a', 1, 'lm', opts);
33 | end
34 |
35 | value = doubled_frobenius_sq / (leading^2);
36 |
--------------------------------------------------------------------------------
/m/konect_gini_direct.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the Gini coefficient and coordinates of the Lorenz curve.
3 | %
4 | % RESULT
5 | % gini The Gini coefficient
6 | % r_x X values of Lorenz plot
7 | % r_y Y values of Lorenz plot
8 | %
9 | % PARAMETERS
10 | % v Array of values, e.g. degree values
11 | %
12 |
13 | function [gini r_x r_y] = konect_gini_direct(v)
14 |
15 | % Turn a wide input into a tall one
16 | if size(v, 1) < size(v, 2)
17 |     v = v';
18 | end
19 |
20 | ascending = sort(v);
21 | n = length(ascending);
22 |
23 | % Cumulative share of the total, in ascending order of value
24 | s = cumsum(ascending) / sum(ascending);
25 |
26 | % Lorenz curve, starting at the origin
27 | r_x = (0:n)' / n;
28 | r_y = [ 0 ; s ];
29 |
30 | gini = 1 - (2 * sum(s) - 1) / n;
31 |
32 | % Thin out long curves to roughly m_max evenly spaced points; the last
33 | % point is always kept
34 | m_max = 200;
35 | m = length(r_x);
36 | if m > m_max
37 |     keep = floor(1:((m-1)/m_max):m);
38 |     keep(end) = m;
39 |     r_x = r_x(keep);
40 |     r_y = r_y(keep);
41 | end
42 |
--------------------------------------------------------------------------------
/m/konect_statistic_uniquevolume.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the unique volume of a network, i.e., the number of edges
3 | % without taking into account edge multiplicities.
4 | %
5 | % Note: this looks trivial, but we can't replace it by the number
6 | % of lines in the out.* file. In other words, [uniquevolume] is
7 | % distinct from [lines].
8 | %
9 | % PARAMETERS
10 | % A Adjacency or biadjacency matrix
11 | % format
12 | % weights
13 | %
14 | % RESULT
15 | % values Column vector of results
16 | % [1] Unique volume
17 | %
18 | %
19 | % GROUP: MULTI
20 | %
21 |
22 | function values = konect_statistic_uniquevolume(A, format, weights)
23 |
24 | % Only the third output is used: a per-weight-type flag vector
25 | [negative, interval_scale, multi] = konect_data_weights();
26 |
27 | % The statistic is only computed for weight types flagged in MULTI
28 | assert(multi(weights) == 1);
29 |
30 | % Number of nonzero entries of A
31 | values = nnz(A);
32 |
--------------------------------------------------------------------------------
/m/konect_statistic_alcon.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the algebraic connectivity [alcon].
3 | %
4 | % PARAMETERS
5 | % A Adjacency or biadjacency matrix
6 | % format
7 | % weights
8 | % opts (optional) Options passed to eigs/svds
9 | %
10 | % RESULT
11 | % value The algebraic connectivity
12 | %
13 | % TODO
14 | %
15 | % * in analogy to anticonflict, define a normalized variant of the
16 | % algebraic conflict/connectivity as
17 | % lambda_min [L] * 2 * sqrt(n) / e.
18 | %
19 |
20 | function value = konect_statistic_alcon(A, format, weights, opts)
21 |
22 | % Default solver options when none were supplied
23 | if ~exist('opts', 'var')
24 |     opts = struct('disp', 2);
25 | end
26 |
27 | % Two eigenpairs of the 'lap' decomposition are requested
28 | [u, d] = konect_decomposition('lap', A, 2, format, weights, opts);
29 |
30 | % The statistic is the second eigenvalue; NaN when fewer than two came back
31 | if size(d,1) >= 2
32 |     value = d(2,2);
33 | else
34 |     value = NaN;
35 | end
36 |
--------------------------------------------------------------------------------
/m/konect_connect_bipartite.m:
--------------------------------------------------------------------------------
1 | %
2 | % Find biggest connected component of bipartite graph.
3 | %
4 | % RESULT
5 | % v 0/1 vector of left nodes in connected component
6 | % w 0/1 vector of right nodes in connected component
7 | %
8 | % Returns v=[] and w=[] when no largest component is found
9 | %
10 | % PARAMETERS
11 | % B Biadjacency matrix of bipartite graph (i.e.,
12 | % [0 B;B' 0] is the actual adjacency matrix.)
13 | %
14 |
15 | function [v, w] = konect_connect_bipartite(B)
16 |
17 | if ~konect_usingoctave()
18 |
19 |     [m n] = size(B);
20 |
21 |     % Full adjacency matrix [0 B; B' 0]: left nodes first, then right nodes
22 |     upper = [sparse(m,m) B];
23 |     lower = [B' sparse(n,n)];
24 |     A = [upper ; lower];
25 |
26 |     % Component label of every node, and the size of every component
27 |     [ci sizes] = components(A, 'full2sparse', 1);
28 |
29 |     % Label of the largest component: first entry when sorting by
30 |     % negated size
31 |     [x i] = sort(-sizes);
32 |     largest = i(1);
33 |
34 |     % Split the membership test into the left and the right part
35 |     v = (ci(1:m) == largest);
36 |     w = (ci((m+1) : (m+n)) == largest);
37 |
38 | else
39 |
40 |     % Alternative implementation used under Octave
41 |     [v, w] = konect_connect_bipartite_nobgl(B);
42 |
43 | end
--------------------------------------------------------------------------------
/m/konect_power_law_flat_vector.m:
--------------------------------------------------------------------------------
1 | %
2 | % Estimate the power-law exponent of the components of a vector.
3 | %
4 | % This uses the simple, fast and robust method from [1] Equations (5-6).
5 | % It will give skewed results if the distribution is not a power law, or
6 | % only a power law in a specific range.
7 | %
8 | % [1] Power laws, Pareto distributions and Zipf's law, M. E. J. Newman,
9 | % 2006.
10 | %
11 | % PARAMETERS
12 | % values Vector of values; zeroes are ignored
13 | %
14 | % RESULT
15 | % gamma Power-law exponent
16 | % sigma Expected statistical error on gamma
17 | %
18 |
19 | function [gamma sigma] = konect_power_law_flat_vector(values)
20 |
21 | % Zeroes are ignored
22 | x = values(values ~= 0);
23 |
24 | n = length(x);
25 |
26 | % Sum of the logarithms of the values relative to the smallest one
27 | x_min = min(x);
28 | log_sum = sum(log(x / x_min));
29 |
30 | % Exponent estimate and its expected statistical error
31 | gamma = 1 + n / log_sum;
32 | sigma = sqrt(n) / log_sum;
33 |
--------------------------------------------------------------------------------
/m/konect_statistic_controllability.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the controllability [controllability].
3 | %
4 | % PARAMETERS
5 | % A Adjacency / biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values The values as a column vector
11 | % [1] The controllability (C)
12 | % [2] The relative controllability (C / |V|)
13 | %
14 |
15 | function values = konect_statistic_controllability(A, format, weights)
16 |
17 | consts = konect_consts();
18 |
19 | % Edge weights are ignored
20 | A = A ~= 0;
21 |
22 | max_dir_matching = konect_controllability(A, format);
23 |
24 | % Number of nodes |V|, not counting isolated nodes
25 | if format == consts.BIP
26 |     n = sum(sum(A, 2) ~= 0) + sum(sum(A, 1) ~= 0);
27 | else
28 |     n = sum(sum(A | A', 2) ~= 0);
29 | end
30 |
31 | % [1] controllability, [2] controllability relative to |V|.
32 | % Terminated with a semicolon so the values are not echoed to the console
33 | values = [
34 |     n - max_dir_matching;
35 |     (n - max_dir_matching) / n
36 | ];
37 |
--------------------------------------------------------------------------------
/m/konect_statistic_maxdiag.m:
--------------------------------------------------------------------------------
1 | %
2 | % The cyclic eigenvalue: For directed graphs, we can consider the
3 | % largest eigenvalue of the (generally asymmetric) adjacency matrix.
4 | % This is zero for acyclic graphs. It equals the spectral norm and
5 | % operator 2-norm for symmetric graphs.
6 | %
7 | % GROUP: asym
8 | %
9 |
10 | function values = konect_statistic_maxdiag(A, format, weights, opts)
11 |
12 | consts = konect_consts();
13 |
14 | if format ~= consts.ASYM
15 |     error('*** Error: [maxdiag] expected format to be ASYM');
16 | end
17 |
18 | % Default solver options when the caller passed none
19 | if ~exist('opts', 'var'),
20 |     opts = struct();
21 |     opts.disp = 2;
22 | end
23 |
24 | % Set unconditionally, i.e. also when the caller supplied a tolerance
25 | opts.tol = 1e-7; % (default is 1e-14 in Matlab)
26 |
27 | % Only the first component of the 'diag' decomposition is requested
28 | [U D V] = konect_decomposition('diag', A, 1, format, weights, opts);
29 |
30 | % Terminated with a semicolon so the value is not echoed to the console
31 | values(1) = D(1,1);
32 | assert(values(1) >= 0);
33 |
--------------------------------------------------------------------------------
/m/konect_statistic_dentropy.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the edge distribution entropy [dentropy].
3 | %
4 | % PARAMETERS
5 | % a Adjacency/biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values Column vector of results. The first value is the global
11 | % entropy. The next ones are the left/right
12 | % entropies (only BIP and ASYM).
13 | %
14 |
15 | function values = konect_statistic_dentropy(a, format, weights)
16 |
17 | consts = konect_consts();
18 |
19 | % Edge weights are ignored
20 | pattern = (a ~= 0);
21 |
22 | % Row sums and column sums, both as column vectors
23 | d1 = sum(pattern, 2);
24 | d2 = sum(pattern, 1)';
25 |
26 | switch format
27 |     case consts.BIP
28 |         % Both node sets stacked, then each side separately
29 |         values = [ konect_dentropy([d1 ; d2]); konect_dentropy(d1); konect_dentropy(d2) ];
30 |     case consts.SYM
31 |         % Single value from the summed degrees
32 |         values = konect_dentropy(d1 + d2);
33 |     case consts.ASYM
34 |         % Summed degrees, then row sums and column sums separately
35 |         values = [ konect_dentropy(d1 + d2); konect_dentropy(d1); konect_dentropy(d2) ];
36 | end
37 |
--------------------------------------------------------------------------------
/m/konect_statistic_dentropy2.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the degree distribution entropy [dentropy2].
3 | %
4 | % PARAMETERS
5 | % a Adjacency/biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values Column vector of results. The first value is the global
11 | % entropy. The next ones are the left/right
12 | % entropies (only BIP and ASYM).
13 | %
14 |
15 | function values = konect_statistic_dentropy2(a, format, weights)
16 |
17 | consts = konect_consts();
18 |
19 | % Edge weights are ignored
20 | pattern = (a ~= 0);
21 |
22 | % Row sums and column sums, both as column vectors
23 | d1 = sum(pattern, 2);
24 | d2 = sum(pattern, 1)';
25 |
26 | switch format
27 |     case consts.BIP
28 |         % Both node sets stacked, then each side separately
29 |         values = [ konect_dentropy2([d1 ; d2]); konect_dentropy2(d1); konect_dentropy2(d2) ];
30 |     case consts.SYM
31 |         % Single value from the summed degrees
32 |         values = konect_dentropy2(d1 + d2);
33 |     case consts.ASYM
34 |         % Summed degrees, then row sums and column sums separately
35 |         values = [ konect_dentropy2(d1 + d2); konect_dentropy2(d1); konect_dentropy2(d2) ];
36 | end
37 |
--------------------------------------------------------------------------------
/m/konect_statistic_jain.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute Jain's fairness index [jain] [740].
3 | %
4 | % PARAMETERS
5 | % a Adjacency/biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values Column vector of results.
11 | % [1] Global value
12 | % [2] Left value (BIP)
13 | % [2] Outdegree value (ASYM)
14 | % [3] Right value (BIP)
15 | % [3] Indegree value (ASYM)
16 | %
17 |
18 | function values = konect_statistic_jain(A, format, weights)
19 |
20 | consts = konect_consts();
21 |
22 | % Edge weights are ignored
23 | pattern = (A ~= 0);
24 |
25 | % Row sums and column sums, both as column vectors
26 | d1 = sum(pattern, 2);
27 | d2 = sum(pattern, 1)';
28 |
29 | switch format
30 |     case consts.BIP
31 |         % Both node sets stacked, then each side separately
32 |         values = [ konect_jain([d1 ; d2]); konect_jain(d1); konect_jain(d2) ];
33 |     case consts.SYM
34 |         % Single value from the summed degrees
35 |         values = konect_jain(d1 + d2);
36 |     case consts.ASYM
37 |         % Summed degrees, then row sums and column sums separately
38 |         values = [ konect_jain(d1 + d2); konect_jain(d1); konect_jain(d2) ];
39 | end
40 |
41 | % The result is returned as a full (non-sparse) array
42 | values = full(values);
43 |
--------------------------------------------------------------------------------
/m/konect_gini.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the Gini coefficient of a degree distribution, given index
3 | % values. The second and third output parameters give the coordinates
4 | % of points on the Lorenz curve. The returned R_X and R_Y values are
5 | % reduced to at most 200.
6 | %
7 | % RESULT
8 | % gini The Gini coefficient
9 | % r_x X values of Lorenz plot
10 | % r_y Y values of Lorenz plot
11 | %
12 | % PARAMETERS
13 | % p (e*1) Index values, e.g. node IDs
14 | % q (e*1) (optional, default = []) Multiplicities or []
15 | % for unweighted case, e.g. degree values
16 | %
17 |
18 | function [gini r_x r_y] = konect_gini(p, q)
19 |
20 | % Uniform multiplicities when Q is omitted or empty. "||" guarantees
21 | % that Q is never read when it was not passed.
22 | if nargin < 2 || isempty(q)
23 |     q = 1;
24 | end
25 |
26 | % Accumulate the multiplicities per index value; indexes are shifted by
27 | % one so that an index of zero is representable
28 | counts = full(sparse(full(p+1), 1, double(q), max(p) + 1, 1));
29 |
30 | % Drop the slot belonging to index zero
31 | counts = counts(2:end);
32 |
33 | % Drop indexes that never occur
34 | counts = counts(find(counts));
35 |
36 | [gini r_x r_y] = konect_gini_direct(counts);
37 |
--------------------------------------------------------------------------------
/m/konect_first_index.m:
--------------------------------------------------------------------------------
1 | %
2 | % Which latent dimension is the first usable one for a given
3 | % decomposition? This is 1 for most cases, and 2 for: The
4 | % Laplacians and normalized matrices of unsigned graphs.
5 | %
6 | % PARAMETERS
7 | % decomposition
8 | % D (optional) Eigenvalues;
9 | %
10 | % RESULT
11 | % first Index of first usable latent dimension; 1 or 2
12 | %
13 |
14 | function first = konect_first_index(decomposition, D)
15 |
16 | % Threshold below which the first eigenvalue makes dimension 1 unusable
17 | epsilon = 1e-13;
18 |
19 | first = 1;
20 |
21 | data_decomposition = konect_data_decomposition(decomposition);
22 |
23 | if data_decomposition.l
24 |     % Without eigenvalues, always skip the first dimension; with them,
25 |     % only when the first eigenvalue is below the threshold
26 |     if ~exist('D', 'var')
27 |         first = 2;
28 |     elseif D(1,1) < epsilon
29 |         first = 2;
30 |     end
31 | elseif any(strcmp(decomposition, {'stoch1', 'stoch2', 'stoch'}))
32 |     first = 2;
33 | end
34 |
--------------------------------------------------------------------------------
/m/konect_statistic_dentropyn.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the normalized edge distribution entropy [dentropyn].
3 | %
4 | % RESULT
5 | % values Column vector of results. The first value is the global
6 | % entropy. The next ones are the left/right
7 | % entropies (only BIP and ASYM).
8 | %
9 | % PARAMETERS
10 | % A Adjacency/biadjacency matrix
11 | % format
12 | % weights
13 | %
14 |
function values = konect_statistic_dentropyn(A, format, weights)
% Normalized entropy of the degree vectors, via konect_dentropy(., 'n').
% WEIGHTS is accepted but not used.

consts = konect_consts();

% Only the presence of an entry matters, not its value
pattern = (A ~= 0);

deg_row = sum(pattern, 2);
deg_col = sum(pattern, 1)';

if format == consts.SYM

    % Single value computed from the summed row and column degrees
    values = konect_dentropy(deg_row + deg_col, 'n');

elseif format == consts.BIP || format == consts.ASYM

    % BIP stacks the two degree vectors; ASYM adds them per node
    if format == consts.BIP
        deg_all = [ deg_row ; deg_col ];
    else
        deg_all = deg_row + deg_col;
    end

    values = [ konect_dentropy(deg_all, 'n') ; ...
               konect_dentropy(deg_row, 'n') ; ...
               konect_dentropy(deg_col, 'n') ];

end
31 |
32 |
--------------------------------------------------------------------------------
/m/konect_statistic_patest.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the "patest" statistic. This equals
3 | %
4 | % X = log(d_max) / log(|V|),
5 | %
6 | % where d_max is the maximal degree and |V| is the number of nodes.
7 | %
8 | % We compute it always for RIGHT distributions, since this is used in
9 | % the Preferential Attachment paper.
10 | %
11 | % PARAMETERS
12 | % a Adjacency matrix
13 | % format
14 | % weights
15 | %
16 | % RESULT
17 | % values Column vector of results
18 | % [1] Max degree
19 | % [2] Max outdegree (ASYM); max left degree (BIP)
20 | % [3] Max indegree (ASYM); max right degree (BIP)
21 | %
22 |
function values = konect_statistic_patest(A, format, weights)
% Returns the single value log(d_max) / log(n), where d_max is the
% largest column sum of the 0/1 pattern and n the number of rows.
% WEIGHTS is accepted but not used.

consts = konect_consts();

% Work on the 0/1 pattern; for SYM the transpose is added
B = (A ~= 0);
if format == consts.SYM
    B = B + B';
end

% Echoed to the output (statement not terminated by a semicolon)
n = size(B, 1)

col_degree = sum(B, 1);
d_max = max(col_degree);

% Echoed to the output as well
values = log(d_max) / log(n)

values = full(values);
40 |
--------------------------------------------------------------------------------
/m/konect_ap.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the average precision.
3 | %
4 | % If target is not a 0/1 vector, its values are rounded to 0 or 1.
5 | %
6 | % PARAMETERS
7 | % prediction (e*1) Prediction scores
8 | % target (e*1) Correct scores
9 | %
10 | % RESULT
11 | % ret The average precision
12 | %
13 |
function ret = konect_ap(prediction, target)
% Average precision of PREDICTION with respect to the binarized TARGET.

e = length(prediction);
if e ~= length(target)
    error('*** both vectors must have same length');
end

% Anything positive counts as relevant
relevant = target > 0;

% Shuffle both vectors with the same random permutation
shuffle = randperm(e);
prediction = prediction(shuffle);
relevant = relevant(shuffle);

% Rank by decreasing score
[tmp, order] = sort(-prediction);
ranked = relevant(order);

% Ranks at which the relevant items occur
hits = find(ranked);
nz = length(hits);

% Sum the precision at each hit:  the k-th hit at rank hits(k) has
% precision k / hits(k)
p_sum = 0;
for k = 1 : nz
    p_sum = p_sum + k / hits(k);
end

ret = p_sum / nz;
42 |
--------------------------------------------------------------------------------
/test/octave:
--------------------------------------------------------------------------------
#! /bin/sh
#
# Execute an Octave script.  This is unused at the moment.
#
# $1	Name of the Octave program
#
# ENVIRONMENT
#	PREFIX, TYPE, NAME, NORM, DATASET, NETWORK, DECOMPOSITION
#		(optional) When non-empty, appended to the log filename
#	TMP	(optional) Directory of the log file; defaults to /tmp
#
# The name of the log file is exported as $LOG and written to stderr.
# Standard output and standard error of Octave go to that file.
#

if [ $# -lt 1 ] ; then
	echo >&2 "*** Usage: $0 PROGRAM.m"
	exit 2
fi

[ "$PREFIX" ] && PREFIX=.$PREFIX

# Use a private variable name:  LOGNAME is the POSIX login-name
# variable, and assigning to it would also change it for Octave when it
# is exported.
log_name=$USER.$(basename "$1" .m)

# This is a list of variables we want to be used in the log filename to
# make it unique.
[ "$TYPE" ]          && log_name=$log_name.$TYPE
[ "$NAME" ]          && log_name=$log_name.$NAME
[ "$NORM" ]          && log_name=$log_name.$NORM
[ "$DATASET" ]       && log_name=$log_name.$DATASET
[ "$NETWORK" ]       && log_name=$log_name.$NETWORK
[ "$DECOMPOSITION" ] && log_name=$log_name.$DECOMPOSITION

# Assign first and export afterwards, so that the value is never
# subject to word splitting.
LOG=${TMP-/tmp}/$(basename "$0").$log_name$PREFIX.log
export LOG
echo >&2 " $LOG"

#
# -q    Quiet
# -f    Don't read initialization files
#
# We'd like to use --no-window-system but it isn't supported by Octave 3.0
#
DISPLAY= GNUTERM=dumb exec octave -qf "$1" >"$LOG" 2>&1
30 |
31 |
32 |
--------------------------------------------------------------------------------
/m/konect_statistic_triangles_norm.m:
--------------------------------------------------------------------------------
1 | %
2 | % Number of triangles, normalized such that for any n, in the uniform
3 | % distribution, the statistic values have the same distribution.
4 | %
5 | % This approximation considers each possible triangle to be
6 | % uncorrelated to others, and thus the total number of triangles is a
7 | % binomial distribution with total count (n; 3) and probability for
8 | % each triangle of 1/8.
9 | %
10 |
function values = konect_statistic_triangles_norm(A, format, weights)
% Standardize the triangle count T using the normal approximation of a
% binomial distribution with N = (n ; 3) trials and P = 1/8:
%
%     values = (T - mu) / sigma

t = konect_statistic_triangles(A, format, weights);
n = konect_statistic_size(A, format, weights);

% mu = P N = (1/8) * n (n-1) (n-2) / 6
expected = (1/48) * n * (n-1) * (n-2);

% sigma = sqrt(N P (1-P)) = sqrt((7/64) * n (n-1) (n-2) / 6)
deviation = sqrt( 7 / 384 * n * (n-1) * (n-2));

values = (t - expected) / deviation;
33 |
34 |
--------------------------------------------------------------------------------
/m/konect_statistic_cocos.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the size of the largest strongly connected component
3 | % [cocos].
4 | %
5 | % PARAMETERS
6 | % A Adjacency matrix
7 | % format
8 | % weights
9 | %
10 | % RESULT
11 | % values Size of the largest connected component
12 | % [1] Size of largest strongly connected component, in
13 | % nodes (N_s)
14 | % [2] Relative size (N_s / n)
15 | %
16 | % GROUP: asym
17 | %
18 |
function values = konect_statistic_cocos(A, format, weights)
% Returns [ N_s ; N_s / n ], where N_s is the sum of the 0/1 vector
% returned by konect_connect_strong() and n is the number of rows of A.
% The unterminated statements below echo their values on purpose.

consts = konect_consts();

% Guard:  only directed networks are accepted
if ~(format == consts.ASYM)
    error(['*** Strongly connected component is only defined for ' ...
           'directed networks']);
end

n = size(A,1)

% Membership vector; must contain only zeros and ones
v = konect_connect_strong(A);
assert(sum(v ~= 0 & v ~= 1) == 0);

% Absolute size
value_1 = sum(v)
assert(value_1 >= 0);
assert(value_1 <= n);

% Relative size
value_2 = value_1 / n
assert(value_2 <= 1.0);

values = [ value_1; value_2 ];
44 |
--------------------------------------------------------------------------------
/m/konect_connect_square.m:
--------------------------------------------------------------------------------
1 | %
2 | % Find the largest weakly connected component of a unipartite graph.
3 | % Edge weights are ignored. Edge directions are ignored.
4 | %
5 | % This implementation uses BGL.
6 | %
7 | % PARAMETERS
8 | % A (n*n) Half-adjacency matrix of unipartite graph
9 | %
10 | % RESULT
11 | % v 0/1 vector of nodes in the connected component or
12 | % [] when there is no large connected component
13 | %
14 |
function [v] = konect_connect_square(A)
% 0/1 membership vector of the largest component of the symmetrized,
% loop-free pattern of A.

% Under Octave, delegate to the implementation that does not need BGL
if konect_usingoctave()
    v = konect_connect_square_nobgl(A);
    return;
end

[n, nx] = size(A);
if n ~= nx
    error('*** Matrix must be square');
end

% Symmetric 0/1 pattern without diagonal.  The input matrix to BGL
% must be symmetric.
S = (A ~= 0);
S = S | S';
S = S - spdiags(diag(S), 0, n, n);

[ci sizes] = components(S, 'full2sparse', 1);

% Component ids ordered by decreasing size; the first one is the largest
[sorted_sizes, order] = sort(sizes, 'descend');

v = (ci == order(1));
38 |
--------------------------------------------------------------------------------
/m/konect_diameff.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the effective diameter from the (0-based) hop plot array.
3 | %
4 | % RESULT
5 | % value Effective diameter
6 | %
7 | % PARAMETERS
8 | % d (1*(diameter+1)) Hop distribution vector, as returned
9 | % by konect_hopdistr().
10 | % p (optional) Percentile (e.g., 0.9 for 90-percentile);
11 | % defaults to 0.9; pass 0.5 and use floor() on the
12 | % result to get the median path length
13 | %
14 |
function [value] = konect_diameff(d, p)
% Effective diameter for percentile P from the cumulated hop plot D.
%
% d     Hop distribution vector (row or column); entry I corresponds
%       to distance I-1
% p     (optional, default 0.9) Percentile, a scalar
%
% The unterminated statements below echo their values on purpose.

% Apply the default BEFORE P is read for the first time, so that
% omitting P actually works.
if nargin < 2
    p = 0.9;
end

assert(length(p) == 1);

d
p

% Work on a row vector
if size(d,2) == 1
    d = d';
end

% Threshold:  the fraction P of the last entry of D
amount = p * d(end)

value = NaN

% Find the step at which D crosses AMOUNT and interpolate linearly
% within that step.
for i = 1 : length(d)-1
    i
    if d(i) <= amount & d(i+1) > amount
        value = i - 1 + (amount - d(i)) / (d(i+1) - d(i))
        break;
    end
end

% No crossing was found
if isnan(value)
    value = length(d)
end

assert(length(value) == 1);
47 |
--------------------------------------------------------------------------------
/m/konect_statistic_diameter.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the estimated effective diameter [diameter].
3 | %
4 | % This statistic (named "diameter") gives approximate values of the
5 | % 90-percentile effective diameter. The values are approximate because
6 | % they use a heuristic, node sampling. No estimate of the accuracy of
7 | % the result is returned.
8 | %
9 | % To get exact values of the diameter and effective diameter, use the
10 | % function konect_hopdistr() followed by a call to one of the
11 | % konect_diam*() functions, which correspond to the statistics having
12 | % names like "diam" (the diameter), "diameff90", etc.
13 | %
14 | % RESULT
15 | % value Diameter
16 | %
17 | % PARAMETERS
18 | % A Adjacency or biadjacency matrix
19 | % format
20 | % weights
21 | %
22 |
function value = konect_statistic_diameter(A, format, weights)
% Pass A (symmetrized unless the format is BIP) to
% konect_effective_diameter().  WEIGHTS is accepted but not used.

consts = konect_consts();

is_bipartite = (format == consts.BIP);

% For every format other than BIP, add the transpose
if ~is_bipartite
    A = A + A';
end

value = konect_effective_diameter(A);
32 |
--------------------------------------------------------------------------------
/m/konect_statistic_size.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the size of the network, i.e. the number of nodes.
3 | %
4 | % PARAMETERS
5 | % A Adjacency or biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values Column vector of values
11 | % [1] Total number of vertices
12 | % [2] BIP: Number of vertices in left group
13 | % [2] ASYM: Number of vertices with nonzero outdegree
14 | % [3] BIP: Number of vertices in right group
15 | % [3] ASYM: Number of vertices with nonzero indegree
16 | %
17 | % GROUP+2: bip
18 | % GROUP+3: bip
19 | %
20 |
function values = konect_statistic_size(A, format, weights)
% Number of nodes, derived from the dimensions of A alone.
%
% SYM, ASYM   A single value:  the number of rows (A must be square)
% BIP         [ rows + columns ; rows ; columns ]
%
% WEIGHTS is accepted but not used.  Any other format raises an error.

consts = konect_consts();

% Only the dimensions are needed; the entries of A are never read, so
% no boolean copy of A is built.
[n_rows, n_cols] = size(A);

if format == consts.SYM || format == consts.ASYM

    assert(n_rows == n_cols);
    values = n_rows;

elseif format == consts.BIP

    values = [ n_rows + n_cols ; n_rows ; n_cols ];

else

    error('*** Invalid format');

end
41 |
--------------------------------------------------------------------------------
/m/konect_statistic_gini.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the Gini coefficient [gini].
3 | %
4 | % PARAMETERS
5 | % A Adjacency / biadjacency matrix
6 | % format
7 | % weights
8 | % opts (optional)
9 | %
10 | % RESULT
11 | % values The Gini coefficients
12 | % [1] Total coefficient
13 | % [2,3] Left/right coefficients (only ASYM and BIP)
14 | %
15 |
function values = konect_statistic_gini(A, format, weights, opts)
% Gini coefficients computed by konect_gini() from the nonzero entries
% of A.
%
% values(1)     Coefficient over all nodes
% values(2:3)   Coefficients over row / column indices (BIP and ASYM only)
%
% Entry values are used as multiplicities only for POSITIVE weights.
% OPTS is accepted but not used.

consts = konect_consts();

has_z = (weights == consts.POSITIVE);

if has_z
    [x y z] = find(A);
    z = z(:);
else
    [x y] = find(A);
    z = [];      % konect_gini() treats [] as a multiplicity of one
end

% find() returns row vectors for a row-vector input; force columns so
% that the concatenations below always stack vertically
x = x(:);
y = y(:);

% Overall coefficient:  every entry counts for both of its endpoints.
% In a bipartite network, row i and column i are different nodes, so
% the column ids are shifted past the row ids to keep them apart.
if format == consts.BIP
    p = [x; y + size(A, 1)];
else
    p = [x; y];
end

values = konect_gini(p, [z; z]);

if format == consts.BIP | format == consts.ASYM

    v1 = konect_gini(x, z);
    v2 = konect_gini(y, z);
    values = [ values ; v1 ; v2 ];

end
50 |
--------------------------------------------------------------------------------
/test/test_decompose_dense.m:
--------------------------------------------------------------------------------
1 |
%
% DEPRECATED -- covered by test_decompose()
%
% The notice above used to be a bare line of text, which stopped the
% script only through an "undefined function" failure.  The script
% still stops at this point, but now says why.  Everything below is
% kept for reference and is not executed.
%
error('*** DEPRECATED -- covered by test_decompose()');

cd ..

%
% Small directed graph
%

% A directed graph with the edges 1->2, 2->3, 2->4, 3->4, 4->2, 4->3,
% and 4->5 with weight 2

%% 1 --> 2 ---> 3
%% A A
%% | /
%% V <-
%% 4============> 5

a = [0 1 0 0 0; 0 0 1 1 0; 0 0 0 1 0; 0 1 1 0 2; 0 0 0 0 0];


[u x] = decompose_dense(a, 3, 'dedicom1u');
[u x] = decompose_dense(a, 3, 'dedicom1v');
[u x] = decompose_dense(a, 3, 'dedicom2');
[u x] = decompose_dense(a, 3, 'dedicom2s');
[u x] = decompose_dense(a, 3, 'dedicom3');


%
% Sparse random graph
%

opts.disp = 2;

% n nodes with about d nonzero entries per row
n = 1000;
d = 20;
a = sprand(n, n, d/n);

[u x] = decompose_dense(a, 3, 'dedicom1u', opts);
[u x] = decompose_dense(a, 3, 'dedicom1v', opts);
[u x] = decompose_dense(a, 3, 'dedicom2', opts);
[u x] = decompose_dense(a, 3, 'dedicom2s', opts);
[u x] = decompose_dense(a, 3, 'dedicom3', opts);
43 |
44 |
--------------------------------------------------------------------------------
/m/konect_connect_square_nobgl.m:
--------------------------------------------------------------------------------
1 | %
2 | % Find the biggest weakly connected component of a unipartite graph.
3 | %
4 | % PARAMETERS
5 | % A Square adjacency matrix of unipartite graph; doesn't
6 | % have to be symmetric
7 | %
8 | % RESULT
9 | % v 0/1 vector of nodes in the connected component or
10 | % [] when there is no large connected component
11 | %
12 |
function [v] = konect_connect_square_nobgl(A)
% Grow a node set from a random start node until it stops growing;
% accept it when it covers at least 10% of the nodes.  After 15
% unsuccessful attempts, [] is returned.

max_attempts = 15;

[n,nx] = size(A);

% Symmetrized 0/1 pattern
Al = (A~=0) | (A'~=0);

for attempt = 1 : max_attempts

    % Indicator vector holding a single random start node
    v = zeros(n,1);
    v(1+floor(rand * n)) = 1;

    reached = 1;
    radius = 0;

    % Add all neighbors of the current set until the set stops growing
    while true
        previous = reached;
        v = logical(Al * double(v) + v);
        reached = sum(v);
        radius = radius + 1;
        fprintf(1, '%4d %10d\n', radius, reached);
        if reached == previous
            break;
        end
    end

    % Large enough:  V is the result
    if reached >= .1 * n
        return;
    end

end

% error '*** No big connected component'
v = [];
46 |
--------------------------------------------------------------------------------
/m/konect_statistic_coco.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the size of the largest connected component [coco].
3 | %
4 | % PARAMETERS
5 | % A Half-adjacency/biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values Column vector of results
11 | % [1] Size of largest connected component
12 | % [2] Relative size, i.e., [1] divided by size of network
13 | % [3,5] Left/right numbers (BIP only)
14 | % [4,6] Left/right relative coco (BIP only)
15 | %
16 | % GROUP+3: bip
17 | % GROUP+4: bip
18 | % GROUP+5: bip
19 | % GROUP+6: bip
20 | %
21 |
function values = konect_statistic_coco(A, format, weights)
% Absolute and relative size of the largest connected component, as
% determined by konect_connect_square() or konect_connect_bipartite().
% WEIGHTS is accepted but not used.

consts = konect_consts();

% Every format other than BIP:  A is passed to the square variant
if format ~= consts.BIP
    member = konect_connect_square(A);
    coco = sum(member);
    values = [ coco ; coco / size(A,1) ];
    return;
end

% BIP:  separate membership vectors for rows and columns
[left, right] = konect_connect_bipartite(A);

n_left = sum(left);
n_right = sum(right);
n_both = n_left + n_right;

values = [ n_both  ; n_both / sum(size(A)) ; ...
           n_left  ; n_left / size(A,1)    ; ...
           n_right ; n_right / size(A,2)   ];
44 |
--------------------------------------------------------------------------------
/m/konect_statistic_volume.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the volume of a network, i.e. the number of edges.
3 | %
4 | % PARAMETERS
5 | % A Adjacency or biadjacency matrix
6 | % format
7 | % weights
8 | % opts
9 | %
10 | % RESULT
11 | % values Column vectors of values
12 | % [1] Number of edges
13 | % [2] Sum of absolute edge weights (only WEIGHTED,
14 | % SIGNED) (note that for POSITIVE networks,
15 | % multiple edges counted multiple times in
16 | % statistic [1])
17 | %
18 |
function values = konect_statistic_volume(A, format, weights, opts)
% Volume of the network; the computation depends on WEIGHTS only.
% FORMAT and OPTS are accepted but not used.
%
% NOTE(review): consts.MULTISIGNED is referenced by other statistics
% but falls into the error branch here -- confirm that this is intended.

consts = konect_consts();

if any(weights == [ consts.UNWEIGHTED, consts.POSWEIGHTED ])

    % Number of nonzero entries
    values = nnz(A);

elseif any(weights == [ consts.POSITIVE, consts.DYNAMIC ])

    % Sum of all entries
    values = full(sum(sum(A)));

elseif any(weights == [ consts.SIGNED, consts.WEIGHTED, consts.MULTIWEIGHTED ])

    % Number of nonzero entries, followed by the sum of absolute values
    entry_count = nnz(A);
    abs_total = full(sum(sum(abs(A))));
    values = [ entry_count ; abs_total ];

else
    error('*** Invalid weights');
end
40 |
41 |
--------------------------------------------------------------------------------
/m/konect_auc.m:
--------------------------------------------------------------------------------
1 | %
2 | % The area under the ROC curve, i.e. the AUC.
3 | %
4 | % If target is not a 0/1 vector, its values are rounded to 0 or 1.
5 | %
6 | % PARAMETERS
7 | % prediction (e*1) vector of predicted scores
8 | % target (e*1) vector of values to be predicted
9 | %
10 | % RESULT
11 | % ret The AUC
12 | %
13 |
function ret = konect_auc(prediction, target)
% Area under the ROC curve of PREDICTION with respect to the binarized
% TARGET.  Returns 0 when TARGET contains only one class.

assert(length(prediction) == length(target));

% All values passed are finite
assert(sum(~isfinite(prediction)) == 0);
assert(sum(~isfinite(target)) == 0);

% Anything positive is a positive example
positive = target > 0;

e = length(prediction);
k = sum(positive);

% Degenerate case:  only positives or only negatives
if k == 0 || k == e
    ret = 0;
    return;
end

% Shuffle both vectors with the same random permutation
shuffle = randperm(e);
prediction = prediction(shuffle);
positive = positive(shuffle);

% Rank by decreasing score
[tmp, order] = sort(prediction, 'descend');
ranked = positive(order);

% For every positive, count the negatives that are ranked after it:
% all (e-k) negatives minus those already seen at that rank
negatives_seen = cumsum(~ranked);
s = sum((e - k) - negatives_seen(ranked));

ret = s / (k * (e-k));
51 |
--------------------------------------------------------------------------------
/m/konect_imageubu.m:
--------------------------------------------------------------------------------
1 | %
2 | % Draw a spectral diagonality test matrix. This uses imagesc() with a
3 | % colorbar. This draws the real part of the matrix only.
4 | %
5 | % See Section 3.2.4 (page 37) in: Jérôme Kunegis, "On the Spectral
6 | % Evolution of Large Networks", PhD Thesis, University of
7 | % Koblenz-Landau, 2011.
8 | %
9 | % This function is also used to draw other matrices.
10 | %
11 | % PARAMETERS
12 | % Delta (r*r) Spectral diagonality test matrix
13 | %
14 |
function konect_imageubu(Delta)
% Draw the real part of DELTA with imagesc(), a colorbar, square axes
% and a colormap computed from the range of the values.

damp = .5;
steps = 200;
font_size = 24;

M = real(Delta);

imagesc(M);
colorbar;
axis square;

% Range of the values drawn
lo = min(min(M));
hi = max(max(M));

% Sample points between the smallest and the largest value
s = lo:((hi-lo)/steps):hi;

% Largest magnitude, used to scale the sample points
dist = max(hi, -lo);

% One color channel per column of the colormap
channel_red   = min(1 + damp * s/dist, 1 - s/dist) .^ 2;
channel_green = min(1 - damp * s/dist, 1 + s/dist) .^ 2;
channel_blue  = min(1 + s/dist, 1 - s/dist) .^ 2;

colormap([channel_red' channel_green' channel_blue']);

set(gca, 'FontSize', font_size);
42 |
--------------------------------------------------------------------------------
/m/konect_statistic_dconflict.m:
--------------------------------------------------------------------------------
1 | %
2 | % The dyadic conflict \eta. Only for signed directed networks. This
3 | % is the proportion of node pairs ("dyads") connected by two edges in
4 | % opposite directions in which the node edges have opposite sign. It
5 | % can be seen as a rudimentary measure of conflict in signed directed
6 | % graphs.
7 | %
8 | % PARAMETERS
9 | % A Adjacency matrix
10 | % format must be ASYM
11 | % weights must be one of the signed weights
12 | %
13 | % RESULTS
14 | % values Column vector of results
15 | % [1] \eta
16 | %
17 | % GROUP: asymnegative
18 | %
19 |
function values = konect_statistic_dconflict(A, format, weights)
% Dyadic conflict:  among the node pairs that are connected in both
% directions, the proportion whose two entries have opposite sign.
%
% A         Square adjacency matrix
% format    Must be consts.ASYM
% weights   Must be SIGNED, MULTISIGNED, WEIGHTED or MULTIWEIGHTED
%
% The unterminated statements below echo their values on purpose.

consts = konect_consts();

assert(format == consts.ASYM);
assert(weights == consts.SIGNED | weights == consts.MULTISIGNED | ...
       weights == consts.WEIGHTED | weights == consts.MULTIWEIGHTED);

[m n] = size(A)

assert(m == n);

A_abs = konect_absx(A);

% Pairs connected in both directions.  The pattern A_abs & A_abs' is
% symmetric and therefore holds every pair twice; the strict upper
% triangle counts each unordered pair once and leaves out the diagonal.
m_dyads = nnz(triu(A_abs & A_abs', 1))

% Pairs whose two entries have opposite sign, i.e., a negative
% product, counted once per unordered pair in the same way.
m_conflict = nnz(triu((A .* A') < 0, 1))

values = [ (m_conflict / m_dyads) ]
39 |
40 |
41 |
--------------------------------------------------------------------------------
/m/konect_diammean.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the mean path length, based on the result of konect_diameff().
3 | %
4 | % RESULT
5 | % value Mean path length
6 | %
7 | % PARAMETERS
8 | % data Vector of frequency of path lengths, as returned by
9 | % konect_hopdistr()
10 | % n (optional) Number of nodes; if not given, inferred
11 | % from the data
12 | % allow_disconnected (default = 0) Whether to allow
13 | % disconnected networks. N must be given.
14 | %
15 |
function [value] = konect_diammean(data, n, allow_disconnected)
% Mean of the distances 0, 1, 2, ... weighted by the increments of the
% cumulated vector DATA.  N and ALLOW_DISCONNECTED currently have no
% effect on the result.  The unterminated statements below echo their
% values on purpose.

% Column-vector view of the input
data = data(:)

if nargin < 3
    allow_disconnected = 0;
end

% Entry k of DATA belongs to the distance k-1
values = (0 : (length(data) - 1))'

% Increments of the cumulated vector
counts = data - [0 ; data(1:end-1)]

% Disabled checks, kept for reference:
%
%   ~allow_disconnected:  this is not necessarily true as DATA may be
%   an estimation
%       assert(sum(counts) == n*n);
%
%   allow_disconnected:  for all unconnected nodes, count their
%   distance as n
%       counts(n+1) = n*n - sum(counts);
%       values = 0 : n;

value = (values' * counts) / sum(counts)
38 |
39 |
--------------------------------------------------------------------------------
/m/konect_significance_legend.m:
--------------------------------------------------------------------------------
1 | %
2 | % Generate the legend to a significance plot.
3 | %
4 | % PARAMETERS
5 | % p_threshold
6 | % maxdiff
7 | % label_measure
8 | %
9 |
function konect_significance_legend(p_threshold, maxdiff, label_measure)
% Draw an R-by-R image from konect_significance_image() over a grid of
% p-values (horizontal) and differences (vertical), and label the axes.

font_size = 60;

% Resolution
r = 70;

% Ranges (echoed, as the statements are not terminated)
range_p = 1.3
range_diff = 1.2

% Grid values:  p-values ascending, differences descending
v_p = linspace(0, range_p * p_threshold, r);
v_d = linspace(range_diff * maxdiff, -range_diff * maxdiff, r);

% Every row of GRID_P is V_P; every column of GRID_D is V_D
grid_p = repmat(v_p, r, 1);
grid_d = repmat(v_d', 1, r);

I = konect_significance_image(grid_p, grid_d, p_threshold, maxdiff);

image([min(v_p), max(v_p)], [min(v_d), max(v_d)], I);

ax = gca;

set(ax, 'FontSize', font_size);

set(ax, 'XTick', [0 .05], ...
        'XTickLabels', { '0', '0.05' });
set(ax, 'YTick', [-0.2, 0, +0.2 ], ...
        'YTickLabels', { '+0.2', '0', '-0.2' });
set(ax, 'TickLength', [ 0 0 ]);

xlabel('p-value', 'FontSize', font_size);
ylabel(sprintf('%s_x - %s_y', label_measure, label_measure), 'FontSize', font_size);

axis square;
41 |
--------------------------------------------------------------------------------
/m/konect_svdn.m:
--------------------------------------------------------------------------------
1 | %
2 | % Singular value decomposition of a normalized matrix.
3 | %
4 | % A normalized matrix has singular values <= 1. This implementation uses a
5 | % special decomposition method that is faster than just using svds().
6 | %
7 | % RESULT
8 | % U,D,V Singular value decomposition
9 | %
10 | % PARAMETERS
11 | % A A matrix with all singular values not larger than one
12 | % r Rank
13 | % method (optional) The method to use
14 | % 0 svds('L'); uses less memory but is slower
15 | % 1 svds(1+epsilon); faster but uses more memory
16 | %
17 |
function [U,D,V] = konect_svdn(A, r, varargin)
% Rank-R singular value decomposition of A through svds(), using one of
% two methods selected by the optional third argument.

METHOD_L = 0;
METHOD_EPSILON = 1;

% The method is the optional third argument
method = METHOD_L;
if ~isempty(varargin)
    method = varargin{1};
end

opts.disp = 2;

if method == METHOD_L

    [U,D,V] = svds(A, r, 'L', opts);

else

    % Ask for the singular values closest to a value just above one
    epsilon = 1e-3;

    [U,D,V] = svds(A, r, 1+epsilon, opts);

    % Reorder the result by decreasing singular value
    dd = diag(D);
    [x, order] = sort(-dd);
    U = U(:, order);
    V = V(:, order);
    D = diag(dd(order));

end
49 |
--------------------------------------------------------------------------------
/m/konect_statistic_fourstars.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the number of 4-stars in a graph, i.e., the number of
3 | % quadruples of incident edges.
4 | %
5 | % Multiple edges in the input graph are ignored. Loops are
6 | % ignored.
7 | %
8 | % PARAMETERS
9 | % A Adjacency/biadjacency matrix
10 | % format
11 | % weights
12 | % opts (optional)
13 | %
14 | % RESULT
15 | % values Column vector of results
16 | % [1] Number of 4-stars
17 | %
18 |
function values = konect_statistic_fourstars(A, format, weights, opts)
% Sum over all nodes of d (d-1) (d-2) (d-3) / 24, where d is the degree
% of the node in the 0/1 pattern of A.  WEIGHTS and OPTS are accepted
% but not used.

consts = konect_consts();

% Ignore edge weights and multiplicities
A = (A ~= 0);

%
% Build degree vector
%
if format == consts.BIP

    % Row nodes followed by column nodes
    d = [ sum(A,2) ; sum(A, 1)' ];

elseif format == consts.SYM || format == consts.ASYM

    % Row sum plus column sum per node.  A loop adds one to each of the
    % two sums, so the diagonal is subtracted twice to exclude loops.
    d = sum(A, 2) + sum(A, 1)' - 2 * diag(A);

else
    error('*** Invalid format');
end

z = sum(d .* (d-1) .* (d-2) .* (d-3)) / 24;
assert(z == floor(z));
values(1) = z;
49 |
--------------------------------------------------------------------------------
/m/konect_controllability.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the controllability of a given graph, as defined in:
3 | %
4 | % [1] Controllability of Complex Networks, Liu Y.-Y., J.-J. Slotine and
5 | % A.-L. Barabasi, Nature 473:167--173, May 2011.
6 | %
7 | % PARAMETERS
8 | % A Adjacency or biadjacency matrix (depending on FORMAT); weights are ignored
9 | % format The format of the network, as defined in konect_consts.m
10 | %
11 | % RESULT
12 | % ret The maximal directed matching, i.e., the number of nodes
13 | % in the network minus the controllability value (|V| - C)
14 | %
15 | % LIBRARIES
16 | % BGL This function uses BGL
17 | %
18 |
function ret = konect_controllability(A, format)
% Half the number of positive entries returned by maximal_matching()
% for the bipartite double cover of the network.

consts = konect_consts();

% Set A to the actual adjacency matrix
if format == consts.BIP
    [m n] = size(A);
    A = [ sparse(m,m) , A ; A' , sparse(n,n) ];
elseif format == consts.SYM
    A = A + A';
end

n = size(A, 1);

% Create the bipartite double cover:  A in the upper right block and
% its transpose in the lower left block
Z = sparse(n, n);
B = [ Z , A ; A' , Z ];

mm = maximal_matching(B);

% Halve the number of positive entries of the result
ret = sum(mm > 0) / 2;
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/m/konect_statistic_threestars.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the number of 3-stars in a graph, i.e., the number of
3 | % triples of incident edges.
4 | %
5 | % Multiple edges in the input graph are ignored. Loops are
6 | % ignored.
7 | %
8 | % PARAMETERS
9 | % A Adjacency/biadjacency matrix
10 | % format
11 | % weights
12 | % opts (optional)
13 | %
14 | % RESULT
15 | % values Column vector of results
16 | % [1] Number of 3-stars
17 | %
18 |
function values = konect_statistic_threestars(A, format, weights, opts)
% Sum over all nodes of d (d-1) (d-2) / 6, where d is the degree of the
% node in the 0/1 pattern of A.  WEIGHTS and OPTS are accepted but not
% used.

consts = konect_consts();

% Ignore edge weights and multiplicities
A = (A ~= 0);

%
% Build degree vector
%
if format == consts.BIP

    % Row nodes followed by column nodes
    d = [ sum(A,2) ; sum(A, 1)' ];

elseif format == consts.SYM || format == consts.ASYM

    % Row sum plus column sum per node.  A loop adds one to each of the
    % two sums, so the diagonal is subtracted twice to exclude loops.
    d = sum(A, 2) + sum(A, 1)' - 2 * diag(A);

else
    error('*** Invalid format');
end

z = sum(d .* (d-1) .* (d-2)) / 6;
assert(z == floor(z));
values(1) = z;
49 |
50 |
51 |
--------------------------------------------------------------------------------
/m/konect_statistic_maxdegree.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the maximum degree [maxdegree].
3 | %
4 | % PARAMETERS
5 | % A Adjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values Column vector of results
11 | % [1] Max degree
12 | % [2] Max outdegree (ASYM)
13 | % [3] Max indegree (ASYM)
14 | % [4] Max left degree (BIP)
15 | % [5] Max right degree (BIP)
16 | %
17 | % GROUP+2: asym
18 | % GROUP+3: asym
19 | % GROUP+4: bip
20 | % GROUP+5: bip
21 | %
22 |
function values = konect_statistic_maxdegree(A, format, weights)
% Maximum of the row sums, column sums and their combination.  Entry
% values are kept only for POSITIVE weights; otherwise the 0/1 pattern
% is used.  The result is echoed once before conversion to a full vector.

consts = konect_consts();

if weights ~= consts.POSITIVE
    A = A ~= 0;
end

col_sums = sum(A, 1);
row_sums = sum(A, 2);

if format == consts.SYM

    values = [ max(col_sums + row_sums') ]

elseif format == consts.ASYM

    values = [ max(col_sums + row_sums') ; ...
               max(row_sums) ; ...
               max(col_sums) ]

else % format == consts.BIP

    % Positions 2 and 3 are filled with NaN
    values = [ max(max(col_sums), max(row_sums)) ; NaN ; NaN ; ...
               max(row_sums) ; ...
               max(col_sums) ]

end

values = full(values);
50 |
--------------------------------------------------------------------------------
/m/konect_statistic_sconflict.m:
--------------------------------------------------------------------------------
1 | %
2 | % [experimental]
3 | %
4 | % PARAMETERS
5 | % A Adjacency or biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % value
11 | %
12 | % GROUP: negative
13 | %
14 |
function values = konect_statistic_sconflict(A, format, weights)
% [experimental]  Ratio of extreme eigenvalues of the sign matrix,
% divided by the same ratio for the matrix of absolute values:
%
%     (l_max / l_min) / (l_abs_max / l_abs_min)
%
% WEIGHTS must be SIGNED, MULTISIGNED, WEIGHTED or MULTIWEIGHTED.
% The unterminated statements below echo their values on purpose.

opts.disp = 2;

consts = konect_consts();

if ~(weights == consts.SIGNED | weights == consts.MULTISIGNED | ...
     weights == consts.WEIGHTED | weights == consts.MULTIWEIGHTED)
    error('*** Invalid weights');
end

% Make A symmetric
if format == consts.SYM | format == consts.ASYM
    A = A + A';
elseif format == consts.BIP
    [n1 n2] = size(A);
    A = [ sparse(n1, n1), A; A', sparse(n2, n2) ];
else
    error('*** Invalid format');
end

% Round to +1/-1
A = (A > 0) - (A < 0);

% Absolute value
A_abs = double(A ~= 0);

% Largest and smallest algebraic eigenvalue of both matrices
l_max = eigs(A, 1, 'la', opts)
l_min = eigs(A, 1, 'sa', opts)
l_abs_max = eigs(A_abs, 1, 'la', opts)
l_abs_min = eigs(A_abs, 1, 'sa', opts)

values = (l_max / l_min) / (l_abs_max / l_abs_min)
50 |
51 |
--------------------------------------------------------------------------------
/m/konect_roc_curve.m:
--------------------------------------------------------------------------------
1 | %
2 | % Plot one ROC curve. This calls "plot", so "hold" must be used when
3 | % aggregating multiple ROC curves.
4 | %
5 | % The ROC curve is plotted in the square [0 1]^2. No axes are set.
6 | %
7 | % PARAMETERS
8 | % target (n*1) 0/1 vector of true values
9 | % score (n*1) Predicted scores to evaluate
10 | %
11 | % RESULT
12 | % h Handle of the plot
13 | % m Number of 1s in TARGET
14 | % n length of both input vectors
15 | %
16 |
function [h m n] = konect_roc_curve(target, score, color, line_style, line_width)
% Plot the cumulated number of ones in TARGET, ranked by decreasing
% SCORE, against the rank; both axes are scaled to [0, 1].
%
% color, line_style, line_width     Passed on to plot()
%
% h     Handle returned by plot()
% m     Sum of TARGET
% n     Number of rows of TARGET

resolution = 150;   % Number of points on the plot; determines the
                    % resolution

n = size(target,1);   % Number of results
m = sum(target);      % Number of true results

% Randomize order to avoid effects of pre-ordered vectors
ii = randperm(n);
target = target(ii);
score = score(ii);

% True/false vector of results, by decreasing score
[s i] = sort(-score);
r = target(i);

% Ranks at which the curve is sampled.  The stride must be at least
% one:  round(n/resolution) is zero for small N, which would leave the
% range empty.
step = max(1, round(n / resolution));
range = 1 : step : n;

% Always include the last rank, so that the curve ends at x = 1
if ~isempty(range) && range(end) ~= n
    range(end + 1) = n;
end

vals = cumsum(r);

x = [0 range] / n;
y = [0; vals(range)] / m;

h = plot(x, y, line_style, 'LineWidth', line_width, 'Color', color);
41 |
--------------------------------------------------------------------------------
/m/konect_statistic_bip.m:
--------------------------------------------------------------------------------
1 | %
2 | % Bipartivity [bip]. This is defined as
3 | %
4 | % [bip] = 1 - [nonbip] = | λ_min[A] / λ_max[A] |,
5 | %
6 | % i.e., the absolute ratio between the smallest and largest eigenvalue
7 | % of the adjacency matrix' eigenvalues. Note: "smallest eigenvalue"
8 | % refers here to the actual smallest eigenvalue, i.e., the one nearest
9 | % to minus infinity, rather than the one nearest to zero.
10 | %
11 | % The highest possible value is +1 for bipartite networks (and only for
12 | % them). The infimum of possible values is zero, but zero itself
13 | % cannot be attained for nonempty loopless graphs. This can be proved
14 | % easily by noticing that the trace of the adjacency matrix of a
15 | % loopless graph is zero, and therefore there has to be at least one
16 | % negative eigenvalue. Note that a graph containing only loops,
17 | % including a graph containing no edges at all, has a [bip] value of
18 | % zero.
19 | %
20 | % In general, measures of non-bipartivity are preferred over measures of
21 | % bipartivity in KONECT.
22 | %
23 | % GROUP: square
24 | %
25 |
26 | function values = konect_statistic_bip(A, format, weights)
27 | % Placeholder:  every call raises an error, and VALUES is never
28 | % assigned.
29 |
30 | error('unimplemented', 'here');
31 |
--------------------------------------------------------------------------------
/m/konect_statistic_lconflict.m:
--------------------------------------------------------------------------------
1 | %
2 | % [experimental]
3 | %
4 | % PARAMETERS
5 | % A Adjacency or biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % value
11 | %
12 | % GROUP: negative
13 | %
14 |
15 | function values = konect_statistic_lconflict(A, format, weights)
16 | % Experimental statistic.  The matrix is made symmetric and its entries
17 | % are replaced by their signs.  With D the diagonal matrix of the number
18 | % of nonzero entries per column, the result is the largest-magnitude
19 | % eigenvalue of D - |A| divided by the largest-magnitude eigenvalue of
20 | % D - A.
21 | %
22 | % An error is raised when WEIGHTS is not SIGNED, MULTISIGNED, WEIGHTED
23 | % or MULTIWEIGHTED, and when FORMAT is not SYM, ASYM or BIP.
24 |
25 | opts.disp = 2;
26 |
27 | consts = konect_consts();
28 |
29 | if ~(weights == consts.SIGNED | weights == consts.MULTISIGNED | ...
30 |      weights == consts.WEIGHTED | weights == consts.MULTIWEIGHTED)
31 |     error('*** lconflict: unsupported value of WEIGHTS');
32 | end
33 |
34 | % Make A symmetric
35 | if format == consts.SYM | format == consts.ASYM
36 |     A = A + A';
37 | elseif format == consts.BIP
38 |     [n1 n2] = size(A);
39 |     A = [ sparse(n1, n1), A; A', sparse(n2, n2) ];
40 | else
41 |     error('*** lconflict: unsupported value of FORMAT');
42 | end
43 |
44 | % Round to +1/-1
45 | A = (A > 0) - (A < 0);
46 |
47 | % Absolute value
48 | A_abs = double(A ~= 0);
49 |
50 | n = size(A, 1);
51 |
52 | % Laplacians; both use the degrees of the unsigned matrix
53 | D = spdiags(sum(A_abs)', [0], n, n);
54 | L = D - A;
55 | L_abs = D - A_abs;
56 |
57 | l_max = eigs(L, 1, 'lm', opts);
58 | l_abs_max = eigs(L_abs, 1, 'lm', opts);
59 |
60 | values = l_abs_max / l_max;
61 |
--------------------------------------------------------------------------------
/m/konect_statistic_reciprocity.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the reciprocity of a directed network, i.e., the proportion
3 | % of edges for which an edge in the opposite direction exists.
4 | %
5 | % This statistic is always between zero and one. Zero denotes a
6 | % network in which no directed edge is reciprocated; one denotes a
7 | % network in which all edges are reciprocated.
8 | %
9 | % Each edge is counted separately, which means that if two edges are
10 | % opposite to each other, they count twice. For instance, the graph
11 | %
12 | % /------------->
13 | % * -------> * *
14 | % <------------/
15 | %
16 | % has reciprocity 2/3.
17 | %
18 | % Multiple edges are ignored.
19 | %
20 | % PARAMETERS
21 | % A Adjacency matrix of a directed graph
22 | % format
23 | % weights
24 | %
25 | % RESULT
26 | % value The reciprocity, a number between zero and one
27 | %
28 | % GROUP: asym
29 | %
30 |
31 | function value = konect_statistic_reciprocity(A, format, weights)
32 | % Fraction of the nonzero entries (i,j) of konect_absx(A) for which the
33 | % transposed entry (j,i) is nonzero as well.  Raises an error unless
34 | % FORMAT is ASYM.
35 |
36 | consts = konect_consts();
37 |
38 | if format ~= consts.ASYM
39 |     error('*** reciprocity is only defined for directed networks');
40 | end
41 |
42 | A_abs = konect_absx(A);
43 |
44 | count_all = nnz(A_abs);
45 | count_reciprocated = nnz(A_abs & A_abs');
46 |
47 | value = count_reciprocated / count_all;
48 |
--------------------------------------------------------------------------------
/m/konect_statistic_power2.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the power law exponent using the correct and slow method [power2].
3 | %
4 | % PARAMETERS
5 | % A Adjacency/biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values Column vector of values
11 | % for undirected graph: power_a, xmin_a, L_a
12 | % for directed graph: power_a, ..., power_u, ..., power_v, ...
13 | % for bipartite graph: power_a, ..., power_u, ..., power_v, ...
14 | %
15 |
16 | function values = konect_statistic_power2(A, format, weights)
17 | % Stacks the results of konect_power_law_range():  first for the
18 | % symmetrized network, then (ASYM and BIP only) for A and for A'.
19 |
20 | consts = konect_consts();
21 |
22 | % Only the presence of an edge is used
23 | A = (A ~= 0);
24 |
25 | % Symmetric matrix over all nodes
26 | if format == consts.SYM | format == consts.ASYM
27 |     A_all = A | A';
28 | else % BIP
29 |     [m n] = size(A);
30 |     A_all = [sparse(m,m) A; A' sparse(n,n)];
31 | end
32 |
33 | values = konect_power_law_range(A_all, weights);
34 |
35 | % Separate results for rows and columns
36 | if format == consts.ASYM | format == consts.BIP
37 |     values = [ values ; konect_power_law_range(A, weights) ];
38 |     values = [ values ; konect_power_law_range(A', weights) ];
39 | end
40 |
--------------------------------------------------------------------------------
/test/test_decompose.m:
--------------------------------------------------------------------------------
1 | %
2 | % Test decompositions.
3 | %
4 |
5 | cd ..;
6 |
7 | addpath ../analysis/lib/matlab_bgl/
8 |
9 | % Names of all decompositions handed to konect_decomposition()
10 | decompositions = { ...
11 |     'sym', 'sym-n', 'lap', 'stoch2', 'svd', 'svd-n', ...
12 |     'diag', 'skew', 'herm', 'lapd', 'lapd-n', 'lapherm', ...
13 |     'stoch1', 'diag-n', 'back', ...
14 |     'dedicom1u', 'dedicom1v', 'dedicom2', 'dedicom3', 'dedicom3-0', 'takane' };
15 |
16 | opts.disp = 2;
17 |
18 | consts = konect_consts();
19 |
20 | %
21 | % Small dense graph; it is directed and strongly connected
22 | %
23 | A = [0 1 0 0 0; 0 0 1 1 0; 0 0 0 1 0; 0 1 1 0 2; 1 0 0 0 0];
24 | r = 3;
25 |
26 | for i = 1 : numel(decompositions)
27 |     decomposition = decompositions{i};
28 |     [u d v d_u d_v] = konect_decomposition(decomposition, A, r, ...
29 |                                            consts.ASYM, consts.POSITIVE, opts);
30 | end
31 |
32 | %
33 | % Sparse random graph
34 | %
35 | n = 1000;
36 | d = 20;
37 | A = sprand(n, n, d/n);
38 | r = 7;
39 |
40 | % Note:  D is overwritten by the second output of the decomposition
41 | for i = 1 : numel(decompositions)
42 |     decomposition = decompositions{i};
43 |     [u d v d_u d_v] = konect_decomposition(decomposition, A, r, ...
44 |                                            consts.ASYM, consts.POSITIVE, opts);
45 | end
46 |
--------------------------------------------------------------------------------
/test/test_statistic_squares.m:
--------------------------------------------------------------------------------
1 | %
2 | % Test the function that counts the number of squares in a
3 | % network.
4 | %
5 |
6 | cd ..
7 |
8 | consts = konect_consts();
9 |
10 | %
11 | % Edge lists of the test graphs, one field per graph
12 | %
13 | Ats.edge = [ 1 2 ];
14 | Ats.twostar = [ 1 2; 2 3 ];
15 | Ats.triangle = [ 1 2; 2 3; 3 1 ];
16 | Ats.path3 = [ 1 2; 2 3; 3 4 ];
17 | Ats.square = [ 1 2; 2 3; 3 4; 4 1 ];
18 | Ats.k4 = [ 1 2; 1 3; 1 4; 2 3; 2 4; 3 4 ];
19 | Ats.foursquare = [ 1 2; 1 3; 2 3; 2 6; 3 4; 3 7; 4 5; 4 8; 5 8; 5 9; ...
20 |                    6 7; 7 8; 8 9; 8 10; 9 10 ]
21 | Ats.k5 = [ 1 2; 1 3; 1 4; 1 5; 2 3; 2 4; 2 5; 3 4; 3 5; 4 5 ];
22 |
23 | %
24 | % Expected number of squares, under the same field names
25 | %
26 | counts.edge = 0;
27 | counts.twostar = 0;
28 | counts.triangle = 0;
29 | counts.path3 = 0;
30 | counts.square = 1;
31 | counts.k4 = 3;
32 | counts.foursquare = 4;
33 | counts.k5 = 15;
34 |
35 | names = fieldnames(Ats);
36 |
37 | for i = 1 : numel(names)
38 |     name = names{i}
39 |     At = Ats.(name);
40 |     count = counts.(name);
41 |
42 |     % Dense matrix with a one at each listed (row, column) pair
43 |     n = max(At(:));
44 |     A = accumarray(At, 1, [n n]);
45 |
46 |     values = konect_statistic_squares(A, consts.SYM, consts.UNWEIGHTED);
47 |
48 |     assert(values(1) == count);
49 | end
50 |
--------------------------------------------------------------------------------
/m/konect_power_law_flat.m:
--------------------------------------------------------------------------------
1 | %
2 | % Estimate the power-law exponent of an adjacency matrix.
3 | %
4 | % For asymmetric matrices, this function computes the left
5 | % distribution, i.e. based on row sums.
6 | %
7 | % Use konect_power_law_flat_vector() to estimate the power-law
8 | % exponent of a given vector.
9 | %
10 | % RESULT
11 | % gamma Power-law exponent
12 | % sigma Expected statistical error on gamma
13 | %
14 | % PARAMETERS
15 | % A Adjacency matrix; weights are ignored by
16 | % default. Note: a column vector may be passed, too.
17 | % weights (optional) How to interpret weights in A:
18 | % UNWEIGHTED: weights are ignored (default)
19 | % POSITIVE: weights are interpreted as multiple
20 | % edges (they must be positive)
21 | % others: weights are ignored
22 | %
23 |
24 | function [gamma sigma] = konect_power_law_flat(A, weights)
25 | % Row sums of A are handed to konect_power_law_flat_vector().  Unless
26 | % WEIGHTS is POSITIVE, A is first reduced to its nonzero pattern.
27 |
28 | consts = konect_consts();
29 |
30 | % WEIGHTS is optional
31 | if nargin < 2
32 |     weights = consts.UNWEIGHTED;
33 | end
34 |
35 | if weights ~= consts.POSITIVE
36 |     A = (A ~= 0);
37 | end
38 |
39 | degrees = sum(A, 2);
40 |
41 | % No row sum may be negative
42 | assert(~any(degrees < 0));
43 |
44 | [gamma sigma] = konect_power_law_flat_vector(degrees);
45 |
46 | % Both estimates must be real numbers
47 | assert(imag(gamma) == 0);
48 | assert(imag(sigma) == 0);
49 |
--------------------------------------------------------------------------------
/m/konect_predict_neib3.m:
--------------------------------------------------------------------------------
1 | %
2 | % Neighborhood-based link predictions, using paths of length
3 | % 3. Sensible for all networks, including bipartite ones.
4 | %
5 | % Always ignore edge direction and weights.
6 | %
7 | % PARAMETERS
8 | % type 'path3'
9 | % A (m*n) Adjacency/biadjacency matrix
10 | % T (e*2) Indexes of vertex pairs; typically this is the test set
11 | % format How to interpret A
12 | %
13 | % RESULTS
14 | % prediction (e*1) prediction values
15 | %
16 |
17 | function [prediction] = konect_predict_neib3(type, A, T, format)
18 | % TYPE is accepted but not read.  With B the matrix returned by
19 | % konect_matrix('symfull', A, format), the score of a pair (i,j) is
20 | %
21 | %     sum_k (B * B)(i,k) * B(j,k),
22 | %
23 | % evaluated for the ranges of pairs supplied by konect_fromto(), so
24 | % that the full product is never stored.
25 |
26 | consts = konect_consts();
27 |
28 | e = size(T, 1);
29 |
30 | % For BIP, column indexes are shifted by the number of rows of A
31 | % (presumably because 'symfull' places the right nodes after the left
32 | % ones -- confirm in konect_matrix)
33 | if format == consts.BIP
34 |     T(:,2) = T(:,2) + size(A,1);
35 | end
36 |
37 | A = konect_matrix('symfull', A, format);
38 |
39 | % The degree vectors formerly computed here were never used and have
40 | % been removed.
41 |
42 | %
43 | % Prediction
44 | %
45 | prediction = zeros(e, 1);
46 |
47 | [k, from, to] = konect_fromto(1, e, 100);
48 |
49 | t = konect_timer(e);
50 |
51 | for l = 1:k
52 |
53 |     t = konect_timer_tick(t, from(l));
54 |
55 |     range = from(l) : to(l);
56 |
57 |     A_i = A(T(range, 1), :) * A;
58 |     A_j = A(T(range, 2), :);
59 |     prediction(range) = sum(A_i .* A_j, 2);
60 | end
61 |
62 | konect_timer_end(t);
63 |
--------------------------------------------------------------------------------
/m/konect_network_rank_abs.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the absolute network rank. The absolute network rank (formerly called
3 | % "effective rank") equals the sum of absolute eigenvalues divided by
4 | % the absolute value of the largest eigenvalue (in absolute value).
5 | %
6 | % For Laplacian decomposition types, the inverse nonzero eigenvalues are
7 | % used, and zero eigenvalues are ignored. For asymmetric cases, the
8 | % singular values are used.
9 | %
10 | % The parameter DECOMPOSITION is only used to distinguish between
11 | % adjacency and Laplacian decompositions.
12 | %
13 | % PARAMETERS
14 | % dd (r*1) Eigenvalue/singular values
15 | % decomposition (optional) Decomposition type, as in
16 | % decompose.m; default is 'svd'
17 | %
18 | % RESULT
19 | % rank The network rank
20 | %
21 |
22 | function rank = konect_network_rank_abs(dd, decomposition)
23 | % Sum of the absolute values in DD divided by the largest of them.
24 | % When DECOMPOSITION begins with 'lap', values below 1e-8 are dropped
25 | % and the remaining ones are inverted first.  Returns NaN for an empty
26 | % DD of any shape (0x0, 0x1, 1x0).
27 |
28 | if nargin < 2
29 |     decomposition = 'svd';
30 | end
31 |
32 | % isempty() also covers 0x1 and 1x0 inputs; the previous test
33 | % sum(size(dd)) == 0 only recognized 0x0 and returned [] otherwise.
34 | if isempty(dd)
35 |     rank = NaN;
36 |     return;
37 | end
38 |
39 | dd = abs(dd);
40 |
41 | if strncmp(decomposition, 'lap', 3)
42 |     dd(dd < 1e-8) = 0;
43 |     dd = dd .^ -1;
44 |     dd(dd ~= dd) = 0;     % NaN
45 |     dd(dd == Inf) = 0;    % inverse of a dropped value
46 | end
47 |
48 | rank = sum(dd) / max(dd);
49 |
--------------------------------------------------------------------------------
/m/konect_statistic_avgdegreeasym.m:
--------------------------------------------------------------------------------
1 | %
2 | % Directed average degrees: This is only useful when nodes with zero
3 | % degree are ignored, which is done below.
4 | %
5 | % PARAMETERS
6 | % A Adjacency matrix
7 | % format
8 | % weights
9 | %
10 | % RESULT
11 | % values Column vector of results
12 | % [1] average out+in degree (ignoring 0-nodes)
13 | % [2] average outdegree (ignoring 0-nodes)
14 | % [3] average indegree (ignoring 0-nodes)
15 | % [4] average out+in degree (not ignoring 0-nodes)
16 | % [5] average outdegree (not ignoring 0-nodes)
17 | % [6] average indegree (not ignoring 0-nodes)
18 | %
19 | % GROUP: asym
20 | %
21 |
22 | function values = konect_statistic_avgdegreeasym(A, format, weights)
23 | % All six values are quotients of the number M of nonzero entries of A
24 | % (doubled for the out+in variants) and a node count:  the number of
25 | % nodes with nonzero degree for [1]-[3], the matrix size N for [4]-[6].
26 | % Intermediate counts and the result are echoed to the console.
27 |
28 | consts = konect_consts();
29 |
30 | assert(format == consts.ASYM);
31 | assert(size(A,1) == size(A,2));
32 |
33 | n = size(A,1)
34 |
35 | m = nnz(A)
36 |
37 | % The degrees below are only used to count nodes of nonzero degree
38 | if weights ~= consts.POSITIVE
39 |     A = (A ~= 0);
40 | end
41 |
42 | d_out = sum(A, 2);
43 | d_in = sum(A, 1)';
44 | d_total = d_out + d_in;
45 |
46 | nz = nnz(d_total)
47 | nz_out = nnz(d_out)
48 | nz_in = nnz(d_in)
49 |
50 | values = [ 2 * m / nz ; m / nz_out ; m / nz_in ; ...
51 |            2 * m / n ;  m / n ;      m / n ]
52 |
--------------------------------------------------------------------------------
/m/konect_predict_cosine.m:
--------------------------------------------------------------------------------
1 | %
2 | % Cosine similarity prediction. This function supports complex matrices
3 | % A as input. The file konect_predict_neib.m also includes the cosine
4 | % similarity, but does not support complex vectors.
5 | %
6 | % RESULT
7 | % prediction (e*1) Prediction scores
8 | %
9 | % PARAMETERS
10 | % A (n*n) The adjacency matrix
11 | % T (e*2) Indexes of vertex pairs to compute
12 | % format Format
13 | % Pass consts.BIP to not perform any preprocessing
14 | %
15 |
16 | function [prediction] = konect_predict_cosine(A, T, format)
17 | % Rows of the (preprocessed) matrix are scaled to unit Euclidean norm;
18 | % the score of a pair is the real part of the conjx-product of the two
19 | % rows.  Rows that are entirely zero keep a zero scaling factor.
20 |
21 | consts = konect_consts();
22 |
23 | % Preprocessing:  SYM adds the transpose, ASYM stacks A on top of A';
24 | % any other format leaves A unchanged
25 | if format == consts.SYM
26 |     A = A + A';
27 | elseif format == consts.ASYM
28 |     A = [ A ; A' ];
29 | end
30 |
31 | m = size(A, 1);
32 |
33 | % Inverse row norms; 0 ^ -0.5 is Inf and is replaced by zero
34 | w = sum(conjx(A) .* A, 2) .^ -0.5;
35 | w(isinf(w)) = 0;
36 | A = spdiags(w, [0], m, m) * A;
37 |
38 | e = size(T, 1)
39 |
40 | prediction = zeros(e, 1);
41 |
42 | [k, from, to] = konect_fromto(1, e, 1000);
43 |
44 | for l = 1:k
45 |
46 |     range = from(l) : to(l);
47 |     fprintf(1, 'range(1) = %d\n', range(1));
48 |
49 |     rows_i = T(range, 1);
50 |     rows_j = T(range, 2);
51 |
52 |     prediction(range, 1) = real(sum(conjx(A(rows_i,:)) .* A(rows_j,:), 2));
53 | end
54 |
--------------------------------------------------------------------------------
/m/konect_significance_image.m:
--------------------------------------------------------------------------------
1 | %
2 | % Convert p-value and difference matrices to an image.
3 | %
4 | % PARAMETERS
5 | % P (m*m) Pairwise p-values
6 | % D (m*m) Pairwise differences
7 | % p_threshold Threshold under which to show p-values (e.g., 0.05)
8 | % maxdiff Difference to show in maximal style
9 | %
10 | % RESULT
11 | % I (m*m*3) Image matrix/tensor
12 | %
13 |
14 | function I = konect_significance_image(P, D, p_threshold, maxdiff)
15 | % Builds an HSV image with value 1 everywhere, a hue derived from D and
16 | % a saturation derived from P, and converts it to RGB column by column.
17 | % For empty input (m = 0) the result is a 0*0*3 array.
18 |
19 | m = size(P,1);
20 |
21 | % The larger the values, the more detail is visible
22 | gamma_D = 0.6;
23 | gamma_P = 1.3;
24 |
25 | % Hue (m*m).  D is clipped to [-maxdiff, +maxdiff]; the normalized
26 | % values +1, 0, -1 are mapped to the hues 1/3 (green), 2/3 and 1 (red).
27 | % NOTE(review): the previous comment announced yellow for equal values,
28 | % but hue 2/3 is blue in hsv2rgb() -- confirm which one is intended.
29 | D_norm = max(-1, min(+1, D / maxdiff)); % between -1 and +1
30 | D_norm = sign(D_norm) .* (abs(D_norm) .^ gamma_D);
31 | color_H = (-D_norm + 2) / 3;
32 |
33 | % Saturation (m*m):  zero (white) for P >= p_threshold, one for P = 0
34 | color_S = (1 - min(1, P / p_threshold)) .^ gamma_P;
35 |
36 | % I (m*m*3) is the image matrix.  For each (i,j), I(i,j,:) is the RGB
37 | % vector for the area representing the relation between methods i and
38 | % j.  Preallocating avoids growing I in the loop and guarantees that the
39 | % output is assigned when m is zero.
40 | I = zeros(m, m, 3);
41 |
42 | for j = 1 : m % j is the column of all matrices
43 |     C = hsv2rgb([ color_H(:,j) color_S(:,j) ones(m,1) ]);
44 |     I(1:m, j, 1) = C(:,1);
45 |     I(1:m, j, 2) = C(:,2);
46 |     I(1:m, j, 3) = C(:,3);
47 | end
48 |
--------------------------------------------------------------------------------
/m/konect_statistic_nonbip.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the non-bipartivity measure [nonbip]. It is defined as
3 | %
4 | % 1 - | lambda_min[\bar A] / lambda_max[\bar A] |,
5 | %
6 | % where lambda_min and lambda_max are the smallest and largest
7 | % eigenvalue of the unweighted adjacency matrix \bar A.
8 | %
9 | % GROUP: square
10 | %
11 | % PARAMETERS
12 | % A Half-adjacency matrix
13 | % format Format of the network
14 | % weights Weights in the network
15 | %
16 | % RESULTS
17 | % values Column vector of results
18 | % [1] non-bipartivity value
19 | % [2] lambda_min
20 | % [3] lambda_max
21 | %
22 |
23 | function values = konect_statistic_nonbip(A, format, weights)
24 | % The eigenvalues are those of the operator x -> A*x + A'*x, which is
25 | % applied without forming A + A'.  Unless WEIGHTS is POSITIVE, A is
26 | % first reduced to its nonzero pattern.  Intermediate values are echoed
27 | % to the console.
28 |
29 | opts.disp = 2;
30 | opts.issym = 1;
31 |
32 | consts = konect_consts();
33 |
34 | if format == consts.BIP
35 |     error('*** NONBIP is trivially zero for bipartite networks');
36 | end
37 |
38 | n = size(A, 1);
39 |
40 | if weights ~= consts.POSITIVE
41 |     A = (A ~= 0);
42 | end
43 |
44 | % Product with the symmetrized matrix
45 | sym_product = @(x)(A * x + A' * x);
46 |
47 | lambda_max = eigs(sym_product, n, 1, 'la', opts)
48 | lambda_min = eigs(sym_product, n, 1, 'sa', opts)
49 |
50 | nonbip = 1 - abs( lambda_min / lambda_max )
51 |
52 | % A negative result is replaced by zero, with a warning on the console
53 | if nonbip < 0
54 |     fprintf(1, 'Warning: negative value of NONBIP, rounding to zero\n');
55 |     nonbip = 0;
56 | end
57 |
58 | values = [ nonbip; lambda_min; lambda_max ];
59 |
--------------------------------------------------------------------------------
/m/konect_statistic_assortativity.m:
--------------------------------------------------------------------------------
1 | %
2 | % The assortativity \rho of a network, i.e., the Pearson correlation
3 | % coefficient between the degree of two connected nodes.
4 | %
5 | % Ignore multiplicities, loops and edge directions.
6 | %
7 | % The value of \rho is NaN, when the corresponding Pearson
8 | % correlation is not defined, e.g., when the graph is regular.
9 | %
10 | % PARAMETERS
11 | % A
12 | % format
13 | % weights
14 | % opts (optional)
15 | %
16 | % RESULT
17 | % values Column vector of results
18 | % [1] assortativity \rho
19 | % [2] p-value
20 | %
21 |
22 | function values = konect_statistic_assortativity(A, format, weights, opts)
23 | % For every nonzero entry of the 0/1 matrix, the degrees of its row node
24 | % and of its column node are paired, and corr() is applied to the two
25 | % resulting vectors.  WEIGHTS and OPTS are not read.  The correlation
26 | % and its p-value are echoed to the console.
27 |
28 | consts = konect_consts();
29 |
30 | if format == consts.SYM | format == consts.ASYM
31 |
32 |     n = size(A, 1);
33 |
34 |     % Symmetric 0/1 matrix without diagonal
35 |     A = A ~= 0;
36 |     A = A | A';
37 |     A = A - spdiags(diag(A), [0], n, n);
38 |
39 |     d = sum(A, 2);
40 |
41 |     [x y] = find(A);
42 |
43 |     p = full(d(x));
44 |     q = full(d(y));
45 |
46 |     [rho pvalue] = corr(p, q)
47 |
48 | elseif format == consts.BIP
49 |
50 |     A = A ~= 0;
51 |
52 |     % Row degrees and column degrees
53 |     d1 = sum(A, 2);
54 |     d2 = sum(A, 1)';
55 |
56 |     [x y] = find(A);
57 |
58 |     p = full(d1(x));
59 |     q = full(d2(y));
60 |
61 |     [rho pvalue] = corr(p, q)
62 |
63 | end
64 |
65 | values = [ rho; pvalue ];
66 |
--------------------------------------------------------------------------------
/m/konect_normalize_matrix.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the matrix normalization. Given a real rectangular matrix
3 | % A, its normalization is
4 | %
5 | % D_u^-0.5 * A * D_v^-0.5
6 | %
7 | % with D_u and D_v the left and right diagonal degree matrices
8 | % defined, for an m*n matrix A, by
9 | %
10 | % D_u(i,i) = sum_{j = 1:n} | A(i,j) |
11 | % D_v(i,i) = sum_{j = 1:m} | A(j,i) |
12 | %
13 | % i.e., D_u and D_v contain the sum of absolute values of rows and
14 | % columns.
15 | %
16 | % This type of normalization is used to construct many types of
17 | % characteristic graph matrices, for instance the normalized
18 | % adjacency matrix from the adjacency matrix.
19 | %
20 | % The resulting matrix can be denormalized back (e.g., after
21 | % additional changes to it) by using the output parameters T_u and
22 | % T_v.
23 | %
24 | % RESULT
25 | % B (m*n) The normalized matrix; sparse
26 | % T_u (m*m) The matrix D_u^-0.5; sparse
27 | % T_v (n*n) The matrix D_v^-0.5; sparse
28 | %
29 | % PARAMETERS
30 | % A (m*n) The non-normalized matrix; sparse
31 | %
32 |
33 | function [B T_u T_v] = konect_normalize_matrix(A)
34 | % Rows and columns whose absolute sum is zero get the scaling factor 1,
35 | % so that T_u and T_v stay finite and invertible.
36 |
37 | [m,n] = size(A);
38 | A_abs = konect_absx(A);
39 |
40 | T_u = full(sum(A_abs,2)) .^ -.5;
41 | T_v = full(sum(A_abs,1))' .^ -.5;
42 |
43 | % A zero sum gives 0 ^ -0.5 = Inf, which the former NaN-only test
44 | % (x ~= x) did not catch; replace every non-finite factor.
45 | T_u(~isfinite(T_u)) = 1;
46 | T_v(~isfinite(T_v)) = 1;
47 |
48 | T_u = spdiags(T_u, [0], m,m);
49 | T_v = spdiags(T_v, [0], n,n);
50 |
51 | B = T_u * A * T_v;
52 |
--------------------------------------------------------------------------------
/test/test_connect.m:
--------------------------------------------------------------------------------
1 | cd ..
2 | addpath ../analysis/lib/matlab_bgl/
3 |
4 | %
5 | % Largest strongly connected component
6 | %
7 |
8 | % ccss(k) is the value that sum(v) must have for the graph as{k}
9 | ccss = [0 1 2 1 3 2 1 4 3 2 2 5 4 3 3 2];
10 |
11 | % Empty graph
12 | as{1} = [];
13 |
14 | % 1
15 | as{2} = [0];
16 |
17 | % 2
18 | as{3} = [ 0 1 ; 1 0 ];
19 |
20 | % 1 + 1
21 | as{4} = [ 0 0; 0 0];
22 |
23 | % 3
24 | as{5} = [ 0 1 0; 0 0 1; 1 0 0];
25 |
26 | % 2 + 1
27 | as{6} = [ 0 1 0; 1 0 0; 0 0 0];
28 |
29 | % 1 + 1 + 1
30 | as{7} = [ 0 0 0; 0 0 0; 0 0 0];
31 |
32 | % 4
33 | as{8} = [0 1 0 0; 0 0 1 0; 0 0 0 1; 1 0 0 0];
34 |
35 | % 3 + 1
36 | as{9} = [0 1 0 0; 0 0 1 0; 1 0 0 0; 0 0 0 0];
37 |
38 | % 2 + 2
39 | as{10} = [0 1 0 0; 1 0 0 0; 0 0 0 1; 0 0 1 0];
40 |
41 | % 2 + 1 + 1
42 | as{11} = [0 1 0 0; 1 0 0 0; 0 0 0 0; 0 0 0 0];
43 |
44 | % 5
45 | as{12} = [0 1 0 0 0; 0 0 1 0 0; 0 0 0 1 0; 0 0 0 0 1; 1 0 0 0 0];
46 |
47 | % 4 + 1
48 | as{13} = [0 1 0 0 0; 0 0 1 0 0; 0 0 0 1 0; 1 0 0 0 0; 0 0 0 0 0];
49 |
50 | % 3 + 2
51 | as{14} = [0 1 0 0 0; 0 0 1 0 0; 1 0 0 0 0; 0 0 0 0 1; 0 0 0 1 0];
52 |
53 | % 2 + 3
54 | as{15} = [0 1 0 0 0; 1 0 0 0 0; 0 0 0 1 0; 0 0 0 0 1; 0 0 1 0 0];
55 |
56 | % 2 ~ 1 1 1
57 | as{16} = [0 1 0 0 0; 1 0 1 1 1; 0 0 0 0 0; 0 0 0 0 0; 0 0 0 0 0];
58 |
59 | for k = 1 : numel(as)
60 |
61 |     k
62 |
63 |     a = sparse(as{k});
64 |
65 |     % The function is named konect_connect_strong in the m/ directory.
66 |     % NOTE(review): assumes it takes the adjacency matrix and returns a
67 |     % 0/1 membership vector, as the former connect_strong did -- confirm.
68 |     v = konect_connect_strong(a);
69 |
70 |     if sum(v) ~= ccss(k)
71 |         error('*** test_connect: graph %d: got %d, expected %d', ...
72 |               k, full(sum(v)), ccss(k));
73 |     end
74 |
75 | end
76 |
--------------------------------------------------------------------------------
/m/konect_connect_bipartite_nobgl.m:
--------------------------------------------------------------------------------
1 | %
2 | % Find biggest connected component of bipartite graph.
3 | %
4 | % PARAMETERS
5 | % a Biadjacency matrix of bipartite graph (i.e.,
6 | % [0 a;a' 0] is the actual adjacency matrix.)
7 | %
8 | % RESULT
9 | % v 0/1 vector of left nodes in connected component
10 | % w 0/1 vector of right nodes in connected component
11 | %
12 | % Returns v=[] and w=[] when a has no nonzero entry; raises an error when no big component is found
13 | %
14 |
15 | function [v, w] = konect_connect_bipartite_nobgl(a)
16 | % Starting from a random left node, the component is grown until it no
17 | % longer changes.  This is repeated with a new random start until the
18 | % component holds at least 10% of the left nodes that have an edge; an
19 | % error is raised when a 16th attempt would be needed.  Sizes and
20 | % counters are echoed to the console.
21 |
22 | [mm,nn] = size(a)
23 |
24 | % Remove empty rows and columns
25 | al = a ~= 0;
26 | vv = find(sum(al,2));
27 | ww = find(sum(al,1)');
28 | al = al(vv,ww);
29 |
30 | [m,n] = size(al);
31 |
32 | % Nothing to search in a matrix without nonzero entries
33 | if m == 0 | n == 0
34 |     v = [];
35 |     w = [];
36 |     return;
37 | end
38 |
39 | count = 0
40 | maxc = .1 * m
41 | ite = 1;
42 |
43 | while count < maxc
44 |
45 |     if ite > 15
46 |         error('No big component');
47 |     end
48 |
49 |     % Random start node among the left nodes
50 |     v = zeros(m,1);
51 |     v(1+floor(rand * m)) = 1;
52 |     count_last = 0;
53 |     count = 1;
54 |     rad = 0;
55 |
56 |     % Add all left nodes at distance two until the set is stable
57 |     while count ~= count_last
58 |         count_last = count;
59 |         v = logical(al * (al' * v) + v);
60 |         count = sum(v);
61 |         rad = rad + 1;
62 |     end
63 |
64 |     rad
65 |     ite = ite + 1
66 |
67 | end
68 |
69 | % Right nodes adjacent to the selected left nodes
70 | w = logical(al' * double(v));
71 |
72 | % Map back to the indexes of the original matrix
73 | vi = vv(v);
74 | wi = ww(w);
75 |
76 | v = zeros(mm,1);
77 | w = zeros(nn,1);
78 |
79 | v(vi) = 1;
80 | w(wi) = 1;
81 |
--------------------------------------------------------------------------------
/m/konect_decomposition_takane.m:
--------------------------------------------------------------------------------
1 | %
2 | % Code to compute the DEDICOM by Yoshio Takane. Adapted to large sparse
3 | % matrices by Jérôme Kunegis.
4 | %
5 | % RESULT
6 | % U (n*r) "Eigenvectors"
7 | % D (r*r) Non-diagonal "eigenvalue" matrix
8 | %
9 | % PARAMETERS
10 | % A (n*n) Square adjacency matrix
11 | % r Rank
12 | % opts (optional) Passed to eigs()
13 | %
14 |
15 | function [U D] = konect_decomposition_takane(A, r, opts)
16 | % Iterative computation:  starting from eigenvectors of A'*A + A*A',
17 | % U is repeatedly replaced by the left singular vectors of
18 | % A*U*D' + A'*U*D with D = U'*A*U, until the residual V drops to CONV
19 | % or ITMAX iterations are reached.  The result is passed through
20 | % konect_order_dedicom().
21 |
22 | itmax = 2000;   % Maximal number of iterations
23 | conv = 1e-8;    % The iteration stops once V is not larger than this
24 |
25 | if ~exist('opts', 'var')
26 |     opts = struct();
27 | end
28 |
29 | % Initial estimate; the matrix A'*A + A*A' is only applied as an operator
30 | [u0 d0] = eigs(@(x)(A' * (A * x) + A * (A' * x)), size(A,1), r, 'lm', opts);
31 |
32 | % The quantities formerly named st, so and sn (squared Frobenius norms
33 | % of A and D) were computed but never read, and have been removed.
34 |
35 | U = u0;
36 | au = A * U;
37 | atu = A' * U;
38 | D = U' * au;
39 | i = 0;
40 |
41 | % Residual:  part of G outside the column space of U
42 | G = au * D' + atu * D;
43 | M2 = G - U * (U' * G);
44 | v = norm(M2, 'fro');
45 |
46 | while i < itmax & conv < v
47 |     i = i + 1;
48 |     utemp = au * D' + atu * D;
49 |     [un d2 v2] = svd(utemp, 'econ');
50 |     au = A * un;
51 |     atu = A' * un;
52 |     D = un' * au;
53 |
54 |     % NOTE(review): G is built from the new basis UN, while the
55 |     % projection uses the previous U -- kept unchanged, confirm intent.
56 |     G = au * D' + atu * D;
57 |     M2 = G - U * (U' * G);
58 |     v = sqrt(norm(M2, 'fro')^2 / sum(size(M2))) ;
59 |
60 |     if mod(i, 20) == 0
61 |         fprintf(1, '[%d] norm = %g\n', i, v);
62 |     end
63 |
64 |     U = un;
65 | end
66 |
67 | [U D] = konect_order_dedicom(U, D);
68 |
--------------------------------------------------------------------------------
/m/konect_pagerank.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute PageRank. This function supports nodes with zero
3 | % outdegree, and teleportation.
4 | %
5 | % The given graph is directed and may be weighted. To use with an
6 | % undirected graph (which is trivial because the PageRank values are then
7 | % proportional to a power of the degree), pass a symmetric adjacency
8 | % matrix.
9 | %
10 | % This implementation avoids using O(n^2) memory, and needs only O(m)
11 | % memory instead, where m is the number of edges in the graph.
12 | %
13 | % PARAMETERS
14 | % A (n*n) Adjacency matrix of the directed graph
15 | % alpha Amount of teleportation to do, e.g., 0.2. Zero denotes
16 | % no teleportation
17 | % opts (optional) Options to eigs()
18 | %
19 | % RESULTS
20 | % u (n*1) PageRank vector
21 | %
22 |
23 | function [u] = konect_pagerank(A, alpha, opts)
24 | % The PageRank vector is the dominant left eigenvector of
25 | %
26 | %     (1 - alpha) * (D^-1 * A + d_0 * 1' / n) + alpha * 1 * 1' / n,
27 | %
28 | % where D holds the row sums of A and d_0 marks the rows whose sum is
29 | % zero.  The matrix is only applied as an operator.  The sign of the
30 | % result is chosen such that the entries have a positive sum; the
31 | % vector is not rescaled.
32 |
33 | if ~exist('opts', 'var')
34 |     opts.disp = 2;
35 | end
36 |
37 | [m n] = size(A);
38 | assert(m == n);
39 |
40 | d_out = full(sum(A, 2));
41 |
42 | % Rows without outgoing weight
43 | d_0 = (d_out == 0);
44 |
45 | % Inverse row sums, with zero in place of 1/0
46 | d_out_plus = d_out .^ -1;
47 | d_out_plus(d_0) = 0;
48 |
49 | P_pos = (1 - alpha) * spdiags(d_out_plus, [0], n, n) * A;
50 |
51 | J_1 = (1 - alpha) / n * ones(1, n);
52 | J_2 = ones(n, 1);
53 | J_3 = alpha / n * ones(1, n);
54 |
55 | % eigs() requires the function handle to return a column vector; the
56 | % row vector v' * M is therefore transposed before it is returned.
57 | [u, lambda] = eigs(@(v)(v' * P_pos + (v' * d_0) * J_1 + (v' * J_2) * J_3)', ...
58 |                    n, 1, 'lm', opts);
59 |
60 | u = u * sign(sum(u));
61 |
--------------------------------------------------------------------------------
/m/konect_statistic_inoutassort.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the in/outdegree assortativity, i.e., the Pearson correlation
3 | % between the in-degree and the out-degree in directed networks.
4 | %
5 | % GROUP: asym
6 | %
7 | % RESULT
8 | % [1] Pearson correlation of log(d+1)
9 | % [3] Pearson correlation of log(d), excluding nodes with d=0
10 | % [5] Pearson correlation of d
11 | % [7] Rank correlation of d (Kendall)
12 | % [9] Rank correlation of d (Spearman)
13 | % [2,4,6,8,10] Corresponding p-values
14 | %
15 |
16 | function values = konect_statistic_inoutassort(A, format, weights)
17 | % Row sums (out) and column sums (in) of A are correlated in five ways;
18 | % each correlation is stored together with its p-value.  FORMAT is not
19 | % read.  The first and the third correlation are echoed to the console.
20 |
21 | consts = konect_consts();
22 |
23 | assert(size(A,1) == size(A,2));
24 |
25 | n = size(A,1);
26 |
27 | % Entries are kept as they are for POSITIVE and UNWEIGHTED, and replaced
28 | % by 0/1 otherwise
29 | if ~(weights == consts.POSITIVE | weights == consts.UNWEIGHTED)
30 |     A = (A ~= 0);
31 | end
32 |
33 | d_out = full(sum(A,2));
34 | d_in = full(sum(A,1)');
35 |
36 | values = NaN(10, 1);
37 |
38 | % [1,2] Pearson, log(d + 1)
39 | [r, p] = corr(log(d_out + 1), log(d_in + 1))
40 | values(1:2) = [r; p];
41 |
42 | % [3,4] Pearson, log(d); nodes with a zero degree become NaN and are
43 | % left out through the 'rows', 'complete' option
44 | d_out_log = log(d_out);
45 | d_out_log(d_out == 0) = NaN;
46 | d_in_log = log(d_in);
47 | d_in_log(d_in == 0) = NaN;
48 | [r, p] = corr(d_out_log, d_in_log, 'rows', 'complete');
49 | values(3:4) = [r; p];
50 |
51 | % [5,6] Pearson, d
52 | [r, p] = corr(d_out, d_in)
53 | values(5:6) = [r; p];
54 |
55 | % [7,8] Kendall
56 | [r, p] = corr(d_out, d_in, 'type', 'Kendall');
57 | values(7:8) = [r; p];
58 |
59 | % [9,10] Spearman
60 | [r, p] = corr(d_out, d_in, 'type', 'Spearman');
61 | values(9:10) = [r; p];
62 |
--------------------------------------------------------------------------------
/m/konect_spconvert.m:
--------------------------------------------------------------------------------
1 | %
2 | % Customized version of spconvert() that also supports the two-column
3 | % format for sparse 0/1 matrices. If only two columns are given,
4 | % create a sparse logical matrix.
5 | %
6 | % PARAMETERS
7 | % T The r*(3 or 2) matrix of row indexes, column indexes and
8 | % (optionally) weights; additional columns beyond the
9 | % third are ignored
10 | % n1,n2 (optional) Number of rows and columns. May be less than
11 | % actual. If not passed, the size is inferred from T.
12 | %
13 | % RESULT
14 | % A (n1*n2) sparse matrix
15 | %
16 |
17 | function [A] = konect_spconvert(T, n1, n2)
18 | % Builds the sparse matrix with sparse().  The size is passed on only
19 | % when both N1 and N2 are given and nonzero; otherwise it is inferred
20 | % from the indexes in T.
21 |
22 | if ~exist('n1', 'var'), n1 = 0; end;
23 | if ~exist('n2', 'var'), n2 = 0; end;
24 |
25 | % Optional trailing size arguments of sparse()
26 | if n1 ~= 0 & n2 ~= 0
27 |     dims = { n1, n2 };
28 | else
29 |     dims = {};
30 | end
31 |
32 | if size(T,2) == 2
33 |
34 |     % The matrix is a 0/1 matrix.  Building it with logical values may
35 |     % fail when index pairs are repeated, in which case a numerical
36 |     % matrix is built instead.
37 |     try
38 |         A = sparse(T(:,1), T(:,2), logical(1), dims{:});
39 |     catch
40 |         A = sparse(T(:,1), T(:,2), 1, dims{:});
41 |     end
42 | else
43 |     % Third column holds the values; further columns are ignored
44 |     A = sparse(T(:,1), T(:,2), T(:,3), dims{:});
45 | end
46 |
--------------------------------------------------------------------------------
/m/konect_statistic_tconflict.m:
--------------------------------------------------------------------------------
1 | %
2 | % The triadic conflict, i.e., the proportion of unbalanced triangles.
3 | %
4 | % GROUP: squarenegative
5 | %
6 |
7 | function [value] = konect_statistic_tconflict(A, format, weights)
8 | % For every node, the entries between its positive and negative
9 | % neighbors in konect_signx(A + A') are inspected:  an entry closes a
10 | % triangle, and the triangle is counted as negative when the product of
11 | % its three signs is negative.  The result is the number of negative
12 | % triangles divided by the number of triangles.  FORMAT and WEIGHTS are
13 | % not read.
14 |
15 | assert(size(A, 1) == size(A, 2));
16 |
17 | n = size(A, 1);
18 |
19 | A = konect_signx(A+A');
20 |
21 | % Remove diagonal elements
22 | [x y z] = find(A);
23 | off_diagonal = (x ~= y);
24 | A = sparse(x(off_diagonal), y(off_diagonal), z(off_diagonal), n, n);
25 |
26 | % Number of true entries of a logical matrix
27 | count = @(M) full(sum(sum(M)));
28 |
29 | sum_neg = 0;
30 | sum_tot = 0;
31 |
32 | t = konect_timer(n);
33 |
34 | for u = 1:n
35 |
36 |     t = konect_timer_tick(t, u);
37 |
38 |     % Row and column of node u
39 |     ao = A(u, :)';
40 |     ai = A(:, u);
41 |
42 |     % Indexes of positive and negative neighbors
43 |     nebs_op = find(ao > 0);
44 |     nebs_on = find(ao < 0);
45 |     nebs_ip = find(ai > 0);
46 |     nebs_in = find(ai < 0);
47 |
48 |     % Submatrices of relationships between neighbors
49 |     A_pp = A(nebs_ip, nebs_op);
50 |     A_pn = A(nebs_ip, nebs_on);
51 |     A_np = A(nebs_in, nebs_op);
52 |     A_nn = A(nebs_in, nebs_on);
53 |
54 |     % Negative triangles:  mixed neighbors joined positively, or equal
55 |     % neighbors joined negatively
56 |     user_neg = count(A_pn > 0) + count(A_np > 0) + count(A_pp < 0) + count(A_nn < 0);
57 |
58 |     % All triangles
59 |     user_tot = count(A_pn ~= 0) + count(A_np ~= 0) + count(A_pp ~= 0) + count(A_nn ~= 0);
60 |
61 |     sum_neg = sum_neg + user_neg;
62 |     sum_tot = sum_tot + user_tot;
63 | end
64 |
65 | konect_timer_end(t);
66 |
67 | value = sum_neg / sum_tot;
68 |
--------------------------------------------------------------------------------
/m/konect_statistic_nonbipn.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the non-bipartivity measure based on the normalized adjacency
3 | % matrix [nonbipn]. It is defined as
4 | %
5 | % lambda_min[N[\bar G]] + 1
6 | %
7 | % where lambda_min[N] is the smallest eigenvalue of the normalized
8 | % adjacency matrix N = D^-1/2 A D^-1/2.
9 | %
10 | % GROUP: square
11 | %
12 | % PARAMETERS
13 | % A Adjacency matrix
14 | % format
15 | % weights
16 | %
17 | % RESULTS
18 | % values Column vector of results
19 | % [1] non-bipartivity value
20 | % [2] lambda_min[N]
21 | %
22 |
23 | function values = konect_statistic_nonbipn(A, format, weights)
24 | % Options handed to konect_matrix() and eigs() below.
25 | opts.disp = 2;
26 | opts.issym = 1;
27 |
28 | consts = konect_consts();
29 | % 'format' is a parameter name in this function, so the format command is issued from the subfunction set_format().
30 | % To show more significant digits when opts.disp = 2.
31 | set_format();
32 |
33 | if format == consts.BIP
34 | error '*** NONBIPN is trivially zero for bipartite networks';
35 | end
36 | % NOTE(review): only non-POSITIVE networks are binarized here, but A is binarized unconditionally after the symmetrization below.
37 | if weights ~= consts.POSITIVE
38 | A = A ~= 0;
39 | end
40 |
41 | A = A + A';
42 | % Symmetric 0/1 pattern: an entry is set when there is an edge in either direction.
43 | A = A ~= 0;
44 | % Presumably restricts A to its largest connected component -- TODO confirm in konect_connect_matrix_square().
45 | [A cc n] = konect_connect_matrix_square(A);
46 | % N is the 'svd-n' matrix of konect_matrix(); the header describes it as D^-1/2 A D^-1/2.
47 | N = konect_matrix('svd-n', A, format, weights, opts);
48 | % Displayed (no semicolon): Frobenius norm of N - N', i.e. how far N is from symmetric.
49 | tmp_2 = norm(N - N', 'fro')
50 | % Displayed (no semicolon): one eigenvalue requested from eigs() with the 'sr' option.
51 | lambda_min = eigs(N, 1, 'sr', opts)
52 |
53 | nonbipn = lambda_min + 1
54 | % Negative results are reported and clamped to zero.
55 | if nonbipn < 0
56 | fprintf(1, 'Warning: negative value of NONBIPN, rounding to zero\n');
57 | nonbipn = 0;
58 | end
59 |
60 | values = [ nonbipn; lambda_min ];
61 |
62 | end
63 | % Helper: switch the numeric display format to long.
64 | function set_format()
65 |
66 | format long;
67 |
68 | end
69 |
70 |
--------------------------------------------------------------------------------
/m/konect_statistic_own.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the balanced inequality ratio value [own]. In other words,
3 | % the number P such that the proportion P of nodes with most degree
4 | % cover the proportion (1-P) of all half-edges. This is the "height"
5 | % of the Lorenz curve, and therefore highly correlated to the Gini
6 | % coefficient.
7 | %
8 | % PARAMETERS
9 | % A Adjacency / biadjacency matrix
10 | % format
11 | % weights
12 | %
13 | % RESULT
14 | % values The "own" values
15 | % [1] Total
16 | % [2,3] Left/right (only BIP)
17 | % [4,5] Out/in (only ASYM)
18 | %
19 | % GROUP+2: bip
20 | % GROUP+3: bip
21 | % GROUP+4: asym
22 | % GROUP+5: asym
23 | %
24 | %
25 |
26 | function values = konect_statistic_own(A, format, weights)
27 |
28 | consts = konect_consts();
29 |
30 | % Edge weights are only taken into account for POSITIVE networks.
31 | % The pooled weight vector repeats every weight once per endpoint,
32 | % matching the pooled index vector [x; y].
33 | if weights == consts.POSITIVE
34 |     [x y z] = find(A);
35 |     q_both = [z; z];
36 |     q_side = z;
37 | else
38 |     [x y] = find(A);
39 |     q_both = [];
40 |     q_side = [];
41 | end
42 |
43 | % [1] All endpoints pooled together
44 | total = konect_own([x; y], q_both);
45 |
46 | % [2..5] Per-side values; slots that do not apply are NaN
47 | if format == consts.BIP
48 |     values = [ total ; konect_own(x, q_side) ; konect_own(y, q_side) ; NaN ; NaN ];
49 | elseif format == consts.ASYM
50 |     values = [ total ; NaN ; NaN ; konect_own(x, q_side) ; konect_own(y, q_side) ];
51 | else
52 |     values = [ total ; NaN ; NaN ; NaN ; NaN ];
53 | end
54 |
--------------------------------------------------------------------------------
/m/konect_data_weights.m:
--------------------------------------------------------------------------------
1 | %
2 | % Data about WEIGHTS values
3 | %
4 | % RETURN VALUES
5 | % negative Whether the adjacency matrix contains negative values
6 | % interval_scale Whether the values are from an interval scale
7 | % multi Whether multiple edges are allowed
8 | %
9 |
10 | function [negative, interval_scale, multi] = konect_data_weights()
11 |
12 | consts = konect_consts();
13 |
14 | % Every WEIGHTS identifier covered by the three tables
15 | ids = [ consts.UNWEIGHTED, consts.POSITIVE, consts.POSWEIGHTED, ...
16 |         consts.SIGNED, consts.MULTISIGNED, consts.WEIGHTED, ...
17 |         consts.MULTIWEIGHTED, consts.DYNAMIC, consts.MULTIPOSWEIGHTED ];
18 |
19 | % Each table is a row vector indexed by the WEIGHTS identifier; all
20 | % entries start out as 0 and only the "yes" entries are set to 1.
21 | all_no = zeros(1, max(ids));
22 |
23 | % Negative values may occur
24 | negative = all_no;
25 | negative([ consts.SIGNED, consts.MULTISIGNED, ...
26 |            consts.WEIGHTED, consts.MULTIWEIGHTED ]) = 1;
27 |
28 | % Values are from an interval scale
29 | interval_scale = all_no;
30 | interval_scale([ consts.WEIGHTED, consts.MULTIWEIGHTED ]) = 1;
31 |
32 | % Multiple edges are allowed
33 | multi = all_no;
34 | multi([ consts.POSITIVE, consts.MULTISIGNED, consts.MULTIWEIGHTED, ...
35 |         consts.DYNAMIC, consts.MULTIPOSWEIGHTED ]) = 1;
36 |
--------------------------------------------------------------------------------
/m/konect_statistic_power.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the power law exponent [power].
3 | %
4 | % PARAMETERS
5 | % A Adjacency/biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values Column vector of values
11 | % for undirected graph: power_a, sigma_a
12 | % for directed graph: power_a, sigma_a, power_u, sigma_u, power_v, sigma_v
13 | % for bipartite graph: power_a, sigma_a, power_u, sigma_u, power_v, sigma_v
14 | % for POSITIVE graphs, append the same on the underlying UNWEIGHTED network
15 | %
16 |
17 | function values = konect_statistic_power(A, format, weights)
18 |
19 | consts = konect_consts();
20 |
21 | % POSITIVE networks are processed twice: once with their own weights
22 | % type and once as UNWEIGHTED. All others are processed once.
23 | if weights == consts.POSITIVE
24 |     weights_used = [ weights ; consts.UNWEIGHTED ];
25 | else
26 |     weights_used = [ consts.UNWEIGHTED ];
27 | end
28 |
29 | % Ignore edge weights.
30 | % NOTE(review): this happens before the loop, so the POSITIVE pass
31 | % also sees a 0/1 matrix -- confirm that this is intended.
32 | A = (A ~= 0);
33 |
34 | is_square = (format == consts.SYM | format == consts.ASYM);
35 | is_two_sided = (format == consts.ASYM | format == consts.BIP);
36 |
37 | values = [];
38 |
39 | for k = 1 : length(weights_used)
40 |
41 |     w = weights_used(k);
42 |
43 |     % All nodes together: symmetrized matrix for unipartite networks,
44 |     % full bipartite adjacency matrix otherwise
45 |     if is_square
46 |         A_all = A | A';
47 |     else % BIP
48 |         [m n] = size(A);
49 |         A_all = [sparse(m,m) A; A' sparse(n,n)];
50 |     end
51 |     [gamma sigma] = konect_power_law_flat(A_all, w);
52 |     values = [ values ; gamma ; sigma];
53 |
54 |     % Both sides separately: A first, then its transpose
55 |     if is_two_sided
56 |         [gamma sigma] = konect_power_law_flat(A, w);
57 |         values = [values ; gamma ; sigma];
58 |         [gamma sigma] = konect_power_law_flat(A', w);
59 |         values = [values ; gamma ; sigma];
60 |     end
61 | end
62 |
--------------------------------------------------------------------------------
/m/private/konect_spectral_distribution_plain2.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the spectral distribution of a symmetric matrix. This is an
3 | % internal function that needs explicit bounds values.
4 | %
5 | % Use the method from:
6 | %
7 | % On Sampling-based Approximate Spectral Decomposition, Sanjiv
8 | % Kumar, Mehryar Mohri, Ameet Talwalkar. ICML 2009.
9 | %
10 | % PARAMETERS
11 | % A (n*n) Symmetric matrix
12 | % lower Lower bound for eigenvalues
13 | % upper Upper bound for eigenvalues
14 | % k Number of bins
15 | %
16 | % ABOUT
17 | % This file is part of the KONECT Matlab Toolbox version 0.3.
18 | % konect.cc
19 | % (c) Jerome Kunegis 2017; this is Free Software released under
20 | % the GPLv3, see COPYING.
21 | %
22 |
23 | function [counts, begins, ends] = konect_spectral_distribution_plain2(A, lower, upper, k)
24 | % l_max caps the size of the sampled submatrix that is passed to eig().
25 | l_max = 2000;
26 |
27 | assert(size(A, 1) == size(A, 2));
28 | % The bare strings and unterminated statements below are displayed as progress output.
29 | 'compute norm for check'
30 | epsi = norm(A - A', 'fro')
31 | if abs(epsi) >= 1e-10, error('*** matrix A is not symmetric'); end
32 |
33 | n = size(A,1)
34 | % Bin edges: k equal-width bins between lower and upper, as column vectors.
35 | begins = (lower + (upper - lower) * (0:(k-1)) / k)';
36 | ends = (lower + (upper - lower) * (1:k) / k)';
37 |
38 |
39 | % Column sample
40 | 'computing permutation'
41 | p = randperm(n);
42 | % Keep the first l = min(n, l_max) indexes of the random permutation.
43 | l = min(n, l_max)
44 | pl = p(1:l);
45 | % C is the principal submatrix of A on the sampled indexes.
46 | 'building sampled matrix'
47 | C = A(pl, pl);
48 |
49 | 'call eig()'
50 | dd = eig(full(C));
51 | 'done'
52 |
53 | dd'
54 | % Scale the eigenvalues of the sample by n/l.
55 | dda = dd * (n / l);
56 | % Bins are half-open [begin, end): a value exactly equal to 'upper' is not counted.
57 | for i = 1 : k
58 |
59 | counts(i) = sum(dda >= begins(i) & dda < ends(i));
60 |
61 | end
62 |
63 | counts = counts'
64 | % Rescale so that the counts sum to n (this yields NaN when no value fell into any bin).
65 | counts = counts * (n / sum(counts))
66 |
67 |
--------------------------------------------------------------------------------
/m/konect_map.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the mean average precision (MAP).
3 | %
4 | % Target values can only be 0/1. Other values are rounded to 0/1.
5 | %
6 | % PARAMETERS
7 | % prediction (e*1) Ranking scores; may be any real numbers
8 | % T_test (e*3)
9 | % First column: row indexes
10 | % Second column: column indexes
11 | % Third column: target values (0/1)
12 | %
13 | % RESULT
14 | % precision The MAP value
15 | %
16 |
17 | function precision = konect_map(prediction, T_test)
18 |
19 | e = size(prediction, 1);
20 |
21 | % Binarize the targets: anything positive counts as 1, the rest as 0
22 | T_test(:,3) = T_test(:,3) > 0;
23 |
24 | % Shuffle predictions and test entries with the same random
25 | % permutation (presumably so that ties are not broken by input order)
26 | shuffle = randperm(e);
27 | prediction = prediction(shuffle);
28 | T_test = T_test(shuffle,:);
29 |
30 | % Order everything by decreasing prediction score
31 | [b,order] = sort(-prediction);
32 | prediction = prediction(order);
33 | T_test = T_test(order,:);
34 |
35 | % One average precision per distinct row index (first column)
36 | uids = unique(T_test(:,1));
37 | k = size(uids,1);
38 |
39 | ap_sum = 0;
40 | count = 0;
41 |
42 | t = konect_timer(k);
43 |
44 | for i = 1:k
45 |
46 |     if mod(i,250) == 0
47 |         t = konect_timer_tick(t, i);
48 |     end
49 |
50 |     % Targets of this row index, already in ranking order
51 |     mask = (T_test(:,1) == uids(i));
52 |     target_i = T_test(mask,3);
53 |     n = nnz(mask);
54 |     target_sum = sum(target_i);
55 |
56 |     % Groups whose targets are all 0 or all 1 are left out
57 |     if target_sum == 0 | target_sum == n
58 |         continue;
59 |     end
60 |
61 |     ap_sum = ap_sum + konect_ap_sorted(target_i);
62 |     count = count + 1;
63 |
64 | end
65 |
66 | konect_timer_end(t);
67 |
68 | % Mean over the groups that were counted
69 | precision = ap_sum / count;
70 |
--------------------------------------------------------------------------------
/m/konect_statistic.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute a network statistic, i.e., a numerical characteristic of a
3 | % network. This is a wrapper function that takes the name of the
4 | % statistic as input and calls the actual function. The functions
5 | % for each statistic have names konect_statistic_$statistic().
6 | %
7 | % A statistic in KONECT is a numerical characteristic of a network,
8 | % e.g., the diameter or the clustering coefficient.
9 | %
10 | % Note that in this interface, isolated nodes (i.e., nodes without
11 | % edges) are not considered part of the network -- this is because it
12 | % allows us to consider the change of a network statistic over time.
13 | % This difference is relevant for elementary statistics such as the size
14 | % (number of nodes) and the fill (edge probability). For advanced
15 | % statistics such as the clustering coefficient, it does not make a
16 | % difference.
17 | %
18 | % ARGUMENTS
19 | % statistic The statistic to compute, as a string
20 | % A Half-adjacency / biadjacency matrix
21 | % format The network format
22 | % weights Edge weight type
23 | % opts (optional) The variable opts.disp can be
24 | % set to control output
25 | %
26 | % RETURN VALUE
27 | % values Column vector of values. The first value is the
28 | % statistic itself. Subsequent values may denote
29 | % additional values, such as an error on the main value.
30 | %
31 |
32 | function values = konect_statistic(statistic, A, format, weights, opts)
33 |
34 | % Resolve konect_statistic_<statistic>() by name
35 | handler = str2func(sprintf('konect_statistic_%s', statistic));
36 |
37 | % OPTS is only forwarded when the caller supplied it, so that
38 | % statistic functions taking three arguments keep working.
39 | args = { A, format, weights };
40 | if exist('opts', 'var')
41 |     args{end+1} = opts;
42 | end
43 |
44 | values = handler(args{:});
45 |
--------------------------------------------------------------------------------
/m/konect_pa.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the preferential attachment exponent [1].
3 | %
4 | % In short, this takes a set of old and new edges, and computes a number
5 | % β ≥ 0 that characterizes to what extent preferential attachment is
6 | % present in the network. Zero denotes no preferential attachment and one
7 | % denotes linear preferential attachment (as usually defined).
8 | %
9 | % PARAMETERS
10 | % T1 (m1×{2,3}) all old edges (with optional weights)
11 | % T2 (m2×{2,3}) all new edges (with optional weights)
12 | %
13 | % If T2 is not given, T1 is understood to contain all edges, from oldest
14 | % to newest, and is split in a 2/3+1/3 fashion.
15 | %
16 | % RETURN VALUE
17 | % beta The preferential attachment exponent β
18 | %
19 | % REFERENCES
20 | % [1] Preferential Attachment in Online Networks: Measurement and
21 | % Explanations; Jérôme Kunegis, Marcel Blattner and Christine Moser;
22 | % Proc. Web Science Conf., 2013, pp. 205--214.
23 | %
24 |
25 | function [beta] = konect_pa(T1, T2)
26 |
27 | if ~exist('T2', 'var')
28 |     % Only one edge list given: the rows before M_SPLIT become the old
29 |     % edges, the rows from M_SPLIT on become the new edges. M and
30 |     % M_SPLIT are displayed (no semicolon).
31 |     assert(size(T1,2) == 2 | size(T1,2) == 3);
32 |     m = size(T1,1)
33 |     m_split = floor((2/3) * m)
34 |     T2 = T1(m_split:end, :);
35 |     T1 = T1(1:(m_split-1), :);
36 | end
37 |
38 | assert(size(T1,2) == 2 | size(T1,2) == 3);
39 | assert(size(T2,2) == 2 | size(T2,2) == 3);
40 |
41 | [i_1 w_1] = pa_halfedges(T1);
42 | [i_2 w_2] = pa_halfedges(T2);
43 |
44 | [ret ret_data] = konect_pa_full(i_1, w_1, i_2, w_2);
45 |
46 | % Displayed (no semicolon)
47 | beta = ret.a(1)
48 |
49 |
50 | %
51 | % Stack both endpoint columns of an edge list into one index vector.
52 | % The weight vector repeats the third column once per endpoint; it is
53 | % the scalar 1 when the edge list has no third column.
54 | %
55 | function [idx w] = pa_halfedges(T)
56 |
57 | idx = [T(:,1); T(:,2)];
58 | if size(T,2) == 3
59 |     w = [T(:,3); T(:,3)];
60 | else
61 |     w = 1;
62 | end
63 |
--------------------------------------------------------------------------------
/m/konect_decomposition_dedicom3.m:
--------------------------------------------------------------------------------
1 | %
2 | % Implementation of the method from
3 | %
4 | % [1] A Generalization of Takane's Algorithm for DEDICOM, Henk A. L. Kiers,
5 | % Jos M. F. Ten Berge, Yoshio Takane, Jan de Leeuw, Psychometrika, 55,
6 | % 151-158, 1990.
7 | %
8 | % PARAMETERS
9 | % A (n*n) Adjacency matrix of directed graph
10 | % r Rank of the decomposition
11 | % enable_alpha (0/1) whether to enable the alpha term. When
12 | % enabled, the algorithm is slower but has better convergence
13 | % behavior.
14 | % opts Passed to eigs()/svds()
15 | %
16 | % RESULT
17 | % U (n*r) Eigenvector matrix
18 | % D (r*r) Nondiagonal and asymmetric eigenvalue matrix
19 | %
20 |
21 | function [U D] = konect_decomposition_dedicom3(A, r, enable_alpha, opts)
22 |
23 | epsilon = 1e-7;   % Threshold on the mean squared change of D
24 | maxit = 200;      % Upper bound on the number of iterations
25 |
26 | n = size(A,1);
27 |
28 | % U and D are updated in turn. U starts as the first output of svds()
29 | % and is re-orthogonalized by a QR decomposition in every iteration.
30 |
31 | [U dx v] = svds(double(A), r, 'L', opts);
32 |
33 | if enable_alpha
34 |     norm_a = dx(1,1);
35 | end
36 |
37 | D = zeros(r,r);
38 |
39 | for it = 1:maxit
40 |
41 |     D_prev = D;
42 |     D = U' * A * U;
43 |
44 |     U_next = (A * U) * (U' * A' * U) + (A' * U) * (U' * A * U);
45 |     if enable_alpha
46 |         alpha = abs(norm_a * norm(D));
47 |         U_next = U_next + 2 * alpha * U;
48 |     end
49 |
50 |     [U rr] = qr(U_next, 0);
51 |
52 |     % Convergence is only checked (and reported) every 10th iteration
53 |     if rem(it,10) ~= 0
54 |         continue;
55 |     end
56 |
57 |     dif = norm(D - D_prev, 'fro')^2 / numel(D);
58 |     fprintf(1, 'iteration %d dif= %g\n', it, dif);
59 |     if dif < epsilon, break; end;
60 | end
61 |
62 | [U D] = konect_order_dedicom(U, D);
63 |
--------------------------------------------------------------------------------
/m/konect_styles_statistic.m:
--------------------------------------------------------------------------------
1 | %
2 | % Colors used for drawing statistic graphs consistently.
3 | %
4 | % RESULT
5 | % colors Struct by statistic
6 | % .(statistic) (1*3) Color
7 | % line_styles Struct by statistic
8 | % .(statistic) (string) line style
9 | % markers Struct by statistic
10 | % .(statistic) (string) marker
11 | %
12 |
13 | function [colors line_styles markers] = konect_styles_statistic()
14 |
15 | % No line styles or markers are defined: both structs stay empty.
16 | line_styles = struct();
17 | markers = struct();
18 |
19 | % One row per statistic: field name and RGB color.
20 | % NOTE(review): 'alcon' may be meant to be 'alconn' -- confirm
21 | % against the name of the corresponding statistic.
22 | palette = { ...
23 |     'power2',          [ 1 0 0 ]; ...
24 |     'gini',            [ 0 1 0 ]; ...
25 |     'own',             [ 0 1 1 ]; ...
26 |     'dentropy',        [ 0 0 1 ]; ...
27 |     'dentropyn',       [ 1 0 1 ]; ...
28 |     'dentropy2',       [ .8 .8 0 ]; ...
29 |     'volume',          [ 0 0 0 ]; ...
30 |     'twostars',        [ 1 0 0 ]; ...
31 |     'triangles',       [ .7 .7 0 ]; ...
32 |     'diam',            [ 0 1 1 ]; ...
33 |     'meandist',        [ 0 .8 .8 ]; ...
34 |     'squares',         [ 0 0 1 ]; ...
35 |     'snorm',           [ 1 1 0 ]; ...
36 |     'alcon',           [ 0 .6 0 ]; ...
37 |     'threestars',      [ 0 .8 0 ]; ...
38 |     'fourstars',       [ .7 0 1 ]; ...
39 |     'clusco',          [ .6 .4 0 ]; ...
40 |     'size',            [0.12 0.99 0.74]; ...
41 |     'avgdegree',       [0.23 0.13 0.11]; ...
42 |     'twostars_norm_d', [0.65 0.16 0.75]; ...
43 |     'maxdegree',       [0.45 0.99 0.44]; ...
44 |     'nonbip',          [0.92 0.80 0.79]; ...
45 |     'assortativity',   [0.07 0.29 0.90]; ...
46 |     'jain',            [0.66 0.21 0.92]; ...
47 |     'relmaxdegree',    [0.29 0.02 0.70]; ...
48 |     'cocorelinv',      [0.14 0.64 0.43]; ...
49 |     'degone',          [0.91 0.00 0.35]; ...
50 |     'diameff50',       [0.29 0.70 0.28]; ...
51 |     'diameff90',       [0.60 0.64 0.46]; ...
52 |     'power',           [0.02 0.19 0.92]; ...
53 | };
54 |
55 | % Fields are added in table order
56 | colors = struct();
57 | for k = 1 : size(palette, 1)
58 |     colors.(palette{k,1}) = palette{k,2};
59 | end
60 |
61 |
--------------------------------------------------------------------------------
/m/konect_decomposition_lap.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the Laplacian decomposition of a network's largest
3 | % connected component.
4 | %
5 | % RESULT
6 | % U (m*r) Eigenvectors of the Laplacian matrix (only left
7 | % nodes when the graph is bipartite)
8 | % V (n*r) Eigenvectors of the Laplacian matrix (only for
9 | % bipartite networks / right nodes; [] otherwise)
10 | % D (r*r) Eigenvalues of the Laplacian matrix; nonnegative
11 | %
12 | % PARAMETERS
13 | % A (m*n) Adjacency or biadjacency matrix
14 | % r Rank of the decomposition
15 | % format
16 | % weights
17 | % opts (optional) Passed to eigs()
18 | %
19 |
20 | function [U D V] = konect_decomposition_lap(A, r, format, weights, opts)
21 |
22 | if ~exist('opts', 'var')
23 |     opts = struct();
24 | end
25 |
26 | consts = konect_consts();
27 | [negative] = konect_data_weights();
28 |
29 | if format == consts.BIP
30 |
31 |     % Bipartite: the eigenvectors of L are split into the first m
32 |     % rows (left nodes) and the following n rows (right nodes), and
33 |     % each part is passed through konect_connect_back()
34 |     [A cc1 cc2 n] = konect_connect_matrix_bipartite(A);
35 |     [m,n] = size(A);
36 |     L = konect_matrix('lap', A, format, weights, opts);
37 |     r = min([r (m-1) (n-1)]);
38 |     [uu,D] = konect_eigl(L, r, opts);
39 |     U = konect_connect_back(cc1, uu(1:m, :));
40 |     V = konect_connect_back(cc2, uu((m+1):(m+n), :));
41 |
42 | else
43 |
44 |     % Unipartite: a single eigenvector matrix, V stays empty
45 |     [A cc n] = konect_connect_matrix_square(A);
46 |     L = konect_matrix('lap', A, format, weights, opts);
47 |     r = min(r, size(A,1) - 1);
48 |     [U,D] = konect_eigl(L, r, opts);
49 |     U = konect_connect_back(cc, U);
50 |     V = [];
51 |
52 | end
53 |
54 | % Without negative weights the first eigenvalue is known to be exactly
55 | % zero, although eigs() may numerically return values as high as 1e-15.
56 | if ~negative(weights)
57 |     D(1,1) = 0;
58 | end
59 |
60 | % Negative eigenvalues are replaced by zero
61 | below_zero = diag(D) < 0;
62 | D(below_zero, below_zero) = 0;
63 |
64 |
/m/konect_significance_plot.m:
--------------------------------------------------------------------------------
1 | %
2 | % Generate a significance plot. This plots an m*m matrix of pairwise
3 | % comparisons between m methods. The legend for this plot is
4 | % generated by konect_significance_legend().
5 | %
6 | % PARAMETERS
7 | % values (m*n) Matrix of values; there are m methods (one per
8 | % row) with n values each
9 | % p_threshold
10 | % maxdiff
11 | % labels
12 |
13 | function konect_significance_plot(values, p_threshold, maxdiff, labels)
14 |
15 | % Layout of the text labels
16 | font_size = 13;
17 | displacement_x = 0.3;
18 | displacement_y = 0.2;
19 | rotation_y = 27;
20 |
21 | [m n] = size(values);
22 |
23 | % P(i,j): p-value returned by ttest() for rows i and j of VALUES
24 | % D(i,j): mean of the elementwise difference between rows i and j
25 | P = zeros(m, m);
26 | D = zeros(m, m);
27 |
28 | for i = 1 : m
29 |     row_i = values(i, :);
30 |     for j = 1 : m
31 |         row_j = values(j, :);
32 |         [h p] = ttest(row_i, row_j);
33 |         P(i, j) = p;
34 |         D(i, j) = mean(row_i - row_j);
35 |     end
36 | end
37 |
38 | % Both matrices are displayed (no semicolon)
39 | P
40 | D
41 |
42 | I = konect_significance_image(P, D, p_threshold, maxdiff);
43 |
44 | image(I);
45 |
46 | % Two text labels per row of VALUES: one at (displacement_x, i) and
47 | % one at (i, m+1-displacement_y), the latter rotated by ROTATION_Y
48 | text_style = { 'HorizontalAlignment', 'Right', 'VerticalAlignment', 'Middle', 'FontSize', font_size };
49 | for i = 1 : m
50 |     text(0+displacement_x, i , labels(i), text_style{:});
51 |     text(i, m+1-displacement_y, labels(i), text_style{:}, 'Rotation', rotation_y);
52 | end
53 |
54 | % The axis ticks are removed; the text labels above replace them
55 | set(gca, 'XTick', [], 'YTick', []);
56 |
57 | axis square;
58 |
--------------------------------------------------------------------------------
/m/konect_statistic_anticonflict.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the smallest eigenvalue of the signless Laplacian K = D + A
3 | % of the underlying unweighted graph, multiplied by n/(8m). This is a
4 | % measure of bipartivity in the range [0, 1/2]. The matrix A is
5 | % computed by ignoring edge weights. Multiple edges are not taken
6 | % into account. The computation is restricted to the network's
7 | % largest connected component.
8 | %
9 | % GROUP: square
10 | %
11 | % PARAMETERS
12 | % A Adjacency or biadjacency matrix
13 | % format
14 | % weights
15 | %
16 | % RESULT
17 | % values
18 | % [1] The smallest eigenvalue * n/(8m); zero for bipartite networks
19 | % [2] The eigenvalue itself [nonbipal], \chi
20 | %
21 |
22 | function values = konect_statistic_anticonflict(A, format, weights)
23 |
24 | consts = konect_consts();
25 | % opts is passed on to konect_eigl() below.
26 | opts.disp = 2;
27 |
28 | if format == consts.BIP
29 | % We know it is zero. NOTE(review): only one value is returned here, whereas the header documents two -- confirm callers.
30 | values = [ 0 ];
31 | return;
32 | end
33 |
34 | % Ignore edge weights and multiplicities
35 | A = ( A ~= 0 );
36 | % Symmetrize the 0/1 pattern, then keep only its upper triangle.
37 | A = A | A';
38 | A = triu(A);
39 |
40 | % Restrict computation to the largest connected component
41 | A = konect_connect_matrix_square(A);
42 | % 'lapqu' presumably selects the signless Laplacian named in the header -- TODO confirm in konect_matrix().
43 | K = konect_matrix('lapqu', A, format, weights);
44 | % NOTE(review): 'lm' is passed to konect_eigl() although the smallest eigenvalue is wanted -- confirm its meaning there.
45 | [U D] = konect_eigl(K, 1, opts, 'lm');
46 |
47 | lambda_min_K = D(1,1)
48 |
49 | if lambda_min_K < 0
50 | % K is positive semi-definite, so we know that this is a numerical
51 | % error. Round to zero.
52 | lambda_min_K = 0
53 | end
54 | % n counts the nodes with a nonzero row or column sum; m counts the nonzero entries of A. Both are displayed.
55 | A_abs = konect_absx(A);
56 |
57 | n = nnz(sum(A_abs, 1)' + sum(A_abs, 2))
58 | m = nnz(A)
59 |
60 | anticonflict = lambda_min_K * n / 8 / m
61 | % Sanity checks against the range [0, 1/2] documented in the header.
62 | assert(anticonflict >= 0);
63 | assert(anticonflict <= 0.5);
64 |
65 | values = [ anticonflict ; lambda_min_K ];
66 |
--------------------------------------------------------------------------------
/m/konect_mauc.m:
--------------------------------------------------------------------------------
1 | %
2 | % Mean area under the curve (MAUC).
3 | %
4 | % PARAMETERS
5 | % prediction (e*1)
6 | % T_test (e*3)
7 | % First column: row indexes
8 | % Second column: column indexes
9 | % Third column: target values
10 | %
11 | % RESULT
12 | % precision The MAUC value
13 | %
14 |
15 | function precision = konect_mauc(prediction, T_test)
16 |
17 | r = size(prediction,1);
18 |
19 | % Binarize the targets: anything positive counts as 1, the rest as 0
20 | T_test(:,3) = T_test(:,3) > 0;
21 |
22 | % Shuffle predictions and test entries with the same random permutation
23 | shuffle = randperm(r);
24 | prediction = prediction(shuffle);
25 | T_test = T_test(shuffle,:);
26 |
27 | % Order everything by decreasing prediction score
28 | [b,order] = sort(-prediction);
29 | prediction = prediction(order);
30 | T_test = T_test(order,:);
31 |
32 | % One AUC per distinct row index (first column)
33 | uids = unique(T_test(:,1));
34 | k = size(uids,1);
35 |
36 | auc_sum = 0;
37 | count = 0;
38 |
39 | t = konect_timer(k);
40 |
41 | for i = 1:k
42 |
43 |     if mod(i, 100) == 0
44 |         t = konect_timer_tick(t, i);
45 |     end;
46 |
47 |     % Targets of this row index, already in ranking order
48 |     mask = (T_test(:,1) == uids(i));
49 |     target_i = T_test(mask,3);
50 |     n = nnz(mask);
51 |     target_sum = sum(target_i);
52 |
53 |     % Groups whose targets are all 0 or all 1 are left out
54 |     if target_sum == 0 | target_sum == n
55 |         continue;
56 |     end
57 |
58 |     % For every 1-target, count the 0-targets ranked after it; the
59 |     % total is the number of correctly ordered (1, 0) pairs
60 |     is_zero = (target_i == 0);
61 |     zeros_total = n - target_sum;
62 |     zeros_after = zeros_total - cumsum(is_zero);
63 |     s = sum(zeros_after(~is_zero));
64 |
65 |     auc_i = s / (target_sum * (n - target_sum));
66 |     auc_sum = auc_sum + auc_i;
67 |     count = count + 1;
68 |
69 | end
70 |
71 | konect_timer_end(t);
72 |
73 | % Mean over the groups that were counted
74 | precision = auc_sum / count;
75 |
--------------------------------------------------------------------------------
/m/konect_statistic_tour4.m:
--------------------------------------------------------------------------------
1 | %
2 | % The number of 4-tours in a graph. This is similar to the number
3 | % of 4-cycles (squares), but allows nodes to overlap, and is
4 | % therefore easier to compute. The multiplicity of edges and loops
5 | % are ignored.
6 | %
7 | % PARAMETERS
8 | % A
9 | % format
10 | % weights
11 | % opts (optional)
12 | %
13 | % RESULT
14 | % values Column vector of results
15 | % [1] Number of 4-tours
16 | %
17 |
18 | function values = konect_statistic_tour4(A, format, weights, opts)
19 |
20 | % Largest number of doubles to be held as temporary memory. The
21 | % chunk width below is derived from it.
22 | size_resident = 1e7;
23 |
24 | consts = konect_consts();
25 |
26 | % A bipartite network is expanded to its full square adjacency matrix
27 | if format == consts.BIP
28 |     [n1 n2] = size(A);
29 |     A = [sparse(n1,n1), A; A', sparse(n2,n2)];
30 | end
31 |
32 | n = size(A, 1);
33 |
34 | % Symmetric 0/1 pattern
35 | A = konect_absx(A);
36 | A = A | A';
37 |
38 | % Matrix multiplication does not work with logical matrices, hence
39 | % the conversion to double.
40 | A = double(A);
41 |
42 | % Loops are ignored: clear the diagonal
43 | A = A - spdiags(diag(A), [0], n, n);
44 |
45 | % Columns are processed in chunks of at least one column
46 | size_chunk = floor(size_resident / n);
47 | if size_chunk < 1, size_chunk = 1; end;
48 |
49 | [k from to] = konect_fromto(1, n, size_chunk);
50 |
51 | count_total = 0;
52 |
53 | t = konect_timer(n);
54 |
55 | for j = 1 : k
56 |
57 |     t = konect_timer_tick(t, to(j));
58 |
59 |     % Entries of A * A * A restricted to this chunk of columns,
60 |     % masked by A and summed up
61 |     A_chunk = A(:,from(j):to(j));
62 |     walks3 = A * (A * A_chunk);
63 |     count_j = sum(sum(A_chunk .* walks3, 1), 2);
64 |
65 |     count_total = count_total + count_j;
66 |
67 | end
68 |
69 | konect_timer_end(t);
70 |
71 | values = count_total;
72 |
73 | % The result must be a nonnegative integer
74 | assert(values >= 0);
75 | assert(values == floor(values));
76 |
--------------------------------------------------------------------------------
/m/konect_contains_triangle.m:
--------------------------------------------------------------------------------
1 | %
2 | % Determine whether a given graph contains at least one triangle. This
3 | % function uses the same algorithm and implementation as
4 | % konect_statistic_triangles(), but aborts on finding the first
5 | % triangle. Refer to that function for documentation.
6 | %
7 | % This function uses a timer in the same way as
8 | % konect_statistic_triangles(), whose iteration is aborted as soon as a
9 | % triangle is found.
10 | %
11 | % PARAMETERS
12 | % A Adjacency matrix
13 | % format
14 | % weights
15 | %
16 | % RESULT
17 | % ret (0/1) Whether the given graph contains at least one triangle
18 | %
19 |
20 | function ret = konect_contains_triangle(A, format, weights)
21 |
22 | % Largest number of doubles to be held as temporary memory. The
23 | % chunk width below is derived from it.
24 | size_resident = 1e7;
25 |
26 | consts = konect_consts();
27 |
28 | % Only unipartite networks (SYM or ASYM) are accepted
29 | if format ~= consts.SYM & format ~= consts.ASYM
30 |     ret = 0;
31 |     error('*** Expected graph to be unipartite');
32 | end
33 |
34 | n = size(A, 1);
35 |
36 | % Symmetric 0/1 pattern
37 | A = konect_absx(A);
38 | A = A | A';
39 |
40 | % Matrix multiplication does not work with logical matrices, hence
41 | % the conversion to double.
42 | A = double(A);
43 |
44 | % Clear the diagonal, to exclude triangles that contain loops
45 | A = A - spdiags(diag(A), [0], n, n);
46 |
47 | % Columns are processed in chunks of at least one column
48 | size_chunk = floor(size_resident / n);
49 | if size_chunk < 1, size_chunk = 1; end;
50 |
51 | [k from to] = konect_fromto(1, n, size_chunk);
52 |
53 | t = konect_timer(n);
54 |
55 | ret = 0;
56 |
57 | for j = 1 : k
58 |
59 |     t = konect_timer_tick(t, to(j));
60 |
61 |     % Entries of A * A restricted to this chunk of columns, masked by
62 |     % A and summed up; nonzero as soon as the chunk closes a triangle
63 |     A_chunk = A(:,from(j):to(j));
64 |     count_j = sum(sum(A_chunk .* (A * A_chunk), 1), 2);
65 |
66 |     % Abort on the first hit; konect_timer_end() is not reached then
67 |     if count_j ~= 0
68 |         ret = 1;
69 |         return;
70 |     end
71 | end
72 |
73 | konect_timer_end(t);
74 |
--------------------------------------------------------------------------------
/m/konect_statistic_fill.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the fill of the network, i.e., the proportion of possible
3 | % edges that exist. For networks with multiple edges, edge
4 | % multiplicities are ignored.
5 | %
6 | % Note: the definition of the fill in KONECT takes into account that
7 | % loops are possible.
8 | %
9 | % Note: As always in KONECT, we consider isolated nodes (i.e., nodes
10 | % without edges) to *not* be part of the network, because that allows us
11 | % to consider the change of each statistic over time.
12 | %
13 | % PARAMETERS
14 | % A Adjacency or biadjacency matrix
15 | % format Format of the network
16 | % weights Weights of the network
17 | %
18 | % RESULT
19 | % values Column vector of values
20 | % [1] The fill, taking into account loops if they
21 | % are present
22 | % [2] The fill, ignoring loops
23 | %
24 |
25 | function values = konect_statistic_fill(A, format, weights)
26 |
27 | consts = konect_consts();
28 |
29 | if format == consts.SYM | format == consts.ASYM
30 |
31 |     % Note: We assume that it is impossible that a network allows
32 |     % loops when it has no loops; this is checked by check.m.
33 |
34 |     d = (diag(A) ~= 0);
35 |     count_loops = sum(d);
36 |     has_loops = count_loops > 0;
37 |
38 | end
39 |
40 | % Edge weights and multiplicities are ignored in all formats. Working
41 | % on the 0/1 pattern also keeps weights of opposite sign from
42 | % cancelling out in the row and column sums used to count nodes.
43 | A = ( A ~= 0 );
44 |
45 | m = nnz(A);
46 |
47 | if format == consts.SYM
48 |
49 |     % Nodes with at least one edge
50 |     n = nnz(sum(A, 1)' + sum(A, 2));
51 |
52 |     values = [ 2 * m / (n * (n - 1 + 2 * has_loops)); ...
53 |                2 * (m - count_loops) / (n * (n-1)) ];
54 |
55 | elseif format == consts.ASYM
56 |
57 |     % Nodes with at least one edge
58 |     n = nnz(sum(A, 1)' + sum(A, 2));
59 |
60 |     values = [ m / (n * (n - 1 + has_loops)); ...
61 |                (m - count_loops) / (n * (n-1)) ];
62 |
63 | elseif format == consts.BIP
64 |
65 |     % Isolated nodes are not part of the network (see the header), so
66 |     % count the non-empty rows and columns instead of using size(A),
67 |     % in line with the SYM and ASYM cases.
68 |     n1 = nnz(sum(A, 2));
69 |     n2 = nnz(sum(A, 1));
70 |
71 |     % A bipartite network has no loops: both values coincide
72 |     p = m / (n1 * n2);
73 |
74 |     values = [ p ; p ];
75 |
76 | else
77 |
78 |     error('*** Invalid format');
79 |
80 | end
81 |
82 |
83 |
--------------------------------------------------------------------------------
/m/konect_eign.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the eigenvalue decomposition of a normalized matrix. By a
3 | % normalized matrix, we mean a matrix whose eigenvalues are in the
4 | % range [-1, +1]. This implementation uses a special decomposition
5 | % method that is faster than just using eigs().
6 | %
7 | % Two methods are supported; see below.
8 | %
9 | % ARGUMENTS
10 | % A Normalized matrix
11 | % r Rank
12 | % method (optional) The method to use
13 | % 0 [power iteration] eigs('lm'); uses less memory but is slower
14 | % 1 [inverse iteration] eigs(1+epsilon); faster but uses more memory
15 | % There is a subtle difference between the two
16 | % methods: the LM method returns the R largest
17 | % eigenvalues by absolute values, while the
18 | % EPSILON method returns the R/2 largest and
19 | % R-R/2 smallest eigenvalues.
20 | %
21 | % RESULT
22 | % u,d Eigenvector decomposition
23 | %
24 |
function [u d] = konect_eign(A, r, varargin)
% Eigenvalue decomposition of a normalized matrix (eigenvalues assumed
% to lie in [-1, +1]).
%
% ARGUMENTS
%   A        Normalized matrix
%   r        Rank, i.e. number of eigenpairs to compute
%   method   (optional, first element of varargin)
%            0  eigs(..., 'lm'):  R largest eigenvalues by absolute value
%            1  shifted eigs() around +1+epsilon and -1-epsilon:
%               round(R/2) eigenvalues nearest +1 and the remaining
%               ones nearest -1
%            Default: 1 when nnz(A) < 1000000, 0 otherwise.
%
% RESULT
%   u        Eigenvectors, one per column
%   d        Diagonal matrix of eigenvalues.  For method 1 the pairs are
%            sorted by decreasing absolute eigenvalue.

METHOD_LM = 0;
METHOD_EPSILON = 1;

if nargin > 2
    method = varargin{1};
elseif nnz(A) < 1000000
    method = METHOD_EPSILON;
else
    method = METHOD_LM;
end

% Let eigs() report its progress
opts.disp = 2;

if method == METHOD_LM
    [u, d] = eigs(A, r, 'lm', opts);
    return;
end

% Split the requested rank between both ends of the spectrum
r_pos = round(r / 2);
r_neg = r - r_pos;

% Shifts are placed slightly outside [-1, +1]
epsilon = 1e-3;

[u_pos, d_pos] = eigs(A, r_pos, 1 + epsilon, opts);
dd_pos = diag(d_pos);

if r_neg > 0
    [u_neg, d_neg] = eigs(A, r_neg, -1 - epsilon, opts);
    dd_neg = diag(d_neg);
else
    % Happens for r == 1.  The empty eigenvalue list must be a column
    % (0-by-1) so that the vertical concatenation below is
    % dimensionally consistent; a 1-by-0 array is rejected by Octave.
    u_neg = zeros(size(A, 1), 0);
    dd_neg = zeros(0, 1);
end

u = [u_pos u_neg];
dd = [dd_pos; dd_neg];

% Order the eigenpairs by decreasing absolute eigenvalue
[unused, order] = sort(-abs(dd));
u = u(:, order);
d = diag(dd(order));
75 |
--------------------------------------------------------------------------------
/m/konect_significance_legend_bw.m:
--------------------------------------------------------------------------------
1 | %
2 | % Same as konect_significance_legend(), but in black-and-white.
3 | %
4 | % PARAMETERS
5 | % p_threshold
6 | % maxdiff
7 | % label_measure
8 | %
9 |
function konect_significance_legend_bw(p_threshold, maxdiff, label_measure)
% Draw the legend for the black-and-white significance plot into the
% current axes.
%
% The legend is an R-by-R grid of cells.  The X axis is the p-value, the
% Y axis is the difference of the measure.  In each cell with a p-value
% below P_THRESHOLD a bar is drawn whose angle encodes the difference
% and whose gray level encodes the p-value (black = 0, white = at the
% threshold), using the same formulas as konect_significance_plot_bw().
%
% PARAMETERS
%   p_threshold    p-value threshold; cells at or above it stay empty
%   maxdiff        Scale of the differences used in the angle formula
%   label_measure  Name of the measure, used in the Y axis label

font_size = 59;
len = 0.9;          % Length of bars relative to cell width
line_width = 8;

% Resolution: number of cells along each axis
r = 10;

% The axes extend beyond the threshold / the scale by these factors
range_p = 1.3;
range_diff = 1.2;

% Range of values
p_min = 0;
p_max = range_p * p_threshold;
d_max = 2 * range_diff * maxdiff;
d_min = - d_max;

% Size of one cell in axis units
cell_w = (p_max - p_min) / r;
cell_h = (d_max - d_min) / r;

hold on;
axis([p_min p_max d_min d_max]);
axis square;

for i = 1 : r
    for j = 1 : r

        % Values represented by the center of cell (i, j).  The bar is
        % drawn in the very cell whose center has the difference D, so
        % that the bars agree with the Y axis ticks.  (The Y axis is
        % not reversed here, so no row flipping is needed.)
        p = p_min + (i - 1/2) * cell_w;
        d = d_min + (j - 1/2) * cell_h;
        if p >= p_threshold, continue; end

        % Angle of the bar, in (0, pi/2)
        theta = (atan(d / maxdiff * pi / 2) + pi / 2) / 2;

        % Half extent of the bar, in cell units
        dx = (1/2) * len * cos(theta);
        dy = (1/2) * len * sin(theta);

        line(...
            [ (p_min + cell_w * (i - 1/2 + dx)) ...
              (p_min + cell_w * (i - 1/2 - dx)) ], ...
            [ (d_min + cell_h * (j - 1/2 - dy)) ...
              (d_min + cell_h * (j - 1/2 + dy)) ], ...
            'Color', [1 1 1] * min(1, p / p_threshold), ...
            'LineWidth', line_width);
    end
end

set(gca, 'FontSize', font_size);

% NOTE(review): the tick positions are hard-coded and only match
% p_threshold = 0.05 and a difference range that includes +/- 0.2.
set(gca, 'XTick', [0 .05], ...
         'XTickLabels', { '0', '0.05' });
set(gca, 'YTick', [-0.2, 0, +0.2 ], ...
         'YTickLabels', { '-0.2', '0', '+0.2' });
set(gca, 'TickLength', [ 0 0 ]);

xlabel('p-value', 'FontSize', font_size);
ylabel(sprintf('%s_x - %s_y', label_measure, label_measure), 'FontSize', font_size);

end
71 |
--------------------------------------------------------------------------------
/m/konect_statistic_power3.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the power law exponent using the correct and slow method, with p-values [power3].
3 | %
4 | % PARAMETERS
5 | % A Adjacency/biadjacency matrix
6 | % format
7 | % weights
8 | %
9 | % RESULT
10 | % values Column vector of values
11 | % Each group of five: power_a, xmin, L, p, gof
12 | % for undirected graphs: (all) (...) (...) (...) (...)
13 | % for directed graph: (all) (...) (...) (out) (in)
14 | % for bipartite graph: (all) (left) (right) (...) (...)
15 | % [1-5] ALL
16 | % [6-15] BIP
17 | % [16-25] ASYM
18 | %
19 | % GROUP+6: BIP
20 | % GROUP+7: BIP
21 | % GROUP+8: BIP
22 | % GROUP+9: BIP
23 | % GROUP+10: BIP
24 | % GROUP+11: BIP
25 | % GROUP+12: BIP
26 | % GROUP+13: BIP
27 | % GROUP+14: BIP
28 | % GROUP+15: BIP
29 | % GROUP+16: ASYM
30 | % GROUP+17: ASYM
31 | % GROUP+18: ASYM
32 | % GROUP+19: ASYM
33 | % GROUP+20: ASYM
34 | % GROUP+21: ASYM
35 | % GROUP+22: ASYM
36 | % GROUP+23: ASYM
37 | % GROUP+24: ASYM
38 | % GROUP+25: ASYM
39 | %
40 |
function values = konect_statistic_power3(A, format, weights)
% Power law fit with p-values [power3], via konect_power_law_range().
%
% PARAMETERS
%   A        Adjacency/biadjacency matrix
%   format   Network format (consts.SYM, consts.ASYM or consts.BIP)
%   weights  Passed through to konect_power_law_range()
%
% RESULT
%   values   Column vector of 25 values, in groups of five
%            (power_a, xmin, L, p, gof):
%            [1-5]    all nodes
%            [6-15]   left, right nodes (BIP; NaN otherwise)
%            [16-25]  out-, in-degrees  (ASYM; NaN otherwise)
%
% Raises an error for any other format.

consts = konect_consts();

% Ignore edge weights
A = (A ~= 0);

% Note: the assignments to VALUES are intentionally left unsuppressed,
% as in the original, so that the result is echoed to the log.

if format == consts.SYM

    values_all = konect_power_law_range(A | A', weights, 1);
    values = [ values_all ; NaN * ones(20,1) ]

elseif format == consts.ASYM

    % Overall distribution: orientation of edges is ignored
    values_all = konect_power_law_range(A | A', weights, 1);
    nvalues_out = konect_power_law_range(A, weights, 1);
    nvalues_in = konect_power_law_range(A', weights, 1);
    values = [ values_all ; NaN * ones(10,1) ; nvalues_out ; nvalues_in ]

elseif format == consts.BIP

    % Overall distribution: degrees of left and right nodes together,
    % using the square adjacency matrix of the bipartite graph
    [m n] = size(A);
    values_all = konect_power_law_range([sparse(m,m) A; A' sparse(n,n)], weights, 1);
    nvalues_out = konect_power_law_range(A, weights, 1);
    nvalues_in = konect_power_law_range(A', weights, 1);
    values = [ values_all ; nvalues_out ; nvalues_in ; NaN * ones(10,1) ]

else
    % The original called error() without arguments, which fails with
    % an unrelated "not enough arguments" message.
    error('*** Invalid format');
end
70 |
--------------------------------------------------------------------------------
/m/konect_statistic_triangles.m:
--------------------------------------------------------------------------------
1 | %
2 | % The number of triangles in a graph.
3 | %
4 | % The computed number of triangles is independent of the orientation
5 | % of edges. The multiplicity of edges is ignored. Loops in
6 | % graph are ignored.
7 | %
8 | % PARAMETERS
9 | % A Adjacency matrix
10 | % format
11 | % weights
12 | % opts (optional, unused)
13 | %
14 | % RESULT
15 | % values Column vector of results
16 | % [1] Number of triangles
17 | %
18 | % GROUP: square
19 | %
20 |
function values = konect_statistic_triangles(A, format, weights, opts)
% Count the triangles of a unipartite network.
%
% Edge orientation is discarded by symmetrizing the matrix, the matrix
% is reduced to a 0/1 pattern, and diagonal entries (loops) are removed
% before counting.  WEIGHTS and OPTS are not used.
%
% RESULT
%   values   The number of triangles (scalar)

% Upper bound, counted in doubles, on the temporary memory to use; it
% determines how many columns are processed at once.
size_resident = 1e7;

consts = konect_consts();

is_square_format = (format == consts.SYM | format == consts.ASYM);
if ~is_square_format
    error('*** Number of triangles is trivially zero for bipartite networks');
end

n = size(A, 1);

% Symmetric pattern of the graph.  It is converted to double because
% the matrix product below is not available for logical matrices.
S = konect_absx(A);
S = double(S | S');

% Drop the diagonal so that loops cannot take part in a triangle
S = S - spdiags(diag(S), 0, n, n);

% Number of columns handled per iteration, at least one
size_chunk = max(1, floor(size_resident / n));

[k from to] = konect_fromto(1, n, size_chunk);

t = konect_timer(n);

% trace(S^3), accumulated over column chunks: for the columns C of a
% chunk, sum(S(:,C) .* (S * S(:,C))) gives their diagonal entries of
% S^3 (S is symmetric).  Every triangle is counted 3! = 6 times.
count_total = 0;

for j = 1 : k

    t = konect_timer_tick(t, to(j));

    columns = S(:, from(j):to(j));
    count_total = count_total + sum(sum(columns .* (S * columns), 1), 2);

end

konect_timer_end(t);

count_reduced = count_total / 6;

if floor(count_reduced) ~= count_reduced
    error('*** count not a multiple of 6');
end

values = count_reduced;
76 |
--------------------------------------------------------------------------------
/m/konect_eigskew.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the sparse eigenvalue decomposition of a skew-symmetric
3 | % matrix B in real/Gower form.
4 | %
5 | % Given a skew-symmetric matrix B, this computes U, D and V such that
6 | %
7 | % B = U D V' - V D U'
8 | %
9 | % where U and V are the real and imaginary parts of the eigenvectors of
10 | % B, and D is diagonal, real and nonnegative. Note that if B was
11 | % constructed from a matrix A as B = A - A', then U D V' is in the
12 | % general case *not* the singular value decomposition of A, and in
13 | % fact not even a good approximation of it. This method is faster
14 | % than computing the actual eigenvalue decomposition of A - A'. The
15 | % columns of U and V are orthonormal. The actual eigenvalue
16 | % decomposition of B is given by
17 | %
18 | % B = Q L Q'
19 | %
20 | % with
21 | %
22 | % Q = [ U + iV ; U - iV ] / sqrt(2),
23 | % L = [ iD, 0 ; 0, -iD ].
24 | %
25 | % Another feature of this decomposition is that a rank of floor(n/2)
26 | % is enough to decompose all matrices exactly. I.e., when B has size
27 | % n*n, then there is always a r <= floor(n/2) such that an exact
28 | % decomposition exists in which U and V have size n*r and D has size
29 | % r*r.
30 | %
31 | % RESULT
32 | % U,D,V The decomposition
33 | %
34 | % PARAMETERS
35 | % B (n*n) Real, square, skew-symmetric matrix to be
36 | % decomposed
37 | % r Rank; if more than floor(n/2), it is rounded to that
38 | % value, which is enough in all cases to recover the
39 | % complete matrix B
40 | % opts (optional) Passed to eigs()
41 | %
42 |
function [U D V] = konect_eigskew(B, r, opts)
% Real-form eigenvalue decomposition of a skew-symmetric matrix:
% B = U D V' - V D U'.
%
%   B     Real, square, skew-symmetric matrix
%   r     Requested rank; capped at floor(n/2), n being the size of B
%   opts  (optional) Options structure handed to eigs()

if nargin < 3
    opts = struct();
end

rank_max = floor(size(B, 1) / 2);
r = min(r, rank_max);

% eigs() cannot select eigenvalues by imaginary part.  The eigenvalues
% of -iB are those of B multiplied by -i, so the eigenvalues of B with
% the largest positive imaginary part are the largest real eigenvalues
% of -iB.
[Q, Lambda] = eigs(-1i * B, r, 'lr', opts);

% Each pair (+/- i lambda, u +/- iv) is returned as
% (lambda, sqrt(2) u, sqrt(2) v).
Q = Q * sqrt(2);

U = real(Q);
V = imag(Q);
D = real(Lambda);
61 |
--------------------------------------------------------------------------------
/m/private/konect_spectral_distribution_plain.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the spectral distribution of a symmetric matrix. This is an
3 | % internal function that needs explicit bounds values.
4 | %
5 | % PARAMETERS
6 | % A (n*n) Symmetric matrix
7 | % lower Lower bound for eigenvalues
8 | % upper Upper bound for eigenvalues
9 | % k Number of bins
10 | %
11 | % ABOUT
12 | % This file is part of the KONECT Matlab Toolbox version 0.3.
13 | % konect.cc
14 | % (c) Jerome Kunegis 2017; this is Free Software released under
15 | % the GPLv3, see COPYING.
16 | %
17 |
function [counts, begins, ends] = konect_spectral_distribution_plain(A, lower, upper, k)
% Histogram of the eigenvalues of a symmetric matrix over K equal-width
% bins spanning [lower, upper].
%
% PARAMETERS
%   A      (n*n) Symmetric matrix
%   lower  Lower bound for eigenvalues
%   upper  Upper bound for eigenvalues
%   k      Number of bins
%
% RESULT
%   counts (k*1) Number of eigenvalues attributed to each bin
%   begins (k*1) Lower end of each bin
%   ends   (k*1) Upper end of each bin
%
% For each inner bin boundary, the number of eigenvalues below it is
% estimated as the number of negative diagonal entries of the U factor
% of an LU decomposition of the shifted matrix.  As noted below, these
% counts are not always consistent, and are forced to be nondecreasing.

% Echo the bounds to the log
lower
upper

assert(size(A, 1) == size(A, 2));

epsi = norm(A - A', 'fro')

if abs(epsi) >= 1e-10, error('*** matrix A is not symmetric'); end

n = size(A,1);

begins = lower + (upper - lower) * (0:(k-1)) / k;
ends   = lower + (upper - lower) * (1:k) / k;

t = konect_timer(k-1);

%
% Count eigenvalues
% cumul(i) = number of eigenvalues in bins up to I
%
% Preallocated so that it is defined (and empty) when k == 1, in which
% case the loop below does not run at all.
cumul = zeros(1, k-1);

for i = 1 : (k-1)

    t = konect_timer_tick(t, i);

    threshold = ends(i);
    A_shifted = A - threshold * speye(n);

    % Three outputs are requested to keep the calling form of lu()
    % unchanged; only U is used.
    [l,u,p] = lu(A_shifted, 1, 'vector');

    negative_count = full(sum(diag(u) < 0));
    cumul(i) = negative_count;
    fprintf(1, '%d (%g): %d\n', i, threshold, negative_count);

    if i > 1 && cumul(i) < cumul(i-1)
        % lu() seems to return weird values which do not always
        % correspond to actual eigenvalue signs.  Just round the
        % values up.
        fprintf(2, 'Warning: ***negative values\n');
        cumul(i) = cumul(i-1);
    end
end

konect_timer_end(t);

% Differences of the cumulative counts; everything not below the last
% inner boundary is attributed to the last bin.
counts = [cumul n] - [0 cumul];

counts = counts';
begins = begins';
ends = ends';
79 |
--------------------------------------------------------------------------------
/m/konect_significance_plot_bw.m:
--------------------------------------------------------------------------------
1 | %
2 | % Same as konect_significance_plot() but in black-and-white.
3 | %
4 | % PARAMETERS
5 | % values (m*n) Matrix of values; there are m methods (rows) with n
6 | % values each (columns)
7 | % p_threshold
8 | % maxdiff
9 | % labels
10 | %
11 |
function konect_significance_plot_bw(values, p_threshold, maxdiff, labels)
% Black-and-white pairwise significance plot, drawn into the current
% axes.
%
% Every pair of rows of VALUES is compared with ttest().  For each pair
% whose p-value is below P_THRESHOLD a bar is drawn: its angle encodes
% the mean difference of the two rows (scaled by MAXDIFF), its gray
% level encodes the p-value relative to the threshold.  LABELS(i) is
% used as the label of row i on both axes.

font_size = 13;
displacement_x = 0.3; % Of Y axis labels (sic) left of plot
displacement_y = 0.3; % Of X axis labels (sic) below the plot
rotation_y = 27;      % Rotation of X axis labels
len = 0.9;            % Length of bars relative to cell width
line_width = 8;

m = size(values, 1);

% Pairwise p-values and pairwise mean differences
P = zeros(m, m);
D = zeros(m, m);

for a = 1 : m
    row_a = values(a, :);
    for b = 1 : m
        row_b = values(b, :);
        [h, p_ab] = ttest(row_a, row_b);
        P(a, b) = p_ab;
        D(a, b) = mean(row_a - row_b);
    end
end

hold on;
axis([0 m 0 m]);
axis square;

half_len = (1/2) * len;

for i = 1 : m
    for jj = 1 : m

        p = P(i, jj);
        if p >= p_threshold, continue; end

        % The second index runs from top to bottom in the plot
        j = m + 1 - jj;

        theta = (atan(D(i, jj) / maxdiff * pi / 2) + pi / 2) / 2;
        half_x = half_len * cos(theta);
        half_y = half_len * sin(theta);

        xs = [ (i - 1/2 + half_x), (i - 1/2 - half_x) ];
        ys = [ (j - 1/2 - half_y), (j - 1/2 + half_y) ];

        line(xs, ys, ...
             'Color', [1 1 1] * min(1, p / p_threshold), ...
             'LineWidth', line_width);
    end
end

% Labels: to the left of each row and, rotated, below each column
for i = 1 : m
    text(-displacement_x, m + 1 - i - 1/2, labels(i), ...
         'HorizontalAlignment', 'Right', 'VerticalAlignment', 'Middle', ...
         'FontSize', font_size);
    text(i - 1/2, -displacement_y, labels(i), ...
         'HorizontalAlignment', 'Right', 'VerticalAlignment', 'Middle', ...
         'FontSize', font_size, 'Rotation', rotation_y);
end

set(gca, 'XTick', []);
set(gca, 'YTick', []);

axis square;
74 |
--------------------------------------------------------------------------------
/m/konect_statistic_twostars.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the number of 2-stars in a graph, i.e., the number of
3 | % pairs of incident edges, or the number of 2-paths.
4 | %
5 | % Multiple edges and loops are ignored.
6 | %
7 | % For directed graphs, the following patterns are recognized:
8 | %
9 | % * <-- * --> * Outgoing 2-star
10 | %
11 | % * --> * <-- * Ingoing 2-star
12 | %
13 | % * --> * --> * Mixed 2-star
14 | %
15 | % For bipartite graphs a left 2-star has a single left node with
16 | % degree two. Right 2-stars are defined analogously.
17 | %
18 | % PARAMETERS
19 | % A Adjacency/biadjacency matrix
20 | % format
21 | % weights
22 | % opts (optional)
23 | %
24 | % RESULT
25 | % values Column vector of results
26 | % [1] Number of 2-stars
27 | % [2] Number of left 2-stars (BIP only)
28 | % [2] Number of outgoing 2-stars (ASYM only)
29 | % [3] Number of right 2-stars (BIP only)
30 | % [3] Number of ingoing 2-stars (ASYM only)
31 | % [4] Number of mixed 2-stars (ASYM only)
32 | %
33 | %
34 |
function values = konect_statistic_twostars(A, format, weights, opts)
% Number of 2-stars (pairs of incident edges).
%
% PARAMETERS
%   A        Adjacency/biadjacency matrix
%   format   Network format (consts.SYM, consts.ASYM or consts.BIP)
%   weights  (unused)
%   opts     (optional, unused)
%
% RESULT
%   values   Column vector of results
%            SYM:   [1] number of 2-stars
%            BIP:   [1] total, [2] left 2-stars, [3] right 2-stars
%            ASYM:  [1] total, [2] outgoing, [3] ingoing, [4] mixed
%
% The result is now returned as a dense column vector, as documented;
% the original built it by indexed assignment, which yields a row.

consts = konect_consts();

% Ignore multiple edges
A = (A ~= 0);

% Ignore loops
if format == consts.SYM || format == consts.ASYM
    n = size(A, 1);
    A = A - spdiags(diag(A), [0], n, n);
end

% Degree vectors: row sums and column sums
d_1 = sum(A, 2);
d_2 = sum(A, 1)';

% Note: VALUES is intentionally left unsuppressed once per branch, so
% that the result is still echoed to the log.

if format == consts.BIP

    % A node of degree d is the center of d * (d - 1) / 2 two-stars
    count_left  = full(0.5 * sum(d_1 .* (d_1 - 1)));
    count_right = full(0.5 * sum(d_2 .* (d_2 - 1)));

    values = [ count_left + count_right ; count_left ; count_right ]

elseif format == consts.SYM

    % Row and column sums are added to obtain the degree
    d = d_1 + d_2;

    values = full(0.5 * sum(d .* (d - 1)))

elseif format == consts.ASYM

    count_out   = full(0.5 * sum(d_1 .* (d_1 - 1)));
    count_in    = full(0.5 * sum(d_2 .* (d_2 - 1)));
    % One ingoing and one outgoing edge at the same node
    count_mixed = full(sum(d_1 .* d_2));

    values = [ count_out + count_in + count_mixed ; ...
               count_out ; count_in ; count_mixed ]

else
    error('*** Invalid format');
end
85 |
--------------------------------------------------------------------------------
/m/konect_spectral_distribution.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the spectral distribution of a sparse matrix, i.e., the
3 | % distribution of the eigenvalues or singular values.
4 | %
5 | % Splits the spectrum of the matrix A into K bins.
6 | %
7 | % PARAMETERS
8 | % A Sparse adjacency or biadjacency matrix
9 | % decomposition The decomposition to perform. Only 'sym',
10 | % 'sym-n' and 'lap' are supported
11 | % format
12 | % k Number of bins
13 | %
14 | % RESULT
15 | % counts (k*1) The number of eigenvalues in each bin
16 | % begins (k*1) The starts of each bin
17 | % ends (k*1) The ends of each bin
18 | %
19 |
function [counts, begins, ends] = konect_spectral_distribution(A, decomposition, format, k, varargin)
% Spectral distribution of a network matrix over K bins.
%
% The matrix given by DECOMPOSITION ('sym', 'sym-n' or 'lap') is built
% with konect_matrix(), bounds for the spectrum are determined, and the
% histogram is computed by konect_spectral_distribution_plain2().
%
% Statements without a trailing semicolon are deliberate: their values
% are echoed to the console.

consts = konect_consts();

set_format();

opts.disp = 2;
opts.issym = 1;

% Known bounds of the spectrum: [] = none, [lower] or [lower upper]
bounds = [];

%
% Transform A to square and symmetric
%
'transform to square and symmetric matrix'
if strcmp(decomposition, 'sym')
    matrix_name = 'symfull';
elseif strcmp(decomposition, 'sym-n')
    matrix_name = 'sym-nfull';
    bounds = [ -1, +1 ];
elseif strcmp(decomposition, 'lap')
    matrix_name = decomposition;
    bounds = [0];
else
    error('*** Invalid decomposition');
end

A = konect_matrix(matrix_name, A, format);

size_A = size(A)

n = size(A,1)

%
% Bounds of the spectrum
%
switch length(bounds)

    case 0
        % Default bounds: +/- spectral norm
        'calling eigs() in konect_spectral_distribution()'
        d = eigs(@(x)(A * x), n, 1, 'lm', opts);
        upper = abs(diag(d))
        lower = - upper

    case 1
        % Lower bound known; upper bound is the largest eigenvalue
        'calling eigs() for only the maximum eigenvalue'
        d = eigs(@(x)(A * x), n, 1, 'la', opts);
        lower = bounds(1)
        upper = d

    otherwise
        'fixed bounds'
        lower = bounds(1)
        upper = bounds(2)
end

% Decompositions flagged as positive-semidefinite start at zero
data_decomposition = konect_data_decomposition(decomposition);

if data_decomposition.posdef
    lower = 0;
end

[counts begins ends] = konect_spectral_distribution_plain2(A, lower, upper, k);

end

% Switch the console output to long format.  This lives in its own
% function because FORMAT is the name of a parameter in the main
% function.
function set_format()

format long;

end
89 |
--------------------------------------------------------------------------------
/m/konect_power_law_range.m:
--------------------------------------------------------------------------------
1 | %
2 | % Fit a power law using the method from Aaron Clauset [1], using the
3 | % implementation from [2]. This method is very slow, but gives
4 | % correct results when the distribution is a power law only beginning
5 | % at a certain degree.
6 | %
7 | % This function has the same API as konect_power_law_flat().
8 | %
9 | % PARAMETERS
10 | % A Adjacency matrix or weight vector
11 | % weights (optional) Weight types; pass POSITIVE or UNWEIGHTED for
12 | % weight vectors; defaults to WEIGHTED
13 | % enable_p (optional) Enable computation of p-values
14 | % (VERY slow, disabled by default)
15 | %
16 | % RESULT
17 | % values Column vector of values as returned by the pl*
18 | % functions from Aaron Clauset.
19 | % (1) gamma The exponent (positive)
20 | % (2) xmin The minimal degree
21 | % (3) L Log-likelihood of the data x >= xmin
22 | % under the fitted power law
23 | % (4) p p-value of the power-law hypothesis; a small value means the power law is not a plausible fit [1]
24 | % (5) gof Goodness-of-fit value
25 | %
26 | % REFERENCES
27 | %
28 | % [1] Power-law distributions in empirical data, Aaron Clauset, Cosma
29 | % Rohilla Shalizi, M. E. J. Newman.
30 | %
31 | % [2] http://tuvalu.santafe.edu/~aaronc/powerlaws/
32 | % Visited on 2014-10-16
33 | %
34 |
function [values] = konect_power_law_range(A, weights, enable_p)
% Fit a power law to the degree distribution using plfit() and,
% optionally, plpva().
%
% PARAMETERS
%   A         Adjacency matrix or weight vector; the values fitted are
%             the nonzero row sums of A
%   weights   (optional) Weight type; defaults to consts.WEIGHTED
%   enable_p  (optional) When nonzero, also compute p and gof using
%             plpva(); defaults to 0
%
% RESULT
%   values    Column vector [gamma; xmin; L], followed by [p; gof] when
%             ENABLE_P is set.  All entries are NaN when fewer than two
%             distinct nonzero degrees exist.

consts = konect_consts();

if nargin < 2
    weights = consts.WEIGHTED;
end

if ~exist('enable_p', 'var')
    enable_p = 0;
end

% For these weight types the entries are weights, not multiplicities:
% count each nonzero entry once.
if weights == consts.SIGNED | weights == consts.WEIGHTED | ...
        weights == consts.MULTIWEIGHTED
    A = A ~= 0;
end

degrees = sum(A,2);

% Nodes of degree zero are not part of the distribution
degrees = degrees(degrees ~= 0);

if length(unique(degrees)) < 2
    fprintf(1, ...
            'konect_power_law_range.m: length(unique(degrees)) = %u\n', length(unique(degrees)));
    values = [ NaN; NaN; NaN ];
    if enable_p
        % Fixed: the original assigned to a misspelled variable
        % ("value"), so that only three values were returned here.
        values = [ values ; NaN ; NaN ];
    end
    return;
end

% Candidate exponents passed to plfit()/plpva() as their 'range' option
range = [ 1.001 : 0.01 : 9 ];

% Left unsuppressed, as in the original, so that the fit is echoed
[gamma, xmin, L] = plfit(degrees, 'range', range)
xmin = full(xmin)

values = [gamma xmin L]';

if enable_p
    [p, gof] = plpva(degrees, xmin, 'range', range)
    values = [ values ; p ; gof ];
end
79 |
--------------------------------------------------------------------------------
/m/konect_statistic_cluscoall.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the clustering coefficients, along with some extra
3 | % variants.
4 | %
5 | % PARAMETERS
6 | % A Adjacency matrix
7 | % format
8 | % weights
9 | %
10 | % RESULT
11 | % values Column vector of results
12 | % [1] clustering coefficient
13 | % [2] directed clustering coefficient (ASYM)
14 | % [3] signed clustering coefficient (SIGNED or WEIGHTED)
15 | % [4] signed directed clustering coefficient ((SIGNED or WEIGHTED) and ASYM)
16 | % [5] relative signed clustering coefficient (SIGNED or WEIGHTED)
17 | % [6] relative signed directed clustering coefficient ((SIGNED or WEIGHTED) and ASYM)
18 | % [7-12] same with [clusco2, i.e., mean of local
19 | % clustering coefficients]
20 | %
21 | % GROUP: square
22 | % GROUP+2: asym
23 | % GROUP+3: squarenegative
24 | % GROUP+4: squarenegative
25 | % GROUP+5: squarenegative
26 | % GROUP+6: squarenegative
27 | % GROUP+9: squarenegative
28 | % GROUP+10: squarenegative
29 | % GROUP+11: squarenegative
30 | % GROUP+12: squarenegative
31 | %
32 |
function values = konect_statistic_cluscoall(A, format, weights)
% Clustering coefficient and its directed / signed variants.
%
% PARAMETERS
%   A        Adjacency matrix
%   format   Network format; consts.BIP is rejected
%   weights  Weight type of the network
%
% RESULT
%   values   Column vector; entries [1]-[6] hold the second output of
%            konect_clusco(), entries [7]-[12] the third output, for
%              [1]/[7]   the undirected unsigned graph
%              [2]/[8]   the directed unsigned graph (ASYM)
%              [3]/[9]   the undirected signed graph (signed weights)
%              [4]/[10]  the directed signed graph (signed weights, ASYM)
%              [5]/[11]  [3]/[1] and [9]/[7]
%              [6]/[12]  [4]/[2] and [10]/[8]
%            Entries that do not apply are left at zero, and the vector
%            ends at the last entry that was set.

consts = konect_consts();

if format == consts.BIP
    error('*** Clustering coefficient is trivially zero for bipartite networks');
end

% Weight types for which the signed variants are computed.  Evaluated
% once so that the undirected and the directed variants agree; the
% original omitted MULTISIGNED from the directed test only.
is_signed = (weights == consts.SIGNED | weights == consts.WEIGHTED | ...
             weights == consts.MULTIWEIGHTED | weights == consts.MULTISIGNED);

% Round all values to -1/0/+1
A = konect_signx(A);

A_abs = A ~= 0;

[x c c2] = konect_clusco(A_abs | A_abs');
values(1) = c;
values(7) = c2;

if is_signed
    % Note: we must use "+" instead of "|" in order to preserve the -1
    % entries in the matrix.
    [x c c2] = konect_clusco(konect_signx(A + A'));
    values(3) = c;
    values(9) = c2;
    values(5) = values(3) / values(1);
    values(11) = values(9) / values(7);
end

if format == consts.ASYM
    [x c c2] = konect_clusco(A_abs);
    values(2) = c;
    values(8) = c2;

    if is_signed
        [x c c2] = konect_clusco(A);
        values(4) = c;
        values(10) = c2;
        values(6) = values(4) / values(2);
        values(12) = values(10) / values(8);
    end
end

% Indexed assignment built a row; return a column
values = values';
75 |
--------------------------------------------------------------------------------
/m/konect_data_tag.m:
--------------------------------------------------------------------------------
1 | %
2 | % Return information about the tags in KONECT.
3 | %
4 | % RETURN VALUES
5 | % tag_list List of tags, in preferred KONECT order
6 | % tag_text Textual description for each tag
7 | %
8 |
function [tag_list tag_text tag_name] = konect_data_tag()
% Static information about the KONECT tags.
%
% RETURN VALUES
%   tag_list  Cell array of tag identifiers, in display order
%   tag_text  Struct mapping each tag identifier to its description
%   tag_name  Struct mapping each tag identifier to a short name

% This order is how the tags are shown to the user.  It goes roughly
% from "simple" to "complex".
tag_list = { ...
    'skew', 'path', ...
    'nonreciprocal', 'acyclic', 'loop', 'clique', 'tournament', ...
    'trianglefree', 'zeroweight', 'incomplete', 'join', ...
    'missingorientation', 'missingmultiplicity', ...
    'kcore', 'lcc' };

% One row per tag: identifier, description, short name.  The row order
% determines the field order of both structs.
info = { ...
    'acyclic',             'Does not contain directed cycles',                                      'Directed cycles'; ...
    'clique',              'Edges exist between all possible nodes',                                'Complete'; ...
    'incomplete',          'Is a snapshot and likely to not contain all data',                      'Snapshot'; ...
    'join',                'Is the join of an underlying network',                                  'Join'; ...
    'kcore',               'Only nodes with degree larger than a given threshold are included',     'k-Core'; ...
    'lcc',                 'Only the largest connected component of the original data is included', 'Connectedness'; ...
    'loop',                'Contains loops',                                                        'Loops'; ...
    'missingorientation',  'Is not directed, but the underlying data is',                           'Orientation'; ...
    'missingmultiplicity', 'Does not have multiple edges, but the underlying data has',             'Multiplicity'; ...
    'nonreciprocal',       'Does not contain reciprocal edges',                                     'Reciprocal'; ...
    'path',                'The edges form paths',                                                  'Paths'; ...
    'skew',                'Inverted edges can be interpreted as negated edges',                    'Skew-symmetry'; ...
    'tournament',          'All pairs of nodes are connected by a directed edge',                   'Tournament'; ...
    'trianglefree',        'Does not contain triangles',                                            'Triangles'; ...
    'zeroweight',          'Edges may have weight zero',                                            'Zero weights' };

tag_text = struct();
tag_name = struct();

for row = 1 : size(info, 1)
    key = info{row, 1};
    tag_text.(key) = info{row, 2};
    tag_name.(key) = info{row, 3};
end
57 |
--------------------------------------------------------------------------------
/m/konect_statistic_conflict.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the algebraic conflict [conflict]. The conflict is
3 | % computed for the largest connected component. Multiple edges as
4 | % well as loops are ignored.
5 | %
6 | % PARAMETERS
7 | % A Adjacency or biadjacency matrix
8 | % format
9 | % weights
10 | %
11 | % RESULT
12 | % values Results as column vector
13 | % [1] The smallest eigenvalue of the signed Laplacian \lambda, based on largest connected component
14 | % [2] The relative relaxed frustration \xi = \lambda n / 8 m
15 | %
16 | % GROUP: negative
17 | %
18 |
function values = konect_statistic_conflict(A, format, weights)
% Algebraic conflict [conflict]: the smallest eigenvalue of the signed
% Laplacian of the largest connected component.
%
% PARAMETERS
%   A        Adjacency or biadjacency matrix
%   format   Network format
%   weights  Weight type; must be one for which konect_data_weights()
%            reports negative weights
%
% RESULT
%   values   Results as column vector
%            [1] The conflict \lambda
%            [2] \lambda n / (8 m)
%
% Statements without a trailing semicolon are left as in the original;
% their values are echoed to the log.

consts = konect_consts();
[negative, interval_scale, multi] = konect_data_weights()

tol = 1e-6

opts.disp = 2;

if 1 ~= negative(weights)
    % Would be zero.  We don't allow that.  (The original used
    % assert(0) followed by an unreachable exit(1) and error('***').)
    error('*** [conflict] is trivially zero for networks without negative edges');
end

% Remove multiple edges: reduce all weights to -1/0/+1
A = (A > 0) - (A < 0);

% Build the Laplacian matrix L
if format ~= consts.BIP

    [n1 n2] = size(A);
    assert(n1 == n2);

    % Remove loops
    A = A - spdiags(diag(A), [0], n1, n1);

    [A cc n] = konect_connect_matrix_square(A);

    % M is computed as an undirected graph: off-diagonal nonzeros of L,
    % each pair counted once (assumes all N diagonal entries of L are
    % nonzero).
    L = konect_matrix('lap', A, format, weights, opts);
    m = (nnz(L) - n) / 2;

else % BIP

    [A cc1 cc2 n] = konect_connect_matrix_bipartite(A);
    L = konect_matrix('lap', A, format, weights, opts);
    m = nnz(A);

end

opts.tol = tol

% Largest eigenvalue of L
lambda_n = eigs(L, 1, 'lm', opts)

% The largest eigenvalue of (lambda_n I - L) equals lambda_n minus the
% smallest eigenvalue of L, so the smallest eigenvalue is obtained from
% two largest-magnitude computations.
L_m = lambda_n * speye(size(L,1)) - L;
opts.tol = tol / lambda_n
lambda_n_minus_1 = eigs(L_m, 1, 'lm', opts)

conflict = lambda_n - lambda_n_minus_1

if conflict < 0
    % This is an error
    error('*** [conflict] must not be negative');
end

values = [ conflict ; ...
           (conflict * n / 8 / m) ];
91 |
--------------------------------------------------------------------------------
/m/konect_statistic_avgdegree.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the average degree [avgdegree].
3 | %
4 | % The main average degree (lines [1] to [3]) does take into account
5 | % multiple edges. Only the "unique" variants do not take into
6 | % account multiple edges.
7 | %
8 | % For directed networks, the returned value is the total (out+in)
9 | % degree. The average outdegree and average indegree are both the
10 | % half of this.
11 | %
12 | % PARAMETERS
13 | % A Adjacency matrix
14 | % format
15 | % weights
16 | %
17 | % RESULT
18 | % values Column vector of results
19 | % [1] Global average degree
20 | % [2] Average left degree (BIP)
21 | % [3] Average right degree (BIP)
22 | % [4] Global unique average degree
23 | % [5] Average unique left degree (BIP)
24 | % [6] Average unique right degree (BIP)
25 | %
26 | % GROUP+2: bip
27 | % GROUP+3: bip
28 | % GROUP+5: bip
29 | % GROUP+6: bip
30 | %
31 |
function values = konect_statistic_avgdegree(A, format, weights)
% Average degree [avgdegree].
%
% PARAMETERS
%   A        Adjacency matrix (SYM, ASYM) or biadjacency matrix (BIP)
%   format   Network format
%   weights  Weight type.  Only for consts.POSITIVE are the entries of
%            A summed as edge multiplicities; for all other types each
%            nonzero entry counts as one edge.
%
% RESULT
%   values   Dense column vector of results
%            [1] Global average degree
%            [2] Average left degree (BIP; 0 otherwise)
%            [3] Average right degree (BIP; 0 otherwise)
%            [4] Global unique average degree
%            [5] Average unique left degree (BIP; 0 otherwise)
%            [6] Average unique right degree (BIP; 0 otherwise)
%
% The number of nodes is taken from the dimensions of A.

consts = konect_consts();

% Reduce A to be the unweighted adjacency matrix, potentially including
% multiple edges.  For weighted types the entries are weights, and with
% only the adjacency matrix available edge multiplicities cannot be
% recovered (MULTIWEIGHTED), so every nonzero entry counts once.
% UNWEIGHTED, POSITIVE and DYNAMIC matrices are left unchanged.
if weights == consts.POSWEIGHTED || weights == consts.SIGNED || ...
        weights == consts.WEIGHTED || weights == consts.MULTIWEIGHTED
    A = (A ~= 0);
end

% Total number of edges
if weights == consts.POSITIVE
    % full(): the sum of a sparse matrix is a sparse scalar, which would
    % make the whole result vector sparse for this weight type only.
    m = full(sum(sum(A)));
    m_unique = nnz(A);
else
    m = nnz(A);
    m_unique = m;
end

if format == consts.BIP

    [n_1 n_2] = size(A);

    values = [ 2 * m / (n_1 + n_2) ; ...
               m / n_1 ; ...
               m / n_2 ; ...
               2 * m_unique / (n_1 + n_2) ; ...
               m_unique / n_1 ; ...
               m_unique / n_2 ];

elseif format == consts.SYM || format == consts.ASYM

    n = size(A, 1);

    % Entries [2], [3], [5], [6] only apply to bipartite networks
    values = [ 2 * m / n ; ...
               0 ; ...
               0 ; ...
               2 * m_unique / n ; ...
               0 ; ...
               0 ];

else
    error('*** Invalid format');
end
88 |
89 |
--------------------------------------------------------------------------------
/m/konect_print.m:
--------------------------------------------------------------------------------
1 | %
2 | % Save a plot to a file. This is used to print all KONECT plots.
3 | %
4 | % PARAMETERS
5 | %
6 | % filename The EPS filename
7 | %
8 | % STYLE
9 | %
10 | % Plots in KONECT follow the following style recommendations:
11 | %
12 | % * Don't include a title (titles are added in papers using Latex)
13 | % * Grid lines are included for the Y axis when the X axis is
14 | % discrete (or the other way around); otherwise they are omitted.
15 | % * The plots should be viewable at small size, for papers and the
16 | % previews on the KONECT website. As a rule, a user will see an image
17 | % with a width of about 5cm in both cases. Remember that the font in
18 | % plots should be of comparable size to the font in papers or in the
19 | % browser.
20 | %
21 | % SIZE
22 | %
23 | % For Matlab:
24 | % font_size = 22; % 18 when the labels contain subscripts
25 | % line_width = 3;
26 | %
27 | % For Octave:
28 | % line_width = 14;
29 | %
30 | % COLORS
31 | %
32 | % Color values used in KONECT plots are defined in
33 | % konect_colors_letter.m.
34 | %
35 | % The following colors are used in KONECT plots:
36 | %
37 | % blue: general color for plot lines and points
38 | % spectrum: orange/brown
39 | % red: runtime
40 | % positive/negative values: green/red, respectively
41 | % left/right distributions in bipartite graphs: red/green
42 | % (consistent with navigation lights)
43 | % outlinks/inlinks: red/green (consistent with left/right
44 | % distributions in bipartite networks)
45 | %
46 | % OTHER SETTINGS
47 | %
48 | % set(gca, 'XMinorTick', 'on');
49 | % set(gca, 'YMinorTick', 'on');
50 | % set(gca, 'TickLength', [0.05 0.05]);
51 | %
52 | % NOTES
53 | %
54 | % With Octave under Ubuntu, it seems that package "epstool" must be
55 | % installed.
56 | %
57 |
function konect_print(filename)
% Print the current plot to FILENAME as a color EPS file and close all
% figures afterwards.  On failure, a partially written file is removed
% and an error is raised.

fprintf(1, 'konect_print(%s)\n', filename);

% In Octave, use some better fonts
if konect_usingoctave()
    FN = findall(0, '-property', 'FontName');
    set(FN, 'FontName', '/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans.ttf');
    FS = findall(0, '-property', 'FontSize');
    set(FS, 'FontSize', 15);
end

try
    % "epsc" stands for "EPS color".  The "-d" option sets the device.
    print(filename, '-depsc');

    fprintf(1, '\tdone printing %s\n', filename);

catch err

    % Print the error (deliberately without semicolon)
    err

    % Delete a partially generated file, if one exists.  The guard
    % avoids a spurious warning when nothing was written at all.
    if exist(filename, 'file') == 2
        delete(filename);
    end

    % Pass the filename as an argument rather than pre-formatting it with
    % sprintf(): a single-argument error() call treats its argument as a
    % format template in Octave, which breaks on names containing '%'
    % or '\'.
    error('Error while printing %s', filename);

end

close all;
89 |
--------------------------------------------------------------------------------
/m/konect_statistic_squares.m:
--------------------------------------------------------------------------------
1 | %
2 | % The number of squares in a graph.
3 | %
4 | % The computed number of squares is independent of the orientation
5 | % of edges. The multiplicity of edges is ignored. Loops in the
6 | % graph are ignored.
7 | %
8 | % PARAMETERS
9 | % A Adjacency matrix
10 | % format
11 | % weights
12 | % opts (optional)
13 | %
14 | % RESULT
15 | % values Column vector of results
16 | % [1] Number of squares
17 | %
18 |
function values = konect_statistic_squares(A, format, weights, opts)
% Number of squares (4-cycles) in a graph, ignoring edge orientation,
% edge multiplicity and loops.  WEIGHTS and OPTS are accepted but not
% read by this implementation.
%
% Method: sum the entries of A .* (A^3), which counts all "squares"
% including degenerate ones whose nodes overlap (edges and two-stars),
% each orientation separately; then subtract the degenerate ones.
%
%   C: total count with overlap (count_total)
%   q: number of squares
%   s: number of two-stars
%   m: number of edges
%
%   C = 8 q + 2 m + 4 s   =>   q = (C - 2 m - 4 s) / 8

% Largest number of doubles to be used as temporary memory; determines
% how many columns are processed at once.
size_resident = 1e7;

consts = konect_consts();

% Turn a biadjacency matrix into the adjacency matrix of the whole graph
if format == consts.BIP
    [n_left, n_right] = size(A);
    A = [sparse(n_left, n_left), A; A', sparse(n_right, n_right)];
end

n = size(A, 1);

% Symmetric 0/1 pattern.  The conversion to double is needed because
% matrix multiplication does not work with logical matrices.
A = konect_absx(A);
A = double(A | A');

% Set diagonal elements to zero
A = A - spdiags(diag(A), [0], n, n);

% Number of columns handled per iteration, at least one
size_chunk = max(1, floor(size_resident / n));

[k, from, to] = konect_fromto(1, n, size_chunk);

t = konect_timer(n);

count_total = 0;

for j = 1 : k

    t = konect_timer_tick(t, to(j));

    A_cols = A(:, from(j):to(j));
    walks  = A_cols .* (A * (A * A_cols));

    count_total = count_total + sum(sum(walks, 1), 2);

end

konect_timer_end(t);

% Degenerate contributions; both quantities are counted twice, i.e.
% twice_twostars = 2 s and twice_edges = 2 m.
degrees        = sum(A, 2);
twice_twostars = degrees' * (degrees - 1);
twice_edges    = nnz(A);

values = (count_total - twice_edges - 2 * twice_twostars) / 8;

assert(values >= 0);
assert(values == floor(values));
99 |
--------------------------------------------------------------------------------
/m/konect_hopdistr_ex.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the hop distribution and all nodes' eccentricities. Due to
3 | % the computation of eccentricities, this function is slower than
4 | % konect_hopdistr(). In all other regards, the two functions do the same,
5 | % except that this function returns the additional value
6 | % "eccentricities". Another difference is that, for undirected
7 | % networks, the argument A need not be symmetric in this function.
8 | %
9 | % For undirected and bipartite graphs, this is the normal undirected hop
10 | % distribution. For directed graphs, edge directions are ignored.
11 | %
12 | % RESULT
13 | % d (1*diam) Number of distances by distance (zero is excluded); the
14 | % length of this vector is the graph's diameter.
15 | % eccentricities Node vector of eccentricity values
16 | %
17 | % PARAMETERS
18 | % A Adjacency or biadjacency matrix (FOR UNDIRECTED
19 | % NETWORKS, this need not be symmetric).
20 | % format (optional) Format using the constants in
21 | % konect_consts.m; ASYM when not given
22 | % size_chunk (optional) Size of chunks used; the choice of
23 | % this value only influences the runtime of the
24 | % function, not the result
25 |
function [d eccentricities] = konect_hopdistr_ex(A, format, size_chunk)
% Hop distribution and per-node eccentricities, ignoring edge directions.
%
% The nodes are processed one at a time: starting from column j of A
% (with loops added), the reached set is repeatedly expanded along edges
% in both directions until it stops growing.
%
% SIZE_CHUNK is accepted for interface compatibility with
% konect_hopdistr() but is not used, because this function always
% processes a single node per iteration.

consts = konect_consts();

maxit = intmax;

if ~exist('format', 'var')
    format = consts.ASYM;
end

if format == consts.BIP
    [m n] = size(A);
    % Embed the (m*n) biadjacency matrix into an (m+n)*(m+n) matrix.  The
    % lower-left block must be (n*m) for the concatenation to be valid
    % when m ~= n.  Only the upper-right block is filled; the iteration
    % below uses both A and A', so no explicit symmetrization is needed.
    A = [ sparse(m,m) A ; sparse(n,m) sparse(n,n) ];
end

n = size(A,1);

% Add loops, i.e. diagonal elements, and reduce to a 0/1 matrix
A = double(((A ~= 0) + speye(n)) ~= 0);

d = [];

eccentricities = zeros(n, 1);

t = konect_timer(n);

for j = 1 : n

    t = konect_timer_tick(t, j);

    x = A(:, j);

    % dd(i) is the number of nodes in the reached set after i-1 expansions
    dd = [];

    for i = 1 : maxit
        dd(i) = nnz(x);

        x_old = x;
        x = A * x + A' * x;
        x = x ~= 0;

        % Stop as soon as the reached set no longer changes
        if norm(x - x_old, 'fro') == 0, break; end;
    end

    % Eccentricities (the final "+ 1" is applied after the loop)
    eccentricities(j) = length(dd) - 1;

    % Add sums.  The shorter of the two vectors is padded with its last
    % value so that both have equal length before they are added.
    if length(d) < length(dd)
        if length(d) > 0
            d((end+1) : length(dd)) = d(end);
        else
            d(length(dd)) = 0;
        end
    else
        dd((end+1) : length(d)) = dd(end);
    end
    d = d + dd;
end

konect_timer_end(t);

eccentricities = 1 + eccentricities;
99 |
--------------------------------------------------------------------------------
/m/konect_decomposition_stoch1.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the 'stoch1' decomposition of a network, i.e., the
3 | % decomposition of the matrix D^-1 A, where A is the adjacency matrix
4 | % of the network and D the diagonal degree matrix. This corresponds
5 | % to the matrix used for computation of PageRank
6 | % ("Google matrix"), up to the teleportation term. The dominant left
7 | % eigenvector returned by this function is thus the PageRank
8 | % vector. Instead of the teleportation factor, this function
9 | % restricts the computation to the largest connected component of the
10 | % bipartite double cover.
11 | %
12 | % To get the PageRank without teleportation, call this function with
13 | % r = 1.
14 | %
15 | % PARAMETERS
16 | % A (m*n) Adjacency or biadjacency matrix
17 | % r Rank of the decomposition
18 | % format Format of network
19 | % weights Weights of network
20 | % opts (optional) Passed to eigs()
21 | %
22 | % RESULT
23 | % U (m*r) Left eigenvectors of the matrix D^-1 A
24 | % D (r*r) Eigenvalues of the matrix D^-1 A
25 | % V (n*r) Right eigenvectors of the matrix D^-1 A
26 | %
27 |
function [U D V] = konect_decomposition_stoch1(A, r, format, weights, opts)
% 'stoch1' decomposition: eigenpairs of the matrix built by
% konect_matrix('stoch1', ...), restricted to a connected part of the
% network and mapped back to the full node set.  The returned eigenvalues
% are real parts, sorted in descending order.

if ~exist('opts', 'var'),
    opts = struct();
end

consts = konect_consts();
[negative] = konect_data_weights();

if format == consts.BIP

    [A cc1 cc2 n] = konect_connect_matrix_bipartite(A);

    [n_left n_right] = size(A);

    A = konect_matrix('bip', A);
    A = konect_matrix('stoch1', A, format, weights);

    % The requested rank is capped by the matrix size
    r = min([r (size(A,1)-2)]);

    [vectors D] = eigs(A, r, 'lr', opts);

    % Split the stacked eigenvectors into their left and right parts
    idx_left  = 1 : n_left;
    idx_right = n_left+1 : n_left+n_right;

    U = konect_connect_back(cc1, vectors(idx_left,  :));
    V = konect_connect_back(cc2, vectors(idx_right, :));

else % SQUARE

    if format == consts.SYM
        [A cc n] = konect_connect_matrix_square(A);
        A = A + A';
    else
        [A cc n] = konect_connect_matrix_strong(A);
    end

    if n <= 1
        % Nothing to decompose
        U = zeros(n, r);
        V = zeros(n, r);
        D = zeros(r, r);
    else
        [P] = konect_matrix('stoch1', A, format, weights, opts);
        r = min(r, size(P,1)-2);
        [U D] = eigs(P, r, 'lr', opts);

        % Flip the dominant eigenvector if it has any negative entry
        if any(U(:,1) < 0)
            U(:,1) = -U(:,1);
        end

        % V = pinv(U)', computed through the economy-size SVD of U
        [svd_u svd_s svd_v] = svd(U, 'econ');
        V = svd_u * pinv(svd_s) * svd_v';
    end

    U = konect_connect_back(cc, U);
    V = konect_connect_back(cc, V);

end

% Keep the real parts of the eigenvalues and sort all three results by
% descending eigenvalue
D = real(D);
[sorted_values order] = sort(diag(D), 'descend');
D = D(order, order);
U = U(:, order);
V = V(:, order);
96 |
97 |
--------------------------------------------------------------------------------
/m/konect_consts.m:
--------------------------------------------------------------------------------
1 | %
2 | % Numerical constants used in KONECT. The list of values and their
3 | % numerical values are prescribed by the KONECT handbook.
4 | %
5 | % RESULT
6 | % A struct containing all constants as fields, with all-uppercase
7 | % names; see below
8 | %
9 |
function [consts symbols_format symbols_weights labels_format labels_weights int_format int_weights] = konect_consts()
% Numerical constants used in KONECT, plus per-constant lookup tables.
%
% RESULT
%   consts           Struct with all constants as all-uppercase fields
%   symbols_format   (1*3 cell) Short symbol for each format
%   symbols_weights  (1*9 cell) Latex symbol for each weight type
%   labels_format    (1*3 cell) Human-readable label for each format
%   labels_weights   (1*9 cell) Human-readable label for each weight type
%   int_format       (1*3 cell) Internal name of each format
%   int_weights      (1*9 cell) Internal name of each weight type
%
% All tables are indexed by the corresponding constant value.

% An explicit struct, rather than an empty cell array that is implicitly
% converted on the first field assignment.
consts = struct();

%
% Format
%
consts.SYM  = 1; % Undirected network; adjacency matrix contains each
                 % edge only once, and A + A' is used implicitly
consts.ASYM = 2; % Directed network
consts.BIP  = 3; % Bipartite network; the biadjacency matrix is passed

consts.FORMAT_COUNT = 3;

%
% Weights
%
consts.UNWEIGHTED       = 1;
consts.POSITIVE         = 2;
consts.POSWEIGHTED      = 3;
consts.SIGNED           = 4;
consts.MULTISIGNED      = 5;
consts.WEIGHTED         = 6;
consts.MULTIWEIGHTED    = 7;
consts.DYNAMIC          = 8;
consts.MULTIPOSWEIGHTED = 9;

consts.WEIGHTS_COUNT = 9;

%
% Symbols
%

symbols_format = cell(1, consts.FORMAT_COUNT);
symbols_format{consts.SYM } = 'U';
symbols_format{consts.ASYM} = 'D';
symbols_format{consts.BIP } = 'B';

symbols_weights = cell(1, consts.WEIGHTS_COUNT);
symbols_weights{consts.UNWEIGHTED      } = '$-$';
symbols_weights{consts.POSITIVE        } = '$=$';
symbols_weights{consts.POSWEIGHTED     } = '$+$';
symbols_weights{consts.SIGNED          } = '$\pm$';
symbols_weights{consts.MULTISIGNED     } = '$\stackrel{+}{=}$';
symbols_weights{consts.WEIGHTED        } = '$*$';
symbols_weights{consts.MULTIWEIGHTED   } = '$_*{}^*$';
symbols_weights{consts.DYNAMIC         } = '$\rightleftharpoons$';
symbols_weights{consts.MULTIPOSWEIGHTED} = '$++$';

%
% Labels
%

labels_format = cell(1, consts.FORMAT_COUNT);
labels_format{consts.SYM } = 'Unipartite, undirected';
labels_format{consts.ASYM} = 'Unipartite, directed';
labels_format{consts.BIP } = 'Bipartite, undirected';

labels_weights = cell(1, consts.WEIGHTS_COUNT);
labels_weights{consts.UNWEIGHTED      } = 'Unweighted, no multiple edges';
labels_weights{consts.POSITIVE        } = 'Unweighted, multiple edges';
labels_weights{consts.POSWEIGHTED     } = 'Positive weights, no multiple edges';
labels_weights{consts.SIGNED          } = 'Signed, possibly weighted, no multiple edges';
labels_weights{consts.MULTISIGNED     } = 'Signed, possibly weighted, multiple edges';
labels_weights{consts.WEIGHTED        } = 'Ratings, no multiple edges';
labels_weights{consts.MULTIWEIGHTED   } = 'Ratings, multiple edges';
labels_weights{consts.DYNAMIC         } = 'Dynamic';
labels_weights{consts.MULTIPOSWEIGHTED} = 'Positive weights, multiple edges';

%
% Internal names
%

int_format = cell(1, consts.FORMAT_COUNT);
int_format{consts.SYM } = 'sym';
int_format{consts.ASYM} = 'asym';
int_format{consts.BIP } = 'bip';

int_weights = cell(1, consts.WEIGHTS_COUNT);
int_weights{consts.UNWEIGHTED      } = 'unweighted';
int_weights{consts.POSITIVE        } = 'positive';
int_weights{consts.POSWEIGHTED     } = 'posweighted';
int_weights{consts.SIGNED          } = 'signed';
int_weights{consts.MULTISIGNED     } = 'multisigned';
int_weights{consts.WEIGHTED        } = 'weighted';
int_weights{consts.MULTIWEIGHTED   } = 'multiweighted';
int_weights{consts.DYNAMIC         } = 'dynamic';
int_weights{consts.MULTIPOSWEIGHTED} = 'multiposweighted';
92 |
--------------------------------------------------------------------------------
/m/konect_clusco_approx.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the approximate overall clustering coefficient of a
3 | % network. The network must not be bipartite. Edge weights are
4 | % ignored, except for their sign. Effectively, this will return the
5 | % signed clustering coefficient. Pass (A ~= 0) in this case to get
6 | % the ordinary clustering coefficient.
7 | %
8 | % PARAMETERS
9 | % A Adjacency matrix; must be square; only the sign of
10 | % weights is used, not the magnitude.
11 | % format Network format
12 | % epsilon (optional) Precision
13 | %
14 | % RESULT
15 | % clusco Overall clustering coefficient
16 | %
17 |
function clusco = konect_clusco_approx(A, format, epsilon)
% Approximate overall (signed) clustering coefficient by sampling nodes
% in random order.  The running estimate is checked every BATCH_SIZE
% nodes, and the function returns early once two consecutive estimates
% differ by less than EPSILON (default 0.0006).

consts = konect_consts();

% Number of sampled nodes between two convergence checks
batch_size = 29;

sum_pairs = 0;
sum_count = 0;

% Restrict the lookup to variables, so that a file or function named
% "epsilon" on the path is not mistaken for the argument.
if ~exist('epsilon', 'var')
    epsilon = 0.0006;
end

if format == consts.SYM
    A = A + A';
end

% Make the diagonal zero.  No further check is needed afterwards.
A = A - diag(diag(A));

A = sign(A);

% Remove zero lines and columns
a_abs = abs(A);
degree_out = sum(a_abs,2);
degree_in = sum(a_abs,1)';
ok = (degree_out > 0) & (degree_in > 0);
A = A(ok, ok);

n = size(A, 1);

iteration_count_max = n;
iteration_count_min = floor(0.0012 * n);

fprintf(1, 'Clusco(%d)...\n', n);

clusco_last = NaN;

perm = randperm(n);

for k = 1 : iteration_count_max

    i = perm(k);

    ao = A(i, :)';  % Outlink vector
    ai = A(:, i);   % Inlink vector

    % Positive/negative out/in-neighbors
    nebs_op = find(ao > 0);
    nebs_on = find(ao < 0);
    nebs_ip = find(ai > 0);
    nebs_in = find(ai < 0);

    a_pp = A(nebs_ip, nebs_op);
    a_pn = A(nebs_ip, nebs_on);
    a_np = A(nebs_in, nebs_op);
    a_nn = A(nebs_in, nebs_on);

    % Signed count of edges from in-neighbors to out-neighbors
    node_count = full(sum(sum(a_pp)) - sum(sum(a_pn)) - sum(sum(a_np)) + sum(sum(a_nn)));

    % Number of (in-neighbor, out-neighbor) pairs, excluding nodes that
    % are both an in- and an out-neighbor
    node_pairs = (size(nebs_ip, 1) + size(nebs_in, 1)) * ...
        (size(nebs_op, 1) + size(nebs_on, 1)) - sum((ai ~= 0) & (ao ~= 0));

    if node_count > node_pairs
        error('Invalid counts');
    end

    sum_count = sum_count + node_count;
    sum_pairs = sum_pairs + node_pairs;

    if mod(k, batch_size) == 0

        clusco = sum_count / sum_pairs;

        fprintf(1, '  clusco(%d) = %g\n', k, clusco);

        if k > iteration_count_min
            % Change since the previous batch; NaN on the first check, in
            % which case the comparison below is false
            delta = abs(clusco - clusco_last);
            fprintf(1, '  clusco(%d) = %g [%g]\n', k, clusco, delta);
            if delta < epsilon
                fprintf(1, 'Clusco = %g\n', clusco);
                return;
            end
        end

        clusco_last = clusco;
    end

end;

clusco = sum_count / sum_pairs;
112 |
113 |
--------------------------------------------------------------------------------
/m/konect_clusco_simple.m:
--------------------------------------------------------------------------------
1 | %
2 | % Same as konect_clusco(), but does not support negative edges or
3 | % directed graphs, and is therefore faster. The difference when
4 | % calling it is that A can be the half-adjacency matrix in this
5 | % function.
6 | %
7 | % PARAMETERS
8 | % A (n*n) Half-adjacency matrix; must be a square 0/1
9 | % matrix
10 | %
11 | % RESULT
12 | % c_local (n*1) Node vector giving the local clustering
13 | % coefficient of each node; the value is zero
14 | % when the degree of a node is zero or one
15 | % c_global_1 Global clustering coefficient (variant 1),
16 | % defined as the probability that two incident
17 | % edges are completed by a third edge to form a
18 | % triangle; this is the default clustering
19 | % coefficient in KONECT
20 | % c_global_2 Global clustering coefficient (variant 2),
21 | % defined as the average local clustering
22 | % coefficient; usually not used in KONECT
23 | %
24 |
function [c_local, c_global_1, c_global_2] = konect_clusco_simple(A)
% Local and global clustering coefficients of an undirected, unsigned
% graph given by a square 0/1 (half-)adjacency matrix.  See the file
% header for a description of the three return values.

assert(size(A, 1) == size(A, 2));

n = size(A, 1);

% Check that the matrix is a 0/+1 matrix
[x y z] = find(A);
if sum(z ~= 1) > 0
    error('*** A must be a 0/+1 matrix');
end

% Remove diagonal elements
z(x == y) = 0;
A = sparse(x, y, z, n, n);
A = A + A';
A = (A ~= 0);
% A is now the full adjacency matrix

sum_pairs = 0;
sum_count = 0;

c_local = zeros(n, 1);

t = konect_timer(n);

for u = 1 : n

    t = konect_timer_tick(t, u);

    % Vector of neighbors
    a = A(:, u);

    % Indexes of neighbors
    nebs = find(a > 0);

    % Submatrix of relationships between neighbors
    A_sub = A(nebs, nebs);

    % Number of edges among neighbors, double counted
    user_count = nnz(A_sub);

    % Number of possible neighbor pairs, double counted
    degree_u = nnz(a);
    user_pairs = degree_u * (degree_u - 1);

    sum_count = sum_count + user_count;
    sum_pairs = sum_pairs + user_pairs;

    if user_pairs ~= 0
        c_u = user_count / user_pairs;
        c_local(u) = c_u;
        % Sanity check.  Only variables that exist in this function are
        % reported, so that a violation produces this message rather
        % than an "undefined variable" error.
        if abs(c_u) > 1
            error('*** c_u = %f, u = %d, user_count = %d, user_pairs = %d', ...
                  c_u, u, user_count, user_pairs);
        end
    end;

end;

konect_timer_end(t);

c_global_1 = sum_count / sum_pairs;
c_global_2 = mean(c_local);

if abs(c_global_1) > 1
    error('*** c_global_1 = %f', c_global_1);
end

if abs(c_global_2) > 1
    error('*** c_global_2 = %f', c_global_2);
end
107 |
--------------------------------------------------------------------------------
/m/@konect_timer/konect_timer.m:
--------------------------------------------------------------------------------
1 | %
2 | % Timer class. This is used to output the remaining time on the log
3 | % of a long computation. See konect_clusco.m for example usage.
4 | %
5 | % Note: it is important to assign the result of konect_timer_tick()
6 | % to the timer object.
7 | %
8 | % ABOUT
9 | % This file is part of the KONECT Matlab Toolbox version 0.3.
10 | % konect.cc
11 | % (c) Jerome Kunegis 2017; this is Free Software released under
12 | % the GPLv3, see COPYING.
13 | %
14 |
classdef konect_timer
% Progress timer that logs the estimated remaining time of a long loop.
%
% Typical use:
%     t = konect_timer(n);
%     for i = 1 : n
%         t = konect_timer_tick(t, i);   % the result must be assigned
%         ...
%     end
%     konect_timer_end(t);

    properties
        time_begin      % clock value at creation
        time_last       % clock value of the last printed message
        time_threshold  % minimal number of seconds between two messages
        n               % total number of iterations
        count_my;       % number of timers already active at creation
    end

    methods

        % Create a timer with N iterations.  A negative N does not
        % create a usable timer; it only decrements the count of active
        % timers and is used internally by konect_timer_end().
        function this = konect_timer(n)
            persistent count;

            if (n < 0)
                count = count - 1;
                return;
            end

            this.time_begin = clock;
            this.time_last = this.time_begin;
            this.n = n;
            if isempty(count), count = 0; end;
            this.count_my = count;
            count = count + 1;
            this.time_threshold = 10; % seconds
        end

        % Signal that iteration I is about to start.  Prints a progress
        % line if at least TIME_THRESHOLD seconds have passed since the
        % last one.
        function this = konect_timer_tick(this, i)

            assert(i >= 0);

            time_now = clock;

            time_diff = etime(time_now, this.time_last);
            if (time_diff < this.time_threshold)
                return;
            end

            this.time_last = time_now;

            % Extrapolate from the I-1 completed iterations.  For I <= 1
            % the estimate is not finite or negative, which
            % konect_timer_text() renders as '?'.
            time_diff = etime(time_now, this.time_begin);
            left = time_diff * (this.n - i + 1) / (i - 1);
            text = konect_timer_text(this, left);
            % Timers created while another one was active use braces
            if this.count_my > 0
                fprintf(1, '%d of %d {%s left}\n', i - 1, this.n, text);
            else
                fprintf(1, '%d of %d [%s left]\n', i - 1, this.n, text);
            end
        end

        % Format a duration of T seconds as M:SS, H:MM:SS or D-HH:MM:SS.
        % Returns '?' when T is not a finite nonnegative number.
        function text = konect_timer_text(this, t)
            if ~isfinite(t) || t < 0
                text = '?';
            elseif t < 3600
                text = sprintf('%d:%02d', floor(t/60), mod(floor(t), 60));
            elseif t < 3600 * 24
                text = sprintf('%d:%02d:%02d', floor(t/3600), mod(floor(t/60), 60), mod(floor(t), 60));
            else
                text = sprintf('%d-%02d:%02d:%02d', floor(t/3600/24), mod(floor(t/3600), 24), mod(floor(t/60), 60), mod(floor(t), 60));
            end
        end

        % Finish the timer.  A final line is printed only if a progress
        % line was printed before.
        function this = konect_timer_end(this)
            e = etime(this.time_last, this.time_begin);
            if e ~= 0
                text = konect_timer_text(this, 0);
                if this.count_my > 0
                    fprintf(1, '%d of %d {%s left}\n', this.n, this.n, text);
                else
                    fprintf(1, '%d of %d [%s left]\n', this.n, this.n, text);
                end
            end
            % Decrement the count of active timers
            konect_timer(-1);
        end

        % Set new value of N, i.e. of the total number of
        % iterations
        function this = konect_timer_set(this, n_new)
            this.n = n_new;
        end

    end
end
99 |
--------------------------------------------------------------------------------
/m/konect_pa_full.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute statistics related to the preferential attachment exponent
3 | % \beta. This is the internal function that computes everything
4 | % related to the preferential attachment exponent. For a high-level
5 | % interface, use konect_pa().
6 | %
7 | % PARAMETERS
8 | % i_1 (e_1 * 1) IDs of all old vertices
9 | % w_1 (e_1 * 1) Weights of all old vertices, or 1 for uniform weights
10 | % i_2 (e_2 * 1) IDs of all new vertices
11 | % w_2 (e_2 * 1) Weights of all new vertices, or 1 for uniform weights
12 | %
13 | % RETURN VALUE
14 | % ret Values
15 | % .$method For each method, see below (the
16 | % letters in parentheses)
17 | % A vector of parameter values. Each
18 | % vector contains two (or one) elements: the
19 | % preferential attachment exponent \beta
20 | % and the error term (optional). This implementation
21 | % includes only the 'a' method to be
22 | % compatible with GNU Octave (which
23 | % doesn't have lscov() by default)
24 | % .lambda
25 | % .lambda_1
26 | % ret_data Related data
27 | % .xx .yy .xxx
28 | %
29 |
function [ret ret_data] = konect_pa_full(i_1, w_1, i_2, w_2)
% Internal computation of the preferential attachment statistics: relates
% the degree of each vertex in the old edge set (i_1, w_1) to its
% (regularized) degree in the new edge set (i_2, w_2).  See the file
% header for the fields of RET and RET_DATA.

% Regularization parameters
lambda = 0.1;
lambda_1 = 1;

% Number of vertices
n = max(max(i_1), max(i_2));

% (n*1)
% The degree of each vertex, for both time bins
d_1 = sparse(i_1, 1, w_1, n, 1);
d_2 = sparse(i_2, 1, w_2, n, 1);

% Regularization (applied to the new degrees only)
d_2 = d_2 + lambda;

% Maximum degree
d_max = max(d_1);

% All three vectors below are indexed by old degree.  They are
% accumulated at index (degree + 1) so that degree zero is representable,
% and the entry for degree zero is then dropped.

% (d_max*1)
% Degree distribution, i.e., frequency of degree
% Indexes are degree values, values are number of nodes with that degree
freq_1 = sparse(d_1+1, 1, 1, d_max+1, 1);  freq_1 = freq_1(2:end);

% (d_max*1)
% Total number of new edges for nodes of given degree.  Indexes are
% degrees.  Values are number of new edges attached to nodes of that
% degree.
summ = sparse(d_1+1, 1, d_2, d_max+1, 1);  summ = summ(2:end);

% (d_max*1)
% Total sum of squares of new degrees for nodes with given old degree.
sumsq = sparse(d_1+1, 1, d_2 .^ 2, d_max+1, 1);  sumsq = sumsq(2:end);

% The points on the plot: old degrees that actually occur ...
xx = find(freq_1 > 0);

% ... with the mean new degree of the nodes having that old degree ...
yy = summ(xx) ./ freq_1(xx);

% ... and the standard deviation of these new degrees
yy_dev = ((sumsq(xx) ./ freq_1(xx)) - (summ(xx) ./ freq_1(xx)) .^ 2) .^ 0.5;

%
% (a) a * x
%
% Least-squares fit of yy = a * xx.  The semicolon keeps the result out
% of the console output.
fact_a = xx \ yy;
ret.a = [ fact_a ];

% Log of new degrees
d_geo_2 = log(d_2);

sum_geo = sparse(d_1+1, 1, d_geo_2, d_max+1, 1);  sum_geo = sum_geo(2:end);

sumsq_geo = sparse(d_1+1, 1, d_geo_2 .^ 2, d_max+1, 1);  sumsq_geo = sumsq_geo(2:end);

yy_geo = sum_geo(xx) ./ freq_1(xx);

% The YY values computed geometrically.
yy_geo_orig = exp(yy_geo);

% The additive error in the geometric domain.  This is the sample
% standard deviation; it is not finite for degrees that occur only once.
yy_dev_log_geo = (((sumsq_geo(xx) ./ freq_1(xx)) - (sum_geo(xx) ./ freq_1(xx)) .^ 2) .* freq_1(xx) ./ (freq_1(xx) - 1)) .^ 0.5;

% The multiplicative error, i.e., the actual values are multiplied or
% divided by these values
yy_dev_geo = exp(yy_dev_log_geo);

%
% Save
%

ret.lambda = lambda;
ret.lambda_1 = lambda_1;

ret_data.xx = xx;
ret_data.yy = yy;
ret_data.yy_dev = yy_dev;
ret_data.yy_geo_orig = yy_geo_orig;
ret_data.yy_dev_geo = yy_dev_geo;
ret_data.d_1 = d_1;
ret_data.d_2 = d_2;
113 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | == KONECT Toolbox ==
2 |
3 | This is a toolbox for the analysis of large complex networks written in
4 | Matlab, but also largely compatible with GNU Octave. It is used by the KONECT
5 | Project:
6 |
7 | http://konect.cc/
8 |
9 | This toolbox largely implements the graph analysis functions described in
10 | the KONECT Handbook:
11 |
12 | https://github.com/kunegis/konect-handbook/blob/master/konect-handbook.pdf
13 |
14 | In particular, the KONECT Handbook contains all formal definitions of
15 | the computations performed in this toolbox.
16 |
17 | To support KONECT, you may donate here:
18 |
19 | https://www.paypal.com/donate?hosted_button_id=Q9JY2FB3AFHR6
20 |
21 | === Installation ===
22 |
23 | The toolbox consists entirely of *.m files. To use it, add the
24 | directory m/ to the Matlab path, e.g., using addpath() or by setting the
25 | $MATLABPATH variable.
26 |
27 | GNU Octave too supports the addpath() function and $MATLABPATH
28 | variable.
29 |
30 | === Requirements ===
31 |
32 | Some functions need Matlab-BGL, the Matlab port of the Boost Graph
33 | Library.
34 |
35 | Installation of Matlab-BGL:
36 | * Download version 4.0.1 from the Matlab-BGL website (newer versions may
37 | not work)
38 | * Unzip it somewhere where you'll keep it
39 | * (On a few old Debian-based systems, we needed to install libstdc++5 from http://packages.debian.org/stable/base/libstdc++5)
40 | * Make sure matlab_bgl/ is in the Matlab path. This is necessary for
41 | both GNU Octave and Matlab. For instance, do
42 | export MATLABPATH="$MATLABPATH:~/matlab_bgl/"
43 | (But probably you have your own way of setting the Matlab path.)
44 |
45 | === Usage ===
46 |
47 | The toolbox consists of individual functions, each of which is
48 | contained in its own *.m file in the m/ directory.
49 |
50 | Most functions are self-contained, and can be used as-is.
51 |
52 | Each function is documented in its source file.
53 |
54 | The source files contain UTF-8 encoded text in the comments; the rest is
55 | ASCII.
56 |
57 | The functions are used from both GNU Octave and Matlab. Since neither of
58 | these is properly standardised, we can't assure that all versions will
59 | work -- when in doubt, ask.
60 |
61 | If you fix bugs or add new features, please contact the author
62 | so we can merge your changes into the master version.
63 |
64 | === Help ===
65 |
66 | For support, write to the author, or open a ticket directly on
67 | GitHub:
68 |
69 | https://github.com/kunegis/konect-toolbox
70 |
71 | === License ===
72 |
73 | Written by Jérôme Kunegis.
74 |
75 | The KONECT Toolbox is free software, and therefore you can easily use it
76 | in your project. You can either tell your users that they have to
77 | install the KONECT Toolbox before they use your software, or you can put
78 | a copy of the files you need into your project directory. If you copy
79 | files from the KONECT Toolbox into your project directory, make sure you
80 | follow the GNU GPL: Write in your README file which files are from the
81 | KONECT Toolbox, mention this license information, and provide a copy of
82 | the GNU GPL, i.e., the file COPYING.
83 |
84 | The KONECT Toolbox is free software: you can redistribute it and/or
85 | modify it under the terms of the GNU General Public License as published
86 | by the Free Software Foundation, either version 3 of the License, or (at
87 | your option) any later version.
88 |
89 | The KONECT Toolbox is distributed in the hope that it will be useful,
90 | but WITHOUT ANY WARRANTY; without even the implied warranty of
91 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
92 | General Public License for more details.
93 |
94 | The full text of the GPLv3 can be found in the file COPYING.
95 |
--------------------------------------------------------------------------------
/m/konect_hopdistr.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the hop distribution, i.e., the distribution of shortest path
3 | % distances between all pairs. As a side effect, this method also
4 | % computes the exact diameter (as the length of the returned vector D),
5 | % and the result can be used to compute the exact effective diameter,
6 | % mean and median diameters.
7 | %
8 | % For undirected and bipartite graphs, this is the normal undirected hop
9 | % plot. For directed graphs, this is the directed hop-plot.
10 | %
11 | % RESULT
12 | % d (1*diam) d(i) equals the number of nodes pairs at
13 | % distance at most i. The value for zero is excluded
14 | % (since d(0) cannot be defined). The length of this
15 | % vector is the graph's diameter. d(end) always equals
16 | % the squared number of nodes.
17 | %
18 | % PARAMETERS
19 | % A Adjacency or biadjacency matrix (need not be
20 | % symmetric for undirected networks); the
21 | % network must be connected if
22 | % allow_disconnected is not set
23 | % format (optional) Format using the constants in
24 | % konect_consts.m; ASYM when not given
25 | % size_chunk (optional) Size of chunks used; the choice of
26 | % this value only influences the runtime of the
27 | % function, not the result ; pass [] for the
28 | % default value
29 | % allow_disconnected (default = 0) If set, allow the
30 | % network to be disconnected, otherwise not. If
31 | % set, the returned distribution contains only the
32 | % pairs that are connected. If not set, the function
33 | % will throw an error when the graph is not
34 | % connected.
35 | %
36 |
37 | function [d] = konect_hopdistr(A, format, size_chunk, allow_disconnected)
38 |
39 | % Size in double variables of the largest number of doubles that is to
40 | % be used as temporary memory. Used in the calculation of the default
41 | % value of SIZE_CHUNK.
42 | size_resident = 1e7;
43 |
44 | consts = konect_consts();
45 |
46 | % Maximum number of iterations
47 | maxit = intmax;
48 |
49 | if ~exist('format', 'var')
50 | format = consts.ASYM;
51 | end
52 |
53 | if ~exist('allow_disconnected', 'var')
54 | allow_disconnected = 0;
55 | end
56 |
57 | A = (A ~= 0);
58 |
59 | if format == consts.SYM | format == consts.ASYM
60 | A = A | A';
61 | elseif format == consts.BIP
62 | [m n] = size(A);
63 | A = [ sparse(m,m) A ; A' sparse(n,n) ];
64 | end
65 |
66 | n = size(A,1);
67 |
68 | if ~exist('size_chunk', 'var') | length(size_chunk) == 0
69 | size_chunk = floor(size_resident / n);
70 | if size_chunk < 1, size_chunk = 1; end;
71 | end
72 |
73 | % Add loops, i.e., diagonal elements, and remove edge weights
74 | A = double(((A ~= 0) + speye(n)) ~= 0);
75 |
76 | % Return value; the values are added up
77 | d = [];
78 |
79 | [k from to] = konect_fromto(1, n, size_chunk);
80 |
81 | t = konect_timer(n);
82 |
83 | for j = 1 : k
84 |
85 | t = konect_timer_tick(t, to(j));
86 |
87 | x = A(:, from(j):to(j));
88 |
89 | % The values of D added for this chunk
90 | dd = [];
91 |
92 | for i = 1 : maxit
93 | dd(i) = nnz(x);
94 |
95 | x_old = x;
96 | x = A * x;
97 | x = x ~= 0;
98 |
99 | if x == x_old, break; end;
100 | end
101 |
102 | % Add sums to d
103 | if length(d) < length(dd)
104 | if length(d) > 0
105 | d((end+1) : length(dd)) = d(end);
106 | else
107 | d(length(dd)) = 0;
108 | end
109 | else
110 | dd((end+1) : length(d)) = dd(end);
111 | end
112 | d = d + dd;
113 | end
114 |
115 | konect_timer_end(t);
116 |
117 | if ~allow_disconnected
118 | if d(end) ~= n*n
119 | error('*** Network is not connected');
120 | end
121 | else
122 | assert(d(end) <= n*n);
123 | end
124 |
125 |
126 |
--------------------------------------------------------------------------------
/m/konect_effective_diameter.m:
--------------------------------------------------------------------------------
%
% Estimate the 90-percentile effective diameter of a graph.  The graph
% should be connected.  In unipartite graphs, this computes the strong
% (i.e., unidirectional) diameter.  To get the weak (unoriented)
% diameter, pass A+A'.   
%
% The value returned is imprecise as a heuristic is used (node
% sampling).  No accuracy of the result is returned, or even known. 
%
% This always computes the 90-percentile effective diameter. 
%
% Implementation: breadth-first searches are started from randomly
% chosen row nodes (with replacement).  Each search step multiplies by
% A*A', so the distances counted are half-distances; the result is
% doubled at the end.  After every batch of 9 searches the estimate is
% recomputed, and the function returns as soon as i^(-1/2) / diameter
% drops below EPSI, where i is the number of searches done so far.  At
% most size(A,1) searches are performed. 
%
% The global random number generator is reseeded by this function. 
%
% PARAMETERS 
%	A	Adjacency matrix or biadjacency matrix
%	epsi 	(optional) Requested precision; default 0.029 
%
% RESULT 
%	ret	The computed effective diameter; NaN when no estimate
%		could be computed (e.g., when A has no rows) 
%

function ret = konect_effective_diameter(A, epsi)

% Compute the effective diameter at this value (e.g. at 90%)
threshold = .9;

% Size of a "batch"
iteration_count = 9;

% Requested precision
if ~exist('epsi', 'var')
    epsi = .029;
end

% Initialize the Matlab random number generator
rng('shuffle')
% The following line was broken by Matlab 2016b.
%% RandStream.setDefaultStream(RandStream('mt19937ar','seed',sum(100*clock)));

fprintf(1, 'Diameter (%d * %d, %d)...\n', size(A,1), size(A,2), nnz(A));

[m,n] = size(A);

% Maximum number of overall iterations
iteration_count_max = m;

% Minimum number of iterations before the precision test is applied
iteration_count_min = floor(m * 0.000001); 

al = double(A ~= 0);

% COUNTS(k) is the number of nodes first reached at half-distance k,
% summed over all searches so far; COUNT is the sum of COUNTS. 
count = 0;
counts = 0;

% We actually compute the half-diameter because we also use a*a', so we
% double the result at the end.

% Defined up front so that RET is NaN when no search is performed
diameter = NaN; 
diameter_last = NaN;

perm = randperm(m); 

for i = 1 : iteration_count_max

    index = perm(1 + floor(rand * m)); 

    % U marks the set of nodes reached so far
    u = zeros(m,1);
    u(index) = 1;
    r_last = 0;
    r = 1;
    distance = 0;

    % Expand the reached set until it stops growing
    while r ~= r_last
        r_last = r;

        % One step along A*A'; computed once and used both for
        % counting the new nodes and for updating the reached set 
        reached = al * (al' * u);
        u_new = logical(reached);
        distance = distance + 1;

        r_new = sum((u_new - u) > 0);
        u = logical(reached + u);
        r = sum(u);
        if size(counts, 2) < distance
            counts(distance) = r_new;
        else
            counts(distance) = counts(distance) + r_new;
        end
        count = count + r_new;
    end;

    % Re-estimate after each full batch and after the last search
    if mod(i, iteration_count) == 0 || i == iteration_count_max

        diameter = konect_effective_diameter_interpolate(counts, count, threshold); 

        if ~isnan(diameter_last) && i >= iteration_count_min

            stddev = i^-.5;
            rel_err = stddev / diameter;

            fprintf(1, '    diameter(%d) = %g [%g]\n', i, diameter, rel_err);
            if rel_err < epsi
                ret = 2 * diameter; 
                return;
            end
        else
            fprintf(1, '    diameter(%d) = %g\n', i, diameter);
        end

        diameter_last = diameter;
    end
end

% The requested precision was not reached; the last search always
% triggers an estimate, so DIAMETER holds the estimate over all searches. 
ret = 2 * diameter; 

%
% Position at which the cumulative distribution of half-distances crosses
% THRESHOLD, linearly interpolated between consecutive integer
% distances.  Returns NaN when there is no crossing. 
%
function diameter = konect_effective_diameter_interpolate(counts, count, threshold)

diameter = NaN;

% Cumulative distribution, with a leading zero for distance zero
cdf = [0 cumsum(counts / count)]; 

for j = 1:(size(cdf,2)-1)
    if cdf(j) <= threshold && cdf(j+1) > threshold
        diameter = j-1 + (threshold - cdf(j)) / (cdf(j+1) - cdf(j)); 
        break;
    end;
end
127 |
--------------------------------------------------------------------------------
/m/konect_clusco.m:
--------------------------------------------------------------------------------
%
% Compute the global clustering coefficients, and at the same time the
% local clustering coefficients for all nodes.  
%
% Note:  to compute just the global clustering coefficient, it is
% faster to use the expression 
%
%     c = 3t / s.  
%
% which expresses the global clustering coefficient c in terms of the
% number of triangles t and the number of wedges s. 
%
% Loops are ignored.  If A not symmetric, the directed clustering
% coefficient is computed.  If A contains negative values, the signed
% clustering coefficient is computed.  
%
% PARAMETERS 
%	A	(n*n) Adjacency matrix; must be square; must be a 0/1
%		matrix for the usual (unsigned) clustering
%		coefficient, or -1/0/+1 for the signed clustering
%		coefficient 
%
% RESULT 
%	c_local 	(n*1) Node vector giving the local clustering
%			coefficient of each node; the value is zero
%			when a node has no possible neighbor pairs
%			(e.g., when its degree is zero or one) 
%	c_global_1	Global clustering coefficient (variant 1),
%			defined as the probability that two incident
%			edges are completed by a third edge to form a
%			triangle; this is the default clustering
%			coefficient in KONECT; NaN when the graph has
%			no possible neighbor pairs at all 
%	c_global_2	Global clustering coefficient (variant 2),
%			defined as the average local clustering
%			coefficient; usually not used in KONECT 
%

function [c_local, c_global_1, c_global_2] = konect_clusco(A)

assert(size(A, 1) == size(A, 2)); 

n = size(A, 1); 

% Check that the matrix is a -1/0/+1 matrix
[x y z] = find(A);
if any(abs(z) ~= 1)
    error('*** A must be a -1/0/+1 matrix'); 
end

% Remove diagonal elements
z(x == y) = 0;
A = sparse(x, y, z, n, n);

% Transposed once, so that the out-neighbors of a node can be read as a
% column.  Extracting a row of a sparse matrix in every iteration is
% much slower than extracting a column. 
At = A'; 

sum_pairs = 0;
sum_count = 0;

c_local = zeros(n, 1); 

t = konect_timer(n); 

for u = 1:n

    t = konect_timer_tick(t, u); 

    % Vectors of neighbors (outgoing and incoming edges of U)
    ao = At(:, u);
    ai = A(:, u);

    % Indexes of positive and negative neighbors
    nebs_op = find(ao > 0);
    nebs_on = find(ao < 0); 
    nebs_ip = find(ai > 0);
    nebs_in = find(ai < 0); 

    % Submatrices of relationships between neighbors
    A_pp = A(nebs_ip, nebs_op);
    A_pn = A(nebs_ip, nebs_on);
    A_np = A(nebs_in, nebs_op);
    A_nn = A(nebs_in, nebs_on);
    
    % Sum of edge weights among neighbors; each edge is weighted by the
    % product of the signs of the two edges connecting it to U. 
    user_count = full(sum(sum(A_pp)) - sum(sum(A_pn)) - sum(sum(A_np)) + sum(sum(A_nn)));

    % Number of possible neighbor pairs:  all (in-neighbor, out-neighbor)
    % combinations, minus those in which both are the same node. 
    user_pairs = full((length(nebs_ip) + length(nebs_in)) * (length(nebs_op) + length(nebs_on)) ...
                      - sum((ai ~= 0) & (ao ~= 0)));

    sum_count = sum_count + user_count;
    sum_pairs = sum_pairs + user_pairs;

    if user_pairs ~= 0
        c_u = user_count / user_pairs;
        c_local(u) = c_u; 
        if abs(c_u) > 1
            % Should not happen; dump the local neighborhood for
            % debugging before aborting. 
            A_pp
            A_pn
            A_np
            A_nn
            nebs_op
            nebs_on
            nebs_ip
            nebs_in
            error('*** c_u = %f, u = %d, user_count = %d, user_pairs = %d, [%d %d %d %d]', ...
                  c_u, u, user_count, user_pairs, ...
                  length(nebs_op), length(nebs_on), length(nebs_ip), length(nebs_in)); 
        end
    end;

end;

konect_timer_end(t); 

% Terminated with semicolons so that the results are not echoed to the
% console on every call. 
c_global_1 = sum_count / sum_pairs; 
c_global_2 = mean(c_local); 

if abs(c_global_1) > 1
    error('*** c_global_1 = %f', c_global_1); 
end

if abs(c_global_2) > 1
    error('*** c_global_2 = %f', c_global_2); 
end
122 |
--------------------------------------------------------------------------------
/m/konect_statistic_derived.m:
--------------------------------------------------------------------------------
%
% Compute a derived statistic, i.e., a statistic that is a closed-form
% function of other, already known statistics of a network. 
%
% RESULT 
%	values 	Value of STATISTIC (a scalar for all statistics
%		implemented here) 
%
% PARAMETERS 
%	statistic		Name of the statistic to compute
%	statistic_underlying	Name of the statistic of which values are
%				known; checked against the name expected
%				for STATISTIC 
%	values_underlying	The known values of the underlying
%				statistic, as a column vector; the first
%				value is used, except for 'meandist',
%				which uses the fourth 
%	values_size		Values of the SIZE statistic; the first
%				value is used as the number of nodes 
%	values_volume		Values of the VOLUME statistic (currently
%				unused) 
%	values_uniquevolume	Values of the UNIQUEVOLUME statistic; the
%				first value is used as the number of edges 
%	values_fill		Column vector of FILL statistics; the
%				second value is used as the edge
%				probability 
%	values_avgdegree	Values of the AVGDEGREE statistic; the
%				fourth value is used as the degree 
%

function values = konect_statistic_derived(statistic, statistic_underlying, ...
                                           values_underlying, ...
                                           values_size, ...
                                           values_volume, ...
                                           values_uniquevolume, ...
                                           values_fill, ...
                                           values_avgdegree)

switch statistic

  case 'triangles_norm'

    assert(strcmp(statistic_underlying, 'triangles')); 

    count_triangles = values_underlying(1);
    count_nodes = values_size(1);

    % Offset and scale used for the normalization
    offset = (1/48) * count_nodes * (count_nodes-1) * (count_nodes-2); 
    scale = sqrt( 7 / 384 * count_nodes * (count_nodes-1) * (count_nodes-2)); 

    values = (count_triangles - offset) / scale; 

  case 'clusco_norm'

    assert(strcmp(statistic_underlying, 'clusco')); 

    clusco = values_underlying(1);
    count_nodes = values_size(1);

    values = (clusco - 0.5) * sqrt(count_nodes * (count_nodes-1) / 2); 

  case 'clusco_norm_p'

    assert(strcmp(statistic_underlying, 'clusco')); 

    clusco = values_underlying(1);
    fill = values_fill(2);

    values = clusco / fill; 

  case 'triangles_norm_p'

    assert(strcmp(statistic_underlying, 'triangles')); 

    count_triangles = values_underlying(1);
    count_nodes = values_size(1);
    fill = values_fill(2);

    denominator = fill^3 * (1/6) * count_nodes * (count_nodes-1) * (count_nodes-2); 

    values = count_triangles / denominator; 

  case 'twostars_norm_p'

    assert(strcmp(statistic_underlying, 'twostars')); 

    count_twostars = values_underlying(1);
    count_nodes = values_size(1);
    fill = values_fill(2);

    denominator = fill * fill * 0.5 * count_nodes * (count_nodes-1) * (count_nodes-2); 

    values = count_twostars / denominator; 

  case 'twostars_coef'

    assert(strcmp(statistic_underlying, 'twostars')); 

    count_twostars = values_underlying(1);
    count_edges = values_uniquevolume(1);
    count_nodes = values_size(1);

    values = count_twostars / (count_edges * (count_nodes - 2)); 

  case 'twostars_norm_d'

    assert(strcmp(statistic_underlying, 'twostars')); 

    count_twostars = values_underlying(1);
    count_nodes = values_size(1);
    degree = values_avgdegree(4);

    values = (count_twostars / count_nodes) / (0.5 * degree * (degree-1)); 

  case 'clusco_n'

    assert(strcmp(statistic_underlying, 'clusco')); 

    clusco = values_underlying(1);
    count_nodes = values_size(1);

    values = sqrt(count_nodes) * clusco; 

  case 'twostars_perf'

    assert(strcmp(statistic_underlying, 'twostars')); 

    count_twostars = values_underlying(1);
    count_nodes = values_size(1);
    count_edges = values_uniquevolume(1);

    values = count_twostars / (sqrt(count_nodes) * count_edges); 

  case 'volume_norm'

    assert(strcmp(statistic_underlying, 'volume')); 

    % The number of edges is taken from the unique volume, not from
    % VALUES_UNDERLYING. 
    count_edges = values_uniquevolume(1);
    count_nodes = values_size(1);
    prob = 0.5;

    offset = prob * (1/2) * count_nodes * (count_nodes-1); 
    scale = sqrt(prob * (1-prob) * (1/2) * count_nodes * (count_nodes-1)); 

    values = (count_edges - offset) / scale; 

  case 'meandist'

    assert(strcmp(statistic_underlying, 'diam')); 

    % The mean distance is the fourth value of the DIAM statistic
    values = values_underlying(4); 

  otherwise
    error(sprintf('*** Invalid statistic %s', statistic)); 

end
140 |
--------------------------------------------------------------------------------