├── .gitignore
├── CFLAGS.common
├── CFLAGS.dbg
├── CFLAGS.mode
├── CFLAGS.opt
├── COPYING
├── DECOMPOSITIONS
├── DECOMPOSITIONS_ASYM
├── DOWNLOAD_PATTERNS
├── GROUPS
├── GROUPS-PLOT
├── INSTALL
├── Makefile.old
├── PLOTS
├── PLOTTING
├── README
├── SCATTERS
├── STATISTICS
├── TODO
├── c
    ├── .gitignore
    ├── README
    ├── asxprintf.h
    ├── binary_heap.h
    ├── bits.h
    ├── consts.h
    ├── dijkstra.h
    ├── feature.h
    ├── feature_degree.c
    ├── graph.h
    ├── graph_read_sgraph0.h
    ├── graph_simple.h
    ├── graph_width.h
    ├── hyperanf.c
    ├── hyperloglog.h
    ├── ifub.c
    ├── lcc.h
    ├── lines.c
    ├── maxdegree.c
    ├── outs.c
    ├── sgraph.h
    ├── sgraph0_create.c
    ├── sgraph0_io.h
    ├── sgraph1_create.c
    ├── sgraph1_create_lcc.c
    ├── sgraph1_create_simple.c
    ├── sgraph1_dump.c
    ├── sgraph1_io.h
    ├── sgraph1_subgraph.h
    ├── sgraph1_write.h
    ├── size.c
    ├── statistic_mediandegree.c
    ├── statistic_twostars.c
    ├── statistic_volume.c
    ├── triangles.c
    ├── volume.c
    ├── width.h
    └── widthhelper.h
├── colormap
├── jl
    ├── inter.jl
    ├── inter2.jl
    ├── konect_consts.jl
    ├── read_statistic.jl
    └── step_full.jl
├── julia
├── ktop
├── lib
    ├── .gitignore
    ├── JULIA
    ├── README
    ├── deta.m
    ├── gridxy
    │   ├── gridxy.m
    │   └── license.txt
    ├── hsl2rgb.m
    ├── octave
    │   ├── README
    │   └── corr.m
    ├── plpva.m
    └── rgb2hsl.m
├── m
    ├── approximation.m
    ├── approximation_plot.m
    ├── assortativity.m
    ├── assortativity_one.m
    ├── axis_fit.m
    ├── beta_do.m
    ├── beta_one.m
    ├── beta_plot.m
    ├── bidd.m
    ├── bidd_one.m
    ├── check.m
    ├── check_failed.m
    ├── check_successful.m
    ├── cluscod.m
    ├── cluscod_plot.m
    ├── comparison.m
    ├── comparison_cross.m
    ├── complex2rgb.m
    ├── curve_apply.m
    ├── data.m
    ├── decomposition_comp.m
    ├── decomposition_map.m
    ├── decomposition_plot.m
    ├── decomposition_split.m
    ├── decomposition_time.m
    ├── decomposition_time_plot.m
    ├── degcc.m
    ├── degree.m
    ├── degree_print.m
    ├── delaunay_one.m
    ├── delaunay_plot.m
    ├── diadens.m
    ├── diagonality.m
    ├── distr.m
    ├── distr_plot.m
    ├── distrtest_colors.m
    ├── distrtest_multi.m
    ├── distrtest_plot.m
    ├── distrtest_types.m
    ├── estimate_power_law.m
    ├── evol_permutation.m
    ├── exp_entropy.m
    ├── fit.m
    ├── fit_plot.m
    ├── fit_plot_curve.m
    ├── format_number.m
    ├── format_statistic.m
    ├── get_ids_submethod.m
    ├── get_labels_measure.m
    ├── get_labels_method.m
    ├── get_labels_method_submethod.m
    ├── get_labels_submethod.m
    ├── get_rank.m
    ├── get_rank_type.m
    ├── get_tags.m
    ├── get_updown_statistic.m
    ├── has_timestamps.m
    ├── hopdistr_comp.m
    ├── hopdistr_distrtest.m
    ├── hopdistr_plot.m
    ├── hopdistr_time_comp.m
    ├── hopdistr_time_plot.m
    ├── konect_decomposition_dedicom4.m
    ├── ksdist.m
    ├── layout.m
    ├── load_strings.m
    ├── lorenz.m
    ├── lorenz_one.m
    ├── lybl.m
    ├── map.m
    ├── map_line.m
    ├── map_minmax.m
    ├── mask.m
    ├── mask_step.m
    ├── means.m
    ├── means_best.m
    ├── means_e.m
    ├── means_euv.m
    ├── means_regr.m
    ├── means_regrn.m
    ├── measure_compute.m
    ├── measure_compute_ap.m
    ├── measure_compute_auc.m
    ├── measure_compute_corr.m
    ├── measure_compute_kendall.m
    ├── measure_compute_map.m
    ├── measure_compute_mauc.m
    ├── measure_compute_spear.m
    ├── mediandist.m
    ├── mkcategory.m
    ├── network_key.m
    ├── outin.m
    ├── pa_compute.m
    ├── pa_plot.m
    ├── pa_plot_one.m
    ├── pivotize.m
    ├── precision_all.m
    ├── precision_comp.m
    ├── precision_one.m
    ├── precisions_plot.m
    ├── predict_euclidean.m
    ├── predict_spectral.m
    ├── prediction_decomposition.m
    ├── prediction_local.m
    ├── prediction_local_compute.m
    ├── prediction_local_compute_mask.m
    ├── prediction_local_compute_neib.m
    ├── prediction_local_compute_neib3.m
    ├── prediction_local_compute_pref.m
    ├── prediction_local_compute_zero.m
    ├── prepare_matrix_target.m
    ├── rating_evolution.m
    ├── rating_evolution2.m
    ├── read_info.m
    ├── read_meta.m
    ├── read_statistic.m
    ├── rmse_full.m
    ├── rmse_latent.m
    ├── runtime.m
    ├── scatter_comp.m
    ├── scatter_plot.m
    ├── scatter_single.m
    ├── shrinkingdiversity.m
    ├── sne.m
    ├── spectral_diagonality_test.m
    ├── spectral_extrapolation.m
    ├── spectrum_visualize.m
    ├── split.m
    ├── statistic_avgdegree.m
    ├── statistic_comp.m
    ├── statistic_comp_spectral.m
    ├── statistic_diameff.m
    ├── statistic_full_prefatt.m
    ├── statistic_lines.m
    ├── statistic_meandist.m
    ├── statistic_size.m
    ├── statistic_spectral.m
    ├── statistic_time.m
    ├── statistic_time_diam.m
    ├── statistic_time_plot.m
    ├── statistic_time_slice.m
    ├── statistic_time_spectral.m
    ├── statistics_time_plot.m
    ├── steps.m
    ├── stepsi.m
    ├── styles_method.m
    ├── styles_submethod.m
    ├── syngraphy_plot.m
    ├── time_degree.m
    ├── time_histogram.m
    ├── time_histogram_signed.m
    ├── time_xaxis.m
    ├── time_xaxis_unix.m
    ├── trend.m
    ├── trend_plot.m
    ├── weights_plot.m
    ├── zipf.m
    └── zipf_one.m
├── main.stu
├── matlab
├── mns
├── octave
├── pl
    ├── Konect.pm
    ├── README
    └── TexToHtml
    │   └── Converter.pm
├── runtime.source
├── sh
    ├── README
    ├── category
    ├── check
    ├── checkmeta
    ├── classes
    ├── dep-network
    ├── depc
    ├── eps2png
    ├── group
    ├── intersect
    ├── listempty
    ├── mem
    ├── mkcite
    ├── mkdatasetlist
    ├── mkdownloadlist
    ├── mkmissing
    ├── mkpath
    ├── mkrdf
    ├── mkreadme
    ├── mkstat
    ├── mktime
    ├── network-format
    ├── network-info
    ├── network-weights
    ├── out2
    ├── plot-network
    ├── save_diag
    ├── sort-networks
    ├── statistic-network
    ├── statistic-size
    ├── unset-lc
    ├── widths
    ├── widths-one
    └── widths-simple
└── status


/.gitignore:
--------------------------------------------------------------------------------
 1 | CFLAGS
 2 | MATLABPATH
 3 | PERL5LIB
 4 | bin
 5 | dat
 6 | plot
 7 | tex
 8 | uni
 9 | dat-*
10 | plot-*
11 | octave-workspace
12 | konect-toolbox
13 | konect-extr
14 | konect-handbook
15 | stu-utils
16 | syngraphy
17 | error.log
18 | tmp.runtime
19 | lib/BibTeX
20 | lib/LaTeX
21 | lib/HTML
22 | lib/BibTeX-Parser-*.tar.gz
23 | lib/HTML-Parser-*.tar.gz
24 | lib/LaTeX-ToUnicode-*.tar.gz
25 | 


--------------------------------------------------------------------------------
/CFLAGS.common:
--------------------------------------------------------------------------------
1 | -std=c99 -pedantic -Wall -Wextra -Werror 
2 | -Wundef -D_GNU_SOURCE 
3 | -lm
4 | 


--------------------------------------------------------------------------------
/CFLAGS.dbg:
--------------------------------------------------------------------------------
1 | -g
2 | 


--------------------------------------------------------------------------------
/CFLAGS.mode:
--------------------------------------------------------------------------------
1 | opt
2 | 


--------------------------------------------------------------------------------
/CFLAGS.opt:
--------------------------------------------------------------------------------
1 | -O3 -DNDEBUG -s  
2 | 


--------------------------------------------------------------------------------
/DECOMPOSITIONS:
--------------------------------------------------------------------------------
1 | sym
2 | sym-n
3 | lap
4 | lapq
5 | seidel
6 | 


--------------------------------------------------------------------------------
/DECOMPOSITIONS_ASYM:
--------------------------------------------------------------------------------
 1 | svd
 2 | svd-n
 3 | back
 4 | herm
 5 | hermn
 6 | diag
 7 | lapherm
 8 | skew
 9 | skewn
10 | lapskew
11 | lapd
12 | lapd-n
13 | diag-n
14 | stoch2
15 | stoch1
16 | mskew
17 | dedicom3
18 | takane
19 | lapdiag2
20 | 


--------------------------------------------------------------------------------
/GROUPS:
--------------------------------------------------------------------------------
 1 | ALL
 2 | SYM
 3 | ASYM
 4 | BIP
 5 | SQUARE
 6 | NEGATIVE
 7 | NONUNWEIGHTED
 8 | ASYMNEGATIVE
 9 | SQUARENEGATIVE
10 | TIME
11 | TIME_NEGATIVE
12 | MULTI
13 | 
14 | 


--------------------------------------------------------------------------------
/GROUPS-PLOT:
--------------------------------------------------------------------------------
 1 | layout			ALL
 2 | degree			ALL
 3 | bidd			ALL
 4 | lorenz			ALL
 5 | distr.sym		ALL
 6 | distr.sym-n		ALL
 7 | distr.lap		ALL
 8 | map.sym			ALL
 9 | map.lap			ALL
10 | map.stoch		ALL
11 | assortativity		ALL
12 | zipf			ALL
13 | hopdistr		ALL
14 | lybl			ALL
15 | delaunay		ALL
16 | outin			ASYM
17 | rating_evolution	NEGATIVE
18 | weights			NONUNWEIGHTED
19 | cluscod			SQUARE
20 | degcc			SQUARE
21 | time_histogram		TIME
22 | hopdistr_time.full	TIME
23 | diadens			TIME
24 | time_histogram_signed	TIME_NEGATIVE
25 | rating_evolution2	TIME_NEGATIVE
26 | syngraphy		SQUARE
27 | inter			TIME
28 | inter2			TIME
29 | 


--------------------------------------------------------------------------------
/PLOTS:
--------------------------------------------------------------------------------
 1 | layout
 2 | degree
 3 | bidd
 4 | lorenz
 5 | distr.sym
 6 | distr.sym-n
 7 | distr.lap
 8 | map.sym
 9 | map.lap
10 | map.stoch
11 | assortativity
12 | zipf
13 | hopdistr
14 | lybl
15 | delaunay
16 | outin
17 | rating_evolution
18 | weights
19 | cluscod
20 | degcc
21 | time_histogram
22 | hopdistr_time.full
23 | diadens
24 | time_histogram_signed
25 | rating_evolution2
26 | syngraphy
27 | inter
28 | inter2
29 | 


--------------------------------------------------------------------------------
/PLOTTING:
--------------------------------------------------------------------------------
 1 | The plots generated by KONECT are stored as "plot/*.eps". 
 2 | 
 3 | All plots are printed with the function konect_print() from the KONECT
 4 | toolbox.  That function also contains style recommendations for plots.  
 5 | 
 6 | == Names of plots ==
 7 | 
 8 | All plots are named like this:
 9 | 
10 | plot/$SCRIPT.$TYPE.$NETWORK.eps
11 | 
12 | where
13 | 
14 | * $SCRIPT is the name of the generating script (without "_plot" if present)
15 | * $TYPE is the type, when one script generates multiple plots; typically
16 |   single letters, with "a" being the main plot, i.e. the only plot
17 |   declared in the Makefile.  'u' and 'v' are common names when there are
18 |   separate left/right plots, or separate in-degree/out-degree plots. 
19 | * $NETWORK is the network name, and is always the last part of the name
20 | 
21 | All name parts should be in all-lowercase, with words separated by
22 | dashes or underscores. 
23 | 
24 | All plots are saved in color EPS.
25 | 


--------------------------------------------------------------------------------
/SCATTERS:
--------------------------------------------------------------------------------
 1 | 
 2 | volume.size
 3 | size.volume
 4 | volume.uniquevolume
 5 | size.avgdegree
 6 | volume.avgdegree
 7 | size.fill
 8 | volume.maxdegree
 9 | volume.reciprocity
10 | volume.negativity
11 | size.coco
12 | size.cocos
13 | volume.twostars
14 | volume.threestars
15 | volume.triangles
16 | volume.squares
17 | volume.power
18 | volume.gini
19 | volume.dentropyn
20 | volume.clusco
21 | volume.diam
22 | volume.diameff90
23 | volume.diameff50
24 | volume.snorm
25 | volume.alcon
26 | 


--------------------------------------------------------------------------------
/STATISTICS:
--------------------------------------------------------------------------------
 1 | size
 2 | size+2
 3 | size+3
 4 | volume
 5 | uniquevolume
 6 | loops
 7 | twostars
 8 | threestars
 9 | fourstars
10 | triangles
11 | squares
12 | tour4
13 | maxdegree
14 | maxdegree+2
15 | maxdegree+3
16 | maxdegree+4
17 | maxdegree+5
18 | avgdegree
19 | avgdegree+2
20 | avgdegree+3
21 | fill
22 | avgmult
23 | coco
24 | cocos
25 | cocos+2
26 | diam
27 | diameff50
28 | diameff90
29 | mediandist
30 | meandist
31 | gini
32 | own
33 | own+2
34 | own+3
35 | own+4
36 | own+5
37 | dentropyn
38 | power
39 | power2
40 | power3
41 | power3+4
42 | power3+6
43 | power3+9
44 | power3+11
45 | power3+14
46 | power3+16
47 | power3+19
48 | power3+21
49 | power3+24
50 | assortativity
51 | assortativity+2
52 | inoutassort
53 | clusco
54 | cluscoasym
55 | snorm
56 | opnorm
57 | maxdiag
58 | alcon
59 | separation
60 | reciprocity
61 | nonbip
62 | nonbipn
63 | nonbipal
64 | anticonflict
65 | negativity
66 | conflict
67 | tconflict
68 | fconflict
69 | controllability
70 | controllability+2
71 | 


--------------------------------------------------------------------------------
/c/.gitignore:
--------------------------------------------------------------------------------
1 | *.*.h
2 | 


--------------------------------------------------------------------------------
/c/README:
--------------------------------------------------------------------------------
 1 | 
 2 | These are C programs used in KONECT-Analysis.  These are much
 3 | faster and use much less memory than Matlab, but are also more complex.  
 4 | 
 5 | Each standalone program is a *.c file.  Libraries are simply *.h files.
 6 | We always compile *.c files in a single step, without using *.o files.
 7 | 
 8 | The compiled files are in bin/.
 9 | 
10 | We use the C99 standard.
11 | 
12 | Programs use custom bit widths per dataset; see width.h for an overview. 
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/c/asxprintf.h:
--------------------------------------------------------------------------------
 1 | #ifndef ASXPRINTF_H
 2 | #define ASXPRINTF_H
 3 | 
 4 | #include <stdarg.h>
 5 | 
 6 | /* printf-style formatting into a freshly allocated, zero-terminated
 7 |    string.  The caller owns the result and releases it with free(). 
 8 |    If formatting fails, call perror() and exit with status 1. 
 9 |  */
10 | char *asxprintf(const char *fmt, ...)
11 | 	__attribute__ ((format(printf, 1, 2)));
12 | 
13 | char *asxprintf(const char *fmt, ...)
14 | {
15 | 	char *text;
16 | 	va_list args;
17 | 
18 | 	/* Let vasprintf() size and allocate the buffer */
19 | 	va_start(args, fmt);
20 | 	const int status= vasprintf(&text, fmt, args);
21 | 	va_end(args);
22 | 
23 | 	/* A negative status means the string could not be produced */
24 | 	if (status < 0) {
25 | 		perror("vasprintf");
26 | 		exit(1);
27 | 	}
28 | 
29 | 	/* Ownership passes to the caller */
30 | 	return text;
31 | }
32 | 
33 | #endif /* ! ASXPRINTF_H */
34 | 


--------------------------------------------------------------------------------
/c/bits.h:
--------------------------------------------------------------------------------
 1 | #ifndef BITS_H
 2 | #define BITS_H
 3 | 
 4 | /*
 5 |  * These are various bit manipulation functions. 
 6 |  */
 7 | /* Mask selecting bit B within its byte */
 8 | #define BITMASK(b) (1 << ((b) % CHAR_BIT))
 9 | /* Index of the byte that holds bit B */
10 | #define BITSLOT(b) ((b) / CHAR_BIT)
11 | /* Set bit B in the byte array A */
12 | #define BITSET(a, b) ((a)[BITSLOT(b)] |= BITMASK(b))
13 | /* Clear bit B in the byte array A */
14 | #define BITCLEAR(a, b) ((a)[BITSLOT(b)] &= ~BITMASK(b))
15 | /* Nonzero iff bit B is set in the byte array A */
16 | #define BITTEST(a, b) ((a)[BITSLOT(b)] & BITMASK(b))
17 | /* Bytes needed to store NB bits; NB is parenthesized so any expression works */
18 | #define BITNSLOTS(nb) (((nb) + CHAR_BIT - 1) / CHAR_BIT)
19 | 
20 | /* 
21 |  * Bitwise-OR the bit array A2 into the bit array A1, i.e. perform
22 |  *
23 |  *    A1 |= A2.
24 |  *
25 |  * N is the length in bits of the arrays; whole bytes are processed. 
26 |  *
27 |  * A1 and A2 must not overlap (both are restrict-qualified). 
28 |  */
29 | void BITSSET(unsigned char *restrict a1, 
30 | 	     const unsigned char *restrict a2, 
31 | 	     size_t n) 
32 | {
33 | 	const size_t slots= BITNSLOTS(n);
34 | 	for (size_t slot= 0;  slot != slots;  ++slot)
35 | 		a1[slot]= a1[slot] | a2[slot];
36 | }
37 | 
38 | /* Return how many bits are set in the bit array A of N bits. 
39 |  */
40 | size_t BITSCOUNT(const unsigned char *restrict a, size_t n)
41 | {
42 | 	/* The multiply-and-mask trick below needs 64-bit long longs */ 
43 | 	assert(CHAR_BIT * sizeof(long long) == 64); 
44 | 
45 | 	const size_t slots= BITNSLOTS(n);
46 | 	size_t total= 0;
47 | 	for (size_t slot= 0;  slot < slots;  ++slot) {
48 | 		/* Per-byte population count, taken from 
49 | 		 * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64
50 | 		 */
51 | 		total += ((a[slot] * 0x200040008001ULL & 0x111111111111111ULL) % 0xf);
52 | 	}
53 | 	return total; 
54 | }
55 | 
56 | #endif /* ! BITS_H */ 
57 | 


--------------------------------------------------------------------------------
/c/consts.h:
--------------------------------------------------------------------------------
 1 | #ifndef CONSTS_H
 2 | #define CONSTS_H
 3 | 
 4 | /* Numerical values of variables representing the format and widths of a
 5 |  * network.  
 6 |  */
 7 | 
 8 | #define FORMAT_SYM  1
 9 | #define FORMAT_ASYM 2
10 | #define FORMAT_BIP  3
11 | 
12 | #define WEIGHTS_UNWEIGHTED       1
13 | #define WEIGHTS_POSITIVE         2
14 | #define WEIGHTS_POSWEIGHTED      3
15 | #define WEIGHTS_SIGNED           4
16 | #define WEIGHTS_MULTISIGNED      5
17 | #define WEIGHTS_WEIGHTED         6
18 | #define WEIGHTS_MULTIWEIGHTED    7
19 | #define WEIGHTS_DYNAMIC          8
20 | #define WEIGHTS_MULTIPOSWEIGHTED 9
21 | 
22 | #endif /* ! CONSTS_H */
23 | 
24 | 


--------------------------------------------------------------------------------
/c/dijkstra.h:
--------------------------------------------------------------------------------
 1 | #ifndef DIJKSTRA_H
 2 | #define DIJKSTRA_H
 3 | 
 4 | /* 
 5 |  * Dijkstra's algorithm on an SG1 graph.
 6 |  */
 7 | 
 8 | #include "bits.h"
 9 | 
10 | #if TYPE_u$ != TYPE_v$
11 | #   error Dataset must have equal types for U and V
12 | #endif
13 | 
14 | /* Compute the distances from U to all nodes; every edge counts as 1.
15 |  * Write the result into D, which must have length N (number of nodes,
16 |  * taken from r->h->n1).  Write the bit array of visited nodes into
17 |  * *VISITED if VISITED is not NULL; the caller then owns that calloc'ed
18 |  * array.  If VISITED is NULL the array is freed before returning.  */
19 | void dijkstra_$(const struct sgraph1_reader_$ *r, 
20 | 				u$_ft u, 
21 | 				u$_ft *d,
22 | 				unsigned char **visited)
23 | {
24 | 	const u$_ft n= r->h->n1; 
25 | 	assert(u < n); 
26 | 	/* S is the bit array of settled nodes, B the priority queue */
27 | 	unsigned char *s= calloc(BITNSLOTS(n), 1); 
28 | 	struct binary_heap_u$ b= binary_heap_create_u$();
29 | 	/* Fill D with all-one bytes: the largest value if u$_ft is unsigned (assumed; TODO confirm) */
30 | 	memset(d, (1 << CHAR_BIT) - 1, sizeof(u$_ft) * n);
31 | 
32 | 	binary_heap_insert_u$(&b, u, 0);
33 | 
34 | 	d[u]= 0;
35 | 	/* A node may be queued several times; entries for settled nodes are skipped */
36 | 	while (! binary_heap_empty_u$(&b)) {
37 | 		const u$_ft i= binary_heap_min_u$(b);
38 | 		assert(i < n); 
39 | 		binary_heap_remove_min_u$(&b);
40 | 		if (! BITTEST(s, i)) {
41 | 			BITSET(s, i);
42 | 			const m$_ft end= i == n - 1 ? r->len_m : read_m$(r->adj_to, i + 1);
43 | 			for (m$_ft k= read_m$(r->adj_to, i); k < end; ++k) {
44 | 				const u$_ft j= read_u$(r->to, k);    
45 | 				assert(j < n); 
46 | 				assert(i != j);
47 | 				if (! BITTEST(s, j)) {
48 | 					if(d[j] > d[i] + 1) {
49 | 						d[j]= d[i] + 1;
50 | 						binary_heap_insert_u$(&b, j, d[i] + 1);
51 | 					}
52 | 				}
53 | 			}
54 | 		}
55 | 	}
56 | 
57 | 	binary_heap_delete_u$(&b);
58 | 	/* Hand the settled-node bit array to the caller, or discard it */
59 | 	if (visited) {
60 | 		*visited= s;
61 | 	} else {
62 | 		free(s);
63 | 	}
64 | }
65 | 
66 | #endif /* ! DIJKSTRA_H */
67 | 


--------------------------------------------------------------------------------
/c/feature_degree.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Generate the degree vector of a network.
 3 |  *
 4 |  * INVOCATION
 5 |  *     $0 SG1-FILE DEGREE-FILE LOGFILE
 6 |  */
 7 | 
 8 | #include "width.ma.h"
 9 | #include "width.ua.h"
10 | #include "width.va.h"
11 | #include "width.wa.h"
12 | #include "width.ta.h"
13 | #include "width.fa.h"
14 | 
15 | #include "sgraph1_io.a.h"
16 | #include "feature.a.h"
17 | 
18 | #if WEIGHTS_a == WEIGHTS_POSITIVE && TYPE_wa != '-'
19 | /* In that case, need to sum up the weights */ 
20 | #   error "*** Not implemented"
21 | #endif
22 | /* Write the degree of every node of the SG1 file into a feature file. */
23 | int main(int argc, char **argv)
24 | {
25 | 	if (argc != 4) {
26 | 		fprintf(stderr, "*** Invalid number of parameters\n");
27 | 		exit(1);
28 | 	}
29 | 
30 | 	const char *const filename_sg1= argv[1];
31 | 	const char *const filename_ft= argv[2];
32 | 	/* The LOGFILE argument (argv[3]) is ignored */
33 | 	
34 | 	struct sgraph1_reader_a r;
35 | 
36 | 	if (0 > sgraph1_open_read_a(filename_sg1, &r, 2)) {
37 | 		exit(1); 
38 | 	}
39 | 
40 | 	if (0 > sgraph1_advise_a(&r, MADV_SEQUENTIAL)) {
41 | 		perror(filename_sg1); 
42 | 		exit(1);
43 | 	}
44 | 
45 | 	struct feature_a f;
46 | 	/* The second node count is passed only when FEATURE_N2 is set */
47 | 	if (0 > feature_open_write_a(filename_ft, &f, r.h->n1
48 | #if FEATURE_N2
49 | 			      , r.h->n2
50 | #endif
51 | 			      )) {
52 | 		exit(1);
53 | 	}
54 | 
55 | 	if (0 > feature_advise_a(&f, MADV_SEQUENTIAL)) {
56 | 		perror(filename_ft);
57 | 		exit(1);
58 | 	}
59 | 	/* Degree of U = length of its range in adj_to; the last range ends at len_m */
60 | 	for (ua_ft u= 0;  u < r.h->n1;  ++u) {
61 | 		const ma_ft beg= read_ma(r.adj_to, u);
62 | 		const ma_ft end= u == r.h->n1 - 1 ? r.len_m : read_ma(r.adj_to, u + 1);
63 | 		assert(beg <= end); 
64 | 		assert((ma_ft)(end - beg) < fa_max); 
65 | 		writeonzero_fa(f.f1, u, end - beg); 
66 | 	}
67 | 	/* Likewise for the second node set V, using adj_from */
68 | #if FEATURE_N2
69 | 	for (va_ft v= 0;  v < r.h->n2;  ++v) {
70 | 		const ma_ft beg= read_ma(r.adj_from, v);
71 | 		const ma_ft end= v == r.h->n2 - 1 ? r.len_m : read_ma(r.adj_from, v + 1);
72 | 		assert((ma_ft) (end - beg) < fa_max); 
73 | 		writeonzero_fa(f.f2, v, end - beg); 
74 | 	}
75 | #endif
76 | 	/* If closing fails, delete the incomplete output file */
77 | 	if (0 > feature_close_write_a(&f)) {
78 | 		perror(filename_ft);
79 | 		if (0 > unlink(filename_ft)) {
80 | 			perror(filename_ft); 
81 | 		}
82 | 		exit(1); 
83 | 	}
84 | 
85 | 	exit(0); 
86 | }
87 | 
88 | 
89 | 


--------------------------------------------------------------------------------
/c/graph_simple.h:
--------------------------------------------------------------------------------
 1 | #ifndef GRAPH_SIMPLE_$1$2_H /* must test the same macro that is defined on the next line */
 2 | #define GRAPH_SIMPLE_$1$2_H
 3 | 
 4 | /* Code for transforming a graph into a simple graph. 
 5 |  * $1 is the source graph.  $2 is the target graph. 
 6 |  */
 7 | 
 8 | /* Read an SG0 graph into a struct graph_$2. 
 9 |  * The given graph must be uninitialized. 
10 |  * The resulting graph is simple (SYM-POSITIVE):  loops are dropped and each
11 |  * edge is stored at both of its endpoints.  BIP nodes V are renumbered N1 + V.  */ 
12 | void graph_read_sg0_simple_$1_$2(struct graph_$2 *restrict g, 
13 | 				 struct sgraph0_reader_$1 *restrict r)
14 | {
15 | 	struct header *s= (struct header *)r->out;
16 | 
17 | 	g->format  = FORMAT_SYM; 
18 | 	g->weights = WEIGHTS_POSITIVE;
19 | 
20 | 	g->cols    = 2; 
21 | 	g->m       = 0;
22 | 
23 | #if FORMAT_$1 == FORMAT_SYM || FORMAT_$1 == FORMAT_ASYM	
24 | 	assert(s->n1 == s->n2); 
25 | 	g->n1      = s->n1;
26 | 	g->n2      = s->n2;
27 | #elif FORMAT_$1 == FORMAT_BIP
28 | 	g->n1 	   = s->n1 + s->n2;
29 | 	g->n2      = s->n1 + s->n2; 
30 | #else
31 | #   error
32 | #endif
33 | 
34 | 	g->loops   = 0;
35 | 	/* Only the "to" direction is populated; the "from" side stays NULL */
36 | 	g->deg_to= calloc(arraylen_m$2(g->n1), 1);
37 | 	g->deg_from= NULL;
38 | 
39 | 	g->to= malloc(g->n1 * sizeof(v$2_at *)); 
40 | 	g->from= NULL;
41 | 
42 | #if TYPE_w$2 != '-'
43 | 	g->weight_to= NULL;
44 | 	g->weight_from= NULL; 
45 | #endif	
46 | 
47 | #if TYPE_t$2 != '-'
48 | 	g->timestamp_to= NULL;
49 | 	g->timestamp_from= NULL; 
50 | #endif	
51 | 
52 | 	for (m$2_ft i= 0;  i < s->m;  ++i) {
53 | 
54 | 		const u$1_ft u= read_u$1(r->u, i);
55 | 		const v$1_ft v= read_v$1(r->v, i);
56 | 
57 | #if FORMAT_$1 == FORMAT_SYM || FORMAT_$1 == FORMAT_ASYM		
58 | 
59 | 		const u$2_ft u2= u;
60 | 		const v$2_ft v2= v;
61 | 
62 | #elif FORMAT_$1 == FORMAT_BIP
63 | 
64 | 		assert(s->n1 < v$2_max);
65 | 		/* Right nodes are shifted behind the N1 left nodes */
66 | 		const u$2_ft u2= u;
67 | 		const v$2_ft v2= s->n1 + v;
68 | 
69 | 		assert(v2 >= s->n1); 
70 | 
71 | #else
72 | #    error
73 | #endif
74 | 		/* Drop loops; compared after renumbering so that BIP edges (i, i) are kept */
75 | 		if (u2 == v2) 
76 | 			continue;
77 | 
78 | 		const m$2_ft degree_u= g->deg_to[u2];
79 | 		const m$2_ft degree_v= g->deg_to[v2]; 
80 | 
81 | 		assert(degree_u < m$2_max); 
82 | 		assert(degree_v < m$2_max); 
83 | 
84 | 		graph_append_v$2(g->to + u2, degree_u, v2);
85 | 		graph_append_v$2(g->to + v2, degree_v, u2); 
86 | 
87 | 		++ g->deg_to[u2];
88 | 		++ g->deg_to[v2]; 
89 | 
90 | 		++ g->m;
91 | 	}
92 | }
93 | 
94 | #endif /* ! GRAPH_SIMPLE_$1$2_H */ 
95 | 


--------------------------------------------------------------------------------
/c/graph_width.h:
--------------------------------------------------------------------------------
 1 | #if TYPE_$1$2 != '-'
 2 | 
 3 | #include <string.h>
 4 | 
 5 | /* Append the value X to the array whose owning pointer is pointed to by
 6 |  * P, and whose current degree is D.  While D < MAXD the values live in the
 7 |  * bytes of the pointer *P itself; once D reaches MAXD, *P is a heap array.  */
 8 | void graph_append_$1$2($1$2_at **p, m$2_ft d, $1$2_ft x)
 9 | {
10 | 	/* Maximum number of array elements that fit into the pointer */ 
11 | 	const m$2_ft maxd= arrayn_$1$2(sizeof($1$2_at *)); 
12 | 	assert(maxd > 0); 
13 | 	/* NOTE(review): the results of realloc() and malloc() below are not checked */
14 | 	if (d > maxd) {
15 | 		/* realloc */ 
16 | 		*p= realloc(*p, arraylen_$1$2(d + 1));
17 | 		write_$1$2(*p, d, x);
18 | 		
19 | 	} else if (d < maxd) {
20 | 		/* write into pointer */ 
21 | 		write_$1$2(($1$2_at *)p, d, x); 
22 | 	} else { /* d == maxd */ 
23 | 		/* move to alloc:  the old pointer bytes become the start of the new array */ 
24 | 		assert(d == maxd); 
25 | 		$1$2_at *p_copy= *p;
26 | 		*p = malloc(arraylen_$1$2(d + 1));
27 | 		*($1$2_at **)*p= p_copy;
28 | 		write_$1$2(*p, d, x); 
29 | 	}
30 | }
31 | 
32 | int compar_$1$2(const void *x, const void *y) 
33 | {
34 | 	/* Three-way comparison with the signature expected by qsort() */
35 | 	$1$2_at *lhs= ($1$2_at *) x;
36 | 	$1$2_at *rhs= ($1$2_at *) y;
37 | 
38 | 	/* Yields -1, 0 or +1 */
39 | 	return (*lhs > *rhs) - (*lhs < *rhs);
40 | }
41 | 
42 | #endif /* TYPE_$1$2 != '-' */
43 | 


--------------------------------------------------------------------------------
/c/lcc.h:
--------------------------------------------------------------------------------
 1 | #ifndef LCC_H
 2 | #define LCC_H
 3 | 
 4 | #include "bits.h"
 5 | #include "consts.h"
 6 | #include "dijkstra.$.h"
 7 | 
 8 | #if FORMAT_a != FORMAT_SYM
 9 | #   error "*** Network must be undirected"
10 | #endif
11 | 
12 | /* Find the largest connected component (LCC) in R.  Return a malloc'ed
13 |    bit array with 1's for nodes included in the largest connected
14 |    component; the caller must free() it.  The graph must contain at
15 |    least one node.  */
16 | unsigned char *lcc_find(struct sgraph1_reader_$ *r);
17 | 
18 | unsigned char *lcc_find(struct sgraph1_reader_$ *r)
19 | {
20 | 	const u$_ft n= r->h->n1;
21 | 	assert(n > 0); 
22 | 
23 | 	/* Size of bit arrays */ 
24 | 	const size_t k= BITNSLOTS(n); 
25 | 
26 | 	/* Bit array of visited nodes */ 
27 | 	unsigned char *const visited= calloc(k, 1);
28 | 
29 | 	unsigned char *ret= NULL;
30 | 	u$_ft size_ret= 0; 
31 | 	/* Scratch distance array required by dijkstra_$() */
32 | 	u$_ft *d= malloc(n * sizeof(u$_ft)); 
33 | 
34 | 	for (u$_ft u= 0;  u < n;  ++u) {
35 | 
36 | 		while (u < n && BITTEST(visited, u)) {
37 | 			++u;
38 | 		}
39 | 
40 | 		if (u == n) {
41 | 			break;
42 | 		}
43 | 
44 | 		unsigned char *ret_new;
45 | 		dijkstra_$(r, u, d, &ret_new); 
46 | 		assert(ret_new != NULL); 
47 | 
48 | 		const u$_ft size_ret_new= BITSCOUNT(ret_new, n); 
49 | 
50 | 		/* The found component contains at least U itself */ 
51 | 		assert(size_ret_new > 0); 
52 | 		assert(BITTEST(ret_new, u));
53 | 
54 | 		BITSSET(visited, ret_new, n); 
55 | 
56 | 		if (size_ret_new > size_ret) {
57 | 			size_ret= size_ret_new;
58 | 			free(ret);
59 | 			ret= ret_new;
60 | 
61 | 			/* If the connected component contains more than
62 | 			   half of all nodes, there cannot be a larger one */
63 | 			if (size_ret > n / 2)
64 | 				break;
65 | 		} else {
66 | 			free(ret_new); 
67 | 		}
68 | 	}
69 | 
70 | 	free(d); 
71 | 	free(visited);
72 | 	assert(ret != NULL);
73 | 	assert(size_ret > 0);
74 | 	assert(size_ret <= n); 
75 | 
76 | 	return ret; 
77 | }
78 | 
79 | #endif /* ! LCC_H */
80 | 


--------------------------------------------------------------------------------
/c/lines.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Determine the number of lines of a network from its SG1 file.  This is
 3 |  * trivial, and can be determined without the SG1 file in most cases.
 4 |  * This implementation is only used in cases where the SG1 file is the
 5 |  * primary version of the dataset (at the moment, only simple~[NETWORK]
 6 |  * networks). 
 7 |  *
 8 |  *
 9 |  * STDOUT 
10 |  * 	The number of lines is printed to stdout.
11 |  *
12 |  * INVOCATION 
13 |  * 
14 |  *	$0 INPUT-FILE LOGFILE
15 |  */ 
16 | 
17 | #include "width.ma.h"
18 | #include "width.ua.h"
19 | #include "width.va.h"
20 | #include "width.wa.h"
21 | #include "width.ta.h"
22 | 
23 | #include "sgraph1_io.a.h"
24 | 
25 | int main(int argc, char **argv)
26 | {
27 | 	/* Expect exactly INPUT-FILE and LOGFILE; the latter is not used */
28 | 	if (argc != 3) {
29 | 		fprintf(stderr, "*** Invalid number of parameters\n");
30 | 		exit(1);
31 | 	}
32 | 
33 | 	struct sgraph1_reader_a reader;
34 | 	const char *const path= argv[1];
35 | 
36 | 	if (sgraph1_open_read_a(path, &reader, 0) < 0) {
37 | 		exit(1); 
38 | 	}
39 | 
40 | 	/* Only the header is consulted, so no file advisories are
41 | 	   issued */ 
42 | 
43 | 	/* The line count is the M field of the header */
44 | 	const ma_ft count= reader.h->m;
45 | 
46 | 	printf("%" PR_fma "\n", count); 
47 | 
48 | 	exit(0);
49 | }
50 | 


--------------------------------------------------------------------------------
/c/sgraph1_create.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Create an SG1 file from an SG0 file.
 3 |  */
 4 | 
 5 | #include <stdio.h>
 6 | #include <stdlib.h>
 7 | 
 8 | #ifndef NDEBUG
 9 | #   include <mcheck.h>
10 | #endif
11 | 
12 | #include "width.ma.h"
13 | #include "width.ua.h"
14 | #include "width.va.h"
15 | #include "width.wa.h"
16 | #include "width.ta.h"
17 | 
18 | #include "graph_width.u.a.h"
19 | #include "graph_width.v.a.h"
20 | #include "graph_width.w.a.h"
21 | #include "graph_width.t.a.h"
22 | 
23 | #include "sgraph0_io.a.h"
24 | #include "graph.a.h"
25 | #include "sgraph1_write.a.h"
26 | #include "graph_read_sgraph0.a.h"
27 | 
28 | /*
29 |  * INVOCATION
30 |  *
31 |  *	$0 INPUT-FILENAME OUTPUT-FILENAME LOGFILE
32 |  * 
33 |  * The input file must be in SG0 format; the output file is in SG1 format. 
34 |  */
35 | int main(int argc, char **argv)
36 | {
37 | #ifndef NDEBUG
38 | 	/* Debug builds install the mcheck() heap consistency hooks */
39 | 	if (mcheck(NULL))  exit(1); 
40 | #endif
41 | 
42 | 	if (argc != 4) {
43 | 		fprintf(stderr, "*** Invalid number of arguments\n");
44 | 		exit(1);
45 | 	}
46 | 
47 | 	const char *path_sg0= argv[1];
48 | 	const char *path_sg1= argv[2];
49 | 
50 | 	/* Open the SG0 input */
51 | 	struct sgraph0_reader_a reader;
52 | 	if (sgraph0_open_read_a(path_sg0, &reader, COLS_ALL) < 0) {
53 | 		exit(1); 
54 | 	}
55 | 
56 | 	/* Hint sequential access; on failure close the input and give up */
57 | 	if (sgraph0_advise_a(&reader, MADV_SEQUENTIAL) < 0) {
58 | 		perror(path_sg0);
59 | 		sgraph0_close_a(&reader); 
60 | 		exit(1); 
61 | 	}
62 | 
63 | 	/* Load the graph, then release the input */
64 | 	struct graph_a graph;
65 | 	graph_read_sgraph0_a(&graph, &reader); 
66 | 	sgraph0_close_a(&reader);
67 | 
68 | 	/* Sort before writing the SG1 output */
69 | 	graph_sort_a(&graph); 
70 | 
71 | 	/* Write the SG1 output */
72 | 	if (sgraph1_write_a(&graph, path_sg1) < 0) {
73 | 		exit(1); 
74 | 	}
75 | 
76 | 	exit(0);
77 | }
78 | 


--------------------------------------------------------------------------------
/c/sgraph1_create_lcc.c:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Extract the largest connected component of a graph.  Input and output
 3 |  * files are both SG1 files.
 4 |  */
 5 | 
 6 | #include "width.ma.h"
 7 | #include "width.ua.h"
 8 | #include "width.va.h"
 9 | #include "width.wa.h"
10 | #include "width.ta.h"
11 | 
12 | #include "sgraph1_io.a.h"
13 | #include "sgraph1_subgraph.a.h"
14 | #include "binary_heap.ua.h"
15 | #include "dijkstra.a.h"
16 | #include "lcc.a.h"
17 | 
18 | #include "consts.h"
19 | 
20 | #if FORMAT_a != FORMAT_SYM || WEIGHTS_a != WEIGHTS_UNWEIGHTED || LOOPS_a != 0
21 | #   error "*** Only implemented for simple networks"
22 | #endif
23 | 
24 | /* 
25 |  * INVOCATION 
26 |  * 
27 |  * 	$0 INPUT-FILE OUTPUTFILE LOGFILE
28 |  */
29 | int main(int argc, char **argv)
30 | {
31 | 	/* Checked at run time, since assert() is compiled out under NDEBUG */
32 | 	if (argc != 4) {
33 | 		fprintf(stderr, "*** Invalid number of arguments\n");
34 | 		exit(1);
35 | 	}
36 | 
37 | 	const char *const filename_in= argv[1];
38 | 	const char *const filename_out= argv[2];
39 | 	struct sgraph1_reader_a r;
40 | 
41 | 	if (0 > sgraph1_open_read_a(filename_in, &r, 2)) {
42 | 		exit(1); 
43 | 	}
44 | 
45 | 	if (0 > sgraph1_advise_a(&r, MADV_WILLNEED)) {
46 | 		perror(filename_in);
47 | 		exit(1); 
48 | 	}
49 | 	/* Bit array with 1's for the nodes of the largest connected component */
50 | 	unsigned char *lcc= lcc_find(&r);
51 | 	if (0 > sgraph1_subgraph_a(&r, lcc, filename_out)) {
52 | 		exit(1); 
53 | 	}
54 | 
55 | 	exit(0); 
56 | }


--------------------------------------------------------------------------------
/c/sgraph1_create_simple.c:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * Create an SG1 file from an SG0 file, transforming any network into a
 3 |  * simple SYM network.
 4 |  */
 5 | 
 6 | /* 
 7 |  * The passes M/U/V/W/T are those of the original graph.  Additional
 8 |  * parameters for the generated graph: 
 9 |  * 
10 |  * MX, UX, VX, WX, TX, 
11 |  */
12 | 
13 | #include <stdio.h>
14 | #include <stdlib.h>
15 | 
16 | #include "width.ma.h"
17 | #include "width.ua.h"
18 | #include "width.va.h"
19 | #include "width.wa.h"
20 | #include "width.ta.h"
21 | 
22 | #include "width.mb.h"
23 | #include "width.ub.h"
24 | #include "width.vb.h"
25 | #include "width.wb.h"
26 | #include "width.tb.h"
27 | 
28 | #include "graph_width.u.b.h"
29 | #include "graph_width.v.b.h"
30 | 
31 | #include "sgraph0_io.a.h"
32 | #include "graph.b.h"
33 | #include "graph_simple.a.b.h"
34 | #include "sgraph1_write.b.h"
35 | 
36 | /*
37 |  * INVOCATION
38 |  *
39 |  *	$0 INPUT-FILE OUTPUT-FILE LOGFILE
40 |  * (Only INPUT-FILE and OUTPUT-FILE are read from argv in this function.)
41 |  * The input file must be in SG0 format; the output file is in sg1 format. 
42 |  */
43 | int main(int argc, char **argv)
44 | {
45 | 	if (argc != 4) {
46 | 		fprintf(stderr, "*** Invalid number of arguments\n");
47 | 		exit(1);
48 | 	}
49 | 
50 | 	const char *const filename_in= argv[1];
51 | 	const char *const filename_out= argv[2]; 
52 | 
53 | 	struct sgraph0_reader_a r;
54 | 	
55 | 	if (0 > sgraph0_open_read_a(filename_in, &r, COLS_ALL)) {
56 | 		exit(1); 
57 | 	}
58 | 
59 | 	if (0 > sgraph0_advise_a(&r, MADV_SEQUENTIAL)) {
60 | 		perror(filename_in);
61 | 		goto error_close;
62 | 	}
63 | 
64 | 	struct graph_b g;
65 | 	/* Build the "b" graph from the "a" reader, then release the reader. */
66 | 	graph_read_sg0_simple_a_b(&g, &r);  
67 | 
68 | 	sgraph0_close_a(&r);
69 | 	/* Sort, then drop duplicates, before writing the result. */
70 | 	graph_sort_b(&g); 
71 | 	graph_unique_b(&g); 
72 | 
73 | 	if (0 > sgraph1_write_b(&g, filename_out)) {
74 | 		exit(1); 
75 | 	}
76 | 
77 | 	exit(0);
78 | 	/* Failure path taken while the reader is still open. */
79 |  error_close:
80 | 	sgraph0_close_a(&r); 
81 | 	exit(1); 
82 | }
83 | 


--------------------------------------------------------------------------------
/c/sgraph1_dump.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /* Dump an SG1 file to an OUT file.
 3 |  *
 4 |  * INVOCATION
 5 |  *
 6 |  *    $0 SG1-FILE OUT-FILE
 7 |  */
 8 | 
 9 | #include "width.ma.h"
10 | #include "width.ua.h"
11 | #include "width.va.h"
12 | #include "width.wa.h"
13 | #include "width.ta.h"
14 | 
15 | #include "sgraph1_io.a.h"
16 | 
17 | #include <stdio.h>
18 | 
19 | int main(int argc, char **argv)
20 | {
21 | 	/* Networks with a weight or timestamp column are not implemented yet */ 
22 | 	assert(TYPE_wa == '-' && TYPE_ta == '-');
23 | 
24 | 	if (argc != 3) {
25 | 		fprintf(stderr, "*** wrong number of arguments\n");
26 | 		exit(1); 
27 | 	}
28 | 
29 | 	const char *const filename_sg1= argv[1];
30 | 	const char *const filename_out= argv[2]; 
31 | 
32 | 	FILE *out= fopen(filename_out, "w");
33 | 	if (out == NULL) {
34 | 		perror(filename_out);
35 | 		exit(1); 
36 | 	}
37 | 
38 | 	struct sgraph1_reader_a r;
39 | 
40 | 	if (0 > sgraph1_open_read_a(filename_sg1, &r, 2)) {
41 | 		exit(1); 
42 | 	}
43 | 
44 | 	if (0 > sgraph1_advise_a(&r, MADV_SEQUENTIAL)) {
45 | 		perror(filename_sg1); 
46 | 		exit(1);
47 | 	}
48 | 	/* Only the symmetric format is handled by the loop below. */
49 | 	assert(r.h->format == FORMAT_SYM);
50 | 
51 | 	for (ua_ft u= 0;  u < r.h->n1;  ++u) {
52 | 		/* Entries of u span [adj_to[u], adj_to[u+1]); the last node's range ends at m.  IDs are written 1-based. */
53 | 		ma_ft end= (u == r.h->n1 - 1) ? r.h->m : read_ma(r.adj_to, u + 1);
54 | 		for (ma_ft i= read_ma(r.adj_to, u);  i < end;  ++i) {
55 | 			va_ft v= read_va(r.to, i);
56 | 			fprintf(out, "%" PR_fua "\t%" PR_fva "\n", u + 1, v + 1); 
57 | 		}
58 | 	}
59 | 	/* fclose() flushes buffered output, so write errors surface here. */
60 | 	if (0 > fclose(out)) {
61 | 		perror(filename_out);
62 | 		exit(1);
63 | 	}
64 | 
65 | 	exit(0); 
66 | }
67 | 


--------------------------------------------------------------------------------
/c/size.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Determine the size of a network from its SG1 file.  This is
 3 |  * trivial, and can be determined without the SG1 file in most cases.
 4 |  * This implementation is only used in cases where the SG1 file is the
 5 |  * primary version of the dataset (at the moment, only simple~[NETWORK]
 6 |  * networks). 
 7 |  *
 8 |  * The output follows the substatistics described in
 9 |  * 'konect-toolbox/konect_statistic_size.m'. 
10 |  *
11 |  * The size is printed to stdout.
12 |  *
13 |  * INVOCATION 
14 |  *
15 |  *	$0 INPUT-FILE LOGFILE
16 |  */ 
17 | 
18 | #include "width.ma.h"
19 | #include "width.ua.h"
20 | #include "width.va.h"
21 | #include "width.wa.h"
22 | #include "width.ta.h"
23 | 
24 | #include "sgraph1_io.a.h"
25 | 
26 | int main(int argc, char **argv)
27 | {
28 | 	if (argc != 3) {
29 | 		fprintf(stderr, "*** Invalid number of parameters\n");
30 | 		exit(1);
31 | 	}
32 | 
33 | 	const char *const filename_sg1= argv[1];
34 | 
35 | 	struct sgraph1_reader_a r;
36 | 
37 | 	if (0 > sgraph1_open_read_a(filename_sg1, &r, 0)) {
38 | 		exit(1); 
39 | 	}
40 | 
41 | 	/* No need for file advisories since we're only reading the header.  */ 
42 | 	if (r.h->format == FORMAT_SYM || r.h->format == FORMAT_ASYM) {
43 | 		assert(r.h->n1 == r.h->n2); 
44 | 		const ua_ft n= r.h->n1;
45 | 		printf("%" PR_fua "\n", n);
46 | 	} else if (r.h->format == FORMAT_BIP) {
47 | 		const ua_ft n1= r.h->n1;
48 | 		const va_ft n2= r.h->n2;
49 | 		const uintmax_t n= (uintmax_t) n1 + n2;  /* widen first: the sum may not fit the narrow types */
50 | 		printf("%" PRIuMAX "\n"
51 | 		       "%" PR_fua "\n"
52 | 		       "%" PR_fva "\n",
53 | 		       n, n1, n2);
54 | 	} else {
55 | 		fprintf(stderr, "*** Invalid format\n");  /* reported even when assertions are compiled out */
56 | 		exit(1);
57 | 	}
58 | 
59 | 	exit(0);
60 | }
61 | 
62 | 


--------------------------------------------------------------------------------
/c/statistic_mediandegree.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Determine the median degree. 
 3 |  *
 4 |  * INVOCATION
 5 |  *	$0 $SG1_FILENAME LOGFILE
 6 |  *
 7 |  * INPUT FILES 
 8 |  *	$SG1_FILENAME
 9 |  *
10 |  * STDOUT:  The full statistics; one per line, as specified in
11 |  * 	    konect-toolbox/m/konect_statistic_mediandegree.m 
12 |  */
13 | 
14 | #include "width.ma.h"
15 | #include "width.ua.h"
16 | #include "width.va.h"
17 | #include "width.wa.h"
18 | #include "width.ta.h"
19 | 
20 | #include "consts.h"
21 | #include "sgraph1_io.a.h"
22 | 
23 | #if FORMAT_a == FORMAT_SYM
24 | ma_at *d_sym;
25 | #elif FORMAT_a == FORMAT_BIP || FORMAT_a == FORMAT_ASYM
26 | ...;
27 | #else
28 | #   "*** Invalid FORMAT" 
29 | #endif
30 | 
31 | ma_ft nth_element(ma_at *const p, ma_first
32 | 
33 | int main(int argc, char **argv)
34 | {
35 | 	if (argc != 3) {
36 | 		fprintf(stderr, "*** Expected exactly two arguments\n");
37 | 		exit(1);
38 | 	}
39 | 
40 | 	const char *filename_sg1= argv[1];
41 | 
42 | 	struct sgraph1_reader_a r;
43 | 	if (0 != sgraph1_open_read_a(filename_sg1, &r,
44 | #if FORMAT_a == FORMAT_sym
45 | 				     1
46 | #else FORMAT_a == FORMAT_bip || FORMAT_a == FORMAT_asym
47 | 				     2
48 | #else
49 | #  error "*** Invalid FORMAT_a"
50 | #endif
51 | 				     ))  
52 | 		exit(1);
53 | 
54 | 	if (0 > sgraph1_advise_a(&r, MADV_SEQUENTIAL)) {
55 | 		perror(filename_sg1); 
56 | 		exit(1);
57 | 	}
58 | 
59 | #if FORMAT_a == FORMAT_sym
60 | 
61 | 	d_sym= calloc(arraylen_ma(r->h->n1), 1);
62 | 	if (!d_sym) {
63 | 		perror("calloc");
64 | 		exit(1); 
65 | 	}
66 | 	
67 | 	for (ua_ft u= 0;  u + 1 < (ua_ft) r.h->n1;  ++u) {
68 | 		const ma_ft deg_u= read_ma(r.adj_to, u + 1) - read_ma(r.adj_to, u);
69 | 		writeonzero_a(d_sym, u, deg_u); 
70 | 	}
71 | 	const ma_ft deg_u_last= r.len_m - read_ma(r.adj_to, r.h->n1 - 1);
72 | 	writeonzero_a(d_sym, r.h->n1-1, deg_u_last); 
73 | 
74 | 	ma_ft median= nth_element_(d_sym, 0, r.h->n1 / 2, r.h->n1); 
75 | 
76 | 	printf("%" PR_fma "\n", median); 
77 | 	
78 | #elif FORMAT_a == FORMAT_asym || FORMAT_a == FORMAT_BIP
79 | 	...;
80 | #else
81 | #   error "*** Invalid FORMAT_a"		      
82 | #endif
83 | 
84 |  	if (ferror(stdout)) {
85 | 		perror("stdout"); 
86 | 		exit(1); 
87 | 	}
88 | 
89 | 	exit(0); 
90 | }
91 | 


--------------------------------------------------------------------------------
/c/statistic_twostars.c:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * Compute the number of wedges. 
  3 |  *
  4 |  * INVOCATION 
  5 |  *
  6 |  *      $0 FT-DEGREE-FILE LOGFILE
  7 |  *
  8 |  * The statistics are written to stdout.
  9 |  */ 
 10 | 
 11 | #include <stdio.h>
 12 | 
 13 | #include "width.ma.h"
 14 | #include "width.ua.h"
 15 | #include "width.va.h"
 16 | #include "width.fa.h"
 17 | 
 18 | #include "feature.a.h"
 19 | 
 20 | #include "consts.h"
 21 | 
 22 | int main(int argc, char **argv)
 23 | {
 24 | 	if (argc != 3) {
 25 | 		fprintf(stderr, "*** Invalid number of arguments\n");
 26 | 		exit(1);
 27 | 	}
 28 | 
 29 | 	const char *const filename= argv[1];
 30 | 
 31 | 	struct feature_a f;
 32 | 
 33 | 	if (0 > feature_open_read_a(filename, &f)) {
 34 | 		perror(filename);
 35 | 		exit(1);
 36 | 	}
 37 | 
 38 | 	if (0 > feature_advise_a(&f, MADV_SEQUENTIAL)) {
 39 | 		perror(filename); 
 40 | 		exit(1);
 41 | 	}
 42 | 	
 43 | 	/* 
 44 | 	 * Total:  sum of d (d - 1) / 2 over all nodes.  Each degree is widened to uintmax_t first so that d * (d - 1) cannot overflow the narrower fa_ft.
 45 | 	 */
 46 | 
 47 | 	uintmax_t s= 0;
 48 | 
 49 | #if FORMAT_a == FORMAT_SYM
 50 | 
 51 | 	for (ua_ft u= 0;  u < f.h->n1;  ++u) {
 52 | 		const uintmax_t d= read_fa(f.f1, u);
 53 | 		s += (d * (d - 1) / 2); 
 54 | 	}
 55 | 
 56 | #elif FORMAT_a == FORMAT_ASYM 
 57 | 
 58 | 	for (ua_ft u= 0;  u < f.h->n1;  ++u) {
 59 | 		const fa_ft d_out= read_fa(f.f1, u);
 60 | 		const fa_ft d_in=  read_fa(f.f2, u);
 61 | 		const uintmax_t d= (uintmax_t) d_out + d_in;
 62 | 		s += (d * (d - 1) / 2);
 63 | 	}
 64 | 
 65 | #elif FORMAT_a == FORMAT_BIP
 66 | 
 67 | 	for (ua_ft u= 0;  u < f.h->n1;  ++u) {
 68 | 		const uintmax_t d= read_fa(f.f1, u);
 69 | 		s += (d * (d - 1) / 2); 
 70 | 	}
 71 | 	
 72 | 	for (va_ft v= 0;  v < f.h->n2;  ++v) {
 73 | 		const uintmax_t d= read_fa(f.f2, v);
 74 | 		s += (d * (d - 1) / 2); 
 75 | 	}
 76 | 
 77 | #else
 78 | #   error "*** Invalid format"
 79 | #endif	
 80 | 
 81 | 	printf("%" PRIuMAX "\n", s); 
 82 | 
 83 | 	/*
 84 | 	 * Left and right:  the same sum taken separately over f1 and over f2
 85 | 	 */
 86 | 
 87 | #if FORMAT_a == FORMAT_BIP || FORMAT_a == FORMAT_ASYM
 88 | 
 89 | 	uintmax_t s1= 0;
 90 | 	for (ua_ft u= 0;  u < f.h->n1;  ++u) {
 91 | 		const uintmax_t d= read_fa(f.f1, u);
 92 | 		s1 += (d * (d - 1) / 2); 
 93 | 	}
 94 | 
 95 | 	printf("%" PRIuMAX "\n", s1);
 96 | 	
 97 | 	uintmax_t s2= 0;
 98 | 	for (va_ft v= 0;  v < f.h->n2;  ++v) {
 99 | 		const uintmax_t d= read_fa(f.f2, v);
100 | 		s2 += (d * (d - 1) / 2); 
101 | 	}
102 | 
103 | 	printf("%" PRIuMAX "\n", s2);
104 | 
105 | #endif
106 | 
107 | 	exit(0);
108 | }
109 | 


--------------------------------------------------------------------------------
/c/volume.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Determine the volume of a network from its SG1 file.  This is
 3 |  * trivial, and can be determined without the SG1 file in most cases.
 4 |  * This implementation is only used in cases where the SG1 file is the
 5 |  * primary version of the dataset (at the moment, only simple~[NETWORK]
 6 |  * networks). 
 7 |  *
 8 |  * The volume is printed to stdout.
 9 |  *
10 |  * The input file must not be a POSITIVE network with weight column. 
11 |  *
12 |  * INVOCATION 
13 |  * 
14 |  *	$0 INPUT-FILE LOGFILE
15 |  */ 
16 | 
17 | #include "width__m__h"
18 | #include "width__u__h"
19 | #include "width__v__h"
20 | #include "width__w__h"
21 | #include "width__t__h"
22 | 
23 | #include "sgraph1_io.h"
24 | 
25 | #if FORMAT_a == FORMAT_POSITIVE && TYPE_wa != '-'
26 | #   error "the combination of POSITIVE with a weight column is not supported"
27 | #endif 
28 | 
29 | int main(int argc, char **argv)
30 | {
31 | 	if (argc != 3) {
32 | 		fprintf(stderr, "*** Invalid number of parameters\n");
33 | 		exit(1);
34 | 	}
35 | 	/* Only argv[1] is used below; argv[2] (LOGFILE) is not read here. */
36 | 	const char *const filename_sg1= argv[1];
37 | 
38 | 	struct sgraph1_reader r;
39 | 
40 | 	if (0 > sgraph1_open_read(filename_sg1, &r, 0)) {
41 | 		exit(1); 
42 | 	}
43 | 
44 | 	/* No need for file advisories since we're only reading the
45 | 	   header */ 
46 | 	/* The printed value is the header field m. */
47 | 	const m_ft m= r.h->m;
48 | 	
49 | 	printf("%" PR_fm "\n", m);
50 | 
51 | 	exit(0);
52 | }
53 | 


--------------------------------------------------------------------------------
/c/widthhelper.h:
--------------------------------------------------------------------------------
 1 | #ifndef WIDTHHELPER_H
 2 | #define WIDTHHELPER_H
 3 | /* Numeric class tags -- presumably compared against by the width.*.h headers (confirm there). */
 4 | #define CLASS_UNSIGNED  0
 5 | #define CLASS_SIGNED    1
 6 | #define CLASS_FLOAT     2
 7 | /* Token pasting.  CONCATxN pastes its arguments as written; CONCATN adds one level of expansion so macro arguments are expanded before pasting. */
 8 | #define CONCATx2(x, y) x ## y
 9 | #define CONCATx3(x, y, z) x ## y ## z
10 | #define CONCAT2(x, y) CONCATx2(x, y)
11 | #define CONCAT3(x, y, z) CONCATx3(x, y, z)
12 | 
13 | #endif /* ! WIDTHHELPER_H */
14 | 
15 | 


--------------------------------------------------------------------------------
/jl/inter.jl:
--------------------------------------------------------------------------------
 1 | #
 2 | # Plot inter-event distributions.
 3 | #
 4 | # This is the first Julia code in KONECT, and therefore can serve as an
 5 | # example of how to do it. 
 6 | #
 7 | # PARAMETERS 
 8 | #	$network	Network name
 9 | #
10 | # INPUT FILES
11 | #	dat/out2.$network
12 | #
13 | # OUTPUT FILES
14 | # 	plot/inter.$type.$network.png
15 | #		$type:
16 | #		a	Overall distribution, log-log
17 | #		al	Overall distribution, lin-log
18 | #
19 | 
20 | using PyPlot;
21 | 
22 | include("step_full.jl");
23 | 
24 | network = ENV["network"]; 
25 | 
26 | T = readdlm("dat/out2.$network", '\t'); 
27 | # Column 4 is used as the event time (presumably the timestamp column of the OUT2 file).
28 | t = T[:,4];  
29 | sort!(t);
30 | # Gaps between consecutive sorted times; zero gaps are dropped.
31 | d = t[2:end] - t[1:end-1];
32 | d = d[d .!= 0]; 
33 | 
34 | fig = figure("Title", figsize=(5,3.7)); 
35 | 
36 | step_full(d);
37 | 
38 | xlabel("Inter-event time (t) [s]");
39 | ylabel("P(x ≥ t)");
40 | tight_layout();
41 | 
42 | # Day line
43 | axvline(60 * 60 * 24, linestyle = "--", linewidth = 0.5, color = "k"); 
44 | # step_full() leaves both axes logarithmic, so this file is the log-log plot.
45 | savefig("plot/inter.a.$network.png");
46 | # Same figure again with a linear X axis.
47 | xscale("linear");
48 | 
49 | savefig("plot/inter.al.$network.png");
50 | 
51 | 


--------------------------------------------------------------------------------
/jl/inter2.jl:
--------------------------------------------------------------------------------
 1 | #
 2 | # Plot node-level interevent time distributions.
 3 | #
 4 | # PARAMETERS
 5 | #	$network
 6 | #
 7 | # INPUT FILES
 8 | #	dat/out2.$network
 9 | #	dat/statistic.format.$network
10 | #
11 | # OUTPUT FILES
12 | #	plot/inter2.{auv}{,l}.$network.png
13 | #
14 | 
15 | using PyPlot;
16 | 
17 | include("read_statistic.jl");
18 | include("konect_consts.jl"); 
19 | include("step_full.jl"); 
20 | 
21 | network = ENV["network"]; 
22 | 
23 | format = read_statistic("format", network)[1];
24 | 
25 | T = readdlm("dat/out2.$network", '\t'); 
26 | 
27 | #
28 | # c	Character
29 | # x	Values
30 | # t	Timestamps
31 | #
32 | function inter2_one(c, network, x, t)
33 |     # Pool, over every distinct value in x, the gaps between that value's consecutive sorted timestamps.
34 |     d = [];
35 |     for i in unique(x) 
36 | ##        x_i = x[x .== i];
37 |         t_i = t[x .== i];
38 |         sort!(t_i);
39 |         d_i = t_i[2:end] - t_i[1:end-1];
40 |         d = [d ; d_i]; 
41 |     end
42 |     d = d[d .!= 0 ]; 
43 |     # Zero gaps were removed above; the remaining count is logged.
44 |     println("inter2: $network $c length(d) = $(length(d))"); 
45 |     
46 |     close(); 
47 | 
48 |     fig = figure("Title", figsize=(5,3.7)); 
49 | 
50 |     step_full(d);
51 | 
52 |     xlabel("Inter-event time (t) [s]");
53 |     ylabel("P(x ≥ t)");
54 | 
55 |     tight_layout();
56 | 
57 |     # Day line
58 |     axvline(60 * 60 * 24, linestyle = "--", linewidth = 0.5, color = "k"); 
59 |     # First file keeps the log-log axes set by step_full(); the second ("l" suffix) uses a linear X axis.
60 |     savefig("plot/inter2.$c.$network.png");
61 |     xscale("linear");
62 |     c2= string(c, "l");
63 |     savefig("plot/inter2.$c2.$network.png");
64 | end
65 | 
66 | if format == KONECT_BIP
67 |     inter2_one('u', network, T[:,1], T[:,4]);
68 |     inter2_one('v', network, T[:,2], T[:,4]);
69 |     inter2_one('a', network, [T[:,1]; T[:,2] .+ maximum(T[:,1])], [T[:,4]; T[:,4]]);  # shift column-2 IDs past the largest column-1 ID so the two sets do not collide
70 | elseif format == KONECT_SYM
71 |     inter2_one('a', network, [T[:,1]; T[:,2]], [T[:,4]; T[:,4]]);
72 | elseif format == KONECT_ASYM
73 |     inter2_one('u', network, T[:,1], T[:,4]);
74 |     inter2_one('v', network, T[:,2], T[:,4]);
75 |     inter2_one('a', network, [T[:,1]; T[:,2]], [T[:,4]; T[:,4]]);
76 | else
77 |     @assert false
78 | end
79 | 
80 | 


--------------------------------------------------------------------------------
/jl/konect_consts.jl:
--------------------------------------------------------------------------------
1 | KONECT_SYM  = 1;
2 | KONECT_ASYM = 2;
3 | KONECT_BIP  = 3;
4 | 
5 | 
6 | 


--------------------------------------------------------------------------------
/jl/read_statistic.jl:
--------------------------------------------------------------------------------
 1 | #
 2 | # Read a statistic from a file.
 3 | #
 4 | # PARAMETERS
 5 | #	statistic	Statistic name
 6 | #	network		Network name
 7 | #
 8 | # INPUT FILES
 9 | #	dat/statistic.$statistic.$network
10 | #
11 | function read_statistic(statistic, network)
12 |     # Parse the statistic file and hand back whatever readdlm() produces.
13 |     path = "dat/statistic.$statistic.$network";
14 | 
15 |     return readdlm(path);
16 | end
17 | 
18 | 


--------------------------------------------------------------------------------
/jl/step_full.jl:
--------------------------------------------------------------------------------
 1 | #
 2 | # Draw a "full" step plot.
 3 | #
 4 | # PARAMETERS
 5 | #
 6 | #	x	(n) Values
 7 | #
 8 | 
 9 | function step_full(x)
10 |     # NOTE: sort! below reorders the caller's array in place.
11 |     n = length(x);
12 |     
13 |     sort!(x);
14 | 
15 |     step(x,
16 |          (n:-1:1) / n,
17 |          linestyle="-");
18 | 
19 |     # If the Y axis were not logarithmic, we would also add the point
20 |     # (x[end], 0) to the plot, making x[end] appear there
21 |     # twice. 
22 |     
23 |     xscale("log");
24 |     yscale("log");
25 | 
26 | end
27 | 
28 | 


--------------------------------------------------------------------------------
/julia:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Wrapper for running Julia scripts. 
 4 | #
 5 | 
 6 | TMPDIR=${TMPDIR-/tmp}
 7 | # Log file name:  $TMPDIR/jl.<script name without .jl>[.$network].log
 8 | log=$TMPDIR/jl
 9 | 
10 | name=$(basename "$1" | sed -E -e 's,\.jl$,,')
11 | 
12 | log=$log.$name
13 | 
14 | [ "$network" ] && log=$log.$network
15 | 
16 | log=$log.log
17 | # Show the log file name on stderr.
18 | printf >&2 '\t%s\n' "$log"
19 | # The log path is also passed to the script as its first argument; stdout and stderr both go to the log.
20 | if ! julia "$1" "$log" >"$log" 2>&1 ; then
21 | 	echo >&2 "*** Error in $log"
22 | 	exit 1
23 | fi
24 | 
25 | exit 0
26 | 


--------------------------------------------------------------------------------
/ktop:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Analogue to top(1) that shows KONECT processes.
 4 | #
 5 | 
 6 | set -e 
 7 | # Redraw the output of ./status every 90 seconds; with 'set -e' the loop ends as soon as a command fails.
 8 | while : ; do 
 9 | 	clear 
10 | 	./status
11 | 	sleep 90 
12 | done
13 | 


--------------------------------------------------------------------------------
/lib/.gitignore:
--------------------------------------------------------------------------------
1 | gplot2.m
2 | plfit.m
3 | zeta.m
4 | matlab_bgl
5 | 
6 | 


--------------------------------------------------------------------------------
/lib/JULIA:
--------------------------------------------------------------------------------
 1 | We're experimenting with replacing Matlab by Julia.
 2 | 
 3 | See jl/inter.jl for an example script, and the @inter target in the Stu
 4 | file. 
 5 | 
 6 | We use PyPlot for plotting, which is based on Matplotlib. 
 7 | 
 8 | 
 9 | How to set up Julia
10 | ===================
11 | 
12 | * apt-get install julia
13 | * apt-get install python-matplotlib
14 | 
15 | From within Julia (once, to perform system-wide setup):
16 | * Pkg.add("PyPlot")
17 | 


--------------------------------------------------------------------------------
/lib/README:
--------------------------------------------------------------------------------
 1 | lib/ contains third-party libraries needed for analysis. 
 2 | 
 3 | See the subdirectories for license and copyright information.  The
 4 | libraries described in the following are *not* part of this distribution
 5 | and must be installed by hand. 
 6 | 
 7 | Matlab-BGL 
 8 | =========
 9 | 
10 | http://www.stanford.edu/~dgleich/programs/matlab_bgl/
11 | 
12 | Installation:  
13 | * Download version 4.x from website
14 | * unzip, such that the directory konect-analysis/lib/matlab_bgl/ is present 
15 | 
16 | WAFO
17 | ====
18 | 
19 | Needed to fit the generalized Gamma distribution.
20 | 
21 | We used version 25 (i.e., the file wafo25.7z).
22 | 
23 | Gplot2
24 | ======
25 | 
26 | Get the file gplot2.m from the following URL and put it into lib/:
27 | 
28 | http://www.mathworks.com/matlabcentral/fileexchange/10342-gplot-enhanced/content/gplot2.m
29 | 
30 | PLfit
31 | =====
32 | 
33 | We need the files plfit.m, zeta.m from
34 | 
35 | http://tuvalu.santafe.edu/~aaronc/powerlaws/
36 | 
37 | Julia
38 | =====
39 | 
40 | See lib/JULIA
41 | 


--------------------------------------------------------------------------------
/lib/gridxy/license.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2009, Jos van der Geest
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are
 6 | met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 |     * Redistributions in binary form must reproduce the above copyright
11 |       notice, this list of conditions and the following disclaimer in
12 |       the documentation and/or other materials provided with the distribution
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 | POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/lib/hsl2rgb.m:
--------------------------------------------------------------------------------
 1 | function rgb=hsl2rgb(hsl_in)
 2 | %Converts Hue-Saturation-Luminance Color value to Red-Green-Blue Color value
 3 | %
 4 | %Usage
 5 | %       RGB = hsl2rgb(HSL)
 6 | %
 7 | %   converts HSL, a M [x N] x 3 color matrix with values between 0 and 1
 8 | %   into RGB, a M [x N] X 3 color matrix with values between 0 and 1
 9 | %
10 | %See also rgb2hsl, rgb2hsv, hsv2rgb
11 | 
12 | % (C) Vladimir Bychkovsky, June 2008
13 | % written using: 
14 | % - an implementation by Suresh E Joel, April 26,2003
15 | % - Wikipedia: http://en.wikipedia.org/wiki/HSL_and_HSV
16 | 
17 | hsl=reshape(hsl_in, [], 3);
18 | % One row per color; the columns are H, S, L.
19 | H=hsl(:,1);
20 | S=hsl(:,2);
21 | L=hsl(:,3);
22 | 
23 | lowLidx=L < (1/2);
24 | q=(L .* (1+S) ).*lowLidx + (L+S-(L.*S)).*(~lowLidx);
25 | p=2*L - q;
26 | hk=H; % this is already divided by 360
27 | 
28 | t=zeros([length(H), 3]); % 1=R, 2=G, 3=B
29 | t(:,1)=hk+1/3;
30 | t(:,2)=hk;
31 | t(:,3)=hk-1/3;
32 | % Wrap the shifted hues back into [0, 1].
33 | underidx=t < 0;
34 | overidx=t > 1;
35 | t=t+underidx - overidx;
36 |     
37 | range1=t < (1/6);
38 | range2=(t >= (1/6) & t < (1/2));
39 | range3=(t >= (1/2) & t < (2/3));
40 | range4= t >= (2/3);
41 | 
42 | % replicate matrices (one per color) to make the final expression simpler
43 | P=repmat(p, [1,3]);
44 | Q=repmat(q, [1,3]);
45 | rgb_c= (P + ((Q-P).*6.*t)).*range1 + ...
46 |         Q.*range2 + ...
47 |         (P + ((Q-P).*6.*(2/3 - t))).*range3 + ...
48 |         P.*range4;
49 |        
50 | rgb_c=round(rgb_c.*10000)./10000; 
51 | rgb=reshape(rgb_c, size(hsl_in));


--------------------------------------------------------------------------------
/lib/octave/README:
--------------------------------------------------------------------------------
1 | This directory contains implementations of functions that are in Matlab
2 | but not in Octave. This directory is only added to the path when running
3 | Octave. 
4 | 
5 | The files here have the same license and authors as the main part of
6 | KONECT-Analysis.  
7 | 


--------------------------------------------------------------------------------
/lib/octave/corr.m:
--------------------------------------------------------------------------------
 1 | 
 2 | function ret = corr(a, b, c, d)
 3 | % CORR  Octave stand-in for Matlab's corr(); forwards two or four arguments to corrcoef().
 4 | if nargin == 2
 5 | 
 6 |     ret = corrcoef(a, b);
 7 |     
 8 | elseif nargin == 4
 9 | 
10 |     ret = corrcoef(a, b, c, d);
11 |     
12 | else
13 | 
14 |     error('*** unsupported case'); 
15 |     
16 | end
17 | 
18 | 


--------------------------------------------------------------------------------
/lib/rgb2hsl.m:
--------------------------------------------------------------------------------
 1 | function hsl=rgb2hsl(rgb_in)
 2 | %Converts Red-Green-Blue Color value to Hue-Saturation-Luminance Color value
 3 | %
 4 | %Usage
 5 | %       HSL = rgb2hsl(RGB)
 6 | %
 7 | %   converts RGB, a M [x N] x 3 color matrix with values between 0 and 1
 8 | %   into HSL, a M [x N] X 3 color matrix with values between 0 and 1
 9 | %
10 | %See also hsl2rgb, rgb2hsv, hsv2rgb
11 | 
12 | % (C) Vladimir Bychkovsky, June 2008
13 | % written using: 
14 | % - an implementation by Suresh E Joel, April 26,2003
15 | % - Wikipedia: http://en.wikipedia.org/wiki/HSL_and_HSV
16 | 
17 | rgb=reshape(rgb_in, [], 3);
18 | % One row per color; the columns are R, G, B.
19 | mx=max(rgb,[],2);%max of the 3 colors
20 | mn=min(rgb,[],2);%min of the 3 colors
21 | 
22 | L=(mx+mn)/2;%luminance is half of max value + min value
23 | S=zeros(size(L));
24 | 
25 | % this set of matrix operations can probably be done as an addition...
26 | zeroidx= (mx==mn);
27 | S(zeroidx)=0;
28 | % L <= 0.5:  S = (max-min)/(max+min), skipping rows where max == min.
29 | lowlidx=L <= 0.5;
30 | calc=(mx-mn)./(mx+mn);
31 | idx=lowlidx & (~ zeroidx);
32 | S(idx)=calc(idx);
33 | % L > 0.5:  S = (max-min)/(2-(max+min)), skipping rows where max == min.
34 | hilidx=L > 0.5;
35 | calc=(mx-mn)./(2-(mx+mn));
36 | idx=hilidx & (~ zeroidx);
37 | S(idx)=calc(idx);
38 | % The hue is taken from the first column of rgb2hsv().
39 | hsv=rgb2hsv(rgb);
40 | H=hsv(:,1);
41 | 
42 | hsl=[H, S, L];
43 | % Round to five decimals and restore the input shape.
44 | hsl=round(hsl.*100000)./100000; 
45 | hsl=reshape(hsl, size(rgb_in));


--------------------------------------------------------------------------------
/m/approximation.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Goodness of fit of all matrix decompositions as a function of rank. 
 3 | %
 4 | % This test uses full matrices, so will only work for very small networks. 
 5 | %
 6 | % PARAMETERS 
 7 | %	$NETWORK
 8 | %
 9 | % OUTPUT 
10 | %	dat/approximation.$NETWORK.mat
11 | %		functions		Cell array of function names
12 | %		prec			RMSE values
13 | %			.(function).values(i_decomposition, r)
14 | %		decompositions
15 | %		names_decompositions
16 | %
17 | % INPUT 
18 | %	dat/data.$NETWORK.mat
19 | %	dat/info.$NETWORK
20 | %	dat/meansi.$NETWORK.mat
21 | %
22 | 
23 | r_max = 25; % Maximal rank
24 | opts.disp = 2; 
25 | 
26 | decompositions = { 'svd', 'diag', 'dedicom1u', 'dedicom1v', 'dedicom2', 'dedicom3' }; 
27 | functions = { 'e1', 'e2', 'a' }; 
28 | 
29 | network = getenv('NETWORK'); 
30 | 
31 | consts = konect_consts(); 
32 | 
33 | info = read_info(network); 
34 | data = load(sprintf('dat/data.%s.mat', network)); 
35 | means = load(sprintf('dat/meansi.%s.mat', network)); 
36 | 
37 | T = konect_normalize_additively(data.T, means); 
38 | a = konect_spconvert(T, info.m, info.n); 
39 | 
40 | fprintf(1, 'Computing exponential 1...\n'); 
41 | a_1 = expm(0.1 * a); 
42 | fprintf(1, 'Computing exponential 2...\n'); 
43 | a_2 = expm(0.03 * a); 
44 | fprintf(1, 'Done.\n'); 
45 | 
46 | names_decompositions = []; 
47 | 
48 | labels_method = get_labels_method(); 
49 | 
50 | prec = struct(); 
51 | % Columns of T:  row index, column index and, when present, value (otherwise all ones).
52 | a1 = T(:,1);
53 | a2 = T(:,2);
54 | if size(T,2) >= 3
55 |   a3 = T(:,3);
56 | else
57 |   a3 = ones(size(T,1), 1); 
58 | end
59 | 
60 | for i = 1 : length(decompositions)
61 | 
62 |   decomposition = decompositions{i}
63 | 
64 |   names_decompositions = [ names_decompositions ; cellstr(labels_method.(regexprep(decomposition, '-', '_'))) ]; 
65 | 
66 |   for r = 1 : r_max
67 | 
68 |     r
69 | 
70 |     [u d v] = konect_decomposition(decomposition, a, r, info.format, info.weights, opts); 
71 | 
72 |     for j = 1 : length(functions)
73 |       f = functions{j};
74 |       % NOTE(review): a_2 is expm(0.03 * a) but 'e2' below uses expm(0.2 * d); the coefficients differ -- confirm which is intended.
75 |       if strcmp(f, 'a')
76 |         value = rmse_latent(a1, a2, a3, u, d, v); 
77 |       elseif strcmp(f, 'e1')          
78 |         value = rmse_full(a_1, u, expm(0.1 * d), v); 
79 |       elseif strcmp(f, 'e2')          
80 |         value = rmse_full(a_2, u, expm(0.2 * d), v); 
81 |       end     
82 |    
83 |       prec.(f).values(i,r) = value; 
84 |     end
85 |   end
86 | 
87 | end
88 | 
89 | save(sprintf('dat/approximation.%s.mat', network), '-v7.3', 'functions', 'prec', 'decompositions', 'names_decompositions'); 
90 | 


--------------------------------------------------------------------------------
/m/approximation_plot.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot matrix approximations.
 3 | % 
 4 | % INPUT 
 5 | %	dat/approximation.$NETWORK.mat
 6 | %
 7 | % OUTPUT 
 8 | %	plot/approximation.$FUNCTION.$NETWORK.eps 
 9 | %		For all functions as given in the data file
10 | %
11 | 
12 | network = getenv('NETWORK');
13 | 
14 | data = load(sprintf('dat/approximation.%s.mat', network)); 
15 | 
16 | [colors line_styles markers] = styles_method(); 
17 | 
18 | for j = 1 : length(data.functions)
19 | 
20 |     f = data.functions{j}
21 | 
22 |     hold on; 
23 |     for i = 1 : size(data.prec.(f).values, 1)
24 |         decomposition = data.decompositions{i}; 
25 |         if strcmp(decomposition, 'svd'), continue; end; 
26 |         plot(1 : size(data.prec.(f).values, 2), data.prec.(f).values(i, :), ...
27 |              'LineStyle',  line_styles.(decomposition), 'Marker', markers.(decomposition), 'Color', colors.(decomposition), ...
28 |              'LineWidth', 3); 
29 |     end
30 |     % Label only the curves that were drawn: 'svd' is skipped in the loop above, so its name must not take a legend slot.
31 |     legend(data.names_decompositions(~strcmp(data.decompositions, 'svd')), 'Location', 'EastOutside'); 
32 | 
33 |     xlabel('Decomposition rank (r)', 'FontSize', 16); 
34 |     ylabel('Root mean squared error (RMSE)', 'FontSize', 16); 
35 | 
36 |     set(gca, 'FontSize', 16); 
37 |     % Extend the upper Y limit to at least 2.15e-4.
38 |     ax = axis(); 
39 |     ax(4) = max(ax(4), 2.15e-4); 
40 |     axis(ax); 
41 | 
42 |     konect_print(sprintf('plot/approximation.%s.%s.eps', f, network)); 
43 | end
44 | 


--------------------------------------------------------------------------------
/m/assortativity.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Generate assortativity plots. 
 3 | %
 4 | % PARAMETERS 
 5 | %	$network	Name of the network
 6 | %
 7 | % INPUT 
 8 | %	dat/data.$network.mat
 9 | %	dat/info.$network
10 | %
11 | % OUTPUT 
12 | %	plot/assortativity.[auv].$network.eps
13 | %
14 | 
15 | consts = konect_consts(); 
16 | 
17 | network = getenv('network');
18 | 
19 | data = load(sprintf('dat/data.%s.mat', network)); 
20 | info = read_info(network); 
21 | % The third column is used as edge weight only for POSITIVE networks that have one; otherwise every edge counts 1.
22 | if info.weights == consts.POSITIVE & size(data.T, 2) >= 3
23 |     w = data.T(:,3);
24 | else
25 |     w = 1; 
26 | end
27 | % Per-node sums of w over column 1 and over column 2 of T.
28 | d_1 = sparse(data.T(:,1), 1, w, info.n1, 1);
29 | d_2 = sparse(data.T(:,2), 1, w, info.n2, 1); 
30 | 
31 | A = sparse(data.T(:,1), data.T(:,2), w, info.n1, info.n2); 
32 | % Separate 'u' and 'v' plots are produced for ASYM and BIP networks only.
33 | if info.format == consts.ASYM
34 |         
35 |     assortativity_one(d_1, d_2, A, 'u', 'outdegree');
36 |     konect_print(sprintf('plot/assortativity.u.%s.eps', network)); 
37 |     
38 |     assortativity_one(d_2, d_1, A', 'v', 'indegree');
39 |     konect_print(sprintf('plot/assortativity.v.%s.eps', network)); 
40 | 
41 | elseif info.format == consts.BIP
42 | 
43 |     assortativity_one(d_1, d_2, A, 'u', 'left degree');
44 |     konect_print(sprintf('plot/assortativity.u.%s.eps', network)); 
45 |     
46 |     assortativity_one(d_2, d_1, A', 'v', 'right degree');
47 |     konect_print(sprintf('plot/assortativity.v.%s.eps', network)); 
48 | 
49 | end
50 | % The combined 'a' plot is produced for every format.
51 | if info.format == consts.BIP
52 |     % Bipartite: stack both node sets and use the block matrix [0 A; A' 0].
53 |     dd = [ d_1 ; d_2 ];
54 | 
55 |     assortativity_one(dd, dd, ...
56 |                       [ sparse(info.n1, info.n1) A ; A' sparse(info.n2, info.n2) ], ...
57 |                       'a', 'degree');
58 |     konect_print(sprintf('plot/assortativity.a.%s.eps', network)); 
59 | 
60 | else
61 |     % Otherwise: add the two degree vectors and use A + A'.
62 |     dd = d_1 + d_2; 
63 |     assortativity_one(dd, dd, A + A', 'a', 'degree'); 
64 |     konect_print(sprintf('plot/assortativity.a.%s.eps', network)); 
65 | 
66 | end
67 | 


--------------------------------------------------------------------------------
/m/assortativity_one.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Draw one assortativity plot on the current axes:  one dot per node,
 3 | % with d on the X axis and (A * d2) ./ d on the Y axis, both axes
 4 | % logarithmic. 
 5 | %
 6 | % PARAMETERS 
 7 | %	d	(n*1) Degree vector of the nodes to be plotted
 8 | %	d2	(m*1) Degree vector of the neighboring nodes 
 9 | %	A	(n*m) Adjacency / biadjacency matrix
10 | %	letter	Name of a field of konect_colors_letter(); determines
11 | %		the color 
12 | %	text	(string) The word "degree" or similar; must begin with
13 | %		a lower case letter a-z 
14 | %
15 | 
16 | function assortativity_one(d, d2, A, letter, text)
17 | 
18 | colors = konect_colors_letter(); 
19 | 
20 | font_size = 22; 
21 | 
22 | % Degree sum over the neighbors, divided by the own degree 
23 | neighbor_sum = A * d2;
24 | neighbor_avg = neighbor_sum ./ d;
25 | 
26 | % Axis label with the first letter in upper case 
27 | assert(text(1) >= 'a' && text(1) <= 'z'); 
28 | text_sentence = [ upper(text(1)) text(2:end) ]; 
29 | 
30 | plot(d, neighbor_avg, '.', 'Color', colors.(letter));
31 | 
32 | xlabel(text_sentence, 'FontSize', font_size);
33 | ylabel(sprintf('Average neighbor %s', text), 'FontSize', font_size);
34 | 
35 | set(gca, 'XScale', 'log', 'YScale', 'log');
36 | set(gca, 'FontSize', font_size, ...
37 |          'XMinorTick', 'on', 'YMinorTick', 'on', ...
38 |          'TickLength', [0.05 0.05]); 
39 | 
40 | % Workaround for Matlab bug. Otherwise, the minor ticks are not visible. 
41 | % Major ticks are placed explicitly on the powers of ten in range. 
42 | ax = axis(); 
43 | if ax(1) > 0 & ax(3) > 0 
44 |     x_lo = ceil(log(ax(1)) / log(10));   x_hi = floor(log(ax(2)) / log(10)); 
45 |     y_lo = ceil(log(ax(3)) / log(10));   y_hi = floor(log(ax(4)) / log(10)); 
46 |     set(gca, 'XTick', 10 .^ (x_lo:x_hi)); 
47 |     set(gca, 'YTick', 10 .^ (y_lo:y_hi)); 
48 | end
49 | 


--------------------------------------------------------------------------------
/m/axis_fit.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute axis limits that enclose the given values with a margin of
 3 | % 10% of their range on each side.  On a logarithmic axis, the margin
 4 | % is taken in the log domain.  When all values are equal, a range of
 5 | % one (one unit of the natural logarithm on a logarithmic axis) is
 6 | % assumed. 
 7 | %
 8 | % Given X axis values, the result is suitable for axis(1) and axis(2).
 9 | % Given Y axis values, the result is suitable for axis(3) and axis(4).
10 | %
11 | % PARAMETERS 
12 | %	x	(n*1) Values to be plotted
13 | %	is_log	(0/1) Whether a logarithmic axis is used; nonpositive
14 | %		values are then ignored 
15 | %
16 | % RESULTS 
17 | %	ret	(1*2) Min/max values to be passed to axis()
18 | %
19 | 
20 | function ret = axis_fit(x, is_log)
21 | 
22 | % Echoed to the log on purpose 
23 | is_log
24 | 
25 | offset = 0.1; 
26 | 
27 | % Work in the domain in which the axis is linear 
28 | if is_log
29 |     'filter'
30 |     x = x(x > 0);    % A logarithmic axis cannot show nonpositive values 
31 |     v = log(x); 
32 | else
33 |     v = x; 
34 | end
35 | 
36 | lo = min(v); 
37 | hi = max(v); 
38 | 
39 | span = hi - lo; 
40 | if span == 0
41 |     span = 1; 
42 | end
43 | 
44 | ret = [ (lo - offset * span) (hi + offset * span) ]; 
45 | 
46 | if is_log
47 |     ret = exp(ret); 
48 | end
49 | 


--------------------------------------------------------------------------------
/m/beta_do.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Draw the degree distributions in Beta style (see beta_one() and
 3 | % beta_plot()). 
 4 | %
 5 | % This file is not called beta.m because of the builtin function
 6 | % beta(). 
 7 | %
 8 | % PARAMETERS 
 9 | %	$NETWORK
10 | %
11 | % INPUT 
12 | %	dat/data.$NETWORK.mat
13 | %	dat/info.$NETWORK (through read_info())
14 | %
15 | % OUTPUT 
16 | %	plot/beta.[auv].$NETWORK.eps
17 | %		a - All nodes; always written
18 | %		u/v - Left / right nodes; not written for SYM networks
19 | %
20 | 
21 | network = getenv('NETWORK');
22 | 
23 | data = load(sprintf('dat/data.%s.mat', network)); 
24 | 
25 | T = data.T; 
26 | 
27 | consts = konect_consts(); 
28 | 
29 | info = read_info(network); 
30 | 
31 | % The third column is used as edge multiplicity only in positively
32 | % weighted networks; otherwise only the two node columns are kept. 
33 | if info.weights ~= consts.POSITIVE & size(T,2) >= 3
34 |     T = T(:, 1:2); 
35 | end
36 | 
37 | left  = T(:,1); 
38 | right = T(:,2); 
39 | 
40 | % [] makes beta_one() count every edge once 
41 | if size(T,2) >= 3
42 |     mult = T(:,3);
43 | else
44 |     mult = []; 
45 | end
46 | 
47 | %
48 | % U, V
49 | %
50 | if info.format ~= consts.SYM
51 | 
52 |     beta_one(left, mult, 'u');
53 |     konect_print(sprintf('plot/beta.u.%s.eps', network)); 
54 | 
55 |     beta_one(right, mult, 'v'); 
56 |     konect_print(sprintf('plot/beta.v.%s.eps', network)); 
57 | 
58 | end
59 | 
60 | %
61 | % A
62 | %
63 | 
64 | % Every edge counts for both of its endpoints 
65 | beta_one([ left ; right ], [ mult ; mult ], 'a'); 
66 | konect_print(sprintf('plot/beta.a.%s.eps', network)); 
67 | 


--------------------------------------------------------------------------------
/m/beta_one.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot one BETA curve:  compute the degrees from a list of edge
 3 | % endpoints, draw them with beta_plot() and label the axes. 
 4 | %
 5 | % PARAMETERS
 6 | %	p	(e*1) Node indexes
 7 | %	q	(e*1) Multiplicities; [] to denote all ones
 8 | %	type	Name of a field of konect_colors_letter(); determines
 9 | %		the color 
10 | % 
11 | 
12 | function beta_one(p, q, type)
13 | 
14 | colors = konect_colors_letter(); 
15 | 
16 | font_size = 24; 
17 | 
18 | if isempty(q)
19 |     q = 1; 
20 | end
21 | 
22 | % Sum the multiplicities by node, then drop the nodes of degree zero 
23 | degrees = full(sparse(p, 1, q, max(p), 1)); 
24 | degrees = degrees(degrees ~= 0);
25 | 
26 | beta_plot(degrees, colors.(type)); 
27 | 
28 | xlabel('Relative degree (d / D)', 'FontSize', font_size);
29 | ylabel('P(x = d / D)', 'FontSize', font_size); 
30 | 


--------------------------------------------------------------------------------
/m/beta_plot.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot a distribution in Beta style:  the frequency of each value,
 3 | % with the values divided by their sum, on doubly logarithmic axes,
 4 | % overlaid with the density of the Beta distribution fitted by
 5 | % betafit(). 
 6 | %
 7 | % PARAMETERS 
 8 | %	values			(n*1) Values; must all be larger than zero
 9 | %	color			(optional) Color of the points; blue by default
10 | %
11 | 
12 | function beta_plot(values, color)
13 | 
14 | if ~exist('color', 'var')
15 |     color = [0 0 1]; 
16 | end
17 | 
18 | font_size = 24; 
19 | 
20 | total = sum(values); 
21 | 
22 | % Count how many times each value 0 .. max occurs 
23 | counts = sort(values); 
24 | maxcount = counts(end);
25 | bins = 0 : maxcount; 
26 | freq = histc(counts, bins); 
27 | 
28 | % Only values that actually occur are drawn 
29 | nz = freq ~= 0; 
30 | x = bins(nz);
31 | y = freq(nz); 
32 | 
33 | hold on; 
34 | loglog(x / total, y / length(values), '+', 'Color', color);
35 | 
36 | set(gca, 'XScale', 'log', 'YScale', 'log'); 
37 | 
38 | % Remember the limits chosen for the points; echoed to the log 
39 | ax = axis()
40 | 
41 | % Fitted parameters are echoed to the log 
42 | [phat pci] = betafit(values / total) 
43 | 
44 | % Sample the fitted density at N+1 points spread evenly on the
45 | % logarithmic X axis 
46 | N = 200;
47 | log_lo = log(ax(1)); 
48 | log_hi = log(ax(2)); 
49 | xx = exp(log_lo + (0:1:N) / N * (log_hi - log_lo)); 
50 | 
51 | yy = xx .^ (phat(1) - 1) .* (1 - xx) .^ (phat(2) - 1) / beta(phat(1), phat(2)) / total; 
52 | 
53 | plot(xx, yy, '-');
54 | 
55 | % The curve must not change the limits 
56 | axis(ax); 
57 | 
58 | set(gca, 'FontSize', font_size, ...
59 |          'XMinorTick', 'on', 'YMinorTick', 'on', ...
60 |          'TickLength', [0.05 0.05]); 
61 | 
62 | % Workaround for Matlab bug. Otherwise, the minor ticks are not visible. 
63 | % Major ticks are placed explicitly on the powers of ten in range. 
64 | ax = axis(); 
65 | if ax(1) > 0 & ax(3) > 0 
66 |     x_lo = ceil(log(ax(1)) / log(10));   x_hi = floor(log(ax(2)) / log(10)); 
67 |     y_lo = ceil(log(ax(3)) / log(10));   y_hi = floor(log(ax(4)) / log(10)); 
68 |     set(gca, 'XTick', 10 .^ (x_lo:x_hi)); 
69 |     set(gca, 'YTick', 10 .^ (y_lo:y_hi)); 
70 | end
71 | 


--------------------------------------------------------------------------------
/m/bidd_one.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot one BIDD curve:  compute the degrees from a list of edge
 3 | % endpoints, draw them with konect_plot_power_law() and label the
 4 | % axes. 
 5 | %
 6 | % PARAMETERS 
 7 | %	p	(e*1) Node indexes
 8 | %	q	(e*1) Multiplicities; [] to denote all ones
 9 | %	enable_normalization	Passed on to konect_plot_power_law();
10 | %				also selects the "relative" axis labels 
11 | %	type			Name of a field of konect_colors_letter();
12 | %				determines the color 
13 | %	name			Name of the quantity, used in the X label
14 | %	symbol			Symbol of the quantity, used in both labels 
15 | %	enable_axis		Same semantics as in konect_plot_power_law()
16 | % 
17 | 
18 | function bidd_one(p, q, enable_normalization, type, name, symbol, enable_axis)
19 | 
20 | % Trace output 
21 | 'bidd_one()'
22 | type
23 | name
24 | symbol
25 |   
26 | colors = konect_colors_letter(); 
27 | 
28 | font_size = 24; 
29 | 
30 | if isempty(q)
31 |     q = 1; 
32 | end
33 | 
34 | % Sum the multiplicities by node, then remove zero-degree nodes 
35 | degrees = full(sparse(p, 1, q, max(p), 1)); 
36 | degrees = degrees(degrees ~= 0); 
37 | 
38 | konect_plot_power_law(degrees, [], enable_normalization, colors.(type), 0, enable_axis); 
39 | 
40 | if enable_normalization
41 |   label_x = sprintf('Relative %s (%s / D)', name, symbol); 
42 |   label_y = sprintf('P(x \\geq %s / D)', symbol); 
43 | else
44 |   label_x = sprintf('%s (%s) [vertices]', name, symbol); 
45 |   label_y = sprintf('P(x \\geq %s)', symbol); 
46 | end
47 | 
48 | xlabel(label_x, 'FontSize', font_size);
49 | ylabel(label_y, 'FontSize', font_size); 
50 | 


--------------------------------------------------------------------------------
/m/check_failed.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Record that the check of a network has failed, then terminate. 
 3 | %
 4 | % The error text is written to dat/check_error.$network, the line "0"
 5 | % is written to dat/check.$network, and the interpreter is exited with
 6 | % status 0.  This function does not return. 
 7 | %
 8 | % PARAMETERS 
 9 | %	text		(string) Description of the failure
10 | %	$network	(environment) Name of the network 
11 | %
12 | 
13 | function check_failed(text)
14 | 
15 | network = getenv('network'); 
16 | 
17 | filename_error = sprintf('dat/check_error.%s', network); 
18 | FILE = fopen(filename_error, 'w');
19 | if FILE < 0,  error('fopen: cannot open %s', filename_error);  end
20 | fprintf(FILE, '%s\n', text);
21 | if fclose(FILE) < 0,  error('fclose: cannot close %s', filename_error);  end
22 | 
23 | filename_check = sprintf('dat/check.%s', network); 
24 | OUT = fopen(filename_check, 'w');
25 | if OUT < 0,  error('fopen: cannot open %s', filename_check);  end 
26 | fprintf(OUT, '0\n');
27 | if fclose(OUT) < 0,  error('fclose: cannot close %s', filename_check);  end
28 | 
29 | % Status 0 even though the check failed:  the outcome is in the files 
30 | exit(0);
31 |   
32 | 


--------------------------------------------------------------------------------
/m/check_successful.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Record that the check of a network has succeeded, then terminate. 
 3 | %
 4 | % A leftover dat/check_error.$network is removed, the line "1" is
 5 | % written to dat/check.$network, and the interpreter is exited with
 6 | % status 0.  This function does not return. 
 7 | %
 8 | % PARAMETERS 
 9 | %	$network	(environment) Name of the network 
10 | %
11 | 
12 | function check_successful()
13 | 
14 | network = getenv('network'); 
15 | 
16 | % Only delete the error file when there is one:  delete() warns about
17 | % files that do not exist 
18 | filename_error = sprintf('dat/check_error.%s', network); 
19 | if exist(filename_error, 'file')
20 |     delete(filename_error); 
21 | end
22 |   
23 | filename_check = sprintf('dat/check.%s', network); 
24 | OUT = fopen(filename_check, 'w');
25 | if OUT < 0,  error('fopen: cannot open %s', filename_check);  end 
26 | fprintf(OUT, '1\n');
27 | if fclose(OUT) < 0,  error('fclose: cannot close %s', filename_check);  end
28 | 
29 | exit(0);
30 |   
31 | 


--------------------------------------------------------------------------------
/m/cluscod.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the local clustering coefficients of a network.  The values
 3 | % are computed for the underlying undirected, unweighted network
 4 | % without multiple edges.  Bipartite networks are not supported. 
 5 | %
 6 | % ENVIRONMENT 
 7 | %	$network	Network name
 8 | %
 9 | % INPUT FILES 
10 | %	dat/data.$network.mat
11 | %	dat/info.$network
12 | %
13 | % OUTPUT FILES 
14 | %	dat/cluscod.$network.mat
15 | %		c_local		First result of konect_clusco(); used
16 | %				elsewhere as the vector of local
17 | %				clustering coefficients by node 
18 | %		c		Second result of konect_clusco()
19 | %		c2		Third result of konect_clusco()
20 | %
21 | 
22 | consts = konect_consts(); 
23 | 
24 | network = getenv('network'); 
25 | 
26 | data = load(sprintf('dat/data.%s.mat', network)); 
27 | 
28 | info = read_info(network);
29 | 
30 | assert(info.format ~= consts.BIP); 
31 | 
32 | from = data.T(:,1); 
33 | to   = data.T(:,2); 
34 | 
35 | % Edge weights are ignored; comparing with zero collapses multiple
36 | % edges into a single one 
37 | A = sparse(from, to, 1, info.n1, info.n2) ~= 0; 
38 | 
39 | % Ignore edge directions 
40 | A = A | A'; 
41 | 
42 | [c_local c c2] = konect_clusco(A); 
43 | 
44 | save(sprintf('dat/cluscod.%s.mat', network), 'c_local', 'c', 'c2', '-v7.3');
45 | 


--------------------------------------------------------------------------------
/m/cluscod_plot.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot the cumulative distribution of the local clustering
 3 | % coefficient. 
 4 | %
 5 | % PARAMETERS 
 6 | %	$network
 7 | %
 8 | % INPUT 
 9 | %	dat/cluscod.$network.mat
10 | %
11 | % OUTPUT 
12 | %	plot/cluscod.a.$network.eps
13 | %
14 | 
15 | font_size = 22; 
16 | line_width = 3; 
17 | 
18 | network = getenv('network');
19 | 
20 | cluscod = load(sprintf('dat/cluscod.%s.mat', network)); 
21 | 
22 | curve = cdfplot(cluscod.c_local);
23 | set(curve, 'LineWidth', line_width);
24 | 
25 | % Both the coefficient and the cumulative probability lie in [0, 1] 
26 | axis([0 1 0 1]); 
27 | 
28 | % Remove the title set by cdfplot() 
29 | title(''); 
30 | 
31 | xlabel('Local clustering coefficient (c)', 'FontSize', font_size); 
32 | ylabel('P(x \leq c)', 'FontSize', font_size); 
33 | 
34 | set(gca, 'FontSize', font_size); 
35 | 
36 | konect_print(sprintf('plot/cluscod.a.%s.eps', network)); 
37 | 


--------------------------------------------------------------------------------
/m/complex2rgb.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Convert a complex number to a RGB value for visualization.  The
 3 | % argument of the number gives the hue, the absolute value gives the
 4 | % brightness (zero is black); the saturation is always one. 
 5 | %
 6 | % PARAMETERS 
 7 | %	value	Complex scalar
 8 | %
 9 | % RESULT 
10 | %	rgb	(1*3) RGB triple as returned by hsv2rgb() 
11 | %
12 | 
13 | function [rgb] = complex2rgb(value)
14 | 
15 | % Map the angle range [-pi, +pi] to the hue range [0, 1] 
16 | hue = angle(value) / (2*pi) + 0.5;
17 | 
18 | % Map the absolute value range [0, +inf) to [0, 1) 
19 | brightness = 1 - 1 / (abs(value) + 1);
20 | 
21 | rgb = hsv2rgb([hue 1 brightness]);
22 | 


--------------------------------------------------------------------------------
/m/data.m:
--------------------------------------------------------------------------------
 1 | % 
 2 | % Save the full dataset (not split) in a MAT file.  If timestamps are
 3 | % present (fourth column), the rows are sorted by timestamp and only
 4 | % the first three columns are kept. 
 5 | %
 6 | % PARAMETERS 
 7 | %	$input		Input filename (out.*)
 8 | %	$output		Output filename (*.mat)
 9 | %
10 | % INPUT FILES
11 | %	$input
12 | % 
13 | % OUTPUT FILES 
14 | %	$output		Matlab file
15 | %		T	The data as found in the out.* file, without
16 | %			the timestamp column 
17 | %
18 | 
19 | T = load(getenv('input'));
20 | 
21 | has_timestamps = size(T,2) >= 4; 
22 | 
23 | if has_timestamps
24 |     [timestamps_sorted, order] = sort(T(:,4));
25 |     T = T(order, 1:3); 
26 | end
27 | 
28 | save(getenv('output'), '-v7.3', 'T'); 
29 | 


--------------------------------------------------------------------------------
/m/decomposition_comp.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute a network decomposition on the full network.
 3 | %
 4 | % PARAMETERS 
 5 | %	$network
 6 | %	$decomposition
 7 | %
 8 | % INPUT FILES 
 9 | %	dat/data.$network.mat
10 | %	dat/info.$network
11 | %	dat/meansi.$network.mat
12 | %
13 | % OUTPUT FILES 
14 | %	dat/decomposition.$decomposition.$network.mat
15 | %		.D	Eigenvalues / Singular value / Middle matrix
16 | %		.U	Eigenvectors or equivalent
17 | %		.V	Eigenvectors; may be []
18 | %		.r	Used rank
19 | %		.n	Used number of nodes (may be less than input)
20 | %
21 | 
22 | network = getenv('network');
23 | decomposition = getenv('decomposition');
24 | 
25 | info = read_info(network); 
26 | 
27 | data = load(sprintf('dat/data.%s.mat', network)); 
28 | 
29 | means = load(sprintf('dat/meansi.%s.mat', network)); 
30 | 
31 | T = konect_normalize_additively(data.T, means); 
32 | 
33 | A = konect_spconvert(T, info.n1, info.n2); 
34 | 
35 | opts.disp = 2;
36 | 
37 | % The rank depends on both the network and the decomposition 
38 | r = get_rank_type(network, decomposition);
39 | 
40 | % D_u and D_v are not saved; they are only named to reach n 
41 | [U D V D_u D_v n] = konect_decomposition(decomposition, A, r, info.format, info.weights, opts); 
42 | 
43 | % The network name is passed as is (the original transposed it, which
44 | % only worked because sprintf() flattens its arguments) 
45 | save(sprintf('dat/decomposition.%s.%s.mat', decomposition, network), '-v7.3', ...
46 |   'D', 'U', 'V', 'r', 'n'); 
47 | 


--------------------------------------------------------------------------------
/m/decomposition_map.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute a low-rank network decomposition on the full network, as
 3 | % used for maps.  The rank is konect_first_index($decomposition) + 1,
 4 | % and the solver runs with at most 10 iterations and a tolerance of
 5 | % 1e-3. 
 6 | %
 7 | % PARAMETERS 
 8 | %	$network
 9 | %	$decomposition
10 | %
11 | % INPUT 
12 | %	dat/data.$network.mat
13 | %	dat/info.$network
14 | %	dat/meansi.$network.mat
15 | %
16 | % OUTPUT 
17 | %	dat/decomposition_map.$decomposition.$network.mat
18 | %		.D	Eigenvalues / Singular value / Middle matrix
19 | %		.U	Eigenvectors or equivalent
20 | %		.V	Eigenvectors; may be []
21 | %		.r	Used rank
22 | %		.n	Used number of nodes (may be less than input)
23 | %
24 | 
25 | network = getenv('network');
26 | decomposition = getenv('decomposition');
27 | 
28 | info = read_info(network); 
29 | 
30 | data = load(sprintf('dat/data.%s.mat', network)); 
31 | 
32 | means = load(sprintf('dat/meansi.%s.mat', network)); 
33 | 
34 | T = konect_normalize_additively(data.T, means); 
35 | 
36 | A = konect_spconvert(T, info.n1, info.n2); 
37 | 
38 | opts.disp = 2;
39 | opts.maxit = 10;
40 | opts.tol = 1e-3; 
41 | 
42 | % One component more than the first index of the decomposition 
43 | first = konect_first_index(decomposition);
44 | r = first + 1;
45 | 
46 | % D_u and D_v are not saved; they are only named to reach n 
47 | [U D V D_u D_v n] = konect_decomposition(decomposition, A, r, info.format, info.weights, opts); 
48 | 
49 | % The network name is passed as is (the original transposed it, which
50 | % only worked because sprintf() flattens its arguments) 
51 | save(sprintf('dat/decomposition_map.%s.%s.mat', decomposition, network), '-v7.3', ...
52 |   'D', 'U', 'V', 'r', 'n'); 
53 | 


--------------------------------------------------------------------------------
/m/decomposition_split.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Decompose the source or the training matrix of a split network.
 3 | % The training matrix consists of the source and the target edges
 4 | % together. 
 5 | %
 6 | % PARAMETERS 
 7 | %	$NETWORK
 8 | %	$DECOMPOSITION
 9 | %	$TYPE		"source" or "training"
10 | %
11 | % INPUT 
12 | %	dat/split.$NETWORK.mat 
13 | %	dat/means.$NETWORK.mat (source only)
14 | %	dat/meanst.$NETWORK.mat (training only)
15 | %	dat/info.$NETWORK
16 | % 
17 | % OUTPUT 
18 | %	dat/decomposition_split.$TYPE.$DECOMPOSITION.$NETWORK.mat
19 | %		.D	Eigenvalues / Singular value / Middle matrix
20 | %		.U	Eigenvectors or equivalent
21 | %		.V	Eigenvectors; may be []
22 | %		.r	Used rank
23 | %		.n	Used number of nodes (may be less than input)
24 | % 
25 | 
26 | network = getenv('NETWORK');
27 | decomposition = getenv('DECOMPOSITION'); 
28 | type = getenv('TYPE'); 
29 | 
30 | split = load(sprintf('dat/split.%s.mat', network));
31 | info = read_info(network); 
32 | 
33 | % Select the edge set and the matching file of means 
34 | switch type
35 |   case 'source'
36 |     T = split.T_source;
37 |     means = load(sprintf('dat/means.%s.mat', network));
38 |   case 'training'
39 |     T = [ split.T_source ; split.T_target ];
40 |     means = load(sprintf('dat/meanst.%s.mat', network));
41 |   otherwise
42 |     error(sprintf('*** Invalid type %s', type)); 
43 | end
44 | 
45 | T_normalized = konect_normalize_additively(T, means); 
46 | 
47 | % The matrix size is taken from the split, not from the info file 
48 | A = konect_spconvert(T_normalized, split.n1, split.n2);
49 | 
50 | opts.disp = 2;
51 | 
52 | r = get_rank_type(network, decomposition);
53 | 
54 | % D_u and D_v are not saved; they are only named to reach n 
55 | [U D V D_u D_v n] = konect_decomposition(decomposition, A, r, info.format, info.weights, opts); 
56 | 
57 | filename_out = sprintf('dat/decomposition_split.%s.%s.%s.mat', type, decomposition, network); 
58 | save(filename_out, '-v7.3', 'D', 'U', 'V', 'r', 'n'); 
59 | 


--------------------------------------------------------------------------------
/m/degcc.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % For one network, scatter plot of degree vs local clustering
 3 | % coefficient, overlaid with the mean local clustering coefficient by
 4 | % degree. 
 5 | % 
 6 | % PARAMETERS 
 7 | %	$network
 8 | %
 9 | % INPUT FILES 
10 | %	dat/cluscod.$network.mat
11 | %	uni/out.$network
12 | %	The statistics "weights", "format" and "size" of the network,
13 | %	as read by read_statistic() 
14 | %
15 | % OUTPUT FILES 
16 | %	plot/degcc.a.$network.eps
17 | %
18 | 
19 | network = getenv('network');
20 | 
21 | consts = konect_consts(); 
22 | 
23 | % Only the first value of each statistic is used.  The weights and
24 | % the format are echoed to the log. 
25 | weights = read_statistic('weights', network);
26 | weights = weights(1)
27 | forma = read_statistic('format', network);
28 | forma = forma(1)
29 | n     = read_statistic('size', network); 
30 | n     = n(1); 
31 | 
32 | T = load(sprintf('uni/out.%s', network)); 
33 | 
34 | % Every edge counts for both of its endpoints 
35 | endpoints = [T(:,1) ; T(:,2)];
36 | 
37 | % Edge weights count only in positively weighted networks that have
38 | % a weight column 
39 | if weights == consts.POSITIVE & size(T,2) >= 3
40 |     w = [ T(:,3) ; T(:,3) ]; 
41 | else
42 |     w = 1;
43 | end
44 | 
45 | degrees = sparse(endpoints, 1, w, n, 1); 
46 | 
47 | cluscod = load(sprintf('dat/cluscod.%s.mat', network)); 
48 | 
49 | hold on; 
50 | 
51 | % Nodes of degree zero are left out 
52 | i = find(degrees > 0);
53 | x = degrees(i);
54 | y = cluscod.c_local(i); 
55 | 
56 | plot(x, y, '.'); 
57 | 
58 | xlabel('Degree');
59 | ylabel('Local clustering coefficient'); 
60 | 
61 | % Mean coefficient by degree.  The degrees serve as indexes here.
62 | % NOTE(review): this presumably requires integer degrees -- confirm
63 | % for positively weighted networks. 
64 | degree_max = max(x); 
65 | 
66 | clusco_count = sparse(x, 1, 1); 
67 | clusco_sum   = sparse(x, 1, y);
68 | 
69 | plot(1:degree_max, clusco_sum ./ clusco_count, '+-', 'Color', [1 0 0]); 
70 | 
71 | set(gca, 'XScale', 'log', 'YScale', 'log'); 
72 | 
73 | konect_print(sprintf('plot/degcc.a.%s.eps', network));
74 | 


--------------------------------------------------------------------------------
/m/delaunay_one.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Draw a graph with gplot2():  the edges as thin gray lines and the
 3 | % nodes as large dots on top, without axes.  Edge directions are
 4 | % ignored. 
 5 | %
 6 | % PARAMETERS 
 7 | %	A	Adjacency matrix
 8 | %	U	Node coordinates, as expected by gplot2()
 9 | %
10 | 
11 | function delaunay_one(A, U)
12 | 
13 | color_edge = 0.6 * [1 1 1];
14 | color_node = [0.7 0.3 0]; 
15 | 
16 | % Ignore edge directions 
17 | A_sym = A | A'; 
18 | 
19 | hold on;
20 | 
21 | gplot2(A_sym, U, '-', 'LineWidth', 0.1, 'Color', color_edge);
22 | gplot2(A_sym, U, '.', 'Color', color_node, 'MarkerSize', 50); 
23 | 
24 | axis off equal; 
25 | 


--------------------------------------------------------------------------------
/m/diadens.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot average degree and diameter by time, in one plot with two Y
 3 | % axes:  the average degree as a dashed line on the left axis, the
 4 | % diameter as a solid line on the right axis. 
 5 | %
 6 | % ENVIRONMENT 
 7 | %	$network		Network name 
 8 | %
 9 | % INPUT 
10 | %	dat/stepsi.$network
11 | %	dat/statistic_time.full.diameter.$network
12 | %	dat/statistic_time.full.avgdegree.$network
13 | %
14 | % OUTPUT 
15 | %	plot/diadens.a.$network.eps
16 | %
17 | 
18 | font_size_label = 18; 
19 | line_width = 2.5; 
20 | 
21 | network = getenv('network');
22 | 
23 | steps = load(sprintf('dat/stepsi.%s', network));
24 | statistic_diameter = load(sprintf('dat/statistic_time.full.diameter.%s', network)); 
25 | statistic_avgdegree  = load(sprintf('dat/statistic_time.full.avgdegree.%s', network)); 
26 | 
27 | % Only the first column of each statistic is drawn 
28 | values_avgdegree = statistic_avgdegree(:,1); 
29 | values_diameter  = statistic_diameter(:,1); 
30 | 
31 | [ax, h1, h2] = plotyy(steps, values_avgdegree, steps, values_diameter, 'plot'); 
32 | set(h1, 'LineWidth', line_width); 
33 | set(h2, 'LineWidth', line_width);
34 | set(h1, 'LineStyle', '--'); 
35 | set(h2, 'LineStyle', '-'); 
36 | 
37 | label_left  = get(ax(1), 'Ylabel'); 
38 | label_right = get(ax(2), 'Ylabel'); 
39 | set(label_left,  'String', 'Average degree (d)', 'FontSize', font_size_label); 
40 | set(label_right, 'String', 'Effective diameter (\delta_{0.9})', 'FontSize', font_size_label); 
41 | set(ax(1), 'FontSize', font_size_label); 
42 | set(ax(2), 'FontSize', font_size_label); 
43 | 
44 | legend(konect_label_statistic('avgdegree', 'matlab'), ...
45 |        konect_label_statistic('diameter', 'matlab'), ...
46 |        'Location', 'SouthEast'); 
47 | legend('boxoff'); 
48 | 
49 | xlabel(konect_label_statistic('volume', 'matlab'), 'FontSize', font_size_label); 
50 | 
51 | konect_print(sprintf('plot/diadens.a.%s.eps', network)); 
52 | 


--------------------------------------------------------------------------------
/m/distr.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute a spectral distribution with konect_spectral_distribution()
 3 | % and save it as a text file. 
 4 | %
 5 | % PARAMETERS 
 6 | %	$network		Network
 7 | %	$decomposition		Decomposition type
 8 | %
 9 | % INPUT 
10 | % 	dat/data.$network.mat
11 | %	dat/meansi.$network.mat
12 | %	dat/info.$network (through read_info())
13 | %
14 | % OUTPUT 
15 | %	dat/distr.$decomposition.$network
16 | %		The three results of konect_spectral_distribution()
17 | %		side by side:  counts, begins, ends. 
18 | %
19 | 
20 | % Odd to avoid splitting on zero
21 | bin_count = 49; 
22 | 
23 | network = getenv('network'); 
24 | decomposition = getenv('decomposition'); 
25 | 
26 | % Echoed to the log 
27 | info = read_info(network)
28 | 
29 | data = load(sprintf('dat/data.%s.mat', network)); 
30 | means = load(sprintf('dat/meansi.%s.mat', network)); 
31 | 
32 | T = konect_normalize_additively(data.T, means); 
33 | A = konect_spconvert(T, info.n1, info.n2); 
34 | 
35 | [counts, begins, ends] = konect_spectral_distribution(A, decomposition, info.format, bin_count); 
36 | 
37 | ret = [ counts begins ends ]; 
38 | 
39 | filename_out = sprintf('dat/distr.%s.%s', decomposition, network); 
40 | save(filename_out, '-ascii', 'ret'); 
41 | 


--------------------------------------------------------------------------------
/m/distrtest_colors.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Return the plot color of each distribution type. 
 3 | %
 4 | % RESULTS 
 5 | %	colors	Struct with one field per type name; each value is a
 6 | %		(1*3) RGB triple
 7 | %
 8 | 
 9 | function colors = distrtest_colors()
10 | 
11 | colors = struct( ...
12 |     'normal',      [ 1  0  1], ...
13 |     'lognormal',   [ 1 .3  1], ...
14 |     'logistic',    [ 0  1  0], ...
15 |     'loglogistic', [.3  1 .3], ...
16 |     'cauchy',      [ 1  0  0], ...
17 |     'logcauchy',   [ 1 .3 .3], ...
18 |     'gumbel',      [.2 .7 .7], ...
19 |     'weibull',     [ 0 .7 .7], ...
20 |     'hsd',         [ 0  0  1], ...
21 |     'loghsd',      [.3 .3  1], ...
22 |     'exp',         [.5 .5 .2], ...
23 |     'pareto',      [.5 .5  0], ...
24 |     'gamma',       [.6 .6 .6], ...
25 |     'beta',        [ 0  0  0], ...
26 |     'halfnormal',  [.48 .01 .36], ...
27 |     'gengamma',    [.15 .06 .03], ...
28 |     'poisson',     [.92 .77 .65]);
29 | 
30 | 


--------------------------------------------------------------------------------
/m/distrtest_types.m:
--------------------------------------------------------------------------------
 1 | 
 2 | %
 3 | % Return the types that can be passed to distrtest_plot(). 
 4 | %
 5 | % RESULTS 
 6 | %	types	(1*17) Cell array of all type names
 7 | %
 8 | 
 9 | function types = distrtest_types()
10 | 
11 | % Types that come together with their logarithmic variant 
12 | types_paired = {'normal', 'lognormal', ...
13 |                 'logistic', 'loglogistic', ...
14 |                 'cauchy', 'logcauchy'}; 
15 | 
16 | types_middle = {'gumbel', 'weibull', 'hsd', 'loghsd', 'exp', 'pareto'}; 
17 | 
18 | types_last = {'gamma', 'beta', 'halfnormal', 'gengamma', 'poisson'}; 
19 | 
20 | % The order of the result is significant to callers that index it 
21 | types = [ types_paired types_middle types_last ]; 
22 | 
23 | 


--------------------------------------------------------------------------------
/m/estimate_power_law.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Estimate a power law when only the largest values are known.   
 3 | %
 4 | % The positive values are sorted in descending order, and a straight
 5 | % line is fitted by least squares to the logarithm of the values as a
 6 | % function of their rank (1 = largest). 
 7 | %
 8 | % Don't use this for degrees.  For degrees, not all values are
 9 | % necessarily known and power_law_*() should be used. 
10 | %
11 | % PARAMETERS
12 | %	values		Values. Nonpositive values are ignored. 
13 | %
14 | % RESULT
15 | %	alpha		exp(-s), where s is the slope of the fitted line. 
16 | %			NOTE(review): described before as "negative
17 | %			slope, a.k.a. the power law exponent" --
18 | %			confirm that the exponential is intended. 
19 | %
20 | function alpha = estimate_power_law(values)
21 | 
22 | positive = sort(values(values > 0), 'descend'); 
23 | 
24 | ranks = (1:length(positive))'; 
25 | 
26 | % coefficients(1) is the slope, coefficients(2) the intercept 
27 | coefficients = polyfit(ranks, log(positive), 1); 
28 | 
29 | alpha = exp(-coefficients(1)); 
30 | 
31 | 


--------------------------------------------------------------------------------
/m/evol_permutation.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Draw a permutation plot.  Given two eigenvector matrices, this will
 3 | % draw a square matrix of cosine similarities using black for 1 and
 4 | % white for 0. 
 5 | %
 6 | % PARAMETERS 
 7 | %	u1,u2	(n*r) Eigenvectors to be compared
 8 | %
 9 | 
10 | function evol_permutation(u1, u2)
11 | 
12 | font_size = 20; 
13 | 
14 | cm = 1:-0.01:0; 
15 | cm = [cm.^.6' cm.^.6' cm.^.9']; 
16 | 
17 | u1 = u1 ./ norm(u1);
18 | u2 = u2 ./ norm(u2); 
19 | 
20 | similarities = abs(u1' * u2);
21 | 
22 | imagesc(similarities', [0 1]);
23 | colorbar; 
24 | pbaspect([size(u1,2) size(u2,2) 1]); 
25 | colormap(cm); 
26 | xlabel('k', 'FontSize', font_size); 
27 | ylabel('l', 'FontSize', font_size); 
28 | set(gca, 'FontSize', font_size); 
29 | 
30 | 


--------------------------------------------------------------------------------
/m/exp_entropy.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % The exp-entropy, i.e. the entropy of a vector given by considering the
 3 | % exponential of its entries as probabilities.  Used with eigenvalues in
 4 | % [671]. 
 5 | %
 6 | % The exponentials are normalized to sum to one.  The computation is
 7 | % done on the logarithms, after subtracting the maximum, so that
 8 | % large entries do not overflow. 
 9 | %
10 | % RESULT 
11 | %	entropy	Entropy, using the natural logarithm 
12 | %
13 | % PARAMETERS 
14 | %	values 	Vector of values
15 | %
16 | 
17 | function entropy = exp_entropy(values)
18 | 
19 | % Shifting all values does not change the probabilities 
20 | shifted = values - max(values);
21 | 
22 | % Logarithms of the normalized probabilities 
23 | log_p = shifted - log(sum(exp(shifted)));
24 | 
25 | entropy = - sum(exp(log_p) .* log_p);
26 | 
27 | 


--------------------------------------------------------------------------------
/m/fit_plot.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot curve fitting:  the new eigenvalues against the old ones, once
 3 | % bare and once together with the fitted curves. 
 4 | %
 5 | % PARAMETERS 
 6 | %	$NETWORK
 7 | %	$DECOMPOSITION
 8 | %
 9 | % INPUT 
10 | %	dat/fit.$DECOMPOSITION.$NETWORK.mat
11 | %
12 | % OUTPUT 
13 | %	plot/fit.[ab].$DECOMPOSITION.$NETWORK.eps
14 | %		a - With the fitted curves and a legend
15 | %		b - Bare
16 | %
17 | 
18 | network = getenv('NETWORK');
19 | decomposition = getenv('DECOMPOSITION');
20 | 
21 | font_size = 18; 
22 | 
23 | label_x = 'Eigenvalue (\lambda_k)'; 
24 | label_y = 'New eigenvalue (f(\lambda_k))'; 
25 | 
26 | fit = load(sprintf('dat/fit.%s.%s.mat', decomposition, network)); 
27 | 
28 | a = fit.a;
29 | d = fit.d; 
30 | 
31 | % In complex decompositions, show the real parts.
32 | % NOTE(review): this comment used to say "absolute values", but the
33 | % code has always taken real() -- confirm which one is intended. 
34 | if isreal(a) & isreal(d)
35 |   a_show = a;
36 |   d_show = d
37 | else
38 |   a_show = real(a); 
39 |   d_show = real(d); 
40 | end
41 | 
42 | %
43 | % (b) Bare plot
44 | % 
45 | 
46 | plot(a_show, d_show, 'ok');
47 | 
48 | gridxy([0], [0], 'LineStyle', '--');
49 | 
50 | xlabel(label_x, 'FontSize', font_size); 
51 | ylabel(label_y, 'FontSize', font_size); 
52 | set(gca, 'FontSize', font_size); 
53 | 
54 | konect_print(sprintf('plot/fit.b.%s.%s.eps', decomposition, network)); 
55 | 
56 | %
57 | % (a) With curves
58 | %
59 | 
60 | [colors line_styles markers] = styles_submethod(); 
61 | 
62 | hold on; 
63 | 
64 | plot(a_show, d_show, 'ok');
65 | 
66 | % The curves are drawn over the X range of the points, and must not
67 | % change the axis limits 
68 | ax = axis(); 
69 | x_min = ax(1);
70 | x_max = ax(2); 
71 | 		
72 | % The names and values of the curves are echoed to the log 
73 | curves = fieldnames(fit.curves) 
74 | handles = []; 
75 | legends = [];
76 | for i = 1 : length(curves)
77 |   curve = curves{i}
78 |   values = fit.curves.(curve)
79 | 
80 |   % The curve "like" is not drawn 
81 |   if ~strcmp(curve, 'like')
82 | 
83 |     h = fit_plot_curve(curve, x_min, x_max, a, values, colors.(curve), line_styles.(curve), fit.pivot);
84 | 
85 |     handles = [handles h]; 
86 |     legends = [legends {curve}]; 
87 | 
88 |   end
89 | end
90 | 
91 | gridxy([0], [0], 'LineStyle', '--');
92 | 
93 | xlabel(label_x, 'FontSize', font_size); 
94 | ylabel(label_y, 'FontSize', font_size); 
95 | 
96 | set(gca, 'FontSize', font_size); 
97 | 
98 | axis(ax); 
99 | 
100 | legend(handles, legends, 'Location', 'EastOutside'); 
101 | 
102 | konect_print(sprintf('plot/fit.a.%s.%s.eps', decomposition, network)); 
103 | 


--------------------------------------------------------------------------------
/m/format_number.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Format a nonnegative integer with commas between groups of three
 3 | % digits, e.g. 1234567 gives '1,234,567'.  Numbers below 1000
 4 | % (including negative numbers) are formatted without commas.  There
 5 | % is no upper limit on the number of groups. 
 6 | %
 7 | % The number and the result are logged to the standard output. 
 8 | %
 9 | % PARAMETERS 
10 | %	number	The number to format
11 | %
12 | % RESULT 
13 | %	ret	(string) The formatted number
14 | %
15 | 
16 | function ret = format_number(number)
17 | 
18 | fprintf(1, 'format %d\n', number); 
19 | 
20 | if number < 1000
21 |     ret = sprintf('%d', number); 
22 | else
23 |     % Peel off groups of three digits from the right.  All groups but
24 |     % the leftmost are padded with zeroes. 
25 |     ret = sprintf('%03d', mod(number, 1000)); 
26 |     rest = floor(number / 1000); 
27 |     while rest >= 1000
28 |         ret = sprintf('%03d,%s', mod(rest, 1000), ret); 
29 |         rest = floor(rest / 1000); 
30 |     end
31 |     ret = sprintf('%d,%s', rest, ret); 
32 | end
33 | 
34 | fprintf(1, '  ret=%s\n', ret); 
35 | 
19 | 


--------------------------------------------------------------------------------
/m/format_statistic.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Format the value of a statistic:  with format_number() when the
 3 | % statistic is flagged in the second result of
 4 | % konect_data_statistic(), with three decimals otherwise. 
 5 | %
 6 | % PARAMETERS 
 7 | %	statistic	(string) Name of the statistic
 8 | %	value		Value of the statistic
 9 | %
10 | % RESULT 
11 | %	text		(string) The formatted value
12 | %
13 | 
14 | function [text] = format_statistic(statistic, value)
15 | 
16 | % NOTE(review): the flag presumably marks integer-valued statistics
17 | % -- confirm against konect_data_statistic() 
18 | [labels flags] = konect_data_statistic();
19 | 
20 | if ~flags.(statistic)
21 |   text = sprintf('%.3f', value);
22 | else
23 |   text = format_number(value);
24 | end
25 | 
26 | 
27 | 
20 | 


--------------------------------------------------------------------------------
/m/get_ids_submethod.m:
--------------------------------------------------------------------------------
 1 | % Integer IDs of submethods; beginning at 1.  Returns a struct with one
 2 | % field per submethod name, whose value is the integer ID. 
 3 | %
 4 | 
 5 | function [ids_submethod] = get_ids_submethod()
 6 | 
 7 | ids_submethod.rat = 1;  % NOTE(review): overwritten by "rat = 20" below, so no field ends up with ID 1 -- confirm the intended name
 8 | ids_submethod.main = 2;
 9 | 
10 | ids_submethod.common = 3;
11 | ids_submethod.adad = 4;
12 | ids_submethod.ra = 5;
13 | ids_submethod.jaccard = 6;
14 | ids_submethod.cosine = 7;
15 | ids_submethod.sorensen = 8;
16 | ids_submethod.hpi = 9;
17 | ids_submethod.hdi = 10;
18 | ids_submethod.lhni = 11;
19 | 
20 | ids_submethod.lin = 12;
21 | ids_submethod.poly = 13;
22 | ids_submethod.polyo = 14;
23 | ids_submethod.polyn = 15;
24 | ids_submethod.polyon = 16;
25 | ids_submethod.rr = 17;
26 | ids_submethod.exp = 18;
27 | ids_submethod.expo = 19;
28 | ids_submethod.rat = 20;  % Replaces the value 1 assigned to the same field at the top
29 | ids_submethod.rato = 21;
30 | ids_submethod.like = 22;
31 | ids_submethod.rank1 = 23;
32 | ids_submethod.rank2 = 24;
33 | ids_submethod.rank3 = 25;
34 | ids_submethod.euclidean = 26;
35 | ids_submethod.sne = 27;
36 | ids_submethod.rrs = 28;
37 | ids_submethod.expnl = 29;
38 | ids_submethod.ratn = 30;
39 | ids_submethod.ratno = 31;
40 | ids_submethod.uni = 32;
41 | ids_submethod.polyl = 33;
42 | ids_submethod.polynl = 34;
43 | ids_submethod.rrl = 35;
44 | ids_submethod.expl = 36;
45 | ids_submethod.lap = 37;
46 | ids_submethod.ratl = 38;
47 | ids_submethod.rank1i = 39;
48 | ids_submethod.rank2i = 40;
49 | ids_submethod.rank3i = 41;
50 | ids_submethod.polyx = 42;
51 | % The names with IDs 3 to 11, with the suffix "asym"
52 | ids_submethod.commonasym = 43;
53 | ids_submethod.adadasym = 44;
54 | ids_submethod.raasym = 45;
55 | ids_submethod.jaccardasym = 46;
56 | ids_submethod.cosineasym = 47;
57 | ids_submethod.sorensenasym = 48;
58 | ids_submethod.hpiasym = 49;
59 | ids_submethod.hdiasym = 50;
60 | ids_submethod.lhniasym = 51;
61 | % The same names with the suffix "out"
62 | ids_submethod.commonout = 52;
63 | ids_submethod.adadout = 53;
64 | ids_submethod.raout = 54;
65 | ids_submethod.jaccardout = 55;
66 | ids_submethod.cosineout = 56;
67 | ids_submethod.sorensenout = 57;
68 | ids_submethod.hpiout = 58;
69 | ids_submethod.hdiout = 59;
70 | ids_submethod.lhniout = 60;
71 | % The same names with the suffix "in"
72 | ids_submethod.commonin = 61;
73 | ids_submethod.adadin = 62;
74 | ids_submethod.rain = 63;
75 | ids_submethod.jaccardin = 64;
76 | ids_submethod.cosinein = 65;
77 | ids_submethod.sorensenin = 66;
78 | ids_submethod.hpiin = 67;
79 | ids_submethod.hdiin = 68;
80 | ids_submethod.lhniin = 69;
81 | % Four of the names with the prefix "abs"
82 | ids_submethod.abscommon = 70; 
83 | ids_submethod.absadad   = 71;
84 | ids_submethod.absjaccard = 72;
85 | ids_submethod.abscosine  = 73; 
86 | 
87 | ids_submethod.path3      = 74; 
88 | % Same value as path3, the largest ID; presumably the number of IDs rather than a submethod -- TODO confirm
89 | ids_submethod.count      = 74; 
90 | 


--------------------------------------------------------------------------------
/m/get_labels_measure.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % The label associated with a measure.  The content of the returned
 3 | % struct is used in various places as the canonical list of measures to
 4 | % use. 
 5 | %
 6 | % RESULT 
 7 | %	labels		Struct of labels by measure name
 8 | %	labels_short	Struct of short labels by measure name 
 9 | %
10 | 
11 | function [labels labels_short] = get_labels_measure()
12 | 
13 | labels = struct(); 
14 | 
15 | % Individual ones can be enabled/disabled, but CORR must always be
16 | % enabled, because we use it as a target in the makefile. 
17 | 
18 | %labels.ap	= 'Average precision';
19 | %labels.map	= 'Mean average precision'; 
20 | labels.corr	= 'Pearson correlation';
21 | %labels.spear	= 'Spearman correlation';
22 | labels.auc	= 'Area under the curve';
23 | %labels.mauc	= 'Mean area under the curve';
24 | 
25 | % KENDALL is not computed because it is too slow. 
26 | % MAP and MAUC removed because they are slow and the results are indistinguishable from AP and AUC. 
27 | 
28 | % Short labels, defined for the two measures enabled above. 
29 | labels_short = struct();
30 | 
31 | labels_short.corr = '\rho';
32 | labels_short.auc = 'AUC'; 
33 | 


--------------------------------------------------------------------------------
/m/get_labels_method.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Labels of methods and decompositions.
 3 | %
 4 | % RESULT
 5 | %	labels
 6 | %		.(method)	Label of method
 7 | %
 8 | 
 9 | function [labels] = get_labels_method()
10 | 
11 | labels = struct();
12 | % Methods
13 | labels.zero 		= '0';
14 | labels.pref 		= 'PA';
15 | labels.mask		= 'Mask';
16 | labels.neib		= 'Neib.'; 
17 | labels.neib3		= 'P3';
18 | 
19 | % Decompositions
20 | labels.sym		= 'A';
21 | labels.sym_n 		= 'N';
22 | labels.lap		= 'L';
23 | labels.lapc		= 'Lc';
24 | labels.svd		= '[0 A; A'' 0]';  % '' is an escaped quote; the label reads [0 A; A' 0]
25 | labels.svd_n		= '[0 N; N'' 0]';
26 | labels.stoch2 		= 'D^{-1}A';
27 | labels.stoch1		= 'AD^{-1}'; 
28 | labels.lapd		= 'L_d';
29 | labels.lapd_n		= 'Z_d'; 
30 | labels.back		= 'A + \alpha A'''; 
31 | labels.diag		= 'A (asym)';
32 | labels.diag_n		= 'N (asym)';
33 | labels.skew		= 'A - A'''; 
34 | labels.skewi		= 'iA - iA'''; 
35 | labels.skewn 		= 'N - N'''; 
36 | labels.herm		= 'A_H'; 
37 | labels.hermi		= 'iA_H'; 
38 | labels.hermn		= 'N_H'; 
39 | labels.lapherm		= 'L_H'; 
40 | labels.lapherm2		= 'L_{H2}'; 
41 | labels.lapskew		= 'L_S'; 
42 | labels.quantum		= 'Q'; 
43 | labels.mskew		= 'M';
44 | labels.lapquantum	= 'L_Q'; 
45 | labels.lapq		= 'K'; 
46 | labels.stochbip		= 'S'; 
47 | labels.symabs		= '\bar A'; 
48 | labels.symc		= 'A_c'; 
49 | % Names of the form "quantum" plus a number; the label is "Q" plus that number
50 | labels.quantum5         = 'Q5';
51 | labels.quantum10        = 'Q10';
52 | labels.quantum20        = 'Q20';
53 | labels.quantum50        = 'Q50';
54 | labels.quantum100       = 'Q100';
55 | labels.quantum200       = 'Q200';
56 | labels.quantum500       = 'Q500';
57 | labels.quantum785       = 'Q785';
58 | labels.quantum1000      = 'Q1000';
59 | labels.quantum1570      = 'Q1570';
60 | 
61 | 
62 | % DEDICOM
63 | labels.dedicom1u	= 'DEDICOM 1u';
64 | labels.dedicom1v	= 'DEDICOM 1v';
65 | labels.dedicom2		= 'DEDICOM 2';
66 | labels.dedicom2s	= 'DEDICOM 2s';
67 | labels.dedicom3		= 'DEDICOM 3';
68 | labels.dedicom3_0	= 'DEDICOM 3/0';
69 | labels.dedicom4		= 'DEDICOM 4';
70 | labels.takane		= 'Takane';
71 | 


--------------------------------------------------------------------------------
/m/get_labels_method_submethod.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Labels for combined method/submethods.
 3 | %
 4 | % RESULT 
 5 | %	labels
 6 | %	.([method "_" submethod])	Readable label of the
 7 | %	method/submethod combination, e.g. field "sym_n_polyo"
 8 | %
 9 | 
10 | function labels = get_labels_method_submethod()
11 | 
12 | labels = struct();
13 | 
14 | labels.sym_expo		= 'SINH';
15 | labels.sym_rato		= 'NEU';
16 | labels.sym_polyo	= 'POLY';
17 | labels.sym_polyon	= 'POLYN';
18 | 
19 | labels.sym_n_polyo	= 'N-POLY';
20 | labels.sym_n_polyon	= 'N-POLYN';
21 | labels.sym_n_expo	= 'N-HEAT';
22 | labels.sym_n_ratno	= 'N-NEU';
23 | 
24 | labels.lap_lap		= 'COM';
25 | labels.lap_expl		= 'HEAT';
26 | 
27 | labels.pref_main	= 'PA';
28 | 
29 | labels.neib3_path3	= 'P3'; 
30 | 


--------------------------------------------------------------------------------
/m/get_rank.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Determine the dimensional parameter r (reduced rank) for a dataset. 
 3 | %
 4 | % For networks in the predefined list below, both ranks are taken from
 5 | % that list.  For all other networks, r is computed from the network
 6 | % size and the coefficients stored in dat/runtime, r_l is half of r,
 7 | % and both are then raised to at least 30 and finally lowered to at
 8 | % most min(n1,n2); the automatically computed values are displayed. 
 9 | % 
10 | % RESULT 
11 | % 	r 	reduced rank for singular value decomposition
12 | % 	r_l 	reduced rank for Laplacian decomposition
13 | %
14 | % PARAMETERS 
15 | %	network	Dataset name
16 | %
17 | % INPUT 
18 | % 	dat/runtime	(only read for networks not in the predefined list)
19 | %
20 | 
21 | function [r, r_l] = get_rank(network)
22 | 
23 | % Parameters of the automatic computation
24 | alpha          = 0.7;    % Scaling factor
25 | lap_proportion = 0.5;    % r_l as a proportion of r
26 | r_min          = 30;     % Lower bound for r
27 | r_min_l        = r_min;  % Lower bound for r_l
28 | 
29 | % The network size is read even when a predefined entry exists
30 | info = read_info(network); 
31 | n1 = info.n1; 
32 | n2 = info.n2; 
33 | m_ = info.lines; 
34 | 
35 | % Rank must not be larger than dimensions of matrix 
36 | r_max = min(n1,n2);
37 | 
38 | %
39 | % List of predefined sizes, as [r r_l].  The field names are network
40 | % names in which "_" is doubled and "-" is replaced by "_". 
41 | %
42 | rs = struct();
43 | rs.advogato                          = [ 500   75];
44 | rs.arenas_meta                       = [ 150   75];
45 | rs.citeseer                          = [  75   38];
46 | rs.contact                           = [   5    5];
47 | rs.movielens_100k__rating            = [  60   30];
48 | rs.movielens_100k__rating_unweighted = [ 100   50];
49 | rs.movielens_1m                      = [ 100   50];
50 | rs.www                               = [  75   38];
51 | rs.epinions                          = [  70   35];
52 | rs.slashdot_zoo                      = [  90   75];
53 | rs.hep_th_citations                  = [  75   38];
54 | rs.facebook_wosn_links               = [  70   35];
55 | rs.facebook_wosn_wall                = [  49   15];
56 | rs.filmtipset                        = [   9    9];
57 | rs.trec_wt10g                        = [   9    9];
58 | rs.wiki_Talk                         = [   9    9];
59 | rs.roadNet_CA                        = [  15   15];
60 | rs.dbpedia_similar                   = [ 100  100];
61 | rs.edit_frwikibooks                  = [  40   20];
62 | rs.gottron_net_core                  = [ 100   50];
63 | rs.dblp_cite                         = [ 400   50];
64 | rs.elec                              = [1000 1000]; % This is used in examples plots in the handbook 
65 | rs.web_Stanford                      = [ 120   60];
66 | rs.wikisigned_k2                     = [ 120   60];
67 | 
68 | % Translate the network name into a valid field name
69 | fieldname = strrep(strrep(network, '_', '__'), '-', '_');
70 | 
71 | % Predefined setting:  use it as is, without applying any bounds
72 | if isfield(rs, fieldname)
73 |   rs_network = rs.(fieldname);
74 |   r   = rs_network(1);
75 |   r_l = rs_network(2); 
76 |   return;
77 | end
78 | 
79 | % Automatic settings
80 | x = load('dat/runtime');
81 | 
82 | r = round(alpha * exp(-x(1)) * (n1 + n2)^-x(2) * m_^-x(3) * (n1*n2)^-x(4)); 
83 | r_l = round(lap_proportion * r);
84 | 
85 | % Lower bounds
86 | if r < r_min
87 |   r = r_min;
88 | end
89 | if r_l < r_min_l
90 |   r_l = r_min_l;
91 | end
92 | 
93 | % Upper bounds; applied last, so they win over the lower bounds
94 | if r > r_max
95 |   r = r_max;
96 | end
97 | if r_l > r_max
98 |   r_l = r_max;
99 | end
100 | 
101 | % Show the computed values
102 | r
103 | r_l 
104 | 


--------------------------------------------------------------------------------
/m/get_rank_type.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the reduced rank in function of decomposition type.
 3 | %
 4 | % RESULT 
 5 | %	r		The reduced rank to use
 6 | %
 7 | % PARAMETERS 
 8 | %	network		Dataset name, passed on to get_rank()
 9 | %	decomposition	Name of the decomposition 
10 | %
11 | 
12 | function r = get_rank_type(network, decomposition)
13 | 
14 | [r_svd, r_lap] = get_rank(network);
15 | 
16 | % Decompositions that use the first rank returned by get_rank()
17 | names_svd   = {'svd', 'sym', 'diag'};
18 | 
19 | % Decompositions that use a third of the second rank, but at least 5
20 | names_third = {'takane', 'lapd', 'dedicom3'};
21 | 
22 | if any(strcmp(decomposition, names_svd))
23 |     r = r_svd;
24 | elseif any(strcmp(decomposition, names_third))
25 |     r = max(5, floor(r_lap / 3));
26 | else
27 |     % All other decompositions, including 'lapb', 'laps', 'svd-n' and
28 |     % 'sym-n', use the second rank returned by get_rank()
29 |     r = r_lap;
30 | end
31 | 


--------------------------------------------------------------------------------
/m/get_tags.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Extract the tags from the metadata of a network.  A tag is a "#"
 3 | % followed by one or more lowercase letters a-z; the intermediate
 4 | % values are displayed while extracting. 
 5 | %
 6 | % RETURN VALUE 
 7 | %	ret	A struct containing a field (set to 1) for every tag;
 8 | %		the field name is the tag without the leading "#".  The
 9 | %		struct is empty when META has no field "tags". 
10 | %
11 | % ARGUMENTS 
12 | %	meta	The metadata, as returned by read_meta() 
13 | %
14 | 
15 | function ret = get_tags(meta)
16 | 
17 | ret = struct(); 
18 | 
19 | if isfield(meta, 'tags')
20 | 
21 |     match = regexp(meta.tags, '#[a-z]+', 'match')
22 | 
23 |     for k = 1 : length(match)
24 |         tag = match{k}
25 |         % Drop the leading "#"
26 |         tag = tag(2:end) 
27 |         ret.(tag) = 1; 
28 |     end
29 | 
30 | end
31 | 


--------------------------------------------------------------------------------
/m/get_updown_statistic.m:
--------------------------------------------------------------------------------
 1 | % Whether statistics should go up or down according to the shrinking diversity hypothesis. 
 2 | % Returns a struct with one field per statistic name, set to +1 or -1
 3 | % (presumably +1 = up and -1 = down -- TODO confirm). 
 4 | 
 5 | function [updown_statistic] = get_updown_statistic()
 6 | 
 7 | updown_statistic.diameter 		= -1;
 8 | updown_statistic.network_rank_sq 	= -1;
 9 | updown_statistic.network_rank_norm4 	= -1;
10 | updown_statistic.gini 			= +1;
11 | updown_statistic.controllability 	= -1;
12 | updown_statistic.controllabilityn 	= -1;
13 | updown_statistic.alcon 			= +1;
14 | updown_statistic.dentropyn 		= -1;
15 | updown_statistic.alconn 		= +1;
16 | updown_statistic.jain 			= -1; 
17 | updown_statistic.own			= -1;
18 | updown_statistic.dentropy2		= -1; 
19 | updown_statistic.dentropy		= -1; 
20 | updown_statistic.network_rank_abs	= -1; 
21 | updown_statistic.epower			= +1; 
22 | updown_statistic.entropy		= -1; 
23 | updown_statistic.entropyn		= -1; 
24 | updown_statistic.separation		= +1;
25 | updown_statistic.power			= -1;
26 | updown_statistic.separationl		= +1;
27 | updown_statistic.clusco			= +1; 
28 | updown_statistic.avgdegree		= +1; 
29 | % updown_statistic.density		= +1; 
30 | 


--------------------------------------------------------------------------------
/m/has_timestamps.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Return whether a network has timestamps.  This is decided from the
 3 | % sixth line of the file:  the network has timestamps when that line
 4 | % contains at least four whitespace-separated fields. 
 5 | %
 6 | % ARGUMENTS
 7 | %	network		Name of network 
 8 | %
 9 | % INPUT FILES
10 | %	uni/out.$network
11 | %
12 | % RETURN VALUE
13 | %	1/0	True/false
14 | %
15 | % An error is raised when the file cannot be opened or closed, or when
16 | % it has fewer than six lines.  The file is always closed before an
17 | % error about its content is raised. 
18 | %
19 | 
20 | function [ret] = has_timestamps(network)
21 | 
22 | filename= sprintf('uni/out.%s', network); 
23 | 
24 | FILE = fopen(filename, 'r');
25 | 
26 | if FILE < 0,
27 |   error('opening "%s"', filename); 
28 | end
29 | 
30 | % Read up to the sixth line.  fgetl() returns -1 (not a string) at
31 | % the end of the file, in which case we stop early. 
32 | line = -1; 
33 | for k = 1 : 6
34 |   line = fgetl(FILE); 
35 |   if ~ischar(line), break; end; 
36 | end
37 | 
38 | % Close the file before looking at the line, so that the file handle
39 | % is not leaked when the content turns out to be unusable. 
40 | if fclose(FILE) < 0,
41 |   error('closing "%s"', filename); 
42 | end
43 | 
44 | if ~ischar(line)
45 |   error('"%s" has fewer than 6 lines', filename); 
46 | end
47 | 
48 | % COUNT is the number of fields read, at most four
49 | [a count] = sscanf(line, '%s %s %s %s');
50 | 
51 | ret = count >= 4; 
52 | 


--------------------------------------------------------------------------------
/m/hopdistr_comp.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the hop plot of the largest connected component.  The
 3 | % network is always made undirected first, so the result is the
 4 | % undirected hop plot (and thus also the undirected diameter).  
 5 | %
 6 | % PARAMETERS 
 7 | %	$NETWORK
 8 | %
 9 | % INPUT 
10 | %	dat/data.$NETWORK.mat
11 | %	dat/info.$NETWORK
12 | % 
13 | % OUTPUT 
14 | %	dat/hopdistr.$NETWORK
15 | %		As integer, the number of hops (zero excluded) 
16 | %
17 | 
18 | network = getenv('NETWORK');
19 | 
20 | consts = konect_consts(); 
21 | 
22 | data = load(sprintf('dat/data.%s.mat', network)); 
23 | 
24 | info = read_info(network);
25 | 
26 | % Logical adjacency matrix:  only the presence of edges is kept
27 | A = (sparse(data.T(:,1), data.T(:,2), 1, info.n1, info.n2) ~= 0); 
28 | 
29 | % Make undirected and keep largest connected component
30 | is_square = (info.format == consts.ASYM | info.format == consts.SYM); 
31 | 
32 | if is_square
33 |     A = konect_connect_matrix_square(A); 
34 | elseif info.format == consts.BIP
35 |     A = konect_connect_matrix_bipartite(A); 
36 | else
37 |     error('*** Invalid format'); 
38 | end
39 | 
40 | % Show the size of the resulting matrix
41 | n = length(A)
42 | 
43 | d = konect_hopdistr(A, info.format); 
44 | 
45 | % Write one value per line
46 | OUT = fopen(sprintf('dat/hopdistr.%s', network), 'w');
47 | fprintf(OUT, '%ld\n', d); 
48 | if fclose(OUT) ~= 0
49 |     error('fclose'); 
50 | end
51 | 


--------------------------------------------------------------------------------
/m/hopdistr_distrtest.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Test which distributions fit the hop distribution.  All types
 3 | % returned by distrtest_types() are tested, except 'beta'. 
 4 | %
 5 | % PARAMETERS 
 6 | %	$network
 7 | %
 8 | % INPUT 
 9 | %	dat/info.$network
10 | %	dat/hopdistr.$network
11 | %
12 | % OUTPUT 
13 | %	dat/hopdistr_distrtest.$network.mat
14 | %		.data.[distr-name]
15 | %		the data of the fit as returned by
16 | %		distrtest_multi()
17 | %
18 | 
19 | network = getenv('network');
20 | 
21 | consts = konect_consts();  
22 | 
23 | info = read_info(network);
24 | 
25 | % Load the hop distribution and make it a column vector
26 | dat = load(sprintf('dat/hopdistr.%s', network));
27 | dat = dat(:)
28 | 
29 | % The number of nodes for which the hop distribution was
30 | % computed. This is the size of the network's largest connected
31 | % component. 
32 | n = round(sqrt(dat(end)));
33 | 
34 | % DAT is cumulative:  the count for each number of hops is the
35 | % difference between consecutive entries
36 | counts = dat - [0; dat(1:end-1)];
37 | values = (0 : (length(dat) - 1))';
38 | 
39 | types = distrtest_types();
40 | 
41 | data = struct(); 
42 | 
43 | for i = 1 : length(types)
44 | 
45 |     type = types{i}
46 | 
47 |     % The beta distribution is not tested
48 |     if strcmp(type, 'beta'), continue; end
49 | 
50 |     ret = distrtest_multi(type, values, counts);
51 |     data.(type) = ret;
52 | end
53 | 
54 | save(sprintf('dat/hopdistr_distrtest.%s.mat', network), '-v7.3', 'data');
55 | 


--------------------------------------------------------------------------------
/m/konect_decomposition_dedicom4.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Moved to here from the Matlab toolbox because it doesn't work.  The
 3 | % iterations do not converge. 
 4 | %
 5 | % Iterative solution to DEDICOM from [1]:
 6 | %
 7 | %	A = U D U'
 8 | %
 9 | % [1] Models for Analysis of Asymmetrical Relationships Among
10 | %     N Objects or Stimuli, Richard A. Harshman, Proc. First Meeting of
11 | %     the Psychometric Society and The Society for Mathematical
12 | %     Psychology, 1978. 
13 | %
14 | % RESULT 
15 | %	U	(n*r) Factor matrix with orthonormal columns
16 | %	D	(r*r) Central asymmetric matrix
17 | %
18 | % PARAMETERS 
19 | %	A	(n*n) Square asymmetric adjacency matrix
20 | %	r	Rank
21 | %	opts	Options for svds()
22 | %
23 | 
24 | function [U D] = konect_decomposition_dedicom4(A, r, opts)
25 | 
26 | [uu D vv] = svds(double(A), r, 'L', opts);  % Initialize from the r largest singular triples of A
27 | 
28 | epsilon = 1e-7;  % Threshold on DIF below which the iteration stops
29 |   
30 | for i = 1:100000  % Upper limit on the number of iterations
31 |     U = 0.5 * (uu + vv);  % Average of the left and right factors
32 |     d_old = D; 
33 | 
34 |     % Decompose U
35 |     [u_u u_d u_v] = svd(U, 'econ'); 
36 |     u_d_i = konect_xinv(u_d); 
37 | 
38 |     % This computes D = U \ A / U';
39 |     D = u_v * (u_d_i * (u_u' * A * u_u) * u_d_i' * u_v');
40 |     
41 |     % Reorder here because otherwise the convergence test does not work.
42 |     [U D] = konect_order_dedicom(U, D); 
43 | 
44 |     if rem(i,20) == 0  % The convergence test is only done on every 20th iteration
45 |         dif = norm(D - d_old, 'fro')^2 / prod(size(D));  % Mean squared change of the entries of D since the start of this iteration
46 |         fprintf(1, 'iteration %d dif= %g\n', i, dif); 
47 |         if dif < epsilon, break; end; 
48 |     end
49 | 
50 |     % Compute uu = A / U' / D;
51 |     uu = A * u_u * (u_d_i' * u_v' * pinv(D)); 
52 | 
53 |     % Compute vv = A' / U' / D';
54 |     vv = A' * uu * (u_d_i' * u_v' * pinv(D'));  % NOTE(review): multiplies by the new uu, whereas the line for uu multiplies by u_u -- confirm which is intended
55 | 
56 |     % Orthonormalize
57 |     [qu ru] = qr(uu, 0);
58 |     [qv rv] = qr(vv, 0);
59 |     D = ru * D * rv';  % The triangular factors of both QR decompositions go into D
60 |     uu = qu;
61 |     vv = qv; 
62 | end
63 | 
64 | 


--------------------------------------------------------------------------------
/m/ksdist.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the Kolmogorov--Smirnov distance between a given sample and
 3 | % a given continuous distribution.
 4 | %
 5 | % The distance is the largest absolute difference between the given
 6 | % cumulative distribution function and the empirical one, where at
 7 | % each sample point both the value just before the step, (i-1)/n, and
 8 | % the value just after the step, i/n, are compared. 
 9 | %
10 | % PARAMETERS 
11 | %	x	(n*1) The sample
12 | %	F	@(x)(P(<=x)) The cumulative distribution function;
13 | %		this function must take as input a vector of values,
14 | %		and return a vector of the same size containing, for
15 | %		each x, the probability that a variable is smaller or
16 | %		equal to x, i.e., the cumulative distribution
17 | %		function 
18 | %
19 | % RESULTS 
20 | %	D	The Kolmogorov--Smirnov distance
21 | %
22 | 
23 | function D = ksdist(x, F)
24 | 
25 | sorted_x = sort(x);
26 | n = length(sorted_x);
27 | 
28 | % Theoretical distribution function at the sorted sample points
29 | f = F(sorted_x);
30 | 
31 | % Rank of each sorted sample point
32 | ranks = (1:n)';
33 | 
34 | % Deviation from the empirical distribution function before and after
35 | % its step at each sample point
36 | dev_before = abs((ranks - 1)/n - f);
37 | dev_after  = abs(ranks/n - f);
38 | 
39 | D = max(max(dev_before), max(dev_after)); 
40 | 


--------------------------------------------------------------------------------
/m/layout.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Draw a graph layout of one network, using the Fruchterman--Reingold
 3 | % force-directed layout. 
 4 | %
 5 | % PARAMETERS 
 6 | %	$network
 7 | %
 8 | % INPUT FILES 
 9 | %	dat/data.$network.mat
10 | %
11 | % OUTPUT FILES 
12 | %	plot/layout.tmp.a.$network.png
13 | %
14 | 
15 | network = getenv('network'); 
16 | 
17 | consts = konect_consts(); 
18 | 
19 | info = read_info(network); 
20 | 
21 | data = load(sprintf('dat/data.%s.mat', network));
22 | 
23 | A = sparse(data.T(:,1), data.T(:,2), 1, info.n1, info.n2);
24 | 
25 | % Build a symmetric square matrix over all nodes
26 | is_square = (info.format == consts.SYM | info.format == consts.ASYM); 
27 | 
28 | if is_square
29 |     % Ignore edge directions
30 |     A = A | A';
31 | elseif info.format == consts.BIP
32 |     % Put the n1 row nodes first, followed by the n2 column nodes
33 |     A = [ sparse(info.n1, info.n1), A                        ; ...
34 |           A'                      , sparse(info.n2, info.n2) ];
35 | else
36 |     error('*** Invalid format');
37 | end
38 | 
39 | X = fruchterman_reingold_force_directed_layout(A);
40 | 
41 | % Black, filled node markers connected by lines
42 | black = [0 0 0];
43 | gplot2(A, X, 'o-', 'MarkerFaceColor', black, 'MarkerEdgeColor', black);
44 | 
45 | axis off; 
46 | 
47 | konect_print_bitmap(sprintf('plot/layout.tmp.a.%s.png', network));
48 | 


--------------------------------------------------------------------------------
/m/load_strings.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Load strings from a file.  The file should contain one string per
 3 | % line.  The function returns a column cell array of strings. 
 4 | %
 5 | % Note that the file is read with the textscan() format '%s', which
 6 | % splits at any whitespace:  a line that contains blanks gives several
 7 | % strings, and empty lines give none. 
 8 | %
 9 | 
10 | function [strings] = load_strings(filename)
11 | 
12 | FILE = fopen(filename);
13 | if FILE < 0
14 |     error('fopen');
15 | end
16 | 
17 | % textscan() returns a cell array of one element, which is itself the
18 | % cell array with all the strings
19 | columns = textscan(FILE, '%s');
20 | strings = columns{1};
21 | 
22 | if fclose(FILE) < 0
23 |     error('fclose');
24 | end
25 | 


--------------------------------------------------------------------------------
/m/lorenz.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Draw the Lorenz curve for a network's degree distribution.
 3 | %
 4 | % PARAMETERS 
 5 | %	$network
 6 | %
 7 | % INPUT 
 8 | %	dat/data.$network.mat
 9 | %
10 | % OUTPUT 
11 | %	plot/lorenz.[uva]{,b}.$network.eps
12 | %		a - total 
13 | %		u,v - Row/column-based (all formats except SYM)
14 | %		b - Bare, i.e. without the P value
15 | %
16 | 
17 | network = getenv('network'); 
18 | 
19 | data = load(sprintf('dat/data.%s.mat', network)); 
20 | 
21 | T = data.T; 
22 | 
23 | consts = konect_consts(); 
24 | 
25 | info = read_info(network); 
26 | 
27 | % The third column is only used as multiplicities when the weights
28 | % are POSITIVE; otherwise keep just the two node columns
29 | if info.weights ~= consts.POSITIVE & size(T,2) >= 3
30 |     T = T(:, 1:2); 
31 | end
32 | 
33 | %
34 | % U, V
35 | %
36 | if info.format ~= consts.SYM
37 | 
38 |     q = []; 
39 |     if size(T,2) >= 3
40 |         q = T(:,3);
41 |     end
42 | 
43 |     % Rows
44 |     lorenz_one(T(:,1), q, 0, 'u');
45 |     konect_print(sprintf('plot/lorenz.u.%s.eps', network)); 
46 |     lorenz_one(T(:,1), q, 1, 'u');
47 |     konect_print(sprintf('plot/lorenz.ub.%s.eps', network)); 
48 | 
49 |     % Columns
50 |     lorenz_one(T(:,2), q, 0, 'v'); 
51 |     konect_print(sprintf('plot/lorenz.v.%s.eps', network)); 
52 |     lorenz_one(T(:,2), q, 1, 'v'); 
53 |     konect_print(sprintf('plot/lorenz.vb.%s.eps', network)); 
54 | end
55 | 
56 | 
57 | %
58 | % A
59 | %
60 | 
61 | % Every edge counts once for each of its two endpoints.  In bipartite
62 | % networks, the column IDs are shifted so that they do not collide
63 | % with the row IDs. 
64 | if info.format == consts.BIP
65 |     m = max(T(:,1)); 
66 |     p = [ T(:,1) ; T(:,2)+m ]; 
67 | else
68 |     p = [ T(:,1) ; T(:,2) ]; 
69 | end
70 | 
71 | if size(T,2) >= 3
72 |     q = [ T(:,3) ; T(:,3) ]; 
73 | else  
74 |     q = []; 
75 | end
76 | 
77 | lorenz_one(p, q, 0, 'a'); 
78 | konect_print(sprintf('plot/lorenz.a.%s.eps', network)); 
79 | lorenz_one(p, q, 1, 'a'); 
80 | konect_print(sprintf('plot/lorenz.ab.%s.eps', network)); 
81 | 


--------------------------------------------------------------------------------
/m/lorenz_one.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot one Lorenz curve into the current axes, annotated with the
 3 | % Gini coefficient (G) and, unless bare, with the P value. 
 4 | %
 5 | % PARAMETERS 
 6 | %	p	(e*1) Node indexes
 7 | %	q	(e*1) Multiplicities; [] to denote all ones
 8 | %	b	1 for bare, else 0
 9 | %	type	One letter [auv]; determines the plot color
10 | % 
11 | 
12 | function lorenz_one(p, q, b, type)
13 | 
14 | font_size = 18; 
15 | 
16 | % The area under the curve is filled with a light tint of the line color
17 | palette = konect_colors_letter(); 
18 | color_line = palette.(type); 
19 | color_fill = 0.1 * color_line + 0.9 * [1 1 1]; 
20 | 
21 | [gini r_x r_y] = konect_gini(p, q); 
22 | own = konect_own(p, q); 
23 | 
24 | hold on; 
25 | 
26 | % The curve itself
27 | plot(r_x, r_y, '-', 'LineWidth', 3, 'Color', color_line);
28 | 
29 | axis square;
30 | axis([0, 1, 0, 1]); 
31 | set(gca, 'FontSize', font_size); 
32 | 
33 | % Close the polygon at the origin
34 | fill([r_x ; 0], [r_y ; 0], color_fill, 'LineStyle', 'none'); 
35 | 
36 | % Diagonal
37 | line([0 1],  [0 1], 'LineWidth', 2, 'Color', [0 0 0], 'LineStyle', '--'); 
38 | 
39 | % Antidiagonal, with the P value marked on it
40 | if ~b
41 |   line([1 0],  [0 1], 'LineWidth', 2, 'Color', [0 0 0], 'LineStyle', '--'); 
42 |   plot(1-own, own, '.', 'MarkerSize', 30, 'Color', [0 0 0]); 
43 |   text(1-own+0.04, own, sprintf('P = %.1f%%', own*100), ...
44 |        'FontSize', font_size, ...
45 |        'HorizontalAlign', 'Left', 'VerticalAlign', 'Middle'); 
46 | end
47 | 
48 | grid on; 
49 | 
50 | % Both axes show percentages
51 | ticks = [0 .2 .4 .6 .8 1];
52 | tick_labels = {'0%', '20%', '40%', '60%', '80%', '100%'};
53 | set(gca, 'XTick', ticks, 'XTickLabel', tick_labels); 
54 | set(gca, 'YTick', ticks, 'YTickLabel', tick_labels); 
55 | 
56 | xlabel('Share of nodes with smallest degrees');
57 | ylabel('Share of edges'); 
58 | 
59 | text(0.25, 0.20, sprintf('G = %.1f%%', gini*100), 'FontSize', font_size); 
60 | 


--------------------------------------------------------------------------------
/m/map_line.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Draw a full graph (with edges) using decomposition data. 
 3 | %
 4 | % In a dense plot, edges are drawn as thin lines without node
 5 | % markers, the nodes are drawn as dots in a second pass, and the
 6 | % plot is added to the current one ("hold on").  Otherwise, edges are
 7 | % drawn as lines of width 1 with circles as node markers. 
 8 | %
 9 | % PARAMETERS
10 | %	x, y	(n*1) Coordinates
11 | %	T	(e*2) Edges 
12 | %	dense	0/1 dense plot
13 | %
14 | 
15 | function map_line(x, y, T, dense)
16 | 
17 | colors_letter = konect_colors_letter();
18 | 
19 | if dense
20 |     line_width = 0.1; 
21 |     style = '-'; 
22 |     hold on; 
23 | else
24 |     line_width = 1; 
25 |     style = '-o';
26 | end
27 | 
28 | % Adjacency matrix and coordinates, shared by both drawing passes
29 | G = sparse(T(:,1), T(:,2), 1);
30 | coords = [x y];
31 | 
32 | gplot2(G, coords, style, 'LineWidth', line_width, ...
33 |        'Color', colors_letter.a);
34 | 
35 | if dense 
36 |     gplot2(G, coords, '.', 'Color', [1 0.5 0]); 
37 | end
38 | 
39 | axis equal; 
40 | axis off; 
41 | 


--------------------------------------------------------------------------------
/m/map_minmax.m:
--------------------------------------------------------------------------------
 1 | 
 2 | %
 3 | % Minimal and maximal numbers, cutting the extreme points. 
 4 | %
 5 | % The values are sorted and the extreme values on both sides are
 6 | % dropped; the result is the mean of the remaining values minus/plus
 7 | % 2.5 times their deviation.  The cut positions are displayed. 
 8 | %
 9 | % PARAMETERS 
10 | %	ww	Column vector of values
11 | %
12 | % RESULT 
13 | %	min_w	Lower end of the range
14 | %	max_w	Upper end of the range
15 | %
16 | function [min_w, max_w] = map_minmax(ww)
17 |   
18 |     % cut ALPHA points from each side
19 |     enable_cut = 1;
20 | 
21 |     % use the L1 deviation instead of the standard deviation
22 |     enable_l1 = 0;  
23 | 
24 |     % amount of points to cut
25 |     alpha = .06;   
26 | 
27 |     % number of std. devs. to show 
28 |     k = 2.5;
29 | 
30 |     w = ww;
31 | 
32 |     if enable_cut
33 |         w = sort(w);
34 |         n = size(w,1);
35 | 
36 |         % Positions of the first and last value to keep
37 |         start = round(alpha * n)
38 |         endin = round((1-alpha) * n)
39 | 
40 |         % Nothing is cut when there are too few values
41 |         if start > 0
42 |             w = w(start:endin);
43 |         end
44 |     end
45 | 
46 |     center = mean(w);
47 | 
48 |     if enable_l1
49 |         spread = mean(abs(w - center));
50 |     else
51 |         spread = std(w,1);
52 |     end
53 | 
54 |     min_w = center - k * spread;
55 |     max_w = center + k * spread;
56 | 
57 | end
58 | 


--------------------------------------------------------------------------------
/m/mask.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Mask approximation of rank 1.  We use the row-column (RC) algorithm
 3 | % from [1] and use SVD to initialize the eigenvectors. 
 4 | %
 5 | % A fixed number of 16 iterations is performed; on every fifth
 6 | % iteration, the change of u and the masked residual are printed. 
 7 | %
 8 | % [1] Estimation of Rank Deficient Matrices from Partial Observations:
 9 | %     Two-Step Iterative Algorithms, Rui F. C. Guerreiro and Pedro M. Q.
10 | %     Aguiar.  
11 | %
12 | % PARAMETERS 
13 | %	a	(m*n)	Matrix to approximate
14 | %	w	(m*n)	Weight (or mask) matrix, usually (a~=0)
15 | %
16 | % RESULT 
17 | %	u,v	(m*1,n*1)	u*v' is the rank-1 approximation
18 | %
19 | % TODO 
20 | %	extend to rank > 1. 
21 | % 	convergence criterion. 
22 | %
23 | 
24 | function [u,v] = mask(a, w)
25 | 
26 | [m,n] = size(a); 
27 | 
28 | % Initialize from the largest singular triple of A 
29 | opts = struct('disp', 2); 
30 | [u,d,v] = svds(a, 1, 'L', opts); 
31 | u = u .* sqrt(d); 
32 | 
33 | for iter = 1:16
34 |     
35 |     u_old = u;
36 | 
37 |     % Alternate:  fit v for fixed u, then u for fixed v
38 |     v = mask_step(u, a, w);
39 |     u = mask_step(v, a', w'); 
40 | 
41 |     % Diagnostics are only computed on every fifth iteration
42 |     if mod(iter,5) ~= 0, continue; end
43 | 
44 |     % Sum of squares of the masked residual (u*v' - a) .* w,
45 |     % accumulated along the shorter dimension without building u*v'
46 |     square_sum = 0;
47 |     if m < n
48 |         for row = 1:m
49 |             residual = (u(row,:) * v' - a(row,:)) .* w(row,:);
50 |             square_sum = square_sum + sum(residual.^2);
51 |         end
52 |     else
53 |         for col = 1:n
54 |             residual = (v(col,:) * u' - a(:, col)') .* w(:, col)';
55 |             square_sum = square_sum + sum(residual.^2);
56 |         end
57 |     end
58 |     norm_uv = sqrt(square_sum); 
59 | 
60 |     fprintf(1, '  [%d] normdiff(u) = %g  normdiff(uvT) = %g\n', ...
61 |             iter, ...
62 |             norm(u - u_old), ...
63 |             norm_uv); 
64 | end
65 | 


--------------------------------------------------------------------------------
/m/mask_step.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % One iteration step in mask approximation:  given one vector of the
 3 | % rank-1 approximation, compute the other one, column by column. 
 4 | %
 5 | % It must hold:  a = a .* w;  (i.e. a must be zero where w is zero.)
 6 | %
 7 | % PARAMETERS 
 8 | %	u	(m*1) The previous eigenvector
 9 | %	a	(m*n) Adjacency matrix
10 | %	w	(m*n) The weight (or mask) matrix
11 | %
12 | % RESULT 
13 | %	v	(n*1)
14 | %
15 | 
16 | function v = mask_step(u, a, w)
17 | 
18 | % Only rank 1 is supported
19 | if size(u,2) ~= 1
20 |     error('Invalid');
21 | end
22 | 
23 | n = size(a,2); 
24 | 
25 | fprintf(1, '  mask step /%d\n', n); 
26 | 
27 | v = zeros(n,1);
28 | 
29 | t = konect_timer(n); 
30 | 
31 | for j = 1:n
32 | 
33 |     t = konect_timer_tick(t, j); 
34 | 
35 |     if mod(j,5000) == 0
36 |         fprintf(1, '    %d\n', j);
37 |     end
38 | 
39 |     % Weighted fit of column j of A by a multiple of U
40 |     numerator   = u' * a(:,j);
41 |     denominator = u' * (u .* w(:,j));
42 | 
43 |     % pinv() gives zero instead of Inf/NaN when the denominator is zero
44 |     v(j,1) = pinv(denominator) * numerator;
45 | 
46 | end
47 | 
48 | konect_timer_end(t); 
49 | 


--------------------------------------------------------------------------------
/m/means.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the row and column means.  The halves are saved, to streamline
 3 | % normalization. 
 4 | %
 5 | % PARAMETERS 
 6 | %	$network		Network name
 7 | %	$type			String
 8 | %		full		On the full dataset
 9 | %		split		On the source set of the split
10 | %		training	On the training set of the split, i.e.,
11 | %				the source and target sets together
12 | %
13 | % INPUT 
14 | %	dat/data.$network.mat (only FULL) 
15 | %	dat/split.$network.mat (only SPLIT and TRAINING)
16 | %
17 | % OUTPUT 
18 | %	dat/means{,i,t}.$network.mat	The means; the suffix is "i"
19 | %					for FULL, empty for SPLIT and
20 | %					"t" for TRAINING 
21 | %		U,V	The weights or []
22 | % 
23 | 
24 | network = getenv('network'); 
25 | type = getenv('type'); 
26 | 
27 | info = read_info(network); 
28 | 
29 | if strcmp(type, 'full') 
30 |     data = load(sprintf('dat/data.%s.mat', network)); 
31 |     T = data.T; 
32 |     suffix = 'i'; 
33 | elseif strcmp(type, 'split')
34 |     split = load(sprintf('dat/split.%s.mat', network)); 
35 |     T = split.T_source; 
36 |     suffix = ''; 
37 | elseif strcmp(type, 'training')
38 |     split = load(sprintf('dat/split.%s.mat', network)); 
39 |     T = [ split.T_source ; split.T_target ]; 
40 |     suffix = 't'; 
41 | else
42 |     % Without this branch, an unknown or unset $type would only fail
43 |     % further down, with an error about the undefined variable T. 
44 |     error('*** Invalid type "%s"', type); 
45 | end
46 | 
47 | [U V] = means_best(T, info.n1, info.n2, info.weights); 
48 | 
49 | save(sprintf('dat/means%s.%s.mat', suffix, network), '-v7.3', 'U', 'V'); 
50 | 


--------------------------------------------------------------------------------
/m/means_best.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % This is the additive normalization used.  This function also knows
 3 | % which type of networks need normalization or not.   
 4 | %
 5 | 
 6 | function [U V] = means_best(T, m, n, weights)
 7 | 
 8 | %%consts = konect_consts(); 
 9 | [negative interval_scale] = konect_data_weights(); 
10 | 
11 | if interval_scale(weights)
12 |     [U V] = means_euv(T, m, n);
13 | else
14 |     U = []; 
15 |     V = []; 
16 | end
17 | 


--------------------------------------------------------------------------------
/m/means_e.m:
--------------------------------------------------------------------------------
%
% Constant means:  half of the global mean of the third column on each
% side, so that U(i) + V(j) equals the global mean.
%
% PARAMETERS
%	T_training	(r*3) Edge list; the third column holds the values
%	m,n		Length of U and of V
%
% RESULT
%	U,V	(m*1,n*1) Constant vectors
%

function [U,V] = means_e(T_training, m, n)

% The parameter is T_TRAINING; the former name "at_training" was
% undefined here and made every call fail.
e = .5 * mean(T_training(:,3));

U = e * ones(m,1);
V = e * ones(n,1);
7 | 


--------------------------------------------------------------------------------
/m/means_euv.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Average between subject, object and global mean. 
 3 | %
 4 | % PARAMETERS
 5 | %	T	(r*3) Subject ID, object ID, rating 
 6 | %	m,n	subject count, object count
 7 | %
 8 | % RESULT
 9 | %	U,V	(m*1,n*1) Vectors such that U 1 + 1 V' is an approximation 
10 | %
11 | 
12 | function [U,V] = means_euv(T, m, n)
13 | 
14 | A = konect_spconvert(T, m, n);
15 | 
16 | A_mask = (A ~= 0); 
17 | 
18 | e = .1 * mean(T(:,3))
19 | U = .4 * (sum(A,2) ./ sum(A_mask, 2)) ;
20 | V = .4 * (sum(A,1) ./ sum(A_mask, 1))';
21 | 
22 | U(U ~= U) = 0;
23 | V(V ~= V) = 0;
24 | U = U + e;
25 | V = V + e; 
26 |  
27 | assert(sum(U ~= U) + sum(V ~= V) == 0); 
28 | 
29 | 


--------------------------------------------------------------------------------
/m/means_regr.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % The means by linear regression. 
 3 | %
 4 | 
 5 | function [U,V] = means_regr(T, m, n)
 6 | 
 7 | r = size(T, 1); 
 8 | s = round(r * .8); 
 9 | val = (1+s) : r; 
10 | 
11 | A_training = konect_spconvert(T(1:s, :), m, n); 
12 | A_training_mask = (A_training ~= 0); 
13 | 
14 | A_testtraining = konect_spconvert(T, m, n); 
15 | A_testtraining_mask = (A_testtraining ~= 0); 
16 | 
17 | meane = mean(T(1:s,3))
18 | meanu = sum(A_training, 2) ./ sum(A_training_mask,2); 
19 | meanv = sum(A_training, 1)' ./ sum(A_training_mask,1)'; 
20 | 
21 | meanu(meanu ~= meanu) = 0; 
22 | meanv(meanv ~= meanv) = 0; 
23 | 
24 | pred_e = meane * ones(r-s,1); 
25 | pred_u = meanu(T(val,1)); 
26 | pred_v = meanv(T(val,2)); 
27 | 
28 | targ = T(val,3); 
29 | 
30 | w_regr = [pred_e pred_u pred_v] \ targ
31 | 
32 | meane = mean(T(:,3)); 
33 | meanu = sum(A_testtraining, 2)  ./ sum(A_testtraining_mask,2); 
34 | meanv = sum(A_testtraining, 1)' ./ sum(A_testtraining_mask,1)'; 
35 | 
36 | meanu(meanu ~= meanu) = 0; 
37 | meanv(meanv ~= meanv) = 0; 
38 | 
39 | U = .5 * w_regr(1) * meane + w_regr(2) * meanu;
40 | V = .5 * w_regr(1) * meane + w_regr(3) * meanv;
41 | 


--------------------------------------------------------------------------------
/m/means_regrn.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % The means by nonnegative linear regression. 
 3 | %
 4 | 
 5 | function [U,V] = means_regrn(T, m, n)
 6 | 
 7 | r = size(T, 1); 
 8 | s = round(r * .8); 
 9 | val = (1+s) : r; 
10 | 
11 | A_training = konect_spconvert(T(1:s, :), m, n); 
12 | A_training_mask = (A_training ~= 0); 
13 | 
14 | A_testtraining = konect_spconvert(T, m, n); 
15 | A_testtraining_mask = (A_testtraining ~= 0); 
16 | 
17 | meane = mean(T(1:s,3))
18 | meanu = sum(A_training, 2) ./ sum(A_training_mask,2); 
19 | meanv = sum(A_training, 1)' ./ sum(A_training_mask,1)'; 
20 | 
21 | meanu(meanu ~= meanu) = 0; 
22 | meanv(meanv ~= meanv) = 0; 
23 | 
24 | pred_e = meane * ones(r-s,1); 
25 | pred_u = meanu(T(val,1)); 
26 | pred_v = meanv(T(val,2)); 
27 | 
28 | targ = T(val,3); 
29 | 
30 | w_regrn = lsqnonneg(full([pred_e pred_u pred_v]), targ) 
31 | 
32 | meane = mean(T(:,3)); 
33 | meanu = sum(A_testtraining, 2)  ./ sum(A_testtraining_mask,2); 
34 | meanv = sum(A_testtraining, 1)' ./ sum(A_testtraining_mask,1)'; 
35 | 
36 | meanu(meanu ~= meanu) = 0; 
37 | meanv(meanv ~= meanv) = 0; 
38 | 
39 | U = .5 * w_regrn(1) * meane + w_regrn(2) * meanu;
40 | V = .5 * w_regrn(1) * meane + w_regrn(3) * meanv;
41 | 


--------------------------------------------------------------------------------
/m/measure_compute.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute a link prediction measure.
 3 | %
 4 | % PARAMETERS
 5 | %	measure Name of measure 
 6 | %	p	(e*1) Predictions
 7 | %	T	(e*3) To be predicted 
 8 | %
 9 | % RESULT
10 | %	value	Link prediction measure ; higher is better 
11 | %
12 | function [value] = measure_compute(measure, p, T)
13 | 
14 | fh = str2func(sprintf('measure_compute_%s', measure)); 
15 | 
16 | value = fh(p, T); 


--------------------------------------------------------------------------------
/m/measure_compute_ap.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute average precision. 
 3 | % 
 4 | % PARAMETERS 
 5 | %	p	(e*1) Predictions
 6 | %	T	(e*3) to be predicted
 7 | %
 8 | % RESULT 
 9 | %	value	Average precision
10 | %
11 | 
12 | function [value] = measure_compute_ap(p, T)
13 | 
14 | value = konect_ap(p, T(:,3)); 
15 | 


--------------------------------------------------------------------------------
/m/measure_compute_auc.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute area under the curve. 
 3 | % 
 4 | % PARAMETERS
 5 | %	p	(e*1) Predictions
 6 | %	at	(e*3) To be predicted 
 7 | %
 8 | % RESULT
 9 | %	value	Area under the curve 
10 | %
11 | 
12 | function [value] = measure_compute_auc(p, at)
13 | 
14 | value = konect_auc(p, at(:,3)); 
15 | 


--------------------------------------------------------------------------------
/m/measure_compute_corr.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute Pearson correlation error measure.
 3 | %
 4 | % RESULT 
 5 | %	value	Correlation value
 6 | %
 7 | % PARAMETERS 
 8 | %	p	(e*1) Predictions
 9 | %	T	(e*3) To be predicted
10 | %
11 | 
12 | function [value] = measure_compute_corr(p, T)
13 | 
14 | value = corr(p, T(:,3)); 
15 | 
16 | if ~isfinite(value)
17 |     % An undefined Pearon correlation means constant predictions, so
18 |     % the results is 0. 
19 |     value = 0; 
20 | end
21 | 


--------------------------------------------------------------------------------
/m/measure_compute_kendall.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute Kendall tau precision measure.
 3 | %
 4 | % RESULT 
 5 | %	value	Correlation value
 6 | %
 7 | % PARAMETERS 
 8 | %	p	(e*1) Predictions
 9 | %	T	(e*3) To be predicted
10 | %
11 | 
12 | function [value] = measure_compute_kendall(p, T)
13 | 
14 | value = corr(p, T(:,3), 'type', 'Kendall'); 
15 | 
16 | 


--------------------------------------------------------------------------------
/m/measure_compute_map.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute mean average precision. 
 3 | %
 4 | % PARAMETERS
 5 | %	p	(e*1) Predictions
 6 | %	T	(e*3) To be predicted
 7 | %
 8 | % RESULT
 9 | %	value	MAP value
10 | %
11 | 
12 | function [value] = measure_compute_map(p, T)
13 | 
14 | value = konect_map(p, T); 
15 |  
16 | 
17 | 


--------------------------------------------------------------------------------
/m/measure_compute_mauc.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute mean area under the curve. 
 3 | %
 4 | % PARAMETERS
 5 | %	p	(e*1) Predictions
 6 | %	at	(e*3) To be predicted
 7 | %
 8 | % RESULT
 9 | %	value	MAUC value
10 | %
11 | 
12 | function [value] = measure_compute_mauc(p, at)
13 | 
14 | value = konect_mauc(p, at); 
15 |  
16 | 
17 | 


--------------------------------------------------------------------------------
/m/measure_compute_spear.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute Spearman correlation link prediction measure.
 3 | %
 4 | % RESULT 
 5 | %	value	Correlation value
 6 | %
 7 | % PARAMETERS 
 8 | %	p	(e*1) Predictions
 9 | %	T	(e*3) To be predicted
10 | %
11 | 
12 | function [value] = measure_compute_spear(p, T)
13 | 
14 | value = corr(p, T(:,3), 'type', 'Spearman'); 
15 | 
16 | 


--------------------------------------------------------------------------------
/m/mediandist.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Determine the median distance in the network, from the hop
 3 | % distribution.
 4 | %
 5 | % In case the atcual median value would be a non-integer, because we are
 6 | % averaging between two values, the result is rounded up, to always
 7 | % result in an integer.  There must always be one pair of nodes for each
 8 | % possible distance between zero and the diameter, and thus the only
 9 | % case of averaging happens between two adjacent integers.  For large
10 | % networks, this is exceedingly unlikely. 
11 | %
12 | % PARAMETERS
13 | %	$network
14 | %
15 | % INPUT FILES
16 | %	dat/hopdistr.$network 
17 | %
18 | % OUTPUT FILES
19 | %	dat/statistic.mediandist.$network
20 | %
21 | 
22 | network = getenv('network')
23 | 
24 | h = load(sprintf('dat/hopdistr.%s', network))
25 | 
26 | v = sum(h <= (h(end) / 2))
27 | 
28 | values = [ v ]
29 | 
30 | filename_OUT = sprintf('dat/statistic.mediandist.%s', network);
31 | OUT = fopen(filename_OUT, 'w');
32 | if OUT < 0,  error(filename_OUT); exit(1);  end;
33 | fprintf(OUT, '%u\n', values);
34 | if fclose(OUT) < 0,  error(filename_OUT); exit(1);  end;
35 | 


--------------------------------------------------------------------------------
/m/network_key.m:
--------------------------------------------------------------------------------
 1 | % 
 2 | % The key used for sorting datasets. 
 3 | %
 4 | % PARAMETERS 
 5 | %	network 
 6 | %
 7 | 
 8 | function ret = network_key(metadata)
 9 | 
10 | name = metadata.name; 
11 | 
12 | name_no_space = regexprep(name, ' ', '-'); 
13 | 
14 | ret = sprintf('%s:%s', metadata.category, name_no_space); 
15 | 
16 | 


--------------------------------------------------------------------------------
/m/outin.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot the outdegree vs the indegree of all nodes.  Only for directed
 3 | % networks. 
 4 | %
 5 | % PARAMETERS 
 6 | %	$network	Network name
 7 | %
 8 | % INPUT 
 9 | %	dat/data.$network.mat
10 | %	dat/info.$network
11 | %
12 | % OUTPUT 
13 | %	plot/outin.[a].$network.eps	Plots
14 | %
15 | 
16 | font_size = 22; 
17 | 
18 | consts = konect_consts(); 
19 | 
20 | network = getenv('network'); 
21 | 
22 | info = read_info(network); 
23 | 
24 | assert(info.format == consts.ASYM); 
25 | 
26 | data = load(sprintf('dat/data.%s.mat', network)); 
27 | 
28 | if info.weights == consts.POSITIVE & size(data.T, 2) >= 3
29 |     w = data.T(:,3);
30 | else
31 |     w = 1; 
32 | end
33 | 
34 | % Outdegrees 
35 | d_1 = sparse(data.T(:,1), 1, w, info.n1, 1);
36 | 
37 | % Indegrees
38 | d_2 = sparse(data.T(:,2), 1, w, info.n2, 1); 
39 | 
40 | %
41 | % (b) - logarithmic axes
42 | %
43 | 
44 | loglog(d_1, d_2, '.');
45 | 
46 | xlabel('Outdegree (d_1)', 'FontSize', font_size); 
47 | ylabel('Indegree (d_2)', 'FontSize', font_size); 
48 | 
49 | set(gca, 'FontSize', font_size); 
50 | 
51 | set(gca, 'XMinorTick', 'on');
52 | set(gca, 'YMinorTick', 'on'); 
53 | set(gca, 'TickLength', [0.05 0.05]); 
54 | 
55 | % Workaround for Matlab bug. Otherwise, the minor ticks are not visible. 
56 | ax = axis(); 
57 | ax(1) = 0.7;
58 | ax(3) = 0.7;
59 | axis(ax); 
60 | if ax(1) > 0 & ax(3) > 0 
61 |     set(gca, 'XTick', 10 .^ (ceil(log(ax(1)) / log(10)):floor(log(ax(2)) / log(10)))); 
62 |     set(gca, 'YTick', 10 .^ (ceil(log(ax(3)) / log(10)):floor(log(ax(4)) / log(10)))); 
63 | end
64 | 
65 | konect_print(sprintf('plot/outin.b.%s.eps', network)); 
66 | 
67 | %
68 | % (c) - shifted logarithmic axes
69 | %
70 | 
71 | loglog(d_1 + 1, d_2 + 1, '.');
72 | 
73 | xlabel('Augmented outdegree (1 + d^+)', 'FontSize', font_size); 
74 | ylabel('Augmented indegree (1 + d^-)', 'FontSize', font_size); 
75 | 
76 | set(gca, 'FontSize', font_size); 
77 | 
78 | set(gca, 'XMinorTick', 'on');
79 | set(gca, 'YMinorTick', 'on'); 
80 | set(gca, 'TickLength', [0.05 0.05]); 
81 | 
82 | % Workaround for Matlab bug. Otherwise, the minor ticks are not visible. 
83 | ax = axis(); 
84 | ax(1) = 0.7;
85 | ax(3) = 0.7;
86 | axis(ax); 
87 | if ax(1) > 0 & ax(3) > 0 
88 |     set(gca, 'XTick', 10 .^ (ceil(log(ax(1)) / log(10)):floor(log(ax(2)) / log(10)))); 
89 |     set(gca, 'YTick', 10 .^ (ceil(log(ax(3)) / log(10)):floor(log(ax(4)) / log(10)))); 
90 | end
91 | 
92 | konect_print(sprintf('plot/outin.c.%s.eps', network)); 
93 | 
94 | %
95 | % (a) - normal axes
96 | %
97 | plot(d_1, d_2, '.');
98 | konect_print(sprintf('plot/outin.a.%s.eps', network)); 
99 | 


--------------------------------------------------------------------------------
/m/pa_plot.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Preferential attachment tests.
 3 | %
 4 | % PARAMETERS 
 5 | %	$network
 6 | %
 7 | % INPUT 
 8 | %	dat/pa.$NETWORK.mat
 9 | %	dat/pa_data.$NETWORK.mat
10 | %	dat/info.$NETWORK
11 | %
12 | % OUTPUT 
13 | %	plot/pa.[a][auv].$NETWORK.eps
14 | %
15 | 
16 | network = getenv('network') 
17 | 
18 | consts = konect_consts(); 
19 | 
20 | info = read_info(network)
21 | 
22 | pa = load(sprintf('dat/pa.%s.mat', network)); 
23 | pa = pa.pa; 
24 | 
25 | pa_data = load(sprintf('dat/pa_data.%s.mat', network)); 
26 | pa_data = pa_data.pa_data; 
27 | 
28 | if info.format == consts.ASYM           
29 | 
30 |     pa_plot_one(network, 'u', pa.u, pa_data.u); 
31 |     pa_plot_one(network, 'v', pa.v, pa_data.v); 
32 | 
33 |     pa_plot_one(network, 'a', pa.a, pa_data.a);
34 | 
35 | elseif info.format == consts.SYM 
36 | 
37 |     pa_plot_one(network, 'a', pa.a, pa_data.a);
38 | 
39 | elseif info.format == consts.BIP
40 | 
41 |     pa_plot_one(network, 'u', pa.u, pa_data.u); 
42 |     pa_plot_one(network, 'v', pa.v, pa_data.v); 
43 | 
44 |     pa_plot_one(network, 'a', pa.a, pa_data.a);
45 | 
46 | else
47 |     error('*** Invalid format'); 
48 | end
49 | 
50 | 
51 | 


--------------------------------------------------------------------------------
/m/pivotize.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % The norm used for normalizing central matrices. 
 3 | %
 4 | % PARAMETERS 
 5 | %	data_decomposition
 6 | %	x			Central matrix
 7 | %
 8 | % RESULT 
 9 | %	pivot 			The pivot value
10 | %
11 | 
12 | function [pivot] = pivotize(data_decomposition, x)
13 | 
14 | pivot = 1; 
15 | 
16 | if ~data_decomposition.n
17 | 
18 |   if data_decomposition.l
19 |     a = diag(x); 
20 |     a_nonzero = a(a ~= 0); 
21 |     if length(a_nonzero)
22 |       pivot = min(abs(a_nonzero)); 
23 |     end
24 |   else
25 |     pivot = norm(x); 
26 |   end
27 | end
28 | 
29 | if pivot <= 0, error('***'); end;
30 | 


--------------------------------------------------------------------------------
/m/precision_comp.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute precision of predictions. 
 3 | %
 4 | % PARAMETERS 
 5 | %	$NETWORK
 6 | %	$METHOD
 7 | %
 8 | % INPUT 
 9 | %	dat/prediction.$METHOD.$NETWORK.mat
10 | %	dat/split.$NETWORK.mat 
11 | %	dat/meanst.$NETWORK.mat
12 | %
13 | % OUTPUT 
14 | %	dat/precision.$METHOD.$NETWORK.mat
15 | %		precisions{submethod}	Precisions by submethod; each entry is a 2-vector of the value and the runtime in seconds
16 | %			.$MEASURE	e.g., "auc"
17 | %
18 | 
19 | labels_measure = get_labels_measure(); 
20 | measure_names = fieldnames(labels_measure); 
21 | 
22 | network = getenv('NETWORK');
23 | method = getenv('METHOD'); 
24 | 
25 | split = load(sprintf('dat/split.%s.mat', network)); 
26 | prediction = load(sprintf('dat/prediction.%s.%s.mat', method, network)); 
27 | 
28 | if length(split.T_test_zero)
29 |     if size(split.T_test,2) == 3
30 |         T_all = [ split.T_test ; split.T_test_zero, zeros(size(split.T_test_zero, 1), 1) ]; 
31 |     else
32 |         T_all = [ split.T_test, ones(size(split.T_test, 1), 1) ; split.T_test_zero, zeros(size(split.T_test_zero, 1), 1) ]; 
33 |     end
34 | else
35 |     T_all = [ split.T_test ]; 
36 | end
37 | 
38 | meanst = load(sprintf('dat/meanst.%s.mat', network)); 
39 | 
40 | T_all = konect_normalize_additively(T_all, meanst); 			  
41 | 
42 | precisions = struct(); 
43 | 
44 | submethods = fieldnames(prediction.predictions);
45 | 
46 | for i = 1 : length(submethods)
47 | 
48 |     submethod = submethods{i};
49 |     fprintf(1, '\nsubmethod = %s\n', submethod); 
50 |     prediction_submethod = prediction.predictions.(submethod); 
51 |     p_normal = prediction_submethod.prediction;
52 |     p_zero = prediction_submethod.prediction_zero; 
53 | 
54 |     if length(split.T_test_zero) 
55 |         p = [ p_normal ; p_zero ]; 
56 |     else
57 |         p = [ p_normal ]; 
58 |     end  
59 | 
60 |     p = real(p); 
61 | 
62 |     for i = 1:length(measure_names)
63 |         measure = measure_names{i}; 
64 |         fprintf(1, '%s(%s) = ', submethod, measure); 
65 |         t0 = cputime; 
66 |         value = measure_compute(measure, p, T_all)
67 |         t1 = cputime;
68 |         runtime = t1 - t0; 
69 |         fprintf(1, '%f [%f s]\n', value, runtime); 
70 | 
71 |         if ~isfinite(value)
72 |             error('*** Non-finite precision value'); 
73 |         end
74 |         precisions.(submethod).(measure) = [ value runtime ]; 
75 |     end
76 | 
77 | end
78 | 
79 | save(sprintf('dat/precision.%s.%s.mat', method, network), '-v7.3', 'precisions'); 
80 | 


--------------------------------------------------------------------------------
/m/precision_one.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot precisions for one network/method combination.
 3 | %
 4 | % PARAMETERS 
 5 | %	$NETWORK
 6 | %	$METHOD
 7 | %
 8 | % INPUT 
 9 | %	dat/precision.$METHOD.$NETWORK.mat
10 | %
11 | % OUTPUT 
12 | %	plot/precision.one.[a].$MEASURE.$METHOD.$NETWORK.eps
13 | %		for all $MEASURE
14 | %
15 | 
16 | network = getenv('NETWORK');
17 | method = getenv('METHOD'); 
18 | 
19 | precision = load(sprintf('dat/precision.%s.%s.mat', method, network)); 
20 | 
21 | labels_measure = get_labels_measure(); 
22 | 
23 | measures = fieldnames(labels_measure)
24 | 
25 | for i = 1 : length(measures) 
26 | 
27 |     measure = measures{i}
28 | 
29 |     names = [];
30 |     precisions = []; 
31 | 
32 |     submethods = fieldnames(precision.precisions);
33 |     for k = 1 : length(submethods)
34 |         submethod = submethods{k}
35 |         values = precision.precisions.(submethod).(measure)
36 |         precisions = [ precisions ; values(1) ]; 
37 |     end
38 | 
39 |     precisions_plot([], submethods, precisions, measure); 
40 |     konect_print(sprintf('plot/precision.one.a.%s.%s.%s.eps', measure, method, network)); 
41 | 
42 | end
43 | 


--------------------------------------------------------------------------------
/m/precisions_plot.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Draw a single bar chart of link prediction results.
 3 | %
 4 | % PARAMETERS 
 5 | %	methods_arg		(n*1) Cellstrings of methods; may be [] to omit the method names
 6 | %	submethods	(n*1) Cellstrings of submethods 
 7 | %	precisions	(n*1) Precision values 
 8 | %	measure		The measure used
 9 | %
10 | 
11 | function precisions_plot(methods_arg, submethods, precisions, measure)
12 | 
13 | font_size = 16; 
14 | font_size_min = 4; 
15 | rotation = 60; 
16 | 
17 | labels_measure = get_labels_measure(); 
18 | 
19 | n = size(precisions,1) 
20 | 
21 | if n == 0
22 |     plot(0,0);
23 |     return; 
24 | end
25 | 
26 | [colors line_styles markers] = styles_submethod(); 
27 | labels_submethod = get_labels_submethod(); 
28 | labels_method = get_labels_method(); 
29 | 
30 | labels = []; 
31 | cm = [ ]; 
32 | 
33 | for k = 1:n
34 |     submethod = submethods(k,1);  submethod = submethod{:} 
35 |     label_submethod = labels_submethod.(submethod); 
36 |     if length(methods_arg)
37 |         methods_arg
38 |         method = methods_arg(k,1); method = method{:} 
39 |         label_method = labels_method.(regexprep(method, '-', '_'));
40 |         label = sprintf('%s %s', label_method, label_submethod); 
41 |     else
42 |         label = label_submethod; 
43 |     end
44 |             
45 |     color_k = colors.(submethod);
46 |     cm = [cm; color_k]; 
47 |     labels = [ labels ; cellstr(label) ]; 
48 | end
49 | 
50 | hold on; 
51 | for k = 1:n
52 |     l = zeros(1,n);
53 |     l(k) = precisions(k);
54 |     h = bar(l);
55 |     set(h, 'FaceColor', cm(k,:)); 
56 | end
57 | set(gca, 'FontSize', font_size); 
58 | ylabel(labels_measure.(measure), 'FontSize', font_size);
59 | 
60 | ax = axis()
61 | ax(2) = n+1;
62 | ax(3) = max(0, min(precisions) - 0.05 * (max(precisions) - min(precisions))); 
63 | if ax(3) == ax(4), ax(3) = 0; ax(4) = 1; end; 
64 | ax
65 | axis(ax);
66 | xticklabel_rotate(1:n, rotation, labels, 'FontSize', max(font_size_min, 20 - max(0, floor(0.6 * (n-10))))); 
67 | 
68 | 


--------------------------------------------------------------------------------
/m/predict_euclidean.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Predict Euclidean predictions.  The result is the negative Euclidean
 3 | % distance in the matrix M with MM' = U abs(D) V'. 
 4 | %
 5 | % RESULT 
 6 | %	prediction	(e*1) Prediction values 
 7 | %
 8 | % PARAMETERS 
 9 | %	u		(m*r) Left eigenvectors
10 | %	d		(r*r) Central matrix
11 | %	v		(n*r) Rigth eigenvectors; may be []
12 | %	at		(e*2) Pairs of vertices to predict for 
13 | %
14 | 
15 | function [prediction] = predict_euclidean(U, D, V, T)
16 | 
17 | chunk_size = 10000; 
18 | 
19 | [UU DD] = eig(D);
20 | 
21 | D_sqrt = UU * sqrt(abs(DD)); 
22 | 
23 | U = U * D_sqrt;
24 | 
25 | if ~length(V)
26 |   V = U;
27 | else
28 |   V = V * D_sqrt; 
29 | end
30 | 
31 | [k from to] = konect_fromto(1, size(T,1), chunk_size);
32 | 
33 | prediction = []; 
34 | 
35 | for i = 1:k
36 | 
37 |   from_i = from(i); 
38 |   to_i = to(i); 
39 |   %  fprintf(1, '%d - %d\n', from_i, to_i); 
40 | 
41 |   T_i = T(from_i : to_i, :); 
42 | 
43 |   dif = U(T_i(:,1),:) - V(T_i(:,2),:); 
44 | 
45 |   prediction = [ prediction ; - sum(conj(dif) .* dif, 2) ]; 
46 | end
47 | 


--------------------------------------------------------------------------------
/m/predict_spectral.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute spectral predictions.  Predictions correspond to elements of
 3 | % the matrix UDV', or UDV' - VDU' for the skew decomposition. 
 4 | %
 5 | % RESULT 
 6 | %	prediction	(e*1) Prediction values 
 7 | %
 8 | % PARAMETERS 
 9 | %	U		(m*r) Left eigenvectors
10 | %	D		(r*r) Central matrix 
11 | %	V		(n*r) Right eigenvectors, may be [] in which case U is used in its place 
12 | %	T		(e*2) Pairs of vertices for which to compute link prediction scores 
13 | % 	decomposition
14 | %	
15 | 
16 | function [prediction prediction_complex] = predict_spectral(U, D, V, T, decomposition)
17 | 
18 | chunk_size = 20000; 
19 | 
20 | % The predictions are computed as A * B 
21 | if strcmp(decomposition, 'skew')
22 | 
23 |     A = [ U * D, -V * D ];
24 |     B = [ V, U ];
25 | 
26 | else
27 | 
28 |     A = U * D; 
29 | 
30 |     if length(V)
31 |         B = V; 
32 |     else
33 |         B = U;
34 |     end
35 | end
36 | 
37 | clear U D V;
38 | 
39 | [k from to] = konect_fromto(1, size(T,1), chunk_size);
40 | 
41 | prediction = []; 
42 | 
43 | t = konect_timer(k); 
44 | 
45 | for i = 1:k
46 | 
47 |     t = konect_timer_tick(t, i); 
48 | 
49 |     from_i = from(i); 
50 |     to_i = to(i); 
51 | 
52 |     T_i = T(from_i : to_i, :); 
53 | 
54 |     prediction_i = sum(A(T_i(:,1), :) .* B(T_i(:,2), :), 2); 
55 | 	  
56 |     prediction = [ prediction ; prediction_i ]; 
57 | 
58 | end
59 | 
60 | konect_timer_end(t); 
61 | 
62 | data_decomposition = konect_data_decomposition(decomposition); 
63 | 
64 | if ~isreal(prediction) | data_decomposition.imag
65 |     prediction_complex = imag(prediction); 
66 |     prediction = real(prediction); 
67 | else
68 |     prediction_complex = [];
69 | end
70 | 


--------------------------------------------------------------------------------
/m/prediction_local.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute predictions using a local method.
 3 | %
 4 | % PARAMETERS 
 5 | %	$NETWORK	Name of network 
 6 | %	$METHOD		Name pf local link prediction method
 7 | %
 8 | % INPUT 
 9 | %	dat/split.$NETWORK.mat
10 | %	dat/meanst.$NETWORK.mat
11 | %	dat/info.$NETWORK
12 | %
13 | % OUTPUT 
14 | %	dat/prediction.$METHOD.$NETWORK.mat
15 | %		predictions{submethod}		Struct by name of submethod containing struct of
16 | %			prediction		Column vector of prediction values, following SPLIT.at_test
17 | %			prediction_zero		Column vector of prediction values, following SPLIT.at_test_zero; [] if not used 
18 | %
19 | 
20 | network = getenv('NETWORK');
21 | method = getenv('METHOD'); 
22 | 
23 | split = load(sprintf('dat/split.%s.mat', network));
24 | meanst = load(sprintf('dat/meanst.%s.mat', network));
25 | info = read_info(network); 
26 | 
27 | enable_zero = size(split.T_test_zero) 
28 | 
29 | T_training = [ split.T_source ; split.T_target ]; 
30 | 
31 | T_training = konect_normalize_additively(T_training, meanst);
32 | 
33 | A_training = konect_spconvert(T_training, split.n1, split.n2); 
34 | 
35 | T_test_all = split.T_test(:,1:2);
36 | if enable_zero
37 |     T_test_all = [ T_test_all ; split.T_test_zero ]; 
38 | end
39 | 
40 | ps = prediction_local_compute(method, A_training, T_test_all, info.format, info.weights);
41 | 
42 | submethods = fieldnames(ps); 
43 | 
44 | predictions = struct(); 
45 | 
46 | for i = 1 : length(submethods) 
47 |     submethod = submethods{i};
48 | 
49 |     prediction = konect_denormalize_additively(T_test_all, ps.(submethod), meanst); 
50 | 
51 |     if enable_zero
52 |         prediction_zero = prediction((size(split.T_test,1)+1) : end); 
53 |         prediction = prediction(1 : size(split.T_test,1)); 
54 |     else
55 |         prediction_zero = []; 
56 |     end
57 | 
58 |     predictions.(submethod).prediction = prediction; 
59 |     predictions.(submethod).prediction_zero = prediction_zero; 
60 | 
61 | end
62 | 
63 | save(sprintf('dat/prediction.%s.%s.mat', method, network), '-v7.3', 'predictions'); 
64 | 


--------------------------------------------------------------------------------
/m/prediction_local_compute.m:
--------------------------------------------------------------------------------
 1 | % 
 2 | % Compute local prediction.
 3 | %
 4 | % PARAMETERS
 5 | %	method		Prediction method
 6 | %	a		Adjacency/biadjacency matrix
 7 | %	at		(e*2) Each row is a vertex pair (i,j) for which to compute a prediction
 8 | %	format
 9 | %	weights
10 | %
11 | % RESULT
12 | %	predictions{submethod}		(e*1) Prediction scores 
13 | % 
14 | 
15 | function [predictions] = prediction_local_compute(method, a, at, format, weights)
16 | 
17 | fh = str2func(sprintf('prediction_local_compute_%s', method)); 
18 | 
19 | predictions = fh(a, at, format, weights); 
20 | 


--------------------------------------------------------------------------------
/m/prediction_local_compute_mask.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the Rank-1 mask approximation (only when there is
 3 | % no "zero").   See mask.m
 4 | %
 5 | % PARAMETERS
 6 | %	a		Adjacency/biadjacency matrix
 7 | %	at		(e*2) Vertex pairs for which to compute predictions
 8 | %	format
 9 | %	weights
10 | %
11 | % RESULT
12 | %	predictions{submethod}	Struct by submethod name of (e*1) Predictions
13 | %		'main'		(only SIGNED and WEIGHTED) rank-1 mask
14 | %				approximation 
15 | %
16 | 
17 | function [predictions] = prediction_local_compute_mask(a, at, format, weights)
18 | 
19 | consts = konect_consts();
20 | 
21 | predictions = struct(); 
22 | 
23 | if weights == consts.SIGNED | weights == consts.WEIGHTED
24 | 
25 |   [u v] = mask(a, a ~= 0);
26 |   predictions.main = u(at(:,1)) .* v(at(:,2)); 
27 | end
28 | 
29 | 


--------------------------------------------------------------------------------
/m/prediction_local_compute_neib.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute neighborhood predictions. 
 3 | %
 4 | % RESULT 
 5 | %	predictions	Struct by submethod name of (e*1) Predictions
 6 | %
 7 | % PARAMETERS 
 8 | %	A		Adjacency/biadjacency matrix
 9 | %	T		(e*2) Vertex pairs for which to compute predictions
10 | %	format
11 | %	weights
12 | %
13 | 
14 | function [predictions] = prediction_local_compute_neib(A, T, format, weights)
15 | 
16 | consts = konect_consts(); 
17 | 
18 | [negative] = konect_data_weights(); 
19 | 
20 | predictions = struct(); 
21 | 
22 | if format == consts.BIP
23 |     return; 
24 | end
25 | 
26 | submethods = { 'common', 'adad', 'ra', 'jaccard', 'cosine', 'sorensen', 'hpi', 'hdi', 'lhni' }; 
27 | submethods_negative = { 'abscommon', 'absadad', 'absjaccard', 'abscosine' }; 
28 | 
29 | N = length(submethods);
30 | if negative(weights)
31 |     N = N + length(submethods_negative); 
32 | end
33 | if format == consts.ASYM
34 |     N = N + 3 * length(submethods); 
35 | end
36 | 
37 | t = konect_timer(N); 
38 | 
39 | I = 1; 
40 | 
41 | for i = 1 : length(submethods)
42 |     t = konect_timer_tick(t, I); I = I + 1; 
43 |     submethod = submethods{i}; 
44 |     predictions.(submethod) = konect_predict_neib(submethod, A, T, format, 'sym'); 
45 | end
46 | 
47 | if negative(weights)
48 |     for i = 1 : length(submethods_negative)
49 |         t = konect_timer_tick(t, I); I = I + 1; 
50 |         submethod = submethods_negative{i}; 
51 |         predictions.(submethod) = konect_predict_neib(submethod, A, T, format, 'sym'); 
52 |     end
53 | end
54 | 
55 | if format == consts.ASYM
56 |     for i = 1 : length(submethods)
57 |         t = konect_timer_tick(t, I); I = I + 1; 
58 |         submethod = submethods{i}; 
59 |         predictions.([submethod 'asym']) = konect_predict_neib(submethod, A, T, format, 'asym'); 
60 |         predictions.([submethod 'out' ]) = konect_predict_neib(submethod, A, T, format, 'out' ); 
61 |         predictions.([submethod 'in'  ]) = konect_predict_neib(submethod, A, T, format, 'in'  ); 
62 |     end
63 | end
64 | 
65 | konect_timer_end(t); 
66 | 


--------------------------------------------------------------------------------
/m/prediction_local_compute_neib3.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute neighborhood predictions, based on paths of length 3. 
 3 | %
 4 | % RESULT 
 5 | %	predictions	Struct by submethod name of (e*1)-vectors
 6 | %			containing the predictions
 7 | %
 8 | % PARAMETERS 
 9 | %	A		Adjacency/biadjacency matrix
10 | %	T		(e*2) Vertex pairs for which to compute predictions
11 | %	format		Format of network
12 | %	weights		Weights of network 
13 | %
14 | 
15 | function [predictions] = prediction_local_compute_neib3(A, T, format, weights)
16 | 
17 | % These two lookups are carried over unchanged; their results are not
18 | % used further down. 
19 | consts = konect_consts(); 
20 | [negative] = konect_data_weights(); 
21 | 
22 | % All submethods that are based on paths of length 3
23 | names = { 'path3' }; 
24 | 
25 | predictions = struct(); 
26 | 
27 | for entry = names
28 |     name = entry{1}; 
29 |     predictions.(name) = konect_predict_neib3(name, A, T, format); 
30 | end
31 | 


--------------------------------------------------------------------------------
/m/prediction_local_compute_pref.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute preferential attachment prediction.
 3 | %
 4 | % PARAMETERS
 5 | %	a		Adjacency/biadjacency matrix
 6 | %	at		(e*2) Vertex pairs for which to compute predictions
 7 | %	format
 8 | %	weights
 9 | %
10 | % RESULT
11 | %	predictions	Struct by submethod name of (e*1) Predictions
12 | %
13 | 
14 | function [predictions] = prediction_local_compute_pref(a, at, format, weights)
15 | 
16 | consts = konect_consts(); 
17 | 
18 | % Non-logical weights count by their magnitude; logical matrices are
19 | % used as they are. 
20 | if islogical(a)
21 |   w = a; 
22 | else
23 |   w = abs(a); 
24 | end
25 | 
26 | deg_row = sum(w, 2);      % row sums, one per row vertex
27 | deg_col = sum(w, 1)';     % column sums, one per column vertex
28 | 
29 | % In the SYM format both endpoints use the combined row + column sum. 
30 | if format == consts.SYM
31 |   deg_row = deg_row + deg_col; 
32 |   deg_col = deg_row; 
33 | end
34 | 
35 | % Score of a pair = product of the sums of its two endpoints
36 | from = at(:,1); 
37 | to   = at(:,2); 
38 | predictions.main = deg_row(from) .* deg_col(to); 
39 | 


--------------------------------------------------------------------------------
/m/prediction_local_compute_zero.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute zero prediction, i.e. always predict zero. 
 3 | %
 4 | % PARAMETERS
 5 | %	a		Adjacency/biadjacency matrix
 6 | %	at		(e*2) Vertex pairs for which to compute predictions
 7 | %	format
 8 | %	weights
 9 | %
10 | % RESULT
11 | %	predictions	Struct by subname of (e*1) Predictions
12 | %
13 | 
14 | function [predictions] = prediction_local_compute_zero(a, at, format, weights)
15 | 
16 | % One zero per requested vertex pair, i.e. per row of AT
17 | count = size(at, 1); 
18 | predictions = struct('main', zeros(count, 1)); 
19 | 


--------------------------------------------------------------------------------
/m/prepare_matrix_target.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Prepare target matrix for the diagonality test when in "base" mode,
 3 | % not self mode.  This is identical to konect_prepare_matrix(), only
 4 | % that for Laplacian decompositions this will return the underlying
 5 | % adjacency matrix.   
 6 | %
 7 | 
 8 | function [B] = prepare_matrix_target(decomposition, A, format)
 9 | 
10 | % Laplacian-type decompositions and, in the same position, the
11 | % decomposition name that is used for the target matrix instead. 
12 | laplacian = { 'lap', 'lapc', 'lapd', 'lapd-n', 'lapherm', 'lapskew', 'lapquantum', 'lapq' }; 
13 | base      = { 'sym', 'sym',  'svd',  'svd-n',  'herm',    'skewi',   'quantum',    'sym'  }; 
14 | 
15 | % Any name that is not listed passes through unchanged. 
16 | hit = find(strcmp(decomposition, laplacian), 1); 
17 | if ~isempty(hit)
18 |     decomposition = base{hit}; 
19 | end
20 | 
21 | B = konect_matrix(decomposition, A, format);
22 | 


--------------------------------------------------------------------------------
/m/rating_evolution.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot the rating evolution, i.e. the mean rating of each item in
 3 | % function of number of ratings.
 4 | %
 5 | % PARAMETERS 
 6 | %	$network
 7 | %
 8 | % INPUT FILES 
 9 | %	dat/data.$network.mat
10 | %
11 | % OUTPUT FILES 
12 | %	plot/rating_evolution.[ab].$network.eps
13 | %
14 | 
15 | network = getenv('network');
16 | 
17 | data = load(sprintf('dat/data.%s.mat', network));
18 | 
19 | % Column 2 of T is the key the ratings are grouped by, column 3 holds
20 | % the rating values. 
21 | item_of   = data.T(:,2); 
22 | rating_of = data.T(:,3); 
23 | ids = unique(item_of);
24 | 
25 | %
26 | % (b) - normalized to zero final mean weight
27 | %
28 | hold on; 
29 | 
30 | for k = 1:length(ids)
31 | 
32 |   % Ratings of the k'th id, in the order in which they appear in T
33 |   r = rating_of(item_of == ids(k)); 
34 |   steps = 1:length(r); 
35 | 
36 |   % Running mean after each rating, shifted so that it ends at zero
37 |   running = cumsum(r) ./ steps'; 
38 |   plot(steps, running - mean(r), '-');  
39 | 
40 | end
41 | 
42 | konect_print(sprintf('plot/rating_evolution.b.%s.eps', network)); 
43 | 
44 | %
45 | % (a) - all
46 | %
47 | hold on; 
48 | 
49 | for k = 1:length(ids)
50 | 
51 |   r = rating_of(item_of == ids(k)); 
52 |   steps = 1:length(r); 
53 | 
54 |   % Unshifted running mean, drawn in green
55 |   plot(steps, cumsum(r) ./ steps', 'g-');  
56 | 
57 | end
58 | 
59 | konect_print(sprintf('plot/rating_evolution.a.%s.eps', network)); 
60 | 


--------------------------------------------------------------------------------
/m/rating_evolution2.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot the rating evolution, i.e. the mean rating of each item in
 3 | % function of number of ratings.  Plot only the fast stuff.
 4 | %
 5 | % PARAMETERS 
 6 | %	$network
 7 | %
 8 | % INPUT FILES 
 9 | %	dat/data.$network.mat
10 | %
11 | % OUTPUT FILES 
12 | %	plot/rating_evolution2.c.$network.eps
13 | %
14 | 
15 | network = getenv('network');
16 | 
17 | bins = 10;              % number of bins the rating range is divided into
18 | font_size = 22;
19 | line_width = 3; 
20 | 
21 | data = load(sprintf('dat/data.%s.mat', network));
22 | 
23 | ids = unique(data.T(:,2));
24 | 
25 | rating_min = min(data.T(:,3))
26 | rating_max = max(data.T(:,3)) 
27 | rating_span = rating_max - rating_min; 
28 | 
29 | % Row b accumulates the curves that fall into bin b:  SUMS holds the
30 | % summed running means, COUNTS the number of curves contributing at
31 | % each position.  Columns are appended on demand. 
32 | sums   = zeros(bins, 0); 
33 | counts = zeros(bins, 0);
34 | 
35 | %
36 | % (c) - all
37 | %
38 | 
39 | for k = 1:length(ids)
40 | 
41 |   i = ids(k);
42 |   ati = data.T(data.T(:,2) == i, 3);
43 |   n = length(ati); 
44 | 
45 |   % Running mean after each rating, as a row vector
46 |   average = cumsum(ati)' ./ (1:n);
47 | 
48 |   % SUMS and COUNTS always have the same width, so one pad serves both
49 |   if n > size(sums, 2)
50 |     pad    = zeros(bins, n - size(sums, 2)); 
51 |     sums   = [sums  , pad]; 
52 |     counts = [counts, pad]; 
53 |   end  
54 | 
55 |   % Bin by the final mean.  When all ratings are equal the range is
56 |   % zero and the quotient would be NaN; everything goes to bin 1 then. 
57 |   if rating_span > 0
58 |     i_bin = 1 + floor(bins * (average(end) - rating_min) / rating_span); 
59 |   else
60 |     i_bin = 1; 
61 |   end
62 | 
63 |   % The maximum rating lands one past the last bin; clamp to BINS
64 |   % (this was a hard-coded 10 before, wrong for any other bin count). 
65 |   if i_bin > bins, i_bin = bins; end
66 | 
67 |   rating_midbin = rating_min + (i_bin - 0.5) * rating_span / bins;
68 | 
69 |   % Shift the curve so that it ends at the midpoint of its bin
70 |   average = average - average(end) + rating_midbin; 
71 | 
72 |   sums(i_bin, 1 : n)   = sums(i_bin, 1 : n)   + average;
73 |   counts(i_bin, 1 : n) = counts(i_bin, 1 : n) + 1;   
74 | 
75 | end
76 | 
77 | hold on;
78 | 
79 | % One mean curve per bin; positions without any curve give 0/0 = NaN
80 | for i = 1 : bins
81 |   plot(1 : size(sums, 2), sums(i, :) ./ counts(i, :), '-', ...
82 |       'LineWidth', line_width); 
83 | end
84 | 
85 | set(gca, 'FontSize', font_size); 
86 | 
87 | konect_print(sprintf('plot/rating_evolution2.c.%s.eps', network)); 
88 | 


--------------------------------------------------------------------------------
/m/read_info.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Get information about one dataset.
 3 | %
 4 | % DEPRECATED - instead, load individual statistics from dat/statistic.$STATISTIC.$NETWORK
 5 | %
 6 | % RESULT 
 7 | %	A struct with the following fields
 8 | %		
 9 | %		n1,n2		Number of left/right nodes equal in
10 | %				unipartite networks
11 | %		n		Total number of nodes (n1 + n2 if bipartite, else n1)
12 | %		lines		Number of edges, but not counting
13 | %				multiple edges when they are
14 | %				aggregated
15 | %		rmn_		= r / (m*n)  [deprecated]
16 | %		format		as a number (see constants.m)
17 | %		weights		as a number (see constants.m)
18 | %
19 | % PARAMETERS 
20 | %	network		Dataset name
21 | %
22 | 
23 | function info = read_info(network)
24 | 
25 | network
26 | 
27 | raw = load(sprintf('dat/info.%s', network));
28 | 
29 | consts = konect_consts(); 
30 | 
31 | % The info file stores its values in this fixed order. 
32 | info = struct( ...
33 |     'n1',      raw(1), ...
34 |     'n2',      raw(2), ...
35 |     'lines',   raw(3), ...
36 |     'rmn_',    raw(4), ...
37 |     'format',  raw(5), ...
38 |     'weights', raw(6)); 
39 | 
40 | % Total node count:  both sides for bipartite networks, otherwise the
41 | % two counts must agree and either one is the total. 
42 | if info.format ~= consts.BIP
43 |     assert (raw(1) == raw(2)); 
44 |     info.n = raw(1);
45 | else
46 |     info.n = raw(1) + raw(2);
47 | end
48 | 


--------------------------------------------------------------------------------
/m/read_meta.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Read metadata from a data file.
 3 | %
 4 | % RETURN VALUE 
 5 | %	A struct where field names are the keys and the values are
 6 | %	strings. In key names, '-' is replaced with '_'.
 7 | %
 8 | % PARAMETERS 
 9 | %	network		Network name
10 | %
11 | 
12 | function ret = read_meta(network)
13 | 
14 | filename = sprintf('uni/meta.%s', network); 
15 | 
16 | FILE = fopen(filename, 'r'); 
17 | 
18 | if FILE < 0, error('*** open'); end; 
19 | 
20 | % Start from an empty struct (not an empty cell array) so that the
21 | % documented return type also holds when no line of the file matches. 
22 | ret = struct();  
23 | 
24 | while 1
25 | 
26 |     line = fgetl(FILE);
27 | 
28 |     % fgetl() returns the number -1 at end of file and a char array
29 |     % for every line that was read, including empty lines. 
30 |     if ~ischar(line), break; end; 
31 | 
32 |     % "key : value" with surrounding whitespace stripped from both
33 |     tokens = regexp(line, '\s*([a-zA-Z0-9-]+)\s*:\s*(\S.*\S|\S)\s*', 'tokens', 'once'); 
34 | 
35 |     % Lines that are not of that form are skipped
36 |     if length(tokens) < 2
37 |         continue;
38 |     end
39 | 
40 |     key   = tokens{1}; 
41 |     value = tokens{2};
42 | 
43 |     % '-' is not allowed in field names
44 |     key = regexprep(key, '-', '_'); 
45 | 
46 |     ret.(key) = value;   
47 | 
48 | end;
49 | 
50 | if fclose(FILE) < 0, error('fclose'); end; 
51 | 


--------------------------------------------------------------------------------
/m/read_statistic.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Read a network statistic.
 3 | %
 4 | % RETURN VALUES 
 5 | %	data	(k*) Vector of all computed values
 6 | %
 7 | % PARAMETERS  
 8 | %	statistic	Internal name of statistic
 9 | %	network		Internal name of network
10 | %	k		(optional) Number of values to return; by
11 | % 			default, return all values 
12 | %
13 | 
14 | function data = read_statistic(statistic, network, k)
15 | 
16 | data = load(sprintf('dat/statistic.%s.%s', statistic, network));
17 | 
18 | % A statistic file must load as a single column of values. 
19 | assert(size(data, 2) == 1); 
20 | 
21 | if nargin >= 3
22 |   assert(length(data) >= k); 
23 |   % Keep the first K rows.  Selecting columns 1:K, as was done before,
24 |   % fails for K > 1 and returns everything for K = 1, because DATA has
25 |   % exactly one column. 
26 |   data = data(1:k, :);
27 | end
28 | 


--------------------------------------------------------------------------------
/m/rmse_full.m:
--------------------------------------------------------------------------------
 1 | 
 2 | function ret = rmse_full(A, U, X, V)
 3 | %
 4 | % Mean of the squared absolute entrywise differences between A and
 5 | % its approximation U * X * V'. 
 6 | %
 7 | % NOTE(review): despite the name no square root is taken here, unlike
 8 | % in rmse_latent(); left as is because callers may rely on it. 
 9 | %
10 | % RESULT 
11 | %	ret	The error value; it is also echoed
12 | %
13 | % PARAMETERS 
14 | %	A	Matrix to approximate
15 | %	U,X,V	Factors of the approximation; V may be omitted or
16 | %		empty, in which case U is used in its place
17 | %
18 | 
19 | % An omitted V used to fail with an undefined-variable error. 
20 | if nargin < 4 || isempty(V)
21 |     V = U; 
22 | end
23 | 
24 | di = A - U * X * V'; 
25 | 
26 | % conj(di) .* di is the squared magnitude, also for complex entries
27 | ret = sum(sum(conj(di) .* di)) / numel(A)
28 | 
29 | if isnan(ret) || isinf(ret), error('***'); end
30 | 


--------------------------------------------------------------------------------
/m/rmse_latent.m:
--------------------------------------------------------------------------------
 1 | 
 2 | function ret = rmse_latent(a1, a2, a3, U, X, V)
 3 | %
 4 | % Root mean squared error of the predictions U(i,:) * X * V(j,:)' over
 5 | % a list of entries. 
 6 | %
 7 | % RESULT 
 8 | %	ret	The RMSE; an error is raised if it is NaN or infinite
 9 | %
10 | % PARAMETERS 
11 | %	a1,a2	Row indices into U and into V, one per entry
12 | %	a3	Value to compare each prediction against
13 | %	U,X,V	Factors of the model; V may be omitted or empty, in
14 | %		which case U is used in its place
15 | %
16 | 
17 | % This fallback used to test and assign the undefined lowercase
18 | % variables v and u, so the function failed on every call. 
19 | if nargin < 6 || isempty(V)
20 |     V = U; 
21 | end
22 | 
23 | % Named so as not to shadow the builtin sum()
24 | squared_error = 0;
25 | 
26 | for i = 1 : size(a1,1)
27 | 
28 |     pred = U(a1(i),:) * X * V(a2(i),:)';
29 |   
30 |     squared_error = squared_error + (abs(pred - a3(i)))^2; 
31 | 
32 | end
33 | 
34 | ret = sqrt(squared_error / size(a1,1));
35 | 
36 | % An empty entry list gives 0/0 = NaN and is rejected here as well
37 | if isnan(ret) || isinf(ret), error('***'); end
38 | 


--------------------------------------------------------------------------------
/m/runtime.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Analyse runtime.
 3 | %
 4 | % OUTPUT 
 5 | % 	dat/runtime
 6 | % 	plot/runtime.eps
 7 | %
 8 | % INPUT 
 9 | %       tmp.runtime
10 | %
11 | 
12 | a = load('tmp.runtime')
13 | % Column 1 of A is the modelled quantity; columns 2 to 4 are the predictors. 
14 | % regression:  least-squares fit (via the pseudoinverse) of -log(a(:,1)) against a constant and three log terms
15 | c = [-log(a(:,1))]
16 | d = [ones(size(a,1),1)  log(a(:,2) + a(:,3)) log(a(:,4)) log(a(:,2) .* a(:,3))]
17 | x = pinv(d) * c
18 | kp = exp(- d * x);
19 | % Echo column 1 next to its fitted value exp(-d*x) for comparison
20 | [a(:,1) kp]
21 | 
22 | % curve fitting:  column 2 of B is the product of the predictors raised to the fitted exponents x(2:4), without the constant term
23 | b = [a(:,1) ((a(:,2) + a(:,3)).^x(2) .* a(:,4).^x(3) .* (a(:,2).*a(:,3)).^x(4))]
24 | % Split the rows by the signed relative difference (a2 - a3) / a2:  below 0.002 versus the rest
25 | i_squ = find(((a(:,2) - a(:,3)) ./ a(:,2)) <  .002)
26 | i_rec = find(((a(:,2) - a(:,3)) ./ a(:,2)) >= .002)
27 | % First group as blue circles, second group as red circles, in one log-log plot
28 | loglog(b(i_squ,2), b(i_squ,1), 'ob');
29 | hold;
30 | loglog(b(i_rec,2), b(i_rec,1), 'or');
31 | print('-depsc', 'plot/runtime.eps');  close all;
32 | % Store the fitted coefficients x as plain text
33 | save -ascii 'dat/runtime' x;
34 | 


--------------------------------------------------------------------------------
/m/shrinkingdiversity.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Test the hypothesis of shrinking diversity
 3 | %
 4 | % PARAMETERS 
 5 | %	$NETWORKS	Space-separated list of networks
 6 | %	$STATISTIC
 7 | %	$TYPE		"split" or "full"
 8 | %
 9 | % INPUT 
10 | %	dat/trend.$TYPE.$STATISTIC.$NETWORK.mat
11 | %		For each $NETWORK in $NETWORKS
12 | %
13 | % OUTPUT 
14 | %	dat/shrinkingdiversity.$TYPE.$STATISTIC		Unspecified content (evaluated by hand)
15 | %
16 | 
17 | networks = getenv('NETWORKS');  networks = regexp(networks, '[a-zA-Z0-9_-]+', 'match')
18 | statistic = getenv('STATISTIC'); 
19 | type = getenv('TYPE'); 
20 | 
21 | alpha = 0.05;           % significance threshold for the p-values below
22 | 
23 | % Sign that selects which of the two hypotheses is reported as H
24 | updown_statistic = get_updown_statistic(); 
25 | updown_statistic_i = updown_statistic.(statistic); 
26 | 
27 | % Each column is a network
28 | % Rows correspond to those in dat/trend.*
29 | data = [];
30 | 
31 | n = length(networks) 
32 | 
33 | for i = 1 : n
34 |     network = networks{i}
35 | 
36 |     data_i = load(sprintf('dat/trend.%s.%s.%s.mat', type, statistic, network));
37 | 
38 |     data = [ data [ data_i.H ; data_i.updown ] ]; 
39 | end
40 | 
41 | % Row 2 is the up/down value; networks where it is zero count for
42 | % neither direction. 
43 | going_up   = data(2,:) > 0; 
44 | going_down = data(2,:) < 0; 
45 | k_up =   sum(going_up)		% Number of networks where statistic goes up
46 | k_down = sum(going_down)	% Number of networks where statistic goes down
47 | 
48 | 
49 | p_up   = betainc(0.5, k_up  , n - k_up   + 1)	% p-value for hypothesis of going up
50 | p_down = betainc(0.5, k_down, n - k_down + 1)	% p-value for hypothesis of going down
51 | H_up   = p_up   < alpha 			% whether the going-up hypothesis is validated
52 | H_down = p_down < alpha 			% whether the going-down hypothesis is validated
53 | 
54 | if updown_statistic_i > 0
55 |     H = H_up
56 | else
57 |     H = H_down
58 | end
59 | 
60 | %
61 | % Save
62 | %
63 | OUT = fopen(sprintf('dat/shrinkingdiversity.%s.%s', type, statistic), 'w');
64 | % Fail here with a clear message rather than later inside fprintf()
65 | if OUT < 0, error('*** fopen'); end
66 | fprintf(OUT, '%u\n%u\n%u\n%g\n%u\n%g\n%u\n%u\n', ...
67 |         k_up, k_down, n, p_up, H_up, p_down, H_down, H); 
68 | if fclose(OUT); error '*** fclose'; end;
69 | 


--------------------------------------------------------------------------------
/m/sne.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Spectral network evolution.
 3 | %
 4 | % Both decompositions need not be of the same size. 
 5 | %
 6 | % V and source_V are [] for symmetric decompositions. 
 7 | %
 8 | % RESULT 
 9 | %	dd_new		Predicted change of the eigenvalues, to be added to dd
10 | %
11 | % PARAMETERS 
12 | %	source_U		Decomposition of source set 
13 | %	source_dd
14 | %	source_V
15 | %	U,dd,V			Decomposition of training set
16 | %	func			
17 | %		(optional) Function mapping the scalar product of eigenvectors to a
18 | %		weight; defaults to f(x) = x. 
19 | %
20 | 
21 | function [dd_new] = sne(source_U, source_dd, source_V, ...
22 |                         U, dd, V, func)
23 | 
24 | % Identity weighting unless the caller supplies a function
25 | if nargin < 7
26 |     func = @(x)(x); 
27 | end
28 | 
29 | n_new = size(U, 2); 
30 | n_src = size(source_U, 2); 
31 | 
32 | % An empty V means that only U is used
33 | two_sided = ~isempty(V); 
34 | 
35 | % Estimate of each value of DD as a weighted mean of SOURCE_DD
36 | estimate = dd; 
37 | 
38 | for i = 1:n_new
39 | 
40 |     numerator   = 0; 
41 |     denominator = 0;
42 | 
43 |     for j = 1:n_src
44 | 
45 |         % Scalar product of the j'th source vector with the i'th vector;
46 |         % multiplied by its V counterpart, or squared when there is no V
47 |         overlap = source_U(:,j)' * U(:,i); 
48 |         if two_sided
49 |             overlap = overlap * (source_V(:,j)' * V(:,i)); 
50 |         else
51 |             overlap = overlap ^2; 
52 |         end
53 | 
54 |         w = func(overlap); 
55 | 
56 |         denominator = denominator + w;
57 |         numerator   = numerator + w * source_dd(j); 
58 |     end
59 | 
60 |     % Avoid 0/0 when all weights cancel
61 |     if denominator == 0
62 |         denominator = 1; 
63 |     end
64 | 
65 |     estimate(i) = numerator / denominator;     
66 | 
67 | end
68 | 
69 | dd_new = dd - estimate; 
70 | 


--------------------------------------------------------------------------------
/m/spectral_diagonality_test.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the spectral diagonality test matrix.
 3 | %
 4 | % This is the matrix \Delta in Jérôme's PhD thesis.
 5 | %
 6 | % PARAMETERS 
 7 | %	decomposition			The decomposition that is used 
 8 | %	decomposition_source		The actual decomposition of the source matrix
 9 | %		.U, .D, .V (optional)
10 | %	A_target			The target matrix
11 | %	format				Format of the network
12 | %	enable_self			Mode (optional)
13 | %		0			(default) Use base matrix
14 | %		1			Use the same matrix 
15 | %
16 | 
17 | function [Delta] = spectral_diagonality_test(decomposition, decomposition_source, ...
18 |                                              A_target, format, enable_self)
19 | 
20 | % Default mode:  compare against the base matrix
21 | if ~exist('enable_self', 'var')
22 |     enable_self = 0; 
23 | end
24 | 
25 | enable_self 
26 | 
27 | data_decomposition = konect_data_decomposition(decomposition);
28 | 
29 | size_A_target = size(A_target)
30 | if enable_self
31 |     A_target = konect_matrix(decomposition, A_target, format); 
32 | else % base
33 |     A_target = prepare_matrix_target(decomposition, A_target, format); 
34 | end
35 | size_A_target = size(A_target)
36 | 
37 | 
38 | U = decomposition_source.U;
39 | 
40 | % V is documented as optional:  treat a missing field like an empty V
41 | % instead of failing on the field access. 
42 | if isfield(decomposition_source, 'V')
43 |     V = decomposition_source.V; 
44 | else
45 |     V = []; 
46 | end
47 | 
48 | size_U = size(U)
49 | size_V = size(V) 
50 | 
51 | % When the target is square and has as many rows as U and V together,
52 | % stack both into one basis and continue with the one-sided case. 
53 | if ~isempty(V) && size(U,1)+size(V,1) == size(A_target,1) && size(A_target,1) == size(A_target,2)
54 |     U = [U ; V]; 
55 |     V = []; 
56 | end
57 | 
58 | if ~isempty(V)
59 | 
60 |     % Two-sided case.  With the .o flag set the (conjugate) transpose
61 |     % is used; otherwise konect_xpinv() is used. 
62 |     if data_decomposition.o
63 |         u_i = U'; 
64 |         v_i = V'; 
65 |     else
66 |         u_i = konect_xpinv(U);
67 |         v_i = konect_xpinv(V);
68 |     end
69 | 
70 |     if strcmp(decomposition, 'skew')
71 |         Delta = u_i * A_target * v_i' - v_i * A_target * u_i'; 
72 |     else
73 |         size_u_i = size(u_i)
74 |         size_v_i = size(v_i)
75 |         size_A_target = size(A_target)
76 |             
77 |         Delta = u_i * A_target * v_i';
78 |     end
79 | 
80 | else
81 | 
82 |     % One-sided case:  the same basis is applied on both sides
83 |     if data_decomposition.o
84 |         u_i = U'; 
85 |     else
86 |         u_i = konect_xpinv(U);
87 |     end
88 |     
89 |     Delta = u_i * A_target * u_i'; 
90 | 
91 | end
92 | 


--------------------------------------------------------------------------------
/m/spectral_extrapolation.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Visualization of the spectral extrapolation method.
 3 | %
 4 | % PARAMETERS 
 5 | %	$NETWORK
 6 | %	$DECOMPOSITION
 7 | % 
 8 | % INPUT 
 9 | %	dat/decomposition_time.split.$DECOMPOSITION.$NETWORK.mat
10 | %	dat/steps.$NETWORK.mat
11 | % 
12 | % OUTPUT 
13 | %	plot/spectral_extrapolation.$DECOMPOSITION.$NETWORK.eps 
14 | %
15 | 
16 | marker_size = 10; 
17 | 
18 | network = getenv('NETWORK');
19 | decomposition = getenv('DECOMPOSITION'); 
20 | 
21 | data_decomposition = load(sprintf('dat/decomposition_time.split.%s.%s.mat', decomposition, network)); 
22 | steps_data = load(sprintf('dat/steps.%s.mat', network)); 
23 | % DECOMPOSITIONS is a struct array with fields U, D and V; R is the number of diagonal entries of D that are plotted
24 | decompositions = data_decomposition.decompositions; 
25 | r = data_decomposition.r; 
26 | % steps_source and steps_training index DECOMPOSITIONS below; together with steps_all they also index E_STEPS
27 | steps_source = steps_data.steps_source; 
28 | steps_training = steps_data.steps_source + steps_data.steps_target; 
29 | steps_all = steps_data.steps_all; 
30 | e_steps = steps_data.e_steps; 
31 | 
32 | is_asymmetric = size(decompositions(end).V); 
33 | % NOTE(review): is_asymmetric is not used anywhere below. 
34 | 
35 | %
36 | % Spectrum over time
37 | %
38 | 
39 | hold on;
40 | % One series of blue dots per index k:  the k'th diagonal entry of D of every decomposition, plotted against E_STEPS
41 | for k = 1 : r
42 |     spectrum = zeros(prod(size(e_steps)),1); 
43 |     
44 |     for l = 1 : prod(size(decompositions))
45 |         % Each decomposition may have a different size.  
46 |         if k <= size(decompositions(l).D, 1)
47 |             spectrum(l) = decompositions(l).D(k,k);
48 |         else
49 |             spectrum(l) = NaN; % No k'th eigenvalue at this timepoint
50 |         end
51 |     end
52 | 
53 |     spectrum = spectrum_visualize(spectrum, decomposition); 
54 | 
55 |     plot(e_steps, spectrum, '.b', 'MarkerSize', marker_size); 
56 | end
57 | 
58 | 
59 | %
60 | % Extrapolation lines 
61 | %
62 | D_source = decompositions(steps_source).D;
63 | U_source = decompositions(steps_source).U; 
64 | V_source = decompositions(steps_source).V; 
65 | 
66 | D_target = decompositions(steps_training).D; 
67 | U_target = decompositions(steps_training).U;
68 | V_target = decompositions(steps_training).V; 
69 | % sne() returns diag(D_target) minus an estimate of it that is built from the source decomposition
70 | dd_diff_squ = sne(U_source, diag(D_source), V_source, U_target, diag(D_target), V_target, @(x)(x)); 
71 | % Extrapolate forward by adding that difference once more
72 | dd_new_squ = dd_diff_squ + diag(D_target) 
73 | % Subtracting it instead recovers the source-based estimate
74 | dd_old_squ = - dd_diff_squ + diag(D_target)
75 | % For each k, a dashed black line through the estimate, the training value and the extrapolated value
76 | for k = 1 : r
77 |     if size(D_target, 1) >= k
78 |         plot(steps_data.e_steps([steps_source steps_training steps_all]), ...
79 |              real([dd_old_squ(k) D_target(k,k) dd_new_squ(k)]), 'o--', 'Color', [0 0 0], 'LineWidth', 2); 
80 |     end
81 | end
82 | 
83 | konect_print(sprintf('plot/spectral_extrapolation.%s.%s.eps', decomposition, network));
84 | 


--------------------------------------------------------------------------------
/m/spectrum_visualize.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Convert a spectrum to real values that can be visualized.
 3 | %
 4 | % In all cases we take simply the real part, except for skew
 5 | % decompositions, where we take the imaginary part.  (A more complex
 6 | % example would be the complex logarithm for orthogonal matrices, whose
 7 | % eigenvalues are unitary.)
 8 | %
 9 | % RESULT 
10 | %	ret		Real values 
11 | %
12 | % PARAMETERS 
13 | %	spectrum	Complex spectrum to visualize
14 | %	decomposition	Decomposition
15 | %
16 | 
17 | function ret = spectrum_visualize(spectrum, decomposition)
18 | 
19 | flags = konect_data_decomposition(decomposition); 
20 | 
21 | % The real part is the default; decompositions whose .i flag is set
22 | % are shown by their imaginary part instead. 
23 | ret = real(spectrum); 
24 | 
25 | if flags.i
26 |   ret = imag(spectrum); 
27 | end
28 | 


--------------------------------------------------------------------------------
/m/statistic_avgdegree.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the average degree statistic of a network. 
 3 | %
 4 | % PARAMETERS 
 5 | %	$network 
 6 | %
 7 | % INPUT 
 8 | %	dat/statistic.volume.$network
 9 | %	dat/statistic.size.$network 
10 | %	dat/statistic.format.$network 
11 | %
12 | % OUTPUT 
13 | %	dat/statistic.avgdegree.$network
14 | %		The meaning of the values is the same as that described
15 | % 		in konect-toolbox/m/konect_statistic_avgdegree.m 
16 | %
17 | 
18 | network = getenv('network');
19 | 
20 | consts = konect_consts(); 
21 | 
22 | format long; 
23 | 
24 | % First value of each statistic file:  volume m and size n
25 | values_m = load(sprintf('dat/statistic.volume.%s', network)); 
26 | values_n = load(sprintf('dat/statistic.size.%s', network)); 
27 | m = values_m(1); 
28 | n = values_n(1); 
29 | 
30 | % Overall average degree; the result is built up as a column
31 | values = 2 * m / n; 
32 | 
33 | value_format = load(sprintf('dat/statistic.format.%s', network)); 
34 | 
35 | if value_format == consts.BIP
36 | 
37 |     % Bipartite:  the size file also holds the two side counts
38 |     assert(length(values_n) == 3); 
39 | 
40 |     n1 = values_n(2);
41 |     n2 = values_n(3); 
42 | 
43 |     assert(n == n1 + n2); 
44 | 
45 |     % Average degree of each side
46 |     values = [ values ; m / n1 ; m / n2 ]; 
47 | 
48 | elseif value_format == consts.SYM || value_format == consts.ASYM
49 | 
50 |     assert(length(values_n) == 1); 
51 | 
52 | else
53 |     
54 |     error('*** invalid format'); 
55 | 
56 | end
57 | 
58 | save(sprintf('dat/statistic.avgdegree.%s', network), 'values', ...
59 |      '-ascii', '-double'); 
60 | 


--------------------------------------------------------------------------------
/m/statistic_comp.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute a network statistic using Matlab. 
 3 | %
 4 | % PARAMETERS 
 5 | %	$network 
 6 | %	$statistic
 7 | %
 8 | % INPUT 
 9 | %	dat/data.$NETWORK.mat
10 | %	dat/info.$NETWORK
11 | %	dat/meansi.$NETWORK.mat 
12 | %
13 | % OUTPUT 
14 | %	dat/statistic.$STATISTIC.$NETWORK
15 | %		Text file with one number per line, the first being the statistic
16 | %		itself and the other lines being additional values such as the error
17 | %		on the value.  As a last value, the runtime is added. 
18 | %
19 | 
20 | network = getenv('network');
21 | statistic = getenv('statistic'); 
22 | 
23 | format long; 
24 | 
25 | data  = load(sprintf('dat/data.%s.mat', network)); 
26 | info  = read_info(network); 
27 | means = load(sprintf('dat/meansi.%s.mat', network)); 
28 | 
29 | % Build the matrix from the additively normalized edge list
30 | T = konect_normalize_additively(data.T, means); 
31 | A = konect_spconvert(T, info.n1, info.n2); 
32 | 
33 | % Only the statistic computation itself is timed, in CPU seconds
34 | started = cputime;
35 | values = konect_statistic(statistic, A, info.format, info.weights);
36 | runtime = cputime - started; 
37 | 
38 | % The runtime is appended as the last value
39 | values = [full(values) ; runtime]; 
40 | 
41 | % The first value must not be NaN -- other can to denote that they don't apply 
42 | if isnan(values(1))
43 |   values
44 |   error('*** NaN in statistic computation'); 
45 | end
46 | 
47 | save(sprintf('dat/statistic.%s.%s', statistic, network), 'values', ...
48 |      '-ascii', '-double'); 
49 | 


--------------------------------------------------------------------------------
/m/statistic_comp_spectral.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute a spectral network statistic for a full dataset.
 3 | %
 4 | % PARAMETERS 
 5 | %	$statistic
 6 | %	$decomposition
 7 | %	$network
 8 | %
 9 | % INPUT 
10 | %	dat/info.$network
11 | %	dat/decomposition.$decomposition.$network.mat
12 | %
13 | % OUTPUT 
14 | %	dat/statistic.$statistic.$network
15 | %		Same format as generated by statistic_comp.m
16 | %
17 | 
18 | network       = getenv('network');
19 | statistic     = getenv('statistic');
20 | decomposition = getenv('decomposition');
21 | 
22 | % The info is read as before although its values are not used below. 
23 | info = read_info(network); 
24 | 
25 | decomposition_file = sprintf('dat/decomposition.%s.%s.mat', decomposition, network); 
26 | loaded = load(decomposition_file); 
27 | 
28 | values = statistic_spectral(statistic, loaded.D, loaded.n);
29 | 
30 | statistic_file = sprintf('dat/statistic.%s.%s', statistic, network); 
31 | save(statistic_file, 'values', '-ascii'); 
32 | 


--------------------------------------------------------------------------------
/m/statistic_diameff.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the effective diameter. 
 3 | %
 4 | % PARAMETERS 
 5 | %	$network
 6 | %	$percentile   (between 01 and 99)
 7 | %
 8 | % INPUT FILES 
 9 | %	dat/hopdistr.$network
10 | %
11 | % OUTPUT FILES 
12 | %	dat/statistic.diameff$percentile.$network
13 | %
14 | 
15 | network = getenv('network'); 
16 | 
17 | % The text form is kept:  it is reused verbatim in the output file name. 
18 | percentile_text = getenv('percentile'); 
19 | percentile = str2double(percentile_text);
20 | 
21 | if isnan(percentile)
22 |   fprintf(2, '*** Invalid percentile\n');
23 |   exit(1);
24 | end
25 | 
26 | % Sanity check:  a value in the range [0...1] most likely means that a
27 | % fraction was passed instead of a percentage. 
28 | in_range = (percentile >= 1) && (percentile < 100); 
29 | assert(in_range);
30 | 
31 | hopdistr = load(sprintf('dat/hopdistr.%s', network));
32 | 
33 | % konect_diameff() is given the percentile as a fraction
34 | x = konect_diameff(hopdistr, percentile / 100)
35 | 
36 | assert(numel(x) == 1); 
37 | 
38 | output_file = sprintf('dat/statistic.diameff%s.%s', percentile_text, network); 
39 | save(output_file, 'x', '-ascii');
40 | 


--------------------------------------------------------------------------------
/m/statistic_full_prefatt.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the 'prefatt' statistic (preferential attachment exponent),
 3 | % by reading the 'dat/pa.%.mat' files. 
 4 | %
 5 | % We only compute this for temporal networks, even though
 6 | % non-temporal networks also have a SPLIT, but since that split is
 7 | % random, it wouldn't be interesting. 
 8 | %
 9 | % PARAMETERS 
10 | %	$network
11 | %
12 | % INPUT FILES
13 | %	dat/pa.$network.mat
14 | %
15 | % OUTPUT FILES
16 | % 	dat/statistic.prefatt.$network
17 | %		[1] normal \beta
18 | %		[2] normal mse
19 | %		[3] long-tail \beta
20 | %		[4] long-tail mse
21 | %
22 | 
23 | function statistic_full_prefatt
24 | 
25 | network = getenv('network'); 
26 | 
27 | if ~has_timestamps(network)
28 | 
29 |     % No timestamps:  nothing is computed and all four values are NaN
30 |     values = nan(4, 1); 
31 | 
32 | else
33 | 
34 |     pa = load(sprintf('dat/pa.%s.mat', network))
35 |     pa.pa.a
36 | 
37 |     % Prefer the 'v' fit (the 'passive' side) when present, else 'a'
38 |     if isfield(pa.pa, 'v')
39 |         side = pa.pa.v; 
40 |     else
41 |         side = pa.pa.a; 
42 |     end
43 |     values = get_values(side); 
44 | 
45 | end
46 | 
47 | % The result must be a single column
48 | assert(size(values, 2) == 1); 
49 | 
50 | save(sprintf('dat/statistic.prefatt.%s', network), 'values', '-ascii'); 
51 | 
52 | end
53 | 
54 | function [ret] = get_values(vect)
55 | %
56 | % Extract the four output values from one fit. 
57 | %
58 | % RESULT 
59 | %	ret	(4*1) [ e(1) ; exp(sqrt(e(3))) ; g(1) ; exp(sqrt(g(3))) ]
60 | %		Entries 2 and 4 stay NaN unless entry 1 resp. 3 is
61 | %		positive. 
62 | %
63 | % PARAMETERS 
64 | %	vect	Struct with the fields .e and .g, each indexable at
65 | %		positions 1 and 3
66 | %
67 | 
68 | ret = [ NaN ; NaN ; NaN ; NaN ]; 
69 | 
70 | ret(1) = vect.e(1);
71 | 
72 | if ret(1) > 0
73 |     ret(2) = exp(sqrt(vect.e(3))); 
74 | end
75 | 
76 | ret(3) = vect.g(1);
77 | 
78 | % The negative branch used to overwrite ret(3) with NaN, discarding
79 | % the value just stored; as for the first pair, only the second
80 | % entry of the pair is left at NaN. 
81 | if ret(3) > 0
82 |     ret(4) = exp(sqrt(vect.g(3))); 
83 | end
84 | 
85 | end
86 | 


--------------------------------------------------------------------------------
/m/statistic_lines.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the number of lines in the definition of a network, i.e.,
 3 | % the number of lines in the out.* file.  This is different from the
 4 | % volume for networks with multiple edges without timestamps, because
 5 | % these networks aggregate multiple edges into one line. 
 6 | %
 7 | % PARAMETERS 
 8 | %	$network 
 9 | %
10 | % INPUT 
11 | %	dat/info.$network
12 | %
13 | % OUTPUT 
14 | %	dat/statistic.lines.$network
15 | %
16 | 
17 | network = getenv('network');
18 | 
19 | info = read_info(network); 
20 | consts = konect_consts(); 
21 | 
22 | % Single value:  the line count taken from the info
23 | values = info.lines; 
24 | 
25 | output_file = sprintf('dat/statistic.lines.%s', network); 
26 | save(output_file, 'values', '-ascii'); 
27 | 


--------------------------------------------------------------------------------
/m/statistic_meandist.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the average distance in a network. 
 3 | %
 4 | % PARAMETERS 
 5 | %	$network
 6 | %
 7 | % INPUT FILES 
 8 | %	dat/hopdistr.$network
 9 | %
10 | % OUTPUT FILES 
11 | %	dat/statistic.meandist.$network
12 | %
13 | 
14 | network = getenv('network'); 
15 | 
16 | hopdistr_file = sprintf('dat/hopdistr.%s', network); 
17 | data = load(hopdistr_file);
18 | 
19 | % The mean is derived from the loaded hop distribution
20 | x = konect_diammean(data);
21 | 
22 | output_file = sprintf('dat/statistic.meandist.%s', network); 
23 | save(output_file, 'x', '-ascii');
24 | 


--------------------------------------------------------------------------------
/m/statistic_size.m:
--------------------------------------------------------------------------------
 1 | DEPRECATED:::
 2 | 
 3 | %
 4 | % Compute the size statistic of a network. 
 5 | %
 6 | % PARAMETERS 
 7 | %	$network 
 8 | %
 9 | % INPUT 
10 | %	dat/info.$network
11 | %
12 | % OUTPUT 
13 | %	dat/statistic.size.$network
14 | %
15 | 
16 | network = getenv('network');
17 | 
18 | info = read_info(network); 
19 | 
20 | consts = konect_consts(); 
21 | 
22 | if info.format ~= consts.BIP
23 | 
24 |   % TODO:  for ASYM networks, also output the number of nodes with
25 |   % nonzero number of outlinks and inlinks.  
26 | 
27 |   values = info.n1; 
28 | 
29 | else
30 | 
31 |   % Bipartite:  total first, then the two sides
32 |   values = [ info.n1 + info.n2 ; info.n1 ; info.n2 ]; 
33 | 
34 | end
35 | 
36 | OUT = fopen(sprintf('dat/statistic.size.%s', network), 'w');
37 | if OUT < 0
38 |     error('fopen');
39 | end
40 | 
41 | % One value per line
42 | fprintf(OUT, '%u\n', values);
43 | 
44 | if fclose(OUT) < 0
45 |     error('fclose'); 
46 | end
47 | 


--------------------------------------------------------------------------------
/m/statistic_spectral.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute a spectral statistic.  A spectral statistic is one that
 3 | % depends on the rank-r spectrum (and not just on the first or second
 4 | % eigenvalues). 
 5 | %
 6 | % RESULT 
 7 | %	values		Column vector of values
 8 | %
 9 | % PARAMETERS 
10 | %	D		(r*r) Eigenvalues or equivalent
11 | %	statistic
12 | %	n		Number of nodes
13 | %
14 | 
15 | function values = statistic_spectral(statistic, D, n)
16 | 
17 | % Only the diagonal of D is used by any of the statistics. 
18 | dd = diag(D); 
19 | 
20 | if strcmp(statistic, 'network_rank_abs')
21 | 
22 |     % Sum of the absolute values, relative to the first one
23 |     dd_abs = abs(dd); 
24 |     values = sum(dd_abs) / dd_abs(1); 
25 | 
26 | elseif strcmp(statistic, 'network_rank_norm')
27 | 
28 |     values = sum(abs(dd)); 
29 | 
30 | elseif strcmp(statistic, 'network_rank_norm4')
31 | 
32 |     values = sum(abs(dd) .^ 4); 
33 | 
34 | elseif strcmp(statistic, 'epower')
35 | 
36 |     values = estimate_power_law(abs(dd)); 
37 | 
38 | elseif strcmp(statistic, 'entropy') || strcmp(statistic, 'entropyn')
39 | 
40 |     % Both names map to the same computation
41 |     values = konect_normalized_entropy(abs(dd)); 
42 | 
43 | elseif strcmp(statistic, 'aredis')
44 | 
45 |     % Sum of the inverses, scaled by 2/n.  Entries below epsilon
46 |     % (including negative ones) are set to zero first; the resulting
47 |     % infinite inverses are then zeroed so they contribute nothing. 
48 |     epsilon = 1e-11; 
49 | 
50 |     dd(dd < epsilon) = 0; 
51 |     dd = dd .^ -1;
52 |     dd(isinf(dd)) = 0;
53 | 
54 |     values = sum(dd) * 2 / n; 
55 | 
56 | elseif strcmp(statistic, 'oddcycles')
57 | 
58 |     % x is dd rescaled so that its largest absolute value is one
59 |     alpha = 1 / max(abs(dd)); 
60 |     x = alpha * dd;
61 | 
62 |     % Three variants:  on the raw values, on the rescaled values, and
63 |     % a rational form on the rescaled values that skips the first entry
64 |     oddcycles = sum(sinh(dd)) / sum(exp(dd)); 
65 |     oddcycles_2 = sum(sinh(x)) / sum(exp(x)); 
66 |     oddcycles_3 = sum(x(2:end) ./ (1 - x(2:end) .^ 2)) / sum((-x(2:end) + 1) .^ -1); 
67 | 
68 |     values = [ oddcycles; oddcycles_2; oddcycles_3 ]; 
69 | 
70 | else
71 |     % Pass the name as an argument rather than pre-formatting it, so
72 |     % that the finished message is not interpreted as a template again. 
73 |     error('*** Invalid spectral statistic %s', statistic); 
74 | end
75 | 


--------------------------------------------------------------------------------
/m/statistic_time_diam.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the "diam" statistic from the hop distribution.
 3 | %
 4 | % PARAMETERS 
 5 | %	$NETWORK
 6 | %	$TYPE		"full" or "split"
 7 | %
 8 | % INPUT 
 9 | %	dat/hopdistr_time.$TYPE.$NETWORK
10 | %
11 | % OUTPUT 
12 | %	dat/statistic_time.$TYPE.diam.$NETWORK
13 | %
14 | 
15 | network = getenv('NETWORK');
16 | type = getenv('TYPE');
17 | 
18 | % One row per timepoint
19 | data = load(sprintf('dat/hopdistr_time.%s.%s', type, network)); 
20 | 
21 | % One output row per timepoint, four values each
22 | ret = zeros(size(data, 1), 4);
23 | 
24 | for i = 1 : size(data,1)
25 | 
26 |     % Keep only the positive entries of THIS row.  (Filtering the
27 |     % whole matrix here would give every timepoint the same values.) 
28 |     row = data(i, :); 
29 |     row = row(row > 0); 
30 | 
31 |     ret(i, 1) = length(row);                % Number of positive entries
32 |     ret(i, 2) = konect_diameff(row, 0.9); 
33 |     ret(i, 3) = konect_diameff(row, 0.5); 
34 |     ret(i, 4) = konect_diammean(row); 
35 | end
36 | 
37 | save(sprintf('dat/statistic_time.%s.diam.%s', type, network), 'ret', '-ascii'); 
38 | 


--------------------------------------------------------------------------------
/m/statistic_time_slice.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Generate 'slice' from a temporal statistic.
 3 | %
 4 | % PARAMETERS
 5 | %	$statistic	Name of the statistic
 6 | %	$network	Name of the network
 7 | %	$type
 8 | %	$K		The slice ID, i.e., column number
 9 | %
10 | % INPUT FILES
11 | %	dat/statistic_time.$type.$statistic.$network
12 | %		The temporal statistics:  one timepoint per line, each
13 | % 		line contains multiple numbers, the $K'th number being
14 | % 		the substatistic we are interested in
15 | %
16 | % OUTPUT FILES
17 | %	dat/statistic_time.$type.${statistic}+${K}.$network
18 | %		A file containing only a single column, taken from the
19 | % 		input file
20 | %
21 | 
22 | % Parameters; the missing semicolons echo them into the log. 
23 | statistic = getenv('statistic')
24 | network   = getenv('network')
25 | type      = getenv('type')
26 | k_text    = getenv('K')
27 | 
28 | k = str2num(k_text)
29 | 
30 | assert(k >= 2); 
31 | 
32 | filename_in  = sprintf('dat/statistic_time.%s.%s.%s', type, statistic, network);
33 | filename_out = sprintf('dat/statistic_time.%s.%s+%s.%s', type, statistic, k_text, network);
34 | 
35 | data = load(filename_in);
36 | 
37 | % The input must have at least 10 rows and at least K columns. 
38 | size_data = size(data) 
39 | assert(size_data(1) >= 10);
40 | assert(size_data(2) >= k); 
41 | 
42 | % Extract the K'th column and write it out on its own. 
43 | column = data(:, k);
44 | 
45 | save(filename_out, 'column', '-ascii');
46 | 


--------------------------------------------------------------------------------
/m/statistic_time_spectral.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute a spectral network statistic over time on either the full or the split dataset. 
 3 | %
 4 | % PARAMETERS 
 5 | %	$NETWORK	The network; must have timestamps
 6 | %	$STATISTIC	The statistic
 7 | %	$DECOMPOSITION
 8 | %	$TYPE		The data to use, "full" or "split"
 9 | %
10 | % INPUT 
11 | %	dat/info.$NETWORK
12 | %	dat/stepsi.$NETWORK (only full)
13 | %	dat/steps.$NETWORK.mat (only split) 
14 | %	dat/decomposition_time.$TYPE.$DECOMPOSITION.$NETWORK.mat 
15 | %
16 | % OUTPUT 
17 | %	dat/statistic_time.$TYPE.$STATISTIC.$NETWORK	
18 | %		All statistics as text.  One timepoint per line.  Each
19 | %		line contains the statistics, with the first number
20 | %		being the main statistic. 
21 | %
22 | 
23 | network = getenv('NETWORK'); 
24 | statistic = getenv('STATISTIC'); 
25 | decomposition = getenv('DECOMPOSITION'); 
26 | type = getenv('TYPE');
27 | is_split = strcmp(type, 'split'); 
28 | 
29 | % The result is not used below; the call is kept because
30 | % dat/info.$NETWORK is a declared input of this script. 
31 | info = read_info(network); 
32 | 
33 | % The steps come from a text file (full) or a .mat file (split). 
34 | if ~is_split
35 |     e_steps = load(sprintf('dat/stepsi.%s', network)); 
36 | else 
37 |     steps = load(sprintf('dat/steps.%s.mat', network)); 
38 |     e_steps = steps.e_steps; 
39 | end
40 | 
41 | data_decomposition = load(sprintf('dat/decomposition_time.%s.%s.%s.mat', type, decomposition, network)); 
42 | 
43 | ret = []; 
44 | 
45 | for k = 1 : numel(e_steps)
46 | 
47 |     decomposition_k = data_decomposition.decompositions(k); 
48 |     values = statistic_spectral(statistic, decomposition_k.D, decomposition_k.n); 
49 |     row = values'; 
50 | 
51 |     % Bring the accumulated matrix and the new row to a common width
52 |     % by zero-padding whichever is narrower.  The padding for ret must
53 |     % have as many rows as ret itself, not just one. 
54 |     if size(ret, 1) > 0
55 |         width_diff = size(row, 2) - size(ret, 2); 
56 |         if width_diff > 0
57 |             ret = [ret zeros(size(ret, 1), width_diff)];
58 |         elseif width_diff < 0
59 |             row = [row zeros(1, -width_diff)]; 
60 |         end
61 |     end
62 | 
63 |     ret = [ret ; row];
64 | 
65 | end
66 | 
67 | save(sprintf('dat/statistic_time.%s.%s.%s', type, statistic, network), 'ret', '-ascii');
68 | 


--------------------------------------------------------------------------------
/m/statistics_time_plot.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Aggregate plot of all statistics over time.
 3 | %
 4 | % PARAMETERS 
 5 | %	$NETWORK
 6 | %	$TYPE		"full" or "split"
 7 | %	$STATISTICS	Space-separated list of statistics
 8 | %
 9 | % INPUT 
10 | %	dat/statistic_time.$TYPE.$STATISTIC.$NETWORK
11 | %		for each $STATISTIC in $STATISTICS
12 | %
13 | % OUTPUT 
14 | %	plot/statistics_time.[a].$TYPE.$NETWORK.eps
15 | %
16 | 
17 | network = getenv('NETWORK');
18 | type = getenv('TYPE'); 
19 | statistics = getenv('STATISTICS'); 
20 | 
21 | statistics = regexp(statistics, '\S+', 'match') % Split on whitespace into a cell array of names (echoed)
22 | 
23 | datas = [];   % One column per statistic
24 | legends = [];   % One legend entry per statistic
25 | 
26 | for k = 1:size(statistics, 2)
27 |     statistic = statistics(k)   % 1x1 cell (echoed)
28 |     statistic = statistic{:}   % Unwrapped to the name itself (echoed)
29 | 
30 |     data = load(sprintf('dat/statistic_time.%s.%s.%s', type, statistic, network));
31 |     data = data(:,1);   % Only the first column is plotted
32 | 
33 |     % Normalize:  the minimum and maximum are taken from row 10 onwards only
34 |     part = data(10:end); 
35 |     i = min(part); 
36 |     a = max(part); 
37 |     data = (data - i) / (a - i);   % Rows 10..end now span [0, 1]; NOTE(review): a == i divides by zero
38 | 
39 |     datas = [ datas  data ];
40 | 
41 |     legends = [ legends ; cellstr(konect_label_statistic(statistic, 'matlab-short')) ]; 
42 | end
43 | 
44 | colors= [0  0  0;
45 |          1  0  0;
46 |          0  1  0;
47 |          0  0  1;
48 |          .5 0  0; 
49 |          0  .5 0; 
50 |          .7 .7 0;
51 |          1  0  1;
52 |          0  1  1 ];
53 | 
54 | set(0,'DefaultAxesColorOrder', colors, 'DefaultAxesLineStyleOrder','-|--|-.') % Root-level defaults:  nine colors and three line styles
55 | 
56 | plot(1 : size(datas, 1), datas);   % One curve per column of datas
57 | 
58 | axis([ 0 size(datas,1) 0 1]);   % Values outside [0, 1] (possible in rows 1..9) are cut off
59 | 
60 | xlabel(konect_label_statistic('volume', 'matlab')); 
61 | 
62 | legend(legends, 'Location', 'EastOutside'); 
63 | 
64 | konect_print(sprintf('plot/statistics_time.a.%s.%s.eps', type, network)); 
65 | 


--------------------------------------------------------------------------------
/m/steps.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute steps for time-dependent analysis of a split dataset.  
 3 | %
 4 | % The time steps are not exactly all over the same number of edges.
 5 | % Instead, they are fudged so as to put steps on the moment the two
 6 | % splits happen. 
 7 | %
 8 | % PARAMETERS 
 9 | %	$network		Network
10 | %
11 | % INPUT 
12 | %	dat/split.$network.mat, dat/means.$network.mat
13 | %
14 | % OUTPUT 
15 | %	dat/steps.$network.mat 		Steps
16 | %		steps_all		Total number of steps ( = size(e_steps, 1))
17 | %		steps_source		Number of source steps
18 | %		steps_target 		Number of target steps
19 | %		e_steps			(count×1) Edge count at each step
20 | %
21 | 
22 | network = getenv('network'); 
23 | 
24 | count = 100;   % Total number of steps
25 | 
26 | split = load(sprintf('dat/split.%s.mat', network)); 
27 | means = load(sprintf('dat/means.%s.mat', network));   % NOTE(review): loaded but not used below
28 | 
29 | % Edge counts of the three parts of the split (echoed)
30 | e_source = size(split.T_source, 1)
31 | e_target = size(split.T_target, 1)
32 | e_test   = size(split.T_test  , 1) 
33 | 
34 | % Last index in the source and target set:  the share of the COUNT steps proportional to the edge counts, rounded down
35 | index_source = floor(e_source * count / (e_source + e_target + e_test))
36 | index_target = floor((e_source + e_target) * count / (e_source + e_target + e_test))
37 | 
38 | e_steps = [ floor((1 : index_source) * e_source / index_source)  ...
39 |             (e_source + floor((1 : index_target - index_source) * e_target / (index_target - index_source)))  ...
40 |             (e_source + e_target + floor(( 1 : count - index_target) * e_test / (count - index_target))) ]'
41 | 
42 | steps_all = count
43 | steps_source = index_source
44 | steps_target = index_target - index_source   % Steps that fall into the target part
45 | 
46 | save(sprintf('dat/steps.%s.mat', network), '-v7.3', ...
47 |   'steps_all', 'steps_source', 'steps_target', ...
48 |   'e_steps'); 
49 | 


--------------------------------------------------------------------------------
/m/stepsi.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Compute the steps
 3 | %
 4 | % PARAMETERS 
 5 | %	$network
 6 | %
 7 | % INPUT 
 8 | %	dat/info.$network
 9 | %
10 | % OUTPUT 
11 | %	dat/stepsi.$network
12 | %		The list of step values as text, with one number per line
13 | %
14 | 
15 | count = 100; 
16 | 
17 | network = getenv('network'); 
18 | 
19 | info = read_info(network); 
20 | 
21 | % Step k is k/count of info.lines, rounded down. 
22 | stepsi_data = floor((1:count) * info.lines / count); 
23 | 
24 | filename = sprintf('dat/stepsi.%s', network);
25 | 
26 | FILE = fopen(filename, 'w'); 
27 | if FILE < 0
28 |     error('*** Cannot open %s for writing', filename); 
29 | end
30 | 
31 | % One step value per line
32 | fprintf(FILE, '%u\n', stepsi_data); 
33 | 
34 | if fclose(FILE) < 0
35 |     error('*** Error closing %s', filename); 
36 | end
37 | 


--------------------------------------------------------------------------------
/m/styles_method.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Style info by methods.
 3 | %
 4 | % RESULT 
 5 | %	colors			Struct by method
 6 | %		.(method)	(1*3) Color
 7 | %	line_styles		Struct by method
 8 | %		.(method)	(string) line style
 9 | %	markers			Struct by method
10 | %		.(method)	(string) marker 
11 | %
12 | 
13 | function [colors line_styles markers] = styles_method()
14 | 
15 | % Methods with a complete style.  Columns:  name, color, line style, marker. 
16 | styled = { ...
17 |     'svd',        [1  0  0 ],  '-',   'o' ; ...
18 |     'diag',       [0  1  0 ],  '--',  's' ; ...
19 |     'takane',     [0  0  1 ],  ':',   '*' ; ...
20 |     'dedicom1u',  [1  0  1 ],  '-',   '+' ; ...
21 |     'dedicom1v',  [0  1  1 ],  '--',  'x' ; ...
22 |     'dedicom2',   [.7 0  0 ],  ':',   'd' ; ...
23 |     'dedicom2s',  [0  .7 0 ],  '-',   'h' ; ...
24 |     'dedicom3',   [0  0  .7],  '--',  'p' };
25 | 
26 | % Methods that only have a color (no line style, no marker). 
27 | color_only = { ...
28 |     'sym',        [.7 .7 0 ] ; ...
29 |     'pref',       [.7 0  .7] ; ...
30 |     'neib',       [0  .7 .7] };
31 | 
32 | colors = struct();
33 | line_styles = struct();
34 | markers = struct(); 
35 | 
36 | for k = 1 : size(styled, 1)
37 |     method = styled{k, 1};
38 |     colors.(method)      = styled{k, 2};
39 |     line_styles.(method) = styled{k, 3};
40 |     markers.(method)     = styled{k, 4};
41 | end
42 | 
43 | for k = 1 : size(color_only, 1)
44 |     colors.(color_only{k, 1}) = color_only{k, 2};
45 | end
46 | 


--------------------------------------------------------------------------------
/m/time_degree.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Draw overlay of degree distributions over time. 
 3 | %
 4 | % PARAMETERS 
 5 | %	$NETWORK
 6 | %	$RANDOMIZE	'1' to randomize order of edges 
 7 | %
 8 | % INPUT 
 9 | %	dat/split.$NETWORK.mat
10 | %
11 | % OUTPUT 
12 | %	plot/time_degree.$NETWORK.eps
13 | %
14 | 
15 | consts = konect_consts(); 
16 | 
17 | network = getenv('NETWORK'); 
18 | randomize = strcmp(getenv('RANDOMIZE'), '1'); 
19 | 
20 | info = read_info(network); 
21 | 
22 | split = load(sprintf('dat/split.%s.mat', network)); 
23 | 
24 | % All edges:  source, target and test part stacked in this order
25 | T = [split.T_source; split.T_target; split.T_test]; 
26 | 
27 | s = size(T,1)
28 | 
29 | if randomize
30 |     % Shuffle the rows.  The permuted matrix must be assigned back;
31 |     % indexing alone leaves T unchanged. 
32 |     T = T(randperm(s), :); 
33 | end
34 | 
35 | % One overlay each for the first third, the first two thirds and all edges
36 | steps = [round(s/3), round(s * 2/3), s]
37 | 
38 | cm = ...
39 |     [232  43 235;
40 |      35  76 191;
41 |      67 177  58] / 255; 
42 | 
43 | point_styles = {'o', 'x', 's'}; 
44 | 
45 | hold on; 
46 | 
47 | for i = 1 : numel(steps)
48 | 
49 |     A = konect_spconvert(T(1:steps(i), :), split.n1, split.n2); 
50 | 
51 |     if info.format == consts.SYM
52 |         A = A + A'; 
53 |     end  
54 | 
55 |     % Row sums of A, and how many rows have each value 0..max
56 |     degree = full(sum(A,2));
57 |     maxcount = max(degree); 
58 |     freq = histc(degree, 0:maxcount);
59 | 
60 |     % Only values that actually occur are plotted
61 |     nz = freq ~= 0; 
62 |     ra = 0:maxcount; 
63 |     ra = ra(nz);
64 |     fq = freq(nz); 
65 | 
66 |     loglog(ra, fq, point_styles{i}, 'Color', cm(i,:));
67 | end
68 | 
69 | xlabel('Number of neighbors (n)');
70 | ylabel('Frequency'); 
71 | 
72 | % The scales are set explicitly after plotting.  NOTE(review):
73 | % presumably needed because 'hold on' precedes the first loglog() call. 
74 | set(gca, 'XScale', 'log');
75 | set(gca, 'YScale', 'log');
76 | 
77 | legend({'|E| = 1/3 |Eall|', '|E| = 2/3 |Eall|', '|E| = 3/3 |Eall|'}); 
78 | 
79 | % Randomized runs are written to a separate file
80 | extra = '';
81 | if randomize
82 |     extra = '.rand'; 
83 | end
84 | 
85 | konect_print(sprintf('plot/time_degree.%s%s.eps', network, extra));  
86 | 


--------------------------------------------------------------------------------
/m/time_xaxis.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Set the X axis tick labels correctly for year numbers. 
 3 | %
 4 | % DEPRECATED -- use time_xaxis_unix.m 
 5 | %
 6 | % PARAMETERS 
 7 | %	t_min, t_max	Minimum and maximum values to be plotted, in
 8 | %			year numbers 
 9 | %
10 | 
11 | function time_xaxis(t_min, t_max)
12 | 
13 | NUM = 7; % Maximum number of ticks to show on the date axis (X axis)
14 | 
15 | months = {'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', ...
16 |           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'}; 
17 | 
18 | if t_max - t_min < 1 / 6   % Span shorter than 1/6:  one tick per day
19 |     r = (ceil(t_min / (24 * 60 * 60)) : floor(t_max / (24 * 60 * 60))) / 365.25 + 1970 % NOTE(review): divides by seconds per day although the branch test treats the span as years -- confirm the intended unit
20 |     length_r = length(r)
21 |     if length(r) > 0
22 |         'aaa'   % Echoes a marker string; looks like leftover debug output
23 |         for i = 1 : length(r)
24 |             l{i} = sprintf('Day %d', i);
25 |         end
26 |         l
27 |         set(gca, 'XTick', r, 'XTickLabel', l); 
28 |     end
29 | 
30 | elseif t_max - t_min < (1/12) * NUM   % One tick per month
31 |     r = (ceil(t_min * 12) / 12):(1/12):(floor(t_max * 12) / 12); 
32 |     set(gca, 'XTick', r);
33 |     rr = ceil(t_min * 12) : floor(t_max * 12);   % Only used by the length assertion below
34 |     assert(length(r) == length(rr));
35 |     assert(length(r) > 0); 
36 |     for i = 1 : length(r)
37 |         if (mod(i,12) == 0)   % NOTE(review): the label is chosen from the tick index i, not from the month number rr(i) -- confirm
38 |             text = round(r(i));   % Numeric label, whereas the other branch stores a string
39 |         else
40 |             month = mod(i,12);
41 |             text = months{month + 1}; 
42 |         end
43 |         l{i} = text;
44 |     end
45 |     set(gca, 'XTickLabel', l); 
46 | 
47 | elseif t_max - t_min < (1/6) * NUM   % One tick every two months
48 |     r = (ceil(t_min * 6) / 6):(1/6):(floor(t_max * 6) / 6); 
49 |     set(gca, 'XTick', r);
50 |     rr = ceil(t_min * 6) : floor(t_max * 6);   % Only used by the length assertion below
51 |     assert(length(r) == length(rr));
52 |     assert(length(r) > 0); 
53 |     for i = 1 : length(r)
54 |         if (mod(i,6) == 0)   % NOTE(review): again based on the tick index i rather than rr(i) -- confirm
55 |             text = round(r(i));
56 |         else
57 |             month = 2 * mod(i,6);
58 |             text = months{month + 1}; 
59 |         end
60 |         l{i} = text;
61 |     end
62 |     set(gca, 'XTickLabel', l); 
63 | 
64 | elseif t_max - t_min < 1 * NUM   % From here on:  ticks every 1, 2, 5, ... 100 units, default labels
65 |     set(gca, 'XTick', ceil(t_min:floor(t_max)));
66 | elseif t_max - t_min < 2 * NUM
67 |     set(gca, 'XTick', ceil(t_min:2:floor(t_max)));
68 | elseif t_max - t_min < 5 * NUM
69 |     set(gca, 'XTick', ceil(t_min:5:floor(t_max)));
70 | elseif t_max - t_min < 10 * NUM
71 |     set(gca, 'XTick', ceil(t_min:10:floor(t_max)));
72 | elseif t_max - t_min < 20 * NUM
73 |     set(gca, 'XTick', ceil(t_min:20:floor(t_max)));
74 | elseif t_max - t_min < 50 * NUM
75 |     set(gca, 'XTick', ceil(t_min:50:floor(t_max)));
76 | elseif t_max - t_min < 100 * NUM
77 |     set(gca, 'XTick', ceil(t_min:100:floor(t_max)));
78 | end   % Spans of 100 * NUM or more leave the ticks untouched
79 | 
80 | 


--------------------------------------------------------------------------------
/m/trend.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Estimate the trend in the time evolution of a statistic.
 3 | %
 4 | % PARAMETERS 
 5 | %	$NETWORK
 6 | %	$STATISTIC
 7 | %	$TYPE
 8 | %
 9 | % INPUT 
10 | %	dat/statistic_time.$TYPE.$STATISTIC.$NETWORK
11 | %
12 | % OUTPUT 
13 | %	dat/trend.$TYPE.$STATISTIC.$NETWORK.mat
14 | %		H	1 when the trend is significant, 0 otherwise (regardless of direction)
15 | %		p 	p value (regardless of direction)
16 | %		updown	Direction of trend: +1 up, -1 down
17 | %		range	Range of time values used (in 1..100)
18 | %		values	Values used
19 | %
20 | 
21 | network = getenv('NETWORK');
22 | statistic = getenv('STATISTIC');
23 | type = getenv('TYPE') 
24 | 
25 | data = load(sprintf('dat/statistic_time.%s.%s.%s', type, statistic, network)); 
26 | 
27 | alpha = 0.05;   % Passed to Mann_Kendall() below
28 | 
29 | if strcmp(type, 'full')
30 |   first = floor(size(data,1) / 2);   % Start at the middle row
31 | elseif strcmp(type, 'split')
32 |   steps = load(sprintf('dat/steps.%s.mat', network))
33 |   first = 1 + steps.steps_source;   % Start just after the source steps
34 | else
35 |   error '***';   % Any other $TYPE is rejected
36 | end
37 | 
38 | range = first:size(data,1);
39 | 
40 | values = data(range, 1)   % Only the first column is tested
41 | 
42 | [ H p ] = Mann_Kendall(values, alpha)
43 | 
44 | % New code for estimating updown:  Linear least squares on ranks
45 | if size(values,1) > size(values,2), values = values'; end;   % Make values a row vector
46 | [x i] = sort(values) ;   % i is the sorting permutation; x is not used
47 | X = i / [ 1 : length(values) ; ones(1, length(values)) ];   % Right division fits i against [position; 1]; X(1) is the slope
48 | updown = sign(X(1))
49 | 
50 | % Old code for estimating updown:  sign of difference of sum between first and second half of values
51 | %l = floor(length(values) / 2)
52 | %values_begin = values(1:l)
53 | %values_end   = values(end:-1:end-l+1)
54 | %updown = sign(sum(values_end) - sum(values_begin))
55 | 
56 | save(sprintf('dat/trend.%s.%s.%s.mat', type, statistic, network), '-v7.3', ...
57 |   'H', 'p', 'updown', 'range', 'values');
58 | 


--------------------------------------------------------------------------------
/m/trend_plot.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot the network evolution together with the p-value and tendency. 
 3 | %
 4 | % PARAMETERS 
 5 | %	$NETWORK
 6 | %	$STATISTIC
 7 | %	$TYPE		full or split
 8 | %
 9 | % INPUT 
10 | %	dat/trend.$TYPE.$STATISTIC.$NETWORK.mat
11 | %
12 | % OUTPUT 
13 | %	plot/trend.$TYPE.$STATISTIC.$NETWORK.eps
14 | %
15 | 
16 | network = getenv('NETWORK');
17 | statistic = getenv('STATISTIC');
18 | type = getenv('TYPE');
19 | 
20 | % Layout constants
21 | width_line = 5; 
22 | size_font = 86; 
23 | 
24 | result = load(sprintf('dat/trend.%s.%s.%s.mat', type, statistic, network)); 
25 | 
26 | expected_direction = get_updown_statistic(); 
27 | 
28 | % True when the trend is significant and its direction equals the
29 | % one stored for this statistic (echoed into the log). 
30 | shrinking = result.H & result.updown == expected_direction.(statistic) 
31 | 
32 | % Red unless the test above succeeded, green otherwise. 
33 | color_test = [ .7 0 0 ];
34 | if shrinking
35 |     color_test = [ 0 .7 0 ]; 
36 | end
37 | 
38 | plot(result.range, result.values, '-', 'LineWidth', width_line, 'Color', color_test); 
39 | 
40 | set(gca, 'XTick', [], 'YTick', []); 
41 | 
42 | axis tight;
43 | 
44 | % The p-value is printed in the lower right corner, only in the green case. 
45 | if shrinking
46 |     limits = axis(); 
47 |     text(limits(2), limits(3), sprintf('%.3g', result.p), 'VerticalAlignment', 'bottom', 'HorizontalAlignment', 'right', 'FontSize', size_font); 
48 | end
49 | 
50 | konect_print(sprintf('plot/trend.%s.%s.%s.eps', type, statistic, network)); 
51 | 


--------------------------------------------------------------------------------
/m/zipf.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Draw Zipf plots.  This is the transpose of the BIDD plot, i.e., of
 3 | % the cumulative degree distribution.  
 4 | % 
 5 | % PARAMETERS 
 6 | %	$network
 7 | %
 8 | % INPUT 
 9 | %	dat/data.$network.mat
10 | %
11 | % OUTPUT 
12 | %	plot/zipf.[auv].$network.eps
13 | %		a - total
14 | %		u,v - Row/column based (only BIP and ASYM)
15 | %
16 | 
17 | network = getenv('network');
18 | 
19 | data = load(sprintf('dat/data.%s.mat', network)); 
20 | 
21 | T = data.T; 
22 | 
23 | consts = konect_consts(); 
24 | 
25 | info = read_info(network); 
26 | 
27 | % Unless the weights are POSITIVE, only the two node columns are kept. 
28 | if info.weights ~= consts.POSITIVE & size(T,2) >= 3
29 |     T = T(:, 1:2); 
30 | end
31 | 
32 | % Multiplicities:  the third column if there still is one, otherwise
33 | % empty, which zipf_one() takes to mean "all ones". 
34 | if size(T,2) >= 3
35 |     multiplicities = T(:,3);
36 | else
37 |     multiplicities = []; 
38 | end
39 | 
40 | %
41 | % U, V
42 | %
43 | if info.format ~= consts.SYM
44 | 
45 |     zipf_one(T(:,1), multiplicities, 'u');
46 |     konect_print(sprintf('plot/zipf.u.%s.eps', network)); 
47 | 
48 |     zipf_one(T(:,2), multiplicities, 'v'); 
49 |     konect_print(sprintf('plot/zipf.v.%s.eps', network)); 
50 | end
51 | 
52 | %
53 | % A
54 | %
55 | 
56 | % For bipartite networks the second-column indexes are shifted past
57 | % the largest first-column index, so that both sides stay distinct. 
58 | if info.format == consts.BIP
59 |     offset = max(T(:,1)); 
60 | else
61 |     offset = 0; 
62 | end
63 | 
64 | p = [ T(:,1) ; T(:,2) + offset ]; 
65 | q = [ multiplicities ; multiplicities ]; 
66 | 
67 | zipf_one(p, q, 'a'); 
68 | konect_print(sprintf('plot/zipf.a.%s.eps', network)); 
69 | 


--------------------------------------------------------------------------------
/m/zipf_one.m:
--------------------------------------------------------------------------------
 1 | %
 2 | % Plot one Zipf plot.
 3 | %
 4 | % PARAMETERS 
 5 | %	p	(e*1) Node indexes
 6 | %	q	(e*1) Multiplicities; [] to denote all ones
 7 | %	letter
 8 | %
 9 | 
10 | function zipf_one(p, q, letter)
11 | 
12 | font_size = 24; 
13 | marker_size = 13; 
14 | point_style = '.'; 
15 | 
16 | colors = konect_colors_letter();
17 | 
18 | % An empty q stands for multiplicity one on every entry. 
19 | if isempty(q)
20 |     q = 1; 
21 | end
22 | 
23 | % Sum the multiplicities per node index, then keep the nonzero sums
24 | % in decreasing order. 
25 | degrees = full(sparse(p, 1, q, max(p), 1)); 
26 | degrees = sort(degrees(degrees ~= 0), 'descend'); 
27 | 
28 | loglog(1:length(degrees), degrees, point_style, 'Color', colors.(letter), 'MarkerSize', marker_size); 
29 | 
30 | set(gca, 'FontSize', font_size); 
31 | 
32 | xlabel('Rank (i)', 'FontSize', font_size);
33 | ylabel('Degree (d(i))', 'FontSize', font_size); 
34 | 
35 | set(gca, 'XMinorTick', 'on');
36 | set(gca, 'YMinorTick', 'on'); 
37 | set(gca, 'TickLength', [0.05 0.05]); 
38 | 
39 | % Workaround for Matlab bug. Otherwise, the minor ticks are not visible. 
40 | %
41 | % Major ticks are put on every power of ten inside the axis limits.
42 | % log10() is used rather than log(x) / log(10):  the quotient can fall
43 | % just short of an integer when a limit is an exact power of ten, and
44 | % floor()/ceil() would then drop the outermost tick. 
45 | ax = axis(); 
46 | if ax(1) > 0 && ax(3) > 0 
47 |   set(gca, 'XTick', 10 .^ (ceil(log10(ax(1))):floor(log10(ax(2))))); 
48 |   set(gca, 'YTick', 10 .^ (ceil(log10(ax(3))):floor(log10(ax(4))))); 
49 | end
50 | 


--------------------------------------------------------------------------------
/mns:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Execute Stu in parallel, dividing used memory by K, and limiting
 4 | # runtime of processes.  Also, be NICE.
 5 | #
 6 | # In order to limit the memory used by KONECT processes, use
 7 | #
 8 | #    ulimit -v SIZE
 9 | #
10 | # where SIZE is the amount of memory that should be used, in kilobytes.  
11 | #
12 | # INVOCATION 
13 | #	$0 [K] args...
14 | #		K is the number of processes to run in parallel; if not
15 | #		given, defaults to a single process 
16 | #
17 | # PARAMETERS
18 | #	$MN_RUNTIME	Maximal runtime in hours; if not set, don't
19 | #			limit runtime 
20 | #
21 | 
22 | K=1
23 | 
24 | [ "$1" ] && echo "$1" | grep '^[0-9]*$' >/dev/null && {
25 | 	K=$1
26 | 	shift 
27 | }
28 | 
29 | rm -f error.log 
30 | 
31 | if [ "$MN_RUNTIME" ] ; then 
32 | 	RUNTIME_S=$((MN_RUNTIME * 3600))
33 | 	ulimit -t "$RUNTIME_S"
34 | fi 
35 | 
36 | exec nice sh/mem "$K" stu -j "$K" "$@"
37 | 


--------------------------------------------------------------------------------
/octave:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Execute an Octave script.  This is a thin wrapper around Octave that
 4 | # is used in KONECT Analysis, but is not necessarily specific to it.  It
 5 | # is also exported to other projects. 
 6 | #
 7 | # ARGUMENTS
 8 | # 	$1	filename of script to execute, usually a .m script 
 9 | #
10 | # OUTPUT FILES 
11 | #	error.log	Errors from all runs of this script are appended
12 | #	                to this file  
13 | #
14 | 
15 | SCRIPT=$1
16 | 
17 | [ "$PREFIX" ] && PREFIX=."$PREFIX" # A non-empty $PREFIX gets a leading dot, for use in the log filename
18 | LOGNAME=$(basename "$SCRIPT" .m) # The log name starts with the script name without directory and .m suffix
19 | 
20 | # Error log:  file descriptor 6 appends to error.log in the current directory
21 | exec 6>>error.log
22 | 
23 | # Words to use in the logfile are the lowercase environment variables.
24 | # This may break in some cases.  The values are appended to the log name, separated by dots.
25 | 
26 | for NAME in $(set | sed -e 's,=.*$,,;tnext;d;:next;/^[a-z][a-z_-]*$/!d') ; do
27 | 	eval VALUE=\"\$"$NAME"\"
28 | 	length=$(echo $(echo "$VALUE" | wc -c)) # Byte count including the newline added by echo; the outer echo strips padding
29 | 	if [ "$length" -lt 30 ] ; then # Don't insert value if too long
30 | 		LOGNAME=$LOGNAME.$VALUE
31 | 	fi
32 | done
33 | 
34 | export LOG="${TMPDIR-/tmp}/o.$LOGNAME$PREFIX.log" # Log file in $TMPDIR, or /tmp if $TMPDIR is unset
35 | printf >&2 '\t%s\n' "$LOG" # Tell the user where the log goes
36 | 
37 | ## #
38 | ## # Reset locale because Octave outputs some localized number formats with the
39 | ## # wrong locale.  
40 | ## #
41 | ## unset $(set | sed -E 's,^(LC_.*)=.*$,\1,;t;d')
42 | ## set | grep -E '^LC' >&2
43 | 
44 | #
45 | # Invocation of Octave:
46 | #
47 | # -q	Quiet
48 | #
49 | # Note: --no-window-system is not supported by Octave 3.0 
50 | #
51 | 
52 | # Absolute directory of the script; it is put first on Octave's path
53 | DIR_SCRIPT=$(dirname "$SCRIPT")
54 | if echo "$DIR_SCRIPT" | grep -vq '^/' ; then
55 | 	DIR_SCRIPT="$PWD/$DIR_SCRIPT"
56 | fi
57 | 
58 | OCTAVE_PATH="$OCTAVE_PATH:$MATLABPATH"
59 | OCTAVE_PATH="$DIR_SCRIPT:$OCTAVE_PATH"
60 | export OCTAVE_PATH
61 | 
62 | # $LOG contains values of environment variables and must be quoted
63 | # here, as it is everywhere else. 
64 | exec 3>"$LOG"
65 | GNUTERM=dumb DISPLAY= octave -q -W --no-gui --no-window-system "$SCRIPT" "$LOG" >&3 2>&1  ||
66 | {
67 | 	# The error messages of Octave don't conform to a well-defined
68 | 	# standard, and usually don't even show the location first.
69 | 	# Therefore, we first extract all "location" lines, and then
70 | 	# output the full log. 
71 | 
72 | 	<"$LOG" sed >&2 -E -e '
73 | 		s|^error:\s*(.*) at line ([0-9]+), column ([0-9]+)\s*$|\1:\2:\3:|;t
74 | 		s,^parse error near line ([0-9]+) of file (.+)$,\2:\1:,;t
75 | 		d
76 | 	'
77 | 
78 |  	<"$LOG" sed >&2 -n -E -e '
79 | 		/^(error:|parse error)/,$p
80 | 	'
81 | 
82 | 	# Report the failure on stderr and in error.log (descriptor 6)
83 | 	echo >&2 "*** error in $LOG"
84 | 	echo >&6 "*** error in $LOG"
85 | 
86 | 	exit 1
87 | }
88 | 
89 | echo >&3 '=== FINISHED SUCCESSFULLY ==='
90 | 
91 | exit 0
92 | 


--------------------------------------------------------------------------------
/pl/Konect.pm:
--------------------------------------------------------------------------------
 1 | package Konect; 
 2 | 
 3 | use strict;
 4 | use warnings; 
 5 | 
 6 | # Without Exporter's import(), the @EXPORT and @EXPORT_OK lists below
 7 | # have no effect and "use Konect;" would not make bitwidth() available
 8 | # in the calling package. 
 9 | use Exporter 'import';
10 | 
11 | # Can be exported
12 | our @EXPORT_OK = qw( bitwidth );
13 | 
14 | # Exported by default 
15 | our @EXPORT    = qw( bitwidth );
16 | 
17 | #
18 | # Given a number of nodes, return the bitwidth character for the
19 | # corresponding unsigned type.  
20 | #
21 | # The result is 'a' for N <= 1.  Otherwise N is repeatedly replaced by
22 | # the integer part of its square root until it reaches 1 or less, and
23 | # the letter advances by one for each such step ('b', 'c', ...). 
24 | #
25 | # The ($) prototype is kept because removing it would change how
26 | # existing calls are parsed. 
27 | #
28 | sub bitwidth($) {
29 |     my ($N) = @_;
30 |     
31 |     my $ret = 'a';
32 | 
33 |     while ($N > 1) {
34 | 	$N = int(sqrt($N)); 
35 | 	++$ret;   # String increment:  'a' -> 'b' -> ...
36 |     }
37 | 
38 |     return $ret; 
39 | }
40 | 
41 | 1;
42 | 


--------------------------------------------------------------------------------
/pl/README:
--------------------------------------------------------------------------------
1 | This directory (pl/) contains Perl modules.  Perl scripts are not here
2 | but in sh/ instead.
3 | 


--------------------------------------------------------------------------------
/pl/TexToHtml/Converter.pm:
--------------------------------------------------------------------------------
 1 | package TexToHtml::Converter;
 2 | sub to_text{
 3 | 	# Called as a class method:  the invocant is ignored, the TeX string follows.
 4 | 	my (undef, $text) = @_;
 5 | 
 6 | 	# A double hyphen becomes an en dash entity
 7 | 	$text =~ s!--!&ndash;!g;
 8 | 
 9 | 	# {\command{...}} is replaced by its inner content ...
10 | 	$text =~ s!\{\\[^{]+\{(.*)\}\}!$1!g;
11 | 	# ... and all remaining curly braces are dropped
12 | 	$text =~ tr/{}//d;
13 | 	# NOTE(review): requires FOUR closing quotes after ``...; confirm this is intended
14 | 	$text =~ s/``([^']*)''''/"$1"/g;
15 | 	return $text;
16 | }
17 | sub convert{
18 | 	# Called as a class method:  the invocant is ignored, the TeX string follows.
19 | 	my (undef, $html) = @_;
20 | 
21 | 	# \url{X} becomes a hyperlink to X showing X (command name matched case-insensitively)
22 | 	$html =~ s!\\url\{([^}]*)\}!<a href="$1">$1</a>!gi;
23 | 
24 | 	# A double hyphen becomes an en dash entity
25 | 	$html =~ s!--!&ndash;!g;
26 | 
27 | 	# {\command{...}} is replaced by its inner content
28 | 	$html =~ s!\{\\[^{]+\{(.*)\}\}!$1!g;
29 | 
30 | 	# All remaining curly braces are dropped
31 | 	$html =~ tr/{}//d;
32 | 	return $html;
33 | }
34 | 
35 | 
36 | return 1;
37 | 


--------------------------------------------------------------------------------
/runtime.source:
--------------------------------------------------------------------------------
 1 | %
 2 | % output of svds.  k m n r
 3 | %
 4 | %= dblp-cite		105	 12561	 12563	  49779
 5 | %= advogato      	152 	  7385 	  7385	  57627
 6 | %= hep-th-citations	 59	 27770	 27766	 352807
 7 | %= slashdot-zoo		 30	 71523	 71523	 488440
 8 | %= epinions		 16     131828	131827   841372
 9 | %= movielens		163	  6040	  3706	1000209
10 | %= bx			 10	105283	340532	1149780
11 | %= www			 17	325729  325729	1497135
12 | %= citeulike-tags	  8	731769	153277	2411819
13 | %= dblp-author		  1	660822 1099443  2773008
14 | %= wiki-en-cat		  2    1853493  182947	3795796
15 | %= trec-wt10g		  1    1601787 1601772  8063026
16 | %= filmtipset		 21      64051   49087 14385828
17 | %= libimseti		  7	220970  220962 17359346 
18 | %%= patentcite		  0     3774768 3774680 16522438
19 | 
20 | 


--------------------------------------------------------------------------------
/sh/README:
--------------------------------------------------------------------------------
1 | This directory contains scripts, i.e. files that begin with '#!'.  These
2 | are shell scripts, Perl 5 scripts, awk scripts, Sed scripts, etc. 
3 | 


--------------------------------------------------------------------------------
/sh/category:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Filter networks by their category.
 4 | #
 5 | # PARAMETERS 
 6 | #	$category	Name of the category
 7 | #
 8 | # STDIN 
 9 | #	One network name per line
10 | #
11 | # STDOUT 
12 | #	One network name per line, containing only those networks that
13 | #	are in the given category 
14 | #
15 | 
16 | while read -r network   # -r:  do not treat backslashes in names specially
17 | do
18 |     # Keep the network if its meta file has a line "category: $category"; [[:space:]] is the portable form of \s
19 |     if grep -qE '^[[:space:]]*category[[:space:]]*:[[:space:]]*'"$category"'[[:space:]]*$' uni/meta."$network"
20 |     then
21 | 	echo "$network"
22 |     fi
23 | done


--------------------------------------------------------------------------------
/sh/check:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Check all files in uni/ for consistency at the file level, i.e., encoding, etc.
 4 | #
 5 | # Currently the only check is that 'isutf8' accepts each file.  The
 6 | # script stops at the first file that fails.
 7 | #
 8 | # EXIT STATUS
 9 | #	0 when all files pass, 1 as soon as one file fails
10 | #
11 | 
12 | for file in uni/*
13 | do
14 | 
15 |     echo "Checking $file..."
16 |     # Quote the filename so that a name containing whitespace or glob
17 |     # characters reaches isutf8 as a single argument.
18 |     if ! isutf8 "$file" ; then
19 | 	exit 1
20 |     fi
21 | 
22 | done
23 | 
24 | exit 0
25 | 


--------------------------------------------------------------------------------
/sh/checkmeta:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | # 
 3 | # Check that the meta.* files are correct. 
 4 | #
 5 | # INPUT FILES
 6 | #	dat/NETWORKS	Names of the networks to check, one per line
 7 | #	uni/meta.*	The files being checked
 8 | #	uni/out.*	One must exist for every uni/meta.* file
 9 | #
10 | # ENVIRONMENT
11 | #	$verbose	Set to non-empty for verbose mode
12 | #
13 | # EXIT STATUS
14 | #	0 when no error was found, 1 otherwise
15 | #
16 | 
17 | [ "$verbose" ] && exec 4>&2 || exec 4>/dev/null 
18 | 
19 | found_error=0
20 | 
21 | # Report an error and remember that at least one occurred.
22 | # $1:  file in which the error occurred
23 | # other arguments:  error message
24 | error()
25 | {
26 | 	found_error=1
27 | 	f="$1"
28 | 	shift
29 | 	echo "Error in file $f: $*" >&2
30 | }
31 | 
32 | FILES="$(sed -E -e 's,^,uni/meta.,' dat/NETWORKS)"   
33 | 
34 | # Space-separated list of CODE:FILE pairs for the files seen so far
35 | CODESANDFILES=
36 | 
37 | MANDATORY="name code category entity-names relationship-names"
38 | 
39 | for file in $FILES 
40 | do
41 | 	# Use $file here:  $f is only set inside error() and would be
42 | 	# empty or name a file from an earlier iteration.
43 | 	echo >&4 "Checking $file ..."
44 | 
45 | 	# Field names must be followed by ':' and not '=' 
46 | 	grep -Eq '^[^:=]*=' "$file" && error "$file" '*** Field names must be followed by colon (:), not an equal sign (=)'
47 | 
48 | 	# Duplicate codes
49 | 	CODE="$(grep 'code' "$file" | sed -n -E -e 's/^\s*code\s*:\s*([a-zA-Z0-9\@]+)\s*$/\1/p')"
50 | 	[ "$CODE" ] || { error "$file" '*** No code given'; continue; }
51 | 	previous="$(echo "$CODESANDFILES" | grep -E -o '(^| )'"$CODE"':[^ ]+' | cut -d':' -f2- )"
52 | 	[ "$previous" ] && { error "$file" "*** Uses same code ($CODE) as file $previous"; continue;  } 
53 | 	CODESANDFILES="$CODESANDFILES $CODE:$file"
54 | 
55 | 	# Mandatory fields 
56 | 	for field in $MANDATORY ; do
57 | 		if ! grep -Eq '^\s*'"$field"'\s*:' "$file" ; then
58 | 			error "$file" "Field '$field' missing"
59 | 		fi
60 | 	done
61 | done
62 | 
63 | # Check that all meta.* files have a corresponding out.* file
64 | for file in uni/meta.* ; do
65 | 	out="$(echo "$file" | sed -E -e 's,^uni/meta,uni/out,')"
66 | 	[ -r "$out" ] || {
67 | 		echo >&2 "*** Missing file '$out' corresponding to file '$file'"
68 | 		exit 1
69 | 	}
70 | done
71 | 
72 | exit "$found_error"
73 | 
74 | 


--------------------------------------------------------------------------------
/sh/dep-network:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Output the dependencies of a network. 
 4 | #
 5 | # The list of dependencies is output in the order in which it is build,
 6 | # which is the same order as things are defined in the KONECT Handbook.  
 7 | #
 8 | # PARAMETERS
 9 | #	$network
10 | #	$plots_only	Non-empty when only plots should be output 
11 | #
12 | # STDOUT
13 | #	The list of dependencies in Stu format
14 | #
15 | # INPUT FILES
16 | #	dat/dep.statistic.all.$network
17 | #	dat/dep.decomposition.all.$network
18 | #	dat/dep.plot.all.$network
19 | #
20 | 
21 | set -e
22 | 
23 | #
24 | # Tests
25 | #
26 | 
27 | echo @check."$network"
28 | 
29 | #
30 | # Data files 
31 | #
32 | 
33 | echo @tsv."$network"
34 | 
35 | #
36 | # Statistics
37 | #
38 | 
39 | cat dat/dep.statistic.all."$network"
40 | 
41 | #
42 | # Features
43 | #
44 | 
45 | #
46 | # Decompositions
47 | #
48 | 
49 | cat dat/dep.decomposition.all."$network"
50 | 
51 | #
52 | # Plots
53 | #
54 | 
55 | cat dat/dep.plot.all."$network"
56 | 


--------------------------------------------------------------------------------
/sh/depc:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Compute C/C++ dependencies.
 4 | #
 5 | # Starting from the given file, all '#include "..."' lines are followed
 6 | # transitively.  Included names are prefixed with 'c/'.
 7 | #
 8 | # INVOCATION 
 9 | #	$0 $FILENAME
10 | #
11 | # STDOUT
12 | #	The needed files (including $FILENAME itself), one per line,
13 | #	sorted and without duplicates
14 | #
15 | 
16 | set -e
17 | 
18 | TMPDIR="${TMPDIR:-/tmp}"
19 | TMPFILE="$TMPDIR/depc.$$"
20 | 
21 | # Remove the work files on exit; otherwise they accumulate in $TMPDIR.
22 | trap 'rm -f "$TMPFILE" "$TMPFILE".new "$TMPFILE".seen' EXIT
23 | 
24 | echo "$1" >"$TMPFILE"
25 | : >"$TMPFILE".seen
26 | 
27 | while :
28 | do
29 | 	# Start each round with an empty (but existing) list of new files
30 | 	: >"$TMPFILE".new
31 | 	r=0
32 | 	for file in $(cat "$TMPFILE")
33 | 	do
34 | 		# Process each file only once.  The output is deduplicated
35 | 		# anyway, so this does not change it, but it makes the
36 | 		# loop terminate when includes are circular.
37 | 		if grep -qxF -e "$file" "$TMPFILE".seen ; then
38 | 			continue
39 | 		fi
40 | 		echo "$file" >>"$TMPFILE".seen
41 | 		r=1
42 | 		echo "$file"
43 | 		if [ -e "$file" ]; then
44 | 			<"$file" >>"$TMPFILE".new sed -E -e 's,^\s*#\s*include\s+"(.*)",c/\1,;t;d'
45 | 		fi
46 | 	done
47 | 	if [ "$r" = 0 ]; then  exit 0;  fi
48 | 	cp "$TMPFILE".new "$TMPFILE" 
49 | done |
50 | sort -u 
51 | 


--------------------------------------------------------------------------------
/sh/eps2png:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Convert an EPS file to PNG
 4 | #
 5 | # INVOCATION
 6 | #
 7 | #	$0 $filename_eps $filename_png
 8 | #
 9 | 
10 | inkscape -z "$1" -e "$2"
11 | 


--------------------------------------------------------------------------------
/sh/group:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Get the group of a statistic, i.e., the string describing the set
 4 | # of networks that apply to it.
 5 | #
 6 | # INVOCATION 
 7 | #	$0 $STATISTIC
 8 | #
 9 | # INPUT FILES
10 | #	konect-toolbox/m/konect_statistic_$statistic_base.m 
11 | #
12 | # STDOUT
13 | #	The group name is output.  Nothing is output when no group is
14 | #	known. 
15 | #
16 | 
17 | if [ -z "$1" ] ; then
18 | 	echo >&2 "*** $0:  first argument missing"
19 | 	exit 1
20 | fi
21 | 
22 | statistic="$1"
23 | 
24 | statistic_base="$(echo "$statistic" | sed -E -e 's,\+[0-9]+$,,')"
25 | 
26 | suffix="$(echo "$statistic" | sed -E -e 's,^.*\+,+,;t;d')"
27 | 
28 | file="konect-toolbox/m/konect_statistic_$statistic_base.m"
29 | 
30 | if [ ! -e "$file" ] ; then
31 | 	echo >&2 "*** $0:  file '$file' not found"
32 | 	exit 1
33 | fi
34 | 
35 | suffix_regexp="$(echo "$suffix" | sed -E -e 's,\+,\\+,')"
36 | 
37 | {
38 | 	# Take the first non-empty of the following two:
39 | 
40 | 	# Substatistic-specific group
41 | 	sed -E -e 's,^.*GROUP'"$suffix_regexp"'\s*:\s*([^ 	]+)\s*$,\1,;t;;d' "$file" | 
42 | 	tr a-z A-Z
43 | 
44 | 	# Statistic-specific group
45 | 	sed -E -e 's,^.*GROUP\s*:\s*([^ 	]+)\s*$,\1,;t;;d' "$file" | 
46 | 	tr a-z A-Z
47 | } |
48 | sed -E -e '/^\s*$/d' | sed -E -e '1!d'
49 | 
50 | exit 0
51 | 


--------------------------------------------------------------------------------
/sh/intersect:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Compute the intersection of two network lists.
 4 | #
 5 | # INVOCATION
 6 | #
 7 | # 	$0 FILE-1 FILE-2
 8 | #
 9 | # STDOUT
10 | #	The networks present in both files, in the order produced by
11 | #	sh/sort-networks
12 | #
13 | 
14 | set -e
15 | 
16 | if [ -z "$1" ] || [ -z "$2" ] ; then
17 | 	echo >&2 '*** Error in invocation'
18 | 	exit 1
19 | fi
20 | 
21 | tmp1="${TMPDIR:-/tmp/}/$$.intersect.1"
22 | tmp2="${TMPDIR:-/tmp/}/$$.intersect.2"
23 | 
24 | # Remove the temporary files on exit, also when a command fails
25 | trap 'rm -f "$tmp1" "$tmp2"' EXIT
26 | 
27 | # join(1) needs both inputs sorted on the join field
28 | sort -k 1b,1 "$1" >"$tmp1"
29 | sort -k 1b,1 "$2" >"$tmp2"
30 | 
31 | join "$tmp1" "$tmp2" | 
32 | sh/sort-networks
33 | 


--------------------------------------------------------------------------------
/sh/listempty:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | # 
 3 | # Output the name of empty plot/*.eps files.  
 4 | #
 5 | # Some plot/*.eps are erroneous:  Matlab crashes while generating them,
 6 | # usually because there is too much data to plot.  This results in a
 7 | # segmentation violation in Matlab which cannot be caught in Matlab
 8 | # code.  This script finds such EPS files and outputs their names.
 9 | #
10 | # A file is reported when its last line does not contain '%%EOF'; this
11 | # includes files without any content.
12 | #
13 | # ENVIRONMENT
14 | #	$verbose	Set to non-empty for verbose mode
15 | #
16 | 
17 | [ "$verbose" ] && exec 4>&2 || exec 4>/dev/null
18 | 
19 | find plot/ -name '*.eps' |
20 | while IFS= read -r file
21 | do
22 | 	echo >&4 Trying "'$file'"
23 | 
24 | 	# Test for the absence of a match rather than for the presence
25 | 	# of a non-matching line:  the latter never succeeds on a
26 | 	# zero-byte file, which would then go unreported.
27 | 	if ! tail -n 1 "$file" | grep -Fq '%%EOF' 
28 | 	then
29 | 		echo "$file"
30 | 	fi
31 | done
32 | 


--------------------------------------------------------------------------------
/sh/mem:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Divide the 'ulimit -v' value by SIZE and execute COMMAND.  ulimit -v
 4 | # must be set to a finite value, or be 'unlimited'.
 5 | #
 6 | # 'ulimit -v' is used to limit the amount of memory usable by each
 7 | # process.  
 8 | #
 9 | # INVOCATION 
10 | # 
11 | #	$0 SIZE COMMAND
12 | #
13 | 
14 | size=$1
15 | shift
16 | 
17 | mem=$(echo $(ulimit -v))
18 | 
19 | if [ "$mem" = unlimited ]
20 | then
21 | 	# There is no limit -- just execute the command
22 | 	exec "$@"
23 | else
24 | 	ulimit -v "$((mem / size))"
25 | 	exec "$@"
26 | fi
27 | 


--------------------------------------------------------------------------------
/sh/mkdatasetlist:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Generate list of networks.
 4 | #
 5 | # INPUT FILES
 6 | #	uni/*
 7 | #
 8 | # STDOUT
 9 | #	One network per line:
10 | #
11 | #	NETWORK <space> FORMAT <space> WEIGHTS <space> TIMESTAMPS <space> BYTES
12 | #
13 | #	TIMESTAMPS is t for timestamps and 0 for no timestamps 
14 | #	BYTES is the size in bytes.
15 | #	Entries are not sorted. 
16 | #
17 | # ENVIRONMENT 
18 | #
19 | #	$verbose	Set to non-empty for verbose mode
20 | #
21 | 
22 | [ "$verbose" ] && exec 4>&2 || exec 4>/dev/null 
23 | 
24 | echo uni/out.* | sed -e 's| \{1,\}|\
25 | |g' |
26 | while IFS= read -r file
27 | do
28 | 	echo >&4 "file='$file'"
29 | 	name="$(echo "$file" | sed -e 's,^uni/out\.,,')"
30 | 	echo >&4 "name='$name'"
31 | 	
32 | 	# Name, format and weight
33 | 	printf '%s' "$name $(head -1 "$file" | sed -E -e 's,^.* ([^ ]+) .*$,\1,') $(head -1 "$file" | sed -E -e 's,^.*\s([^ ]+)\s*$,\1,') "
34 | 
35 | 	# Timestamps
36 | 	if [ $(head "$file" | tail -1 | wc -w) -gt 3 ]
37 | 	then
38 | 		printf t
39 | 	else
40 | 		printf 0
41 | 	fi
42 | 	printf ' ' 
43 | 
44 | 	# Bytes
45 | 	ls -lH -- "$file" | cut -d ' ' -f 5 
46 | done
47 | 


--------------------------------------------------------------------------------
/sh/mkdownloadlist:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Determine the list of networks that can be downloaded, on stdout.
 4 | #
 5 | # The output preserves the ordering from dat/NETWORKS. 
 6 | #
 7 | # Each pattern is turned into an anchored regular expression in which
 8 | # '*' stands for any sequence of characters.  Lines of
 9 | # DOWNLOAD_PATTERNS that are empty or begin with '#' are ignored.
10 | #
11 | # INPUT FILES
12 | #	dat/NETWORKS		Complete list of networks
13 | #	DOWNLOAD_PATTERNS	Patterns that are allowed
14 | #
15 | 
16 | # The third command strips leading whitespace.  It must be anchored
17 | # with '^'; a pattern starting with '$' can never match.
18 | sed -E -e '
19 | /^\s*#/d
20 | /^\s*$/d
21 | s,^\s+,,
22 | s,\s+$,,
23 | s,\*,.*,g
24 | s,^,^,
25 | s,$,$,
26 | ' DOWNLOAD_PATTERNS |
27 | grep -E -f - dat/NETWORKS
28 | 


--------------------------------------------------------------------------------
/sh/mkmissing:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 | #
 3 | # DEPRECATED -- this is deprecated because we now use Stu, which can do
 4 | # this on its own. 
 5 | # 
 6 | # This script determines which files for a Makefile target are missing
 7 | # and invokes make on each one of them.
 8 | #
 9 | # INVOCATION 
10 | #
11 | # 	$0 "make arguments" "make invoke command"
12 | #
13 | # where the parameters are:
14 | #	make arguments: make options and target
15 | #		e.g., "-f Makefile.dataset datasets.make"
16 | #	make invoke command: make command and the options with which it is invoked
17 | #		e.g., "make -f Makefile.dataset"
18 | #		(default:  "./mn 5 -k")
19 | #
20 | 
21 | use strict;
22 | use warnings;
23 | 
24 | my ($makeargs, $makeinvoke) = @ARGV;
25 | 
26 | unless ($makeargs)   { $makeargs = ""; }
27 | unless ($makeinvoke) { $makeinvoke = "./mn 5 -k"; }
28 | 
29 | my @targets = ();
30 | 
31 | # Let make print its database without building anything
32 | open(my $make, "-|", "LANG=en_US.utf-8 make -p -n $makeargs") or die "$!";
33 | 
34 | # Collect the targets (lines of the form "# @ := NAME") whose file does
35 | # not exist yet
36 | while (my $line = <$make>)
37 | {
38 | 	if ($line =~ /^# @ := (.+)$/)
39 | 	{
40 | 		my $target = $1;
41 | 		push(@targets, $target) unless -e $target;
42 | 	}
43 | }
44 | 
45 | # When only the exit status of the command was non-zero, $! is 0 and
46 | # the status is in $?
47 | close($make) or die($! ? "$!" : "make exited with status $?");
48 | 
49 | my $count = @targets;
50 | print "amount of targets: $count\n";
51 | 
52 | print "running make\n";
53 | open(my $makeout, "|-", "xargs $makeinvoke") or die "$!";
54 | 
55 | foreach my $target (@targets)
56 | {
57 | 	print {$makeout} "$target\n";
58 | }
59 | 
60 | close($makeout) or die($! ? "$!" : "'xargs $makeinvoke' exited with status $?");
61 | print "done\n";
62 | 


--------------------------------------------------------------------------------
/sh/mkpath:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 | #
3 | # Output the value of $MATLABPATH. 
4 | #
5 | 
6 | # $PWD is passed as data, not as part of the printf format, so that a
7 | # '%' or '\' in the directory name is output unchanged.
8 | printf '%s' "$PWD/m/:$PWD/konect-toolbox/m:$PWD/lib/:$PWD/lib/matlab_bgl/:$PWD/lib/wafo-statistics/:$PWD/lib/wafo-misc/:$PWD/lib/gridxy:$PWD/syngraphy/" 
9 | 


--------------------------------------------------------------------------------
/sh/mkstat:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/awk -f
 2 | #
 3 | # Convert result of runtime evaluation to Matlab matrix file for analysis.
 4 | #
 5 | # STDIN		
 6 | #
 7 | # 	Text with data lines beginning with "%="
 8 | # STDOUT	
 9 | #
10 | # 	Matlab file with same data:  fields 3 to 6 of each data line,
11 | #	separated by single spaces
12 | #
13 | 
14 | # Only data lines produce output; all other lines are ignored.
15 | /^%=/ { print $3, $4, $5, $6 }
16 | 


--------------------------------------------------------------------------------
/sh/mktime:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Create the timestamp file from the out.* file, via stdin/stdout.
 4 | #
 5 | # PARAMETERS
 6 | #	$network (only for the check, not for accessing the file)
 7 | #
 8 | # INPUT FILES
 9 | #	dat/NETWORKS_TIME	The network must be listed here
10 | #
11 | 
12 | set -e
13 | 
14 | # Compare the name as a fixed string against whole lines, so that
15 | # characters special in regular expressions cannot cause a false match.
16 | if ! grep -qxF -e "$network" dat/NETWORKS_TIME ; then
17 | 	echo >&2 "*** $0:  network '$network' is not listed in dat/NETWORKS_TIME"
18 | 	exit 1
19 | fi
20 | 
21 | # Drop comment lines, then reduce each line to its fourth column
22 | sed -E -e '
23 | /^%/d
24 | s,[0-9]+\s+[0-9]+\s+[^ ]+\s+([0-9-]+),\1,
25 | '
26 | 


--------------------------------------------------------------------------------
/sh/network-format:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 | #
 3 | # Extract format from an unirelational dataset. 
 4 | # 
 5 | # STDIN 
 6 | #	Dataset file, typically named out.* or rel.*
 7 | #
 8 | # STDOUT 
 9 | #	One line with the numerical format
10 | #
11 | 
12 | use strict; 
13 | use warnings; 
14 | 
15 | # A plain hash lookup is used instead of the 'Switch' module, which is
16 | # a source filter that is no longer part of the Perl core.
17 | 
18 | # Numerical codes of the relationship formats
19 | my %format_code_of = (
20 |     "sym"  => 1,
21 |     "asym" => 2,
22 |     "bip"  => 3,
23 | );
24 | 
25 | my $header = <>; 
26 | $header = "" unless defined $header;   # empty input
27 | 
28 | # The first line must have the form "% FORMAT WEIGHTS"
29 | $header =~ /^\s*%\s+(\S+)\s+(\S+)/
30 |     or die "*** error:  Invalid first input line:  $header"; 
31 | 
32 | my $relationship_format_text = $1;
33 | 
34 | exists $format_code_of{$relationship_format_text}
35 |     or die "*** error:  Invalid relationship format $relationship_format_text";
36 | 
37 | print "$format_code_of{$relationship_format_text}\n";
38 | 


--------------------------------------------------------------------------------
/sh/network-info:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 | #
 3 | # Extract numerical info from an unirelational dataset:  Number of
 4 | # subject and object entities, number of edges, format and weights. 
 5 | #
 6 | # This data format is deprecated, but still used. 
 7 | # 
 8 | # STDIN
 9 | #	Dataset file, typically named out.* or rel.*
10 | #
11 | # STDOUT
12 | #	Six lines with the following info:
13 | #		n1	Number of subjects
14 | #		n2	Number of objects
15 | #		g	Number of lines
16 | #		emn_	Fill:   m / (n1 * n2) [DEPRECATED]
17 | #		format	Relationship format as defined in
18 | #			matlab/constants.m
19 | #		weights Weight range as defined in
20 | #			matlab/constants.m
21 | #
22 | # Parameters are ignored. 
23 | #
24 | 
25 | use strict; 
26 | use warnings;
27 | 
28 | # Plain hash lookups are used instead of the 'Switch' module, which is
29 | # a source filter that is no longer part of the Perl core.
30 | 
31 | # Numerical codes of the relationship formats
32 | my %format_code_of = (
33 | 	"sym"  => 1,
34 | 	"asym" => 2,
35 | 	"bip"  => 3,
36 | );
37 | 
38 | # Numerical codes of the weight ranges
39 | my %weights_code_of = (
40 | 	"unweighted"    => 1,
41 | 	"positive"      => 2,
42 | 	"posweighted"   => 3,
43 | 	"signed"        => 4,
44 | 	"multisigned"   => 5,
45 | 	"weighted"      => 6,
46 | 	"multiweighted" => 7,
47 | 	"dynamic"       => 8,
48 | );
49 | 
50 | my $header = <STDIN>; 
51 | $header = "" unless defined $header;   # empty input
52 | 
53 | # The first line must have the form "% FORMAT WEIGHTS"
54 | $header =~ /^\s*%\s+(\S+)\s+(\S+)/
55 |     or die "*** error:  Invalid first input line:  $header"; 
56 | 
57 | my $relationship_format_text = $1;
58 | my $weight_range_text = $2; 
59 | 
60 | exists $format_code_of{$relationship_format_text}
61 |     or die "*** error:  Invalid relationship format $relationship_format_text";
62 | my $relationship_format = $format_code_of{$relationship_format_text};
63 | 
64 | exists $weights_code_of{$weight_range_text}
65 |     or die "*** error:  Invalid weight range:  $weight_range_text";
66 | my $weight_range = $weights_code_of{$weight_range_text};
67 | 
68 | my $m = -1;   # largest subject ID seen
69 | my $n = -1;   # largest object ID seen
70 | my $e = 0;    # number of data lines
71 | 
72 | while (my $line = <STDIN>)
73 | {
74 | 	next if $line =~ /^%/;
75 | 	next if $line =~ /^\s*$/;
76 | 
77 | 	$line =~ /^\s*(\S+)\s+(\S+)/
78 | 	    or die "*** error:  Invalid input:  $line"; 
79 | 
80 | 	my $subject = $1; 
81 | 	my $object = $2; 
82 |     
83 | 	++ $e; 
84 | 
85 | 	if ($subject > $m) { $m = $subject; }
86 | 	if ($object  > $n) { $n = $object;  }
87 | }
88 | 
89 | # Unless the format is 3 (bip), both counts are set to the larger of
90 | # the two
91 | if ($relationship_format != 3)
92 | {
93 | 	if ($m < $n) { $m = $n; }
94 | 	if ($n < $m) { $n = $m; }
95 | }
96 | 
97 | my $emn = $e / ($m * $n); 
98 | 
99 | print "$m\n$n\n$e\n$emn\n$relationship_format\n$weight_range\n";


--------------------------------------------------------------------------------
/sh/network-weights:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 | #
 3 | # Extract weights from an unirelational dataset. 
 4 | # 
 5 | # STDIN
 6 | #	Dataset file, typically named out.* or rel.*
 7 | #
 8 | # STDOUT 
 9 | #	Weights as a number 
10 | #
11 | 
12 | use warnings; 
13 | use strict; 
14 | 
15 | # A plain hash lookup is used instead of the 'Switch' module, which is
16 | # a source filter that is no longer part of the Perl core.
17 | 
18 | # Numerical codes of the weight ranges
19 | my %weights_code_of = (
20 |     "unweighted"    => 1,
21 |     "positive"      => 2,
22 |     "posweighted"   => 3,
23 |     "signed"        => 4,
24 |     "multisigned"   => 5,
25 |     "weighted"      => 6,
26 |     "multiweighted" => 7,
27 |     "dynamic"       => 8,
28 | );
29 | 
30 | my $header = <>; 
31 | $header = "" unless defined $header;   # empty input
32 | 
33 | # The first line must have the form "% FORMAT WEIGHTS"
34 | $header =~ /^\s*%\s+(\S+)\s+(\S+)/
35 |     or die "*** error:  Invalid first input line:  $header"; 
36 | 
37 | my $weight_range_text = $2; 
38 | 
39 | exists $weights_code_of{$weight_range_text}
40 |     or die "*** error:  Invalid weight range:  $weight_range_text";
41 | 
42 | print "$weights_code_of{$weight_range_text}\n";
43 | 


--------------------------------------------------------------------------------
/sh/out2:
--------------------------------------------------------------------------------
 1 | #! /bin/sed -f
 2 | #
 3 | # Normalize out-files:  comment lines are dropped and every run of
 4 | # whitespace between fields becomes exactly one tab. 
 5 | #
 6 | 
 7 | # Drop comment lines (first non-space character is '%')
 8 | /^[[:space:]]*%/d
 9 | 
10 | # Strip leading whitespace
11 | s/^[[:space:]]*//
12 | 
13 | # Strip trailing whitespace
14 | s/[[:space:]]*$//
15 | 
16 | # Replace each remaining run of whitespace by a single tab
17 | s/[[:space:]][[:space:]]*/	/g
18 | 
19 | 


--------------------------------------------------------------------------------
/sh/plot-network:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Output the plot-dependencies of a given network.
 4 | #
 5 | # For each plot in PLOTS, the plot's group is looked up in GROUPS-PLOT,
 6 | # and a dependency is output when the network belongs to that group.
 7 | #
 8 | # PARAMETERS
 9 | #	$network 
10 | #
11 | # INPUT FILES 
12 | #	PLOTS
13 | #	GROUPS-PLOT
14 | #	dat/NETWORKS_$group  for all groups
15 | #
16 | 
17 | set -e
18 | 
19 | for plot in $(cat PLOTS) ; do
20 | 
21 | 	# The group is the second column of the plot's line in GROUPS-PLOT
22 | 	group=$(<GROUPS-PLOT sed -E -e 's,^\s*'"$plot"'\s+(\S+)\s*$,\1,;t;d')
23 | 	[ "$group" ] || {
24 | 		echo >&2 "*** Error: group for plot '$plot' not found in 'GROUPS-PLOT'"
25 | 		exit 1
26 | 	}
27 | 
28 | 	# Sanity check:  the network must be in the ALL group
29 | 	grep -q -E '^'"$network"'$' dat/NETWORKS_ALL || {
30 | 		echo >&2 "*** Network '$network' must be in the 'ALL' group"
31 | 		exit 1
32 | 	}
33 | 
34 | 	[ -r dat/NETWORKS_"$group" ] || {
35 | 		echo >&2 "*** Expected file 'dat/NETWORKS_$group' to exist because network '$network' is in group '$group' of plot '$plot'"
36 | 		exit 1
37 | 	}
38 | 	
39 | 	if grep -q -E '^'"$network"'$' dat/NETWORKS_"$group" ; then
40 | 		echo @"$plot"."$network"
41 | 	fi
42 | done
43 | 
44 | 


--------------------------------------------------------------------------------
/sh/save_diag:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Extract last eigenvalues calculated.
 4 | #
 5 | # INVOCATION
 6 | #	$0 LOGFILE
 7 | #
 8 | # INPUT FILES
 9 | #	LOGFILE		Only its last 900 lines are considered
10 | #
11 | # STDOUT
12 | #	The lines following the last line that begins with "Iteration",
13 | #	up to (and excluding) the first line that does not consist
14 | #	solely of the characters 'e', '+', '.', and digits
15 | #
16 | 
17 | # The filename is quoted so that a name containing whitespace is
18 | # passed to tail as a single argument.  'Q' (GNU sed) quits without
19 | # printing the current line.
20 | tail -n 900 "$1" | tac | sed -e '/^Iteration/Q' | tac | sed -E -e '/^\s*[e+0-9.]+\s*$/!Q'
21 | 
22 | 


--------------------------------------------------------------------------------
/sh/sort-networks:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 | #
 3 | # Sort the given list of networks by ascending size. 
 4 | # 
 5 | # STDIN 
 6 | #	The concatenation of all network lists, in no particular order
 7 | # 
 8 | # STDOUT 
 9 | #	The sorted list of networks, by ascending size
10 | #
11 | # INPUT
12 | #	dat/networks.asc	The first word of each line is a network
13 | #				name; the order of this file determines
14 | #				the order of the output
15 | #
16 | # EXIT STATUS
17 | #	1 when a network from STDIN is not listed in dat/networks.asc
18 | #
19 | 
20 | use strict;
21 | use warnings;
22 | 
23 | my $list_file = "dat/networks.asc";
24 | 
25 | # The set of requested networks
26 | my %networks;
27 | 
28 | while (my $name = <>) {
29 | 	chomp $name;
30 | 	$networks{$name} = 1;
31 | }
32 | 
33 | open(my $in, "<", $list_file) or die "$list_file: $!";
34 | 
35 | while (my $line = <$in>) {
36 | 	$line =~ /^\s*(\S+)\s/ or die "$list_file: invalid line: $line";
37 | 	my $network = $1;
38 | 	if (exists $networks{$network}) {
39 | 		print "$network\n"; 
40 | 		delete $networks{$network}; 
41 | 	}
42 | }
43 | 
44 | # Close before the error check, so that the handle is also closed on
45 | # the error path
46 | close($in) or die "$list_file: $!";
47 | 
48 | # All networks that remain in %networks were not found in the full
49 | # network list--an error. 
50 | my @unmatched = sort keys %networks;
51 | if (@unmatched) {
52 | 	print STDERR "*** Unmatched networks:\n";
53 | 	print STDERR "$_\n" for @unmatched;
54 | 	exit 1
55 | }
56 | 


--------------------------------------------------------------------------------
/sh/statistic-network:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | #
 3 | # Output all statistic targets for $network.
 4 | #
 5 | # PARAMETERS
 6 | #	$network 
 7 | #
 8 | # ENVIRONMENT
 9 | #	$verbose	Set to non-empty to trace the decisions on stderr
10 | #
11 | # INPUT FILES
12 | #	STATISTICS
13 | #	GROUPS
14 | #	dat/NETWORKS_$group  for all groups
15 | #	konect-toolbox/m/konect_statistic_$statistic.m
16 | #
17 | 
18 | # Trace output goes to file descriptor 4, which is only connected to
19 | # stderr in verbose mode.
20 | [ "$verbose" ] && exec 4>&2 || exec 4>/dev/null 
21 | 
22 | for statistic in $(cat STATISTICS) ; do
23 | 	# check whether the statistic must be computed for this network 
24 | 
25 | 	echo >&4 "statistic='$statistic'"
26 | 	compute=1
27 | 	filename=konect-toolbox/m/konect_statistic_"$(echo "$statistic" | sed -E -e 's,\+.*$,,')".m
28 | 	echo >&4 "filename='$filename'"
29 | 	if [ -r "$filename" ] ; then
30 | 		echo >&4 "file exists"
31 | 		if echo "$statistic" | grep -q -F + ; then
32 | 			key=GROUP+$(echo "$statistic" | sed -E -e 's,^.*\+,,')
33 | 		else
34 | 			key=GROUP
35 | 		fi
36 | 		echo >&4 "key='$key'"
37 | 		key_esc=$(echo "$key" | sed -E -e 's,\+,\\+,g')
38 | 		# The group must match for *both* the main statistic and
39 | 		# the substatistic, hence the loop.  In most cases, only
40 | 		# the main statistic declares a group, which then also
41 | 		# applies to all substatistics.  In some cases,
42 | 		# substatistics however have a more restricted group. 
43 | 		for group in $(<"$filename" sed -E -e 's,^.*(GROUP|'"$key_esc"')\s*:,,;t;d' | tr a-z A-Z) ; do
44 | 			echo >&4 "group='$group'"
45 | 			if ! grep -q -E '^\s*'"$group"'\s*$' GROUPS ; then
46 | 				echo >&2 "$filename:  unknown group '$group' (1)" 
47 | 				exit 1
48 | 			fi
49 | 			if [ ! -r dat/NETWORKS_"$group" ] ; then 
50 | 				echo >&2 "$filename:  unknown group '$group' (2)" 
51 | 
52 | 				exit 1
53 | 			fi
54 | 			if ! grep -Eq '^'"$network"'$' "dat/NETWORKS_$group" ; then
55 | 				echo >&4 "exclude"
56 | 				compute=0
57 | 			else
58 | 				echo >&4 "include"
59 | 			fi
60 | 		done
61 | 	else
62 | 		:
63 | 		# Nothing.  Not all statistics have a file in the
64 | 		# toolbox.  Assume the statistic is computed in that
65 | 		# case. 
66 | 	fi
67 | 		
68 | 	if [ "$compute" != 0 ] ; then
69 | 		echo @statistic."$statistic"."$network"
70 | 	fi
71 | done
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/sh/statistic-size:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 | #
 3 | # Determine network size statistic.  This outputs all substatistics of
 4 | # the [size] statistics; see
 5 | # 'konect-toolbox/m/konect_statistic_size.m'. 
 6 | #
 7 | # PARAMETERS
 8 | #	$network	Internal name of network 
 9 | #
10 | # STDIN:  uni/out.$network
11 | # STDOUT: The values, one per line:  n for formats 1 and 2; n, n1 and
12 | #         n2 for format 3
13 | #
14 | # INPUT FILES
15 | #	dat/statistic.format.$network
16 | # 
17 | 
18 | use strict; 
19 | use warnings FATAL => 'all';
20 | 
21 | # The 'Switch' module was loaded here but never used; it is a source
22 | # filter outside the Perl core, so it is not loaded anymore.
23 | 
24 | my $network = $ENV{"network"};
25 | defined $network or die "*** Environment variable 'network' is not set";
26 | 
27 | my $format_file = "dat/statistic.format.$network";
28 | 
29 | open(my $format_fh, "<", $format_file) or die "$format_file: $!"; 
30 | 
31 | my $format_network = <$format_fh>;
32 | defined $format_network or die "*** File '$format_file' is empty";
33 | chomp $format_network;
34 | 
35 | close($format_fh) or die "$!";
36 | 
37 | if ($format_network eq "1" or $format_network eq "2") {
38 | 	# One node set:  n is the largest ID in either column
39 | 	my $n = 0; 
40 | 	while (my $line = <STDIN>) {
41 | 		next if $line =~ /^%/;
42 | 		$line =~ /^([0-9]+)\s+([0-9]+).*/ or die "Invalid format in input: '$line'"; 
43 | 		my ($u, $v) = ($1, $2);
44 | 		if ($u > $n) { $n = $u; }
45 | 		if ($v > $n) { $n = $v; }
46 | 	}
47 | 	if ($n < 1) { die "Value of n" } 
48 | 	print "$n\n"; 
49 | } elsif ($format_network eq "3") {
50 | 	# Two node sets:  largest ID of each column separately
51 | 	my $n1 = 0;
52 | 	my $n2 = 0;
53 | 	while (my $line = <STDIN>) {
54 | 		next if $line =~ /^%/;
55 | 		$line =~ /^([0-9]+)\s+([0-9]+).*/ or die "Invalid format in input: '$line'"; 
56 | 		my ($u, $v) = ($1, $2);
57 | 		if ($u > $n1) { $n1 = $u; }
58 | 		if ($v > $n2) { $n2 = $v; }
59 | 	}
60 | 	if ($n1 < 1) { die "Value of n1" } 
61 | 	if ($n2 < 1) { die "Value of n2" }
62 | 	my $n = $n1 + $n2; 
63 | 	print "$n\n$n1\n$n2\n"; 
64 | } else {
65 | 	die "*** Invalid format '$format_network'";
66 | }
67 | 
68 | close(STDIN) or die "$!";
69 | 
70 | 


--------------------------------------------------------------------------------
/sh/unset-lc:
--------------------------------------------------------------------------------
1 | #
2 | # Script to be sourced to unset all locale variables (LC_*).  Used to
3 | # invoke awk. 
4 | #
5 | 
6 | # [^=]* keeps the match from running past the first '=', so that a
7 | # value which itself contains '=' does not end up in the variable name.
8 | unset $(set | sed -E 's,^(LC_[^=]*)=.*$,\1,;t;d')
9 | 


--------------------------------------------------------------------------------
/sh/widths-one:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 | #
 3 | # Print the bitwidth character that Konect::bitwidth() returns for the
 4 | # number given on the command line.
 5 | #
 6 | # INVOCATION
 7 | #	$0 N
 8 | #
 9 | # STDOUT
10 | #	One line with the bitwidth character
11 | #
12 | 
13 | use strict;
14 | use warnings;
15 | 
16 | use Konect; 
17 | 
18 | # Only the first argument is used
19 | my ($number) = @ARGV;
20 | 
21 | my $bitwidth = Konect::bitwidth($number);
22 | 
23 | print "$bitwidth\n";
24 | 
25 | 


--------------------------------------------------------------------------------
/sh/widths-simple:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 | #
 3 | # Given the bitwidths of a network, return the bitwidths of the
 4 | # corresponding simple network.
 5 | #
 6 | #  * The format is transformed to SYM (undirected, 1)
 7 | #  * The weights is transformed to UNWEIGHTED (1)
 8 | #  * Weights and timestamps are set to unused (-)
 9 | #  * LOOPS is transformed to zero (0)
10 | #  * If the input graph was bipartite, U and V are transformed to a bit
11 | #    width that is large enough to represent the sum of left and right
12 | #    nodes. 
13 | #  * If the graph is BIP or ASYM take into account that each edge must
14 | #    be stored twice and thus the bitwidth for M may be increased 
15 | #
16 | # ENVIRONMENT 
17 | #	$network
18 | #
19 | # INPUT FILES 
20 | #	dat/widths.$network
21 | #	dat/statistic.size.$network	(bipartite networks only)
22 | #	dat/statistic.volume.$network	(bipartite networks only)
23 | #
24 | # STDOUT
25 | #	The transformed bitwidth
26 | #
27 | 
28 | use strict;
29 | use warnings;
30 | 
31 | require Konect; 
32 | 
33 | my $network = $ENV{"network"};
34 | defined $network or die "Environment variable 'network' is not set";
35 | 
36 | my $widths_file = "dat/widths.$network";
37 | 
38 | open(my $widths_fh, "<", $widths_file) or die $!; 
39 | my $in = <$widths_fh>;
40 | close($widths_fh) or die $!;
41 | 
42 | defined $in or die "File '$widths_file' is empty";
43 | chomp $in; 
44 | 
45 | # The string has one character per field; the first three are M, U
46 | # and V, and the sixth is the format.
47 | $in =~ /(.)(.)(.)(.)(.)(.)(.)(.)/
48 |     or die "Invalid bitwidth string '$in' in '$widths_file'";
49 | 
50 | my $format = $6;
51 | 
52 | my $m = $1;
53 | my $u = $2;
54 | my $v = $3;
55 | 
56 | # The format is compared as a string, so that a non-numeric character
57 | # leads to the error below rather than to a warning.
58 | if ($format eq "3") {
59 | 
60 | 	# Bipartite 
61 | 
62 | 	open(my $size_fh, "<", "dat/statistic.size.$network") or die "$!";
63 | 	my @values = <$size_fh>;
64 | 	close($size_fh) or die "$!";
65 | 	die unless (scalar @values) == 3; 
66 | 	chomp @values;
67 | 	my ($n, $n1, $n2) = @values;
68 | 	die unless $n == $n1 + $n2;
69 | 
70 | 	open(my $volume_fh, "<", "dat/statistic.volume.$network") or die "$!";
71 | 	my $value_m = <$volume_fh>;
72 | 	close($volume_fh) or die "$!";
73 | 	defined $value_m or die "File 'dat/statistic.volume.$network' is empty";
74 | 	chomp $value_m; 
75 | 	
76 | 	# Both node sets are merged, so U and V must hold any of the n IDs
77 | 	$u = Konect::bitwidth($n);
78 | 	$v = $u; 
79 |     
80 | 	# Recompute the M bitwidth
81 | 	$m = Konect::bitwidth(2 * $value_m); 
82 | 
83 | } elsif ($format eq "1" || $format eq "2") {
84 | 
85 | 	# Unipartite:  Keep bitwidths of underlying network 
86 | 
87 | } else {
88 | 	die "Invalid format $format";
89 | }
90 | 
91 | print "${m}${u}${v}--110\n";
92 | 
93 | 


--------------------------------------------------------------------------------