├── .gitignore ├── CFLAGS.common ├── CFLAGS.dbg ├── CFLAGS.mode ├── CFLAGS.opt ├── COPYING ├── DECOMPOSITIONS ├── DECOMPOSITIONS_ASYM ├── DOWNLOAD_PATTERNS ├── GROUPS ├── GROUPS-PLOT ├── INSTALL ├── Makefile.old ├── PLOTS ├── PLOTTING ├── README ├── SCATTERS ├── STATISTICS ├── TODO ├── c ├── .gitignore ├── README ├── asxprintf.h ├── binary_heap.h ├── bits.h ├── consts.h ├── dijkstra.h ├── feature.h ├── feature_degree.c ├── graph.h ├── graph_read_sgraph0.h ├── graph_simple.h ├── graph_width.h ├── hyperanf.c ├── hyperloglog.h ├── ifub.c ├── lcc.h ├── lines.c ├── maxdegree.c ├── outs.c ├── sgraph.h ├── sgraph0_create.c ├── sgraph0_io.h ├── sgraph1_create.c ├── sgraph1_create_lcc.c ├── sgraph1_create_simple.c ├── sgraph1_dump.c ├── sgraph1_io.h ├── sgraph1_subgraph.h ├── sgraph1_write.h ├── size.c ├── statistic_mediandegree.c ├── statistic_twostars.c ├── statistic_volume.c ├── triangles.c ├── volume.c ├── width.h └── widthhelper.h ├── colormap ├── jl ├── inter.jl ├── inter2.jl ├── konect_consts.jl ├── read_statistic.jl └── step_full.jl ├── julia ├── ktop ├── lib ├── .gitignore ├── JULIA ├── README ├── deta.m ├── gridxy │ ├── gridxy.m │ └── license.txt ├── hsl2rgb.m ├── octave │ ├── README │ └── corr.m ├── plpva.m └── rgb2hsl.m ├── m ├── approximation.m ├── approximation_plot.m ├── assortativity.m ├── assortativity_one.m ├── axis_fit.m ├── beta_do.m ├── beta_one.m ├── beta_plot.m ├── bidd.m ├── bidd_one.m ├── check.m ├── check_failed.m ├── check_successful.m ├── cluscod.m ├── cluscod_plot.m ├── comparison.m ├── comparison_cross.m ├── complex2rgb.m ├── curve_apply.m ├── data.m ├── decomposition_comp.m ├── decomposition_map.m ├── decomposition_plot.m ├── decomposition_split.m ├── decomposition_time.m ├── decomposition_time_plot.m ├── degcc.m ├── degree.m ├── degree_print.m ├── delaunay_one.m ├── delaunay_plot.m ├── diadens.m ├── diagonality.m ├── distr.m ├── distr_plot.m ├── distrtest_colors.m ├── distrtest_multi.m ├── distrtest_plot.m ├── distrtest_types.m ├── 
estimate_power_law.m ├── evol_permutation.m ├── exp_entropy.m ├── fit.m ├── fit_plot.m ├── fit_plot_curve.m ├── format_number.m ├── format_statistic.m ├── get_ids_submethod.m ├── get_labels_measure.m ├── get_labels_method.m ├── get_labels_method_submethod.m ├── get_labels_submethod.m ├── get_rank.m ├── get_rank_type.m ├── get_tags.m ├── get_updown_statistic.m ├── has_timestamps.m ├── hopdistr_comp.m ├── hopdistr_distrtest.m ├── hopdistr_plot.m ├── hopdistr_time_comp.m ├── hopdistr_time_plot.m ├── konect_decomposition_dedicom4.m ├── ksdist.m ├── layout.m ├── load_strings.m ├── lorenz.m ├── lorenz_one.m ├── lybl.m ├── map.m ├── map_line.m ├── map_minmax.m ├── mask.m ├── mask_step.m ├── means.m ├── means_best.m ├── means_e.m ├── means_euv.m ├── means_regr.m ├── means_regrn.m ├── measure_compute.m ├── measure_compute_ap.m ├── measure_compute_auc.m ├── measure_compute_corr.m ├── measure_compute_kendall.m ├── measure_compute_map.m ├── measure_compute_mauc.m ├── measure_compute_spear.m ├── mediandist.m ├── mkcategory.m ├── network_key.m ├── outin.m ├── pa_compute.m ├── pa_plot.m ├── pa_plot_one.m ├── pivotize.m ├── precision_all.m ├── precision_comp.m ├── precision_one.m ├── precisions_plot.m ├── predict_euclidean.m ├── predict_spectral.m ├── prediction_decomposition.m ├── prediction_local.m ├── prediction_local_compute.m ├── prediction_local_compute_mask.m ├── prediction_local_compute_neib.m ├── prediction_local_compute_neib3.m ├── prediction_local_compute_pref.m ├── prediction_local_compute_zero.m ├── prepare_matrix_target.m ├── rating_evolution.m ├── rating_evolution2.m ├── read_info.m ├── read_meta.m ├── read_statistic.m ├── rmse_full.m ├── rmse_latent.m ├── runtime.m ├── scatter_comp.m ├── scatter_plot.m ├── scatter_single.m ├── shrinkingdiversity.m ├── sne.m ├── spectral_diagonality_test.m ├── spectral_extrapolation.m ├── spectrum_visualize.m ├── split.m ├── statistic_avgdegree.m ├── statistic_comp.m ├── statistic_comp_spectral.m ├── statistic_diameff.m ├── 
statistic_full_prefatt.m ├── statistic_lines.m ├── statistic_meandist.m ├── statistic_size.m ├── statistic_spectral.m ├── statistic_time.m ├── statistic_time_diam.m ├── statistic_time_plot.m ├── statistic_time_slice.m ├── statistic_time_spectral.m ├── statistics_time_plot.m ├── steps.m ├── stepsi.m ├── styles_method.m ├── styles_submethod.m ├── syngraphy_plot.m ├── time_degree.m ├── time_histogram.m ├── time_histogram_signed.m ├── time_xaxis.m ├── time_xaxis_unix.m ├── trend.m ├── trend_plot.m ├── weights_plot.m ├── zipf.m └── zipf_one.m ├── main.stu ├── matlab ├── mns ├── octave ├── pl ├── Konect.pm ├── README └── TexToHtml │ └── Converter.pm ├── runtime.source ├── sh ├── README ├── category ├── check ├── checkmeta ├── classes ├── dep-network ├── depc ├── eps2png ├── group ├── intersect ├── listempty ├── mem ├── mkcite ├── mkdatasetlist ├── mkdownloadlist ├── mkmissing ├── mkpath ├── mkrdf ├── mkreadme ├── mkstat ├── mktime ├── network-format ├── network-info ├── network-weights ├── out2 ├── plot-network ├── save_diag ├── sort-networks ├── statistic-network ├── statistic-size ├── unset-lc ├── widths ├── widths-one └── widths-simple └── status /.gitignore: -------------------------------------------------------------------------------- 1 | CFLAGS 2 | MATLABPATH 3 | PERL5LIB 4 | bin 5 | dat 6 | plot 7 | tex 8 | uni 9 | dat-* 10 | plot-* 11 | octave-workspace 12 | konect-toolbox 13 | konect-extr 14 | konect-handbook 15 | stu-utils 16 | syngraphy 17 | error.log 18 | tmp.runtime 19 | lib/BibTeX 20 | lib/LaTeX 21 | lib/HTML 22 | lib/BibTeX-Parser-*.tar.gz 23 | lib/HTML-Parser-*.tar.gz 24 | lib/LaTeX-ToUnicode-*.tar.gz 25 | -------------------------------------------------------------------------------- /CFLAGS.common: -------------------------------------------------------------------------------- 1 | -std=c99 -pedantic -Wall -Wextra -Werror 2 | -Wundef -D_GNU_SOURCE 3 | -lm 4 | -------------------------------------------------------------------------------- 
/CFLAGS.dbg: -------------------------------------------------------------------------------- 1 | -g 2 | -------------------------------------------------------------------------------- /CFLAGS.mode: -------------------------------------------------------------------------------- 1 | opt 2 | -------------------------------------------------------------------------------- /CFLAGS.opt: -------------------------------------------------------------------------------- 1 | -O3 -DNDEBUG -s 2 | -------------------------------------------------------------------------------- /DECOMPOSITIONS: -------------------------------------------------------------------------------- 1 | sym 2 | sym-n 3 | lap 4 | lapq 5 | seidel 6 | -------------------------------------------------------------------------------- /DECOMPOSITIONS_ASYM: -------------------------------------------------------------------------------- 1 | svd 2 | svd-n 3 | back 4 | herm 5 | hermn 6 | diag 7 | lapherm 8 | skew 9 | skewn 10 | lapskew 11 | lapd 12 | lapd-n 13 | diag-n 14 | stoch2 15 | stoch1 16 | mskew 17 | dedicom3 18 | takane 19 | lapdiag2 20 | -------------------------------------------------------------------------------- /GROUPS: -------------------------------------------------------------------------------- 1 | ALL 2 | SYM 3 | ASYM 4 | BIP 5 | SQUARE 6 | NEGATIVE 7 | NONUNWEIGHTED 8 | ASYMNEGATIVE 9 | SQUARENEGATIVE 10 | TIME 11 | TIME_NEGATIVE 12 | MULTI 13 | 14 | -------------------------------------------------------------------------------- /GROUPS-PLOT: -------------------------------------------------------------------------------- 1 | layout ALL 2 | degree ALL 3 | bidd ALL 4 | lorenz ALL 5 | distr.sym ALL 6 | distr.sym-n ALL 7 | distr.lap ALL 8 | map.sym ALL 9 | map.lap ALL 10 | map.stoch ALL 11 | assortativity ALL 12 | zipf ALL 13 | hopdistr ALL 14 | lybl ALL 15 | delaunay ALL 16 | outin ASYM 17 | rating_evolution NEGATIVE 18 | weights NONUNWEIGHTED 19 | cluscod SQUARE 20 | degcc SQUARE 21 | 
time_histogram TIME 22 | hopdistr_time.full TIME 23 | diadens TIME 24 | time_histogram_signed TIME_NEGATIVE 25 | rating_evolution2 TIME_NEGATIVE 26 | syngraphy SQUARE 27 | inter TIME 28 | inter2 TIME 29 | -------------------------------------------------------------------------------- /PLOTS: -------------------------------------------------------------------------------- 1 | layout 2 | degree 3 | bidd 4 | lorenz 5 | distr.sym 6 | distr.sym-n 7 | distr.lap 8 | map.sym 9 | map.lap 10 | map.stoch 11 | assortativity 12 | zipf 13 | hopdistr 14 | lybl 15 | delaunay 16 | outin 17 | rating_evolution 18 | weights 19 | cluscod 20 | degcc 21 | time_histogram 22 | hopdistr_time.full 23 | diadens 24 | time_histogram_signed 25 | rating_evolution2 26 | syngraphy 27 | inter 28 | inter2 29 | -------------------------------------------------------------------------------- /PLOTTING: -------------------------------------------------------------------------------- 1 | The plots generated by KONECT are stored as "plot/*.eps". 2 | 3 | All plots are printed with the function konect_print() from the KONECT 4 | toolbox. That function also contains style recommendations for plots. 5 | 6 | == Names of plots == 7 | 8 | All plots are named like this: 9 | 10 | plot/$SCRIPT.$TYPE.$NETWORK.eps 11 | 12 | where 13 | 14 | * $SCRIPT is the name of the generating script (without "_plot" if present) 15 | * $TYPE is the type, when one script generates multiple plots; Typically 16 | single letters, with "a" being the main plot, i.e. the only plot 17 | declared in the Makefile. 'u' and 'v' are common names when there are 18 | separate left/right plots, or separate in-degree/out-degree plots. 19 | * $NETWORK is the network name, and is always the last part of the name 20 | 21 | All name parts should be in all-lowercase, with words separated by 22 | dashes or underscores. 23 | 24 | All plots are saved in color EPS. 
25 | -------------------------------------------------------------------------------- /SCATTERS: -------------------------------------------------------------------------------- 1 | 2 | volume.size 3 | size.volume 4 | volume.uniquevolume 5 | size.avgdegree 6 | volume.avgdegree 7 | size.fill 8 | volume.maxdegree 9 | volume.reciprocity 10 | volume.negativity 11 | size.coco 12 | size.cocos 13 | volume.twostars 14 | volume.threestars 15 | volume.triangles 16 | volume.squares 17 | volume.power 18 | volume.gini 19 | volume.dentropyn 20 | volume.clusco 21 | volume.diam 22 | volume.diameff90 23 | volume.diameff50 24 | volume.snorm 25 | volume.alcon 26 | -------------------------------------------------------------------------------- /STATISTICS: -------------------------------------------------------------------------------- 1 | size 2 | size+2 3 | size+3 4 | volume 5 | uniquevolume 6 | loops 7 | twostars 8 | threestars 9 | fourstars 10 | triangles 11 | squares 12 | tour4 13 | maxdegree 14 | maxdegree+2 15 | maxdegree+3 16 | maxdegree+4 17 | maxdegree+5 18 | avgdegree 19 | avgdegree+2 20 | avgdegree+3 21 | fill 22 | avgmult 23 | coco 24 | cocos 25 | cocos+2 26 | diam 27 | diameff50 28 | diameff90 29 | mediandist 30 | meandist 31 | gini 32 | own 33 | own+2 34 | own+3 35 | own+4 36 | own+5 37 | dentropyn 38 | power 39 | power2 40 | power3 41 | power3+4 42 | power3+6 43 | power3+9 44 | power3+11 45 | power3+14 46 | power3+16 47 | power3+19 48 | power3+21 49 | power3+24 50 | assortativity 51 | assortativity+2 52 | inoutassort 53 | clusco 54 | cluscoasym 55 | snorm 56 | opnorm 57 | maxdiag 58 | alcon 59 | separation 60 | reciprocity 61 | nonbip 62 | nonbipn 63 | nonbipal 64 | anticonflict 65 | negativity 66 | conflict 67 | tconflict 68 | fconflict 69 | controllability 70 | controllability+2 71 | -------------------------------------------------------------------------------- /c/.gitignore: -------------------------------------------------------------------------------- 1 | 
*.*.h 2 | -------------------------------------------------------------------------------- /c/README: -------------------------------------------------------------------------------- 1 | 2 | These are C programs used in KONECT-Analysis. These are much 3 | faster and use much less memory than Matlab, but are also more complex. 4 | 5 | Each standalone program is a *.c file. Libraries are simply *.h files. 6 | We always compile *.c files in a single step, without using *.o files. 7 | 8 | The compiled files are in bin/. 9 | 10 | We use the C99 standard. 11 | 12 | Programs use custom bit widths per dataset; see width.h for an overview. 13 | 14 | 15 | -------------------------------------------------------------------------------- /c/asxprintf.h: -------------------------------------------------------------------------------- 1 | #ifndef ASXPRINTF_H 2 | #define ASXPRINTF_H 3 | 4 | #include 5 | 6 | /* Format a string like printf(), but return the result as a newly 7 | allocated zero-terminated string that the caller must release with free(). 8 | On failure (vasprintf() returns a negative value), report it with perror() and terminate via exit(1). 9 | */ 10 | char *asxprintf(const char *fmt, ...) 11 | __attribute__ ((format(printf, 1, 2))); 12 | 13 | char *asxprintf(const char *fmt, ...) 14 | { 15 | va_list ap; 16 | 17 | va_start(ap, fmt); 18 | 19 | char *ret; 20 | 21 | int r= vasprintf(&ret, fmt, ap); 22 | 23 | va_end(ap); 24 | 25 | if (r < 0) { 26 | perror("vasprintf"); 27 | exit(1); 28 | } 29 | 30 | return ret; 31 | } 32 | 33 | #endif /* ! ASXPRINTF_H */ 34 | -------------------------------------------------------------------------------- /c/bits.h: -------------------------------------------------------------------------------- 1 | #ifndef BITS_H 2 | #define BITS_H 3 | 4 | /* 5 | * These are various bit manipulation functions.
6 | */ 7 | 8 | #define BITMASK(b) (1 << ((b) % CHAR_BIT)) 9 | 10 | #define BITSLOT(b) ((b) / CHAR_BIT) 11 | 12 | #define BITSET(a, b) ((a)[BITSLOT(b)] |= BITMASK(b)) 13 | 14 | #define BITCLEAR(a, b) ((a)[BITSLOT(b)] &= ~BITMASK(b)) 15 | 16 | #define BITTEST(a, b) ((a)[BITSLOT(b)] & BITMASK(b)) 17 | /* Number of bytes needed to hold NB bits. NB is parenthesized so that expression arguments expand correctly. */ 18 | #define BITNSLOTS(nb) (((nb) + CHAR_BIT - 1) / CHAR_BIT) 19 | 20 | /* 21 | * Set all bits in A1 which are also set in A2. In other words, perform 22 | * 23 | * A1 |= A2. 24 | * 25 | * N is the length in bits of the array. 26 | * 27 | * A1 and A2 must be disjoint arrays. 28 | */ 29 | void BITSSET(unsigned char *restrict a1, 30 | const unsigned char *restrict a2, 31 | size_t n) 32 | { 33 | for (size_t i= 0; i < BITNSLOTS(n); ++i) { 34 | a1[i] |= a2[i]; 35 | } 36 | } 37 | 38 | /* Count the total number of set bits in the BITNSLOTS(N) bytes of A. 39 | * Padding bits past N in the last byte are counted too, so they must be zero. */ 40 | size_t BITSCOUNT(const unsigned char *restrict a, size_t n) 41 | { 42 | /* The code below is specific to 64-bit long longs */ 43 | assert(CHAR_BIT * sizeof(long long) == 64); 44 | 45 | size_t ret= 0; 46 | for (size_t i= 0; i < BITNSLOTS(n); ++i) { 47 | /* 48 | * This method to count bits in a char is from 49 | * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64 50 | */ 51 | ret += ((a[i] * 0x200040008001ULL & 0x111111111111111ULL) % 0xf); 52 | } 53 | return ret; 54 | } 55 | 56 | #endif /* ! BITS_H */ 57 | -------------------------------------------------------------------------------- /c/consts.h: -------------------------------------------------------------------------------- 1 | #ifndef CONSTS_H 2 | #define CONSTS_H 3 | 4 | /* Numerical values of variables representing the format and weights of a 5 | * network.
6 | */ 7 | 8 | #define FORMAT_SYM 1 9 | #define FORMAT_ASYM 2 10 | #define FORMAT_BIP 3 11 | 12 | #define WEIGHTS_UNWEIGHTED 1 13 | #define WEIGHTS_POSITIVE 2 14 | #define WEIGHTS_POSWEIGHTED 3 15 | #define WEIGHTS_SIGNED 4 16 | #define WEIGHTS_MULTISIGNED 5 17 | #define WEIGHTS_WEIGHTED 6 18 | #define WEIGHTS_MULTIWEIGHTED 7 19 | #define WEIGHTS_DYNAMIC 8 20 | #define WEIGHTS_MULTIPOSWEIGHTED 9 21 | 22 | #endif /* ! CONSTS_H */ 23 | 24 | -------------------------------------------------------------------------------- /c/dijkstra.h: -------------------------------------------------------------------------------- 1 | #ifndef DIJKSTRA_H 2 | #define DIJKSTRA_H 3 | 4 | /* 5 | * Dijkstra's algorithm on an SG1 graph. 6 | */ 7 | 8 | #include "bits.h" 9 | 10 | #if TYPE_u$ != TYPE_v$ 11 | # error Dataset must have equal types for U and V 12 | #endif 13 | 14 | /* Compute the distances from U to all nodes; every edge counts as 1. 15 | * Write the result into D, which must have length N (number of nodes). 16 | * D is first filled with all-ones bytes, so unreached nodes keep that value. 17 | * If VISITED is not NULL, the calloc'ed bit array of visited nodes is stored in *VISITED and must be freed by the caller; otherwise it is freed here. 18 | */ 19 | void dijkstra_$(const struct sgraph1_reader_$ *r, 20 | u$_ft u, 21 | u$_ft *d, 22 | unsigned char **visited) 23 | { 24 | const u$_ft n= r->h->n1; 25 | assert(u < n); 26 | /* NOTE(review): the calloc() result below is not checked for NULL */ 27 | unsigned char *s= calloc(BITNSLOTS(n), 1); 28 | struct binary_heap_u$ b= binary_heap_create_u$(); 29 | /* Fill D with all-ones bytes */ 30 | memset(d, (1 << CHAR_BIT) - 1, sizeof(u$_ft) * n); 31 | 32 | binary_heap_insert_u$(&b, u, 0); 33 | 34 | d[u]= 0; 35 | 36 | while (! binary_heap_empty_u$(&b)) { 37 | const u$_ft i= binary_heap_min_u$(b); 38 | assert(i < n); 39 | binary_heap_remove_min_u$(&b); 40 | if (! BITTEST(s, i)) { 41 | BITSET(s, i); 42 | const m$_ft end= i == n - 1 ? r->len_m : read_m$(r->adj_to, i + 1); 43 | for (m$_ft k= read_m$(r->adj_to, i); k < end; ++k) { 44 | const u$_ft j= read_u$(r->to, k); 45 | assert(j < n); 46 | assert(i != j); 47 | if (!
BITTEST(s, j)) { 48 | if(d[j] > d[i] + 1) { 49 | d[j]= d[i] + 1; 50 | binary_heap_insert_u$(&b, j, d[i] + 1); 51 | } 52 | } 53 | } 54 | } 55 | } 56 | 57 | binary_heap_delete_u$(&b); 58 | /* Hand the visited set to the caller, or release it */ 59 | if (visited) { 60 | *visited= s; 61 | } else { 62 | free(s); 63 | } 64 | } 65 | 66 | #endif /* ! DIJKSTRA_H */ 67 | -------------------------------------------------------------------------------- /c/feature_degree.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generate the degree vector of a network. 3 | * 4 | * INVOCATION 5 | * $0 SG1-FILE DEGREE-FILE LOGFILE 6 | */ 7 | 8 | #include "width.ma.h" 9 | #include "width.ua.h" 10 | #include "width.va.h" 11 | #include "width.wa.h" 12 | #include "width.ta.h" 13 | #include "width.fa.h" 14 | 15 | #include "sgraph1_io.a.h" 16 | #include "feature.a.h" 17 | 18 | #if WEIGHTS_a == WEIGHTS_POSITIVE && TYPE_wa != '-' 19 | /* In that case, need to sum up the weights */ 20 | # error "*** Not implemented" 21 | #endif 22 | 23 | int main(int argc, char **argv) 24 | { 25 | if (argc != 4) { 26 | fprintf(stderr, "*** Invalid number of parameters\n"); 27 | exit(1); 28 | } 29 | 30 | const char *const filename_sg1= argv[1]; 31 | const char *const filename_ft= argv[2]; 32 | /* The LOGFILE is ignored */ 33 | 34 | struct sgraph1_reader_a r; 35 | 36 | if (0 > sgraph1_open_read_a(filename_sg1, &r, 2)) { 37 | exit(1); 38 | } 39 | 40 | if (0 > sgraph1_advise_a(&r, MADV_SEQUENTIAL)) { 41 | perror(filename_sg1); 42 | exit(1); 43 | } 44 | 45 | struct feature_a f; 46 | 47 | if (0 > feature_open_write_a(filename_ft, &f, r.h->n1 48 | #if FEATURE_N2 49 | , r.h->n2 50 | #endif 51 | )) { 52 | exit(1); 53 | } 54 | 55 | if (0 > feature_advise_a(&f, MADV_SEQUENTIAL)) { 56 | perror(filename_ft); 57 | exit(1); 58 | } 59 | /* Degree of U = length of its slice of the adjacency array */ 60 | for (ua_ft u= 0; u < r.h->n1; ++u) { 61 | const ma_ft beg= read_ma(r.adj_to, u); 62 | const ma_ft end= u == r.h->n1 - 1 ?
r.len_m : read_ma(r.adj_to, u + 1); 63 | assert(beg <= end); 64 | assert((ma_ft)(end - beg) < fa_max); 65 | writeonzero_fa(f.f1, u, end - beg); 66 | } 67 | 68 | #if FEATURE_N2 69 | for (va_ft v= 0; v < r.h->n2; ++v) { 70 | const ma_ft beg= read_ma(r.adj_from, v); 71 | const ma_ft end= v == r.h->n2 - 1 ? r.len_m : read_ma(r.adj_from, v + 1); 72 | assert((ma_ft) (end - beg) < fa_max); 73 | writeonzero_fa(f.f2, v, end - beg); 74 | } 75 | #endif 76 | 77 | if (0 > feature_close_write_a(&f)) { 78 | perror(filename_ft); 79 | if (0 > unlink(filename_ft)) { 80 | perror(filename_ft); 81 | } 82 | exit(1); 83 | } 84 | 85 | exit(0); 86 | } 87 | 88 | 89 | -------------------------------------------------------------------------------- /c/graph_simple.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAPH_SIMPLE_$1$2_H 2 | #define GRAPH_SIMPLE_$1$2_H 3 | 4 | /* Code for transforming a graph into a simple graph. 5 | * $1 is the source graph. $2 is the target graph. The include guard is keyed on both. 6 | */ 7 | 8 | /* Read an SG0 graph into a struct graph_$2. 9 | * The given graph must be uninitialized. 10 | * The resulting graph is simple (SYM-POSITIVE).
11 | */ 12 | void graph_read_sg0_simple_$1_$2(struct graph_$2 *restrict g, 13 | struct sgraph0_reader_$1 *restrict r) 14 | { 15 | struct header *s= (struct header *)r->out; 16 | 17 | g->format = FORMAT_SYM; 18 | g->weights = WEIGHTS_POSITIVE; 19 | 20 | g->cols = 2; 21 | g->m = 0; 22 | 23 | #if FORMAT_$1 == FORMAT_SYM || FORMAT_$1 == FORMAT_ASYM 24 | assert(s->n1 == s->n2); 25 | g->n1 = s->n1; 26 | g->n2 = s->n2; 27 | #elif FORMAT_$1 == FORMAT_BIP 28 | g->n1 = s->n1 + s->n2; 29 | g->n2 = s->n1 + s->n2; 30 | #else 31 | # error 32 | #endif 33 | 34 | g->loops = 0; 35 | 36 | g->deg_to= calloc(arraylen_m$2(g->n1), 1); 37 | g->deg_from= NULL; 38 | 39 | g->to= malloc(g->n1 * sizeof(v$2_at *)); 40 | g->from= NULL; 41 | 42 | #if TYPE_w$2 != '-' 43 | g->weight_to= NULL; 44 | g->weight_from= NULL; 45 | #endif 46 | 47 | #if TYPE_t$2 != '-' 48 | g->timestamp_to= NULL; 49 | g->timestamp_from= NULL; 50 | #endif 51 | 52 | for (m$2_ft i= 0; i < s->m; ++i) { 53 | 54 | const u$1_ft u= read_u$1(r->u, i); 55 | const v$1_ft v= read_v$1(r->v, i); 56 | 57 | if (u == v) 58 | continue; 59 | 60 | #if FORMAT_$1 == FORMAT_SYM || FORMAT_$1 == FORMAT_ASYM 61 | 62 | const u$2_ft u2= u; 63 | const v$2_ft v2= v; 64 | 65 | #elif FORMAT_$1 == FORMAT_BIP 66 | 67 | assert(s->n1 < v$2_max); 68 | 69 | const u$2_ft u2= u; 70 | const v$2_ft v2= s->n1 + v; 71 | 72 | assert(v2 >= s->n1); 73 | 74 | #else 75 | # error 76 | #endif 77 | 78 | const m$2_ft degree_u= g->deg_to[u2]; 79 | const m$2_ft degree_v= g->deg_to[v2]; 80 | 81 | assert(degree_u < m$2_max); 82 | assert(degree_v < m$2_max); 83 | 84 | graph_append_v$2(g->to + u2, degree_u, v2); 85 | graph_append_v$2(g->to + v2, degree_v, u2); 86 | 87 | ++ g->deg_to[u2]; 88 | ++ g->deg_to[v2]; 89 | 90 | ++ g->m; 91 | } 92 | } 93 | 94 | #endif /* ! 
GRAPH_SIMPLE_$1$2_H */ 95 | -------------------------------------------------------------------------------- /c/graph_width.h: -------------------------------------------------------------------------------- 1 | #if TYPE_$1$2 != '-' 2 | 3 | #include 4 | 5 | /* Append the value X to the array whose owning pointer is pointed to by 6 | * P, and whose current degree is D. 7 | */ 8 | void graph_append_$1$2($1$2_at **p, m$2_ft d, $1$2_ft x) 9 | { 10 | /* Maximum number of array elements that fit into the pointer */ 11 | const m$2_ft maxd= arrayn_$1$2(sizeof($1$2_at *)); 12 | assert(maxd > 0); 13 | 14 | if (d > maxd) { 15 | /* realloc */ 16 | *p= realloc(*p, arraylen_$1$2(d + 1)); 17 | write_$1$2(*p, d, x); 18 | 19 | } else if (d < maxd) { 20 | /* write into pointer */ 21 | write_$1$2(($1$2_at *)p, d, x); 22 | } else { /* d == maxd */ 23 | /* move to alloc */ 24 | assert(d == maxd); 25 | $1$2_at *p_copy= *p; 26 | *p = malloc(arraylen_$1$2(d + 1)); 27 | *($1$2_at **)*p= p_copy; 28 | write_$1$2(*p, d, x); 29 | } 30 | } 31 | 32 | int compar_$1$2(const void *x, const void *y) 33 | { 34 | $1$2_at *xx= ($1$2_at *) x; 35 | $1$2_at *yy= ($1$2_at *) y; 36 | 37 | if (*xx < *yy) return -1; 38 | if (*xx > *yy) return +1; 39 | return 0; 40 | } 41 | 42 | #endif /* TYPE_$1$2 != '-' */ 43 | -------------------------------------------------------------------------------- /c/lcc.h: -------------------------------------------------------------------------------- 1 | #ifndef LCC_H 2 | #define LCC_H 3 | 4 | #include "bits.h" 5 | #include "consts.h" 6 | #include "dijkstra.$.h" 7 | 8 | #if FORMAT_a != FORMAT_SYM 9 | # error "*** Network must be undirected" 10 | #endif 11 | 12 | /* Find the largest connected component (LCC) in R. Return a malloc'ed 13 | bit array with 1's for nodes included in the largest connected 14 | component. 
15 | */ 16 | unsigned char *lcc_find(struct sgraph1_reader_$ *r); 17 | 18 | unsigned char *lcc_find(struct sgraph1_reader_$ *r) 19 | { 20 | const u$_ft n= r->h->n1; 21 | assert(n > 0); 22 | 23 | /* Size of bit arrays */ 24 | const size_t k= BITNSLOTS(n); 25 | 26 | /* Bit array of visited nodes */ 27 | unsigned char *const visited= calloc(k, 1); 28 | 29 | unsigned char *ret= NULL; 30 | u$_ft size_ret= 0; 31 | 32 | u$_ft *d= malloc(n * sizeof(u$_ft)); 33 | 34 | for (u$_ft u= 0; u < n; ++u) { 35 | /* Skip nodes that belong to an already explored component */ 36 | while (u < n && BITTEST(visited, u)) { 37 | ++u; 38 | } 39 | 40 | if (u == n) { 41 | break; 42 | } 43 | 44 | unsigned char *ret_new; 45 | dijkstra_$(r, u, d, &ret_new); 46 | assert(ret_new != NULL); 47 | 48 | u$_ft size_ret_new= BITSCOUNT(ret_new, n); 49 | 50 | /* The found component contains at least U itself */ 51 | assert(size_ret_new > 0); 52 | assert(BITTEST(ret_new, u)); 53 | 54 | BITSSET(visited, ret_new, n); 55 | 56 | if (size_ret_new > size_ret) { 57 | size_ret= size_ret_new; 58 | free(ret); 59 | ret= ret_new; 60 | 61 | /* If the connected component contains more than 62 | half of all nodes, there cannot be a larger one */ 63 | if (size_ret > n / 2) 64 | break; 65 | } else { 66 | free(ret_new); 67 | } 68 | } 69 | 70 | free(d); 71 | free(visited); /* only RET is handed to the caller */ 72 | assert(ret != NULL); 73 | assert(size_ret > 0); 74 | assert(size_ret <= n); 75 | 76 | return ret; 77 | } 78 | 79 | #endif /* ! LCC_H */ 80 | -------------------------------------------------------------------------------- /c/lines.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Determine the number of lines of a network from its SG1 file. This is 3 | * trivial, and can be determined without the SG1 file in most cases. 4 | * This implementation is only used in cases where the SG1 file is the 5 | * primary version of the dataset (at the moment, only simple~[NETWORK] 6 | * networks). 7 | * 8 | * 9 | * STDOUT 10 | * The number of lines is printed to stdout.
11 | * 12 | * INVOCATION 13 | * 14 | * $0 INPUT-FILE LOGFILE 15 | * (LOGFILE is accepted but never read.) */ 16 | 17 | #include "width.ma.h" 18 | #include "width.ua.h" 19 | #include "width.va.h" 20 | #include "width.wa.h" 21 | #include "width.ta.h" 22 | 23 | #include "sgraph1_io.a.h" 24 | 25 | int main(int argc, char **argv) 26 | { 27 | if (argc != 3) { 28 | fprintf(stderr, "*** Invalid number of parameters\n"); 29 | exit(1); 30 | } 31 | 32 | const char *const filename_sg1= argv[1]; 33 | 34 | struct sgraph1_reader_a r; 35 | 36 | if (0 > sgraph1_open_read_a(filename_sg1, &r, 0)) { 37 | exit(1); 38 | } 39 | 40 | /* No need for file advisories since we're only reading the 41 | header */ 42 | 43 | /* The line count is the M field of the header */ 44 | const ma_ft lines= r.h->m; 45 | 46 | printf("%" PR_fma "\n", lines); 47 | 48 | exit(0); 49 | } 50 | -------------------------------------------------------------------------------- /c/sgraph1_create.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Create an SG1 file from an SG0 file. 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #ifndef NDEBUG 9 | # include 10 | #endif 11 | 12 | #include "width.ma.h" 13 | #include "width.ua.h" 14 | #include "width.va.h" 15 | #include "width.wa.h" 16 | #include "width.ta.h" 17 | 18 | #include "graph_width.u.a.h" 19 | #include "graph_width.v.a.h" 20 | #include "graph_width.w.a.h" 21 | #include "graph_width.t.a.h" 22 | 23 | #include "sgraph0_io.a.h" 24 | #include "graph.a.h" 25 | #include "sgraph1_write.a.h" 26 | #include "graph_read_sgraph0.a.h" 27 | 28 | /* 29 | * INVOCATION 30 | * 31 | * $0 INPUT-FILENAME OUTPUT-FILENAME LOGFILE 32 | * 33 | * The input file must be in SG0 format; the output file is in SG1 format.
34 | */ 35 | int main(int argc, char **argv) 36 | { 37 | #ifndef NDEBUG 38 | if (mcheck(NULL)) exit(1); 39 | #endif 40 | 41 | if (argc != 4) { 42 | fprintf(stderr, "*** Invalid number of arguments\n"); 43 | exit(1); 44 | } 45 | /* argv[3] (LOGFILE) is not used */ 46 | const char *filename_in= argv[1]; 47 | const char *filename_out= argv[2]; 48 | 49 | struct sgraph0_reader_a r; 50 | 51 | if (0 > sgraph0_open_read_a(filename_in, &r, COLS_ALL)) { 52 | exit(1); 53 | } 54 | 55 | if (0 > sgraph0_advise_a(&r, MADV_SEQUENTIAL)) { 56 | perror(filename_in); 57 | goto error_close; 58 | } 59 | /* graph_read_sgraph0_a() presumably fills G from the reader; the reader is closed before sorting and writing */ 60 | struct graph_a g; 61 | 62 | graph_read_sgraph0_a(&g, &r); 63 | 64 | sgraph0_close_a(&r); 65 | 66 | graph_sort_a(&g); 67 | 68 | if (0 > sgraph1_write_a(&g, filename_out)) { 69 | exit(1); 70 | } 71 | 72 | exit(0); 73 | /* Reached only when sgraph0_advise_a() fails */ 74 | error_close: 75 | sgraph0_close_a(&r); 76 | exit(1); 77 | } 78 | -------------------------------------------------------------------------------- /c/sgraph1_create_lcc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Extract the largest connected component of a graph. Input and output 3 | * files are both SG1 files.
4 | */ 5 | 6 | #include "width.ma.h" 7 | #include "width.ua.h" 8 | #include "width.va.h" 9 | #include "width.wa.h" 10 | #include "width.ta.h" 11 | 12 | #include "sgraph1_io.a.h" 13 | #include "sgraph1_subgraph.a.h" 14 | #include "binary_heap.ua.h" 15 | #include "dijkstra.a.h" 16 | #include "lcc.a.h" 17 | 18 | #include "consts.h" 19 | 20 | #if FORMAT_a != FORMAT_SYM || WEIGHTS_a != WEIGHTS_UNWEIGHTED || LOOPS_a != 0 21 | # error "*** Only implemented for simple networks" 22 | #endif 23 | 24 | /* 25 | * INVOCATION 26 | * 27 | * $0 INPUT-FILE OUTPUTFILE LOGFILE 28 | */ 29 | int main(int argc, char **argv) 30 | { 31 | assert(argc == 4); 32 | (void) argc; 33 | const char *const filename_in= argv[1]; 34 | const char *const filename_out= argv[2]; 35 | 36 | struct sgraph1_reader_a r; 37 | 38 | if (0 > sgraph1_open_read_a(filename_in, &r, 2)) { 39 | exit(1); 40 | } 41 | 42 | if (0 > sgraph1_advise_a(&r, MADV_WILLNEED)) { 43 | perror(filename_in); 44 | exit(1); 45 | } 46 | 47 | unsigned char *lcc= lcc_find(&r); 48 | 49 | if (0 > sgraph1_subgraph_a(&r, lcc, filename_out)) { 50 | exit(1); 51 | } 52 | 53 | exit(0); 54 | } 55 | 56 | -------------------------------------------------------------------------------- /c/sgraph1_create_simple.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Create an Sg1 file from an Sg0 file, transforming any network in a 3 | * simple SYM network. 4 | */ 5 | 6 | /* 7 | * The passes M/U/V/W/T are those of the original graph. 
Additional 8 | * parameters for the generated graph: 9 | * 10 | * MX, UX, VX, WX, TX 11 | */ 12 | 13 | #include 14 | #include 15 | 16 | #include "width.ma.h" 17 | #include "width.ua.h" 18 | #include "width.va.h" 19 | #include "width.wa.h" 20 | #include "width.ta.h" 21 | 22 | #include "width.mb.h" 23 | #include "width.ub.h" 24 | #include "width.vb.h" 25 | #include "width.wb.h" 26 | #include "width.tb.h" 27 | 28 | #include "graph_width.u.b.h" 29 | #include "graph_width.v.b.h" 30 | 31 | #include "sgraph0_io.a.h" 32 | #include "graph.b.h" 33 | #include "graph_simple.a.b.h" 34 | #include "sgraph1_write.b.h" 35 | 36 | /* 37 | * INVOCATION 38 | * 39 | * $0 INPUT-FILE OUTPUT-FILE LOGFILE 40 | * 41 | * The input file must be in SG0 format; the output file is in SG1 format. 42 | */ 43 | int main(int argc, char **argv) 44 | { 45 | if (argc != 4) { 46 | fprintf(stderr, "*** Invalid number of arguments\n"); 47 | exit(1); 48 | } 49 | /* argv[3] (LOGFILE) is not used */ 50 | const char *const filename_in= argv[1]; 51 | const char *const filename_out= argv[2]; 52 | 53 | struct sgraph0_reader_a r; 54 | 55 | if (0 > sgraph0_open_read_a(filename_in, &r, COLS_ALL)) { 56 | exit(1); 57 | } 58 | 59 | if (0 > sgraph0_advise_a(&r, MADV_SEQUENTIAL)) { 60 | perror(filename_in); 61 | goto error_close; 62 | } 63 | /* G uses the "b" widths of the generated graph; the reader uses the "a" widths of the input */ 64 | struct graph_b g; 65 | 66 | graph_read_sg0_simple_a_b(&g, &r); 67 | 68 | sgraph0_close_a(&r); 69 | /* Sort, then graph_unique_b() (presumably removes duplicate edges -- confirm in graph.h) */ 70 | graph_sort_b(&g); 71 | graph_unique_b(&g); 72 | 73 | if (0 > sgraph1_write_b(&g, filename_out)) { 74 | exit(1); 75 | } 76 | 77 | exit(0); 78 | /* Reached only when sgraph0_advise_a() fails */ 79 | error_close: 80 | sgraph0_close_a(&r); 81 | exit(1); 82 | } 83 | -------------------------------------------------------------------------------- /c/sgraph1_dump.c: -------------------------------------------------------------------------------- 1 | 2 | /* Dump an SG1 file to an OUT file.
3 | * 4 | * INVOCATION 5 | * 6 | * $0 SG1-FILE OUT-FILE 7 | */ 8 | 9 | #include "width.ma.h" 10 | #include "width.ua.h" 11 | #include "width.va.h" 12 | #include "width.wa.h" 13 | #include "width.ta.h" 14 | 15 | #include "sgraph1_io.a.h" 16 | 17 | #include 18 | 19 | int main(int argc, char **argv) 20 | { 21 | /* Not implemented yet */ 22 | assert(TYPE_wa == '-' && TYPE_ta == '-'); 23 | 24 | if (argc != 3) { 25 | fprintf(stderr, "*** wrong number of arguments/n"); 26 | exit(1); 27 | } 28 | 29 | const char *const filename_sg1= argv[1]; 30 | const char *const filename_out= argv[2]; 31 | 32 | FILE *out= fopen(filename_out, "w"); 33 | if (out == NULL) { 34 | perror(filename_out); 35 | exit(1); 36 | } 37 | 38 | struct sgraph1_reader_a r; 39 | 40 | if (0 > sgraph1_open_read_a(filename_sg1, &r, 2)) { 41 | exit(1); 42 | } 43 | 44 | if (0 > sgraph1_advise_a(&r, MADV_SEQUENTIAL)) { 45 | perror(filename_sg1); 46 | exit(1); 47 | } 48 | 49 | assert(r.h->format == FORMAT_SYM); 50 | 51 | for (ua_ft u= 0; u < r.h->n1; ++u) { 52 | 53 | ma_ft end= (u == r.h->n1 - 1) ? r.h->m : read_ma(r.adj_to, u + 1); 54 | for (ma_ft i= read_ma(r.adj_to, u); i < end; ++i) { 55 | va_ft v= read_va(r.to, i); 56 | fprintf(out, "%" PR_fua "\t%" PR_fva "\n", u + 1, v + 1); 57 | } 58 | } 59 | 60 | if (0 > fclose(out)) { 61 | perror(filename_out); 62 | exit(1); 63 | } 64 | 65 | exit(0); 66 | } 67 | -------------------------------------------------------------------------------- /c/size.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Determine the size of a network from its SG1 file. This is 3 | * trivial, and can be determined without the SG1 file in most cases. 4 | * This implementation is only used in cases where the SG1 file is the 5 | * primary version of the dataset (at the moment, only simple~[NETWORK] 6 | * networks). 7 | * 8 | * The output follows the substatistics described in 9 | * 'konect-toolbox/konect_statistic_size.m'. 
10 | * 11 | * The size is printed to stdout. 12 | * 13 | * INVOCATION 14 | * 15 | * $0 INPUT-FILE LOGFILE 16 | */ 17 | 18 | #include "width.ma.h" 19 | #include "width.ua.h" 20 | #include "width.va.h" 21 | #include "width.wa.h" 22 | #include "width.ta.h" 23 | 24 | #include "sgraph1_io.a.h" 25 | 26 | int main(int argc, char **argv) 27 | { 28 | if (argc != 3) { 29 | fprintf(stderr, "*** Invalid number of parameters\n"); 30 | exit(1); 31 | } 32 | 33 | const char *const filename_sg1= argv[1]; 34 | 35 | struct sgraph1_reader_a r; 36 | 37 | if (0 > sgraph1_open_read_a(filename_sg1, &r, 0)) { 38 | exit(1); 39 | } 40 | 41 | /* No need for file advisories since we're only reading the 42 | * header. */ 43 | 44 | if (r.h->format == FORMAT_SYM || r.h->format == FORMAT_ASYM) { 45 | assert(r.h->n1 == r.h->n2); 46 | const ua_ft n= r.h->n1; 47 | printf("%" PR_fua "\n", n); 48 | } else if (r.h->format == FORMAT_BIP) { 49 | const ua_ft n1= r.h->n1; 50 | const va_ft n2= r.h->n2; 51 | const uintmax_t n= n1 + n2; 52 | printf("%" PRIuMAX "\n" 53 | "%" PR_fua "\n" 54 | "%" PR_fva "\n", 55 | n, n1, n2); 56 | } else 57 | assert(0); 58 | 59 | exit(0); 60 | } 61 | 62 | -------------------------------------------------------------------------------- /c/statistic_mediandegree.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Determine the median degree. 
3 | * 4 | * INVOCATION 5 | * $0 $SG1_FILENAME LOGFILE 6 | * 7 | * INPUT FILES 8 | * $SG1_FILENAME 9 | * 10 | * STDOUT: The full statistics; one per line, as specified in 11 | * konect-toolbox/m/konect_statistic_mediandegree.m 12 | */ 13 | 14 | #include "width.ma.h" 15 | #include "width.ua.h" 16 | #include "width.va.h" 17 | #include "width.wa.h" 18 | #include "width.ta.h" 19 | 20 | #include "consts.h" 21 | #include "sgraph1_io.a.h" 22 | 23 | #if FORMAT_a == FORMAT_SYM 24 | ma_at *d_sym; 25 | #elif FORMAT_a == FORMAT_BIP || FORMAT_a == FORMAT_ASYM 26 | ...; 27 | #else 28 | # "*** Invalid FORMAT" 29 | #endif 30 | 31 | ma_ft nth_element(ma_at *const p, ma_first 32 | 33 | int main(int argc, char **argv) 34 | { 35 | if (argc != 3) { 36 | fprintf(stderr, "*** Expected exactly two arguments\n"); 37 | exit(1); 38 | } 39 | 40 | const char *filename_sg1= argv[1]; 41 | 42 | struct sgraph1_reader_a r; 43 | if (0 != sgraph1_open_read_a(filename_sg1, &r, 44 | #if FORMAT_a == FORMAT_sym 45 | 1 46 | #else FORMAT_a == FORMAT_bip || FORMAT_a == FORMAT_asym 47 | 2 48 | #else 49 | # error "*** Invalid FORMAT_a" 50 | #endif 51 | )) 52 | exit(1); 53 | 54 | if (0 > sgraph1_advise_a(&r, MADV_SEQUENTIAL)) { 55 | perror(filename_sg1); 56 | exit(1); 57 | } 58 | 59 | #if FORMAT_a == FORMAT_sym 60 | 61 | d_sym= calloc(arraylen_ma(r->h->n1), 1); 62 | if (!d_sym) { 63 | perror("calloc"); 64 | exit(1); 65 | } 66 | 67 | for (ua_ft u= 0; u + 1 < (ua_ft) r.h->n1; ++u) { 68 | const ma_ft deg_u= read_ma(r.adj_to, u + 1) - read_ma(r.adj_to, u); 69 | writeonzero_a(d_sym, u, deg_u); 70 | } 71 | const ma_ft deg_u_last= r.len_m - read_ma(r.adj_to, r.h->n1 - 1); 72 | writeonzero_a(d_sym, r.h->n1-1, deg_u_last); 73 | 74 | ma_ft median= nth_element_(d_sym, 0, r.h->n1 / 2, r.h->n1); 75 | 76 | printf("%" PR_fma "\n", median); 77 | 78 | #elif FORMAT_a == FORMAT_asym || FORMAT_a == FORMAT_BIP 79 | ...; 80 | #else 81 | # error "*** Invalid FORMAT_a" 82 | #endif 83 | 84 | if (ferror(stdout)) { 85 | perror("stdout"); 
86 | exit(1); 87 | } 88 | 89 | exit(0); 90 | } 91 | -------------------------------------------------------------------------------- /c/statistic_twostars.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Compute the number of wedges. 3 | * 4 | * INVOCATION 5 | * 6 | * $0 FT-DEGREE-FILE LOGFILE 7 | * 8 | * The statistics are written to stdout. 9 | */ 10 | 11 | #include 12 | 13 | #include "width.ma.h" 14 | #include "width.ua.h" 15 | #include "width.va.h" 16 | #include "width.fa.h" 17 | 18 | #include "feature.a.h" 19 | 20 | #include "consts.h" 21 | 22 | int main(int argc, char **argv) 23 | { 24 | if (argc != 3) { 25 | fprintf(stderr, "*** Invalid number of arguments\n"); 26 | exit(1); 27 | } 28 | 29 | const char *const filename= argv[1]; 30 | 31 | struct feature_a f; 32 | 33 | if (0 > feature_open_read_a(filename, &f)) { 34 | perror(filename); 35 | exit(1); 36 | } 37 | 38 | if (0 > feature_advise_a(&f, MADV_SEQUENTIAL)) { 39 | perror(filename); 40 | exit(1); 41 | } 42 | 43 | /* 44 | * Total 45 | */ 46 | 47 | uintmax_t s= 0; 48 | 49 | #if FORMAT_a == FORMAT_SYM 50 | 51 | for (ua_ft u= 0; u < f.h->n1; ++u) { 52 | const fa_ft d= read_fa(f.f1, u); 53 | s += (d * (d - 1) / 2); 54 | } 55 | 56 | #elif FORMAT_a == FORMAT_ASYM 57 | 58 | for (ua_ft u= 0; u < f.h->n1; ++u) { 59 | const fa_ft d_out= read_fa(f.f1, u); 60 | const fa_ft d_in= read_fa(f.f2, u); 61 | const fa_ft d= d_out + d_in; 62 | s += (d * (d - 1) / 2); 63 | } 64 | 65 | #elif FORMAT_a == FORMAT_BIP 66 | 67 | for (ua_ft u= 0; u < f.h->n1; ++u) { 68 | const fa_ft d= read_fa(f.f1, u); 69 | s += (d * (d - 1) / 2); 70 | } 71 | 72 | for (va_ft v= 0; v < f.h->n2; ++v) { 73 | const fa_ft d= read_fa(f.f2, v); 74 | s += (d * (d - 1) / 2); 75 | } 76 | 77 | #else 78 | # error "*** Invalid format" 79 | #endif 80 | 81 | printf("%" PRIuMAX "\n", s); 82 | 83 | /* 84 | * Left and right 85 | */ 86 | 87 | #if FORMAT_a == FORMAT_BIP || FORMAT_a == FORMAT_ASYM 88 | 89 | uintmax_t s1= 0; 
90 | for (ua_ft u= 0; u < f.h->n1; ++u) { 91 | const fa_ft d= read_fa(f.f1, u); 92 | s1 += (d * (d - 1) / 2); 93 | } 94 | 95 | printf("%" PRIuMAX "\n", s1); 96 | 97 | uintmax_t s2= 0; 98 | for (va_ft v= 0; v < f.h->n2; ++v) { 99 | const fa_ft d= read_fa(f.f2, v); 100 | s2 += (d * (d - 1) / 2); 101 | } 102 | 103 | printf("%" PRIuMAX "\n", s2); 104 | 105 | #endif 106 | 107 | exit(0); 108 | } 109 | -------------------------------------------------------------------------------- /c/volume.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Determine the volume of a network from its SG1 file. This is 3 | * trivial, and can be determined without the SG1 file in most cases. 4 | * This implementation is only used in cases where the SG1 file is the 5 | * primary version of the dataset (at the moment, only simple~[NETWORK] 6 | * networks). 7 | * 8 | * The volume is printed to stdout. 9 | * 10 | * The input file must not be a POSITIVE network with weight column. 
11 | * 12 | * INVOCATION 13 | * 14 | * $0 INPUT-FILE LOGFILE 15 | */ 16 | 17 | #include "width__m__h" 18 | #include "width__u__h" 19 | #include "width__v__h" 20 | #include "width__w__h" 21 | #include "width__t__h" 22 | 23 | #include "sgraph1_io.h" 24 | 25 | #if FORMAT_a == FORMAT_POSITIVE && TYPE_wa != '-' 26 | # error "the combination of POSITIVE with a weight column is not supported" 27 | #endif 28 | 29 | int main(int argc, char **argv) 30 | { 31 | if (argc != 3) { 32 | fprintf(stderr, "*** Invalid number of parameters\n"); 33 | exit(1); 34 | } 35 | 36 | const char *const filename_sg1= argv[1]; 37 | 38 | struct sgraph1_reader r; 39 | 40 | if (0 > sgraph1_open_read(filename_sg1, &r, 0)) { 41 | exit(1); 42 | } 43 | 44 | /* No need for file advisories since we're only reading the 45 | header */ 46 | 47 | const m_ft m= r.h->m; 48 | 49 | printf("%" PR_fm "\n", m); 50 | 51 | exit(0); 52 | } 53 | -------------------------------------------------------------------------------- /c/widthhelper.h: -------------------------------------------------------------------------------- 1 | #ifndef WIDTHHELPER_H 2 | #define WIDTHHELPER_H 3 | 4 | #define CLASS_UNSIGNED 0 5 | #define CLASS_SIGNED 1 6 | #define CLASS_FLOAT 2 7 | 8 | #define CONCATx2(x, y) x ## y 9 | #define CONCATx3(x, y, z) x ## y ## z 10 | #define CONCAT2(x, y) CONCATx2(x, y) 11 | #define CONCAT3(x, y, z) CONCATx3(x, y, z) 12 | 13 | #endif /* ! WIDTHHELPER */ 14 | 15 | -------------------------------------------------------------------------------- /jl/inter.jl: -------------------------------------------------------------------------------- 1 | # 2 | # Plot inter-event distributions. 3 | # 4 | # This is the first Julia code in KONECT, and therefore can serve as an 5 | # example of how to do it. 
#
# Plot the distribution of inter-event times of a network.
#
# PARAMETERS
#	$network	Network name (environment variable)
#
# INPUT FILES
#	dat/out2.$network	Tab-separated; column 4 is used as timestamp
#
# OUTPUT FILES
#	plot/inter.a.$network.png	Both axes logarithmic
#	plot/inter.al.$network.png	Same plot after switching X to linear
#

using PyPlot;

include("step_full.jl");

network = ENV["network"];

T = readdlm("dat/out2.$network", '\t');

# Differences between consecutive sorted timestamps; zero gaps are
# dropped.
timestamps = sort(T[:,4]);
gaps = diff(timestamps);
gaps = filter(g -> g != 0, gaps);

fig = figure("Title", figsize=(5,3.7));

step_full(gaps);

xlabel("Inter-event time (t) [s]");
ylabel("P(x ≥ t)");
tight_layout();

# Vertical marker at one day (in seconds)
seconds_per_day = 60 * 60 * 24;
axvline(seconds_per_day, linestyle = "--", linewidth = 0.5, color = "k");

savefig("plot/inter.a.$network.png");

# Second variant:  same figure with a linear X axis
xscale("linear");
savefig("plot/inter.al.$network.png");
#
# Plot node-level inter-event time distributions.
#
# PARAMETERS
#	$network	Network name (environment variable)
#
# INPUT FILES
#	dat/out2.$network
#	dat/statistic.format.$network
#
# OUTPUT FILES
#	plot/inter2.{auv}{,l}.$network.png
#

using PyPlot;

include("read_statistic.jl");
include("konect_consts.jl");
include("step_full.jl");

network = ENV["network"];

format = read_statistic("format", network)[1];

T = readdlm("dat/out2.$network", '\t');

#
# Draw and save the plots for one node set.
#
#	c	Character used in the output file name
#	network	Network name
#	x	Node value of each event
#	t	Timestamp of each event (same length as x)
#
function inter2_one(c, network, x, t)

    # Collect, node by node, the differences between consecutive sorted
    # timestamps.
    gaps = [];
    for node in unique(x)
        times = sort(t[x .== node]);
        append!(gaps, diff(times));
    end
    gaps = filter(g -> g != 0, gaps);

    println("inter2: $network $c length(d) = $(length(gaps))");

    # Start from a fresh figure
    close();
    fig = figure("Title", figsize=(5,3.7));

    step_full(gaps);

    xlabel("Inter-event time (t) [s]");
    ylabel("P(x ≥ t)");

    tight_layout();

    # Vertical marker at one day (in seconds)
    axvline(60 * 60 * 24, linestyle = "--", linewidth = 0.5, color = "k");

    savefig("plot/inter2.$c.$network.png");

    # Second variant:  same figure with a linear X axis, suffix "l"
    xscale("linear");
    c2 = string(c, "l");
    savefig("plot/inter2.$c2.$network.png");
end

times_both = [T[:,4]; T[:,4]];

if format == KONECT_BIP || format == KONECT_ASYM
    inter2_one('u', network, T[:,1], T[:,4]);
    inter2_one('v', network, T[:,2], T[:,4]);
    # In the bipartite case the second column is shifted past the
    # largest value of the first column so that the two do not collide.
    second = (format == KONECT_BIP) ? T[:,2] + maximum(T[:,1]) : T[:,2];
    inter2_one('a', network, [T[:,1]; second], times_both);
elseif format == KONECT_SYM
    inter2_one('a', network, [T[:,1]; T[:,2]], times_both);
else
    @assert false
end
#
# Draw a "full" step plot on doubly logarithmic axes.
#
# PARAMETERS
#
#	x	(n)	Values; sorted IN PLACE by this function
#
# The i-th smallest value is plotted against (n - i + 1) / n.
#

function step_full(x)

    sort!(x);

    count = length(x);
    fractions = (count:-1:1) / count;

    step(x, fractions, linestyle="-");

    # If the Y axis were not logarithmic, we would also add the point
    # (x_sorted[end], 0) to the plot, making x_sorted[end] appear
    # twice.

    xscale("log");
    yscale("log");

end
#! /bin/sh
#
# Analogue to top(1) that shows KONECT processes:  clear the screen and
# run ./status, every 90 seconds, until interrupted.
#

# Stop as soon as one of the commands in the loop fails
set -e

while true
do
	clear
	./status
	sleep 90
done
6 | 7 | Matlab-BGL 8 | ========= 9 | 10 | http://www.stanford.edu/~dgleich/programs/matlab_bgl/ 11 | 12 | Installation: 13 | * Download version 4.x from website 14 | * unzip, such that the directory konect-analysis/lib/matlab_bgl/ is present 15 | 16 | WAFO 17 | ==== 18 | 19 | Needed to fit the generalized Gamma distribution. 20 | 21 | We used version 25 (i.e., the file wafo25.7z). 22 | 23 | Gplot2 24 | ====== 25 | 26 | Get the file gplot2.m from the following URL and put it into lib/: 27 | 28 | http://www.mathworks.com/matlabcentral/fileexchange/10342-gplot-enhanced/content/gplot2.m 29 | 30 | PLfit 31 | ===== 32 | 33 | We need the files plfit.m, zeta.m from 34 | 35 | http://tuvalu.santafe.edu/~aaronc/powerlaws/ 36 | 37 | Julia 38 | ===== 39 | 40 | See lib/JULIA 41 | -------------------------------------------------------------------------------- /lib/gridxy/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, Jos van der Geest 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in 12 | the documentation and/or other materials provided with the distribution 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 18 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /lib/hsl2rgb.m: -------------------------------------------------------------------------------- 1 | function rgb=hsl2rgb(hsl_in) 2 | %Converts Hue-Saturation-Luminance Color value to Red-Green-Blue Color value 3 | % 4 | %Usage 5 | % RGB = hsl2rgb(HSL) 6 | % 7 | % converts HSL, a M [x N] x 3 color matrix with values between 0 and 1 8 | % into RGB, a M [x N] X 3 color matrix with values between 0 and 1 9 | % 10 | %See also rgb2hsl, rgb2hsv, hsv2rgb 11 | 12 | % (C) Vladimir Bychkovsky, June 2008 13 | % written using: 14 | % - an implementation by Suresh E Joel, April 26,2003 15 | % - Wikipedia: http://en.wikipedia.org/wiki/HSL_and_HSV 16 | 17 | hsl=reshape(hsl_in, [], 3); 18 | 19 | H=hsl(:,1); 20 | S=hsl(:,2); 21 | L=hsl(:,3); 22 | 23 | lowLidx=L < (1/2); 24 | q=(L .* (1+S) ).*lowLidx + (L+S-(L.*S)).*(~lowLidx); 25 | p=2*L - q; 26 | hk=H; % this is already divided by 360 27 | 28 | t=zeros([length(H), 3]); % 1=R, 2=B, 3=G 29 | t(:,1)=hk+1/3; 30 | t(:,2)=hk; 31 | t(:,3)=hk-1/3; 32 | 33 | underidx=t < 0; 34 | overidx=t > 1; 35 | t=t+underidx - overidx; 36 | 37 | range1=t < (1/6); 38 | range2=(t >= (1/6) & t < (1/2)); 39 | range3=(t >= (1/2) & t < (2/3)); 40 | range4= t >= (2/3); 41 | 42 | % replicate matricies (one per color) to make the final expression simpler 43 | P=repmat(p, [1,3]); 44 | Q=repmat(q, [1,3]); 45 | rgb_c= (P + 
%
% Octave replacement for Matlab's corr(), implemented as a thin wrapper
% around corrcoef().
%
% PARAMETERS
%	a, b	Data; passed through to corrcoef()
%	c, d	(optional) Two further arguments, passed through
%		unchanged to corrcoef()
%
% RESULT
%	ret	Whatever corrcoef() returns for the given arguments
%
% Only the two-argument and four-argument forms are supported; any
% other number of arguments raises an error.
%

function ret = corr(a, b, c, d)

if nargin == 2

    ret = corrcoef(a, b);

elseif nargin == 4

    % C and D were previously missing from the signature, so this
    % branch could never be reached.
    ret = corrcoef(a, b, c, d);

else

    error('*** unsupported case');

end
%
% Goodness of fit of all matrix decompositions in function of rank.
%
% This test uses full matrices, so will only work for very small networks.
%
% PARAMETERS
%	$NETWORK
%
% OUTPUT
%	dat/approximation.$NETWORK.mat
%		functions	Cell array of function names
%		prec		RMSE values
%			.(function).values(i_decomposition, r)
%		decompositions
%		names_decompositions
%
% INPUT
%	dat/data.$NETWORK.mat
%	dat/info.$NETWORK
%	dat/meansi.$NETWORK.mat
%

r_max = 25; % Maximal rank
opts.disp = 2;

% Scaling factors of the two matrix exponentials.  The same factor must
% be used for the reference matrix (expm(alpha * a)) and for the
% decomposed form (expm(alpha * d)).
alpha_1 = 0.1;
alpha_2 = 0.03;

decompositions = { 'svd', 'diag', 'dedicom1u', 'dedicom1v', 'dedicom2', 'dedicom3' };
functions = { 'e1', 'e2', 'a' };

network = getenv('NETWORK');

consts = konect_consts();

info = read_info(network);
data = load(sprintf('dat/data.%s.mat', network));
means = load(sprintf('dat/meansi.%s.mat', network));

% Was 'konct_normalize_additively' (typo in the konect_ prefix)
T = konect_normalize_additively(data.T, means);
a = konect_spconvert(T, info.m, info.n);

fprintf(1, 'Computing exponential 1...\n');
a_1 = expm(alpha_1 * a);
fprintf(1, 'Computing exponential 2...\n');
a_2 = expm(alpha_2 * a);
fprintf(1, 'Done.\n');

names_decompositions = [];

labels_method = get_labels_method();

prec = struct();

% Columns of the edge list; the third column defaults to all ones when
% absent.
a1 = T(:,1);
a2 = T(:,2);
if size(T,2) >= 3
    a3 = T(:,3);
else
    a3 = ones(size(T,1), 1);
end

for i = 1 : length(decompositions)

    % No semicolon:  echoed as progress output
    decomposition = decompositions{i}

    names_decompositions = [ names_decompositions ; cellstr(labels_method.(regexprep(decomposition, '-', '_'))) ];

    for r = 1 : r_max

        % Echo the current rank as progress output
        r

        [u d v] = konect_decomposition(decomposition, a, r, info.format, info.weights, opts);

        for j = 1 : length(functions)
            f = functions{j};

            if strcmp(f, 'a')
                value = rmse_latent(a1, a2, a3, u, d, v);
            elseif strcmp(f, 'e1')
                value = rmse_full(a_1, u, expm(alpha_1 * d), v);
            elseif strcmp(f, 'e2')
                % Was expm(0.2 * d), which did not match a_2
                value = rmse_full(a_2, u, expm(alpha_2 * d), v);
            end

            prec.(f).values(i,r) = value;
        end
    end

end

save(sprintf('dat/approximation.%s.mat', network), '-v7.3', 'functions', 'prec', 'decompositions', 'names_decompositions');
%
% Generate assortativity plots.
%
% PARAMETERS
%	$network	Name of the network
%
% INPUT
%	dat/data.$network.mat
%	dat/info.$network
%
% OUTPUT
%	plot/assortativity.[auv].$network.eps
%		'u' and 'v' are only written for ASYM and BIP networks
%

consts = konect_consts();

network = getenv('network');

data = load(sprintf('dat/data.%s.mat', network));
info = read_info(network);

rows = data.T(:,1);
cols = data.T(:,2);

% Use the third column as multiplicity only for POSITIVE weights
if info.weights == consts.POSITIVE & size(data.T, 2) >= 3
    w = data.T(:,3);
else
    w = 1;
end

d_1 = sparse(rows, 1, w, info.n1, 1);
d_2 = sparse(cols, 1, w, info.n2, 1);

A = sparse(rows, cols, w, info.n1, info.n2);

% Axis texts of the two one-sided plots, depending on the format
switch info.format
  case consts.ASYM
    side_texts = { 'outdegree', 'indegree' };
  case consts.BIP
    side_texts = { 'left degree', 'right degree' };
  otherwise
    side_texts = {};
end

if ~isempty(side_texts)

    assortativity_one(d_1, d_2, A, 'u', side_texts{1});
    konect_print(sprintf('plot/assortativity.u.%s.eps', network));

    assortativity_one(d_2, d_1, A', 'v', side_texts{2});
    konect_print(sprintf('plot/assortativity.v.%s.eps', network));

end

% Plot over all nodes
if info.format == consts.BIP

    % Stack both node sets and build the block matrix [0 A; A' 0]
    dd = [ d_1 ; d_2 ];
    A_full = [ sparse(info.n1, info.n1) A ; A' sparse(info.n2, info.n2) ];

else

    dd = d_1 + d_2;
    A_full = A + A';

end

assortativity_one(dd, dd, A_full, 'a', 'degree');
konect_print(sprintf('plot/assortativity.a.%s.eps', network));
%
% Return suitable values for the parameters of axis() given the
% range.
%
% Given X axis values, it returns values of axis(1) and axis(2).
% Given Y axis values, it returns values of axis(3) and axis(4).
%
% The returned range is the data range widened by 10% on each side
% (in log space when IS_LOG is set).  When all values are equal, a
% range of width 1 (one unit of log when IS_LOG) is assumed before
% widening.
%
% PARAMETERS
%	x	(n*1) Values to be plotted
%	is_log	(0/1) Whether a logarithmic axis is used
%
% RESULTS
%	ret	(1*2) Min/max values to be passed to axis()
%

function ret = axis_fit(x, is_log)

% Fraction of the range added on each side
offset = 0.1;

% Nonpositive values cannot be shown on a logarithmic axis:  ignore them.
% (The debug echoes of IS_LOG and 'filter' that used to be printed here
% on every call have been removed.)
if is_log
    x = x(x > 0);
end

xi = min(x);
xa = max(x);

if is_log
    dx = log(xa) - log(xi);
    if dx == 0
        dx = 1;
    end
    ret = [ (exp(log(xi) - offset * dx)) (exp(log(xa) + offset * dx)) ];
else
    dx = xa - xi;
    if dx == 0
        dx = 1;
    end
    ret = [ (xi - offset * dx) (xa + offset * dx) ];
end
6 | %
7 | % PARAMETERS
8 | % $NETWORK
9 | %
10 | % INPUT
11 | % dat/data.$NETWORK.mat
12 | %
13 | % OUTPUT
14 | % plot/beta.[auv].$NETWORK.eps
15 | % a/u/v - All / left / right
16 | % (only these files are printed below; no "x" variant is generated here)
17 | %
18 |
19 | network = getenv('NETWORK');
20 |
21 | data = load(sprintf('dat/data.%s.mat', network));
22 |
23 | T = data.T;
24 |
25 | consts = konect_consts();
26 |
27 | info = read_info(network);
28 |
29 | if info.weights ~= consts.POSITIVE & size(T,2) >= 3 % Keep a third column only for POSITIVE weights; it is passed on as multiplicities
30 |     T(:,3:end) = [];
31 | end
32 |
33 | %
34 | % U, V
35 | %
36 | if info.format ~= consts.SYM % Separate left/right plots are skipped for the SYM format
37 |
38 |     if size(T,2) >= 3
39 |         q = T(:,3);
40 |     else
41 |         q = []; % [] tells beta_one() to count every entry once
42 |     end
43 |
44 |     beta_one(T(:,1), q, 'u');
45 |     konect_print(sprintf('plot/beta.u.%s.eps', network));
46 |
47 |     beta_one(T(:,2), q, 'v');
48 |     konect_print(sprintf('plot/beta.v.%s.eps', network));
49 |
50 | end
51 |
52 |
53 | %
54 | % A
55 | %
56 |
57 | p = [ T(:,1) ; T(:,2) ]; % Both endpoints of every edge
58 | if size(T,2) >= 3
59 |     q = [ T(:,3) ; T(:,3) ];
60 | else
61 |     q = [];
62 | end
63 |
64 | beta_one(p, q, 'a');
65 | konect_print(sprintf('plot/beta.a.%s.eps', network));
66 |
--------------------------------------------------------------------------------
/m/beta_one.m:
--------------------------------------------------------------------------------
1 | %
2 | % Plot one BETA curve.
3 | %
4 | % PARAMETERS
5 | % p (e*1) Node indexes
6 | % q (e*1) Multiplicities; [] to denote all ones
7 | % type Letter selecting the color: a field name of the struct
8 | %      returned by konect_colors_letter()
9 | %
10 |
11 | function beta_one(p, q, type)
12 |
13 | colors = konect_colors_letter();
14 |
15 | font_size = 24;
16 |
17 | % An empty multiplicity vector means that every entry of p counts once
18 | if isempty(q), q = 1; end
19 |
20 | % Degree of a node = sum of the multiplicities of its occurrences in p
21 | degrees = full(sparse(p, 1, q, max(p), 1));
22 |
23 | % Remove zero-degree nodes
24 | degrees = degrees(degrees ~= 0);
25 |
26 | beta_plot(degrees, colors.(type));
27 |
28 | xlabel('Relative degree (d / D)', 'FontSize', font_size);
29 | ylabel('P(x = d / D)', 'FontSize', font_size);
30 |
31 |
--------------------------------------------------------------------------------
/m/beta_plot.m:
--------------------------------------------------------------------------------
1 | %
2 | % Plot a distribution in Beta style.
3 | %
4 | % PARAMETERS
5 | % values (n*1) Values; must all be larger than zero
6 | % color (optional) Color of the line
7 | %
8 |
9 | function beta_plot(values, color)
10 |
11 | if ~exist('color', 'var')
12 |     color = [0 0 1];
13 | end
14 |
15 | font_size = 24;
16 |
17 | [counts ids] = sort(values);
18 |
19 | maxcount = counts(end);
20 | freq = histc(counts, 0 : maxcount);
21 |
22 | nz = freq ~= 0;
23 | x = 0 : maxcount;
24 | x = x(nz);
25 | y = freq(nz);
26 |
27 | hold on;
28 | loglog(x / sum(values), y / length(values), '+', 'Color', color);
29 |
30 | set(gca, 'XScale', 'log', 'YScale', 'log');
31 |
32 | ax = axis()
33 |
34 | [phat pci] = betafit(values / sum(values))
35 |
36 | N = 200;
37 | xx = exp(log(ax(1)) + (0:1:N) / N * (log(ax(2)) - log(ax(1))));
38 |
39 | yy = xx .^ (phat(1) - 1) .* (1 - xx) .^ (phat(2) - 1) / beta(phat(1), phat(2)) / sum(values);
40 |
41 | plot(xx, yy, '-');
42 |
43 | axis(ax);
44 |
45 | %n = 50;
46 | %hist(values / sum(values), (1 / (2 * n)) : (1 / n) : (1 - 1 / (2 * n)));
47 | %axis([0 1 0 (sum(values) / n)]);
48 |
49 | set(gca, 'FontSize', font_size);
50 |
51 | set(gca, 'XMinorTick', 'on');
52 | set(gca, 'YMinorTick', 'on');
53 |
set(gca, 'TickLength', [0.05 0.05]);
54 |
55 | % Workaround for Matlab bug. Otherwise, the minor ticks are not visible.
56 | ax = axis();
57 | if ax(1) > 0 & ax(3) > 0
58 |     set(gca, 'XTick', 10 .^ (ceil(log(ax(1)) / log(10)):floor(log(ax(2)) / log(10))));
59 |     set(gca, 'YTick', 10 .^ (ceil(log(ax(3)) / log(10)):floor(log(ax(4)) / log(10))));
60 | end
61 |
62 |
--------------------------------------------------------------------------------
/m/bidd_one.m:
--------------------------------------------------------------------------------
1 | %
2 | % Plot one BIDD curve.
3 | %
4 | % PARAMETERS
5 | % p (e*1) Node indexes
6 | % q (e*1) Multiplicities; [] to denote all ones
7 | % enable_normalization Enable division with degree sum; passed on to
8 | %      konect_plot_power_law() and selects the axis labels
9 | % type Letter selecting the color: a field name of the struct
10 | %      returned by konect_colors_letter()
11 | % name Name of the plotted quantity, used in the X axis label
12 | % symbol Symbol of the plotted quantity, used in both axis labels
13 | % enable_axis Same semantics as in konect_plot_power_law()
14 | %
15 |
16 | function bidd_one(p, q, enable_normalization, type, name, symbol, enable_axis)
17 |
18 | colors = konect_colors_letter();
19 |
20 | font_size = 24;
21 |
22 | % An empty multiplicity vector means that every entry of p counts once
23 | if isempty(q), q = 1; end
24 |
25 | % Degree of a node = sum of the multiplicities of its occurrences in p
26 | degrees = full(sparse(p, 1, q, max(p), 1));
27 |
28 | % Remove zero-degree nodes
29 | degrees = degrees(degrees ~= 0);
30 |
31 | konect_plot_power_law(degrees, [], enable_normalization, colors.(type), 0, enable_axis);
32 |
33 | if enable_normalization
34 |     xlabel(sprintf('Relative %s (%s / D)', name, symbol), 'FontSize', font_size);
35 |     ylabel(sprintf('P(x \\geq %s / D)', symbol), 'FontSize', font_size);
36 | else
37 |     xlabel(sprintf('%s (%s) [vertices]', name, symbol), 'FontSize', font_size);
38 |     ylabel(sprintf('P(x \\geq %s)', symbol), 'FontSize', font_size);
39 | end
40 |
--------------------------------------------------------------------------------
/m/check_failed.m:
--------------------------------------------------------------------------------
1 | function check_failed(text)
2 |
3 | FILE = fopen(sprintf('dat/check_error.%s', getenv('network')), 'w');
4 | if FILE < 0, error('fopen');
end
5 | fprintf(FILE, '%s\n', text);
6 | if fclose(FILE) < 0, error('fclose'); end
7 |
8 | data = 0;
9 | OUT = fopen(sprintf('dat/check.%s', getenv('network')), 'w');
10 | if OUT < 0, error('fopen'); end
11 | fprintf(OUT, '0\n');
12 | if fclose(OUT) < 0, error('fclose'); end
13 |
14 | exit(0);
15 |
16 |
--------------------------------------------------------------------------------
/m/check_successful.m:
--------------------------------------------------------------------------------
1 | %
2 | % Record that the check of a network was successful: remove a stale
3 | % error report, write "1" to the check file, and terminate.
4 | %
5 | % ENVIRONMENT
6 | % $network Network name
7 | %
8 | % OUTPUT FILES
9 | % dat/check.$network Contains the single line "1"
10 | % dat/check_error.$network Deleted if present
11 | %
12 | % This function does not return; it ends the session with exit(0).
13 | %
14 |
15 | function check_successful()
16 |
17 | network = getenv('network');
18 |
19 | % Only delete when present: delete() warns about missing files
20 | filename_error = sprintf('dat/check_error.%s', network);
21 | if exist(filename_error, 'file')
22 |     delete(filename_error);
23 | end
24 |
25 | OUT = fopen(sprintf('dat/check.%s', network), 'w');
26 | if OUT < 0, error('fopen'); end
27 | fprintf(OUT, '1\n');
28 | if fclose(OUT) < 0, error('fclose'); end
29 |
30 | exit(0);
31 |
32 |
--------------------------------------------------------------------------------
/m/cluscod.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute the distribution of local clustering coefficients. The
3 | % values are computed for the underlying undirected, unweighted
4 | % network.
5 | %
6 | % ENVIRONMENT
7 | % $network Network name
8 | %
9 | % INPUT FILES
10 | % dat/data.$network.mat
11 | % dat/info.$network
12 | %
13 | % OUTPUT FILES
14 | % dat/cluscod.$network.mat
15 | % c_local Local clustering coefficients, first output of konect_clusco() (presumably one value per node -- confirm)
16 | % c Second output of konect_clusco()
17 | % c2 Third output of konect_clusco()
18 | %
19 |
20 | consts = konect_consts();
21 |
22 | network = getenv('network');
23 |
24 | data = load(sprintf('dat/data.%s.mat', network));
25 |
26 | info = read_info(network);
27 |
28 | assert(info.format ~= consts.BIP); % This script refuses networks in BIP format
29 |
30 | % Ignore edge weights
31 | A = sparse(data.T(:,1), data.T(:,2), 1, info.n1, info.n2);
32 |
33 | % Remove multiple edges
34 | A = (A ~= 0);
35 |
36 | % Ignore edge directions
37 | A = A | A';
38 |
39 | [c_local c c2] = konect_clusco(A);
40 |
41 | save(sprintf('dat/cluscod.%s.mat', network), 'c_local', 'c', 'c2', '-v7.3');
42 |
--------------------------------------------------------------------------------
/m/cluscod_plot.m:
--------------------------------------------------------------------------------
1 | %
2 | % Plot the clustering coefficient degree distribution.
3 | %
4 | % PARAMETERS
5 | % $network
6 | %
7 | % INPUT
8 | % dat/cluscod.$network.mat
9 | %
10 | % OUTPUT
11 | % plot/cluscod.[a].$network.eps
12 | %
13 |
14 | network = getenv('network');
15 |
16 | font_size = 22;
17 | line_width = 3;
18 |
19 | data = load(sprintf('dat/cluscod.%s.mat', network));
20 |
21 | c_local = data.c_local;
22 |
23 | F = cdfplot(c_local);
24 |
25 | set(F, 'LineWidth', line_width);
26 |
27 | axis([0 1 0 1]);
28 |
29 | title('');
30 | xlabel('Local clustering coefficient (c)', 'FontSize', font_size);
31 | ylabel('P(x \leq c)', 'FontSize', font_size);
32 |
33 | set(gca, 'FontSize', font_size);
34 |
35 | konect_print(sprintf('plot/cluscod.a.%s.eps', network));
36 |
--------------------------------------------------------------------------------
/m/complex2rgb.m:
--------------------------------------------------------------------------------
1 | %
2 | % Convert complex numbers to RGB values for visualization.
3 | %
4 | % The argument (phase) is mapped to the hue and the absolute value to
5 | % the brightness: 0 is black, and the brightness approaches 1 as the
6 | % absolute value grows. The saturation is always 1.
7 | %
8 | % PARAMETERS
9 | % value Complex number, or array of n complex numbers
10 | %
11 | % RESULT
12 | % rgb (n*3) One RGB row per element of value, in column-major
13 | %      order; (1*3) for a scalar
14 | %
15 |
16 | function [rgb] = complex2rgb(value)
17 |
18 | % Treat any array as a column so that each element yields one row
19 | value = value(:);
20 |
21 | % angle() lies in [-pi, +pi], so the hue lies in [0, 1]
22 | h = angle(value) / (2*pi) + 0.5;
23 | s = ones(size(value));
24 | v = 1 - 1 ./ (abs(value) + 1);
25 |
26 | rgb = hsv2rgb([h s v]);
27 |
--------------------------------------------------------------------------------
/m/data.m:
--------------------------------------------------------------------------------
1 | %
2 | % Save the full dataset (not split) in a MAT file. If timestamps are
3 | % present, the data is sorted.
4 | %
5 | % PARAMETERS
6 | % $input Input filename (out.*)
7 | % $output Output filename (*.mat)
8 | %
9 | % INPUT FILES
10 | % $input
11 | %
12 | % OUTPUT FILES
13 | % $output Matlab file
14 | % T The data as found in the out.* file; when a fourth (timestamp) column is present, rows are sorted by it and only the first three columns are kept
15 | %
16 |
17 | T = load(getenv('input'));
18 |
19 | if size(T,2) >= 4
20 |     [x,i] = sort(T(:,4));
21 |     T = T(i, 1:3);
22 | end
23 |
24 | save(getenv('output'), '-v7.3', 'T');
25 |
--------------------------------------------------------------------------------
/m/decomposition_comp.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute a network decomposition on the full network.
3 | %
4 | % PARAMETERS
5 | % $network
6 | % $decomposition
7 | %
8 | % INPUT FILES
9 | % dat/data.$network.mat
10 | % dat/info.$network
11 | % dat/meansi.$network.mat
12 | %
13 | % OUTPUT FILES
14 | % dat/decomposition.$decomposition.$network.mat
15 | % .D Eigenvalues / Singular value / Middle matrix
16 | % .U Eigenvectors or equivalent
17 | % .V Eigenvectors; may be []
18 | % .r Used rank
19 | % .n Used number of nodes (may be less than input)
20 | %
21 |
22 | network = getenv('network');
23 | decomposition = getenv('decomposition');
24 |
25 | info = read_info(network);
26 |
27 | data = load(sprintf('dat/data.%s.mat', network));
28 |
29 | means = load(sprintf('dat/meansi.%s.mat', network));
30 |
31 | T = konect_normalize_additively(data.T, means);
32 |
33 | A = konect_spconvert(T, info.n1, info.n2);
34 |
35 | opts.disp = 2;
36 |
37 | r = get_rank_type(network, decomposition);
38 |
39 | [U D V D_u D_v n] = konect_decomposition(decomposition, A, r, info.format, info.weights, opts);
40 |
41 | save(sprintf('dat/decomposition.%s.%s.mat', decomposition, network), '-v7.3', ...
42 | 'D', 'U', 'V', 'r', 'n');
43 |
--------------------------------------------------------------------------------
/m/decomposition_map.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute a small network decomposition on the full network. The
3 | % rank is konect_first_index($decomposition) + 1, and the solver is
4 | % limited to 10 iterations with a tolerance of 1e-3.
5 | %
6 | % PARAMETERS
7 | % $network
8 | % $decomposition
9 | %
10 | % INPUT
11 | % dat/data.$network.mat
12 | % dat/info.$network
13 | % dat/meansi.$network.mat
14 | %
15 | % OUTPUT
16 | % dat/decomposition_map.$decomposition.$network.mat
17 | % .D Eigenvalues / Singular value / Middle matrix
18 | % .U Eigenvectors or equivalent
19 | % .V Eigenvectors; may be []
20 | % .r Used rank
21 | % .n Used number of nodes (may be less than input)
22 | %
23 |
24 | network = getenv('network');
25 | decomposition = getenv('decomposition');
26 |
27 | info = read_info(network);
28 |
29 | data = load(sprintf('dat/data.%s.mat', network));
30 |
31 | means = load(sprintf('dat/meansi.%s.mat', network));
32 |
33 | T = konect_normalize_additively(data.T, means);
34 |
35 | A = konect_spconvert(T, info.n1, info.n2);
36 |
37 | % Verbose output, and a deliberately loose iteration limit and tolerance
38 | opts.disp = 2;
39 | opts.maxit = 10;
40 | opts.tol = 1e-3;
41 |
42 | % One component more than the first index used by this decomposition
43 | first = konect_first_index(decomposition);
44 | r = first + 1;
45 |
46 | [U D V D_u D_v n] = konect_decomposition(decomposition, A, r, info.format, info.weights, opts);
47 |
48 | save(sprintf('dat/decomposition_map.%s.%s.mat', decomposition, network), '-v7.3', ...
49 |      'D', 'U', 'V', 'r', 'n');
50 |
--------------------------------------------------------------------------------
/m/decomposition_split.m:
--------------------------------------------------------------------------------
1 | %
2 | % Decompose the source and training matrices.
3 | %
4 | % PARAMETERS
5 | % $NETWORK
6 | % $DECOMPOSITION
7 | % $TYPE "source" or "training"
8 | %
9 | % INPUT
10 | % dat/split.$NETWORK.mat
11 | % dat/means.$NETWORK.mat (source only)
12 | % dat/meanst.$NETWORK.mat (training only)
13 | % dat/info.$NETWORK
14 | %
15 | % OUTPUT
16 | % dat/decomposition_split.$TYPE.$DECOMPOSITION.$NETWORK.mat
17 | % .D Eigenvalues / Singular value / Middle matrix
18 | % .U Eigenvectors or equivalent
19 | % .V Eigenvectors; may be []
20 | % .r Used rank
21 | % .n Used number of nodes (may be less than input)
22 | %
23 |
24 | network = getenv('NETWORK');
25 | decomposition = getenv('DECOMPOSITION');
26 | type = getenv('TYPE');
27 |
28 | split = load(sprintf('dat/split.%s.mat', network));
29 | info = read_info(network);
30 |
31 | % Select the edge set and the matching file of means
32 | switch type
33 |     case 'source'
34 |         T = split.T_source;
35 |         filename_means = sprintf('dat/means.%s.mat', network);
36 |     case 'training'
37 |         % The training set is the source set plus the target set
38 |         T = [ split.T_source ; split.T_target ];
39 |         filename_means = sprintf('dat/meanst.%s.mat', network);
40 |     otherwise
41 |         error(sprintf('*** Invalid type %s', type));
42 | end
43 |
44 | means = load(filename_means);
45 |
46 | A = konect_spconvert(konect_normalize_additively(T, means), split.n1, split.n2);
47 |
48 | opts.disp = 2;
49 |
50 | r = get_rank_type(network, decomposition);
51 |
52 | [U D V D_u D_v n] = konect_decomposition(decomposition, A, r, info.format, info.weights, opts);
53 |
54 | filename_out = sprintf('dat/decomposition_split.%s.%s.%s.mat', type, decomposition, network);
55 | save(filename_out, '-v7.3', 'D', 'U', 'V', 'r', 'n');
56 |
--------------------------------------------------------------------------------
/m/degcc.m:
--------------------------------------------------------------------------------
1 | %
2 | % For one network, scatter plot of degree vs local clustering coefficient.
3 | %
4 | % PARAMETERS
5 | % $network
6 | %
7 | % INPUT FILES
8 | % dat/cluscod.$network.mat
9 | % uni/out.$network
10 | %
11 | % OUTPUT FILES
12 | % plot/degcc.{a}.$network.eps
13 | %
14 |
15 | network = getenv('network');
16 |
17 | consts = konect_consts();
18 |
19 | weights = read_statistic('weights', network);
20 | weights = weights(1);
21 | n = read_statistic('size', network);
22 | n = n(1);
23 |
24 | T = load(sprintf('uni/out.%s', network));
25 |
26 | % Every edge contributes to the degree of both of its endpoints
27 | x = [T(:,1) ; T(:,2)];
28 |
29 | % The third column counts as edge multiplicity only for POSITIVE weights
30 | if weights == consts.POSITIVE && size(T,2) >= 3
31 |     w = [ T(:,3) ; T(:,3) ];
32 | else
33 |     w = 1;
34 | end
35 |
36 | degrees = sparse(x, 1, w, n, 1);
37 |
38 | cluscod = load(sprintf('dat/cluscod.%s.mat', network));
39 |
40 | hold on;
41 |
42 | x = degrees;
43 | y = cluscod.c_local;
44 |
45 | % Keep only the nodes of nonzero degree
46 | i = find(x > 0);
47 | x = x(i);
48 | y = y(i);
49 |
50 | plot(x, y, '.');
51 |
52 | xlabel('Degree');
53 | ylabel('Local clustering coefficient');
54 |
55 | % Average local clustering coefficient for each degree value
56 | n = max(x);
57 |
58 | clusco_count = sparse(x, 1, 1);
59 | clusco_sum = sparse(x, 1, y);
60 |
61 | plot(1:n, clusco_sum ./ clusco_count, '+-', 'Color', [1 0 0]);
62 |
63 | set(gca, 'XScale', 'log', 'YScale', 'log');
64 |
65 | konect_print(sprintf('plot/degcc.a.%s.eps', network));
66 |
--------------------------------------------------------------------------------
/m/delaunay_one.m:
--------------------------------------------------------------------------------
1 |
2 | function delaunay_one(A, U)
3 |
4 | hold on;
5 |
6 | gplot2(A | A', U, '-', 'LineWidth', 0.1, 'Color', 0.6 * [1 1 1]);
7 | gplot2(A | A', U, '.', 'Color', [0.7 0.3 0], 'MarkerSize', 50);
8 |
9 | axis off equal;
10 |
--------------------------------------------------------------------------------
/m/diadens.m:
--------------------------------------------------------------------------------
1 | %
2 | % Plot average degree and
diameter by time.
3 | %
4 | % ENVIRONMENT
5 | % $network Network name
6 | %
7 | % INPUT
8 | % dat/stepsi.$network
9 | % dat/statistic_time.full.diameter.$network
10 | % dat/statistic_time.full.avgdegree.$network
11 | %
12 | % OUTPUT
13 | % plot/diadens.a.$network.eps
14 | %
15 |
16 | font_size_label = 18;
17 |
18 | network = getenv('network');
19 |
20 | steps = load(sprintf('dat/stepsi.%s', network));
21 | statistic_diameter = load(sprintf('dat/statistic_time.full.diameter.%s', network));
22 | statistic_avgdegree = load(sprintf('dat/statistic_time.full.avgdegree.%s', network));
23 |
24 | % Left Y axis: average degree; right Y axis: diameter
25 | [ax, h1, h2] = plotyy(steps, statistic_avgdegree(:,1), steps, statistic_diameter(:,1), 'plot');
26 |
27 | % Both curves are thick; the line style tells them apart
28 | set(h1, 'LineWidth', 2.5, 'LineStyle', '--');
29 | set(h2, 'LineWidth', 2.5, 'LineStyle', '-');
30 |
31 | label_left = get(ax(1), 'Ylabel');
32 | label_right = get(ax(2), 'Ylabel');
33 | set(label_left, 'String', 'Average degree (d)', 'FontSize', font_size_label);
34 | set(label_right, 'String', 'Effective diameter (\delta_{0.9})', 'FontSize', font_size_label);
35 | set(ax(1), 'FontSize', font_size_label);
36 | set(ax(2), 'FontSize', font_size_label);
37 |
38 | legend(konect_label_statistic('avgdegree', 'matlab'), konect_label_statistic('diameter', 'matlab'), 'Location', 'SouthEast');
39 | legend('boxoff');
40 |
41 | xlabel(konect_label_statistic('volume', 'matlab'), 'FontSize', font_size_label);
42 |
43 | konect_print(sprintf('plot/diadens.a.%s.eps', network));
44 |
--------------------------------------------------------------------------------
/m/distr.m:
--------------------------------------------------------------------------------
1 | %
2 | % Compute a spectral distribution.
3 | %
4 | % PARAMETERS
5 | % $network Network
6 | % $decomposition Decomposition type
7 | %
8 | % INPUT
9 | % dat/data.$network.mat
10 | % dat/meansi.$network.mat
11 | %
12 | % OUTPUT
13 | % dat/distr.$decomposition.$network
14 | % One line per bin. Three columns: count, begin end.
15 | % For time networks, all columns are doubled for the
16 | % halftime graph.
17 | %
18 |
19 | % Odd to avoid splitting on zero
20 | bin_count = 49;
21 |
22 | network = getenv('network');
23 | decomposition = getenv('decomposition');
24 |
25 | info = read_info(network)
26 |
27 | data = load(sprintf('dat/data.%s.mat', network));
28 | means = load(sprintf('dat/meansi.%s.mat', network));
29 |
30 | A = konect_spconvert(konect_normalize_additively(data.T, means), info.n1, info.n2);
31 |
32 | [counts, begins, ends] = konect_spectral_distribution(A, decomposition, info.format, bin_count);
33 |
34 | ret = [ counts begins ends ];
35 |
36 | save(sprintf('dat/distr.%s.%s', decomposition, network), '-ascii', 'ret');
37 |
--------------------------------------------------------------------------------
/m/distrtest_colors.m:
--------------------------------------------------------------------------------
1 | %
2 | % Return the plot color associated with each distribution type.
3 | %
4 | % RESULT
5 | % colors Struct of RGB triples (1*3, components in [0,1]) by
6 | %      distribution type name
7 | %
8 |
9 | function colors = distrtest_colors()
10 |
11 | colors = struct( ...
12 |     'normal',      [  1   0   1], ...
13 |     'lognormal',   [  1  .3   1], ...
14 |     'logistic',    [  0   1   0], ...
15 |     'loglogistic', [ .3   1  .3], ...
16 |     'cauchy',      [  1   0   0], ...
17 |     'logcauchy',   [  1  .3  .3], ...
18 |     'gumbel',      [ .2  .7  .7], ...
19 |     'weibull',     [  0  .7  .7], ...
20 |     'hsd',         [  0   0   1], ...
21 |     'loghsd',      [ .3  .3   1], ...
22 |     'exp',         [ .5  .5  .2], ...
23 |     'pareto',      [ .5  .5   0], ...
24 |     'gamma',       [ .6  .6  .6], ...
25 |     'beta',        [  0   0   0], ...
26 |     'halfnormal',  [.48 .01 .36], ...
27 |     'gengamma',    [.15 .06 .03], ...
28 |     'poisson',     [.92 .77 .65]);
29 |
30 |
--------------------------------------------------------------------------------
/m/distrtest_types.m:
--------------------------------------------------------------------------------
1 |
2 | %
3 | % Return the types that can be passed to distrtest_plot().
4 | %
5 | % RESULTS
6 | % types Cell array of all type names
7 | %
8 |
9 | function types = distrtest_types()
10 |
11 | types = {'normal', 'lognormal', 'logistic', 'loglogistic', 'cauchy', 'logcauchy', ...
12 |          'gumbel', 'weibull', 'hsd', 'loghsd', 'exp', 'pareto', ...
13 |          'gamma', 'beta', 'halfnormal', 'gengamma', 'poisson'};
14 |
15 |
--------------------------------------------------------------------------------
/m/estimate_power_law.m:
--------------------------------------------------------------------------------
1 | %
2 | % Estimate a power law when only the largest values are known.
3 | %
4 | % Don't use this for degrees. For degrees, not all values are
5 | % necessarily known and power_law_*() should be used.
6 | %
7 | % The positive values are sorted in descending order, a straight line
8 | % is fitted to log(value) as a function of the rank 1, 2, ..., and
9 | % exp(-slope) of that line is returned.
10 | %
11 | % NOTE(review): the result is described as the "negative slope", but
12 | % the code returns exp(-slope), and the fit is against the rank rather
13 | % than its logarithm -- confirm which one is intended.
14 | %
15 | % PARAMETERS
16 | % values Values. Nonpositive values are ignored.
17 | %
18 | % RESULT
19 | % alpha exp(-slope) of the fitted line, used as the power law
20 | %      exponent
21 | %
22 | function alpha = estimate_power_law(values)
23 |
24 | % Largest value first, nonpositive values dropped
25 | positive = sort(values(values > 0), 'descend');
26 |
27 | ranks = (1:length(positive))';
28 |
29 | % Linear least-squares fit; coefficients(1) is the slope
30 | coefficients = polyfit(ranks, log(positive), 1);
31 |
32 | alpha = exp(-coefficients(1));
33 |
34 |
--------------------------------------------------------------------------------
/m/evol_permutation.m:
--------------------------------------------------------------------------------
1 | %
2 | % Draw a permutation plot. Given two eigenvector matrices, this will
3 | % draw a square matrix of cosine similarities using black for 1 and
4 | % white for 0.
5 | %
6 | % Every column is scaled to unit length first, so the values shown
7 | % are cosine similarities also when the columns are not orthonormal.
8 | %
9 | % PARAMETERS
10 | % u1,u2 (n*r) Eigenvectors to be compared
11 | %
12 |
13 | function evol_permutation(u1, u2)
14 |
15 | font_size = 20;
16 |
17 | % Colormap running from white (value 0) to black (value 1)
18 | cm = 1:-0.01:0;
19 | cm = [cm.^.6' cm.^.6' cm.^.9'];
20 |
21 | u1 = normalize_columns(u1);
22 | u2 = normalize_columns(u2);
23 |
24 | % Eigenvectors are only defined up to sign (or phase): ignore it
25 | similarities = abs(u1' * u2);
26 |
27 | imagesc(similarities', [0 1]);
28 | colorbar;
29 | pbaspect([size(u1,2) size(u2,2) 1]);
30 | colormap(cm);
31 | xlabel('k', 'FontSize', font_size);
32 | ylabel('l', 'FontSize', font_size);
33 | set(gca, 'FontSize', font_size);
34 |
35 | %
36 | % Scale every column of a matrix to unit Euclidean length.
37 | %
38 | function u = normalize_columns(u)
39 |
40 | lengths = sqrt(sum(abs(u) .^ 2, 1));
41 |
42 | % Leave all-zero columns untouched instead of producing NaN
43 | lengths(lengths == 0) = 1;
44 |
45 | u = bsxfun(@rdivide, u, lengths);
46 |
47 |
--------------------------------------------------------------------------------
/m/exp_entropy.m:
--------------------------------------------------------------------------------
1 | %
2 | % The exp-entropy, i.e. the entropy of a vector given by considering the
3 | % exponential of its entries as probabilities. Used with eigenvalues in
4 | % [671].
5 | %
6 | % RESULT
7 | % entropy Entropy
8 | %
9 | % PARAMETERS
10 | % values Vector of values
11 | %
12 |
13 | function entropy = exp_entropy(values)
14 |
15 | values = values - max(values);
16 |
17 | lnsum = log(sum(exp(values)));
18 |
19 | values = values - lnsum;
20 |
21 | entropy = - sum(exp(values) .* values);
22 |
23 |
--------------------------------------------------------------------------------
/m/fit_plot.m:
--------------------------------------------------------------------------------
1 | %
2 | % Plot curve fitting.
3 | %
4 | % PARAMETERS
5 | % $NETWORK
6 | % $DECOMPOSITION
7 | %
8 | % INPUT
9 | % dat/fit.$DECOMPOSITION.$NETWORK.mat
10 | %
11 | % OUTPUT
12 | % plot/fit.[ab].$DECOMPOSITION.$NETWORK.eps
13 | %
14 |
15 | network = getenv('NETWORK');
16 | decomposition = getenv('DECOMPOSITION');
17 |
18 | font_size = 18;
19 |
20 | fit = load(sprintf('dat/fit.%s.%s.mat', decomposition, network));
21 |
22 | a = fit.a;
23 | d = fit.d;
24 |
25 | % In complex decompositions, show the real parts (NOTE(review): this comment used to say "absolute values"; confirm that real() is intended)
26 | if ~isreal(a) || ~isreal(d)
27 |
28 |     a_show = real(a);
29 |     d_show = real(d);
30 |
31 | else
32 |     a_show = a;
33 |     d_show = d;
34 | end
35 |
36 | %
37 | % (b) Bare plot
38 | %
39 |
40 | plot(a_show, d_show, 'ok');
41 |
42 | gridxy([0], [0], 'LineStyle', '--');
43 |
44 | xlabel('Eigenvalue (\lambda_k)', 'FontSize', font_size);
45 | ylabel('New eigenvalue (f(\lambda_k))', 'FontSize', font_size);
46 | set(gca, 'FontSize', font_size);
47 |
48 | konect_print(sprintf('plot/fit.b.%s.%s.eps', decomposition, network));
49 |
50 | %
51 | % (a) With curves
52 | %
53 |
54 | [colors line_styles markers] = styles_submethod();
55 |
56 | hold on;
57 |
58 | plot(a_show, d_show, 'ok');
59 |
60 | ax = axis(); % Remembered here and restored below, after the curves are drawn
61 |
62 | mi = ax(1);
63 | ma = ax(2);
64 |
65 | curves = fieldnames(fit.curves);
66 | handles = [];
67 | legends = [];
68 | for i = 1 : length(curves)
69 |     curve = curves{i};
70 |     values = fit.curves.(curve);
71 |
72 |     if strcmp(curve, 'like'), continue; end; % The 'like' curve is not drawn
73 |
74 |     h = fit_plot_curve(curve, mi, ma, a, values, colors.(curve), line_styles.(curve), fit.pivot);
75 |
76 |     handles = [handles h];
77 |     legends = [legends cellstr(curve)];
78 | end
79 |
80 | gridxy([0], [0], 'LineStyle', '--');
81 |
82 | xlabel('Eigenvalue (\lambda_k)', 'FontSize', font_size);
83 | ylabel('New eigenvalue (f(\lambda_k))', 'FontSize', font_size);
84 |
85 | set(gca, 'FontSize', font_size);
86 |
87 | axis(ax);
88 |
89 | legend(handles, legends, 'Location', 'EastOutside');
90 |
91 | konect_print(sprintf('plot/fit.a.%s.%s.eps',
decomposition, network));
92 |
--------------------------------------------------------------------------------
/m/format_number.m:
--------------------------------------------------------------------------------
1 | %
2 | % Format an integer with commas as thousands separators, e.g.
3 | % 1234567 -> '1,234,567'.
4 | %
5 | % PARAMETERS
6 | % number Integer value; may be negative and of any magnitude
7 | %
8 | % RESULT
9 | % ret The formatted number as a string
10 | %
11 |
12 | function ret = format_number(number)
13 |
14 | % Format the absolute value and put the sign back at the end
15 | sign_prefix = '';
16 | if number < 0
17 |     sign_prefix = '-';
18 |     number = -number;
19 | end
20 |
21 | % Split off groups of three digits from the right; every group except
22 | % the leading one is zero-padded
23 | ret = '';
24 | while number >= 1000
25 |     ret = [sprintf(',%03d', mod(number, 1000)) ret];
26 |     number = floor(number / 1000);
27 | end
28 |
29 | % Concatenate rather than pass the parts to a single sprintf(): an
30 | % empty string argument would shift the remaining arguments
31 | ret = [sign_prefix sprintf('%d', number) ret];
32 |
--------------------------------------------------------------------------------
/m/format_statistic.m:
--------------------------------------------------------------------------------
1 | %
2 | % Format the value of a statistic.
3 | %
4 |
5 | function [text] = format_statistic(statistic, value)
6 |
7 | [l i] = konect_data_statistic();
8 |
9 | if i.(statistic)
10 |
11 |     text = format_number(value);
12 |
13 | else
14 |
15 |     text = sprintf('%.3f', value);
16 |
17 | end
18 |
19 |
20 |
--------------------------------------------------------------------------------
/m/get_ids_submethod.m:
--------------------------------------------------------------------------------
1 | %
2 | % Integer IDs of submethods; beginning at 1.
3 | %
4 |
5 | function [ids_submethod] = get_ids_submethod()
6 |
7 | ids_submethod.rat = 1; % NOTE(review): overwritten by "rat = 20" further down, so no field ends up with ID 1 -- confirm which name was meant here
8 | ids_submethod.main = 2;
9 |
10 | ids_submethod.common = 3;
11 | ids_submethod.adad = 4;
12 | ids_submethod.ra = 5;
13 | ids_submethod.jaccard = 6;
14 | ids_submethod.cosine = 7;
15 | ids_submethod.sorensen = 8;
16 | ids_submethod.hpi = 9;
17 | ids_submethod.hdi = 10;
18 | ids_submethod.lhni = 11;
19 |
20 | ids_submethod.lin = 12;
21 | ids_submethod.poly = 13;
22 | ids_submethod.polyo = 14;
23 | ids_submethod.polyn = 15;
24 | ids_submethod.polyon = 16;
25 | ids_submethod.rr = 17;
26 | ids_submethod.exp = 18;
27 | ids_submethod.expo = 19;
28 | ids_submethod.rat = 20; % Second assignment to the same field; this value is the one returned
29 | ids_submethod.rato = 21;
30 | ids_submethod.like = 22;
31 | ids_submethod.rank1 = 23;
32 | ids_submethod.rank2 = 24;
33 | ids_submethod.rank3 = 25;
34 | ids_submethod.euclidean = 26;
35 | ids_submethod.sne = 27;
36 | ids_submethod.rrs = 28;
37 | ids_submethod.expnl = 29;
38 | ids_submethod.ratn = 30;
39 | ids_submethod.ratno = 31;
40 | ids_submethod.uni = 32;
41 | ids_submethod.polyl = 33;
42 | ids_submethod.polynl = 34;
43 | ids_submethod.rrl = 35;
44 | ids_submethod.expl = 36;
45 | ids_submethod.lap = 37;
46 | ids_submethod.ratl = 38;
47 | ids_submethod.rank1i = 39;
48 | ids_submethod.rank2i = 40;
49 | ids_submethod.rank3i = 41;
50 | ids_submethod.polyx = 42;
51 |
52 | ids_submethod.commonasym = 43;
53 | ids_submethod.adadasym = 44;
54 | ids_submethod.raasym = 45;
55 | ids_submethod.jaccardasym = 46;
56 | ids_submethod.cosineasym = 47;
57 | ids_submethod.sorensenasym = 48;
58 | ids_submethod.hpiasym = 49;
59 | ids_submethod.hdiasym = 50;
60 | ids_submethod.lhniasym = 51;
61 |
62 | ids_submethod.commonout = 52;
63 | ids_submethod.adadout = 53;
64 | ids_submethod.raout = 54;
65 | ids_submethod.jaccardout = 55;
66 | ids_submethod.cosineout = 56;
67 | ids_submethod.sorensenout = 57;
68 | ids_submethod.hpiout = 58;
69 | ids_submethod.hdiout = 59;
70 | ids_submethod.lhniout = 60;
71 |
72 |
ids_submethod.commonin = 61;
73 | ids_submethod.adadin = 62;
74 | ids_submethod.rain = 63;
75 | ids_submethod.jaccardin = 64;
76 | ids_submethod.cosinein = 65;
77 | ids_submethod.sorensenin = 66;
78 | ids_submethod.hpiin = 67;
79 | ids_submethod.hdiin = 68;
80 | ids_submethod.lhniin = 69;
81 |
82 | ids_submethod.abscommon = 70;
83 | ids_submethod.absadad = 71;
84 | ids_submethod.absjaccard = 72;
85 | ids_submethod.abscosine = 73;
86 |
87 | ids_submethod.path3 = 74;
88 |
89 | ids_submethod.count = 74;
90 |
--------------------------------------------------------------------------------
/m/get_labels_measure.m:
--------------------------------------------------------------------------------
1 | %
2 | % The label associated with a measure. The content of the returned
3 | % struct is used in various places as the canonical list of measures to
4 | % use.
5 | %
6 | % RESULT
7 | % labels Struct of labels by measure name
8 | % labels_short Short names
9 | %
10 |
11 | function [labels labels_short] = get_labels_measure()
12 |
13 | % Individual measures can be enabled/disabled here, but CORR must
14 | % always be enabled, because we use it as a target in the makefile.
15 | %
16 | % Currently disabled:
17 | %    ap      'Average precision'
18 | %    map     'Mean average precision'
19 | %    spear   'Spearman correlation'
20 | %    mauc    'Mean area under the curve'
21 | %
22 | % KENDALL is not computed because it is too slow.
23 | % MAP and MAUC removed because they are slow and the results are
24 | % indistinguishable from AP and AUC.
25 |
26 | labels = struct( ...
27 |     'corr', 'Pearson correlation', ...
28 |     'auc',  'Area under the curve');
29 |
30 | labels_short = struct( ...
31 |     'corr', '\rho', ...
32 |     'auc',  'AUC');
33 |
--------------------------------------------------------------------------------
/m/get_labels_method.m:
--------------------------------------------------------------------------------
1 | %
2 | % Labels of methods and decompositions.
3 | %
4 | % RESULT
5 | % labels
6 | % .(method) Label of method; one field per method or decomposition name (the strings look like TeX markup for plot labels -- confirm)
7 | %
8 |
9 | function [labels] = get_labels_method()
10 |
11 | labels = struct();
12 |
13 | labels.zero = '0';
14 | labels.pref = 'PA';
15 | labels.mask = 'Mask';
16 | labels.neib = 'Neib.';
17 | labels.neib3 = 'P3';
18 |
19 | % Decompositions
20 | labels.sym = 'A';
21 | labels.sym_n = 'N';
22 | labels.lap = 'L';
23 | labels.lapc = 'Lc';
24 | labels.svd = '[0 A; A'' 0]'; % '' is an escaped quote: the label reads [0 A; A' 0]
25 | labels.svd_n = '[0 N; N'' 0]';
26 | labels.stoch2 = 'D^{-1}A';
27 | labels.stoch1 = 'AD^{-1}';
28 | labels.lapd = 'L_d';
29 | labels.lapd_n = 'Z_d';
30 | labels.back = 'A + \alpha A'''; % The label ends in a quote character: A + \alpha A'
31 | labels.diag = 'A (asym)';
32 | labels.diag_n = 'N (asym)';
33 | labels.skew = 'A - A''';
34 | labels.skewi = 'iA - iA''';
35 | labels.skewn = 'N - N''';
36 | labels.herm = 'A_H';
37 | labels.hermi = 'iA_H';
38 | labels.hermn = 'N_H';
39 | labels.lapherm = 'L_H';
40 | labels.lapherm2 = 'L_{H2}';
41 | labels.lapskew = 'L_S';
42 | labels.quantum = 'Q';
43 | labels.mskew = 'M';
44 | labels.lapquantum = 'L_Q';
45 | labels.lapq = 'K';
46 | labels.stochbip = 'S';
47 | labels.symabs = '\bar A';
48 | labels.symc = 'A_c';
49 |
50 | labels.quantum5 = 'Q5';
51 | labels.quantum10 = 'Q10';
52 | labels.quantum20 = 'Q20';
53 | labels.quantum50 = 'Q50';
54 | labels.quantum100 = 'Q100';
55 | labels.quantum200 = 'Q200';
56 | labels.quantum500 = 'Q500';
57 | labels.quantum785 = 'Q785';
58 | labels.quantum1000 = 'Q1000';
59 | labels.quantum1570 = 'Q1570';
60 |
61 |
62 | % DEDICOM
63 | labels.dedicom1u = 'DEDICOM 1u';
64 | labels.dedicom1v = 'DEDICOM 1v';
65 | labels.dedicom2 = 'DEDICOM 2';
66 | labels.dedicom2s = 'DEDICOM 2s';
67 | labels.dedicom3 = 'DEDICOM 3';
68 | labels.dedicom3_0 = 'DEDICOM 3/0';
69 | labels.dedicom4 = 'DEDICOM 4';
70 | labels.takane = 'Takane';
71 |
--------------------------------------------------------------------------------
/m/get_labels_method_submethod.m:
--------------------------------------------------------------------------------
%
% Labels for combined method/submethods.
%
% RESULT
%    labels    Struct of readable labels
%        .([method '_' submethod])    Label of the method/submethod
%                                     combination; the field name is the
%                                     method and the submethod joined by
%                                     an underscore
%

function labels = get_labels_method_submethod()

% Built in one call; the fields appear in the order listed here.
labels = struct( ...
    'sym_expo',      'SINH', ...
    'sym_rato',      'NEU', ...
    'sym_polyo',     'POLY', ...
    'sym_polyon',    'POLYN', ...
    ...
    'sym_n_polyo',   'N-POLY', ...
    'sym_n_polyon',  'N-POLYN', ...
    'sym_n_expo',    'N-HEAT', ...
    'sym_n_ratno',   'N-NEU', ...
    ...
    'lap_lap',       'COM', ...
    'lap_expl',      'HEAT', ...
    ...
    'pref_main',     'PA', ...
    ...
    'neib3_path3',   'P3');
--------------------------------------------------------------------------------
/m/get_rank.m:
--------------------------------------------------------------------------------
%
% Compute the dimensional parameter r in function of dataset.
%
% RESULT
%    r      Reduced rank for singular value decomposition (received as
%           r_svd by get_rank_type())
%    r_l    Reduced rank for Laplacian decomposition (received as r_lap
%           by get_rank_type())
%
% PARAMETERS
%    network    Dataset name
%
% INPUT
%    dat/runtime    Coefficients of the automatic setting; only read
%                   when the network has no predefined ranks
%

function [r, r_l] = get_rank(network)

scale          = 0.7;    % Scaling factor for automated computation
r_min          = 30;     % Lower bound for r
r_min_l        = r_min;  % Lower bound for r_l
lap_proportion = 0.5;    % r_l as a fraction of r

info = read_info(network);

n1 = info.n1;
n2 = info.n2;
m_ = info.lines;

% Rank must not be larger than dimensions of matrix
r_max = min(n1, n2);

%
% Predefined sizes [r r_l].  A field name is the network name in which
% every '_' was doubled and every '-' was then replaced by '_'.
%
predefined = struct( ...
    'advogato',                           [500   75], ...
    'arenas_meta',                        [150   75], ...
    'citeseer',                           [ 75   38], ...
    'contact',                            [  5    5], ...
    'movielens_100k__rating',             [ 60   30], ...
    'movielens_100k__rating_unweighted',  [100   50], ...
    'movielens_1m',                       [100   50], ...
    'www',                                [ 75   38], ...
    'epinions',                           [ 70   35], ...
    'slashdot_zoo',                       [ 90   75], ...
    'hep_th_citations',                   [ 75   38], ...
    'facebook_wosn_links',                [ 70   35], ...
    'facebook_wosn_wall',                 [ 49   15], ...
    'filmtipset',                         [  9    9], ...
    'trec_wt10g',                         [  9    9], ...
    'wiki_Talk',                          [  9    9], ...
    'roadNet_CA',                         [ 15   15], ...
    'dbpedia_similar',                    [100  100], ...
    'edit_frwikibooks',                   [ 40   20], ...
    'gottron_net_core',                   [100   50], ...
    'dblp_cite',                          [400   50], ...
    'elec',                               [1000 1000], ... % This is used in examples plots in the handbook
    'web_Stanford',                       [120   60], ...
    'wikisigned_k2',                      [120   60]);

key = strrep(strrep(network, '_', '__'), '-', '_');

if isfield(predefined, key)
    % Predefined ranks are returned as they are, without bounds.
    ranks = predefined.(key);
    r   = ranks(1);
    r_l = ranks(2);
    return;
end

%
% Automatic settings
%
x = load('dat/runtime');

r = round(scale * exp(-x(1)) * (n1 + n2) ^ (-x(2)) * m_ ^ (-x(3)) * (n1 * n2) ^ (-x(4)));

r_l = round(lap_proportion * r);

% Lower bounds
if r < r_min
    r = r_min;
end
if r_l < r_min_l
    r_l = r_min_l;
end

% Upper bounds
if r > r_max
    r = r_max;
end
if r_l > r_max
    r_l = r_max;
end

% Echo the automatically chosen values (no semicolon on purpose).
r
r_l


--------------------------------------------------------------------------------
/m/get_rank_type.m:
--------------------------------------------------------------------------------
%
% Compute the reduced rank in function of decomposition type.
%
% RESULT
%    r    Reduced rank to use
%
% PARAMETERS
%    network          Dataset name, passed on to get_rank()
%    decomposition    Name of the decomposition
%

function r = get_rank_type(network, decomposition)

[r_svd, r_lap] = get_rank(network);

switch decomposition

    case {'svd', 'sym', 'diag'}
        r = r_svd;

    case {'takane', 'lapd', 'dedicom3'}
        % A third of the Laplacian rank, but at least 5
        r = max(5, floor(r_lap / 3));

    otherwise
        % All other decompositions, including 'lapb', 'laps', 'svd-n'
        % and 'sym-n'
        r = r_lap;

end

--------------------------------------------------------------------------------
/m/get_tags.m:
--------------------------------------------------------------------------------
%
% Extract the tags from the metadata of a network.
%
% RETURN VALUE
%    ret     A struct containing one field (set to 1) for every tag;
%            without fields when META has no 'tags' field
%
% ARGUMENTS
%    meta    The metadata, as returned by read_meta()
%

function ret = get_tags(meta)

ret = struct();

if isfield(meta, 'tags')

    % A tag is a '#' followed by lowercase letters.  The matches and
    % the tags are echoed (no semicolon).
    match = regexp(meta.tags, '#[a-z]+', 'match')

    for k = 1 : numel(match)
        tag = match{k}
        tag = tag(2:end)    % Strip the leading '#'
        ret.(tag) = 1;
    end
end
--------------------------------------------------------------------------------
/m/get_updown_statistic.m:
--------------------------------------------------------------------------------
%
% Whether statistics should go up or down according to the shrinking diversity hypothesis.
%
% RESULT
%    updown_statistic    Struct by statistic name; each value is +1 or -1
%

function [updown_statistic] = get_updown_statistic()

% The fields appear in the order listed here.
updown_statistic = struct( ...
    'diameter',            -1, ...
    'network_rank_sq',     -1, ...
    'network_rank_norm4',  -1, ...
    'gini',                +1, ...
    'controllability',     -1, ...
    'controllabilityn',    -1, ...
    'alcon',               +1, ...
    'dentropyn',           -1, ...
    'alconn',              +1, ...
    'jain',                -1, ...
    'own',                 -1, ...
    'dentropy2',           -1, ...
    'dentropy',            -1, ...
    'network_rank_abs',    -1, ...
    'epower',              +1, ...
    'entropy',             -1, ...
    'entropyn',            -1, ...
    'separation',          +1, ...
    'power',               -1, ...
    'separationl',         +1, ...
    'clusco',              +1, ...
    'avgdegree',           +1);

% Disabled:
% updown_statistic.density = +1;
--------------------------------------------------------------------------------
/m/has_timestamps.m:
--------------------------------------------------------------------------------
%
% Return whether a network has timestamps.
%
% The decision is made from a single line of the file:  the last
% non-comment line among the first six lines.  For a file with at least
% six lines whose sixth line is not a comment, this is the sixth line.
% A file without any such line is reported as having no timestamps.
%
% ARGUMENTS
%    network    Name of network
%
% INPUT FILES
%    uni/out.$network
%
% RETURN VALUE
%    1/0    True/false; true when the inspected line has at least four
%           whitespace-separated fields
%

function [ret] = has_timestamps(network)

filename = sprintf('uni/out.%s', network);

FILE = fopen(filename, 'r');

if FILE < 0
    error(sprintf('opening "%s"', filename));
end

% Read at most six lines.  fgetl() returns a number (-1) instead of a
% string at the end of the file, so files shorter than six lines must
% stop early instead of passing that number to sscanf().
line = '';
for i = 1 : 6
    line_i = fgetl(FILE);
    if ~ischar(line_i)
        break;
    end
    % Lines starting with '%' are comments and never decide the result.
    if isempty(regexp(line_i, '^\s*%', 'once'))
        line = line_i;
    end
end

% Close the file before parsing, so that the handle is released on every
% path.
if fclose(FILE) < 0
    error(sprintf('closing "%s"', filename));
end

[a count] = sscanf(line, '%s %s %s %s');

ret = count >= 4;
--------------------------------------------------------------------------------
/m/hopdistr_comp.m:
--------------------------------------------------------------------------------
%
% Compute the hop plot of the largest connected component.  We always
% compute the undirected hop plot (and thus also the undirected
% diameter).
%
% PARAMETERS
%    $NETWORK    (uppercase environment variable)
%
% INPUT
%    dat/data.$NETWORK.mat
%    dat/info.$NETWORK
%
% OUTPUT
%    dat/hopdistr.$NETWORK
%        As integer, the number of hops (zero excluded)
%

network = getenv('NETWORK');

consts = konect_consts();

data = load(sprintf('dat/data.%s.mat', network));

info = read_info(network);

% Unweighted adjacency pattern
A = sparse(data.T(:,1), data.T(:,2), 1, info.n1, info.n2);
A = (A ~= 0);

% Make undirected and keep largest connected component
if info.format == consts.ASYM | info.format == consts.SYM
    A = konect_connect_matrix_square(A);
elseif info.format == consts.BIP
    A = konect_connect_matrix_bipartite(A);
else
    error('*** Invalid format');
end

% Echoed for the log (no semicolon)
n = length(A)

d = konect_hopdistr(A, info.format);

% Check fopen() explicitly, so that a missing or unwritable dat/
% directory is reported with the file name instead of as an invalid
% file identifier in fprintf().
filename_out = sprintf('dat/hopdistr.%s', network);
OUT = fopen(filename_out, 'w');
if OUT < 0
    error('*** Cannot open "%s" for writing', filename_out);
end
fprintf(OUT, '%ld\n', d);
if fclose(OUT), error 'fclose'; end;

--------------------------------------------------------------------------------
/m/hopdistr_distrtest.m:
--------------------------------------------------------------------------------
%
% Test which distributions fit the hop distribution.
%
% PARAMETERS
%    $network
%
% INPUT
%    dat/info.$network
%    dat/hopdistr.$network
%
% OUTPUT
%    dat/hopdistr_distrtest.$network.mat
%        .data.[distr-name]
%            the data of the fit as returned by
%            distrtest_multi()
%

network = getenv('network');

consts = konect_consts();

info = read_info(network);
dat = load(sprintf('dat/hopdistr.%s', network));

% Make a column vector
dat = dat(:)

% The number of nodes for which the hop distribution was
% computed.  This is the size of the network's largest connected
% component.
n = round(sqrt(dat(end)));

% DAT is cumulative:  the differences of consecutive entries are the
% counts of each hop value 0, 1, 2, ...
values = (0 : (length(dat) - 1))';
counts = dat - [0; dat(1:end-1)];

types = distrtest_types();

data = struct();

for i = 1 : length(types)

    type = types{i}

    % The 'beta' type is skipped.
    if ~strcmp(type, 'beta')

        ret = distrtest_multi(type, values, counts);

        data.(type) = ret;
    end
end

save(sprintf('dat/hopdistr_distrtest.%s.mat', network), '-v7.3', 'data');
--------------------------------------------------------------------------------
/m/konect_decomposition_dedicom4.m:
--------------------------------------------------------------------------------
%
% Moved to here from the Matlab toolbox because it doesn't work.  The
% iterations do not converge.
%
% Iterative solution to DEDICOM from [1]:
%
%    A = U D U'
%
% [1] Models for Analysis of Asymmetrical Relationships Among
% N Objects or Stimuli, Richard A. Harshman, Proc. First Meeting of
% the Psychometric Society and The Society for Mathematical
% Psychology, 1978.
%
% RESULT
%    U    (n*r) Factor matrix with orthonormal columns
%    D    (r*r) Central asymmetric matrix
%
% PARAMETERS
%    A       (n*n) Square asymmetric adjacency matrix
%    r       Rank
%    opts    Options for svds()
%

function [U D] = konect_decomposition_dedicom4(A, r, opts)

% Initialize both sides from the truncated SVD of A.
[uu D vv] = svds(double(A), r, 'L', opts);

% Threshold on the mean squared change of D
epsilon = 1e-7;

for i = 1:100000
    % U is the average of the left and right factors.
    U = 0.5 * (uu + vv);
    d_old = D;

    % Decompose U
    [u_u u_d u_v] = svd(U, 'econ');
    u_d_i = konect_xinv(u_d);

    % This computes D = U \ A / U';
    D = u_v * (u_d_i * (u_u' * A * u_u) * u_d_i' * u_v');

    % Reorder here because otherwise the convergence test does not work.
    [U D] = konect_order_dedicom(U, D);

    % The convergence test is only done every 20th iteration.
    if rem(i,20) == 0
        dif = norm(D - d_old, 'fro')^2 / prod(size(D));
        fprintf(1, 'iteration %d dif= %g\n', i, dif);
        if dif < epsilon, break; end;
    end

    % Compute uu = A / U' / D;
    uu = A * u_u * (u_d_i' * u_v' * pinv(D));

    % Compute vv = A' / U' / D';
    % NOTE(review): the update of uu above multiplies A by u_u, while
    % this line multiplies A' by the freshly updated uu rather than by
    % u_u.  Confirm whether this asymmetry is intended; the header
    % states that the iterations do not converge.
    vv = A' * uu * (u_d_i' * u_v' * pinv(D'));

    % Orthonormalize
    [qu ru] = qr(uu, 0);
    [qv rv] = qr(vv, 0);
    D = ru * D * rv';
    uu = qu;
    vv = qv;
end

--------------------------------------------------------------------------------
/m/ksdist.m:
--------------------------------------------------------------------------------
%
% Compute the Kolmogorov--Smirnov distance between a given sample and
% a given continuous distribution.
%
% PARAMETERS
%    x    (n*1) The sample; a row vector is accepted too and gives the
%         same result
%    F    @(x)(P(<=x)) The cumulative distribution function;
%         this function must take as input a vector of values,
%         and return a vector of the same size containing, for
%         each x, the probability that a variable is smaller or
%         equal to x, i.e., the cumulative distribution
%         function.  F is always called with a column vector.
%
% RESULTS
%    D    The Kolmogorov--Smirnov distance (a scalar)
%

function D = ksdist(x, F)

% Force column vectors.  With a row vector, subtracting the row F(x)
% from the column of steps below would expand to an n*n matrix, and D
% would come out as a vector instead of a scalar.
x = sort(x(:));

n = length(x);

f = F(x);
f = f(:);

% The empirical distribution function jumps from (i-1)/n to i/n at the
% i-th smallest sample value; both sides of each jump are compared
% with F.
steps_before = (0:(n-1))' / n;
steps_after  = (1:n)'     / n;

D = max(max(abs(steps_before - f)), max(abs(steps_after - f)));
%D = max(abs(((1:n)'-0.5)/n - f));
--------------------------------------------------------------------------------
/m/layout.m:
--------------------------------------------------------------------------------
%
% Draw a graph layout of one network.
%
% PARAMETERS
%    $network
%
% INPUT FILES
%    dat/data.$network.mat
%
% OUTPUT FILES
%    plot/layout.tmp.a.$network.png
%

network = getenv('network');

consts = konect_consts();

info = read_info(network);

data = load(sprintf('dat/data.%s.mat', network));

A = sparse(data.T(:,1), data.T(:,2), 1, info.n1, info.n2);

% Build a square symmetric adjacency matrix for the layout.
if info.format == consts.SYM | info.format == consts.ASYM

    A = A | A';

elseif info.format == consts.BIP

    % Bipartite:  the n1 row nodes come first, followed by the n2
    % column nodes.
    A = [sparse(info.n1, info.n1), A; A', sparse(info.n2, info.n2)];

else
    error('*** Invalid format');
end

X = fruchterman_reingold_force_directed_layout(A);

gplot2(A, X, 'o-', ...
       'MarkerFaceColor', [0 0 0], 'MarkerEdgeColor', [0 0 0]);

axis off;

konect_print_bitmap(sprintf('plot/layout.tmp.a.%s.png', network));

--------------------------------------------------------------------------------
/m/load_strings.m:
--------------------------------------------------------------------------------

%
% Load strings from a file.  The file should contain one string per
% line.  The function returns a cell array of strings, where each
% string is the content of one line.
%
% Leading and trailing whitespace of each line is removed, and empty
% lines are dropped.  A line may contain spaces; it is returned as a
% single string.
%
% PARAMETERS
%    filename    Name of the file to read
%
% RESULT
%    strings     (k*1) Cell array of strings
%

function [strings] = load_strings(filename)

FILE = fopen(filename);

if FILE < 0, error('fopen'); end;

% Split at line ends only.  Without the 'Delimiter' option, '%s' splits
% at any whitespace, which breaks a line containing spaces into several
% strings.
strings = textscan(FILE, '%s', 'Delimiter', '\n');
% Returns a cell array of one element, which is a cell array with all
% the strings.

strings = strings{1};

if 0 > fclose(FILE), error('fclose'); end;

% Normalize, so that files with one word per line give the same result
% as with whitespace splitting:  no surrounding whitespace (including a
% trailing carriage return) and no empty strings.
strings = strtrim(strings);
strings(cellfun(@isempty, strings)) = [];
strings = strings(:);
--------------------------------------------------------------------------------
/m/lorenz.m:
--------------------------------------------------------------------------------
%
% Draw the Lorenz curve for a network's degree distribution.
%
% PARAMETERS
%    $network
%
% INPUT
%    dat/data.$network.mat
%
% OUTPUT
%    plot/lorenz.[uva]{,b}.$network.eps
%        a - total
%        u,v - Row/column-based (not drawn for SYM networks)
%        b - Bare, i.e. without the P value
%

network = getenv('network');

data = load(sprintf('dat/data.%s.mat', network));

T = data.T;

consts = konect_consts();

info = read_info(network);

% Weights are only used as multiplicities when they are positive;
% otherwise all columns after the second one are dropped.
if info.weights ~= consts.POSITIVE & size(T,2) >= 3
    T(:,3:end) = [];
end

has_multiplicities = (size(T,2) >= 3);

%
% U, V
%
if info.format ~= consts.SYM

    if has_multiplicities
        q = T(:,3);
    else
        q = [];
    end

    % Column of T, bare flag, letter, output file
    plots = { ...
        1, 0, 'u', 'plot/lorenz.u.%s.eps'; ...
        1, 1, 'u', 'plot/lorenz.ub.%s.eps'; ...
        2, 0, 'v', 'plot/lorenz.v.%s.eps'; ...
        2, 1, 'v', 'plot/lorenz.vb.%s.eps'};

    for k = 1 : size(plots, 1)
        lorenz_one(T(:, plots{k, 1}), q, plots{k, 2}, plots{k, 3});
        konect_print(sprintf(plots{k, 4}, network));
    end
end


%
% A
%

if info.format == consts.BIP
    % Shift the column node IDs past the largest row node ID, so that
    % the two node sets do not overlap.
    m = max(T(:,1));
    p = [ T(:,1) ; T(:,2)+m ];
else
    p = [ T(:,1) ; T(:,2) ];
end

% Each edge appears twice in P, once for each of its two endpoints.
if has_multiplicities
    q = [ T(:,3) ; T(:,3) ];
else
    q = [];
end

lorenz_one(p, q, 0, 'a');
konect_print(sprintf('plot/lorenz.a.%s.eps', network));
lorenz_one(p, q, 1, 'a');
konect_print(sprintf('plot/lorenz.ab.%s.eps', network));
--------------------------------------------------------------------------------
/m/lorenz_one.m:
--------------------------------------------------------------------------------
%
% Plot one Lorenz curve.
%
% The curve is drawn into the current axes, together with the diagonal
% and the Gini coefficient as text ("G = ...").  Unless B is set, the
% antidiagonal and the point (1-own, own) with its text ("P = ...")
% are drawn too.
%
% PARAMETERS
%    p       (e*1) Node indexes
%    q       (e*1) Multiplicities; [] to denote all ones
%    b       1 for bare, else 0
%    type    One letter [auv]; determines the plot color
%

function lorenz_one(p, q, b, type)

font_size = 18;
black     = [0 0 0];

tick_values = [0 .2 .4 .6 .8 1];
tick_labels = {'0%', '20%', '40%', '60%', '80%', '100%'};

% The fill color is the line color, strongly blended with white.
palette    = konect_colors_letter();
line_color = palette.(type);
fill_color = 0.1 * line_color + 0.9 * [1 1 1];

[gini r_x r_y] = konect_gini(p, q);
own = konect_own(p, q);

hold on;

plot(r_x, r_y, '-', 'LineWidth', 3, 'Color', line_color);

axis square;

axis([0, 1, 0, 1]);

set(gca, 'FontSize', font_size);

% Area under the curve
fill([r_x ; 0], [r_y ; 0], fill_color, 'LineStyle', 'none');

% Diagonal
line([0 1], [0 1], 'LineWidth', 2, 'Color', black, 'LineStyle', '--');

if ~b
    % Antidiagonal, and the point (1-own, own) on it
    line([1 0], [0 1], 'LineWidth', 2, 'Color', black, 'LineStyle', '--');
    plot(1-own, own, '.', 'MarkerSize', 30, 'Color', black);
    text(1-own+0.04, own, sprintf('P = %.1f%%', own*100), ...
         'FontSize', font_size, 'HorizontalAlign', 'Left', 'VerticalAlign', 'Middle');
end

grid on;

set(gca, 'XTick', tick_values, 'XTickLabel', tick_labels);
set(gca, 'YTick', tick_values, 'YTickLabel', tick_labels);

xlabel('Share of nodes with smallest degrees');
ylabel('Share of edges');

text(0.25, 0.20, sprintf('G = %.1f%%', gini*100), 'FontSize', font_size);
--------------------------------------------------------------------------------
/m/map_line.m:
--------------------------------------------------------------------------------
%
% Draw a full graph (with edges) using decomposition data.
%
% PARAMETERS
%    x, y     (n*1) Coordinates
%    T        (e*2) Edges
%    dense    0/1 dense plot:  thin lines without markers, with the
%             nodes drawn on top as dots
%

function map_line(x, y, T, dense)

colors_letter = konect_colors_letter();

if dense
    line_width = 0.1;
    style      = '-';
    hold on;
else
    line_width = 1;
    style      = '-o';
end

edges  = sparse(T(:,1), T(:,2), 1);
coords = [x y];

gplot2(edges, coords, style, 'LineWidth', line_width, ...
       'Color', colors_letter.a);

if dense
    gplot2(edges, coords, '.', 'Color', [1 0.5 0]);
end

axis equal;
axis off;
--------------------------------------------------------------------------------
/m/map_minmax.m:
--------------------------------------------------------------------------------

%
% Minimal and maximal numbers, cutting the extreme points.
%
% The result is the interval [mean - k * dev, mean + k * dev] of the
% values that remain after cutting, with k = 2.5.
%
% PARAMETERS
%    ww       (n*1) The values
%
% RESULT
%    min_w    Lower end of the interval
%    max_w    Upper end of the interval
%
function [min_w, max_w] = map_minmax(ww)

enable_cut = 1;      % cut ALPHA points from each side
enable_l1  = 0;      % use the L1 deviation instead of the standard deviation
alpha      = .06;    % amount of points to cut
k          = 2.5;    % number of std. devs. to show

w = ww;

if enable_cut
    n = size(w,1);
    w = sort(w);
    % Both bounds are echoed (no semicolon).
    start = round(alpha * n)
    endin = round((1-alpha) * n)
    % For small N, START rounds to zero, and nothing is cut.
    if start > 0
        w = w(start:endin);
    end
end

mean_w = mean(w);

if enable_l1
    dev_w = mean(abs(w - mean_w));
else
    dev_w = std(w,1);
end

half_width = k * dev_w;

min_w = mean_w - half_width;
max_w = mean_w + half_width;

end
--------------------------------------------------------------------------------
/m/mask.m:
--------------------------------------------------------------------------------
%
% Mask approximation of rank 1.  We use the row-column (RC) algorithm
% from [1] and use SVD to initialize the eigenvectors.
%
% [1] Estimation of Rank Deficient Matrices from Partial Observations:
% Two-Step Iterative Algorithms, Rui F. C. Guerreiro and Pedro M. Q.
% Aguiar.
%
% PARAMETERS
%    a    (m*n) Matrix to approximate
%    w    (m*n) Weight (or mask) matrix, usually (a~=0)
%
% RESULT
%    u,v    (m*1,n*1) u*v' is the rank-1 approximation
%
% TODO
%    extend to rank > 1.
%    convergence criterion.
%

function [u,v] = mask(a, w)

[m,n] = size(a);

% Initialize with the dominant singular triple of A.
opts.disp = 2;
[u,d,v] = svds(a, 1, 'L', opts);

% Only U is scaled; V is recomputed from U in the first iteration.
u = u .* sqrt(d);

% Fixed number of iterations; there is no convergence test (see TODO).
for j = 1:16

    u_old = u;

    % Alternate:  update V for fixed U, then U for fixed V.
    v = mask_step(u, a, w);
    u = mask_step(v, a', w');

    % Every fifth iteration, print the change of U and the masked
    % residual norm of u*v' - a.
    if mod(j,5) == 0
        % The residual is accumulated one row or one column at a time,
        % looping over the smaller dimension.
        % NOTE(review): presumably done to avoid forming the full m*n
        % product u*v' -- confirm.
        square_sum = 0;
        if m < n
            for i = 1:m
                square_sum = square_sum + sum(((u(i,:) * v' - a(i,:)) .* w(i,:)).^2);
            end
        else
            for i = 1:n
                square_sum = square_sum + sum(((v(i,:) * u' - a(:, i)') .* w(:, i)').^2);
            end
        end
        norm_uv = sqrt(square_sum);

        fprintf(1, ' [%d] normdiff(u) = %g normdiff(uvT) = %g\n', ...
                j, ...
                norm(u - u_old), ...
                norm_uv);
    end
end
--------------------------------------------------------------------------------
/m/mask_step.m:
--------------------------------------------------------------------------------
%
% One iteration step in mask approximation.
%
% It must hold: a = a .* w; (i.e. a must be zero where w is zero.)
%
% PARAMETERS
%    u    (m*1) The previous eigenvector
%    a    (m*n) Adjacency matrix
%    w    (m*n) The weight (or mask) matrix; column j holds the weights
%         of column j of A
%
% RESULT
%    v    (n*1)
%

function v = mask_step(u, a, w)

% Only the rank-1 case is supported.
if size(u,2) ~= 1
    error 'Invalid'
end

n = size(a,2);

v = zeros(n,1);

fprintf(1, ' mask step /%d\n', n);

t = konect_timer(n);

for j = 1:n

    t = konect_timer_tick(t, j);

    if mod(j,5000) == 0, fprintf(1, ' %d\n', j); end;

    % Weighted least squares for the single coefficient v(j):
    % (u' W_j u)^+ (u' a_j), with W_j = diag(w(:,j)).  The weights do
    % not appear on the right-hand side because A is required to be
    % zero where W is zero.
    v(j,1) = pinv(u' * (u .* w(:,j))) * (u' * a(:,j));

end

konect_timer_end(t);
--------------------------------------------------------------------------------
/m/means.m:
--------------------------------------------------------------------------------
%
% Compute the row and column means.  The halves are saved, to streamline
% normalization.
%
% PARAMETERS
%    $network    Network name
%    $type       String
%        full        On the full dataset
%        split       On the source set of the split
%        training    On the training set of the split, i.e., the source
%                    set and the target set together
%
% INPUT
%    dat/data.$network.mat     (only FULL)
%    dat/split.$network.mat    (only SPLIT and TRAINING)
%
% OUTPUT
%    dat/means{,i,t}.$network.mat    The means; the suffix is 'i' for
%                                    FULL, empty for SPLIT and 't' for
%                                    TRAINING
%        U,V    The weights or []
%

network = getenv('network');
type = getenv('type');

info = read_info(network);

if strcmp(type, 'full')
    data = load(sprintf('dat/data.%s.mat', network));
    T = data.T;
    suffix = 'i';
elseif strcmp(type, 'split')
    split = load(sprintf('dat/split.%s.mat', network));
    T = split.T_source;
    suffix = '';
elseif strcmp(type, 'training')
    split = load(sprintf('dat/split.%s.mat', network));
    T = [ split.T_source ; split.T_target ];
    suffix = 't';
else
    % Without this branch, a missing or misspelled $type only fails
    % later, with an error about the undefined variable T.
    error('*** Invalid type "%s"', type);
end

[U V] = means_best(T, info.n1, info.n2, info.weights);

save(sprintf('dat/means%s.%s.mat', suffix, network), '-v7.3', 'U', 'V');

--------------------------------------------------------------------------------
/m/means_best.m:
--------------------------------------------------------------------------------
%
% This is the additive normalization used.  This function also knows
% which type of networks need normalization or not.
%
% PARAMETERS
%    T          Edge list, passed on to means_euv()
%    m,n        Row count, column count
%    weights    Weight type of the network; used as an index into the
%               INTERVAL_SCALE result of konect_data_weights()
%
% RESULT
%    U,V        Result of means_euv() when the weight type is marked as
%               interval scale, else [] for both
%

function [U V] = means_best(T, m, n, weights)

%%consts = konect_consts();
[negative interval_scale] = konect_data_weights();

if interval_scale(weights)
    [U V] = means_euv(T, m, n);
else
    U = [];
    V = [];
end
--------------------------------------------------------------------------------
/m/means_e.m:
--------------------------------------------------------------------------------
%
% Global mean only:  both vectors are constant and equal to half of the
% mean of the third column of the training set, so that an entry of U
% plus an entry of V gives that mean.
%
% PARAMETERS
%    T_training    (r*3) Training set; the third column contains the
%                  values that are averaged
%    m,n           Length of U, length of V
%
% RESULT
%    U,V           (m*1,n*1) Constant vectors
%

function [U,V] = means_e(T_training, m, n)

% The mean must be read from the parameter T_training; the name
% at_training used here previously is not defined anywhere in this
% function.
e = .5 * mean(T_training(:,3));

U = e * ones(m,1);
V = e * ones(n,1);
--------------------------------------------------------------------------------
/m/means_euv.m:
--------------------------------------------------------------------------------
%
% Average between subject, object and global mean.
%
% PARAMETERS
%    T      (r*3) Subject ID, object ID, rating
%    m,n    subject count, object count
%
% RESULT
%    U,V    (m*1,n*1) Vectors such that U 1 + 1 V' is an approximation
%

function [U,V] = means_euv(T, m, n)

A = konect_spconvert(T, m, n);

A_mask = (A ~= 0);

% Weights:  0.4 for the row mean, 0.4 for the column mean, and 0.1 for
% the global mean in each of U and V (i.e., 0.2 in U + V).  The value
% of E is echoed (no semicolon).
e = .1 * mean(T(:,3))
U = .4 * (sum(A,2) ./ sum(A_mask, 2)) ;
V = .4 * (sum(A,1) ./ sum(A_mask, 1))';

% Replace NaN (x ~= x is only true for NaN) by zero.  NaN arises as 0/0
% for rows and columns without nonzero entries.
U(U ~= U) = 0;
V(V ~= V) = 0;
U = U + e;
V = V + e;

assert(sum(U ~= U) + sum(V ~= V) == 0);

--------------------------------------------------------------------------------
/m/means_regr.m:
--------------------------------------------------------------------------------
%
% The means by linear regression.
%
% The first 80% of the rows of T are used to compute the global, row
% and column means; the remaining rows are used to fit the weights of
% these three means by least squares.  The weights are then applied to
% the means of all of T.
%
% PARAMETERS
%    T      (r*3) Row ID, column ID, value
%    m,n    Row count, column count
%
% RESULT
%    U,V    (m*1,n*1) Half of the weighted global mean plus the weighted
%           row means (U) or column means (V)
%

function [U,V] = means_regr(T, m, n)

count     = size(T, 1);
count_fit = round(count * .8);
rest      = (1 + count_fit) : count;

A_fit      = konect_spconvert(T(1:count_fit, :), m, n);
A_fit_mask = (A_fit ~= 0);

A_all      = konect_spconvert(T, m, n);
A_all_mask = (A_all ~= 0);

%
% Means over the first part.  MEANE is echoed (no semicolon).
%
meane = mean(T(1:count_fit, 3))
meanu = sum(A_fit, 2)  ./ sum(A_fit_mask, 2);
meanv = sum(A_fit, 1)' ./ sum(A_fit_mask, 1)';

% Rows and columns without entries give 0/0 = NaN; use zero for them.
meanu(isnan(meanu)) = 0;
meanv(isnan(meanv)) = 0;

%
% Fit the three weights on the remaining rows.  W_REGR is echoed (no
% semicolon).
%
predictors = [ (meane * ones(count - count_fit, 1)), meanu(T(rest, 1)), meanv(T(rest, 2)) ];
targ = T(rest, 3);

w_regr = predictors \ targ

%
% Means over all of T
%
meane = mean(T(:,3));
meanu = sum(A_all, 2)  ./ sum(A_all_mask, 2);
meanv = sum(A_all, 1)' ./ sum(A_all_mask, 1)';

meanu(isnan(meanu)) = 0;
meanv(isnan(meanv)) = 0;

% The weighted global mean is split evenly between U and V.
half_global = .5 * w_regr(1) * meane;

U = half_global + w_regr(2) * meanu;
V = half_global + w_regr(3) * meanv;
--------------------------------------------------------------------------------
/m/means_regrn.m:
--------------------------------------------------------------------------------
%
% The means by nonnegative linear regression.
%
% The first 80% of the rows of T are used to compute the global, row
% and column means; the remaining rows are used to fit nonnegative
% weights of these three means with lsqnonneg().  The weights are then
% applied to the means of all of T.
%
% PARAMETERS
%    T      (r*3) Row ID, column ID, value
%    m,n    Row count, column count
%
% RESULT
%    U,V    (m*1,n*1) Half of the weighted global mean plus the weighted
%           row means (U) or column means (V)
%

function [U,V] = means_regrn(T, m, n)

count     = size(T, 1);
count_fit = round(count * .8);
rest      = (1 + count_fit) : count;

A_fit      = konect_spconvert(T(1:count_fit, :), m, n);
A_fit_mask = (A_fit ~= 0);

A_all      = konect_spconvert(T, m, n);
A_all_mask = (A_all ~= 0);

%
% Means over the first part.  MEANE is echoed (no semicolon).
%
meane = mean(T(1:count_fit, 3))
meanu = sum(A_fit, 2)  ./ sum(A_fit_mask, 2);
meanv = sum(A_fit, 1)' ./ sum(A_fit_mask, 1)';

% Rows and columns without entries give 0/0 = NaN; use zero for them.
meanu(isnan(meanu)) = 0;
meanv(isnan(meanv)) = 0;

%
% Fit the three nonnegative weights on the remaining rows.  W_REGRN is
% echoed (no semicolon).
%
predictors = [ (meane * ones(count - count_fit, 1)), meanu(T(rest, 1)), meanv(T(rest, 2)) ];
targ = T(rest, 3);

w_regrn = lsqnonneg(full(predictors), targ)

%
% Means over all of T
%
meane = mean(T(:,3));
meanu = sum(A_all, 2)  ./ sum(A_all_mask, 2);
meanv = sum(A_all, 1)' ./ sum(A_all_mask, 1)';

meanu(isnan(meanu)) = 0;
meanv(isnan(meanv)) = 0;

% The weighted global mean is split evenly between U and V.
half_global = .5 * w_regrn(1) * meane;

U = half_global + w_regrn(2) * meanu;
V = half_global + w_regrn(3) * meanv;
--------------------------------------------------------------------------------
/m/measure_compute.m:
--------------------------------------------------------------------------------
%
% Compute a link prediction measure.
%
% The work is done by the function named measure_compute_[MEASURE].
%
% PARAMETERS
%    measure    Name of measure
%    p          (e*1) Predictions
%    T          (e*3) To be predicted
%
% RESULT
%    value      Link prediction measure ; higher is better
%
function [value] = measure_compute(measure, p, T)

value = feval(sprintf('measure_compute_%s', measure), p, T);
--------------------------------------------------------------------------------
/m/measure_compute_ap.m:
--------------------------------------------------------------------------------
%
% Compute average precision.
%
% PARAMETERS
%    p    (e*1) Predictions
%    T    (e*3) To be predicted; only the third column is used
%
% RESULT
%    value    Average precision, as computed by konect_ap()
%

function [value] = measure_compute_ap(p, T)

value = konect_ap(p, T(:,3));
--------------------------------------------------------------------------------
/m/measure_compute_auc.m:
--------------------------------------------------------------------------------
%
% Compute area under the curve.
%
% PARAMETERS
%    p     (e*1) Predictions
%    at    (e*3) To be predicted; only the third column is used
%
% RESULT
%    value    Area under the curve, as computed by konect_auc()
%

function [value] = measure_compute_auc(p, at)

value = konect_auc(p, at(:,3));
--------------------------------------------------------------------------------
/m/measure_compute_corr.m:
--------------------------------------------------------------------------------
%
% Compute Pearson correlation error measure.
%
% RESULT
%    value    Correlation value; 0 when the correlation is not finite
%
% PARAMETERS
%    p    (e*1) Predictions
%    T    (e*3) To be predicted; only the third column is used
%

function [value] = measure_compute_corr(p, T)

value = corr(p, T(:,3));

if ~isfinite(value)
    % An undefined Pearson correlation means constant predictions, so
    % the result is 0.
    value = 0;
end
--------------------------------------------------------------------------------
/m/measure_compute_kendall.m:
--------------------------------------------------------------------------------
%
% Compute Kendall tau precision measure.
%
% RESULT
%    value    Correlation value.  Unlike in measure_compute_corr(), a
%             non-finite result is returned as it is.
%
% PARAMETERS
%    p    (e*1) Predictions
%    T    (e*3) To be predicted; only the third column is used
%

function [value] = measure_compute_kendall(p, T)

value = corr(p, T(:,3), 'type', 'Kendall');

--------------------------------------------------------------------------------
/m/measure_compute_map.m:
--------------------------------------------------------------------------------
%
% Compute mean average precision.
%
% PARAMETERS
%    p    (e*1) Predictions
%    T    (e*3) To be predicted; passed to konect_map() as a whole,
%         not only its third column
%
% RESULT
%    value    MAP value
%

function [value] = measure_compute_map(p, T)

value = konect_map(p, T);



--------------------------------------------------------------------------------
/m/measure_compute_mauc.m:
--------------------------------------------------------------------------------
%
% Compute mean area under the curve.
%
% PARAMETERS
%    p     (e*1) Predictions
%    at    (e*3) To be predicted; passed to konect_mauc() as a whole,
%          not only its third column
%
% RESULT
%    value    MAUC value
%

function [value] = measure_compute_mauc(p, at)

value = konect_mauc(p, at);



--------------------------------------------------------------------------------
/m/measure_compute_spear.m:
--------------------------------------------------------------------------------
%
% Compute Spearman correlation link prediction measure.
%
% RESULT
%    value    Correlation value.  Unlike in measure_compute_corr(), a
%             non-finite result is returned as it is.
%
% PARAMETERS
%    p    (e*1) Predictions
%    T    (e*3) To be predicted; only the third column is used
%

function [value] = measure_compute_spear(p, T)

value = corr(p, T(:,3), 'type', 'Spearman');

--------------------------------------------------------------------------------
/m/mediandist.m:
--------------------------------------------------------------------------------
%
% Determine the median distance in the network, from the hop
% distribution.
%
% In case the actual median value would be a non-integer, because we are
% averaging between two values, the result is rounded up, to always
% result in an integer.  There must always be one pair of nodes for each
% possible distance between zero and the diameter, and thus the only
% case of averaging happens between two adjacent integers.  For large
% networks, this is exceedingly unlikely.
%
% PARAMETERS
%   $network
%
% INPUT FILES
%   dat/hopdistr.$network
%
% OUTPUT FILES
%   dat/statistic.mediandist.$network
%

network = getenv('network')

h = load(sprintf('dat/hopdistr.%s', network))

% Count the entries of H that do not exceed half of its last entry.
% NOTE(review): this presumably relies on H being the cumulative hop
% distribution, with h(end) the total number of node pairs -- confirm
% against the code that writes dat/hopdistr.*
v = sum(h <= (h(end) / 2))

values = [ v ]

filename_OUT = sprintf('dat/statistic.mediandist.%s', network);
OUT = fopen(filename_OUT, 'w');
% error() aborts the script, so nothing placed after it would run.  The
% file name is passed as an argument rather than as the format itself,
% so that '%' or '\' in a name cannot be interpreted.
if OUT < 0, error('%s', filename_OUT); end;
fprintf(OUT, '%u\n', values);
if fclose(OUT) < 0, error('%s', filename_OUT); end;

--------------------------------------------------------------------------------
/m/network_key.m:
--------------------------------------------------------------------------------
%
% The key used for sorting datasets.
%
% RESULT
%   ret         String of the form '<category>:<name>', in which the
%               spaces of the name are replaced by '-'
%
% PARAMETERS
%   metadata    Struct with the string fields .name and .category
%

function ret = network_key(metadata)

name = metadata.name;

name_no_space = regexprep(name, ' ', '-');

ret = sprintf('%s:%s', metadata.category, name_no_space);

--------------------------------------------------------------------------------
/m/outin.m:
--------------------------------------------------------------------------------
%
% Plot the outdegree vs the indegree of all nodes.  Only for directed
% networks.
%
% PARAMETERS
%   $network    Network name
%
% INPUT
%   dat/data.$network.mat
%   dat/info.$network
%
% OUTPUT
%   plot/outin.[abc].$network.eps   Plots
%       a   normal axes
%       b   logarithmic axes
%       c   logarithmic axes, with degrees shifted by one
%

font_size = 22;

consts = konect_consts();

network = getenv('network');

info = read_info(network);

% This plot is only defined for directed networks
assert(info.format == consts.ASYM);

data = load(sprintf('dat/data.%s.mat', network));

% Use the third column of T as edge weights only when the network is
% declared as POSITIVE and that column exists; otherwise each edge
% counts as 1.
if info.weights == consts.POSITIVE & size(data.T, 2) >= 3
    w = data.T(:,3);
else
    w = 1;
end

% Outdegrees:  sum of W over the first column of T
d_1 = sparse(data.T(:,1), 1, w, info.n1, 1);

% Indegrees:  sum of W over the second column of T
d_2 = sparse(data.T(:,2), 1, w, info.n2, 1);

%
% (b) - logarithmic axes
%

loglog(d_1, d_2, '.');

xlabel('Outdegree (d_1)', 'FontSize', font_size);
ylabel('Indegree (d_2)', 'FontSize', font_size);

set(gca, 'FontSize', font_size);

set(gca, 'XMinorTick', 'on');
set(gca, 'YMinorTick', 'on');
set(gca, 'TickLength', [0.05 0.05]);

% Workaround for Matlab bug.  Otherwise, the minor ticks are not visible.
% The lower axis limits are forced to 0.7 and the major ticks are placed
% at the powers of ten within the axis range.
ax = axis();
ax(1) = 0.7;
ax(3) = 0.7;
axis(ax);
if ax(1) > 0 & ax(3) > 0
    set(gca, 'XTick', 10 .^ (ceil(log(ax(1)) / log(10)):floor(log(ax(2)) / log(10))));
    set(gca, 'YTick', 10 .^ (ceil(log(ax(3)) / log(10)):floor(log(ax(4)) / log(10))));
end

konect_print(sprintf('plot/outin.b.%s.eps', network));

%
% (c) - shifted logarithmic axes
%

% Adding one to both degrees makes nodes of degree zero representable
% on logarithmic axes.
loglog(d_1 + 1, d_2 + 1, '.');

xlabel('Augmented outdegree (1 + d^+)', 'FontSize', font_size);
ylabel('Augmented indegree (1 + d^-)', 'FontSize', font_size);

set(gca, 'FontSize', font_size);

set(gca, 'XMinorTick', 'on');
set(gca, 'YMinorTick', 'on');
set(gca, 'TickLength', [0.05 0.05]);

% Workaround for Matlab bug.  Otherwise, the minor ticks are not visible.
ax = axis();
ax(1) = 0.7;
ax(3) = 0.7;
axis(ax);
if ax(1) > 0 & ax(3) > 0
    set(gca, 'XTick', 10 .^ (ceil(log(ax(1)) / log(10)):floor(log(ax(2)) / log(10))));
    set(gca, 'YTick', 10 .^ (ceil(log(ax(3)) / log(10)):floor(log(ax(4)) / log(10))));
end

konect_print(sprintf('plot/outin.c.%s.eps', network));

%
% (a) - normal axes
%
plot(d_1, d_2, '.');
konect_print(sprintf('plot/outin.a.%s.eps', network));

--------------------------------------------------------------------------------
/m/pa_plot.m:
--------------------------------------------------------------------------------
%
% Preferential attachment tests.
%
% PARAMETERS
%   $network
%
% INPUT
%   dat/pa.$NETWORK.mat
%   dat/pa_data.$NETWORK.mat
%   dat/info.$NETWORK
%
% OUTPUT
%   plot/pa.[a][auv].$NETWORK.eps
%

network = getenv('network')

consts = konect_consts();

info = read_info(network)

% Both files contain a single variable named like the file prefix
pa = load(sprintf('dat/pa.%s.mat', network));
pa = pa.pa;

pa_data = load(sprintf('dat/pa_data.%s.mat', network));
pa_data = pa_data.pa_data;

% Directed and bipartite networks get the three plots 'u', 'v' and 'a';
% undirected networks only get 'a'.
if info.format == consts.ASYM

    pa_plot_one(network, 'u', pa.u, pa_data.u);
    pa_plot_one(network, 'v', pa.v, pa_data.v);

    pa_plot_one(network, 'a', pa.a, pa_data.a);

elseif info.format == consts.SYM

    pa_plot_one(network, 'a', pa.a, pa_data.a);

elseif info.format == consts.BIP

    pa_plot_one(network, 'u', pa.u, pa_data.u);
    pa_plot_one(network, 'v', pa.v, pa_data.v);

    pa_plot_one(network, 'a', pa.a, pa_data.a);

else
    error('*** Invalid format');
end

--------------------------------------------------------------------------------
/m/pivotize.m:
--------------------------------------------------------------------------------
%
% The norm used for normalizing central matrices.
%
% The pivot is
%   - 1 when data_decomposition.n is set;
%   - otherwise, when data_decomposition.l is set, the smallest absolute
%     value among the nonzero diagonal entries of X (1 if there is none);
%   - otherwise norm(x).
% An error is raised when the resulting pivot is not positive.
%
% PARAMETERS
%   data_decomposition  Struct with the fields .n and .l
%   x                   Central matrix
%
% RESULT
%   pivot               The pivot value
%

function [pivot] = pivotize(data_decomposition, x)

pivot = 1;

if ~data_decomposition.n

    if data_decomposition.l
        a = diag(x);
        a_nonzero = a(a ~= 0);
        % Keep the default of 1 when the diagonal is entirely zero
        if length(a_nonzero)
            pivot = min(abs(a_nonzero));
        end
    else
        pivot = norm(x);
    end
end

if pivot <= 0, error('***'); end;

--------------------------------------------------------------------------------
/m/precision_comp.m:
--------------------------------------------------------------------------------
%
% Compute precision of predictions.
%
% PARAMETERS
%   $NETWORK
%   $METHOD
%
% INPUT
%   dat/prediction.$METHOD.$NETWORK.mat
%   dat/split.$NETWORK.mat
%   dat/meanst.$NETWORK.mat
%
% OUTPUT
%   dat/precision.$METHOD.$NETWORK.mat
%       precisions{submethod}   Precisions by submethod; each entry is a
%                               2-vector of the value and the runtime in
%                               seconds
%           .$MEASURE           e.g., "auc"
%

labels_measure = get_labels_measure();
measure_names = fieldnames(labels_measure);

network = getenv('NETWORK');
method = getenv('METHOD');

split = load(sprintf('dat/split.%s.mat', network));
prediction = load(sprintf('dat/prediction.%s.%s.mat', method, network));

% Build the list of all test pairs with their target value in the third
% column.  Pairs of the zero test set get the target 0; if the test set
% has no weight column, its pairs get the target 1.
if length(split.T_test_zero)
    if size(split.T_test,2) == 3
        T_all = [ split.T_test ; split.T_test_zero, zeros(size(split.T_test_zero, 1), 1) ];
    else
        T_all = [ split.T_test, ones(size(split.T_test, 1), 1) ; split.T_test_zero, zeros(size(split.T_test_zero, 1), 1) ];
    end
else
    T_all = [ split.T_test ];
end

meanst = load(sprintf('dat/meanst.%s.mat', network));

T_all = konect_normalize_additively(T_all, meanst);

precisions = struct();

submethods = fieldnames(prediction.predictions);

for i = 1 : length(submethods)

    submethod = submethods{i};
    fprintf(1, '\nsubmethod = %s\n', submethod);
    prediction_submethod = prediction.predictions.(submethod);
    p_normal = prediction_submethod.prediction;
    p_zero = prediction_submethod.prediction_zero;

    % Same row order as in T_all:  test pairs first, then zero pairs
    if length(split.T_test_zero)
        p = [ p_normal ; p_zero ];
    else
        p = [ p_normal ];
    end

    % Only the real part of the predictions is evaluated
    p = real(p);

    % The measure loop has its own index J, so that the submethod index
    % I of the enclosing loop is not overwritten.
    for j = 1 : length(measure_names)
        measure = measure_names{j};
        fprintf(1, '%s(%s) = ', submethod, measure);
        t0 = cputime;
        % Not echoed:  the value is printed by the fprintf() below
        value = measure_compute(measure, p, T_all);
        t1 = cputime;
        runtime = t1 - t0;
        fprintf(1, '%f [%f s]\n', value, runtime);

        if ~isfinite(value)
            error('*** Non-finite precision value');
        end
        precisions.(submethod).(measure) = [ value runtime ];
    end

end

save(sprintf('dat/precision.%s.%s.mat', method, network), '-v7.3', 'precisions');

--------------------------------------------------------------------------------
/m/precision_one.m:
--------------------------------------------------------------------------------
%
% Plot precisions for one network/method combination.
%
% PARAMETERS
%   $NETWORK
%   $METHOD
%
% INPUT
%   dat/precision.$METHOD.$NETWORK.mat
%
% OUTPUT
%   plot/precision.one.[a].$MEASURE.$METHOD.$NETWORK.eps
%       for all $MEASURE
%

network = getenv('NETWORK');
method = getenv('METHOD');

precision = load(sprintf('dat/precision.%s.%s.mat', method, network));

labels_measure = get_labels_measure();

% One plot is generated for each measure that has a label
measures = fieldnames(labels_measure)

for i = 1 : length(measures)

    measure = measures{i}

    % NAMES is not used below
    names = [];
    precisions = [];

    % Collect the value of this measure for every submethod, in the
    % order of the fields of precision.precisions.
    submethods = fieldnames(precision.precisions);
    for k = 1 : length(submethods)
        submethod = submethods{k}
        values = precision.precisions.(submethod).(measure)
        % Only the first entry of VALUES is plotted
        precisions = [ precisions ; values(1) ];
    end

    % [] as first argument:  labels omit the method name
    precisions_plot([], submethods, precisions, measure);
    konect_print(sprintf('plot/precision.one.a.%s.%s.%s.eps', measure, method, network));

end

--------------------------------------------------------------------------------
/m/precisions_plot.m:
--------------------------------------------------------------------------------
%
% Draw a single bar chart of link prediction results.
%
% PARAMETERS
%   methods_arg (n*1) Cellstrings of methods; may be [] to omit the method names
%   submethods  (n*1) Cellstrings of submethods
%   precisions  (n*1) Precision values
%   measure     The measure used; a field name of get_labels_measure()
%

function precisions_plot(methods_arg, submethods, precisions, measure)

font_size = 16;
font_size_min = 4;
rotation = 60;

labels_measure = get_labels_measure();

n = size(precisions,1)

% Nothing to draw:  produce an empty plot
if n == 0
    plot(0,0);
    return;
end

% Only COLORS is used below
[colors line_styles markers] = styles_submethod();
labels_submethod = get_labels_submethod();
labels_method = get_labels_method();

labels = [];
cm = [ ];

% Build the label and the color of each bar
for k = 1:n
    submethod = submethods(k,1);  submethod = submethod{:}
    label_submethod = labels_submethod.(submethod);
    if length(methods_arg)
        methods_arg
        method = methods_arg(k,1);  method = method{:}
        % Method names may contain '-', which is replaced by '_' to
        % form the field name.
        label_method = labels_method.(regexprep(method, '-', '_'));
        label = sprintf('%s %s', label_method, label_submethod);
    else
        label = label_submethod;
    end

    color_k = colors.(submethod);
    cm = [cm; color_k];
    labels = [ labels ; cellstr(label) ];
end

% Each bar is drawn by a separate bar() call in which all other values
% are zero, so that each bar can be given its own color.
hold on;
for k = 1:n
    l = zeros(1,n);
    l(k) = precisions(k);
    h = bar(l);
    set(h, 'FaceColor', cm(k,:));
end
set(gca, 'FontSize', font_size);
ylabel(labels_measure.(measure), 'FontSize', font_size);

% The lower Y limit is placed slightly below the smallest value, but not
% below zero.  If that makes the Y range empty, use [0, 1].
ax = axis()
ax(2) = n+1;
ax(3) = max(0, min(precisions) - 0.05 * (max(precisions) - min(precisions)));
if ax(3) == ax(4), ax(3) = 0; ax(4) = 1; end;
ax
axis(ax);
% The label font size shrinks when there are more than 10 bars, down to
% FONT_SIZE_MIN.
xticklabel_rotate(1:n, rotation, labels, 'FontSize', max(font_size_min, 20 - max(0, floor(0.6 * (n-10)))));

--------------------------------------------------------------------------------
/m/predict_euclidean.m:
--------------------------------------------------------------------------------
%
%
Predict Euclidean predictions.  The result is the negative Euclidean
% distance in the matrix M with MM' = U abs(D) V'.
%
% Note that the squared coordinate differences are summed without taking
% a square root, i.e., the values returned are negative squared
% distances.
%
% RESULT
%   prediction  (e*1) Prediction values
%
% PARAMETERS
%   U   (m*r) Left eigenvectors
%   D   (r*r) Central matrix
%   V   (n*r) Right eigenvectors; may be [], in which case the scaled
%       left eigenvectors are used in their place
%   T   (e*2) Pairs of vertices to predict for
%

function [prediction] = predict_euclidean(U, D, V, T)

chunk_size = 10000;

% Scaling matrix derived from the eigendecomposition of D
[UU DD] = eig(D);
D_sqrt = UU * sqrt(abs(DD));

% Scaled latent coordinates of the left and right vertices
left = U * D_sqrt;
if length(V)
    right = V * D_sqrt;
else
    right = left;
end

% Process the pairs in chunks of at most CHUNK_SIZE rows
[k from to] = konect_fromto(1, size(T,1), chunk_size);

prediction = [];

for c = 1:k

    pairs = T(from(c) : to(c), :);

    dif = left(pairs(:,1),:) - right(pairs(:,2),:);

    prediction = [ prediction ; - sum(conj(dif) .* dif, 2) ];
end

--------------------------------------------------------------------------------
/m/predict_spectral.m:
--------------------------------------------------------------------------------
%
% Compute spectral predictions.  Predictions correspond to elements of
% the matrix UDV', or UDV' - VDU' for the skew decomposition.
%
% RESULT
%   prediction          (e*1) Prediction values; the real part when the
%                       complex result below is returned
%   prediction_complex  (e*1) Imaginary part of the prediction values
%                       when they are not real or when the decomposition
%                       is flagged as imaginary; [] otherwise
%
% PARAMETERS
%   U   (m*r) Left eigenvectors
%   D   (r*r) Central matrix
%   V   (n*r) Right eigenvectors, may be [] in which case U is used in its place
%   T   (e*2) Pairs of vertices for which to compute link prediction scores
%   decomposition   Name of the decomposition
%

function [prediction prediction_complex] = predict_spectral(U, D, V, T, decomposition)

chunk_size = 20000;

% The score of a pair (i,j) is the sum over the elementwise product of
% row i of A and row j of B.
if strcmp(decomposition, 'skew')
    A = [ U * D, -V * D ];
    B = [ V, U ];
elseif length(V)
    A = U * D;
    B = V;
else
    A = U * D;
    B = U;
end

% Only A and B are needed from here on
clear U D V;

% Process the pairs in chunks of at most CHUNK_SIZE rows
[k from to] = konect_fromto(1, size(T,1), chunk_size);

prediction = [];

t = konect_timer(k);

for c = 1:k

    t = konect_timer_tick(t, c);

    pairs = T(from(c) : to(c), :);

    scores = sum(A(pairs(:,1), :) .* B(pairs(:,2), :), 2);

    prediction = [ prediction ; scores ];

end

konect_timer_end(t);

data_decomposition = konect_data_decomposition(decomposition);

% Split into real and imaginary parts when needed
prediction_complex = [];
if ~isreal(prediction) | data_decomposition.imag
    prediction_complex = imag(prediction);
    prediction = real(prediction);
end

--------------------------------------------------------------------------------
/m/prediction_local.m:
--------------------------------------------------------------------------------
%
% Compute predictions using a local method.
%
% PARAMETERS
%   $NETWORK    Name of network
%   $METHOD     Name of local link prediction method
%
% INPUT
%   dat/split.$NETWORK.mat
%   dat/meanst.$NETWORK.mat
%   dat/info.$NETWORK
%
% OUTPUT
%   dat/prediction.$METHOD.$NETWORK.mat
%       predictions{submethod}  Struct by name of submethod containing struct of
%           prediction          Column vector of prediction values, following split.T_test
%           prediction_zero     Column vector of prediction values, following split.T_test_zero; [] if not used
%

network = getenv('NETWORK');
method = getenv('METHOD');

split = load(sprintf('dat/split.%s.mat', network));
meanst = load(sprintf('dat/meanst.%s.mat', network));
info = read_info(network);

% Whether there is a zero test set.  This is an explicit scalar flag;
% a size vector used as a condition is true only when all its entries
% are nonzero, which is the same as the array being non-empty.
enable_zero = ~isempty(split.T_test_zero)

% The training set is the union of the source and target sets
T_training = [ split.T_source ; split.T_target ];

T_training = konect_normalize_additively(T_training, meanst);

A_training = konect_spconvert(T_training, split.n1, split.n2);

% All pairs to predict:  the test set followed by the zero test set
T_test_all = split.T_test(:,1:2);
if enable_zero
    T_test_all = [ T_test_all ; split.T_test_zero ];
end

ps = prediction_local_compute(method, A_training, T_test_all, info.format, info.weights);

submethods = fieldnames(ps);

predictions = struct();

for i = 1 : length(submethods)
    submethod = submethods{i};

    prediction = konect_denormalize_additively(T_test_all, ps.(submethod), meanst);

    % Split the predictions back into the test and the zero test part;
    % the first size(split.T_test,1) values belong to the test set.
    if enable_zero
        prediction_zero = prediction((size(split.T_test,1)+1) : end);
        prediction = prediction(1 : size(split.T_test,1));
    else
        prediction_zero = [];
    end

    predictions.(submethod).prediction = prediction;
    predictions.(submethod).prediction_zero = prediction_zero;

end

save(sprintf('dat/prediction.%s.%s.mat', method, network), '-v7.3', 'predictions');

--------------------------------------------------------------------------------
/m/prediction_local_compute.m:
--------------------------------------------------------------------------------
%
% Compute local prediction, by dispatching to the function
% prediction_local_compute_$METHOD().
%
% PARAMETERS
%   method  Prediction method
%   a       Adjacency/biadjacency matrix
%   at      (e*2) Each row is a vertex pair (i,j) for which to compute a prediction
%   format
%   weights
%
% RESULT
%   predictions{submethod}  (e*1) Prediction scores
%

function [predictions] = prediction_local_compute(method, a, at, format, weights)

% Name of the function implementing the requested method
name = sprintf('prediction_local_compute_%s', method);

compute = str2func(name);

predictions = compute(a, at, format, weights);

--------------------------------------------------------------------------------
/m/prediction_local_compute_mask.m:
--------------------------------------------------------------------------------
%
% Compute the Rank-1 mask approximation (only when there is
% no "zero").  See mask.m
%
% PARAMETERS
%   a       Adjacency/biadjacency matrix
%   at      (e*2) Vertex pairs for which to compute predictions
%   format
%   weights
%
% RESULT
%   predictions{submethod}  Struct by submethod name of (e*1) Predictions
%       'main'  (only SIGNED and WEIGHTED) rank-1 mask
%               approximation
%

function [predictions] = prediction_local_compute_mask(a, at, format, weights)

consts = konect_consts();

% Stays empty for all other weight types
predictions = struct();

if weights == consts.SIGNED | weights == consts.WEIGHTED

    % The mask consists of the nonzero entries of the matrix
    present = (a ~= 0);
    [u v] = mask(a, present);

    % The score of a pair (i,j) is u(i) * v(j)
    rows = at(:,1);
    cols = at(:,2);
    predictions.main = u(rows) .* v(cols);
end

--------------------------------------------------------------------------------
/m/prediction_local_compute_neib.m:
--------------------------------------------------------------------------------
%
% Compute neighborhood predictions.
%
% RESULT
%   predictions Struct by submethod name of (e*1) Predictions; empty
%               for bipartite networks.  For directed networks, each
%               base submethod additionally appears with the suffixes
%               'asym', 'out' and 'in'.
%
% PARAMETERS
%   A       Adjacency/biadjacency matrix
%   T       (e*2) Vertex pairs for which to compute predictions
%   format
%   weights
%

function [predictions] = prediction_local_compute_neib(A, T, format, weights)

consts = konect_consts();

[negative] = konect_data_weights();

predictions = struct();

% No neighborhood predictions are computed for bipartite networks
if format == consts.BIP
    return;
end

submethods = { 'common', 'adad', 'ra', 'jaccard', 'cosine', 'sorensen', 'hpi', 'hdi', 'lhni' };
submethods_negative = { 'abscommon', 'absadad', 'absjaccard', 'abscosine' };
variants_asym = { 'asym', 'out', 'in' };

% Total number of predictions to compute, for the progress timer
N = length(submethods);
if negative(weights)
    N = N + length(submethods_negative);
end
if format == consts.ASYM
    N = N + length(variants_asym) * length(submethods);
end

t = konect_timer(N);

% Index of the current step; one step per computed prediction
I = 1;

for i = 1 : length(submethods)
    t = konect_timer_tick(t, I);  I = I + 1;
    submethod = submethods{i};
    predictions.(submethod) = konect_predict_neib(submethod, A, T, format, 'sym');
end

if negative(weights)
    for i = 1 : length(submethods_negative)
        t = konect_timer_tick(t, I);  I = I + 1;
        submethod = submethods_negative{i};
        predictions.(submethod) = konect_predict_neib(submethod, A, T, format, 'sym');
    end
end

if format == consts.ASYM
    for i = 1 : length(submethods)
        submethod = submethods{i};
        % The timer is ticked once per variant, matching the three
        % steps per submethod that are counted in N above.
        for j = 1 : length(variants_asym)
            t = konect_timer_tick(t, I);  I = I + 1;
            variant = variants_asym{j};
            predictions.([submethod variant]) = konect_predict_neib(submethod, A, T, format, variant);
        end
    end
end

konect_timer_end(t);

--------------------------------------------------------------------------------
/m/prediction_local_compute_neib3.m:
--------------------------------------------------------------------------------
%
% Compute neighborhood predictions, based on paths of length 3.
%
% RESULT
%   predictions Struct by submethod name of (e*1)-vectors
%               containing the predictions
%
% PARAMETERS
%   A       Adjacency/biadjacency matrix
%   T       (e*2) Vertex pairs for which to compute predictions
%   format  Format of network
%   weights Weights of network
%

function [predictions] = prediction_local_compute_neib3(A, T, format, weights)

% Neither of these two values is used below
consts = konect_consts();
[negative] = konect_data_weights();

predictions = struct();

submethods = { 'path3' };

for k = 1 : length(submethods)
    name = submethods{k};
    predictions.(name) = konect_predict_neib3(name, A, T, format);
end

--------------------------------------------------------------------------------
/m/prediction_local_compute_pref.m:
--------------------------------------------------------------------------------
%
% Compute preferential attachment prediction.
%
% The score of a pair (i,j) is the product of the row sum of vertex i
% and the column sum of vertex j, computed on the absolute values of the
% matrix.  For undirected networks, the sum of row sum and column sum is
% used for both vertices.
%
% PARAMETERS
%   a       Adjacency/biadjacency matrix
%   at      (e*2) Vertex pairs for which to compute predictions
%   format
%   weights
%
% RESULT
%   predictions Struct by submethod name of (e*1) Predictions
%

function [predictions] = prediction_local_compute_pref(a, at, format, weights)

consts = konect_consts();

% Logical matrices are used as they are
if ~islogical(a)
    a = abs(a);
end

degree_row = sum(a, 2);
degree_col = sum(a, 1)';

if format == consts.SYM
    degree_both = degree_row + degree_col;
    degree_row = degree_both;
    degree_col = degree_both;
end

rows = at(:,1);
cols = at(:,2);

predictions.main = degree_row(rows) .* degree_col(cols);

--------------------------------------------------------------------------------
/m/prediction_local_compute_zero.m:
--------------------------------------------------------------------------------
%
% Compute zero prediction, i.e.
always predict zero.
%
% PARAMETERS
%   a       Adjacency/biadjacency matrix (unused)
%   at      (e*2) Vertex pairs for which to compute predictions
%   format  (unused)
%   weights (unused)
%
% RESULT
%   predictions Struct by subname of (e*1) Predictions
%       'main'  All-zero column vector with one entry per row of AT
%

function [predictions] = prediction_local_compute_zero(a, at, format, weights)

e = size(at,1);

predictions.main = zeros(e, 1);

--------------------------------------------------------------------------------
/m/prepare_matrix_target.m:
--------------------------------------------------------------------------------
%
% Prepare target matrix for the diagonality test when in "base" mode,
% not self mode.  This is identical to konect_prepare_matrix(), only
% that for Laplacian decompositions this will return the underlying
% adjacency matrix.
%
% The matrix itself is built by konect_matrix(), after the name of a
% Laplacian decomposition has been replaced by the name of its
% underlying decomposition.
%

function [B] = prepare_matrix_target(decomposition, A, format)

% Laplacian decomposition  ->  underlying decomposition
underlying = { ...
    'lap',        'sym'     ; ...
    'lapc',       'sym'     ; ...
    'lapd',       'svd'     ; ...
    'lapd-n',     'svd-n'   ; ...
    'lapherm',    'herm'    ; ...
    'lapskew',    'skewi'   ; ...
    'lapquantum', 'quantum' ; ...
    'lapq',       'sym'     };

% All other decomposition names are passed through unchanged
index = find(strcmp(decomposition, underlying(:,1)), 1);
if ~isempty(index)
    decomposition = underlying{index, 2};
end

B = konect_matrix(decomposition, A, format);

--------------------------------------------------------------------------------
/m/rating_evolution.m:
--------------------------------------------------------------------------------
%
% Plot the rating evolution, i.e. the mean rating of each item in
% function of number of ratings.
%
% PARAMETERS
%   $network
%
% INPUT FILES
%   dat/data.$network.mat
%
% OUTPUT FILES
%   plot/rating_evolution.[ab].$network.eps
%       a   running mean rating of each item
%       b   the same, shifted so that each curve ends at zero
%

network = getenv('network');

data = load(sprintf('dat/data.%s.mat', network));

% Items are identified by the second column of T
ids = unique(data.T(:,2));

%
% (b) - normalized to zero final mean weight
%
hold on;

for k = 1:length(ids)

    i = ids(k);

    % Ratings of item i, in the order in which they appear in T
    % NOTE(review): presumably this is chronological order -- confirm
    ati = data.T(find(data.T(:,2) == i), 3);

    n = length(ati);

    range = 1:n;

    % Running mean after 1, 2, ..., n ratings
    averages = cumsum(ati) ./ range';

    % Subtracting the final mean makes every curve end at zero
    plot(range, averages - mean(ati), '-');

end

konect_print(sprintf('plot/rating_evolution.b.%s.eps', network));

%
% (a) - all
%
% NOTE(review): this draws into a fresh figure only if konect_print()
% closes the current one -- confirm
hold on;

for k = 1:length(ids)

    i = ids(k);

    ati = data.T(find(data.T(:,2) == i), 3);

    n = length(ati);

    range = 1:n;

    averages = cumsum(ati) ./ range';

    plot(range, averages, 'g-');

end

konect_print(sprintf('plot/rating_evolution.a.%s.eps', network));

--------------------------------------------------------------------------------
/m/rating_evolution2.m:
--------------------------------------------------------------------------------
%
% Plot the rating evolution, i.e. the mean rating of each item in
% function of number of ratings.  Plot only the fast stuff.
%
% Items are grouped into BINS bins by their final mean rating.  For each
% bin, the curve drawn is the mean over the items of the bin of the
% running mean rating, after each item's curve has been shifted to end
% at the middle of the bin.
%
% PARAMETERS
%   $network
%
% INPUT FILES
%   dat/data.$network.mat
%
% OUTPUT FILES
%   plot/rating_evolution2.c.$network.eps
%

network = getenv('network');

bins = 10;
font_size = 22;
line_width = 3;

data = load(sprintf('dat/data.%s.mat', network));

% Items are identified by the second column of T
ids = unique(data.T(:,2));

rating_min = min(data.T(:,3))
rating_max = max(data.T(:,3))

% Width of the rating range; zero when all ratings are equal
rating_span = rating_max - rating_min;

% Per bin (rows) and per number of ratings (columns):  sum of the
% shifted running means, and number of items contributing.  Columns are
% added on demand.
sums = zeros(bins, 0);
counts = zeros(bins, 0);

%
% (c) - all
%

for k = 1:length(ids)

    i = ids(k);
    ati = data.T(find(data.T(:,2) == i), 3);
    n = length(ati);
    range = 1:n;
    % Running mean after 1, 2, ..., n ratings, as a row vector
    average = cumsum(ati)' ./ range;

    if length(average) > size(sums,2)
        sums   = [sums   , zeros(bins, length(average) - size(sums  , 2))];
        counts = [counts , zeros(bins, length(average) - size(counts, 2))];
    end

    % Bin of the final mean rating, clamped to 1..BINS.  The upper
    % clamp catches a final mean equal to the maximum rating; the lower
    % clamp guards against rounding.  When all ratings are equal, the
    % quotient would be 0/0, so the first bin is used.
    if rating_span > 0
        i_bin = 1 + floor(bins * (average(end) - rating_min) / rating_span);
        i_bin = min(bins, max(1, i_bin));
    else
        i_bin = 1;
    end

    rating_midbin = rating_min + (i_bin - 0.5) * rating_span / bins;

    % Shift the curve so that it ends at the middle of its bin
    average = average - average(end) + rating_midbin;

    sums(i_bin, 1 : length(average)) = sums(i_bin, 1 : length(average)) + average;
    counts(i_bin, 1 : length(average)) = counts(i_bin, 1 : length(average)) + ones(size(average));

end

hold on;

% One curve per bin; positions without any item give 0/0 = NaN
for i = 1 : bins
    plot(1 : size(sums, 2), sums(i, :) ./ counts(i, :), '-', ...
         'LineWidth', line_width);
end

set(gca, 'FontSize', font_size);

konect_print(sprintf('plot/rating_evolution2.c.%s.eps', network));

--------------------------------------------------------------------------------
/m/read_info.m:
--------------------------------------------------------------------------------
%
% Get information about one dataset.
%
% DEPRECATED - instead, load individual statistics from dat/statistic.$STATISTIC.$NETWORK
%
% The values are read from the file dat/info.$NETWORK, which contains
% six numbers in the order n1, n2, lines, rmn_, format, weights.
%
% RESULT
%   A struct with the following fields
%
%   n1,n2       Number of left/right nodes; equal in
%               unipartite networks
%   n           Total number of nodes:  n1 + n2 in bipartite
%               networks, n1 otherwise
%   lines       Number of edges, but not counting
%               multiple edges when they are
%               aggregated
%   rmn_        = r / (m*n)  [deprecated]
%   format      as a number (see konect_consts())
%   weights     as a number (see konect_consts())
%
% PARAMETERS
%   network     Dataset name
%

function info = read_info(network)

network

raw = load(sprintf('dat/info.%s', network));

consts = konect_consts();

info = struct( ...
    'n1',      raw(1), ...
    'n2',      raw(2), ...
    'lines',   raw(3), ...
    'rmn_',    raw(4), ...
    'format',  raw(5), ...
    'weights', raw(6));

if info.format == consts.BIP
    % Left and right nodes are distinct
    info.n = info.n1 + info.n2;
else
    % Unipartite:  both node counts must agree
    assert(info.n1 == info.n2);
    info.n = info.n1;
end

--------------------------------------------------------------------------------
/m/read_meta.m:
--------------------------------------------------------------------------------
%
% Read metadata from a data file.
%
% RETURN VALUE
%   A struct where field names are the keys and the values are
%   strings.  In key names, '-' is replaced with '_'.
%
% The file read is uni/meta.$NETWORK.  Lines that do not have the form
% "key: value" are ignored.  If the same key appears multiple times, the
% last value is kept.  The result is an empty struct (no fields) when
% the file contains no such line.
%
% PARAMETERS
%   network     Network name
%

function ret = read_meta(network)

filename = sprintf('uni/meta.%s', network);

FILE = fopen(filename, 'r');

if FILE < 0, error('*** open'); end;

% Always return a struct, as documented, even when no key is found
ret = struct();

while 1

    line = fgetl(FILE);

    % fgetl() returns the number -1 at the end of the file, and a
    % string otherwise
    if ~ischar(line); break; end;

    % Key:  letters, digits and '-'.  Value:  the rest of the line
    % without the surrounding whitespace.
    tokens = regexp(line, '\s*([a-zA-Z0-9-]+)\s*:\s*(\S.*\S|\S)\s*', 'tokens', 'once');

    if length(tokens) < 2
        continue;
    end

    key = tokens{1};
    value = tokens{2};

    % '-' is not allowed in field names
    key = regexprep(key, '-', '_');

    ret.(key) = value;

end;

if fclose(FILE) < 0, error('fclose'); end;

--------------------------------------------------------------------------------
/m/read_statistic.m:
--------------------------------------------------------------------------------
%
% Read a network statistic.
%
% RETURN VALUES
%   data        (k*1) Column vector of the computed values
%
% PARAMETERS
%   statistic   Internal name of statistic
%   network     Internal name of network
%   k           (optional) Number of values to return; by
%               default, return all values
%

function data = read_statistic(statistic, network, k)

data = load(sprintf('dat/statistic.%s.%s', statistic, network));

% The file must contain exactly one value per line
assert(size(data, 2) == 1);

if (exist('k', 'var') == 1)
    assert(length(data) >= k);
    % DATA is a column vector, so the first K values are its first K
    % rows.
    data = data(1:k, 1);
end

--------------------------------------------------------------------------------
/m/rmse_full.m:
--------------------------------------------------------------------------------
%
% Error of the approximation of A by U * X * V', over all entries of A.
%
% NOTE(review): despite the name, no square root is taken here, i.e.,
% the value returned is the mean squared error; compare rmse_latent(),
% which takes the square root.  Left unchanged because callers may
% depend on the current values -- confirm.
%
% RESULT
%   ret     Mean of the squared absolute differences
%
% PARAMETERS
%   A       Matrix to approximate
%   U,X,V   Decomposition; V may be [], in which case U is used in its
%           place
%

function ret = rmse_full(A, U, X, V)

if isempty(V)
    V = U;
end

di = A - U * X * V';

ret = sum(sum(conj(di) .* di)) / numel(A)

if isnan(ret) | isinf(ret), error('***'); end

--------------------------------------------------------------------------------
/m/rmse_latent.m:
--------------------------------------------------------------------------------
%
% Root mean squared error of the approximation U * X * V', evaluated
% only at the given entries.
%
% RESULT
%   ret     Root of the mean squared absolute difference between the
%           predicted and the given values
%
% PARAMETERS
%   a1      (e*1) Row indices
%   a2      (e*1) Column indices
%   a3      (e*1) Values
%   U,X,V   Decomposition; V may be [], in which case U is used in its
%           place
%

function ret = rmse_latent(a1, a2, a3, U, X, V)

% Variable names are case-sensitive:  the parameters are U and V
if isempty(V)
    V = U;
end

% Sum of squared errors (named so as to not shadow the function sum())
total = 0;

for i = 1 : size(a1,1)

    pred = U(a1(i),:) * X * V(a2(i),:)';

    total = total + (abs(pred - a3(i)))^2;

end

ret = sqrt(total / size(a1,1));

% This is also triggered when there are no entries (0/0)
if isnan(ret) | isinf(ret), error('***'); end

--------------------------------------------------------------------------------
/m/runtime.m:
--------------------------------------------------------------------------------
%
% Analyse runtime.
%
% OUTPUT
%   dat/runtime
%   plot/runtime.eps
%
% INPUT
%   tmp.runtime
%

a = load('tmp.runtime')

% regression:  least-squares fit of -log of column 1 against a constant
% and the logarithms of (col2 + col3), col4 and (col2 * col3)
c = [-log(a(:,1))]
d = [ones(size(a,1),1) log(a(:,2) + a(:,3)) log(a(:,4)) log(a(:,2) .* a(:,3))]
x = pinv(d) * c
kp = exp(- d * x);

[a(:,1) kp]

% curve fitting
b = [a(:,1) ((a(:,2) + a(:,3)).^x(2) .* a(:,4).^x(3) .* (a(:,2).*a(:,3)).^x(4))]

% Split the rows by the relative difference of columns 2 and 3.
% NOTE(review): the difference is signed, so every row with
% col3 > col2 falls into the first group -- confirm whether abs() was
% intended.
i_squ = find(((a(:,2) - a(:,3)) ./ a(:,2)) < .002)
i_rec = find(((a(:,2) - a(:,3)) ./ a(:,2)) >= .002)

loglog(b(i_squ,2), b(i_squ,1), 'ob');
hold;
loglog(b(i_rec,2), b(i_rec,1), 'or');
print('-depsc', 'plot/runtime.eps');  close all;

save -ascii 'dat/runtime' x;

--------------------------------------------------------------------------------
/m/shrinkingdiversity.m:
--------------------------------------------------------------------------------
%
% Test the hypothesis of shrinking diversity
%
% PARAMETERS
%   $NETWORKS   Space-separated list of networks
%   $STATISTIC
%   $TYPE       "split" or "full"
%
% INPUT
%   dat/trend.$TYPE.$STATISTIC.$NETWORK.mat
%       For each $NETWORK in $NETWORKS
%
% OUTPUT
%   dat/shrinkingdiversity.$TYPE.$STATISTIC     Unspecified content (evaluated by hand)
%

% Values without a trailing semicolon are displayed on purpose.

networks = getenv('NETWORKS'); networks = regexp(networks, '[a-zA-Z0-9_-]+', 'match')
statistic = getenv('STATISTIC');
type = getenv('TYPE');

% Significance level of both tests
alpha = 0.05;

updown_statistic = get_updown_statistic();
updown_statistic_i = updown_statistic.(statistic);

% Each column is a network
% Rows correspond to those in dat/trend.*
% (row 2 is read as the up/down indicator below, i.e. this assumes that
% .H is a scalar -- TODO confirm)
data = [];

n = length(networks)

for i = 1 : n
    network = networks{i}

    data_i = load(sprintf('dat/trend.%s.%s.%s.mat', type, statistic, network));

    data = [ data [ data_i.H ; data_i.updown ] ];
end

going_up   = data(2,:) > 0;
going_down = data(2,:) < 0;
k_up   = sum(going_up)     % Number of networks where statistic goes up
k_down = sum(going_down)   % Number of networks where statistic goes down

% betainc(0.5, k, n - k + 1) is the probability that a Binomial(n, 0.5)
% variable is at least k.
p_up   = betainc(0.5, k_up  , n - k_up   + 1)  % p-value for hypothesis of going up
p_down = betainc(0.5, k_down, n - k_down + 1)  % p-value for hypothesis of going down
H_up   = p_up   < alpha    % whether the going-up hypothesis is validated
H_down = p_down < alpha    % whether the going-down hypothesis is validated

% Report the hypothesis in the direction given by get_updown_statistic()
if updown_statistic_i > 0
    H = H_up
else
    H = H_down
end

%
% Save
%
OUT = fopen(sprintf('dat/shrinkingdiversity.%s.%s', type, statistic), 'w');
% Fail with a clear message instead of an invalid-identifier error in
% fprintf()
if OUT < 0, error('*** fopen'); end;
fprintf(OUT, '%u\n%u\n%u\n%g\n%u\n%g\n%u\n%u\n', ...
        k_up, k_down, n, p_up, H_up, p_down, H_down, H);
if fclose(OUT); error '*** fclose'; end;

--------------------------------------------------------------------------------
/m/sne.m:
--------------------------------------------------------------------------------
%
% Spectral network evolution.
%
% Both decompositions need not be of the same size.
%
% V and source_V are [] for symmetric decompositions.
%
% RESULT
%    dd_new    dd minus the weighted average of the source eigenvalues,
%              i.e. the estimated change of each eigenvalue; adding it
%              to dd gives the extrapolated eigenvalues
%
% PARAMETERS
%    source_U     Decomposition of source set
%    source_dd
%    source_V
%    U,dd,V       Decomposition of training set
%    func
%        (optional) Function mapping the scalar product of eigenvectors to a
%        weight; defaults to f(x) = x.
%

function [dd_new] = sne(source_U, source_dd, source_V, ...
                        U, dd, V, func)

if nargin < 7
    func = @(x)(x);
end

% A non-empty V marks an asymmetric decomposition
is_asym = ~isempty(V);

% Weighted average of the source eigenvalues, one per training
% eigenvector; starts as a copy of DD so that it has the same shape
dd_avg = dd;

for i = 1 : size(U,2)

    weighted_total = 0;
    weight_total = 0;

    for j = 1 : size(source_U,2)

        overlap_u = source_U(:,j)' * U(:,i);

        if is_asym
            overlap_v = source_V(:,j)' * V(:,i);
            w = func(overlap_u * overlap_v);
        else
            w = func(overlap_u ^2);
        end

        weight_total = weight_total + w;
        weighted_total = weighted_total + w * source_dd(j);
    end

    % Avoid 0/0 when all weights vanish; the average is then 0
    if weight_total == 0
        weight_total = 1;
    end

    dd_avg(i) = weighted_total / weight_total;

end

dd_new = dd - dd_avg;
--------------------------------------------------------------------------------
/m/spectral_diagonality_test.m:
--------------------------------------------------------------------------------
%
% Compute the spectral diagonality test matrix.
%
% This is the matrix \Delta in Jérôme's PhD thesis.
%
% Several intermediate sizes are displayed (statements without a
% trailing semicolon).
%
% PARAMETERS
%    decomposition           The decomposition that is used
%    decomposition_source    The actual decomposition of the source matrix
%        .U, .D, .V (.V may be empty)
%    A_target                The target matrix
%    format                  Format of the network
%    enable_self             Mode (optional)
%        0 (default)  Use base matrix
%        1            Use the same matrix
%

function [Delta] = spectral_diagonality_test(decomposition, decomposition_source, ...
                                             A_target, format, enable_self)

if nargin < 5
    enable_self = 0;
end

enable_self

data_decomposition = konect_data_decomposition(decomposition);

size_A_target = size(A_target)
if enable_self
    A_target = konect_matrix(decomposition, A_target, format);
else % base
    A_target = prepare_matrix_target(decomposition, A_target, format);
end
size_A_target = size(A_target)

U = decomposition_source.U;
V = decomposition_source.V;

size_U = size(U)
size_V = size(V)

% When the target matrix is square and its size is the sum of the row
% counts of U and V, stack both factors into a single one.
if ~isempty(V) && size(U,1)+size(V,1) == size(A_target,1) && size(A_target,1) == size(A_target,2)
    U = [U ; V];
    V = [];
end

% Left factor:  plain transpose when the .o flag is set,
% konect_xpinv() otherwise
if data_decomposition.o
    u_i = U';
else
    u_i = konect_xpinv(U);
end

% Only one factor
if isempty(V)
    Delta = u_i * A_target * u_i';
    return;
end

% Right factor, same rule as for the left one
if data_decomposition.o
    v_i = V';
else
    v_i = konect_xpinv(V);
end

if strcmp(decomposition, 'skew')
    Delta = u_i * A_target * v_i' - v_i * A_target * u_i';
else
    size_u_i = size(u_i)
    size_v_i = size(v_i)
    size_A_target = size(A_target)

    Delta = u_i * A_target * v_i';
end

--------------------------------------------------------------------------------
/m/spectral_extrapolation.m:
--------------------------------------------------------------------------------
%
% Visualization of the spectral extrapolation method.
%
% Draws every eigenvalue at every time step as a dot, and for every
% eigenvalue of the training decomposition a dashed line through three
% points:  the value sne() attributes to the source step, the actual
% value at the training step, and the extrapolated value at the last
% step.
%
% PARAMETERS
%    $NETWORK
%    $DECOMPOSITION
%
% INPUT
%    dat/decomposition_time.split.$DECOMPOSITION.$NETWORK.mat
%    dat/steps.$NETWORK.mat
%
% OUTPUT
%    plot/spectral_extrapolation.$DECOMPOSITION.$NETWORK.eps
%

marker_size = 10;

network = getenv('NETWORK');
decomposition = getenv('DECOMPOSITION');

data_decomposition = load(sprintf('dat/decomposition_time.split.%s.%s.mat', decomposition, network));
steps_data = load(sprintf('dat/steps.%s.mat', network));

decompositions = data_decomposition.decompositions;
r = data_decomposition.r;

% Indices (into DECOMPOSITIONS and E_STEPS) of the last source step, the
% last training step (source + target) and the last step overall
steps_source = steps_data.steps_source;
steps_training = steps_data.steps_source + steps_data.steps_target;
steps_all = steps_data.steps_all;
e_steps = steps_data.e_steps;

% NOTE(review): this variable is not used anywhere below
is_asymmetric = size(decompositions(end).V);


%
% Spectrum over time
%

hold on;

% One series of dots per eigenvalue index K
for k = 1 : r
    spectrum = zeros(prod(size(e_steps)),1);

    for l = 1 : prod(size(decompositions))
        % Each decomposition may have a different size.
        if k <= size(decompositions(l).D, 1)
            spectrum(l) = decompositions(l).D(k,k);
        else
            spectrum(l) = NaN; % No k'th eigenvalue at this timepoint
        end
    end

    % Map the (possibly complex) values to the real values to plot
    spectrum = spectrum_visualize(spectrum, decomposition);

    plot(e_steps, spectrum, '.b', 'MarkerSize', marker_size);
end


%
% Extrapolation lines
%
D_source = decompositions(steps_source).D;
U_source = decompositions(steps_source).U;
V_source = decompositions(steps_source).V;

D_target = decompositions(steps_training).D;
U_target = decompositions(steps_training).U;
V_target = decompositions(steps_training).V;

% sne() returns the eigenvalue change from source to training step
dd_diff_squ = sne(U_source, diag(D_source), V_source, U_target, diag(D_target), V_target, @(x)(x));

% Extrapolated eigenvalues:  apply the same change once more (displayed)
dd_new_squ = dd_diff_squ + diag(D_target)

% Eigenvalues attributed to the source step (displayed)
dd_old_squ = - dd_diff_squ + diag(D_target)

for k = 1 : r
    if size(D_target, 1) >= k
        % NOTE(review): the lines always use real(), while the dots
        % above go through spectrum_visualize(), which uses imag() for
        % some decompositions -- confirm whether this is intended.
        plot(steps_data.e_steps([steps_source steps_training steps_all]), ...
             real([dd_old_squ(k) D_target(k,k) dd_new_squ(k)]), 'o--', 'Color', [0 0 0], 'LineWidth', 2);
    end
end

konect_print(sprintf('plot/spectral_extrapolation.%s.%s.eps', decomposition, network));
--------------------------------------------------------------------------------
/m/spectrum_visualize.m:
--------------------------------------------------------------------------------
%
% Convert a spectrum to real values that can be visualized.
%
% In all cases we take simply the real part, except for skew
% decompositions, where we take the imaginary part. (A more complex
% example would be the complex logarithm for orthogonal matrices, whose
% eigenvalues are unitary.)
%
% RESULT
%    ret    Real values
%
% PARAMETERS
%    spectrum         Complex spectrum to visualize
%    decomposition    Decomposition
%

function ret = spectrum_visualize(spectrum, decomposition)

properties = konect_data_decomposition(decomposition);

% Real part by default; imaginary part for decompositions whose .i flag
% is set
ret = real(spectrum);
if properties.i
    ret = imag(spectrum);
end

--------------------------------------------------------------------------------
/m/statistic_avgdegree.m:
--------------------------------------------------------------------------------
%
% Compute the average degree statistic of a network.
%
% PARAMETERS
%    $network
%
% INPUT
%    dat/statistic.volume.$network
%    dat/statistic.size.$network
%    dat/statistic.format.$network
%
% OUTPUT
%    dat/statistic.avgdegree.$network
%        The meaning of the values is the same as that described
%        in konect-toolbox/m/konect_statistic_avgdegree.m
%

network = getenv('network');

consts = konect_consts();

format long;

% Volume (first value of the volume statistic)
volume_values = load(sprintf('dat/statistic.volume.%s', network));
m = volume_values(1);

% Number of nodes (first value of the size statistic)
size_values = load(sprintf('dat/statistic.size.%s', network));
n = size_values(1);

value_format = load(sprintf('dat/statistic.format.%s', network));

if value_format == consts.BIP

    % Bipartite:  the size statistic also holds both side sizes
    assert(length(size_values) == 3);

    n_left  = size_values(2);
    n_right = size_values(3);

    assert(n == n_left + n_right);

    % Overall average degree, then the average degree of each side
    values = [ 2 * m / n ; m / n_left ; m / n_right ];

elseif value_format == consts.SYM || value_format == consts.ASYM

    assert(length(size_values) == 1);

    values = 2 * m / n;

else

    error('*** invalid format');

end

save(sprintf('dat/statistic.avgdegree.%s', network), 'values', ...
     '-ascii', '-double');

--------------------------------------------------------------------------------
/m/statistic_comp.m:
--------------------------------------------------------------------------------
%
% Compute a network statistic using Matlab.
%
% PARAMETERS
%    $network
%    $statistic
%
% INPUT
%    dat/data.$NETWORK.mat
%    dat/info.$NETWORK
%    dat/meansi.$NETWORK.mat
%
% OUTPUT
%    dat/statistic.$STATISTIC.$NETWORK
%        Text file with one number per line, the first being the statistic
%        itself and the other lines being additional values such as the error
%        on the value.  As a last value, the runtime is added.
%

network   = getenv('network');
statistic = getenv('statistic');

format long;

data = load(sprintf('dat/data.%s.mat', network));
T = data.T;

info = read_info(network);

% Build the adjacency matrix from the additively normalized edge list
means = load(sprintf('dat/meansi.%s.mat', network));
T = konect_normalize_additively(T, means);

A = konect_spconvert(T, info.n1, info.n2);

% Only the computation of the statistic itself is timed (CPU time)
t_start = cputime;
values = konect_statistic(statistic, A, info.format, info.weights);
runtime = cputime - t_start;

values = [full(values) ; runtime];

% The first value must not be NaN -- the others may be NaN to denote
% that they don't apply
if isnan(values(1))
    values
    error('*** NaN in statistic computation');
end

save(sprintf('dat/statistic.%s.%s', statistic, network), 'values', ...
     '-ascii', '-double');
--------------------------------------------------------------------------------
/m/statistic_comp_spectral.m:
--------------------------------------------------------------------------------
%
% Compute a spectral network statistic for a full dataset.
%
% PARAMETERS
%    $statistic
%    $decomposition
%    $network
%
% INPUT
%    dat/info.$network
%    dat/decomposition.$decomposition.$network.mat
%
% OUTPUT
%    dat/statistic.$statistic.$network
%        Same format as generated by statistic_comp.m
%

network = getenv('network');
statistic = getenv('statistic');
decomposition = getenv('decomposition');

info = read_info(network);

data_decomposition = load(sprintf('dat/decomposition.%s.%s.mat', decomposition, network));

values = statistic_spectral(statistic, data_decomposition.D, data_decomposition.n);

% '-double' as in statistic_comp.m:  plain '-ascii' only writes eight
% significant digits
save(sprintf('dat/statistic.%s.%s', statistic, network), 'values', ...
     '-ascii', '-double');
--------------------------------------------------------------------------------
/m/statistic_diameff.m:
--------------------------------------------------------------------------------
%
% Compute effective diameter.
%
% PARAMETERS
%    $network
%    $percentile    (between 01 and 99)
%
% INPUT FILES
%    dat/hopdistr.$network
%
% OUTPUT FILES
%    dat/statistic.diameff$percentile.$network
%

network = getenv('network');

% The text form is kept because it is part of the output filename
percentile_text = getenv('percentile');
percentile = str2double(percentile_text);
if isnan(percentile)
    fprintf(2, '*** Invalid percentile\n');
    exit(1);
end

% Sanity check: if the percentile is in the range [0...1], then that
% likely is an error.
assert(percentile >= 1 && percentile < 100);

data = load(sprintf('dat/hopdistr.%s', network));

% konect_diameff() takes a fraction, not a percentage (value displayed)
x = konect_diameff(data, percentile / 100)

assert(length(x) == 1);

save(sprintf('dat/statistic.diameff%s.%s', percentile_text, network), 'x', '-ascii');
--------------------------------------------------------------------------------
/m/statistic_full_prefatt.m:
--------------------------------------------------------------------------------
%
% Compute the 'prefatt' statistic (preferential attachment exponent),
% by reading the 'dat/pa.%.mat' files.
%
% We only compute this for temporal networks, even though
% non-temporal networks also have a SPLIT, but since that split is
% random, it wouldn't be interesting.
%
% PARAMETERS
%    $network
%
% INPUT FILES
%    dat/pa.$network.mat
%
% OUTPUT FILES
%    dat/statistic.prefatt.$network
%        [1] normal \beta
%        [2] normal mse
%        [3] long-tail \beta
%        [4] long-tail mse
%

function statistic_full_prefatt

network = getenv('network');

if has_timestamps(network)

    % Loaded data is displayed (no trailing semicolons)
    pa = load(sprintf('dat/pa.%s.mat', network))
    pa.pa.a

    % Take V if it exists, because it is the 'passive' side.
    % Otherwise take A.
    if isfield(pa.pa, 'v')
        values = get_values(pa.pa.v);
    else
        values = get_values(pa.pa.a);
    end

else

    % Not a temporal network:  all four values are undefined
    values = [ NaN ; NaN ; NaN ; NaN ];

end

% The values are a column vector
[m n] = size(values);
assert(n == 1);

save(sprintf('dat/statistic.prefatt.%s', network), 'values', '-ascii');

end

%
% Extract the four output values from one side of the 'pa' data.
%
% RESULT
%    ret (4*1)    [ e(1) ; exp(sqrt(e(3))) ; g(1) ; exp(sqrt(g(3))) ];
%                 the second value of each pair is NaN when the first
%                 one is not positive
%
% PARAMETERS
%    vect    Struct with the vectors .e and .g
%
function [ret] = get_values(vect)

ret = [ NaN ; NaN ; NaN ; NaN ];

ret(1) = vect.e(1);

if ret(1) > 0
    ret(2) = exp(sqrt(vect.e(3)));
else
    ret(2) = NaN;
end

ret(3) = vect.g(1);

if ret(3) > 0
    ret(4) = exp(sqrt(vect.g(3)));
else
    % Invalidate the error value, as in the first pair; the exponent
    % itself in ret(3) is kept
    ret(4) = NaN;
end

end
--------------------------------------------------------------------------------
/m/statistic_lines.m:
--------------------------------------------------------------------------------
%
% Compute the number of lines in the definition of a network, i.e.,
% the number of lines in the out.* file.  This is different from the
% volume for networks with multiple edges without timestamps, because
% these networks aggregate multiple edges into one line.
%
% PARAMETERS
%    $network
%
% INPUT
%    dat/info.$network
%
% OUTPUT
%    dat/statistic.lines.$network
%

network = getenv('network');

info = read_info(network);

consts = konect_consts();

values = [ info.lines ];

% '-double':  plain '-ascii' only writes eight significant digits, which
% would round line counts of 10^8 and more
save(sprintf('dat/statistic.lines.%s', network), 'values', '-ascii', '-double');
--------------------------------------------------------------------------------
/m/statistic_meandist.m:
--------------------------------------------------------------------------------
%
% Compute the average distance in a network.
%
% PARAMETERS
%    $network
%
% INPUT FILES
%    dat/hopdistr.$network
%
% OUTPUT FILES
%    dat/statistic.meandist.$network
%

network = getenv('network');

% Hop distribution of the network
data = load(sprintf('dat/hopdistr.%s', network));

x = konect_diammean(data);

save(sprintf('dat/statistic.meandist.%s', network), 'x', '-ascii');
--------------------------------------------------------------------------------
/m/statistic_size.m:
--------------------------------------------------------------------------------
DEPRECATED:::

%
% NOTE: the first line of this file is not valid code, so this script
% cannot be run as is.
%
% Compute the size statistic of a network.
%
% PARAMETERS
%    $network
%
% INPUT
%    dat/info.$network
%
% OUTPUT
%    dat/statistic.size.$network
%        One integer per line:  the number of nodes, followed for
%        bipartite networks by the number of nodes on each side
%

network = getenv('network');

info = read_info(network);

consts = konect_consts();

if info.format == consts.BIP

    % Total number of nodes, then both side sizes
    values = [ info.n1 + info.n2 ; info.n1 ; info.n2 ];

else

    % TODO: for ASYM networks, also output the number of nodes with
    % nonzero number of outlinks and inlinks.

    values = [ info.n1 ];

end

OUT= fopen(sprintf('dat/statistic.size.%s', network), 'w');
if OUT < 0,
    error('fopen');
end

% The format is applied once per value:  one integer per line
fprintf(OUT, '%u\n', values);

if 0 > fclose(OUT)
    error('fclose');
end
--------------------------------------------------------------------------------
/m/statistic_spectral.m:
--------------------------------------------------------------------------------
%
% Compute a spectral statistic.  A spectral statistic is one that
% depends on the rank-r spectrum (and not just on the first or second
% eigenvalues).
%
% RESULT
%    values    Column vector of values
%
% PARAMETERS
%    D (r*r)      Eigenvalues or equivalent
%    statistic    Name of the statistic, one of the cases below
%    n            Number of nodes (only used by 'aredis')
%

function values = statistic_spectral(statistic, D, n)

switch statistic

  case 'network_rank_abs'

    % Sum of the absolute values relative to the first one
    magnitudes = abs(diag(D));
    values = sum(magnitudes) / magnitudes(1);

  case 'network_rank_norm'

    values = sum(abs(diag(D)));

  case 'network_rank_norm4'

    values = sum(abs(diag(D)) .^ 4);

  case 'epower'

    values = estimate_power_law(abs(diag(D)));

  case {'entropy', 'entropyn'}

    values = konect_normalized_entropy(abs(diag(D)));

  case 'aredis'

    % Values below this threshold count as zero and are left out
    threshold = 1e-11;

    inverses = diag(D);
    inverses(inverses < threshold) = 0;
    inverses = inverses .^ -1;
    inverses(isinf(inverses)) = 0;

    values = sum(inverses) * 2 / n;

  case 'oddcycles'

    lambda = diag(D);

    % Spectrum rescaled such that the largest absolute value is 1
    scaled = (1 / max(abs(lambda))) * lambda;

    % The third variant leaves out the first value
    tail = scaled(2:end);

    values = [ sum(sinh(lambda)) / sum(exp(lambda)) ; ...
               sum(sinh(scaled)) / sum(exp(scaled)) ; ...
               sum(tail ./ (1 - tail .^ 2)) / sum((-tail + 1) .^ -1) ];

  otherwise
    error(sprintf('*** Invalid spectral statistic %s', statistic));
end
--------------------------------------------------------------------------------
/m/statistic_time_diam.m:
--------------------------------------------------------------------------------
%
% Compute the "diam" statistic from the hop distribution.
%
% PARAMETERS
%    $NETWORK
%    $TYPE    "full" or "split"
%
% INPUT
%    dat/hopdistr_time.$TYPE.$NETWORK
%        One hop distribution per line
%
% OUTPUT
%    dat/statistic_time.$TYPE.diam.$NETWORK
%        One line per input line, with four values:
%        [1] number of positive entries of the hop distribution
%        [2] konect_diameff(., 0.9)
%        [3] konect_diameff(., 0.5)
%        [4] konect_diammean(.)
%

network = getenv('NETWORK');
type = getenv('TYPE');

data = load(sprintf('dat/hopdistr_time.%s.%s', type, network));

ret = [];

for i = 1 : size(data,1)

    % Hop distribution of this timepoint only, restricted to its
    % positive entries.  (Selecting from the whole matrix DATA, as was
    % done before, gives the same values for every timepoint.)
    row = data(i, :);
    hops = row(row > 0);
    hops = hops(:);         % column vector, as before

    values = [];
    values(1) = length(hops);
    values(2) = konect_diameff(hops, 0.9);
    values(3) = konect_diameff(hops, 0.5);
    values(4) = konect_diammean(hops);
    ret = [ret ; values];
end

save(sprintf('dat/statistic_time.%s.diam.%s', type, network), 'ret', '-ascii');

--------------------------------------------------------------------------------
/m/statistic_time_slice.m:
--------------------------------------------------------------------------------
%
% Generate 'slice' from a temporal statistic.
%
% PARAMETERS
%    $statistic    Name of the statistic
%    $network      Name of the network
%    $type
%    $K            The slice ID, i.e., column number (an integer >= 2)
%
% INPUT FILES
%    dat/statistic_time.$type.$statistic.$network
%        The temporal statistics: one timepoint per line, each
%        line contains multiple numbers, the $K'th number being
%        the substatistic we are interested in
%
% OUTPUT FILES
%    dat/statistic_time.$type.${statistic}+${K}.$network
%        A file containing only a single column, taken from the
%        input file
%

% The parameters are displayed (no trailing semicolons)
statistic = getenv('statistic')
network = getenv('network')
type = getenv('type')
k_text = getenv('K')

% str2double() only parses a number, whereas str2num() evaluates its
% argument as code.  Invalid or empty text gives NaN, which fails the
% assertion below.
k = str2double(k_text)

assert(k >= 2 && k == floor(k));

data = load(sprintf('dat/statistic_time.%s.%s.%s', type, statistic, network));

% At least ten timepoints and at least K columns are required
size_data = size(data)
assert(size_data(1) >= 10);
assert(size_data(2) >= k);

data_k = data(:,k);

% The text form of K is used in the filename
save(sprintf('dat/statistic_time.%s.%s+%s.%s', type, statistic, k_text, network), ...
     'data_k', ...
     '-ascii');
--------------------------------------------------------------------------------
/m/statistic_time_spectral.m:
--------------------------------------------------------------------------------
%
% Compute a spectral network statistic over time on either the full or the split dataset.
%
% PARAMETERS
%    $NETWORK          The network; must have timestamps
%    $STATISTIC        The statistic
%    $DECOMPOSITION
%    $TYPE             The data to use, "full" or "split"
%
% INPUT
%    dat/info.$NETWORK
%    dat/stepsi.$NETWORK (only full)
%    dat/steps.$NETWORK.mat (only split)
%    dat/decomposition_time.$TYPE.$DECOMPOSITION.$NETWORK.mat
%
% OUTPUT
%    dat/statistic_time.$TYPE.$STATISTIC.$NETWORK
%        All statistics as text.  One timepoint per line.  Each
%        line contains the statistics, with the first number
%        being the main statistic.
%

network = getenv('NETWORK');
statistic = getenv('STATISTIC');
decomposition = getenv('DECOMPOSITION');
type = getenv('TYPE');
is_split = strcmp(type, 'split');

info = read_info(network);

% Only the number of steps is used below
if ~is_split
    e_steps = load(sprintf('dat/stepsi.%s', network));
else
    steps = load(sprintf('dat/steps.%s.mat', network));
    e_steps = steps.e_steps;
end

data_decomposition = load(sprintf('dat/decomposition_time.%s.%s.%s.mat', type, decomposition, network));

% One row per timepoint
ret = [];

for k = 1 : prod(size(e_steps))

    values = statistic_spectral(statistic, data_decomposition.decompositions(k).D, data_decomposition.decompositions(k).n);

    % Make the width of RET and the length of VALUES agree by padding
    % the shorter one with zeros.  The padding of RET must have as many
    % rows as RET itself.
    width = size(ret, 2);
    count = size(values, 1);
    if ~isempty(ret) && count > width
        ret = [ret zeros(size(ret, 1), count - width)];
    elseif count < width
        values = [values ; zeros(width - count, 1)];
    end

    ret = [ret ; values'];

end

save(sprintf('dat/statistic_time.%s.%s.%s', type, statistic, network), 'ret', '-ascii');
--------------------------------------------------------------------------------
/m/statistics_time_plot.m:
--------------------------------------------------------------------------------
%
% Aggregate plot of all statistics over time.
%
% Every statistic is rescaled linearly such that the minimum and the
% maximum of its values from the tenth timepoint on become 0 and 1, and
% all statistics are drawn as curves in one plot.
%
% PARAMETERS
%    $NETWORK
%    $TYPE          "full" or "split"
%    $STATISTICS    Space-separated list of statistics
%
% INPUT
%    dat/statistic_time.$TYPE.$STATISTIC.$NETWORK
%        for each $STATISTIC in $STATISTICS
%
% OUTPUT
%    plot/statistics_time.[a].$TYPE.$NETWORK.eps
%

network = getenv('NETWORK');
type = getenv('TYPE');
statistics = getenv('STATISTICS');

% Split the list at whitespace (displayed)
statistics = regexp(statistics, '\S+', 'match')

% One column per statistic / one legend entry per statistic
datas = [];
legends = [];

for k = 1:size(statistics, 2)
    % Unwrap the cell; both forms are displayed
    statistic = statistics(k)
    statistic = statistic{:}

    % Only the first column (the main statistic) is plotted
    data = load(sprintf('dat/statistic_time.%s.%s.%s', type, statistic, network));
    data = data(:,1);

    % Normalize
    % The range is taken from the tenth timepoint on, so the first nine
    % values may fall outside of [0,1]; this requires at least ten
    % timepoints.
    part = data(10:end);
    i = min(part);
    a = max(part);
    data = (data - i) / (a - i);

    datas = [ datas data ];

    legends = [ legends ; cellstr(konect_label_statistic(statistic, 'matlab-short')) ];
end

% Line colors, one row per curve
colors= [0 0 0;
         1 0 0;
         0 1 0;
         0 0 1;
         .5 0 0;
         0 .5 0;
         .7 .7 0;
         1 0 1;
         0 1 1 ];

% Set on the root object (0), i.e. as defaults for axes created later
set(0,'DefaultAxesColorOrder', colors, 'DefaultAxesLineStyleOrder','-|--|-.')

% The X axis is the index of the timepoint
plot(1 : size(datas, 1), datas);

axis([ 0 size(datas,1) 0 1]);

xlabel(konect_label_statistic('volume', 'matlab'));

legend(legends, 'Location', 'EastOutside');

konect_print(sprintf('plot/statistics_time.a.%s.%s.eps', type, network));
--------------------------------------------------------------------------------
/m/steps.m:
--------------------------------------------------------------------------------
%
% Compute steps for time-dependent analysis of a split dataset.
%
% The time steps are not exactly all over the same number of edges.
% Instead, they are fudged so as to put steps on the moment the two
% splits happen.
%
% PARAMETERS
%    $network    Network
%
% INPUT
%    dat/split.$network.mat
%    dat/means.$network.mat    (loaded, but its content is not used)
%
% OUTPUT
%    dat/steps.$network.mat    Steps
%        steps_all       Total number of steps ( = size(e_steps, 1))
%        steps_source    Number of source steps
%        steps_target    Number of target steps
%        e_steps (count×1)    Edge count at each step
%

network = getenv('network');

% Total number of steps
count = 100;

split = load(sprintf('dat/split.%s.mat', network));
means = load(sprintf('dat/means.%s.mat', network));

% Edge counts (displayed)
e_source = size(split.T_source, 1)
e_target = size(split.T_target, 1)
e_test   = size(split.T_test  , 1)

% Last index in the source and target set:  the steps are distributed
% over the three sets in proportion to their edge counts
index_source = floor(e_source              * count / (e_source + e_target + e_test))
index_target = floor((e_source + e_target) * count / (e_source + e_target + e_test))

% Within each set the steps are evenly spaced, and the last step of
% each set falls exactly on the end of that set
e_steps = [ floor((1 : index_source) * e_source / index_source) ...
            (e_source + floor((1 : index_target - index_source) * e_target / (index_target - index_source))) ...
            (e_source + e_target + floor(( 1 : count - index_target) * e_test / (count - index_target))) ]'

steps_all = count
steps_source = index_source
steps_target = index_target - index_source

save(sprintf('dat/steps.%s.mat', network), '-v7.3', ...
     'steps_all', 'steps_source', 'steps_target', ...
     'e_steps');
--------------------------------------------------------------------------------
/m/stepsi.m:
--------------------------------------------------------------------------------
%
% Compute the steps
%
% The steps are COUNT evenly spaced line counts, the last one being the
% total number of lines of the network.
%
% PARAMETERS
%    $network
%
% INPUT
%    dat/info.$network
%
% OUTPUT
%    dat/stepsi.$network
%        The list of step values as text, with one number per line
%

% Number of steps
count = 100;

network = getenv('network');

info = read_info(network);

stepsi_data = floor((1:count) * info.lines / count);

filename = sprintf('dat/stepsi.%s', network);

FILE = fopen(filename, 'w');

% error() needs a message
if FILE < 0, error('*** fopen'); end;

% The format is applied once per value:  one number per line
fprintf(FILE, '%u\n', stepsi_data);

if fclose(FILE) < 0, error('*** fclose'); end;

--------------------------------------------------------------------------------
/m/styles_method.m:
--------------------------------------------------------------------------------
%
% Style info by methods.
%
% RESULT
%    colors         Struct by method
%        .(method)  (1*3) Color
%    line_styles    Struct by method
%        .(method)  (string) line style
%    markers        Struct by method
%        .(method)  (string) marker
%
% Some methods only have a color.
%

function [colors line_styles markers] = styles_method()

% Methods with a complete style:  name, color, line style, marker
styled = { ...
    'svd',       [1  0  0 ], '-',  'o' ; ...
    'diag',      [0  1  0 ], '--', 's' ; ...
    'takane',    [0  0  1 ], ':',  '*' ; ...
    'dedicom1u', [1  0  1 ], '-',  '+' ; ...
    'dedicom1v', [0  1  1 ], '--', 'x' ; ...
    'dedicom2',  [.7 0  0 ], ':',  'd' ; ...
    'dedicom2s', [0  .7 0 ], '-',  'h' ; ...
    'dedicom3',  [0  0  .7], '--', 'p' };

% Methods that only have a color:  name, color
color_only = { ...
    'sym',  [.7 .7 0 ] ; ...
    'pref', [.7 0  .7] ; ...
    'neib', [0  .7 .7] };

colors      = struct();
line_styles = struct();
markers     = struct();

for row = 1 : size(styled, 1)
    method = styled{row, 1};
    colors.(method)      = styled{row, 2};
    line_styles.(method) = styled{row, 3};
    markers.(method)     = styled{row, 4};
end

for row = 1 : size(color_only, 1)
    colors.(color_only{row, 1}) = color_only{row, 2};
end
--------------------------------------------------------------------------------
/m/time_degree.m:
--------------------------------------------------------------------------------
%
% Draw overlay of degree distributions over time.
%
% The degree distribution is drawn three times on the same log-log
% axes:  for the first third, the first two thirds and all of the edges.
%
% PARAMETERS
%    $NETWORK
%    $RANDOMIZE    '1' to randomize order of edges
%
% INPUT
%    dat/info.$NETWORK
%    dat/split.$NETWORK.mat
%
% OUTPUT
%    plot/time_degree.$NETWORK.eps
%    plot/time_degree.$NETWORK.rand.eps    (when $RANDOMIZE is '1')
%

consts = konect_consts();

network = getenv('NETWORK');
randomize = getenv('RANDOMIZE');
randomize = strcmp(randomize, '1');

info = read_info(network);

split = load(sprintf('dat/split.%s.mat', network));

% All edges, in the order source, target, test
T = [split.T_source; split.T_target; split.T_test];

s = size(T,1)

if randomize
    % The permuted edge list must be assigned back; the bare expression
    % T(p, :) has no effect.
    p = randperm(s);
    T = T(p, :);
end

% Number of edges in each of the three snapshots (displayed)
steps = [round(s/3), round(s * 2/3), s]

% Color and point style of each snapshot
cm = ...
    [232  43 235;
      35  76 191;
      67 177  58] / 255;

point_styles = {'o', 'x', 's'};

hold on;

for i = 1 : prod(size(steps))

    % Network made of the first steps(i) edges
    A = konect_spconvert(T(1:steps(i), :), split.n1, split.n2);

    if info.format == consts.SYM
        A = A + A';
    end

    % Row sums of A, and the number of nodes having each value
    degree = full(sum(A,2));
    counts = sort(degree);
    maxcount = counts(end);
    freq = histc(counts, 0:maxcount);

    % Only values that occur are drawn
    nz = freq ~= 0;
    ra = 0:maxcount;
    ra = ra(nz);
    fq = freq(nz);

    loglog(ra, fq, point_styles{i}, 'Color', cm(i,:));
end

xlabel('Number of neighbors (n)');
ylabel('Frequency');

% With "hold on" the axes may not have switched to log scale
set(gca, 'XScale', 'log');
set(gca, 'YScale', 'log');

legend([cellstr('|E| = 1/3 |Eall|'), cellstr('|E| = 2/3 |Eall|'), cellstr('|E| = 3/3 |Eall|')]);

extra = '';
if randomize
    extra = '.rand';
end

konect_print(sprintf('plot/time_degree.%s%s.eps', network, extra));

--------------------------------------------------------------------------------
/m/time_xaxis.m:
--------------------------------------------------------------------------------
%
% Make the X axis labels by correct for
year numbers. 3 | % 4 | % DEPRECATED -- use time_xaxis_unix.m 5 | % 6 | % PARAMETERS 7 | % t_min, t_max Minimum and maximum values to be plotted, in 8 | % year numbers 9 | % 10 | 11 | function time_xaxis(t_min, t_max) 12 | 13 | NUM = 7; % Maximum number of ticks to show on the date axis (X axis) 14 | 15 | months = {'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', ... 16 | 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'}; 17 | 18 | if t_max - t_min < 1 / 6 19 | r = (ceil(t_min / (24 * 60 * 60)) : floor(t_max / (24 * 60 * 60))) / 365.25 + 1970 20 | length_r = length(r) 21 | if length(r) > 0 22 | 'aaa' 23 | for i = 1 : length(r) 24 | l{i} = sprintf('Day %d', i); 25 | end 26 | l 27 | set(gca, 'XTick', r, 'XTickLabel', l); 28 | end 29 | 30 | elseif t_max - t_min < (1/12) * NUM 31 | r = (ceil(t_min * 12) / 12):(1/12):(floor(t_max * 12) / 12); 32 | set(gca, 'XTick', r); 33 | rr = ceil(t_min * 12) : floor(t_max * 12); 34 | assert(length(r) == length(rr)); 35 | assert(length(r) > 0); 36 | for i = 1 : length(r) 37 | if (mod(i,12) == 0) 38 | text = round(r(i)); 39 | else 40 | month = mod(i,12); 41 | text = months{month + 1}; 42 | end 43 | l{i} = text; 44 | end 45 | set(gca, 'XTickLabel', l); 46 | 47 | elseif t_max - t_min < (1/6) * NUM 48 | r = (ceil(t_min * 6) / 6):(1/6):(floor(t_max * 6) / 6); 49 | set(gca, 'XTick', r); 50 | rr = ceil(t_min * 6) : floor(t_max * 6); 51 | assert(length(r) == length(rr)); 52 | assert(length(r) > 0); 53 | for i = 1 : length(r) 54 | if (mod(i,6) == 0) 55 | text = round(r(i)); 56 | else 57 | month = 2 * mod(i,6); 58 | text = months{month + 1}; 59 | end 60 | l{i} = text; 61 | end 62 | set(gca, 'XTickLabel', l); 63 | 64 | elseif t_max - t_min < 1 * NUM 65 | set(gca, 'XTick', ceil(t_min:floor(t_max))); 66 | elseif t_max - t_min < 2 * NUM 67 | set(gca, 'XTick', ceil(t_min:2:floor(t_max))); 68 | elseif t_max - t_min < 5 * NUM 69 | set(gca, 'XTick', ceil(t_min:5:floor(t_max))); 70 | elseif t_max - t_min < 10 * NUM 71 | set(gca, 'XTick', ceil(t_min:10:floor(t_max))); 
72 | elseif t_max - t_min < 20 * NUM 73 | set(gca, 'XTick', ceil(t_min:20:floor(t_max))); 74 | elseif t_max - t_min < 50 * NUM 75 | set(gca, 'XTick', ceil(t_min:50:floor(t_max))); 76 | elseif t_max - t_min < 100 * NUM 77 | set(gca, 'XTick', ceil(t_min:100:floor(t_max))); 78 | end 79 | 80 | -------------------------------------------------------------------------------- /m/trend.m: -------------------------------------------------------------------------------- 1 | % 2 | % Estimate the trend in the time evolution of a statistic. 3 | % 4 | % PARAMETERS 5 | % $NETWORK 6 | % $STATISTIC 7 | % $TYPE 8 | % 9 | % INPUT 10 | % dat/statistic_time.$TYPE.$STATISTIC.$NETWORK 11 | % 12 | % OUTPUT 13 | % dat/trend.$TYPE.$STATISTIC.$NETWORK.mat 14 | % H 1 when the trend is significant, 0 otherwise (regardless of direction) 15 | % p p value (regardless of direction) 16 | % updown Direction of trend: +1 up, -1 down 17 | % range Range of time values used (in 1..100) 18 | % values Values used 19 | % 20 | 21 | network = getenv('NETWORK'); 22 | statistic = getenv('STATISTIC'); 23 | type = getenv('TYPE') 24 | 25 | data = load(sprintf('dat/statistic_time.%s.%s.%s', type, statistic, network)); 26 | 27 | alpha = 0.05; 28 | 29 | if strcmp(type, 'full') 30 | first = floor(size(data,1) / 2); 31 | elseif strcmp(type, 'split') 32 | steps = load(sprintf('dat/steps.%s.mat', network)) 33 | first = 1 + steps.steps_source; 34 | else 35 | error '***'; 36 | end 37 | 38 | range = first:size(data,1); 39 | 40 | values = data(range, 1) 41 | 42 | [ H p ] = Mann_Kendall(values, alpha) 43 | 44 | % New code for estimating updown: Linear least squares on ranks 45 | if size(values,1) > size(values,2), values = values'; end; 46 | [x i] = sort(values) ; 47 | X = i / [ 1 : length(values) ; ones(1, length(values)) ]; 48 | updown = sign(X(1)) 49 | 50 | % Old code for estimating updown: sign of difference of sum between first and second half of values 51 | %l = floor(length(values) / 2) 52 | %values_begin = 
values(1:l) 53 | %values_end = values(end:-1:end-l+1) 54 | %updown = sign(sum(values_end) - sum(values_begin)) 55 | 56 | save(sprintf('dat/trend.%s.%s.%s.mat', type, statistic, network), '-v7.3', ... 57 | 'H', 'p', 'updown', 'range', 'values'); 58 | -------------------------------------------------------------------------------- /m/trend_plot.m: -------------------------------------------------------------------------------- 1 | % 2 | % Plot the network evolution together with the p-value and tendency. 3 | % 4 | % PARAMETERS 5 | % $NETWORK 6 | % $STATISTIC 7 | % $TYPE full or split 8 | % 9 | % INPUT 10 | % dat/trend.$TYPE.$STATISTIC.$NETWORK.mat 11 | % 12 | % OUTPUT 13 | % plot/trend.$TYPE.$STATISTIC.$NETWORK.eps 14 | % 15 | 16 | network = getenv('NETWORK'); 17 | statistic = getenv('STATISTIC'); 18 | type = getenv('TYPE'); 19 | 20 | line_width = 5; 21 | font_size = 86; 22 | 23 | data = load(sprintf('dat/trend.%s.%s.%s.mat', type, statistic, network)); 24 | 25 | updown_statistic = get_updown_statistic(); 26 | 27 | shrinking = data.H & data.updown == updown_statistic.(statistic) 28 | 29 | if shrinking 30 | color_test = [ 0 .7 0 ]; 31 | else 32 | color_test = [ .7 0 0 ]; 33 | end 34 | 35 | plot(data.range, data.values, '-', 'LineWidth', line_width, 'Color', color_test); 36 | 37 | set(gca, 'XTick', [], 'YTick', []); 38 | 39 | axis tight; 40 | 41 | if shrinking 42 | ax = axis(); 43 | text(ax(2), ax(3), sprintf('%.3g', data.p), 'VerticalAlignment', 'bottom', 'HorizontalAlignment', 'right', 'FontSize', font_size); 44 | end 45 | 46 | konect_print(sprintf('plot/trend.%s.%s.%s.eps', type, statistic, network)); 47 | 48 | -------------------------------------------------------------------------------- /m/zipf.m: -------------------------------------------------------------------------------- 1 | % 2 | % Draw Zipf plots. This is the transpose of the BIDD plot, i.e., of 3 | % the cumulative degree distribution. 
4 | % 5 | % PARAMETERS 6 | % $network 7 | % 8 | % INPUT 9 | % dat/data.$network.mat 10 | % 11 | % OUTPUT 12 | % plot/zipf.[auv].$network.dat 13 | % a - total 14 | % u,v - Row/column based (only BIP and ASYM) 15 | % 16 | 17 | network = getenv('network'); 18 | 19 | data = load(sprintf('dat/data.%s.mat', network)); 20 | 21 | T = data.T; 22 | 23 | consts = konect_consts(); 24 | 25 | info = read_info(network); 26 | 27 | if info.weights ~= consts.POSITIVE & size(T,2) >= 3 28 | T(:,3:end) = []; 29 | end 30 | 31 | % 32 | % U, V 33 | % 34 | if info.format ~= consts.SYM 35 | 36 | if size(T,2) >= 3 37 | q = T(:,3); 38 | else 39 | q = []; 40 | end 41 | 42 | zipf_one(T(:,1), q, 'u'); 43 | konect_print(sprintf('plot/zipf.u.%s.eps', network)); 44 | 45 | zipf_one(T(:,2), q, 'v'); 46 | konect_print(sprintf('plot/zipf.v.%s.eps', network)); 47 | end 48 | 49 | 50 | % 51 | % A 52 | % 53 | 54 | if info.format == consts.BIP 55 | m = max(T(:,1)); 56 | 57 | p = [ T(:,1) ; T(:,2)+m ]; 58 | if size(T,2) >= 3 59 | q = [ T(:,3) ; T(:,3) ]; 60 | else 61 | q = []; 62 | end 63 | else 64 | p = [ T(:,1) ; T(:,2) ]; 65 | if size(T,2) >= 3 66 | q = [ T(:,3) ; T(:,3) ]; 67 | else 68 | q = []; 69 | end 70 | end 71 | 72 | zipf_one(p, q, 'a'); 73 | konect_print(sprintf('plot/zipf.a.%s.eps', network)); 74 | -------------------------------------------------------------------------------- /m/zipf_one.m: -------------------------------------------------------------------------------- 1 | % 2 | % Plot one Zipf plot. 
3 | % 4 | % PARAMETERS 5 | % p (e*1) Node indexes 6 | % q (e*1) Multiplicities; [] to denote all ones 7 | % letter 8 | % 9 | 10 | function zipf_one(p, q, letter) 11 | 12 | font_size = 24; 13 | marker_size = 13; 14 | point_style = '.'; 15 | 16 | colors = konect_colors_letter(); 17 | 18 | if length(q) == 0 19 | q = 1; 20 | end 21 | 22 | degrees = full(sparse(p, 1, q, max(p), 1)); 23 | 24 | degrees = degrees(find(degrees)); 25 | 26 | [~,i] = sort(-degrees); 27 | 28 | degrees = degrees(i); 29 | 30 | loglog(1:length(degrees), degrees, point_style, 'Color', colors.(letter), 'MarkerSize', marker_size); 31 | 32 | set(gca, 'FontSize', font_size); 33 | 34 | xlabel('Rank (i)', 'FontSize', font_size); 35 | ylabel('Degree (d(i))', 'FontSize', font_size); 36 | 37 | ax = axis() 38 | 39 | set(gca, 'XMinorTick', 'on'); 40 | set(gca, 'YMinorTick', 'on'); 41 | set(gca, 'TickLength', [0.05 0.05]); 42 | 43 | % Workaround for Matlab bug. Otherwise, the minor ticks are not visible. 44 | ax = axis(); 45 | if ax(1) > 0 & ax(3) > 0 46 | set(gca, 'XTick', 10 .^ (ceil(log(ax(1)) / log(10)):floor(log(ax(2)) / log(10)))); 47 | set(gca, 'YTick', 10 .^ (ceil(log(ax(3)) / log(10)):floor(log(ax(4)) / log(10)))); 48 | end 49 | -------------------------------------------------------------------------------- /mns: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Execute Stu parallely, dividing used memory by K, and limiting 4 | # runtime of processes. Also, be NICE. 5 | # 6 | # In order to limit the memory used by KONECT processes, use 7 | # 8 | # ulimit -v SIZE 9 | # 10 | # where SIZE is the amount of memory that should be used, in kilobytes. 11 | # 12 | # INVOCATION 13 | # $0 [K] args... 
14 | # K is the number of processes to run in parallel; if not 15 | # given, defaults to a single process 16 | # 17 | # PARAMETERS 18 | # $MN_RUNTIME Maximal runtime in hours; if not set, don't 19 | # limit runtime 20 | # 21 | 22 | K=1 23 | 24 | [ "$1" ] && echo "$1" | grep '^[0-9]*$' >/dev/null && { 25 | K=$1 26 | shift 27 | } 28 | 29 | rm -f error.log 30 | 31 | if [ "$MN_RUNTIME" ] ; then 32 | RUNTIME_S=$((MN_RUNTIME * 3600)) 33 | ulimit -t "$RUNTIME_S" 34 | fi 35 | 36 | exec nice sh/mem "$K" stu -j "$K" "$@" 37 | -------------------------------------------------------------------------------- /octave: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Execute an Octave script. This is a thin wrapper around Octave that 4 | # is used in KONECT Analysis, but is not necessarily specific to it. It 5 | # is also exported to other projects. 6 | # 7 | # ARGUMENTS 8 | # $1 filename of script to execute, usually a .m script 9 | # 10 | # OUTPUT FILES 11 | # error.log Errors from all runs of this script are appended 12 | # to this file 13 | # 14 | 15 | SCRIPT=$1 16 | 17 | [ "$PREFIX" ] && PREFIX=."$PREFIX" 18 | LOGNAME=$(basename "$SCRIPT" .m) 19 | 20 | # Error log 21 | exec 6>>error.log 22 | 23 | # Words to use in the logfile are the lowercase environment variables. 24 | # This may break in some cases. 25 | 26 | for NAME in $(set | sed -e 's,=.*$,,;tnext;d;:next;/^[a-z][a-z_-]*$/!d') ; do 27 | eval VALUE=\"\$"$NAME"\" 28 | length=$(echo $(echo "$VALUE" | wc -c)) 29 | if [ "$length" -lt 30 ] ; then # Don't insert value if too long 30 | LOGNAME=$LOGNAME.$VALUE 31 | fi 32 | done 33 | 34 | export LOG="${TMPDIR-/tmp}/o.$LOGNAME$PREFIX.log" 35 | printf >&2 '\t%s\n' "$LOG" 36 | 37 | ## # 38 | ## # Reset locale because Octave outputs some localized number formats with the 39 | ## # wrong locale. 
40 | ## # 41 | ## unset $(set | sed -E 's,^(LC_.*)=.*$,\1,;t;d') 42 | ## set | grep -E '^LC' >&2 43 | 44 | # 45 | # Invocation of Octave: 46 | # 47 | # -q Quiet 48 | # 49 | # Note: --no-window-system is not supported by Octave 3.0 50 | # 51 | 52 | DIR_SCRIPT=$(dirname "$SCRIPT") 53 | if echo "$DIR_SCRIPT" | grep -vq '^/' ; then 54 | DIR_SCRIPT="$PWD/$DIR_SCRIPT" 55 | fi 56 | 57 | OCTAVE_PATH="$OCTAVE_PATH:$MATLABPATH" 58 | OCTAVE_PATH="$DIR_SCRIPT:$OCTAVE_PATH" 59 | export OCTAVE_PATH 60 | exec 3>$LOG 61 | GNUTERM=dumb DISPLAY= octave -q -W --no-gui --no-window-system "$SCRIPT" "$LOG" >&3 2>&1 || 62 | { 63 | # The error messages of Octave don't conform to a well-defined 64 | # standard, and usually don't even show the location first. 65 | # Therefore, we first extract all "location" lines, and then 66 | # output the full log. 67 | 68 | <"$LOG" sed >&2 -E -e ' 69 | s|^error:\s*(.*) at line ([0-9]+), column ([0-9]+)\s*$|\1:\2:\3:|;t 70 | s,^parse error near line ([0-9]+) of file (.+)$,\2:\1:,;t 71 | d 72 | ' 73 | 74 | <"$LOG" sed >&2 -n -E -e ' 75 | /^(error:|parse error)/,$p 76 | ' 77 | 78 | echo >&2 "*** error in $LOG" 79 | echo >&6 "*** error in $LOG" 80 | 81 | exit 1 82 | } 83 | 84 | echo >&3 '=== FINISHED SUCCESSFULLY ===' 85 | 86 | exit 0 87 | -------------------------------------------------------------------------------- /pl/Konect.pm: -------------------------------------------------------------------------------- 1 | package Konect; 2 | 3 | use strict; 4 | use warnings; 5 | 6 | # Can be exported 7 | our @EXPORT_OK = qw( bitwidth ); 8 | 9 | # Exported by default 10 | our @EXPORT = qw( bitwidth ); 11 | 12 | # 13 | # Given a number of nodes, return the bitwidth character for the 14 | # corresponding unsigned type. 
#
# The result starts at 'a' and is incremented once for every integer
# square root needed to bring N down to 1 or below:  'a' for N <= 1,
# 'b' for N in 2..3, 'c' for 4..15, 'd' for 16..255, 'e' for
# 256..65535, and so on.
#
sub bitwidth($) {
	my ($N) = @_;

	my $ret = 'a';

	while ($N > 1) {
		$N = int(sqrt($N));
		++$ret;
	}

	return $ret;
}

1;
--------------------------------------------------------------------------------
/pl/README:
--------------------------------------------------------------------------------
This directory (pl/) contains Perl modules.  Perl scripts are not here
but in sh/ instead.
--------------------------------------------------------------------------------
/pl/TexToHtml/Converter.pm:
--------------------------------------------------------------------------------
package TexToHtml::Converter;

use strict;
use warnings;

#
# Convert a TeX fragment to plain text.  Called as a class method; the
# invocant is ignored.
#
# $string	TeX fragment
#
# Returns the converted string.
#
sub to_text {
	my (undef, $string) = @_;

	# Dash
	$string =~ s/--/–/g;

	# Remove curly braces:  first unwrap {\command{...}} groups, then
	# drop all remaining braces
	$string =~ s/\{\\[^{]+\{(.*)\}\}/$1/g;
	$string =~ s/[{}]//g;

	# TeX-style quotes
	$string =~ s/``([^']*)''''/"$1"/g;

	return $string;
}

#
# Convert a TeX fragment to HTML.  Called as a class method; the
# invocant is ignored.
#
# $string	TeX fragment
#
# Returns the converted string, in which each \url{...} has become a
# link to that URL.
#
sub convert {
	my (undef, $string) = @_;

	# Generate links.  The URL is used both as the link target and as
	# the link text.
	$string =~ s/\\url\{([^}]*)\}/<a href="$1">$1<\/a>/gi;

	# Dash
	$string =~ s/--/–/g;

	# Remove curly braces:  first unwrap {\command{...}} groups, then
	# drop all remaining braces
	$string =~ s/\{\\[^{]+\{(.*)\}\}/$1/g;
	$string =~ s/[{}]//g;

	return $string;
}

1;
--------------------------------------------------------------------------------
/runtime.source:
--------------------------------------------------------------------------------
%
% output of svds.
k m n r 3 | % 4 | %= dblp-cite 105 12561 12563 49779 5 | %= advogato 152 7385 7385 57627 6 | %= hep-th-citations 59 27770 27766 352807 7 | %= slashdot-zoo 30 71523 71523 488440 8 | %= epinions 16 131828 131827 841372 9 | %= movielens 163 6040 3706 1000209 10 | %= bx 10 105283 340532 1149780 11 | %= www 17 325729 325729 1497135 12 | %= citeulike-tags 8 731769 153277 2411819 13 | %= dblp-author 1 660822 1099443 2773008 14 | %= wiki-en-cat 2 1853493 182947 3795796 15 | %= trec-wt10g 1 1601787 1601772 8063026 16 | %= filmtipset 21 64051 49087 14385828 17 | %= libimseti 7 220970 220962 17359346 18 | %%= patentcite 0 3774768 3774680 16522438 19 | 20 | -------------------------------------------------------------------------------- /sh/README: -------------------------------------------------------------------------------- 1 | This directory contains scripts, i.e. files that begin with '#!'. These 2 | are shell scripts, Perl 5 scripts, awk scripts, Sed scripts, etc. 3 | -------------------------------------------------------------------------------- /sh/category: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Filter networks by their category. 4 | # 5 | # PARAMETERS 6 | # $category Name of the category 7 | # 8 | # STDIN 9 | # One network name per line 10 | # 11 | # STDOUT 12 | # One network name per line, containing only those networks that 13 | # are in the given category 14 | # 15 | 16 | while read network 17 | do 18 | if grep -qE '^\s*category\s*:\s*'"$category"'\s*$' uni/meta."$network" 19 | then 20 | echo "$network" 21 | fi 22 | done 23 | -------------------------------------------------------------------------------- /sh/check: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Check all files in uni/ for consistency at the file level, i.e., encoding, etc. 4 | # 5 | 6 | for file in uni/* 7 | do 8 | 9 | echo Checking "$file"... 10 | if ! 
isutf8 "$file" ; then
		exit 1
	fi

done

exit 0
--------------------------------------------------------------------------------
/sh/checkmeta:
--------------------------------------------------------------------------------
#! /bin/sh
#
# Check that the meta.* files are correct.
#
# PARAMETERS
#	$verbose	Set to non-empty for verbose mode
#
# INPUT FILES
#	dat/NETWORKS	List of networks to check
#	uni/meta.*
#	uni/out.*
#
# EXIT STATUS
#	0 when no error was found, 1 otherwise
#

[ "$verbose" ] && exec 4>&2 || exec 4>/dev/null

found_error=0

# $1:  file in which the error occurred
# other arguments:  error message
error()
{
	found_error=1
	error_file="$1"
	shift
	echo "Error in file $error_file: $*" >&2
}

FILES="$(sed -E -e 's,^,uni/meta.,' dat/NETWORKS)"

# Space-separated list of CODE:FILE pairs seen so far
CODESANDFILES=

MANDATORY="name code category entity-names relationship-names"

for file in $FILES
do
	echo >&4 "Checking $file ..."

	# Field names must be followed by ':' and not '='
	grep -Eq '^[^:=]*=' "$file" && error "$file" '*** Field names must be followed by colon (:), not an equal sign (=)'

	# Duplicate codes
	CODE="$(grep 'code' "$file" | sed -n -E -e 's/^\s*code\s*:\s*([a-zA-Z0-9\@]+)\s*$/\1/p')"
	[ "$CODE" ] || { error "$file" '*** No code given'; continue; }
	[ "$(echo "$CODESANDFILES" | grep -E '(^| )'"$CODE"':' )" ] && { error "$file" "*** Uses same code ($CODE) as file $(echo "$CODESANDFILES" | grep -E -o '(^| )'"$CODE"':[^ ]+' | cut -d':' -f2- )"; continue; }
	CODESANDFILES="$CODESANDFILES $CODE:$file"

	# Mandatory fields
	for field in $MANDATORY ; do
		if ! grep -Eq '^\s*'"$field"'\s*:' "$file" ; then
			error "$file" "Field '$field' missing"
		fi
	done
done

# Check that all meta.* files have a corresponding out.* file
for file in uni/meta.* ; do
	out="$(echo "$file" | sed -E -e 's,^uni/meta,uni/out,')"
	[ -r "$out" ] || {
		echo >&2 "*** Missing file '$out' corresponding to file '$file'"
		exit 1
	}
done

exit "$found_error"

--------------------------------------------------------------------------------
/sh/dep-network:
--------------------------------------------------------------------------------
#! /bin/sh
#
# Output the dependencies of a network.
#
# The list of dependencies is output in the order in which it is built,
# which is the same order as things are defined in the KONECT Handbook.
#
# PARAMETERS
#	$network
#	$plots_only	Non-empty when only plots should be output
#
# STDOUT
#	The list of dependencies in Stu format
#
# INPUT FILES
#	dat/dep.statistic.all.$network
#	dat/dep.decomposition.all.$network
#	dat/dep.plot.all.$network
#

set -e

#
# Tests
#

echo @check."$network"

#
# Data files
#

echo @tsv."$network"

#
# Statistics
#

cat dat/dep.statistic.all."$network"

#
# Features
#

#
# Decompositions
#

cat dat/dep.decomposition.all."$network"

#
# Plots
#

cat dat/dep.plot.all."$network"
--------------------------------------------------------------------------------
/sh/depc:
--------------------------------------------------------------------------------
#! /bin/sh
#
# Compute C/C++ dependencies.
4 | # 5 | # INVOCATION 6 | # $0 $FILENAME 7 | # 8 | # STDOUT 9 | # The needed files, one per line 10 | # 11 | 12 | set -e 13 | 14 | TMPDIR="${TMPDIR:-/tmp}" 15 | TMPFILE="$TMPDIR/depc.$$" 16 | 17 | echo "$1" >"$TMPFILE" 18 | 19 | while : 20 | do 21 | rm -f "$TMPFILE".new 22 | r=0 23 | for file in $(cat "$TMPFILE") 24 | do 25 | r=1 26 | echo "$file" 27 | if [ -e "$file" ]; then 28 | <"$file" >>"$TMPFILE".new sed -E -e 's,^\s*#\s*include\s+"(.*)",c/\1,;t;d' 29 | fi 30 | done 31 | if [ "$r" = 0 ]; then exit 0; fi 32 | cp "$TMPFILE".new "$TMPFILE" 33 | done | 34 | sort -u 35 | -------------------------------------------------------------------------------- /sh/eps2png: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Convert an EPS file to PNG 4 | # 5 | # INVOCATION 6 | # 7 | # $0 $filename_eps $filename_png 8 | # 9 | 10 | inkscape -z "$1" -e "$2" 11 | -------------------------------------------------------------------------------- /sh/group: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Get the group of a statistic, i.e., the string describing the set 4 | # of networks that apply to it. 5 | # 6 | # INVOCATION 7 | # $0 $STATISTIC 8 | # 9 | # INPUT FILES 10 | # konect-toolbox/m/konect_statistic_$statistic_base.m 11 | # 12 | # STDOUT 13 | # The group name is output. Nothing is output when no group is 14 | # known. 15 | # 16 | 17 | if [ -z "$1" ] ; then 18 | echo >&2 "*** $0: first argument missing" 19 | exit 1 20 | fi 21 | 22 | statistic="$1" 23 | 24 | statistic_base="$(echo "$statistic" | sed -E -e 's,\+[0-9]+$,,')" 25 | 26 | suffix="$(echo "$statistic" | sed -E -e 's,^.*\+,+,;t;d')" 27 | 28 | file="konect-toolbox/m/konect_statistic_$statistic_base.m" 29 | 30 | if [ ! 
-e "$file" ] ; then 31 | echo >&2 "*** $0: file '$file' not found" 32 | exit 1 33 | fi 34 | 35 | suffix_regexp="$(echo "$suffix" | sed -E -e 's,\+,\\+,')" 36 | 37 | { 38 | # Take the first non-empty of the following two: 39 | 40 | # Substatistic-specific group 41 | sed -E -e 's,^.*GROUP'"$suffix_regexp"'\s*:\s*([^ ]+)\s*$,\1,;t;;d' "$file" | 42 | tr a-z A-Z 43 | 44 | # Statistic-specific group 45 | sed -E -e 's,^.*GROUP\s*:\s*([^ ]+)\s*$,\1,;t;;d' "$file" | 46 | tr a-z A-Z 47 | } | 48 | sed -E -e '/^\s*$/d' | sed -E -e '1!d' 49 | 50 | exit 0 51 | -------------------------------------------------------------------------------- /sh/intersect: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Compute the intersection of two network lists. 4 | # 5 | # INVOCATION 6 | # 7 | # $0 FILE-1 FILE-2 8 | # 9 | 10 | set -e 11 | 12 | if [ -z "$1" -o -z "$2" ] ; then 13 | echo >&2 '*** Error in invocation' 14 | exit 1 15 | fi 16 | 17 | tmp1="${TMPDIR:-/tmp/}/$$.intersect.1" 18 | tmp2="${TMPDIR:-/tmp/}/$$.intersect.2" 19 | 20 | sort -k 1b,1 "$1" >"$tmp1" 21 | sort -k 1b,1 "$2" >"$tmp2" 22 | 23 | join "$tmp1" "$tmp2" | 24 | sh/sort-networks 25 | -------------------------------------------------------------------------------- /sh/listempty: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Output the name of empty plot/*.eps files. 4 | # 5 | # Some plot/*.eps are erroneous: Matlab crashes while generating them, 6 | # usually because there is too much data to plot. This results in a 7 | # segmentation violation in Matlab which cannot be catched in Matlab 8 | # code. This script finds such EPS files and outputs their names. 
9 | # 10 | 11 | [ "$verbose" ] && exec 4>&2 || exec 4>/dev/null 12 | 13 | find plot/ -name '*.eps' | 14 | while read -r file 15 | do 16 | echo >&4 Trying "'$file'" 17 | 18 | if tail -1 "$file" | grep -Flvq '%%EOF' 19 | then 20 | echo "$file" 21 | fi 22 | done 23 | -------------------------------------------------------------------------------- /sh/mem: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Divide the 'ulimit -v' value by SIZE and execute COMMAND. ulimit -v 4 | # must be set to a finite value, or be 'unlimited'. 5 | # 6 | # 'ulimit -v' is used to limit the amount of memory usable by each 7 | # process. 8 | # 9 | # INVOCATION 10 | # 11 | # $0 SIZE COMMAND 12 | # 13 | 14 | size=$1 15 | shift 16 | 17 | mem=$(echo $(ulimit -v)) 18 | 19 | if [ "$mem" = unlimited ] 20 | then 21 | # There is no limit -- just execute the command 22 | exec "$@" 23 | else 24 | ulimit -v "$((mem / size))" 25 | exec "$@" 26 | fi 27 | -------------------------------------------------------------------------------- /sh/mkdatasetlist: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # Generate list of networks. 4 | # 5 | # INPUT FILES 6 | # uni/* 7 | # 8 | # STDOUT 9 | # One network per line: 10 | # 11 | # NETWORK FORMAT WEIGHTS TIMESTAMPS BYTES 12 | # 13 | # TIMESTAMPS is t for timestamps and 0 for no timestamps 14 | # BYTES is the size in bytes. 15 | # Entries are not sorted. 
#
# ENVIRONMENT
#
#	$verbose	Set to non-empty for verbose mode
#

[ "$verbose" ] && exec 4>&2 || exec 4>/dev/null

echo uni/out.* | sed -e 's| \{1,\}|\
|g' |
while IFS= read -r file
do
	echo >&4 "file='$file'"
	name="$(echo "$file" | sed -e 's,^uni/out\.,,')"
	echo >&4 "name='$name'"

	# Name, format and weight
	printf '%s' "$name $(head -1 "$file" | sed -E -e 's,^.* ([^ ]+) .*$,\1,') $(head -1 "$file" | sed -E -e 's,^.*\s([^ ]+)\s*$,\1,') "

	# Timestamps
	if [ $(head "$file" | tail -1 | wc -w) -gt 3 ]
	then
		printf t
	else
		printf 0
	fi
	printf ' '

	# Bytes
	ls -lH -- "$file" | cut -d ' ' -f 5
done
--------------------------------------------------------------------------------
/sh/mkdownloadlist:
--------------------------------------------------------------------------------
#! /bin/sh
#
# Determine the list of networks that can be downloaded, on stdout.
#
# The output preserves the ordering from dat/NETWORKS.
#
# INPUT FILES
#	dat/NETWORKS		Complete list of networks
#	DOWNLOAD_PATTERNS	Patterns that are allowed
#

# Turn each pattern line into an anchored regular expression:  drop
# comment lines and empty lines, strip leading and trailing whitespace,
# replace the wildcard '*' by '.*', and anchor the result at both ends.
sed -E -e '
	/^\s*#/d
	/^\s*$/d
	s,^\s+,,
	s,\s+$,,
	s,\*,.*,g
	s,^,^,
	s,$,$,
' DOWNLOAD_PATTERNS |
grep -E -f - dat/NETWORKS
--------------------------------------------------------------------------------
/sh/mkmissing:
--------------------------------------------------------------------------------
#! /usr/bin/perl
#
# DEPRECATED -- this is deprecated because we now use Stu, which can do
# this on its own.
#
# This script determines which files for a Makefile target are missing
# and invokes make on each one of them.
#
# INVOCATION
#
#	./mkmissing.pl "make arguments" "make invoke command"
#
# where the parameters are:
# 	make arguments: make options and target
# 		e.g., "-f Makefile.dataset datasets.make"
# 	make invoke command: make command and the options with which it is invoked
# 		e.g., "make -f Makefile.dataset"
#

use strict;
use warnings;

my ($makeargs, $makeinvoke) = @ARGV;

$makeargs   ||= "";
$makeinvoke ||= "./mn 5 -k";

my @targets = ();

# Both arguments are option strings rather than single words, so the
# commands are passed to the shell as one string on purpose.
open my $make, '-|', "LANG=en_US.utf-8 make -p -n $makeargs"
    or die "$!";

# Collect every target from make's database dump that does not exist
# as a file yet.
while (my $line = <$make>)
{
	if ($line =~ /^# @ := (.+)$/)
	{
		unless (-e $1)
		{
			push(@targets, $1);
		}
	}
}

# Closing a pipe fails with $! unset when the child exited with a
# non-zero status.
close $make
    or die($! ? "$!" : "make exited with status $?");

my $count = @targets;
print "amount of targets: $count\n";

print "running make\n";
open my $makeout, '|-', "xargs $makeinvoke"
    or die "$!";

foreach my $target (@targets)
{
	print {$makeout} "$target\n";
}

close $makeout
    or die($! ? "$!" : "xargs exited with status $?");
print "done\n";
--------------------------------------------------------------------------------
/sh/mkpath:
--------------------------------------------------------------------------------
#! /bin/sh
#
# Output the value of $MATLABPATH.
#

# The path is passed as an argument and not as the format string, so
# that a '%' or '\' in $PWD is output literally.
printf '%s' "$PWD/m/:$PWD/konect-toolbox/m:$PWD/lib/:$PWD/lib/matlab_bgl/:$PWD/lib/wafo-statistics/:$PWD/lib/wafo-misc/:$PWD/lib/gridxy:$PWD/syngraphy/"
--------------------------------------------------------------------------------
/sh/mkstat:
--------------------------------------------------------------------------------
#! /usr/bin/awk -f
#
# Convert result of runtime evaluation to Matlab matrix file for analysis.
#
# STDIN
#
#	Text with data lines beginning with "%="
# STDOUT
#
#	Matlab file with same data
#

!
/^%=/ { next } {print $3 " " $4 " " $5 " " $6}
--------------------------------------------------------------------------------
/sh/mktime:
--------------------------------------------------------------------------------
#! /bin/sh
#
# Create the timestamp file from the out.* file, via stdin/stdout.
#
# PARAMETERS
#	$network	(only for the check, not for accessing the file)
#

set -e

if ! grep -qE '^'"$network"'$' dat/NETWORKS_TIME ; then
	exit 1
fi

sed -E -e '
/^%/d
s,[0-9]+\s+[0-9]+\s+[^ ]+\s+([0-9-]+),\1,
'
--------------------------------------------------------------------------------
/sh/network-format:
--------------------------------------------------------------------------------
#! /usr/bin/perl
#
# Extract format from an unirelational dataset.
#
# STDIN
#	Dataset file, typically named out.* or rel.*
#
# STDOUT
#	One line with the numerical format
#

use strict;
use warnings;

# Numerical code of each relationship format name
my %format_code_of = (
	sym  => 1,
	asym => 2,
	bip  => 3,
);

$_ = <>;

defined $_
    or die "*** error: Empty input\n";

/^\s*%\s+(\S+)\s+(\S+)/
    or die "*** error: Invalid first input line: $_";

my $relationship_format_text = $1;

exists $format_code_of{$relationship_format_text}
    or die "*** error: Invalid relationship format $relationship_format_text";

my $relationship_format = $format_code_of{$relationship_format_text};

print "$relationship_format\n";
--------------------------------------------------------------------------------
/sh/network-info:
--------------------------------------------------------------------------------
#! /usr/bin/perl
#
# Extract numerical info from an unirelational dataset:  Number of
# subject and object entities, number of edges, format and weights.
#
# This data format is deprecated, but still used.
#
# STDIN
#	Dataset file, typically named out.* or rel.*
#
# STDOUT
#	Six lines with the following info:
#		n1	Number of subjects (largest subject ID)
#		n2	Number of objects (largest object ID)
#		g	Number of data lines, i.e., edges
#		emn_	Fill:  g / (n1 * n2)   [DEPRECATED]
#		format	Relationship format as defined in
#			matlab/constants.m
#		weights	Weight range as defined in
#			matlab/constants.m
#
# Parameters are ignored.
#

use strict;
use warnings;

# Numerical code of each relationship format name
my %format_code_of = (
	sym  => 1,
	asym => 2,
	bip  => 3,
);

# Numerical code of each weight range name
my %weight_code_of = (
	unweighted    => 1,
	positive      => 2,
	posweighted   => 3,
	signed        => 4,
	multisigned   => 5,
	weighted      => 6,
	multiweighted => 7,
	dynamic       => 8,
);

$_ = <STDIN>;

defined $_
    or die "*** error: Empty input\n";

/^\s*%\s+(\S+)\s+(\S+)/
    or die "*** error: Invalid first input line: $_";

my $relationship_format_text = $1;
my $weight_range_text = $2;

exists $format_code_of{$relationship_format_text}
    or die "*** error: Invalid relationship format $relationship_format_text";

exists $weight_code_of{$weight_range_text}
    or die "*** error: Invalid weight range: $weight_range_text";

my $relationship_format = $format_code_of{$relationship_format_text};
my $weight_range = $weight_code_of{$weight_range_text};

my $m = -1;
my $n = -1;
my $e = 0;

while (<STDIN>)
{
	if (/^%/) { next; }
	if (/^\s*$/) { next; }

	/^\s*(\S+)\s+(\S+)/
	    or die "*** error: Invalid input: $_";

	my $subject = $1;
	my $object = $2;

	++ $e;

	if ($subject > $m) { $m = $subject; }
	if ($object > $n) { $n = $object; }
}

# Except in bipartite networks (format 3), subjects and objects are the
# same set of nodes, so both counts are the larger of the two.
if ($relationship_format != 3)
{
	if ($m < $n) { $m = $n; }
	if ($n < $m) { $n = $m; }
}

my $emn = $e / ($m * $n);

print "$m\n";
print "$n\n";
print "$e\n";
print "$emn\n";
print "$relationship_format\n";
print "$weight_range\n";
--------------------------------------------------------------------------------
/sh/network-weights:
--------------------------------------------------------------------------------
#! /usr/bin/perl
#
# Extract weights from an unirelational dataset.
#
# STDIN
#	Dataset file, typically named out.* or rel.*
#
# STDOUT
#	Weights as a number
#

use warnings;
use strict;

# Numerical code of each weight range name
my %weight_code_of = (
	unweighted    => 1,
	positive      => 2,
	posweighted   => 3,
	signed        => 4,
	multisigned   => 5,
	weighted      => 6,
	multiweighted => 7,
	dynamic       => 8,
);

$_ = <>;

defined $_
    or die "*** error: Empty input\n";

/^\s*%\s+(\S+)\s+(\S+)/
    or die "*** error: Invalid first input line: $_";

my $weight_range_text = $2;

exists $weight_code_of{$weight_range_text}
    or die "*** error: Invalid weight range: $weight_range_text";

my $weight_range = $weight_code_of{$weight_range_text};

print "$weight_range\n";
--------------------------------------------------------------------------------
/sh/out2:
--------------------------------------------------------------------------------
#! /bin/sed -f
#
# Generate simplified out-files:  All delimiters are tabs, and there are
# no comment lines.
#

# Remove comment lines
/^[[:space:]]*%/d

# Remove space at beginning of line
s,^[[:space:]]*,,

# Remove space at end of line
s,[[:space:]]*$,,

# Collapse internal space to individual tabs
# (the replacement text between the last two slashes is one literal TAB
# character, not a space)
s/[[:space:]]\{1,\}/	/g


--------------------------------------------------------------------------------
/sh/plot-network:
--------------------------------------------------------------------------------
#! /bin/sh
#
# Output the plot-dependencies of a given network.
#
# PARAMETERS
#     $network
#
# INPUT FILES
#     PLOTS
#     dat/NETWORKS_$group for all groups
#

set -e

for plot in $(cat PLOTS) ; do

    # FIXME(review): lines 17-19 of this script were damaged when this
    # listing was extracted.  Everything between "$(" and the "&2" of the
    # error message is missing, i.e. the command that determines $group
    # for $plot and the test that guards the error branch.  The surviving
    # text is kept verbatim below; restore it from the upstream script
    # before running this.
    group=$(&2 "*** Error: group for plot '$plot' not found in 'PLOTS'"
        exit 1
    }

    # Sanity check:  the network must be in the ALL group.
    # -x -F compares the whole line literally, so characters in the
    # network name are never interpreted as a regular expression.
    grep -q -x -F -- "$network" dat/NETWORKS_ALL || {
        echo >&2 "*** Network '$network' must be in the 'ALL' group"
        exit 1
    }

    [ -r dat/NETWORKS_"$group" ] || {
        echo >&2 "*** Expected file 'dat/NETWORKS_$group' to exist because network '$network' is in group '$group' of plot '$plot'"
        exit 1
    }

    # Output the dependency only when the network belongs to the group.
    if grep -q -x -F -- "$network" dat/NETWORKS_"$group" ; then
        echo @"$plot"."$network"
    fi
done


--------------------------------------------------------------------------------
/sh/save_diag:
--------------------------------------------------------------------------------
#! /bin/sh
#
# Extract last eigenvalues calculated.
#
# INVOCATION
#     $1    name of logfile
#
# STDIN     $1
# STDOUT    The values
#

# Pipeline, step by step:
#   1. take the last 900 lines of the log;
#   2. reverse them and stop at the first line starting with "Iteration",
#      so that only what follows the last such line remains;
#   3. reverse back to the original order;
#   4. print lines while they consist only of the characters "e", "+",
#      digits and "." (optionally surrounded by space), and stop at the
#      first line that does not.
# ${1:+"$1"} quotes the file name when one is given and expands to
# nothing otherwise, in which case tail reads standard input as before.
tail -n900 ${1:+"$1"} | tac | sed -e '/^Iteration/Q' | tac | sed -E -e '/^\s*[e+0-9.]+\s*$/!Q'


--------------------------------------------------------------------------------
/sh/sort-networks:
--------------------------------------------------------------------------------
#! /usr/bin/perl
#
# Sort the given list of networks by ascending size.
#
# STDIN
#     The concatenation of all network lists, in no particular order
#
# STDOUT
#     The sorted list of networks, by ascending size
#
# INPUT
#     dat/networks.asc
#

use strict;
use warnings;

# Set of requested network names (one name per input line).
my %networks;

while (my $name = <>) {
    chomp $name;
    $networks{$name} = 1;
}

# Walk through dat/networks.asc in file order and print every requested
# network at the position where it appears there.  The first word of
# each line is taken as the network name.
open(my $in, "<", "dat/networks.asc") or die "$!";

while (my $entry = <$in>) {
    $entry =~ /^\s*(\S+)\s/ or die;
    my $network = $1;
    if (exists $networks{$network}) {
        print "$network\n";
        delete $networks{$network};
    }
}

close($in) or die "$!";

# All networks that remain in %networks were not found in the full
# network list--an error.
if (%networks) {
    print STDERR "*** Unmatched networks:\n";
    print STDERR "$_\n" for sort keys %networks;
    exit 1;
}


--------------------------------------------------------------------------------
/sh/statistic-network:
--------------------------------------------------------------------------------
#! /bin/sh
#
# Output all statistic targets for $network.
#
# PARAMETERS
#     $network
#
# INPUT FILES
#     STATISTICS
#     GROUPS
#     dat/NETWORKS_$group for all groups
#     konect-toolbox/m/konect_statistic_$statistic.m
#

# NOTE(review): the lines tagged "## RM" print tracing output to stderr;
# the tag presumably marks them for removal -- confirm before deleting.

for statistic in $(cat STATISTICS) ; do
    # check whether the statistic must be computed for this network

    echo >&2 "statistic='$statistic'" ## RM
    compute=1

    # The toolbox file is named after the main statistic, i.e. the part
    # of the name before the first "+".
    filename=konect-toolbox/m/konect_statistic_"$(echo "$statistic" | sed -E -e 's,\+.*$,,')".m
    echo >&2 "filename='$filename'" ## RM
    if [ -r "$filename" ] ; then
        echo >&2 "file exists" ## RM

        # For a substatistic ("main+sub"), also look for "GROUP+sub"
        # declarations in the file.
        if echo "$statistic" | grep -q -F + ; then
            key=GROUP+$(echo "$statistic" | sed -E -e 's,^.*\+,,')
        else
            key=GROUP
        fi
        echo >&2 "key='$key'" ## RM

        # Escape "+" so that the key can be used inside the extended
        # regular expression below.
        key_esc=$(echo "$key" | sed -E -e 's,\+,\\+,g')

        # The group must match for *both* the main statistic and
        # the substatistic, hence the loop.  In most cases, only
        # the main statistic declares a group, which then also
        # applies to all substatistics.  In some cases,
        # substatistics however have a more restricted group.
        for group in $(<"$filename" sed -E -e 's,^.*(GROUP|'"$key_esc"')\s*:,,;t;d' | tr a-z A-Z) ; do
            echo >&2 "group='$group'" ## RM
            if ! grep -q -E '^\s*'"$group"'\s*$' GROUPS ; then
                echo >&2 "$filename: unknown group '$group' (1)"
                exit 1
            fi
            if [ ! -r dat/NETWORKS_"$group" ] ; then
                echo >&2 "$filename: unknown group '$group' (2)"

                exit 1
            fi
            # -x -F compares the whole line literally, so characters in
            # the network name are never interpreted as a regular
            # expression.
            if ! grep -q -x -F -- "$network" "dat/NETWORKS_$group" ; then
                echo >&2 "exclude" ## RM
                compute=0
            else
                echo >&2 "include" ## RM
            fi
        done
    else
        :
        # Nothing.  Not all statistics have a file in the
        # toolbox.  Assume the statistic is computed in that
        # case.
    fi

    if [ "$compute" != 0 ] ; then
        echo @statistic."$statistic"."$network"
    fi
done



--------------------------------------------------------------------------------
/sh/statistic-size:
--------------------------------------------------------------------------------
#! /usr/bin/perl
#
# Determine network size statistic.  This outputs all substatistics of
# the [size] statistics; see
# 'konect-toolbox/m/konect_statistic_size.m'.
#
# PARAMETERS
#     $network    Internal name of network
#
# STDIN:  uni/out.$network
# STDOUT: The values, one per line
#
# INPUT FILES
#     dat/statistic.format.$network
#

use strict;
use warnings FATAL => 'all';

my $network = $ENV{"network"};

# The format code is the first line of dat/statistic.format.$network.
open(my $format_fh, "<", "dat/statistic.format.$network") or die "$!";

my $format_network = <$format_fh>;
chomp $format_network;

close($format_fh) or die "$!";

# Formats "1" and "2" use one common node set for both columns; format
# "3" keeps the two columns apart.  Any other value is rejected before
# the input is read.
my $separate_columns;
if ($format_network eq "1" or $format_network eq "2") {
    $separate_columns = 0;
} elsif ($format_network eq "3") {
    $separate_columns = 1;
} else {
    die "*** Invalid format '$format_network'";
}

# Largest value seen in the first and in the second column.
my $max_first  = 0;
my $max_second = 0;

while (my $line = <STDIN>) {
    next if $line =~ /^%/;
    $line =~ /^([0-9]+)\s+([0-9]+).*/ or die "Invalid format in input: '$line'";
    my $first  = $1;
    my $second = $2;
    if ($first  > $max_first)  { $max_first  = $first; }
    if ($second > $max_second) { $max_second = $second; }
}

if ($separate_columns) {
    # Three output lines: the sum, then the size of each column.
    my $n1 = $max_first;
    my $n2 = $max_second;
    if ($n1 < 1) { die "Value of n1" }
    if ($n2 < 1) { die "Value of n2" }
    my $n = $n1 + $n2;
    print "$n\n$n1\n$n2\n";
} else {
    # One output line: the largest value over both columns.
    my $n = $max_first > $max_second ? $max_first : $max_second;
    if ($n < 1) { die "Value of n" }
    print "$n\n";
}

close(STDIN) or die "$!";


--------------------------------------------------------------------------------
/sh/unset-lc:
--------------------------------------------------------------------------------
#
# Script to be sourced
# to unset all locales.  Used to invoke awk.
#

# List the names of all shell variables that start with "LC_" and unset
# them.  The name is restricted to identifier characters so that a value
# which itself contains "=" cannot leak into the captured name.
unset $(set | sed -E 's,^(LC_[A-Za-z0-9_]*)=.*$,\1,;t;d')


--------------------------------------------------------------------------------
/sh/widths-one:
--------------------------------------------------------------------------------
#! /usr/bin/perl
#
# Given a number of characters, return the bitwidth character needed to
# represent that number as an unsigned type.
#
# INVOCATION
#     $0 N
#

use strict;
use warnings;

use Konect;

my $n = $ARGV[0];

defined $n or die "*** Usage: $0 N";

# The mapping from a number to its bitwidth character is implemented in
# the project module Konect.
my $width = Konect::bitwidth($n);

print "$width\n";


--------------------------------------------------------------------------------
/sh/widths-simple:
--------------------------------------------------------------------------------
#! /usr/bin/perl
#
# Given the bitwidths of a network, return the bitwidths of the
# corresponding simple network.
#
# * The format is transformed to SYM (undirected, 1)
# * The weights are transformed to UNWEIGHTED (1)
# * Weights and timestamps are set to unused (-)
# * LOOPS is transformed to zero (0)
# * If the input graph was bipartite, U and V are transformed to a bit
#   width that is large enough to represent the sum of left and right
#   nodes.
# * If the graph is BIP or ASYM take into account that each edge must
#   be stored twice and thus the bitwidth for M may be increased
#
# ENVIRONMENT
#     $network
#
# INPUT FILES
#     dat/widths.$network
#     dat/statistic.size.$network
#     dat/statistic.volume.$network
#
# OUTPUT FILES
#     The transformed bitwidth
#

use strict;
use warnings;

require Konect;

my $network = $ENV{"network"};

# The widths string is the first line of dat/widths.$network.
open(my $widths_fh, "<", "dat/widths.$network") or die $!;
my $in = <$widths_fh>;
close($widths_fh);

defined $in or die "Empty file dat/widths.$network";
chomp $in;

# Split the first eight characters into individual fields.  Only the
# fields 1 (M), 2 (U), 3 (V) and 6 (format) are used below.
my @fields = $in =~ /(.)(.)(.)(.)(.)(.)(.)(.)/
    or die "Invalid widths '$in' in dat/widths.$network";

my $format = $fields[5];

my $m = $fields[0];
my $u = $fields[1];
my $v = $fields[2];

if ($format == 3) {

    # Bipartite

    # dat/statistic.size.$network must hold exactly three lines:  the
    # total number of nodes and the sizes of the two node sets.
    open(my $size_fh, "<", "dat/statistic.size.$network") or die "$!";
    my @values = <$size_fh>;
    close($size_fh);
    die unless (scalar @values) == 3;
    chomp @values;
    my ($n, $n1, $n2) = @values;
    die unless $n == $n1 + $n2;

    # The volume is the first line of dat/statistic.volume.$network.
    open(my $volume_fh, "<", "dat/statistic.volume.$network") or die "$!";
    my $value_m = <$volume_fh>;
    close($volume_fh);
    defined $value_m or die "Empty file dat/statistic.volume.$network";
    chomp $value_m;

    # Both node columns must be able to represent the total node count.
    $u = Konect::bitwidth($n);
    $v = $u;

    # Recompute the M bitwidth
    $m = Konect::bitwidth(2 * $value_m);

} elsif ($format == 1 || $format == 2) {

    # Unipartite:  Keep bitwidths of underlying network

    # NOTE(review): the header comment says the M bitwidth may also need
    # to grow for ASYM (format 2), but it is only recomputed in the
    # bipartite branch above -- confirm which one is intended.

} else {
    die "Invalid format $format";
}

# The last five characters are constant, see the header comment:
# weights and timestamps unused (--), format 1, weights 1, loops 0.
print "${m}${u}${v}--110\n";


--------------------------------------------------------------------------------