├── MDL
    ├── ._.svn
    ├── config.py
    ├── ._mdl.py
    ├── ._test.py
    ├── error.pyc
    ├── graph.pyc
    ├── makefile
    ├── mdl.pyc
    ├── model.pyc
    ├── ._config.py
    ├── ._error.py
    ├── ._graph.py
    ├── ._makefile
    ├── ._model.py
    ├── ._score.py
    ├── config.pyc
    ├── ._mdl_base.py
    ├── ._mdl_error.py
    ├── ._readme.txt
    ├── mdl_base.pyc
    ├── mdl_error.pyc
    ├── ._greedyScan.py
    ├── ._mdl_structs.py
    ├── mdl_structs.pyc
    ├── ._pythonOutput.txt
    ├── ._run_greedyScan.bash
    ├── ._cliqueStarClique.model
    ├── ._description_length.py
    ├── ._greedySearch_nStop.py
    ├── ._pythonOutput_toyExample.txt
    ├── pythonOutput.txt
    ├── pythonOutput_toyExample.txt
    ├── cliqueStarClique.model
    ├── run_greedyScan.bash
    ├── test.py
    ├── mdl_base.py
    ├── graph.py
    ├── mdl_error.py
    ├── readme.txt
    ├── score.py
    ├── greedyScan.py
    ├── error.py
    ├── mdl.py
    ├── greedySearch_nStop.py
    ├── model.py
    ├── mdl_structs.py
    └── description_length.py
├── DATA
    ├── ._.svn
    ├── ._cliqueStarClique.out
    └── cliqueStarClique.out
├── STRUCTURE_DISCOVERY
    ├── matbg.sh
    ├── ._.svn
    ├── ._BFS.m
    ├── ._BFScoloring.m
    ├── ._assertEqual.m
    ├── ._assertFalse.m
    ├── ._assertTrue.m
    ├── ._encodeAsBC.m
    ├── ._encodeAsNB.m
    ├── ._mdlCostAsBC.m
    ├── ._printModel.m
    ├── ._testMDLcost.m
    ├── ._EncodeSubgraph.m
    ├── ._ExactStructure.m
    ├── ._encodeAsChain.m
    ├── ._encodeAsStar.m
    ├── ._mdlCostAsChain.m
    ├── ._mdlCostAsStar.m
    ├── ._ExtractGccEncode.m
    ├── ._SlashBurnEncode.m
    ├── ._encodeAsFClique.m
    ├── ._encodeAsNClique.m
    ├── ._mdlCostAsBCorNB.m
    ├── ._test_error_edges.m
    ├── ._RemHdegreeGccEncode.m
    ├── ._encodeAsfANDnClique.m
    ├── ._compute_encodingCost.m
    ├── ._mdlCostAsfANDnClique.m
    ├── ._structureSelectionTop10.m
    ├── ._printStructureToModelFile.m
    ├── ._structureSelectionGreedyNforget.m
    ├── l2cnk.m
    ├── testMDLcost.m
    ├── test_error_edges.m
    ├── printModel.m
    ├── RemHdegreeGccEncode.m
    ├── structureSelectionTop10.m
    ├── printStructureToModelFile.m
    ├── encodeAsfANDnClique.m
    ├── assertFalse.m
    ├── ExtractGccEncode.m
    ├── BFScoloring.m
    ├── assertTrue.m
    ├── encodeAsChain.m
    ├── encodeAsFClique.m
    ├── encodeAsStar.m
    ├── encodeAsNClique.m
    ├── encodeAsBC.m
    ├── encodeAsNB.m
    ├── assertEqual.m
    ├── mdlCostAsfANDnClique.m
    ├── mdlCostAsBC.m
    ├── mdlCostAsStar.m
    ├── mdlCostAsChain.m
    ├── mdlCostAsBCorNB.m
    ├── BFS.m
    ├── EncodeSubgraph.m
    ├── structureSelectionGreedyNforget.m
    ├── compute_encodingCost.m
    ├── SlashBurnEncode.m
    └── ExactStructure.m
├── makefile
├── run_structureDiscovery.m
├── license.txt
├── README
└── demo_vog.bash


/MDL/._.svn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._.svn


--------------------------------------------------------------------------------
/MDL/config.py:
--------------------------------------------------------------------------------
1 | optModelZeroes = False;
2 | optVerbosity = 1;
3 | optDefaultError = 'TP';


--------------------------------------------------------------------------------
/DATA/._.svn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/DATA/._.svn


--------------------------------------------------------------------------------
/MDL/._mdl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._mdl.py


--------------------------------------------------------------------------------
/MDL/._test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._test.py


--------------------------------------------------------------------------------
/MDL/error.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/error.pyc


--------------------------------------------------------------------------------
/MDL/graph.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/graph.pyc


--------------------------------------------------------------------------------
/MDL/makefile:
--------------------------------------------------------------------------------
1 | # `demo` names an action, not a file: declare it phony so that a file
2 | # called `demo` in this directory cannot make the target look up to date.
3 | .PHONY: demo
4 | 
5 | # Run score.py on the example graph and model files.
6 | demo:
7 | 	python score.py cliqueStarClique.graph cliqueStarClique.model
8 | 


--------------------------------------------------------------------------------
/MDL/mdl.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/mdl.pyc


--------------------------------------------------------------------------------
/MDL/model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/model.pyc


--------------------------------------------------------------------------------
/MDL/._config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._config.py


--------------------------------------------------------------------------------
/MDL/._error.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._error.py


--------------------------------------------------------------------------------
/MDL/._graph.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._graph.py


--------------------------------------------------------------------------------
/MDL/._makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._makefile


--------------------------------------------------------------------------------
/MDL/._model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._model.py


--------------------------------------------------------------------------------
/MDL/._score.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._score.py


--------------------------------------------------------------------------------
/MDL/config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/config.pyc


--------------------------------------------------------------------------------
/MDL/._mdl_base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._mdl_base.py


--------------------------------------------------------------------------------
/MDL/._mdl_error.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._mdl_error.py


--------------------------------------------------------------------------------
/MDL/._readme.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._readme.txt


--------------------------------------------------------------------------------
/MDL/mdl_base.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/mdl_base.pyc


--------------------------------------------------------------------------------
/MDL/mdl_error.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/mdl_error.pyc


--------------------------------------------------------------------------------
/MDL/._greedyScan.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._greedyScan.py


--------------------------------------------------------------------------------
/MDL/._mdl_structs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._mdl_structs.py


--------------------------------------------------------------------------------
/MDL/mdl_structs.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/mdl_structs.pyc


--------------------------------------------------------------------------------
/MDL/._pythonOutput.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._pythonOutput.txt


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/matbg.sh:
--------------------------------------------------------------------------------
1 | #!/bin/csh -f
2 | 
3 | unsetenv DISPLAY
4 | 
5 | nohup matlab < $1 > $2 &
6 | 
7 | 


--------------------------------------------------------------------------------
/MDL/._run_greedyScan.bash:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._run_greedyScan.bash


--------------------------------------------------------------------------------
/DATA/._cliqueStarClique.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/DATA/._cliqueStarClique.out


--------------------------------------------------------------------------------
/MDL/._cliqueStarClique.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._cliqueStarClique.model


--------------------------------------------------------------------------------
/MDL/._description_length.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._description_length.py


--------------------------------------------------------------------------------
/MDL/._greedySearch_nStop.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._greedySearch_nStop.py


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._.svn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._.svn


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._BFS.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._BFS.m


--------------------------------------------------------------------------------
/MDL/._pythonOutput_toyExample.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/MDL/._pythonOutput_toyExample.txt


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._BFScoloring.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._BFScoloring.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._assertEqual.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._assertEqual.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._assertFalse.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._assertFalse.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._assertTrue.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._assertTrue.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._encodeAsBC.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._encodeAsBC.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._encodeAsNB.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._encodeAsNB.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._mdlCostAsBC.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._mdlCostAsBC.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._printModel.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._printModel.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._testMDLcost.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._testMDLcost.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._EncodeSubgraph.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._EncodeSubgraph.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._ExactStructure.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._ExactStructure.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._encodeAsChain.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._encodeAsChain.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._encodeAsStar.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._encodeAsStar.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._mdlCostAsChain.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._mdlCostAsChain.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._mdlCostAsStar.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._mdlCostAsStar.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._ExtractGccEncode.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._ExtractGccEncode.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._SlashBurnEncode.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._SlashBurnEncode.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._encodeAsFClique.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._encodeAsFClique.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._encodeAsNClique.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._encodeAsNClique.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._mdlCostAsBCorNB.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._mdlCostAsBCorNB.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._test_error_edges.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._test_error_edges.m


--------------------------------------------------------------------------------
/MDL/pythonOutput.txt:
--------------------------------------------------------------------------------
1 |    	L(G,M)	L(M)	L(E)	#E+	#E-		#Ex
2 | M_0:	60310	2	60309	0/0	5467/4200651	0
3 | M_x:	57690	6447	51243	27/8992	4507/4191659		0
4 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._RemHdegreeGccEncode.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._RemHdegreeGccEncode.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._encodeAsfANDnClique.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._encodeAsfANDnClique.m


--------------------------------------------------------------------------------
/MDL/pythonOutput_toyExample.txt:
--------------------------------------------------------------------------------
1 |    	L(G,M)	L(M)	L(E)	#E+	#E-		#Ex
2 | M_0:	52665	2	52664	0/0	7547/353220	0
3 | M_x:	33922	33066	856	109/7656	0/345564		0
4 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._compute_encodingCost.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._compute_encodingCost.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._mdlCostAsfANDnClique.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._mdlCostAsfANDnClique.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._structureSelectionTop10.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._structureSelectionTop10.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._printStructureToModelFile.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._printStructureToModelFile.m


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/._structureSelectionGreedyNforget.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GemsLab/VoG_Graph_Summarization/HEAD/STRUCTURE_DISCOVERY/._structureSelectionGreedyNforget.m


--------------------------------------------------------------------------------
/MDL/cliqueStarClique.model:
--------------------------------------------------------------------------------
1 | fc 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
2 | fc 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
3 | st 21, 18 19 20 22 23 24 25 26 27 28 29
4 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/l2cnk.m:
--------------------------------------------------------------------------------
 1 | function [nbits] = l2cnk(n,k)
 2 | % L2CNK  Base-2 logarithm of the binomial coefficient "n choose k".
 3 | %   The value is accumulated as a sum of logarithms, so the binomial
 4 | %   coefficient itself is never formed:
 5 | %       log2(n) + log2(n-1) + ... + log2(n-k+1)  -  (log2(k) + ... + log2(1))
 6 | 	% Terms are listed in the order in which they are added: first the k
 7 | 	% factors of the numerator, then the (negated) factors of k!.
 8 | 	terms = [log2(n:-1:n-k+1), -log2(k:-1:1)];
 9 | 
10 | 	nbits = 0;
11 | 	for t = terms
12 | 		nbits = nbits + t;
13 | 	end
14 | end
15 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/testMDLcost.m:
--------------------------------------------------------------------------------
1 | function [] = testMDLcost( mdlCost )
2 | 
3 | if mdlCost < 0 
4 |     error('The MDL cost is negative...');
5 | elseif isnan(mdlCost)
6 |     error('The MDL cost is NaN...');
7 | end
8 | 
9 | end


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/test_error_edges.m:
--------------------------------------------------------------------------------
 1 | function [ ] = test_error_edges(E)
 2 | %% Sanity check for error counts: raise an error if any entry of E is negative.
 3 | %  E may be a scalar, a vector or a matrix; every element is inspected.
 4 | 
 5 | % E(:) flattens E first. Looping over 1:length(E) with linear indexing would
 6 | % only visit the first max(size(E)) elements of a matrix.
 7 | if any(E(:) < 0)
 8 |     error('Negative number of 1s or 0s in the error matrix E...')
 9 | end
10 | 
11 | end


--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
 1 | # Files and directories that go into the distribution archive.
 2 | VOGFILES=README \
 3 |  makefile \
 4 |  license.txt \
 5 |  DATA \
 6 |  STRUCTURE_DISCOVERY \
 7 |  MDL \
 8 |  demo_vog.bash \
 9 |  run_structureDiscovery.m
10 | 
11 | # None of these targets creates a file of its own name (the archive is
12 | # vog.tar), so declare them phony; otherwise a stray file called e.g. `demo`
13 | # or `tar` would stop the recipe from running.
14 | .PHONY: all demo zip tar
15 | 
16 | all:	demo
17 | 
18 | # Run the demo script.
19 | demo:
20 | 	bash demo_vog.bash
21 | 
22 | # `zip` is an alias for `tar`.
23 | zip: tar
24 | 
25 | # Pack everything listed in VOGFILES into vog.tar.
26 | tar: ${VOGFILES}
27 | 	tar -cvf vog.tar ${VOGFILES}
28 | 
29 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/printModel.m:
--------------------------------------------------------------------------------
 1 | function [ ] = printModel( model_ordered, outfile)
 2 | %% Write every structure of a model to a model file.
 3 | % The elements of model_ordered are passed, in order, to
 4 | % printStructureToModelFile. outfile is opened in 'w' mode, so any previous
 5 | % content of that file is discarded.
 6 | %
 7 | % Raises printModel:cannotOpenFile when outfile cannot be opened.
 8 | 
 9 | fid = fopen(outfile, 'w');
10 | if fid == -1
11 |     error('printModel:cannotOpenFile', 'Cannot open %s for writing.', outfile);
12 | end
13 | % Close the file on every exit path, including an error raised while
14 | % one of the structures is being written.
15 | closeFile = onCleanup(@() fclose(fid));
16 | 
17 | for i = 1 : length(model_ordered)
18 |     printStructureToModelFile( model_ordered(i), fid );
19 | end
20 | 
21 | end
22 | 


--------------------------------------------------------------------------------
/MDL/run_greedyScan.bash:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | time -p python2.6 greedyScan.py ../../data/WikiUserGraphs/lcrMediaWiki_wholeEdges.graph ../../code/slashburn/wikipedia_files/lcrMediaWiki_whole_SB_noStar3_orderedALL.model > OUTPUT_greedyScan_lcr_whole.out
4 | time -p python2.6 greedyScan.py ../../data/WikiUserGraphs/chocMediaWiki_sentenceEdges.graph ../../code/slashburn/wikipedia_files/chocMediaWiki_sentence_SB_noStar3_orderedALL.model > OUTPUT_greedyScan_choc_sentence.out &
5 | time -p python2.6 greedyScan.py ../../data/WikiUserGraphs/kievMediaWiki_wholeEdges.graph ../../code/slashburn/wikipedia_files/kievMediaWiki_whole_SB_noStar3_orderedALL.model > OUTPUT_greedyScan_kiev_whole.out
6 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/RemHdegreeGccEncode.m:
--------------------------------------------------------------------------------
 1 | function [disind,gccind,topind] = RemHdegreeGccEncode(B,k,dir,out_fid, top_gccind, N_tot, info, minSize)
 2 | %% Remove the k highest-degree nodes of B and process what is left.
 3 | %  B        adjacency matrix; n = size(B,1) nodes
 4 | %  k        number of highest-degree nodes to remove (capped at n)
 5 | %  dir      1: degree = row sum + column sum; otherwise: row sum only
 6 | %  out_fid, top_gccind, N_tot, info, minSize
 7 | %           handed unchanged to ExtractGccEncode
 8 | %
 9 | %  topind   the removed nodes as a row vector, highest degree first
10 | %  gccind   first output of ExtractGccEncode on the reduced matrix
11 | %  disind   second output of ExtractGccEncode, minus any removed node
12 | 
13 | 
14 | if nargin<3
15 |     dir=1;
16 | end
17 | 
18 | n = size(B,1);
19 | 
20 | % Asking for more nodes than exist would make the range n-k+1:n start at a
21 | % non-positive index.
22 | k = min(k, n);
23 | 
24 | if (dir == 1)
25 | 	D = sum(B,2) + sum(B,1)';
26 | else
27 | 	D = sum(B,2);
28 | end
29 | % Ascending sort: the k largest degrees are the last k entries of I.
30 | [~,I] = sort(D);
31 | 
32 | 
33 | topind = flipud(I(n-k+1:n));
34 | 
35 | % Disconnect the selected nodes by clearing their rows and columns.
36 | B(topind, :) = 0;
37 | B(:, topind) = 0;
38 | 
39 | [gccind,disind] = ExtractGccEncode(B, out_fid, topind, top_gccind, N_tot, info, minSize );
40 | topind = topind';
41 | 
42 | % The removed nodes must not be reported among the disconnected ones.
43 | mask = ismember(disind, topind);
44 | disind = disind(~mask);
45 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/structureSelectionTop10.m:
--------------------------------------------------------------------------------
 1 | function [  ] = structureSelectionTop10(graphFile, model_ordered, outfile)
 2 | %% Write the first 10 structures of model_ordered to a model file.
 3 | % model_ordered is taken in the order given (the caller is expected to have
 4 | % ranked it by MDL benefit); if it holds fewer than 10 structures, all of
 5 | % them are written. outfile is opened in 'w' mode, so previous content is
 6 | % discarded. graphFile is only referenced by the disabled scoring step
 7 | % kept in the comment below.
 8 | %
 9 | % Raises structureSelectionTop10:cannotOpenFile when outfile cannot be opened.
10 | 
11 | fid = fopen(outfile, 'w');
12 | if fid == -1
13 |     error('structureSelectionTop10:cannotOpenFile', ...
14 |         'Cannot open %s for writing.', outfile);
15 | end
16 | % Close the file on every exit path, including an error raised while
17 | % one of the structures is being written.
18 | closeFile = onCleanup(@() fclose(fid));
19 | 
20 | for i = 1 : min( 10, length(model_ordered) )
21 |     printStructureToModelFile( model_ordered(i), fid );
22 | end
23 | 
24 | %comm = sprintf('python ../mdl/score.py %s %s > pythonOutput.txt;', ...
25 | %    graphFile, outfile )
26 | %system(comm)
27 | %pythonOutput = importdata('pythonOutput.txt');
28 | %cost0 = str2num(pythonOutput.textdata{2,2});
29 | %cost = str2num(pythonOutput.textdata{3,2});
30 | 
31 | end
32 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/printStructureToModelFile.m:
--------------------------------------------------------------------------------
 1 | function [] = printStructureToModelFile( structure, fid )
 2 | %% Print a structure to the final model file.
 3 | % One line is written to fid; its layout depends on structure.code:
 4 | %   'nc'              nc <edges>, <nodes1 ...>
 5 | %   'fc', 'ch'        <code> <nodes1 ...>
 6 | %   'bc', 'nb', 'st'  <code> <nodes1 ...>, <nodes2 ...>
 7 | % Any other code writes nothing and triggers a warning.
 8 | 
 9 | switch structure.code
10 |     case 'nc'
11 |         fprintf(fid, 'nc %d,', structure.edges);
12 |         fprintf(fid, ' %d', structure.nodes1 );
13 |         fprintf(fid, '\n');
14 |     case {'fc', 'ch'}
15 |         fprintf(fid, '%s', structure.code);
16 |         fprintf(fid, ' %d', structure.nodes1 );
17 |         fprintf(fid, '\n');
18 |     case {'bc', 'nb', 'st'}
19 |         fprintf(fid, '%s', structure.code);
20 |         fprintf(fid, ' %d', structure.nodes1 );
21 |         fprintf(fid, ',');
22 |         fprintf(fid, ' %d', structure.nodes2 );
23 |         fprintf(fid, '\n');
24 |     otherwise
25 |         % Still skip the structure, but report it instead of silently
26 |         % producing a model file with a missing line.
27 |         warning('printStructureToModelFile:unknownCode', ...
28 |             'Skipping structure with unrecognized code ''%s''.', structure.code);
29 | end
30 | 
31 | end
32 | 


--------------------------------------------------------------------------------
/run_structureDiscovery.m:
--------------------------------------------------------------------------------
 1 |  % Driver script: load the example graph, symmetrize it, and run
 2 |  % SlashBurnEncode on it. MATLAB is closed at the end.
 3 |  input_file = 'DATA/cliqueStarClique.out';
 4 |  unweighted_graph = input_file;
 5 |  output_model_greedy = 'DATA';
 6 |  output_model_top10 = 'DATA';
 7 | 
 8 |  addpath('STRUCTURE_DISCOVERY');
 9 | 
10 |  orig = spconvert(load(input_file));
11 |  % Pad to a square matrix so that it can be added to its own transpose.
12 |  orig(max(size(orig)),max(size(orig))) = 0;
13 |  orig_sym = orig + orig';
14 |  % An edge listed in both directions sums to 2; set exactly those entries
15 |  % back to 1. Indexing with the row and column lists separately, as in
16 |  % orig_sym(rows, cols) = 1, would overwrite the whole rows-by-cols
17 |  % submatrix and add edges that are not in the input.
18 |  orig_sym(orig_sym == 2) = 1;
19 |  % Drop self-loops.
20 |  orig_sym_nodiag = orig_sym - diag(diag(orig_sym));
21 |  
22 |  disp('==== Running VoG for structure discovery ====')
23 |  % model and model_idx are globals; they are initialised empty here before
24 |  % SlashBurnEncode runs.
25 |  global model; 
26 |  model = struct('code', {}, 'edges', {}, 'nodes1', {}, 'nodes2', {}, 'benefit', {}, 'benefit_notEnc', {});
27 |  global model_idx;
28 |  model_idx = 0;
29 |  SlashBurnEncode( orig_sym_nodiag, 2, output_model_greedy, false, false, 3, unweighted_graph);
30 | 
31 |  quit
32 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/encodeAsfANDnClique.m:
--------------------------------------------------------------------------------
 1 | function [] = encodeAsfANDnClique( Asmall, curind, top_gccind, out_fid )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Encode given graph as clique and near-clique                           %
 4 | %  Author: Danai Koutra                                                   %
 5 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 6 | % Two lines are written to out_fid for the same node set: an 'fc' line and
 7 | % an 'nc' line that additionally carries nnz(Asmall)/2.
 8 | % Each curind entry is mapped through top_gccind before it is printed.
 9 | 
10 | nodeCount = size(curind, 2);
11 | 
12 | % Resolve the node ids once; both output lines list the same ids.
13 | nodeIds = zeros(1, nodeCount);
14 | for pos = 1:nodeCount
15 |     nodeIds(pos) = top_gccind( curind(pos) );
16 | end
17 | 
18 | % full clique line
19 | fprintf(out_fid, 'fc');
20 | for id = nodeIds
21 |     fprintf(out_fid, ' %d', id );
22 | end
23 | fprintf(out_fid, '--- full clique \n');
24 | 
25 | % near clique line
26 | fprintf(out_fid, 'nc %d,', nnz(Asmall)/2);
27 | for id = nodeIds
28 |     fprintf(out_fid, ' %d', id );
29 | end
30 | fprintf(out_fid, '--- nearClique \n');
31 | 
32 | end


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/assertFalse.m:
--------------------------------------------------------------------------------
 1 | function assertFalse(condition, message)
 2 | %assertFalse Assert that input condition is false
 3 | %   assertFalse(CONDITION, MESSAGE) throws an exception containing the string
 4 | %   MESSAGE if CONDITION is not false.
 5 | %
 6 | %   MESSAGE is optional.
 7 | %
 8 | %   Examples
 9 | %   --------
10 | %   assertFalse(isreal(sqrt(-1)))
11 | %
12 | %   assertFalse(isreal(sqrt(-1)), ...
13 | %       'Expected isreal(sqrt(-1)) to be false.')
14 | %
15 | %   See also assertTrue
16 | 
17 | %   Steven L. Eddins
18 | %   Copyright 2008-2010 The MathWorks, Inc.
19 | 
20 | % Use a generic message when the caller did not supply one.
21 | if nargin < 2
22 |    message = 'Asserted condition is not false.';
23 | end
24 | 
25 | % Only a scalar logical is accepted as CONDITION.
26 | isLogicalScalar = isscalar(condition) && islogical(condition);
27 | if ~isLogicalScalar
28 |    throwAsCaller(MException('assertFalse:invalidCondition', ...
29 |       'CONDITION must be a scalar logical value.'));
30 | end
31 | 
32 | if condition
33 |    throwAsCaller(MException('assertFalse:trueCondition', '%s', message));
34 | end
35 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/ExtractGccEncode.m:
--------------------------------------------------------------------------------
 1 | function [cur_gccind,cur_disind] = ExtractGccEncode(B, out_fid, topind, top_gccind, N_tot, info, minSize)
 2 | %% Split B into weakly connected components and encode the smaller ones.
 3 | %  B        adjacency matrix handed to graphconncomp
 4 | %  topind   node indices; a single-node component whose node is listed
 5 | %           here is ignored altogether
 6 | %  minSize  only components with more than minSize nodes are passed to
 7 | %           EncodeSubgraph
 8 | %  out_fid, top_gccind, N_tot, info
 9 | %           handed unchanged to EncodeSubgraph
10 | %
11 | %  cur_gccind  nodes of the largest component (empty if there is none)
12 | %  cur_disind  nodes of all other components that were not ignored,
13 | %              largest component first
14 | 
15 | [S,C]=graphconncomp(B, 'WEAK', true);
16 | 
17 | cur_gccind = zeros(1,0);
18 | cur_disind = zeros(0,0);
19 | 
20 | % No components at all (empty graph): nothing to rank or encode.
21 | if S == 0
22 |     return;
23 | end
24 | 
25 | % Number of nodes in each component.
26 | size_v = zeros(1, S);
27 | for k=1:S
28 |     size_v(k) = nnz(C == k);
29 | end
30 | 
31 | [~,I]=sort(size_v, 'descend');
32 | 
33 | cur_gccind = find(C == I(1));
34 | 
35 | for k=2:S
36 |     curind = find(C == I(k));
37 | 
38 |     % NOTE(review): such singletons are presumably the nodes the caller
39 |     % removed on purpose - confirm against RemHdegreeGccEncode.
40 |     if isscalar(curind) && ismember(curind, topind)
41 |         continue;
42 |     end
43 | 
44 |     if length(curind) > minSize
45 |         EncodeSubgraph(B, curind, top_gccind, N_tot, out_fid, info, minSize);
46 |     end
47 |     cur_disind = [cur_disind curind];
48 | end
49 | 
50 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/BFScoloring.m:
--------------------------------------------------------------------------------
 1 | function [ set1, set2 ] = BFScoloring( Asmall )
 2 | %% Given a bipartite graph, find the two node-sets memberships
 3 | 
 4 | n = size(Asmall,2);
 5 | [seed ~] = find(Asmall);
 6 | queue = [min(seed)];
 7 | % sets = 2 (if unvisited) or 0 (if in set 1) or 1 (if in set 2)
 8 | sets = zeros(1,n)+2;
 9 | color = 1;
10 | % coloring node 1 with color "0"
11 | sets(min(seed)) = 0;
12 | usedColor = false;
13 | 
14 | 
15 | while ~( isempty(queue) )
16 |     neighbors = find( Asmall(queue(1),:) ) ;
17 |     
18 |     for i = 1 : length(neighbors)
19 |         if sets( neighbors(i) ) == 2 % unvisited neighbor
20 |             sets( neighbors(i) ) = color;
21 |             queue = [ queue, neighbors(i) ];
22 |             usedColor = true;
23 |         end
24 |     end
25 |     
26 |     qsize = length(queue);
27 |     queue = queue(2:qsize);
28 |     if usedColor
29 |         color = mod(color+1, 2);
30 |     end
31 |     usedColor = false;
32 | end
33 | 
34 | set1 = find( sets == 0 );
35 | set2 = find( sets == 1 );
36 | 
37 | end


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/assertTrue.m:
--------------------------------------------------------------------------------
 1 | function assertTrue(condition, message)
 2 | %assertTrue Assert that input condition is true
 3 | %   assertTrue(CONDITION, MESSAGE) throws an exception containing the string
 4 | %   MESSAGE if CONDITION is not true.
 5 | %
 6 | %   MESSAGE is optional.
 7 | %
 8 | %   Examples
 9 | %   --------
10 | %   % This call returns silently.
11 | %   assertTrue(rand < 1, 'Expected output of rand to be less than 1')
12 | %
13 | %   % This call throws an error.
14 | %   assertTrue(sum(sum(magic(3))) == 0, ...
15 | %       'Expected sum of elements of magic(3) to be 0')
16 | %
17 | %   See also assertEqual, assertFalse
18 | 
19 | %   Steven L. Eddins
20 | %   Copyright 2008-2010 The MathWorks, Inc.
21 | 
22 | if nargin < 2
23 |    message = 'Asserted condition is not true.';
24 | end
25 | 
26 | if ~isscalar(condition) || ~islogical(condition)
27 |    throwAsCaller(MException('assertTrue:invalidCondition', ...
28 |       'CONDITION must be a scalar logical value.'));
29 | end
30 | 
31 | if ~condition
32 |    throwAsCaller(MException('assertTrue:falseCondition', '%s', message));
33 | end
34 | 


--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013, Danai Koutra, Jilles Vreeken, U Kang
 2 | All rights reserved.
 3 | 
 4 | Permission is granted to use it for non-profit purposes,
 5 | including research and teaching. For-profit use requires
 6 | the express consent of the author (danai@cs.cmu.edu).
 7 | Redistribution are not permitted.
 8 | 
 9 |       
10 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
11 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
12 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
13 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
14 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
15 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
16 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
17 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
18 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
19 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
20 | POSSIBILITY OF SUCH DAMAGE.
21 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/encodeAsChain.m:
--------------------------------------------------------------------------------
 1 | function [ ] = encodeAsChain( curind, top_gccind, chain, costGain, costGain_notEnc, out_fid, info )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Print the encoding of the given graph as chain                         %
 4 | %   Output is stored in the model file in the form:                       %
 5 | %     ch node_ids_in_order, costGain                                      % 
 6 | %  Author: Danai Koutra                                                   %
 7 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 8 | global model; 
 9 | global model_idx;
10 | 
11 | %% Printing the encoded structure.
12 | fprintf(out_fid, 'ch ');
13 | fprintf(out_fid, ' %d', top_gccind( curind(chain) ) );
14 | if info == false
15 |     fprintf(out_fid, '\n');
16 | else
17 |     fprintf(out_fid, ', %f  | %f --- nearChain \n', costGain, costGain_notEnc);
18 | end
19 | 
20 | model_idx = model_idx + 1;
21 | model(model_idx) = struct('code', 'ch', 'edges', 0, 'nodes1', top_gccind(curind(chain)), 'nodes2', [], 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
22 | %n = size(model, 2);
23 | %model(n+1) = struct('code', 'ch', 'nodes1', top_gccind(curind(chain)), 'nodes2', [], 'benefit', costGain);
24 | 
25 | end


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/encodeAsFClique.m:
--------------------------------------------------------------------------------
 1 | function [ ] = encodeAsFClique( curind, top_gccind, costGain, costGain_notEnc, out_fid, info )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Print the encoding of given graph as a full clique.                    %
 4 | %   Output is stored in the model file in the form:                       %
 5 | %     fc node_ids_in_clique, costGain                                     % 
 6 | %  Author: Danai Koutra                                                   %
 7 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 8 | global model; 
 9 | global model_idx;
10 | 
11 | %% Printing the encoded structure.
12 | % encode as full clique
13 | fprintf(out_fid, 'fc');
14 | for i=1:size(curind, 2)
15 |     fprintf(out_fid, ' %d', top_gccind( curind(i) ) );
16 | end
17 | if info == false
18 |     fprintf(out_fid, '\n');
19 | else
20 |     fprintf(out_fid, ', %f | %f --- full clique \n', costGain, costGain_notEnc);
21 | end
22 | 
23 | model_idx = model_idx + 1;
24 | model(model_idx) = struct('code', 'fc', 'edges', 0, 'nodes1', top_gccind(curind), 'nodes2', [], 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
25 | %n = size(model, 2);
26 | %model(n+1) = struct('code', 'fc', 'nodes1', top_gccind(curind), 'nodes2', [], 'benefit', costGain);
27 | 
28 | end


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/encodeAsStar.m:
--------------------------------------------------------------------------------
 1 | function [ ] = encodeAsStar( curind, top_gccind, hub, spokes, costGain, costGain_notEnc, out_fid, info )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Print the encoding of the given graph as star                          %
 4 | %   Output is stored in the model file in the form:                       %
 5 | %     st hub, spokes_ids, costGain                                        %
 6 | %  Author: Danai Koutra                                                   %
 7 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 8 | global model; 
 9 | global model_idx;
10 | 
11 | fprintf(out_fid, 'st %d,', top_gccind( curind(hub) ) );
12 | fprintf(out_fid, ' %d', top_gccind( curind(spokes) ) );
13 | 
14 | if info == false
15 |     fprintf(out_fid, '\n');
16 | else
17 |     fprintf(out_fid, ', %f | %f --- nearStar \n', costGain, costGain_notEnc);
18 | end
19 | 
20 | model_idx = model_idx + 1;
21 | model(model_idx) = struct('code', 'st', 'edges', 0, 'nodes1', top_gccind(curind(hub)), 'nodes2', top_gccind(curind(spokes)), 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
22 | %n = size(model, 2);
23 | %model(n+1) = struct('code', 'st', 'nodes1', top_gccind(curind(hub)), 'nodes2', top_gccind(curind(spokes)), 'benefit', costGain);
24 | 
25 | 
26 | end


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/encodeAsNClique.m:
--------------------------------------------------------------------------------
 1 | function [ ] = encodeAsNClique( curind, top_gccind, m, costGain, costGain_notEnc, out_fid, info )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Print the encoding of given graph as a near-clique.                    %
 4 | %   Output is stored in the model file in the form:                       %
 5 | %     nc node_ids_in_clique, costGain                                     %
 6 | %   Note that the costGain is 0 in the case of near-clique.               %
 7 | %  Author: Danai Koutra                                                   %
 8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 9 | global model; 
10 | global model_idx;
11 | 
12 | % encode as near clique
13 | fprintf(out_fid, 'nc %d,', m/2);
14 | for i=1:size(curind, 2)
15 |     fprintf(out_fid, ' %d', top_gccind( curind(i) ) );
16 | end
17 | if info == false
18 |     fprintf(out_fid, '\n');
19 | else
20 |     fprintf(out_fid, ', %f | %f --- near clique \n', costGain, costGain_notEnc);
21 | end
22 | 
23 | model_idx = model_idx + 1;
24 | model(model_idx) = struct('code', 'nc', 'edges', m/2, 'nodes1', top_gccind(curind), 'nodes2', [], 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
25 | %n = size(model, 2);
26 | %model(n+1) = struct('code', 'nc', 'nodes1', top_gccind(curind), 'nodes2', [], 'benefit', costGain);
27 | 
28 | end


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/encodeAsBC.m:
--------------------------------------------------------------------------------
 1 | function [ ] = encodeAsBC( curind, top_gccind, set1, set2, costGain, costGain_notEnc, out_fid, info )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Print the encoding of the given graph as bipartite core                %
 4 | %   Output is stored in the model file in the form:                       %
 5 | %     bc node_ids_of_1st_set, node_ids_of_2nd_set, costGain               %
 6 | %  Author: Danai Koutra                                                   %
 7 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 8 | global model; 
 9 | global model_idx;
10 | 
11 | if ~isempty(set1) && ~isempty(set2)
12 |     fprintf(out_fid, 'bc');
13 |     fprintf(out_fid, ' %d', top_gccind( curind(set1) ));
14 |     fprintf(out_fid, ',');
15 |     fprintf(out_fid, ' %d', top_gccind( curind(set2) ) );
16 |     if info == false
17 |             fprintf(out_fid, '\n');
18 |         else
19 |             fprintf(out_fid, ', %f | %f------ nearBC \n', costGain, costGain_notEnc);
20 |     end
21 | end
22 | 
23 | model_idx = model_idx + 1;
24 | model(model_idx) = struct('code', 'bc', 'edges', 0, 'nodes1', top_gccind(curind(set1)), 'nodes2', top_gccind(curind(set2)), 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
25 |          
26 | %model(n+1) = struct('code', 'bc', 'nodes1', top_gccind(curind(set1)), 'nodes2', top_gccind(curind(set2)), 'benefit', costGain);
27 |     
28 | end


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/encodeAsNB.m:
--------------------------------------------------------------------------------
 1 | function [ ] = encodeAsNB( curind, top_gccind, set1, set2, costGain, costGain_notEnc, out_fid, info )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Print the encoding of the given graph as bipartite core                %
 4 | %   Output is stored in the model file in the form:                       %
 5 | %     bc node_ids_of_1st_set, node_ids_of_2nd_set, costGain               %
 6 | %  Author: Danai Koutra                                                   %
 7 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 8 | global model; 
 9 | global model_idx;
10 | 
11 | if ~isempty(set1) && ~isempty(set2)
12 |     fprintf(out_fid, 'nb');
13 |     fprintf(out_fid, ' %d', top_gccind( curind(set1) ));
14 |     fprintf(out_fid, ',');
15 |     fprintf(out_fid, ' %d', top_gccind( curind(set2) ) );
16 |     if info == false
17 |             fprintf(out_fid, '\n');
18 |         else
19 |             fprintf(out_fid, ', %f | %f ------ NB \n', costGain, costGain_notEnc);
20 |     end
21 | end
22 | 
23 | model_idx = model_idx + 1;
24 | model(model_idx) = struct('code', 'nb', 'edges', 0, 'nodes1', top_gccind(curind(set1)), 'nodes2', top_gccind(curind(set2)), 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
25 | %n = size(model, 2);
26 | %model(n+1) = struct('code', 'bc', 'nodes1', top_gccind(curind(set1)), 'nodes2', top_gccind(curind(set2)), 'benefit', costGain);
27 |     
28 | end


--------------------------------------------------------------------------------
/MDL/test.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | 
 4 | #from mdl import *
 5 | from error import Error;
 6 | from graph import Graph;
 7 | from model import *;
 8 | from mdl import *;
 9 | 
10 | gFilename = "cliqueStarClique.graph";
11 | mFilename = "cliqueStarClique_st1.model";
12 | 
13 | g = Graph();
14 | g.Load(gFilename);
15 | m = Model();
16 | 
17 | #g.Plot();
18 | 
19 | (l_total, l_model, l_error, E) = L(g,m);
20 | print "empty model:"
21 | print "  ", l_total, l_model, l_error, E.numErrors;
22 | 
23 | if(False) :
24 |     m = Model();
25 |     fc1 = FullClique([x for x in range(1,21)]);
26 |     m.addStruct(fc1);
27 |     fc2 = FullClique([x for x in range(27,52)]);
28 |     m.addStruct(fc2);
29 |     st1 = Star(21,[18,19,20,22,23,24,25,26,27,28,29]);
30 |     m.addStruct(st1);
31 | 
32 |     nc1 = NearClique([x for x in range(1,21)]);
33 |     #m.addStruct(nc1);
34 |     
35 |     (l_total, l_model, l_error, E) = L(g,m);
36 |     print "model with two full cliques, resp. over nodes 1--20, and 27--38:"
37 |     print "  ", l_total, l_model, l_error, E.numErrors;
38 |     
39 | if(True) :
40 |     m = Model();
41 |     m.Load(mFilename);
42 | 
43 |     (l_total, l_model, l_error, E) = L(g,m);
44 |     print "model \'" + mFilename + "\'";
45 |     print "  ", l_total, l_model, l_error, E.numErrors;
46 |     
47 | 
48 | 
49 | if(False):
50 |     #g.Plot()
51 |     E.plotCover();
52 |     E.plotError();
53 |     #print " ".join([str(x)+" "+str(E.errors[x]) for x in range(len(E.errors))])
54 |     #print " ".join([str(x)+" "+str(E.covered[x]) for x in range(len(E.covered))])
55 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/assertEqual.m:
--------------------------------------------------------------------------------
 1 | function assertEqual(A, B, custom_message)
 2 | %assertEqual Assert that inputs are equal
 3 | %   assertEqual(A, B) throws an exception if A and B are not equal.  A and B
 4 | %   must have the same class and sparsity to be considered equal.
 5 | %
 6 | %   assertEqual(A, B, MESSAGE) prepends the string MESSAGE to the assertion
 7 | %   message if A and B are not equal.
 8 | %
 9 | %   Examples
10 | %   --------
11 | %   % This call returns silently.
12 | %   assertEqual([1 NaN 2], [1 NaN 2]);
13 | %
14 | %   % This call throws an error.
15 | %   assertEqual({'A', 'B', 'C'}, {'A', 'foo', 'C'});
16 | %
17 | %   See also assertElementsAlmostEqual, assertVectorsAlmostEqual
18 | 
19 | %   Steven L. Eddins
20 | %   Copyright 2008-2010 The MathWorks, Inc.
21 | 
22 | if nargin < 3
23 |     custom_message = '';
24 | end
25 | 
26 | if ~ (issparse(A) == issparse(B))
27 |     message = xunit.utils.comparisonMessage(custom_message, ...
28 |         'One input is sparse and the other is not.', A, B);
29 |     throwAsCaller(MException('assertEqual:sparsityNotEqual', '%s', message));
30 | end
31 | 
32 | if ~strcmp(class(A), class(B))
33 |     message = xunit.utils.comparisonMessage(custom_message, ...
34 |         'The inputs differ in class.', A, B);
35 |     throwAsCaller(MException('assertEqual:classNotEqual', '%s', message));
36 | end
37 | 
38 | if ~isequalwithequalnans(A, B)
39 |     message = xunit.utils.comparisonMessage(custom_message, ...
40 |         'Inputs are not equal.', A, B);
41 |     throwAsCaller(MException('assertEqual:nonEqual', '%s', message));
42 | end
43 | 
44 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/mdlCostAsfANDnClique.m:
--------------------------------------------------------------------------------
 1 | function [ MDLcost_fc, MDLcost_nc ] = mdlCostAsfANDnClique( Asmall, N_tot )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Encode given graph as clique and near-clique                           %
 4 | %  Author: Danai Koutra                                                   %
 5 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 6 | 
 7 | n = size(Asmall, 2);
 8 | 
 9 | %% Creating the adjacency matrix for the clique model (w/o noise).
10 | % Note that there is no Error matrix for the near-clique model.
11 | %M = ones(n,n) - eye(n);
12 | % Error matrix.
13 | %E1 = xor(M,Asmall);
14 | 
15 | % 0s in the error matrix  --- edges included in the structure (full clique)
16 | E(2) = nnz(Asmall);
17 | % 1s in the error matrix  --- edges excluded from the structure (full clique)
18 | E(1) = n^2 - n - E(2);
19 | 
20 | %% MDL cost of encoding given substructure as a full clique
21 | MDLcost_fc = compute_encodingCost( 'fc', N_tot, n, E);
22 | %% MDL cost of encoding given substructure as a near clique
23 | MDLcost_nc = compute_encodingCost( 'nc', N_tot, n, Asmall);
24 | 
25 | 
26 | % % %% Printing the encoded structure.
27 | % % % encode as full clique
28 | % % fprintf(out_fid, 'fc');
29 | % % for i=1:size(curind, 2)
30 | % %     fprintf(out_fid, ' %d', top_gccind( curind(i) ) );
31 | % % end
32 | % % fprintf(out_fid, '--- full clique \n');
33 | % % 
34 | % % % encode as near clique
35 | % % fprintf(out_fid, 'nc %d,', m/2);
36 | % % for i=1:size(curind, 2)
37 | % %     fprintf(out_fid, ' %d', top_gccind( curind(i) ) );
38 | % % end
39 | % % fprintf(out_fid, '--- nearClique \n');
40 | 
41 | end


--------------------------------------------------------------------------------
/MDL/mdl_base.py:
--------------------------------------------------------------------------------
 1 | import config;
 2 | 
 3 | from math import log,factorial;
 4 | from error import Error;
 5 | from graph import Graph;
 6 | from model import Model;
 7 | 
 8 | ### basic functions
 9 | # determine possible number of edges between `numEdges' nodes
def CalcCliqueNumPosEdges(numEdges):
    """Return numEdges**2 - numEdges.

    Despite its name, the argument is the number of nodes: the result is the
    number of possible edges of a directed graph without self-loops on that
    many nodes.
    """
    squared = numEdges * numEdges
    return squared - numEdges
14 | 
15 | # (n choose k)
def choose(n, k):
    """Return the binomial coefficient "n choose k" as an exact integer.

    Returns 0 when k is negative or larger than n.  Only min(k, n - k)
    multiplications are performed, and each intermediate product is exactly
    divisible, so integer floor division loses nothing.

    The loop uses a plain counter instead of `xrange`, which only exists on
    Python 2, so the function runs unchanged on Python 2 and Python 3.
    """
    if not 0 <= k <= n:
        return 0
    steps = min(k, n - k)
    p = 1
    t = 0
    while t < steps:
        p = (p * (n - t)) // (t + 1)
        t += 1
    return p
24 | 
def composition(n,k) :
    """Return choose(n-1, k-1), the number of compositions of n into k parts."""
    total = n - 1
    picked = k - 1
    return choose(total, picked)
27 | 
def LC(n,k) :
    """Return the base-2 logarithm of composition(n, k)."""
    count = composition(n, k)
    return log(count, 2)
30 | 
def weakcomposition(n,k) :
    """Return choose(n+k-1, k-1), the number of weak compositions of n into k parts."""
    total = n + k - 1
    picked = k - 1
    return choose(total, picked)
33 |     
def LwC(n,k) :
    """Return the base-2 logarithm of weakcomposition(n, k)."""
    count = weakcomposition(n, k)
    return log(count, 2)
36 | 
37 | # Encoded length of `n` 0/1 entries with `k` 1s (aka, Naive Uniform)
def LnU(n,k):
    """Encoded length, in bits, of `n` 0/1 entries of which `k` are 1s.

    Each 1 costs -log2(k/n) bits and each 0 costs -log2((n-k)/n) bits.
    Returns 0 when n is 0 or when all entries are equal (k == 0 or k == n).
    """
    if n == 0 or k == 0 or k == n:
        return 0
    num_ones = k
    num_zeros = n - k
    bits_per_one = -log(num_ones / float(n), 2)
    bits_per_zero = -log(num_zeros / float(n), 2)
    return num_ones * bits_per_one + num_zeros * bits_per_zero
45 |     
46 | # Encoded length of `n` 0/1 entries with `k` 1s (aka, Uniform)
def LU(n,k) :
    """Encoded length, in bits, of `n` 0/1 entries of which `k` are 1s.

    The length is log2(choose(n, k)); it is 0 when n or k is 0.
    """
    nothing_to_encode = (n == 0) or (k == 0)
    if nothing_to_encode:
        return 0
    arrangements = choose(n, k)
    return log(arrangements, 2)
51 | 
52 | # encoded size of an integer >=1 as by Rissanen's 1983 Universal code for integers
def LN(z) :
    """Encoded size of an integer z >= 1 (Rissanen's 1983 universal code).

    Starts from log2(2.865064) and adds log2(z), log2(log2(z)), ... for as
    long as the term is positive.  Returns 0 for z <= 0.
    """
    if z <= 0:
        return 0
    total = log(2.865064, 2)
    term = z
    while True:
        term = log(term, 2)
        # Stop at the first non-positive term; it is not added.
        if not term > 0:
            return total
        total = total + term
62 | 


--------------------------------------------------------------------------------
/MDL/graph.py:
--------------------------------------------------------------------------------
 1 | class Graph :
 2 |     def __init__(self):
 3 |         self.numNodes = 0;
 4 |         self.numEdges = 0;
 5 |         # per node i a list of node-ids j for which (i,j) \in E
 6 |         self.edges = [frozenset()];
 7 | 
 8 |     def hasEdge(self, i, j):
 9 |         return max(i,j)-1 in self.edges[min(i,j)-1];
10 |     
11 |     def load(self, fullpath):
12 |         fg = open(fullpath);
13 |         self.edges = [];
14 |         edgeList = [];
15 |         for line in fg :
16 |             tmp = line.strip().split(',');
17 |             if len(tmp) < 2 :
18 |                 continue;
19 |             
20 |             i = int(tmp[0]);
21 |             j = int(tmp[1]);
22 |             if i > self.numNodes :
23 |                 self.numNodes = i;
24 |             if j > self.numNodes :
25 |                 self.numNodes = j;
26 |             edgeList.append((min(i,j),max(i,j)));
27 | 
28 |         tmpAdj = [set() for i in range(self.numNodes)];
29 |         
30 |         for edge in edgeList :
31 |             (i,j) = edge;
32 |             # option 1
33 |             if(j-1 not in tmpAdj[i-1]) :
34 |                 tmpAdj[i-1].add(j-1);            
35 |                 self.numEdges += 1;
36 | 
37 |         # finalize edges into frozensets
38 |         self.edges = [frozenset(x) for x in tmpAdj];
39 |         
40 |         #print self.edges, self.numEdges;
41 |         return;
42 |   
43 |     def plot(self):
44 |         for idx in range(len(self.edges)) :
45 |             mystr = "".join(["." for x in range(0,idx+1)]);
46 |             for idy in range(idx+1,len(self.edges)) :
47 |                 if idy in self.edges[idx] :
48 |                     mystr += "1";
49 |                 else :
50 |                     mystr += "0";
51 |             print mystr;
52 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
 1 | Version 1.3
 2 | 
 3 | Code for 
 4 |    VoG: Summarizing and Understanding Large Graphs
 5 |    Danai Koutra, U Kang, Jilles Vreeken, and Christos Faloutsos
 6 |    http://www.cs.cmu.edu/~dkoutra/papers/VoG.pdf
 7 | 
 8 | 
 9 | Contact:
10 |    Danai Koutra, dkoutra@umich.edu
11 | 
12 | 
13 | To run:
14 |    type 'make'
15 | 
16 | 
17 | Difference from Version 1.0:
18 |    Using dynamic programming and the technique of memoization to 
19 |    speed up the application of the GREEDY'nFORGET heuristic.
20 | 
21 | 
22 | Algorithm:
23 | 
24 | Input: graph G
25 | Step 1: Subgraph Generation. Generate candidate – possibly
26 | overlapping – subgraphs using one or more graph decomposition
27 | methods.
28 | Step 2: Subgraph Labeling. Characterize each subgraph as a
29 | perfect structure x \in Omega, or an approximate structure by using
30 | MDL to find the type x that locally minimizes the encoding cost.
31 | Populate the candidate set C.
32 | Step 3: Summary Assembly. Use the heuristics PLAIN, TOP10,
33 | TOP100, GREEDY’NFORGET (Sec. 4.3) to select a non-redundant
34 | subset from the candidate structures to instantiate the graph model
35 | M. Pick the model of the heuristic with the lowest description
36 | cost.
37 | Return graph summary M and its encoding cost.
38 | 
39 | 
40 | 
41 | Change Log:
42 | ===========
43 | 
44 | July 1, 2015
45 | - removed vpi():  using l2cnk.m to compute the log of n-choose-k efficiently
46 |   leads to 30x speedup in the chocolate-wiki dataset
47 | - tic/toc instead of cputime to compute the runtime: following the recommendation at http://www.mathworks.com/help/matlab/ref/cputime.html
48 | 
49 | January 9, 2015
50 | - Replaced the config.py file
51 | 
52 | July 30,  2014
53 | - Fixed ordering of nodes in cliques
54 | 
55 | June 15, 2014
56 | - Made the greedyNforget 100x faster by exploiting memoization
57 | 


--------------------------------------------------------------------------------
/demo_vog.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo ''
 4 | echo -e "\e[34m======== Steps 1 & 2: Subgraph Generation and Labeling  ==========\e[0m"
 5 | matlab -r run_structureDiscovery
 6 | echo ''
 7 | echo 'Structure discovery finished.'
 8 | 
 9 | unweighted_graph='DATA/cliqueStarClique.out'
10 | model='DATA/cliqueStarClique_orderedALL.model'
11 | modelFile='cliqueStarClique_orderedALL.model'
12 | modelTop10='DATA/cliqueStarClique_top10ordered.model'
13 | 
14 | echo ''
15 | echo -e "\e[34m=============== Step 3: Summary Assembly ===============\e[0m"
16 | echo ''
17 | echo -e "\e[31m=============== TOP 10 structures ===============\e[0m"
18 | head -n 10 $model > $modelTop10
19 | echo 'Computing the encoding cost...'
20 | echo ''
21 | python MDL/score.py $unweighted_graph $modelTop10 
22 | 
23 | echo ''
24 | echo 'Explanation of the above output:'
25 | echo 'L(G,M):  Number of bits to describe the data given a model M.'
26 | echo 'L(M): Number of bits to describe only the model.'
27 | echo 'L(E): Number of bits to describe only the error.'
28 | echo ': M_0 is the zero-model where the graph is encoded as noise (no structure is assumed).'
29 | echo ': M_x is the model of the graph as represented by the top-10 structures.'
30 | echo ''
31 | cat DATA/encoding_top10.out
32 | echo ''
33 | echo ''
34 | 
35 | echo -e "\e[31m========= Greedy selection of structures =========\e[0m"
36 | echo 'Computing the encoding cost...'
37 | echo ''
38 | python2.7 MDL/greedySearch_nStop.py $unweighted_graph $model >/dev/null 2>&1
39 | mv heuristic* DATA/
40 | echo '>> Outputs saved in DATA/. To interpret the structures that are selected, check the file MDL/readme.txt.'
41 | echo ": DATA/heuristicSelection_nStop_ALL_$modelFile has the lines of the $model structures included in the summary."
42 | echo ": DATA/heuristic_Selection_costs_ALL_$modelFile has the encoding cost of the considered model at each time step."
43 | echo ''
44 | echo ''
45 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/mdlCostAsBC.m:
--------------------------------------------------------------------------------
 1 | function [ MDLcost, set1, set2 ] = mdlCostAsBC( Asmall, N_tot )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Encode given graph as bipartite core                                   %
 4 | %  max cut problem --> NP hard                                            %
 5 | % Heuristic: we use FaBP with heterophily and we initialize               %
 6 | %            two nodes that are connected by an edge in opposite classes  %
 7 | %  Author: Danai Koutra                                                   %
 8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 9 | 
10 | %% Constants and variables of FaBP
11 | % heterophily factor
12 | h = -0.01;
13 | % prior belief for belonging in the pos/neg class
14 | positive = 0.01;
15 | negative = -0.01;
16 | 
17 | a = 4*h^2/(1-4*h^2);
18 | c = 2*h/(1-4*h^2);
19 | 
20 | %% setting up the matrices and vectors involved in FaBP
21 | n = size(Asmall, 2);
22 | deg = full(sum(Asmall));
23 | D = diag(deg);
24 | matI = eye(n);
25 | 
26 | %% Initialization: pick high degree node, and initialize as positive.
27 | % Set all its neighbors in the opposite class.
28 | phi = zeros(n,1);
29 | [ ~, idx ] = max(deg);
30 | neighbors = find(Asmall(idx,:));
31 | phi(idx) = positive;
32 | phi(neighbors) = negative;
33 | 
34 | %% FaBP: main equation
35 | b = [ matI + a * D - c * Asmall ] \ phi;
36 | 
37 | %% Find the members of the two sets
38 | set1 = b > 0;
39 | set2 = b < 0;
40 | 
41 | %% Creating the adjacency matrix for the bc model (w/o noise).
42 | % According to this model, all the nodes in set1 are connected to all the
43 | % nodes in set2.
44 | M(n,n) = 0;
45 | M( set1, set2 ) = 1;
46 | % Error matrix
47 | E = xor(M,Asmall);
48 | 
49 | %% MDL cost of encoding given substructure as a star
50 | MDLcost = compute_encodingCost( 'bc', N_tot, sum(set1), E, sum(set2) )
51 | 
52 | 
53 | % % if nargin == 4 && ~isempty(set1) && ~isempty(set2)
54 | % %     fprintf(out_fid, 'bc');
55 | % %     fprintf(out_fid, ' %d', top_gccind( curind(set1) ));
56 | % %     fprintf(out_fid, ',');
57 | % %     fprintf(out_fid, ' %d', top_gccind( curind(set2) ) );
58 | % %     fprintf(out_fid, ' --- nearBC \n');
59 | % % end
60 |     
61 | end


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/mdlCostAsStar.m:
--------------------------------------------------------------------------------
 1 | function [ MDLcost, idxMaxDeg, satellitesIdx ] = mdlCostAsStar( Asmall, curind, N_tot )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Encode given graph as star                                             %
 4 | %  Find the highest degree node and set it as the hub. The rest nodes will%
 5 | %  be encoded as spokes.                                                  %
 6 | %  OUTPUT                                                                 %
 7 | %    MDLcost: the cost of encoding Asmall as a chain                      %
 8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 9 | 
10 | n = size(Asmall, 2);
11 | deg = full(sum(Asmall));
12 | 
13 | if n < 3
14 |    return
15 | end
16 | 
17 | [ ~, idxMaxDeg ] = max(deg);
18 | 
19 | if idxMaxDeg ~= 1 && idxMaxDeg ~= n
20 |     satellitesIdx = [1 : (idxMaxDeg-1), (idxMaxDeg+1):n];
21 | elseif idxMaxDeg == 1
22 |     satellitesIdx = 2:n;
23 | elseif idxMaxDeg == n
24 |     satellitesIdx = 1:(n-1);
25 | end
26 | 
27 | %% Creating the adjacency matrix for the star model (w/o noise).
28 | % % M(n,n) = 0;
29 | % % for i = 1 : length( satellitesIdx )
30 | % %    M( idxMaxDeg, satellitesIdx(i) ) = 1; 
31 | % %    M( satellitesIdx(i), idxMaxDeg ) = 1; 
32 | % % end
33 | % % % Error matrix.
34 | % % E1 = xor(M,Asmall);
35 | % % 
36 | % % Einc1 = nnz(E1)
37 | % % Eexc1 = sum(E1(:)==0)
38 | 
39 | % 1s in the error matrix
40 | % missing edges in star + extra edges not in star
41 | E(1) = 2* (n-1-nnz(Asmall(idxMaxDeg,:))) + nnz(Asmall(satellitesIdx, satellitesIdx));
42 | % 0s in the error matrix
43 | % E(1) = n^2 - n - E(2);
44 | %wrong_edges_in_star = 2*(n-nnz(Asmall(idxMaxDeg,:)));
45 | E(2) = n^2 - E(1);
46 | 
47 | if E(1) < 0 || E(2) < 0
48 |  E
49 |  n
50 |  nnz(Asmall(idxMaxDeg,:))
51 | end
52 | 
53 | %% MDL cost of encoding given substructure as a star
54 | MDLcost = compute_encodingCost( 'st', N_tot, n, E);
55 | 
56 | 
57 | % % %% Printing the encoded structure.
58 | % % fprintf(out_fid, 'st %d,', top_gccind( curind(idxMaxDeg) ) );
59 | % % fprintf(out_fid, ' %d', top_gccind( curind(satellitesIdx) ) );
60 | % % fprintf(out_fid, '  --- nearStar \n');
61 | 
62 | 
63 | % % check if we have a tie (multiple highest-degree nodes)
64 | % idx_center = find( deg == deg(idxMaxDeg) );
65 | % 
66 | % for i = 1 : length(idx_center)
67 | %  :   idxMaxDeg = idx_center(i);
68 | %     fprintf(out_fid, 'st %d,', top_gccind( curind(idxMaxDeg) ) );
69 | % 
70 | %     if idxMaxDeg ~= 1 && idxMaxDeg ~= n
71 | %         satellitesIdx = curind( [1 : (idxMaxDeg-1), (idxMaxDeg+1):n] );
72 | %     elseif idxMaxDeg == 1
73 | %         satellitesIdx = curind( 2:n );
74 | %     elseif idxMaxDeg == n
75 | %         satellitesIdx = curind( 1:(n-1) );
76 | %     end
77 | %     fprintf(out_fid, ' %d', top_gccind( satellitesIdx ) );
78 | %     fprintf(out_fid, '\n');
79 | % end
80 |     
81 | end
82 | 


--------------------------------------------------------------------------------
/MDL/mdl_error.py:
--------------------------------------------------------------------------------
 1 | import config;
 2 | import mdl_base;
 3 | import mdl_structs;
 4 | 
 5 | from math import log,factorial;
 6 | from error import Error;
 7 | from graph import Graph;
 8 | from model import Model;
 9 | 
10 | from mdl_base import LU,LnU;
11 | from mdl_structs import *;
12 | 
13 | ### Encoding the Error
14 | 
# All errors (modelling and unmodelled alike) are encoded together with a
# single call to LU -- the error types are not distinguished here.
def LErrorNaiveBinom(G, M, E) :
    """Return the untyped error cost computed with mdl_base.LU.

    G -- graph object; only G.numNodes is read.
    M -- model; not used by this function.
    E -- error object; numCellsExcluded, numUnmodelledErrors and
         numModellingErrors are read.

    When config.optVerbosity > 1 the cost is also printed.
    """
    # possible number of edges in an undirected, non-self-connected graph of N nodes
    # (N*N - N is always even, so floor division is exact on Python 2 and 3)
    posNumEdges = (G.numNodes * G.numNodes - G.numNodes) // 2
    cost = LU(posNumEdges - E.numCellsExcluded, E.numUnmodelledErrors + E.numModellingErrors)
    if config.optVerbosity > 1 :
        # single-argument print: same output under Python 2 and Python 3
        print(' - L_nb(E) ' + str(cost))
    return cost
22 | 
def LErrorNaivePrefix(G, M, E) :
    """Return the untyped error cost computed with mdl_base.LnU.

    G -- graph object; only G.numNodes is read.
    M -- model; not used by this function.
    E -- error object; numCellsExcluded, numModellingErrors and
         numUnmodelledErrors are read.

    When config.optVerbosity > 1 the cost is also printed.
    """
    # possible number of edges in an undirected, non-self-connected graph of N nodes
    # (N*N - N is always even, so floor division is exact on Python 2 and 3)
    posNumEdges = (G.numNodes * G.numNodes - G.numNodes) // 2
    cost = LnU(posNumEdges - E.numCellsExcluded, E.numModellingErrors + E.numUnmodelledErrors)
    if config.optVerbosity > 1 :
        # single-argument print: same output under Python 2 and Python 3
        print(' - L_np(E) ' + str(cost))
    return cost
29 | 
30 | # here I encode all errors uniformly by a binomial -- hence, not yet the typed advanced stuff yet!
31 | def LErrorTypedBinom(G, M, E) :
32 |     # possible number of edges in an undirected, non-self-connected graph of N nodes
33 |     posNumEdges = (G.numNodes * G.numNodes - G.numNodes) / 2
34 |     
35 |     # First encode the modelling errors
36 |     #print 'First encode the modelling errors'
37 |     #print 'E.numCellsCovered, E.numCellsExcluded, E.numModellingErrors;'
38 |     #print E.numCellsCovered, E.numCellsExcluded, E.numModellingErrors;
39 |     costM = LU(E.numCellsCovered - E.numCellsExcluded, E.numModellingErrors);
40 |     if config.optVerbosity > 1 : print ' - L_tb(E+)', costM;
41 | 
42 |     # Second encode the unmodelled errors
43 |     #print 'Second encode the unmodelled errors' (excluded cells are always covered!)
44 |     #print posNumEdges - E.numCellsCovered, E.numUnmodelledErrors;
45 |     costU = LU(posNumEdges - E.numCellsCovered, E.numUnmodelledErrors);
46 |     if config.optVerbosity > 1 : print ' - L_tb(E-)', costU;
47 |     return costM + costU;
48 | 
def LErrorTypedPrefix(G, M, E) :
    """Return the typed error cost computed with mdl_base.LnU.

    Modelling errors and unmodelled errors are encoded separately and the
    two costs are summed.

    G -- graph object; only G.numNodes is read.
    M -- model; not used by this function.
    E -- error object; numCellsCovered, numCellsExcluded,
         numModellingErrors and numUnmodelledErrors are read.

    When config.optVerbosity > 1 both partial costs are also printed.
    """
    # possible number of edges in an undirected, non-self-connected graph of N nodes
    # (N*N - N is always even, so floor division is exact on Python 2 and 3)
    posNumEdges = (G.numNodes * G.numNodes - G.numNodes) // 2

    # modelling errors, over the covered cells that are not excluded
    costM = LnU(E.numCellsCovered - E.numCellsExcluded, E.numModellingErrors)
    if config.optVerbosity > 1 :
        print(' - L_tp(E+) ' + str(costM))

    # unmodelled errors, over the cells that are not covered
    costU = LnU(posNumEdges - E.numCellsCovered, E.numUnmodelledErrors)
    if config.optVerbosity > 1 :
        print(' - L_tp(E-) ' + str(costU))
    return costM + costU
58 | 


--------------------------------------------------------------------------------
/MDL/readme.txt:
--------------------------------------------------------------------------------
 1 | Author: Jilles Vreeken
 2 | Email:  jilles@mmci.uni-saarland.de
 3 | 
 4 | 
 5 | :: General Assumptions on Input
 6 | 	We deal with undirected, nonloopy graphs, where each node has an id, and node ids start with 1.
 7 | 
 8 | :: Usage
 9 | 	python score.py <graph file> <model file>
10 | 
11 | 	It prints the possible parameters if you give no options at all; if you only give a graph file it shows the encoded size by the empty model.
12 | 
13 | 
14 | :: Input Data Format for Graphs
15 | 	One edge per row, comma separated: 
16 | 	<source nodeId>,<dest nodeId>
17 | 
18 | 	e.g.
19 | 	1,2
20 | 	1,3
21 | 	...
22 | 	
23 | 	As we are working with undirected graphs, for pairs i,j it does not matter whether i<j or i>j. (The graph is non-loopy, so i=j should not occur, though this is currently not checked.)
24 | 
25 | 
26 | :: Input Data Format for Models
27 | 	One structure per row, e.g.:
28 | 	fc 1 2 3 4
29 | 	fc 5 4 2 6
30 | 	bc 1 2 3, 21 2 1
31 | 	ch 10 11 200 12
32 | 
33 | 	The ordering of the rows does influence which structure encodes what part of the graph. Later, when we introduce structure-Typed-error encoding this may matter.
34 | 
35 | 	Where for the different structure types (and hence encodings) I have
36 | 
37 | 
38 | 	Full clique:
39 | fc [node ids]
40 | 	e.g.
41 | 	fc 1 2 3 4			for a full-clique over nodes 1 to 4. 
42 | 						This encoding is great for full cliques, and near-cliques with high connectivity
43 | 						E += { (1,2), (1,3), (1,4), (2,3), (2,4), (3,4) }
44 | 
45 | 	Near clique
46 | nc <# of edges>, [node ids]
47 |      e.g.
48 |      nc 5, 1 2 3 4		for a near-clique over nodes 1 to 4, with 5 edges among them.
49 | 						This encoding gives the exact connections, without making error. 
50 | 						! in certain cases the full-clique encoding may be more efficient:
51 | 						  depending on Error encoding, encoding some superfluous edges can 
52 | 						  be cheaper. Formal analysis needed.
53 | 						E= exactly what is in the data, using (locally optimal) prefix codes
54 | 
55 | 	Chain
56 | ch [node ids]
57 | 	e.g.
58 | 	ch 4 2 1 3			for a chain from node 4 to 2 to 1 to 3
59 | 						E+={ (4,2), (2,1), (1,3) }
60 | 
61 | 	Star
62 | st <hub id>, [node ids]
63 | 	e.g.
64 | 	st 1, 2 3 4			for a star with node 1 as hub, and spokes to nodes 2, 3 and 4
65 | 						E+={ (1,2), (1,3), (1,4) }
66 | 	- a star is equivalent to a BiPartiteCore in which one of the two node sets has size 1
67 | 
68 | 
69 | 	BiPartiteCore
70 | bc [node id set A], [node id set B]
71 | 	e.g.
72 | 	bc 1 2 3, 4 5		for a fully connected bi-partite graph between node sets 1,2,3 and 4,5
73 | 						! also means there are no edges between nodes 1,2,3, nor between nodes 4,5
74 | 						E+={ (1,4), (1,5), (2,4), (2,5), (3,4), (3,5) }
75 | 
76 | 	NearBiPartiteCore
77 | nb [node id set A], [node id set B]
78 | 	e.g.
79 | 	nb 1 2 3, 4 5		for a possibly not fully connected bi-partite graph between node sets 1,2,3 and 4,5
80 | 						! implies there are no edges between nodes 1,2,3, nor between nodes 4,5 --- errors are pushed into error matrix
81 | 						E= edges within sets A and B, no errors between A and B
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/mdlCostAsChain.m:
--------------------------------------------------------------------------------
 1 | function [ MDLcost, chainExt ] = mdlCostAsChain( Asmall, N_tot )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Encode given graph as chain                                            %
 4 | %  Start from a node with deg 1 (p_rand) and find its furthest node using %
 5 | %  BFS. Then, starting from the found node (p_init), redo BFS and find its%
 6 | %  furthest node (p_fin). Report the shortest path between p_init and     %
 7 | %  p_fin. If there are extra nodes in the shortest path, report them after%
 8 | %  the path (separating with comma from the path nodes).                  %
 9 | %  DESCRIPTION OF SOME VARS:                                              %
10 | %    E = M xor Asmall, error matrix (xor between true model and adjacency %
11 | %                                                                  mat)   %
12 | %  OUTPUT                                                                 %
13 | %    MDLcost: the cost of encoding Asmall as a chain                      %
14 | %  Author: Danai Koutra                                                   %
15 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16 | 
17 | n = size(Asmall, 2);
18 | 
19 | if n < 3
20 |     return;
21 | end
22 | 
23 | deg = full(sum(Asmall));
24 | deg1_nodes = find(deg==1);
25 | if isempty( deg1_nodes )
26 |     d = min(deg);
27 |     deg1_nodes = find(deg==d);
28 | end
29 | p_rand = deg1_nodes(1); % pick as n_rand the first node with degree 1
30 | 
31 | [ p_init, ~, ~ ] = BFS( Asmall, p_rand, false );
32 | [ p_fin, chain, extra_nodes, chainExt, extra_nodesExt ] = ...
33 |                                           BFS( Asmall, p_init, true, true );
34 | 
35 | %% Creating the adjacency matrix for the chain model 
36 | % it describes the longest chain that we found, w/o noise).
37 | % % M(n,n) = 0;
38 | % % for i = 1 : length(chainExt)-1
39 | % %    M( chainExt(i), chainExt(i+1) ) = 1; 
40 | % %    M( chainExt(i+1), chainExt(i) ) = 1;
41 | % % end
42 | % % % Error matrix
43 | % % %E = xor(M,Asmall);
44 | % % E1 = xor(M,Asmall);
45 | % % 
46 | % % Einc1 = nnz(E1)
47 | % % Eexc1 = sum(E1(:)==0)
48 | 
49 | % 1s in the error matrix
50 | % missing edges in bc + extra edges within sets
51 | missing = 0;
52 | existing = 0;
53 | for i = 1 : length(chainExt)-1
54 |    if Asmall( chainExt(i), chainExt(i+1) ) == 0
55 |        missing = missing+1;
56 |    else
57 |        existing = existing+1;
58 |    end
59 | end
60 | E(1) = 2* missing + (nnz(Asmall) - 2*existing );
61 | % 0s in the error matrix
62 | E(2) = n^2 - E(1);
63 | 
64 | fprintf('E(1)=%d, E(2)=%d\n', E(1), E(2));
65 | 
66 | %% MDL cost of encoding given substructure as a chain
67 | MDLcost = compute_encodingCost( 'ch', N_tot, n, E);
68 | 
69 | % %% Printing the encoded structure.
70 | % fprintf(out_fid, 'ch ');
71 | % fprintf(out_fid, ' %d', top_gccind( curind(chain) ) );
72 | % if ~isempty(extra_nodes)
73 | %     fprintf(out_fid, ',');
74 | %     fprintf(out_fid, ' %d', top_gccind( curind(extra_nodes) ) );
75 | % end
76 | % fprintf(out_fid, '  --- nearChain \n');
77 | % 
78 | % 
79 | % fprintf(out_fid, 'ch ');
80 | % fprintf(out_fid, ' %d', top_gccind( curind(chainExt) ) );
81 | % %if ~isempty(extra_nodesExt)
82 | % %    fprintf(out_fid, ',');
83 | % %    fprintf(out_fid, ' %d', top_gccind( curind(extra_nodesExt) ) );
84 | % %end
85 | % fprintf(out_fid, '  --- nearChain Extended \n');
86 | 
87 |     
88 | end
89 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/mdlCostAsBCorNB.m:
--------------------------------------------------------------------------------
 1 | function [ MDLcostBC, MDLcostNB, set1, set2 ] = mdlCostAsBCorNB( Asmall, N_tot )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Encode given graph as bipartite core                                   %
 4 | %  max cut problem --> NP hard                                            %
 5 | % Heuristic: we use FaBP with heterophily and we initialize               %
 6 | %            two nodes that are connected by an edge in opposite classes  %
 7 | %  Author: Danai Koutra                                                   %
 8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 9 | 
10 | %% Constants and variables of FaBP
11 | % heterophily factor
12 | h = -0.01;
13 | % prior belief for belonging in the pos/neg class
14 | positive = 0.01;
15 | negative = -0.01;
16 | 
17 | a = 4*h^2/(1-4*h^2);
18 | c = 2*h/(1-4*h^2);
19 | 
20 | %% setting up the matrices and vectors involved in FaBP
21 | n = size(Asmall, 2);
22 | deg = full(sum(Asmall));
23 | %D = diag(deg);
24 | D = spdiags(deg', 0, n, n);
25 | matI = speye(n);
26 | 
27 | %% Initialization: pick high degree node, and initialize as positive.
28 | % Set all its neighbors in the opposite class.
29 | phi = zeros(n,1);
30 | [ ~, idx ] = max(deg);
31 | neighbors = find(Asmall(idx,:));
32 | phi(idx) = positive;
33 | phi(neighbors) = negative;
34 | 
35 | %% FaBP: main equation
36 | b = ( matI + a * D - c * Asmall ) \ phi;
37 | 
38 | %% Find the members of the two sets
39 | set1 = b > 0;
40 | set2 = b < 0;
41 | 
42 | %% Creating the adjacency matrix for the bc model (w/o noise).
43 | % According to this model, all the nodes in set1 are connected to all the
44 | % nodes in set2.
45 | % % M(n,n) = 0;
46 | % % M( set1, set2 ) = 1;
47 | % % % M should be symmetric
48 | % % M( set2, set1 ) = 1;
49 | % % % Error matrix
50 | % % %E = xor(M,Asmall);
51 | % % E1 = xor(M,Asmall);
52 | % % 
53 | % % Einc1 = nnz(E1)
54 | % % Eexc1 = sum(E1(:)==0)
55 | 
56 | % 1s in the error matrix
57 | % missing edges in bc + extra edges within sets
58 | E(1) = 2* (sum(set1)*sum(set2)-nnz(Asmall(set1,set2))) + nnz(Asmall(set1, set1)) + nnz(Asmall(set2, set2));
59 | % 0s in the error matrix
60 | E(2) = n^2 - E(1);
61 | 
62 | %% MDL cost of encoding given substructure as a bipartite core
63 | MDLcostBC = compute_encodingCost( 'bc', N_tot, sum(set1), E, sum(set2) );
64 | 
65 | %% Creating the adjacency matrix for the nb model (w/o noise).
66 | % % % According to this model, some nodes in set1 are connected to some of the
67 | % % % nodes in set2.
68 | % % M(n,n) = 0;
69 | % % B(n,n) = 0;
70 | % % B(set1, set2) = Asmall(set1, set2);
71 | % % M = B + B';
72 | % % % Error matrix
73 | % % E1 = xor(M,Asmall);
74 | % % 
75 | % % Einc1 = nnz(E1)
76 | % % Eexc1 = sum(E1(:)==0)
77 | 
78 | % 1s in the error matrix
79 | % extra edges within sets
80 | E(1) = nnz(Asmall(set1, set1)) + nnz(Asmall(set2, set2));
81 | % 0s in the error matrix
82 | E(2) = n^2 - E(1);
83 | 
84 | %% MDL cost of encoding given substructure as a bipartite core
85 | MDLcostNB = compute_encodingCost( 'nb', N_tot, sum(set1), E, sum(set2), [nnz(Asmall(set1,set2)), sum(set1)*sum(set2)-nnz(Asmall(set1,set2))] );
86 | 
87 | % % if nargin == 4 && ~isempty(set1) && ~isempty(set2)
88 | % %     fprintf(out_fid, 'bc');
89 | % %     fprintf(out_fid, ' %d', top_gccind( curind(set1) ));
90 | % %     fprintf(out_fid, ',');
91 | % %     fprintf(out_fid, ' %d', top_gccind( curind(set2) ) );
92 | % %     fprintf(out_fid, ' --- nearBC \n');
93 | % % end
94 |     
95 | end


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/BFS.m:
--------------------------------------------------------------------------------
  1 | function [ furthest_node, chain, extra_nodes, ...
  2 |            chainExt, extra_nodesExt ] = BFS( Asmall, start, path, extend )
  3 | %% Given a graph and a node, find the node that
  4 | %  is furthest away from it. Also, report the path,
  5 | %  the variable 'path' is set to true.
  6 | %  DISCLAIMER: This will not give the longest chain in the graph, but
  7 | %              the shortest path between the furthest apart nodes.
  8 | %              Finding the longest path in a graph is NP-complete in
  9 | %              graphs with cycles. It is polynomial for DAGs.
 10 | 
 11 | n = size(Asmall,2);
 12 | extra_nodes = [];
 13 | extra_nodesExt = [];
 14 | extra_nodes_search = ones(1,n);
 15 | chain = [];
 16 | chainExt = [];
 17 | queue = [start];
 18 | % nodesList = 0 (if unvisited) or parentId (if visited)
 19 | nodesList = zeros(1,n);
 20 | nodesList(start)=start; % set as parent of the start node itself.
 21 | 
 22 | while ~( isempty(queue) )
 23 |     neighbors = find( Asmall(queue(1),:) ) ;
 24 |     
 25 |     for i = 1 : length(neighbors)
 26 |         if nodesList( neighbors(i) ) == 0 % unvisited neighbor
 27 |             nodesList( neighbors(i) ) = queue(1);
 28 |             queue = [ queue, neighbors(i) ];
 29 |         end
 30 |     end
 31 |     
 32 |     qsize = length(queue);
 33 |     % has the furthest node from start up to that point
 34 |     furthest_node = queue(qsize);
 35 |     queue = queue(2:qsize);
 36 | end
 37 | 
 38 | if path == true
 39 |     curr = furthest_node;
 40 |     while curr ~= start
 41 |         chain = [ curr, chain];
 42 |         extra_nodes_search(curr) = 0;
 43 |         curr = nodesList(curr);
 44 |     end
 45 |     chain = [ start, chain];
 46 |     extra_nodes_search(start) = 0;
 47 |     extra_nodes = find(extra_nodes_search==1);
 48 |     
 49 |     % heuristic: check for the extra nodes if they are neighboring with one
 50 |     % of the end points - then we can update our chain and make it longer
 51 |     % Do BFS in the induced subgraph of one endpoint and extra_nodes.
 52 |     % Repeat for the other endpoint. If a chain is returned, then make the
 53 |     % previously found path longer.
 54 |     
 55 |     if extend == true
 56 |         
 57 |         % chain except from p_init
 58 |         chain_head = chain(1:length(chain)-1);
 59 |         % chain except from p_fin
 60 |         chain_tail = chain(2:length(chain));
 61 |         
 62 |         %extend chain from start point (if possible)
 63 |         indSub_start = Asmall;
 64 |         indSub_start(chain_tail, :) = 0;
 65 |         indSub_start(:, chain_tail) = 0;
 66 |         [ furthestStart, chain1, ~ ] = BFS( indSub_start, chain(1), true, false );
 67 |         
 68 |         
 69 |         % extend chain from end point (if possible)
 70 |         indSub_end = Asmall;
 71 |         indSub_end(chain_head, :) = 0;
 72 |         indSub_end(:, chain_head) = 0;
 73 |         [ furthestEnd, chain2, ~ ] = BFS( indSub_end, chain(end), true, false );
 74 |     
 75 |         % checking if the chains have been extended to the same nodes.
 76 |         % This happened when I tried to encode a clique as a chain.
 77 |         overlap = false;
 78 |         for i = 2 : length(chain1)
 79 |             if ismember(chain1(i), chain2)
 80 |                 overlap = true;
 81 |                 % we include the nodes from chain1 up to the overlapped node
 82 |                 % (excluding the overlapped node)
 83 |                 chainExt = [ chain1((i-1):-1:2), chain, chain2(2:end) ];
 84 |                 break;
 85 |             end
 86 |         end
 87 |         
 88 |         % merging locally extended chains
 89 |         if overlap == false
 90 |             chainExt = [ chain1(end:-1:2), chain, chain2(2:end) ];
 91 |         end
 92 |         extra_nodesExtIdx = ~ismember(extra_nodes, chainExt);
 93 |         extra_nodesExt = extra_nodes(extra_nodesExtIdx);
 94 |         
 95 |     end 
 96 |     
 97 | end
 98 | 
 99 | end
100 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/EncodeSubgraph.m:
--------------------------------------------------------------------------------
 1 | function [] = EncodeSubgraph( B, curind, top_gccind, N_tot, out_fid, info, minSize )
 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %% Encode the connected component from SlashBurn:                         %
 4 | %   find whether it is clique, near-clique, star, chain or bipartite-core %
 5 | %   info: true (output the mdl benefit at the model file) /               %
 6 | %         false (Jilles' format for model file)                           %
 7 | %   minSize: smallest size of reported structures (number of nodes)       %
 8 | %  Author: Danai Koutra                                                   %
 9 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
10 | 
11 | Asmall = B(curind,curind);
12 | 
13 | n = size(curind, 2);
14 | m = nnz(Asmall);
15 | 
16 | % If the structure has less than 10 nodes, do not report it in the
17 | % model file
18 | if n < minSize
19 |     return;
20 | end
21 | %fprintf('n=%d, m=%d\n', n, m);
22 | 
23 | 
24 | %% First try to find one of the synthetic structures (vocab words):
25 | %   clique, star, chain, bipartite core
26 | exact_found = ExactStructure( Asmall, curind, top_gccind, N_tot, out_fid, info, minSize );
27 | 
28 | %% If it is not, try encoding it as near-structure (mispelled word)
29 | %   and compute the MDL cost of each encoding.
30 | %%%%% TO DO: add some heuristics before we try to encode as chain for
31 | %%%%% instance -- check the degree distribution.
32 | % maxint = 2147483647
33 | MDLcosts = ones(1,5) * 2147483647;
34 | if ( exact_found == false )
35 |     [ MDLcostFC, MDLcostNC ] = mdlCostAsfANDnClique( Asmall, N_tot );
36 |     [ MDLcostST, hub, spokes ] = mdlCostAsStar( Asmall, curind, N_tot );
37 |     [ MDLcostBC, MDLcostNB, set1, set2 ] = mdlCostAsBCorNB( Asmall, N_tot );
38 |     MDLcosts = [ MDLcostFC, MDLcostNC, MDLcostST, MDLcostBC, MDLcostNB];
39 |     
40 |     if m < 1.5*n
41 |         [ MDLcostCH, chain ] = mdlCostAsChain( Asmall, N_tot );
42 |         MDLcosts = [ MDLcosts, MDLcostCH ];
43 |     end
44 |     
45 |     %% Find which structure best describes the given submatrix: i.e., find the
46 |     %  structure that has the minimum MDL cost. Then output to the model file
47 |     %  this structure and its encoding gain in bits (mdlcostNC -
48 |     %  mdlCostStructure).
49 |     [ ~, idxMin ] = min(MDLcosts);
50 |     
51 |     cost_notEnc = compute_encodingCost( 'err', 0, 0, [nnz(Asmall) n^2-nnz(Asmall)]);
52 |     
53 |     if isinf(MDLcosts(idxMin)) || isinf(MDLcosts(2))
54 |         costGain_notEnc = cost_notEnc - MDLcostNC;
55 |         encodeAsNClique( curind, top_gccind, m, 0, costGain_notEnc, out_fid, info );
56 |         %fprintf(out_fid, ' nan\n');
57 |     else
58 |         switch idxMin
59 |             case 1
60 |                 costGain = MDLcostNC - MDLcostFC;
61 |                 costGain_notEnc = cost_notEnc - MDLcostFC;
62 |                 encodeAsFClique( curind, top_gccind, costGain, costGain_notEnc, out_fid, info );
63 |             case 2
64 |                 costGain = MDLcostNC - MDLcostNC;
65 |                 costGain_notEnc = cost_notEnc - MDLcostNC;
66 |                 m = nnz(Asmall);
67 |                 encodeAsNClique( curind, top_gccind, m, costGain, costGain_notEnc, out_fid, info );
68 |             case 3
69 |                 costGain = MDLcostNC - MDLcostST;
70 |                 costGain_notEnc = cost_notEnc - MDLcostST;
71 |                 encodeAsStar( curind, top_gccind, hub, spokes, costGain, costGain_notEnc, out_fid, info );
72 |             case 4
73 |                 costGain = MDLcostNC - MDLcostBC;
74 |                 costGain_notEnc = cost_notEnc - MDLcostBC;
75 |                 encodeAsBC( curind, top_gccind, set1, set2, costGain, costGain_notEnc, out_fid, info );
76 |             case 5
77 |                 costGain = MDLcostNC - MDLcostNB;
78 |                 costGain_notEnc = cost_notEnc - MDLcostNB;
79 |                 encodeAsNB( curind, top_gccind, set1, set2, costGain, costGain_notEnc, out_fid, info );
80 |             case 6
81 |                 costGain = MDLcostNC - MDLcostCH;
82 |                 costGain_notEnc = cost_notEnc - MDLcostCH;
83 |                 encodeAsChain( curind, top_gccind, chain, costGain, costGain_notEnc, out_fid, info );
84 |             otherwise
85 |                 error_message = 'error: impossible to get this error...\n'
86 |         end
87 |     end
88 |     
89 | end
90 | 
91 | end
92 | 
93 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/structureSelectionGreedyNforget.m:
--------------------------------------------------------------------------------
  1 | function [ cost_noModel historyCosts historyCostsInc] = structureSelectionGreedyNforget(A, graphFile, model_ordered, cost_ALLencoded_struct, outfile)
  2 | %% Select the substructures to output to the user.
  3 | % The ranking of the substructures is based on their MDL benefit. Add one
  4 | % substructure at a time and compute the mdl cost of encoding the whole
  5 | % graph. If the MDL cost starts increasing, stop adding more structures.
  6 | %
  7 | % Inputs:
  8 | %    A: adjacency matrix of the whole graph
  9 | %    graphFile: edge file of the input graph (csv file, without weights)
 10 | %               if 'none', create the edge file from matrix A
 11 | %    model_ordered: the substructures ordered in decreasing mdl benefit
 12 | %    final_fid: the model file with the substructures (until the current
 13 | %               step)
 14 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 15 | 
 16 | cost = zeros(1,2);
 17 | 
 18 | if strcmp(graphFile, 'none')
 19 |     %% Creation of graph from adjacency matrix (may skip if already have the
 20 |     %% file)
 21 |     [~, fname, ~] = fileparts(outfile);
 22 |     graphFile = sprintf('%s.graph', fname)
 23 |     [i j k] = find( A );
 24 |     graph_fid = fopen( graph_name, 'w' );
 25 |     fprintf( graph_fid, '%d,%d\n', i, j );
 26 |     fclose( graph_fid );
 27 | end
 28 | 
 29 | % currentDirectory = pwd
 30 | % [~, deepestFolder, ~] = fileparts(currentDirectory)
 31 | 
 32 | fid = fopen(outfile, 'w');
 33 | 
 34 | comm = sprintf('python2.6 ../mdl/score.py %s > pythonOutput.txt;', graphFile )
 35 | system( comm )
 36 | 
 37 | pythonOutput = importdata('pythonOutput.txt');
 38 | % Initial cost: the MDL cost of the Empty Model.
 39 | cost(1) = str2num(pythonOutput.textdata{2,2});
 40 | cost_noModel = cost(1);
 41 | cost(2) = cost(1);
 42 | 
 43 | cnt = 0;
 44 | historyCosts(cnt+1) = cost(1);
 45 | historyCostsInc(cnt+1) = cost(1);
 46 | 
 47 | consecutiveInc = 0;
 48 | cnt_structsInc = 0;
 49 | 
 50 | [~,graphname,~] = fileparts(graphFile);
 51 | 
 52 | 
 53 | while cnt < length(model_ordered) || historyCostsInc(cnt_structsInc+1) > cost_ALLencoded_struct %cnt < length(model_ordered) && historyCosts(cnt+1) > cost_ALLencoded_struct  && consecutiveInc ~= 5 %cost(2) <= cost(1)
 54 |     cnt = cnt + 1;
 55 |     if mod(cnt,10) == 0 
 56 |         cnt
 57 |     end
 58 |     cost(1) = cost(2);
 59 |     printStructureToModelFile(model_ordered(cnt), fid);
 60 |     comm = sprintf('time -p python2.6 ../mdl/score.py %s %s > pythonOutput.txt;', ...
 61 |                     graphFile, outfile );
 62 |     %comm = sprintf('cd ../mdl; python score.py ../%s/%s ../%s/%s > ../%s/pythonOutput.txt; cd ../%s', ...
 63 |     %    deepestFolder, graph_name, deepestFolder, outfile, deepestFolder, deepestFolder )
 64 |     system(comm);
 65 |     pythonOutput = importdata('pythonOutput.txt');
 66 |     cost(2) = str2num(pythonOutput.textdata{3,2});
 67 |     historyCosts(cnt+1) = cost(2);
 68 |     % removing the structure that caused the increase in the encoding cost
 69 |     if cost(2) > cost(1)
 70 |         fclose(fid);
 71 |         fid = fopen(outfile, 'a');
 72 |         %comm = sprintf('cp %s tmp', outfile);
 73 |         comm = sprintf('head -n %d %s > tmp; cp tmp %s', cnt_structsInc, outfile, outfile);
 74 |         system(comm);
 75 |         %comm = sprintf('sed "N;$!P;$!D;$d" < tmp > %s; rm tmp', outfile);
 76 |         %system(comm);
 77 |         % update the cost to its last value
 78 |         cost(2) = historyCostsInc(cnt_structsInc+1);
 79 |     else
 80 |         cnt_structsInc = cnt_structsInc + 1;
 81 |         historyCostsInc(cnt_structsInc+1) = cost(2);
 82 |     end
 83 |    cnm=sprintf('%s_cost_nomodel_gnf', graphname);
 84 |    save(cnm, 'cost_noModel')
 85 |    cnm=sprintf('%s_all_costs_gnf', graphname);
 86 |    save(cnm, 'historyCosts')
 87 |    cnm=sprintf('%s_all_costs_incStruct_gnf', graphname);
 88 |    save(cnm, 'historyCostsInc')
 89 | 
 90 | end
 91 | 
 92 | fclose(fid);
 93 | 
 94 | % remove the structure at the last line, since it caused increase in the
 95 | % MDL encoding cost.
 96 | % comm = sprintf('cp %s tmp', outfile);
 97 | % system(comm);
 98 | % if consecutiveInc == 2
 99 | %     comm = sprintf('sed "N;$!P;$!D;$d" < tmp > %s; rm tmp', outfile);
100 | %     system(comm);
101 | %     comm = sprintf('sed "$d" < %s > tmp; cp tmp %s; rm tmp', outfile, outfile);
102 | % else
103 | %     comm = sprintf('sed "N;$!P;$!D;$d" < tmp > %s; rm tmp', outfile);
104 | % end
105 | % system(comm);
106 | 
107 | 
108 | end
109 | 


--------------------------------------------------------------------------------
/MDL/score.py:
--------------------------------------------------------------------------------
 1 | #!/usr/local/bin/python2.6
 2 | 
 3 | import sys
 4 | import os
 5 | import config
 6 | 
 7 | from time import time
 8 | 
 9 | #from mdl import *
10 | from error import Error;
11 | from graph import Graph;
12 | from model import *;
13 | from mdl import *;
14 | 
# ---------------------------------------------------------------------------
# Command-line driver.
#   score.py <graph file> [model file] [options]
# Prints a header row, then the row "M_0:" for the empty model and, when a
# model file is given, the row "M_x:" for that model. The tab layout of
# these rows is left exactly as it was, in case other tools parse it.
# Every print uses one parenthesised argument, so the output is the same
# under Python 2 and Python 3.
# ---------------------------------------------------------------------------
if len(sys.argv) <= 1 :
    print('at least: <graph.graph> [model.model] [-pC] [-lC] [-pE] [-lE] [-e{NP,NB,TP,TB}]')
    print(' optional argument model = file to read model from, otherwise only empty model')
    print(' optional argument -vX    = verbosity (1, 2, or 3)')
    print(' optional argument -pG    = plot Graph adjacency matrix')
    print(' optional argument -pC    = plot Cover matrix')
    print(' optional argument -pE    = plot Error matrix')
    print(' optional argument -lC    = list Cover entries')
    print(' optional argument -lE    = list Error entries')
    print(' optional argument -eXX   = encode error resp. untyped using prefix (NP), or')
    print('                            binomial (NB) codes, or using typed')
    print('                            prefix (TP) or binomial (TB, default) codes')
    sys.exit()

# Verbosity: -v1 takes precedence over -v2, and -v3 overrides both.
if "-v1" in sys.argv :
    config.optVerbosity = 1
elif "-v2" in sys.argv :
    config.optVerbosity = 2
if "-v3" in sys.argv :
    config.optVerbosity = 3

t0 = time()

gFilename = sys.argv[1]
g = Graph()
g.load(gFilename)

if config.optVerbosity > 1 : print("- graph loaded.")

m = Model()

# Error encoding: the first match in the order NP, NB, TP, TB wins; both
# the "-eXX" and the short "-XX" spelling are accepted. (The short form of
# NP used to be misspelled "-NP'" and could never match.)
errorEnc = config.optDefaultError
for enc in ("NP", "NB", "TP", "TB") :
    if ("-e" + enc) in sys.argv or ("-" + enc) in sys.argv :
        errorEnc = enc
        break

if config.optVerbosity > 1 : print("- calculating L(M_0,G)")
(l_total_0, l_model_0, l_error_0, E_0) = L(g, m, errorEnc)
if config.optVerbosity > 1 : print("- calculated L(M_0,G)")

print("   \t" + "L(G,M)" + "\tL(M)" + "\tL(E)" + "\t#E+" + "\t#E-" + "\t\t#Ex")

# possible number of edges; N*N - N is always even, so // is exact
posNumEdges_0 = ((E_0.numNodes * E_0.numNodes) - E_0.numNodes) // 2
print("\t".join(["M_0:",
                 '%.0f' % l_total_0,
                 '%.0f' % l_model_0,
                 '%.0f' % l_error_0,
                 str(E_0.numModellingErrors) + '/' + str(E_0.numCellsCovered),
                 str(E_0.numUnmodelledErrors) + '/' + str(posNumEdges_0 - E_0.numCellsCovered),
                 str(E_0.numCellsExcluded)]))

# Error object used by the plot/list options: that of the empty model unless
# a model file is given. (These options used to refer to E_x unconditionally
# and failed with a NameError when no model file was passed.)
E_shown = E_0

if len(sys.argv) > 2 and sys.argv[2][0] != '-' :
    mFilename = sys.argv[2]
    m.load(mFilename)
    if config.optVerbosity > 1 : print("- M_x loaded.")
    (l_total_x, l_model_x, l_error_x, E_x) = L(g, m, errorEnc)
    posNumEdges_x = ((E_x.numNodes * E_x.numNodes) - E_x.numNodes) // 2
    # note the double tab in front of the last column of this row
    print("\t".join(["M_x:",
                     '%.0f' % l_total_x,
                     '%.0f' % l_model_x,
                     '%.0f' % l_error_x,
                     str(E_x.numModellingErrors) + '/' + str(E_x.numCellsCovered - E_x.numCellsExcluded),
                     str(E_x.numUnmodelledErrors) + '/' + str(posNumEdges_x - E_x.numCellsCovered)])
          + '\t\t' + str(E_x.numCellsExcluded))
    E_shown = E_x

# The model file is optional, so the options below are honoured wherever
# they appear on the command line (they used to be ignored unless at least
# three arguments were given).
if "-pG" in sys.argv :
    print("Adjacency matrix:")
    g.plot()

if "-pC" in sys.argv :
    print("Cover matrix:")
    E_shown.plotCover()

if "-pE" in sys.argv :
    print("Error matrix:")
    E_shown.plotError()

if "-lC" in sys.argv :
    print("Cover list:")
    E_shown.listCover()

if "-lE" in sys.argv :
    print("Error list:")
    E_shown.listError()

t1 = time()
#print('function vers1 takes %f' % (t1-t0))
97 | 


--------------------------------------------------------------------------------
/MDL/greedyScan.py:
--------------------------------------------------------------------------------
  1 | #!/usr/local/bin/python2.6
  2 | 
  3 | import sys
  4 | import os
  5 | import config
  6 | 
  7 | from time import time
  8 | 
  9 | #from mdl import *
 10 | from error import Error;
 11 | from graph import Graph;
 12 | from model import *;
 13 | from mdl import *;
 14 | 
 15 | if len(sys.argv) <= 1 :
 16 |     print 'at least: <graph.graph> [model.model] [-pC] [-lC] [-pE] [-lE] [-e{NP,NB,TP,TB}]';
 17 |     print ' optional argument model = file to read model from, otherwise only empty model';
 18 |     print ' optional argument -vX    = verbosity (1, 2, or 3)';
 19 |     print ' optional argument -pG    = plot Graph adjacency matrix';
 20 |     print ' optional argument -pC    = plot Cover matrix';
 21 |     print ' optional argument -pE    = plot Error matrix';
 22 |     print ' optional argument -lC    = list Cover entries';
 23 |     print ' optional argument -lE    = list Error entries';
 24 |     print ' optional argument -eXX   = encode error resp. untyped using prefix (NP), or';
 25 |     print '                            binomial (NB) codes, or using typed';
 26 |     print '                            prefix (TP) or binomial (TB, default) codes';
 27 |     exit();
 28 | 
 29 | if (len(sys.argv) > 1 and ("-v1" in sys.argv)) :
 30 |     config.optVerbosity = 1;
 31 | elif (len(sys.argv) > 1 and ("-v2" in sys.argv)) :
 32 |     config.optVerbosity = 2;
 33 | if (len(sys.argv) > 1 and ("-v3" in sys.argv)) :
 34 |     config.optVerbosity = 3;
 35 | 
 36 | t0 = time()
 37 | 
 38 | gFilename = sys.argv[1];
 39 | g = Graph();
 40 | g.load(gFilename);
 41 | 
 42 | 
 43 | if config.optVerbosity > 1 : print "- graph loaded."
 44 | 
 45 | m = Model();
 46 | 
 47 | errorEnc = config.optDefaultError;
 48 | if (len(sys.argv) > 1 and ("-eNP" in sys.argv or "-NP'" in sys.argv)) :
 49 |     errorEnc = "NP";
 50 | elif (len(sys.argv) > 1 and ("-eNB" in sys.argv or "-NB" in sys.argv)) :
 51 |     errorEnc = "NB";
 52 | elif (len(sys.argv) > 1 and ("-eTP" in sys.argv or "-TP" in sys.argv)) :
 53 |     errorEnc = "TP";
 54 | elif (len(sys.argv) > 1 and ("-eTB" in sys.argv or "-TB" in sys.argv)) :
 55 |     errorEnc = "TB";
 56 |         
 57 | if config.optVerbosity > 1 : print "- calculating L(M_0,G)"
 58 | (l_total_0, l_model_0, l_error_0, E_0) = L(g,m, errorEnc);
 59 | if config.optVerbosity > 1 : print "- calculated L(M_0,G)"
 60 | print "   \t" + "L(G,M)" + "\tL(M)" + "\tL(E)" + "\t#E+" + "\t#E-" + "\t\t#Ex";
 61 | print "M_0:\t" + '%.0f' % l_total_0 + "\t" + '%.0f' % l_model_0 + "\t" + '%.0f' %  l_error_0 + "\t" + str(E_0.numModellingErrors) + '/' + str(E_0.numCellsCovered) + '\t' + str(E_0.numUnmodelledErrors)  + '/' + str(((E_0.numNodes * E_0.numNodes)-E_0.numNodes) - E_0.numCellsCovered) + '\t' + str(E_0.numCellsExcluded);
 62 | 
 63 | 
 64 | if len(sys.argv) > 2 and sys.argv[2][0] != '-' :
 65 |     mFilename = sys.argv[2];
 66 |     m.load(mFilename);
 67 |     if config.optVerbosity > 1 : print "- M_x loaded."
 68 |     (l_total_x, l_model_x, l_error_x, E_x) = L(g,m, errorEnc);
 69 |     print "M_x:\t" + '%.0f' % l_total_x + "\t" + '%.0f' % l_model_x + "\t" + '%.0f' % l_error_x + "\t" + str(E_x.numModellingErrors) + '/' + str(E_x.numCellsCovered) + '\t' + str(E_x.numUnmodelledErrors)  + '/' + str(((E_x.numNodes * E_x.numNodes)-E_x.numNodes) - E_x.numCellsCovered) + '\t\t' + str(E_x.numCellsExcluded);
 70 |     
 71 |     l_total_all = l_total_x;
 72 |     lines = [];
 73 |     lines_all = [];
 74 |     l_total_prev = l_total_0;
 75 |     times = 1;
 76 |     maxStructs = m.numStructs;
 77 |     
 78 |     mFilename_list = mFilename.split('/');
 79 |     mFilename_main = mFilename_list[len(mFilename_list) - 1];
 80 |     print '%s' % mFilename_main
 81 |     #mFilenameGreedy = 'greedySelection_' + mFilename_main;
 82 |     #fgreedy = open(mFilenameGreedy,'w')
 83 |     #mFilenameGreedyCost = 'greedySelection_costs_' + mFilename_main;
 84 |     mFilenameTotalCost = 'greedyScan_totalCosts_' + mFilename_main;
 85 |     #fgreedyCost = open(mFilenameGreedyCost,'w')
 86 |     ftotalCost = open(mFilenameTotalCost,'w')    
 87 | 
 88 |     #fgreedyCost.write("%.0f\n" % l_total_0 )
 89 |     ftotalCost.write("%.0f\n" % l_total_0 )
 90 | 
 91 |     while times <= maxStructs :
 92 |        print "time\t" + '%.0f' % times;
 93 |        lines.append(times);
 94 |        m = Model();
 95 |        m.loadLines(mFilename, lines);
 96 |        (l_total_x, l_model_x, l_error_x, E_x) = L(g,m, errorEnc);
 97 |        print "M_x:\t" + '%.0f' % l_total_x + "\t" + '%.0f' % l_model_x + "\t" + '%.0f' % l_error_x + "\t" + str(E_x.numModellingErrors) + '/' + str(E_x.numCellsCovered) + '\t' + str(E_x.numUnmodelledErrors)  + '/' + str(((E_x.numNodes * E_x.numNodes)-E_x.numNodes) - E_x.numCellsCovered) + '\t\t' + str(E_x.numCellsExcluded);
 98 |        ftotalCost.write("%.0f\n" % l_total_x )
 99 |        times = times + 1; 
100 |        
101 |     ftotalCost.close();
102 |     #return l_total_x;
103 | 
104 |     #print " -= ", l_total_0 - l_total_x, "\t" + str(l_model_0 - l_model_x), "\t" + str(l_error_0 - l_error_x), "\t" + str(E_0.numModellingErrors - E_x.numModellingErrors), "\t" + str(E_0.numUnmodelledErrors - E_x.numUnmodelledErrors);
105 |     #print " %= ", "%.2f\t\t%.2f\t%.2f\t\t%.2f" % ((l_total_x / l_total_0 * 100), (l_model_x / l_model_0 * 100), (l_error_x / l_error_0 * 100), (E_x.numModellingErrors / E_0.numModellingErrors * 100));
106 | 
107 | if (len(sys.argv) > 3 and "-pG" in sys.argv) :
108 |     print "Adjacency matrix:";
109 |     g.plot();
110 | 
111 | if (len(sys.argv) > 3 and "-pC" in sys.argv) :
112 |     print "Cover matrix:";
113 |     E_x.plotCover();
114 | 
115 | if (len(sys.argv) > 3 and "-pE" in sys.argv) :
116 |     print "Error matrix:";    
117 |     E_x.plotError();
118 | 
119 | if (len(sys.argv) > 3 and "-lC" in sys.argv) :
120 |     print "Cover list:";
121 |     E_x.listCover();
122 | 
123 | if (len(sys.argv) > 3 and "-lE" in sys.argv) :
124 |     print "Error list:";    
125 |     E_x.listError();
126 |     
127 | t1 = time()
128 | #print 'function vers1 takes %f' %(t1-t0)
129 | 


--------------------------------------------------------------------------------
/DATA/cliqueStarClique.out:
--------------------------------------------------------------------------------
  1 | 1,2,1
  2 | 1,3,1
  3 | 1,4,1
  4 | 1,5,1
  5 | 1,6,1
  6 | 1,7,1
  7 | 1,8,1
  8 | 1,9,1
  9 | 1,10,1
 10 | 1,11,1
 11 | 1,12,1
 12 | 1,13,1
 13 | 1,14,1
 14 | 1,15,1
 15 | 1,16,1
 16 | 1,17,1
 17 | 1,18,1
 18 | 1,19,1
 19 | 1,20,1
 20 | 2,3,1
 21 | 2,4,1
 22 | 2,5,1
 23 | 2,6,1
 24 | 2,7,1
 25 | 2,8,1
 26 | 2,9,1
 27 | 2,10,1
 28 | 2,11,1
 29 | 2,12,1
 30 | 2,13,1
 31 | 2,14,1
 32 | 2,15,1
 33 | 2,16,1
 34 | 2,17,1
 35 | 2,18,1
 36 | 2,19,1
 37 | 2,20,1
 38 | 3,4,1
 39 | 3,5,1
 40 | 3,6,1
 41 | 3,7,1
 42 | 3,8,1
 43 | 3,9,1
 44 | 3,10,1
 45 | 3,11,1
 46 | 3,12,1
 47 | 3,13,1
 48 | 3,14,1
 49 | 3,15,1
 50 | 3,16,1
 51 | 3,17,1
 52 | 3,18,1
 53 | 3,19,1
 54 | 3,20,1
 55 | 4,5,1
 56 | 4,6,1
 57 | 4,7,1
 58 | 4,8,1
 59 | 4,9,1
 60 | 4,10,1
 61 | 4,11,1
 62 | 4,12,1
 63 | 4,13,1
 64 | 4,14,1
 65 | 4,15,1
 66 | 4,16,1
 67 | 4,17,1
 68 | 4,18,1
 69 | 4,19,1
 70 | 4,20,1
 71 | 5,6,1
 72 | 5,7,1
 73 | 5,8,1
 74 | 5,9,1
 75 | 5,10,1
 76 | 5,11,1
 77 | 5,12,1
 78 | 5,13,1
 79 | 5,14,1
 80 | 5,15,1
 81 | 5,16,1
 82 | 5,17,1
 83 | 5,18,1
 84 | 5,19,1
 85 | 5,20,1
 86 | 6,7,1
 87 | 6,8,1
 88 | 6,9,1
 89 | 6,10,1
 90 | 6,11,1
 91 | 6,12,1
 92 | 6,13,1
 93 | 6,14,1
 94 | 6,15,1
 95 | 6,16,1
 96 | 6,17,1
 97 | 6,18,1
 98 | 6,19,1
 99 | 6,20,1
100 | 7,8,1
101 | 7,9,1
102 | 7,10,1
103 | 7,11,1
104 | 7,12,1
105 | 7,13,1
106 | 7,14,1
107 | 7,15,1
108 | 7,16,1
109 | 7,17,1
110 | 7,18,1
111 | 7,19,1
112 | 7,20,1
113 | 8,9,1
114 | 8,10,1
115 | 8,11,1
116 | 8,12,1
117 | 8,13,1
118 | 8,14,1
119 | 8,15,1
120 | 8,16,1
121 | 8,17,1
122 | 8,18,1
123 | 8,19,1
124 | 8,20,1
125 | 9,10,1
126 | 9,11,1
127 | 9,12,1
128 | 9,13,1
129 | 9,14,1
130 | 9,15,1
131 | 9,16,1
132 | 9,17,1
133 | 9,18,1
134 | 9,19,1
135 | 9,20,1
136 | 10,11,1
137 | 10,12,1
138 | 10,13,1
139 | 10,14,1
140 | 10,15,1
141 | 10,16,1
142 | 10,17,1
143 | 10,18,1
144 | 10,19,1
145 | 10,20,1
146 | 11,12,1
147 | 11,13,1
148 | 11,14,1
149 | 11,15,1
150 | 11,16,1
151 | 11,17,1
152 | 11,18,1
153 | 11,19,1
154 | 11,20,1
155 | 12,13,1
156 | 12,14,1
157 | 12,15,1
158 | 12,16,1
159 | 12,17,1
160 | 12,18,1
161 | 12,19,1
162 | 12,20,1
163 | 13,14,1
164 | 13,15,1
165 | 13,16,1
166 | 13,17,1
167 | 13,18,1
168 | 13,19,1
169 | 13,20,1
170 | 14,15,1
171 | 14,16,1
172 | 14,17,1
173 | 14,18,1
174 | 14,19,1
175 | 14,20,1
176 | 15,16,1
177 | 15,17,1
178 | 15,18,1
179 | 15,19,1
180 | 15,20,1
181 | 16,17,1
182 | 16,18,1
183 | 16,19,1
184 | 16,20,1
185 | 17,18,1
186 | 17,19,1
187 | 17,20,1
188 | 18,19,1
189 | 18,20,1
190 | 19,20,1
191 | 21,20,1
192 | 21,19,1
193 | 21,18,1
194 | 21,22,1
195 | 21,23,1
196 | 21,24,1
197 | 21,25,1
198 | 21,26,1
199 | 21,27,1
200 | 21,28,1
201 | 21,29,1
202 | 27,28,1
203 | 27,29,1
204 | 27,30,1
205 | 27,31,1
206 | 27,32,1
207 | 27,33,1
208 | 27,34,1
209 | 27,35,1
210 | 27,36,1
211 | 27,37,1
212 | 27,38,1
213 | 27,39,1
214 | 27,40,1
215 | 27,41,1
216 | 27,42,1
217 | 27,43,1
218 | 27,44,1
219 | 27,45,1
220 | 27,46,1
221 | 27,47,1
222 | 27,48,1
223 | 27,49,1
224 | 27,50,1
225 | 27,51,1
226 | 28,29,1
227 | 28,30,1
228 | 28,31,1
229 | 28,32,1
230 | 28,33,1
231 | 28,34,1
232 | 28,35,1
233 | 28,36,1
234 | 28,37,1
235 | 28,38,1
236 | 28,39,1
237 | 28,40,1
238 | 28,41,1
239 | 28,42,1
240 | 28,43,1
241 | 28,44,1
242 | 28,45,1
243 | 28,46,1
244 | 28,47,1
245 | 28,48,1
246 | 28,49,1
247 | 28,50,1
248 | 28,51,1
249 | 29,30,1
250 | 29,31,1
251 | 29,32,1
252 | 29,33,1
253 | 29,34,1
254 | 29,35,1
255 | 29,36,1
256 | 29,37,1
257 | 29,38,1
258 | 29,39,1
259 | 29,40,1
260 | 29,41,1
261 | 29,42,1
262 | 29,43,1
263 | 29,44,1
264 | 29,45,1
265 | 29,46,1
266 | 29,47,1
267 | 29,48,1
268 | 29,49,1
269 | 29,50,1
270 | 29,51,1
271 | 30,31,1
272 | 30,32,1
273 | 30,33,1
274 | 30,34,1
275 | 30,35,1
276 | 30,36,1
277 | 30,37,1
278 | 30,38,1
279 | 30,39,1
280 | 30,40,1
281 | 30,41,1
282 | 30,42,1
283 | 30,43,1
284 | 30,44,1
285 | 30,45,1
286 | 30,46,1
287 | 30,47,1
288 | 30,48,1
289 | 30,49,1
290 | 30,50,1
291 | 30,51,1
292 | 31,32,1
293 | 31,33,1
294 | 31,34,1
295 | 31,35,1
296 | 31,36,1
297 | 31,37,1
298 | 31,38,1
299 | 31,39,1
300 | 31,40,1
301 | 31,41,1
302 | 31,42,1
303 | 31,43,1
304 | 31,44,1
305 | 31,45,1
306 | 31,46,1
307 | 31,47,1
308 | 31,48,1
309 | 31,49,1
310 | 31,50,1
311 | 31,51,1
312 | 32,33,1
313 | 32,34,1
314 | 32,35,1
315 | 32,36,1
316 | 32,37,1
317 | 32,38,1
318 | 32,39,1
319 | 32,40,1
320 | 32,41,1
321 | 32,42,1
322 | 32,43,1
323 | 32,44,1
324 | 32,45,1
325 | 32,46,1
326 | 32,47,1
327 | 32,48,1
328 | 32,49,1
329 | 32,50,1
330 | 32,51,1
331 | 33,34,1
332 | 33,35,1
333 | 33,36,1
334 | 33,37,1
335 | 33,38,1
336 | 33,39,1
337 | 33,40,1
338 | 33,41,1
339 | 33,42,1
340 | 33,43,1
341 | 33,44,1
342 | 33,45,1
343 | 33,46,1
344 | 33,47,1
345 | 33,48,1
346 | 33,49,1
347 | 33,50,1
348 | 33,51,1
349 | 34,35,1
350 | 34,36,1
351 | 34,37,1
352 | 34,38,1
353 | 34,39,1
354 | 34,40,1
355 | 34,41,1
356 | 34,42,1
357 | 34,43,1
358 | 34,44,1
359 | 34,45,1
360 | 34,46,1
361 | 34,47,1
362 | 34,48,1
363 | 34,49,1
364 | 34,50,1
365 | 34,51,1
366 | 35,36,1
367 | 35,37,1
368 | 35,38,1
369 | 35,39,1
370 | 35,40,1
371 | 35,41,1
372 | 35,42,1
373 | 35,43,1
374 | 35,44,1
375 | 35,45,1
376 | 35,46,1
377 | 35,47,1
378 | 35,48,1
379 | 35,49,1
380 | 35,50,1
381 | 35,51,1
382 | 36,37,1
383 | 36,38,1
384 | 36,39,1
385 | 36,40,1
386 | 36,41,1
387 | 36,42,1
388 | 36,43,1
389 | 36,44,1
390 | 36,45,1
391 | 36,46,1
392 | 36,47,1
393 | 36,48,1
394 | 36,49,1
395 | 36,50,1
396 | 36,51,1
397 | 37,38,1
398 | 37,39,1
399 | 37,40,1
400 | 37,41,1
401 | 37,42,1
402 | 37,43,1
403 | 37,44,1
404 | 37,45,1
405 | 37,46,1
406 | 37,47,1
407 | 37,48,1
408 | 37,49,1
409 | 37,50,1
410 | 37,51,1
411 | 38,39,1
412 | 38,40,1
413 | 38,41,1
414 | 38,42,1
415 | 38,43,1
416 | 38,44,1
417 | 38,45,1
418 | 38,46,1
419 | 38,47,1
420 | 38,48,1
421 | 38,49,1
422 | 38,50,1
423 | 38,51,1
424 | 39,40,1
425 | 39,41,1
426 | 39,42,1
427 | 39,43,1
428 | 39,44,1
429 | 39,45,1
430 | 39,46,1
431 | 39,47,1
432 | 39,48,1
433 | 39,49,1
434 | 39,50,1
435 | 39,51,1
436 | 40,41,1
437 | 40,42,1
438 | 40,43,1
439 | 40,44,1
440 | 40,45,1
441 | 40,46,1
442 | 40,47,1
443 | 40,48,1
444 | 40,49,1
445 | 40,50,1
446 | 40,51,1
447 | 41,42,1
448 | 41,43,1
449 | 41,44,1
450 | 41,45,1
451 | 41,46,1
452 | 41,47,1
453 | 41,48,1
454 | 41,49,1
455 | 41,50,1
456 | 41,51,1
457 | 42,43,1
458 | 42,44,1
459 | 42,45,1
460 | 42,46,1
461 | 42,47,1
462 | 42,48,1
463 | 42,49,1
464 | 42,50,1
465 | 42,51,1
466 | 43,44,1
467 | 43,45,1
468 | 43,46,1
469 | 43,47,1
470 | 43,48,1
471 | 43,49,1
472 | 43,50,1
473 | 43,51,1
474 | 44,45,1
475 | 44,46,1
476 | 44,47,1
477 | 44,48,1
478 | 44,49,1
479 | 44,50,1
480 | 44,51,1
481 | 45,46,1
482 | 45,47,1
483 | 45,48,1
484 | 45,49,1
485 | 45,50,1
486 | 45,51,1
487 | 46,47,1
488 | 46,48,1
489 | 46,49,1
490 | 46,50,1
491 | 46,51,1
492 | 47,48,1
493 | 47,49,1
494 | 47,50,1
495 | 47,51,1
496 | 48,49,1
497 | 48,50,1
498 | 48,51,1
499 | 49,50,1
500 | 49,51,1
501 | 50,51,1
502 | 


--------------------------------------------------------------------------------
/MDL/error.py:
--------------------------------------------------------------------------------
  1 | from graph import Graph;
  2 | 
  3 | class Error :
  4 |     numNodes = 0;
  5 | 
  6 |     # 1s present in G but not in M    
  7 |     numUnmodelledErrors = 0;
  8 |     unmodelled = [];
  9 |     numUnmodelledErrorsOld = 0;
 10 |     unmodelledOld = [];
 11 |     
 12 | 
 13 |     # incorrect cell values in M wrt G
 14 |     numModellingErrors = 0;
 15 |     modelled = [];
 16 |     numModellingErrorsOld = 0;
 17 |     modelledOld = [];
 18 | 
 19 |     # number of unique cells in M
 20 |     numCellsCovered = 0;
 21 |     covered = [];
 22 |     numCellsCoveredOld = 0;
 23 |     coveredOld = [];
 24 | 
 25 |     # number of cells directly encoded by M, no error possible
 26 |     numCellsExcluded = 0;
 27 |     excluded = [];
 28 |     numCellsExcludedOld = 0;
 29 |     excludedOld = [];
 30 | 
 31 |     
 32 |     def __init__(self, graph, err = None):
 33 | 
 34 |         if err is None :
 35 |             self.numNodes = graph.numNodes;
 36 | 
 37 |             self.unmodelled = [set(x) for x in graph.edges];
 38 |             self.numUnmodelledErrors = graph.numEdges;
 39 | 
 40 |             self.modelled = [set() for x in range(len(graph.edges))];
 41 |             self.numModellingErrors = 0;
 42 |         
 43 |             self.covered = [set() for i in range(self.numNodes)];
 44 |             self.numCellsCovered = 0;
 45 | 
 46 |             self.excluded = [set() for i in range(self.numNodes)];
 47 |             self.numCellsExcluded = 0;
 48 |         else :
 49 |             self.numNodes = err.numNodes;
 50 | 
 51 |             self.unmodelled = [set(x) for x in err.unmodelled];
 52 |             self.numUnmodelledErrors = err.numUnmodelledErrors;
 53 | 
 54 |             self.modelled = [set(x) for x in err.modelled];
 55 |             self.numModellingErrors = err.numModellingErrors;
 56 | 
 57 |             self.covered = [set(x) for x in err.covered];
 58 |             self.numCellsCovered = err.numCellsCovered;
 59 | 
 60 |             self.excluded = [set(x) for x in err.excluded];
 61 |             self.numCellsExcluded = err.numCellsExcluded;
 62 | 
 63 |     
 64 |     def recoverOld(self):
 65 |         self.numNodes = self.numNodesOld;
 66 |         
 67 |         self.unmodelled = self.unmodelledOld; 
 68 |         self.numUnmodelledErrors = self.numUnmodelledErrorsOld;
 69 | 
 70 |         self.modelled = self.modelledOld;
 71 |         self.numModellingErrors = self.numModellingErrorsOld;
 72 |         
 73 |         self.covered = self.coveredOld;
 74 |         self.numCellsCovered = self.numCellsCoveredOld;
 75 | 
 76 |         self.excluded = self.excludedOld;
 77 |         self.numCellsExcluded = self.numCellsExcludedOld;
 78 |  
 79 |        
 80 | 
 81 |     # checks whether edge (i,j) is covered
 82 |     def isModelled(self, i, j) :
 83 |         return (max(i,j)-1 in self.covered[min(i,j)-1]);
 84 |     def isCovered(self, i, j) :
 85 |         return self.isModelled(i,j);
 86 |         
 87 |     # annotates edge (i,j) as covered
 88 |     # ! (i,j) does not have to be in E of G(V,E)
 89 |     def cover(self, i, j) :
 90 |         self.covered[min(i,j)-1].add(max(i,j)-1);
 91 |         self.numCellsCovered += 1;
 92 |         return;
 93 | 
 94 |     # annotates edge (i,j) as both covered, and error-free
 95 |     # ! (i,j) does not have to be in E of G(V,E)
 96 |     def coverAndExclude(self, i, j) :
 97 |         self.cover(i,j)
 98 |         self.exclude(i,j);
 99 |         return;
100 |         
101 |     def exclude(self, i, j) :
102 |         self.excluded[min(i,j)-1].add(max(i,j)-1);
103 |         self.numCellsExcluded += 1;
104 |         return;
105 |         
106 |     def isError(self, i, j):
107 |         return max(i,j)-1 in self.unmodelled[min(i,j)-1] or max(i,j)-1 in self.modelled[min(i,j)-1];
108 |         
109 |     def isExcluded(self, i, j):
110 |         return max(i,j)-1 in self.excluded[min(i,j)-1];
111 |         
112 |     def isUnmodelledError(self, i, j):
113 |         return max(i,j)-1 in self.unmodelled[min(i,j)-1];
114 |     def isUnmodelledEdge(self, i, j):
115 |         return self.isUnmodelledError(i,j);
116 | 
117 |     def isModellingError(self, i, j):
118 |         return max(i,j)-1 in self.modelled[min(i,j)-1];
119 | 
120 |     # annotates edge (i,j) as correct
121 |     def delError(self, i, j) :
122 |         if self.isUnmodelledError(i,j) :
123 |             self.delUnmodelledError(i,j);
124 |         else :
125 |             self.delModellingError(i,j);      
126 | 
127 |     # annotates edge (i,j) as not-modelled
128 |     def addUnmodelledError(self, i, j) :
129 |         self.unmodelled[min(i,j)-1].add(max(i,j)-1);
130 |         self.numUnmodelledErrors += 1;
131 |         
132 |     # annotates edge (i,j) as correctly modelled
133 |     def delUnmodelledError(self, i, j) :
134 |         self.unmodelled[min(i,j)-1].remove(max(i,j)-1);
135 |         self.numUnmodelledErrors -= 1;
136 | 
137 |     # annotates edge (i,j) as erronously modelled
138 |     def addModellingError(self, i, j) :
139 |         self.modelled[min(i,j)-1].add(max(i,j)-1);
140 |         self.numModellingErrors += 1;
141 | 
142 |     # annotates edge (i,j) as incorrectly modelled
143 |     def delModellingError(self, i, j) :
144 |         self.modelled[min(i,j)-1].remove(max(i,j)-1);
145 |         self.numModellingErrors -= 1;
146 | 
147 | 
148 |     def plotCover(self):
149 |         for idx in range(len(self.covered)) :
150 |             mystr = "".join(["." for x in range(0,idx+1)]);
151 |             for idy in range(idx+1,len(self.covered)) :
152 |                 if idy in self.covered[idx] :
153 |                     mystr += "1";
154 |                 else :
155 |                     mystr += "-";
156 |             print mystr;
157 | 
158 |     def plotError(self):
159 |         for idx in range(len(self.unmodelled)) : # uses 'unmodelled' only as numNodes
160 |             mystr = "".join(["." for x in range(0,idx+1)]);
161 |             for idy in range(idx+1,len(self.unmodelled)) :
162 |                 if idy in self.covered[idx] :
163 |                     if idy in self.excluded[idx] :
164 |                         mystr += "*";
165 |                     elif idy in self.modelled[idx] :
166 |                         mystr += "+";
167 |                     else :
168 |                         mystr += "-";
169 |                 else :
170 |                     if idy in self.unmodelled[idx] :
171 |                         mystr += "1";
172 |                     else :
173 |                         mystr += "0";
174 |             print mystr;
175 | 
176 |     def plotExcluded(self):
177 |         for idx in range(len(self.excluded)) :
178 |             mystr = "".join(["." for x in range(0,idx+1)]);
179 |             for idy in range(idx+1,len(self.excluded)) :
180 |                 if idy in self.excluded[idx] :
181 |                     mystr += "1";
182 |                 else :
183 |                     mystr += "0";
184 |             print mystr;
185 | 
186 |     def listCover(self):
187 |         print self.covered;
188 |     
189 |     def listError(self):
190 |         for idx in range(len(self.unmodelled)) :
191 |             if len(self.unmodelled[idx]) > 0 :
192 |                 print idx+1, "+: "+str([x+1 for x in self.unmodelled[idx]]), "-: "+str([x+1 for x in self.modelled[idx]]),;
193 | 
194 |     def listExcluded(self):
195 |         print self.excluded;
196 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/compute_encodingCost.m:
--------------------------------------------------------------------------------
  1 | function [ MDLcost ] = compute_encodingCost( subgraph, N_tot, n_sub, E, n_sub2, nb_edges, ca)
  2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  3 | %% Computation of the local encoding cost of a given substructure:        %
  4 | %   INPUTS:                                                               %
  5 | %   subgraph: 'fc', 'nc', 'st', 'ch', 'bc', 'err'                               %
  6 | %   N_tot:  total number of nodes in the whole graph                      %
  7 | %   n_sub: number of nodes in the given substructure OR number of nodes   %
  8 | %           in the first set of a 'bc' (k)                                %
  9 | %   E         : error matrix                                              %
 10 | %   n_sub2:   optional - number of nodes in the second set of a 'bc' (l)  %
 11 | %   nb_edges:   optional -  edges *between* the two sets of               %
 12 | %                        the near-bipartite core                          %
 13 | %   ca:       true if cross-association is used (encoding of nc)          %
 14 | %  ---------------                                                        %
 15 | %  OUTPUTS:                                                               %
 16 | %   MDLcost = model_cost + error cost                                     %
 17 | %  Author: Danai Koutra                                                   %
 18 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 19 | 
 20 | if nargin < 7
 21 |     ca = false;
 22 | end
 23 | 
 24 | test_error_edges(E);
 25 | 
 26 | switch subgraph
 27 |     case 'fc'
 28 |         if E(1) == 0 || E(2) == 0 % no excluded edges
 29 |             MDLcost = LN( n_sub ) + l2cnk(N_tot, n_sub); %log2( nchoosek(vpi(N_tot), n_sub) );
 30 |         else
 31 |             MDLcost = LN( n_sub ) + l2cnk(N_tot, n_sub) + Lnu_opt(E);  %log2( nchoosek(vpi(N_tot), n_sub) ) + Lnu_opt( E );
 32 |         end
 33 |     case 'nc'
 34 |         % for the near clique: E is the Asmall matrix
 35 |         edges_inc = nnz(E);
 36 |         if ca == true % cross-association for bipartite graph (rectangular mat)
 37 |             edges_exc = size(E,1)*size(E,2)-nnz(E);%sum(E(:)==0); % computing the mdl cost of a bipartite graph encoded as near clique
 38 |         else
 39 |             edges_exc = size(E,1)*size(E,2)-nnz(E);%sum(E(:)==0)-n_sub; % diagonal elements are always 0 (no self-loops)
 40 |         end
 41 |         if edges_exc ~= 0 && edges_inc ~= 0
 42 |             MDLcost = LN( n_sub ) + l2cnk(N_tot, n_sub) + ... %log2(nchoosek(vpi(N_tot), n_sub)) + ...
 43 |                 log2( n_sub^2 ) + edges_inc * NLL( edges_inc, edges_exc, 1) + ...
 44 |                 edges_exc * NLL( edges_inc, edges_exc, 0);
 45 |         else
 46 |             MDLcost = LN( n_sub ) + log2(nchoosek(vpi(N_tot), n_sub));
 47 |         end
 48 |     case 'st'
 49 |         if E(1) == 0 || E(2) == 0 %if sum(sum(E)) == 0
 50 |             MDLcost = LN( n_sub-1 ) + log2( N_tot ) +  l2cnk(N_tot-1, n_sub-1); %log2( nchoosek( vpi(N_tot-1), n_sub-1 ) );
 51 |         else
 52 |             MDLcost = LN( n_sub-1 ) + log2( N_tot ) + ...
 53 |                 l2cnk(N_tot-1, n_sub-1) + ...  %log2( nchoosek( vpi(N_tot-1), n_sub-1 ) ) + ...
 54 |                 Lnu_opt( E );
 55 |         end
 56 |     case 'ch'
 57 |         x = 0:(n_sub-1);
 58 |         N_tot_vec = N_tot * ones(1, n_sub);
 59 |         if E(1) == 0 || E(2) == 0 %if sum(sum(E)) == 0
 60 |             MDLcost = LN( n_sub-1 ) + sum( log2(N_tot_vec - x) );
 61 |         else
 62 |             MDLcost =  LN( n_sub-1 ) + sum( log2(N_tot_vec - x) ) + Lnu_opt( E );
 63 |         end
 64 |     case 'bc'
 65 |         k = n_sub;
 66 |         l = n_sub2;
 67 |         if E(1) == 0 || E(2) == 0 %if sum(sum(E)) == 0
 68 |             MDLcost = LN(k) + LN(l) + l2cnk(N_tot, k) + l2cnk(N_tot-k, l); %log2( nchoosek(vpi(N_tot), k) ) + log2( nchoosek(vpi(N_tot-k), l) );
 69 |         else
 70 |             MDLcost = LN(k) + LN(l) + l2cnk(N_tot, k) + ...            %log2( nchoosek(vpi(N_tot), k) ) + ...
 71 |                 l2cnk(N_tot-k, l)        + ...      %log2( nchoosek(vpi(N_tot-k), l) ) + ...
 72 |                 Lnu_opt( E );
 73 |         end
 74 |     case 'nb'
 75 |         k = n_sub;
 76 |         l = n_sub2;
 77 |         if E(1) == 0 || strcmp('ca', true) || E(2) == 0 %if sum(sum(E)) == 0
 78 |             MDLcost = LN(k) + LN(l) + l2cnk(N_tot, k) + l2cnk(N_tot-k, l);         % log2( nchoosek(vpi(N_tot), k) ) + log2( nchoosek(vpi(N_tot-k), l) );
 79 |         else
 80 |             % The error matrix E has only the edges *within* the two sets
 81 |             % of nodes.
 82 |             edges_inc = nb_edges(1); %nnz(nb_mat);
 83 |             edges_exc = nb_edges(2); %2*k*l - edges_inc; % the bipartite core model has 2*k*l edges (all the edges between the two sets of nodes - counting them 2 times, because the adjacency matrix is symmetric)
 84 |             if edges_inc == 0 || edges_exc == 0
 85 |                 MDLcost = LN(k) + LN(l) + l2cnk(N_tot, k) + l2cnk(N_tot-k, l) +  Lnu_opt( E);  ... %log2( nchoosek(vpi(N_tot), k) ) + ...
 86 |                     %log2( nchoosek(vpi(N_tot-k), l) ) + Lnu_opt( E);
 87 |             else
 88 |                 MDLcost = LN(k) + LN(l) + l2cnk(N_tot, k) + l2cnk(N_tot-k, l) + ...         %log2( nchoosek(vpi(N_tot), k) ) + log2( nchoosek(vpi(N_tot-k), l) ) + 
 89 |                     log2( (n_sub+n_sub2)^2 ) + ...
 90 |                     edges_inc * NLL( edges_inc, edges_exc, 1) + ...
 91 |                     edges_exc * NLL( edges_inc, edges_exc, 0) + Lnu_opt( E );
 92 |                 
 93 |             end
 94 |         end
 95 |     case 'err'
 96 |         if E(1) ~= 0 && E(2) ~= 0
 97 |             MDLcost = Lnu_opt( E );
 98 |         elseif E(1) ~= 0 
 99 |             MDLcost = LN( E(1) );
100 |         elseif E(2) ~= 0
101 |             MDLcost = LN( E(2) );
102 |         end
103 |     otherwise
104 |         error_message = 'error: invalid structure...\n'
105 | end
106 | 
107 | testMDLcost(MDLcost);
108 | 
109 | %% encoded size of an integer >=1 as by Rissanen's 1983 Universal code for integers
110 |     function [ c ] = LN( n )
111 |         c0 = 2.865064;
112 |         c = log2(c0);
113 |         logTerm = log2(n);
114 |         while logTerm > 0
115 |             c = c + logTerm;
116 |             logTerm = log2(logTerm);
117 |         end
118 |     end
119 | 
120 | %% error per structure: Naive Uniform
121 |     function [ c_err ] = Lnu( E )
122 |         Einc = nnz(E);
123 |         Eexc = sum(E(:)==0);
124 |         c_err = LN( Einc ) + ...
125 |             Einc * NLL( Einc, Eexc, 1) + ...
126 |             Eexc * NLL( Einc, Eexc, 0);
127 |     end
128 | 
129 | %% error per full clique: Naive Uniform
130 |     function [ c_err ] = Lnu_opt( E )
131 |         % E has two entries: # of included edges, # of excluded edges
132 |         Einc = E(1);
133 |         Eexc = E(2);
134 |         c_err = LN( Einc ) + ...
135 |             Einc * NLL( Einc, Eexc, 1) + ...
136 |             Eexc * NLL( Einc, Eexc, 0);
137 |     end
138 | 
139 | 
140 | %% Alternative error per structure: Naive Data-to-Model
141 |     function [ c_err ] = Lnd( E )
142 |         Einc = nnz(E);
143 |         c_err = LN( Einc ) + l2cnk (N_tot^2, Einc);   %log2( nchoosek(vpi(N_tot^2), Einc) );
144 |     end
145 | 
146 | 
147 | %% Alternative error per structure: Naive Data-to-Model
148 |     function [ c_err ] = Lnd_opt( E )
149 |         Einc = E(1);
150 |         c_err = LN( Einc ) + l2cnk (N_tot^2, Einc);  %log2( nchoosek(vpi(N_tot^2), Einc) );
151 |     end
152 | 
153 | %% Negative log-likelihood
154 | % If sub = 0: p0 = -log2(excl / (incl + excl))
155 | % if sub = 1: p1 = -log2(incl / (incl + excl))
156 |     function [ l ] = NLL( incl, excl, sub )
157 |         if sub == 0
158 |             l = -log2(excl / (incl + excl));
159 |         elseif sub == 1
160 |             l = -log2(incl / (incl + excl));
161 |         else
162 |             err = 'error... Can only compute l0 ot l1 (negative log-likelihood)'
163 |         end
164 |     end
165 | 
166 | end
167 | 
168 | 


--------------------------------------------------------------------------------
/MDL/mdl.py:
--------------------------------------------------------------------------------
  1 | import config;
  2 | import mdl_base;
  3 | import mdl_structs;
  4 | import mdl_error;
  5 | from copy import deepcopy;
  6 | 
  7 | from math import log,factorial;
  8 | from error import Error;
  9 | from graph import Graph;
 10 | from model import Model;
 11 | 
 12 | from mdl_base import *;
 13 | from mdl_structs import *;
 14 | from mdl_error import *;
 15 | 
 16 | 
 17 | ### Our Encoding Starts Here ###
 18 | 
 19 | ### Total Encoded Size
 20 | def L(G, M, errorEnc): 
 21 |     E = Error(G); # initially, everything is error, nothing is covered
 22 |     error_cost = 0;
 23 |  
 24 |    
 25 | 
 26 |     model_cost = LN(M.numStructs+1);    # encode number of structures we're encoding with
 27 |     model_cost += LwC(M.numStructs, M.numStrucTypes);            # encode the number per structure
 28 | 
 29 |     # encode the structure-type identifier per type
 30 |     if M.numFullCliques > 0 :
 31 |         model_cost += M.numFullCliques * log(M.numFullCliques / float(M.numStructs), 2);
 32 |     if M.numNearCliques  > 0 :
 33 |         model_cost += M.numNearCliques * log(M.numNearCliques / float(M.numStructs), 2);
 34 |     if M.numChains > 0 :
 35 |         model_cost += M.numChains * log(M.numChains / float(M.numStructs), 2);
 36 |     if M.numStars > 0 :
 37 |         model_cost += M.numStars * log(M.numStars / float(M.numStructs), 2);
 38 |     # off-diagonals
 39 |     if M.numFullOffDiagonals > 0 :
 40 |         model_cost += M.numFullOffDiagonals * log(M.numFullOffDiagonals / float(M.numStructs), 2);
 41 |     if M.numNearOffDiagonals > 0 :
 42 |         model_cost += M.numNearOffDiagonals * log(M.numNearOffDiagonals / float(M.numStructs), 2);
 43 |     # bipartite-cores
 44 |     if M.numBiPartiteCores > 0 :
 45 |         model_cost += M.numBiPartiteCores * log(M.numBiPartiteCores / float(M.numStructs), 2);
 46 |     if M.numNearBiPartiteCores > 0 :
 47 |         model_cost += M.numNearBiPartiteCores * log(M.numNearBiPartiteCores / float(M.numStructs), 2);
 48 |     if M.numJellyFishes > 0 :
 49 |         model_cost += M.numJellyFishes * log(M.numJellyFishes / float(M.numStructs), 2);
 50 |     if M.numCorePeripheries > 0 :
 51 |         model_cost += M.numCorePeripheries * log(M.numCorePeripheries / float(M.numStructs), 2);
 52 | 
 53 |     # encode the structures
 54 |     for struc in M.structs :
 55 |         if struc.isFullClique() :
 56 |             model_cost += LfullClique(struc,M,G,E);
 57 |         elif struc.isNearClique() :
 58 |             model_cost += LnearClique(struc,M,G,E);
 59 |         elif struc.isChain() :
 60 |             model_cost += Lchain(struc,M,G,E);
 61 |         elif struc.isStar() :
 62 |             model_cost += Lstar(struc,M,G,E);
 63 |         elif struc.isCorePeriphery() :
 64 |             model_cost += LcorePeriphery(struc,M,G,E);
 65 |         elif struc.isJellyFish() :
 66 |             model_cost += LjellyFish(struc,M,G,E);
 67 |         elif struc.isBiPartiteCore() :
 68 |             model_cost += LbiPartiteCore(struc,M,G,E);
 69 |         elif struc.isNearBiPartiteCore() :
 70 |             model_cost += LnearBiPartiteCore(struc,M,G,E);
 71 |         elif struc.isFullOffDiagonal() :
 72 |             model_cost += LfullOffDiagonal(struc,M,G,E);
 73 |         elif struc.isNearOffDiagonal() :
 74 |             model_cost += LnearOffDiagonal(struc,M,G,E);
 75 |     
 76 |     # encode the error
 77 |     error_cost += 0 if E.numCellsCovered == 0 else log(E.numCellsCovered, 2);    # encode number of additive Errors
 78 |     if ((G.numNodes * G.numNodes - G.numNodes) / 2) - E.numCellsCovered > 0 :
 79 |         error_cost += log(((G.numNodes * G.numNodes - G.numNodes) / 2) - E.numCellsCovered, 2);    # encode number of Errors
 80 |         
 81 |     if errorEnc == "NP" :
 82 |         error_cost += LErrorNaivePrefix(G,M,E);
 83 |     elif errorEnc == "NB" :
 84 |         error_cost += LErrorNaiveBinom(G,M,E);
 85 |     elif errorEnc == "TP" :
 86 |         error_cost += LErrorTypedPrefix(G,M,E);
 87 |     elif errorEnc == "TB" :
 88 |         error_cost += LErrorTypedBinom(G,M,E);
 89 |     
 90 |     total_cost = model_cost + error_cost;
 91 |     
 92 |     return (total_cost, model_cost, error_cost, E);
 93 | 
 94 |     
 95 |     
 96 | ### Total Encoded Size for the greedy heuristic -- incrementally update the MDL cost
 97 | ## for the newly added stucture 'struc'
 98 | def Lgreedy(G, M, errorEnc, time, struc, totalCostOld, Eold, model_cost_struct): 
 99 |     
100 |     if time == 1:
101 |         E = Error(G); # initially, everything is error, nothing is covered
102 |         #E.saveOld();
103 |         # the cost for encoding each structure (to avoid recomputing it for the greedy updates)
104 |         model_cost2 = 0;
105 |     else :
106 |         E = Error(G, Eold);
107 |         #E.deepish_copy(Eold);
108 |         #E = copy.deepcopy(Eold);
109 |         #E = Eold;
110 |         # the cost for encoding each structure separately 
111 |         # Just update the up-to-now cost by adding the cost of the new structure
112 |         model_cost2 = model_cost_struct;
113 | 
114 |     error_cost = 0;
115 |     
116 |     model_cost = LN(M.numStructs+1);    # encode number of structures we're encoding with
117 |     model_cost += LwC(M.numStructs, M.numStrucTypes);            # encode the number per structure
118 | 
119 |     # encode the structure-type identifier per type
120 |     if M.numFullCliques > 0 :
121 |         model_cost += M.numFullCliques * log(M.numFullCliques / float(M.numStructs), 2);
122 |     if M.numNearCliques  > 0 :
123 |         model_cost += M.numNearCliques * log(M.numNearCliques / float(M.numStructs), 2);
124 |     if M.numChains > 0 :
125 |         model_cost += M.numChains * log(M.numChains / float(M.numStructs), 2);
126 |     if M.numStars > 0 :
127 |         model_cost += M.numStars * log(M.numStars / float(M.numStructs), 2);
128 |     # off-diagonals
129 |     if M.numFullOffDiagonals > 0 :
130 |         model_cost += M.numFullOffDiagonals * log(M.numFullOffDiagonals / float(M.numStructs), 2);
131 |     if M.numNearOffDiagonals > 0 :
132 |         model_cost += M.numNearOffDiagonals * log(M.numNearOffDiagonals / float(M.numStructs), 2);
133 |     # bipartite-cores
134 |     if M.numBiPartiteCores > 0 :
135 |         model_cost += M.numBiPartiteCores * log(M.numBiPartiteCores / float(M.numStructs), 2);
136 |     if M.numNearBiPartiteCores > 0 :
137 |         model_cost += M.numNearBiPartiteCores * log(M.numNearBiPartiteCores / float(M.numStructs), 2);
138 |     if M.numJellyFishes > 0 :
139 |         model_cost += M.numJellyFishes * log(M.numJellyFishes / float(M.numStructs), 2);
140 |     if M.numCorePeripheries > 0 :
141 |         model_cost += M.numCorePeripheries * log(M.numCorePeripheries / float(M.numStructs), 2);
142 | 
143 |     # encode the structures
144 |     if struc.isFullClique() :
145 |         model_cost2 += LfullClique(struc,M,G,E);
146 |     elif struc.isNearClique() :
147 |         model_cost2 += LnearClique(struc,M,G,E);
148 |     elif struc.isChain() :
149 |         model_cost2 += Lchain(struc,M,G,E);
150 |     elif struc.isStar() :
151 |         model_cost2 += Lstar(struc,M,G,E);
152 |     elif struc.isCorePeriphery() :
153 |         model_cost2 += LcorePeriphery(struc,M,G,E);
154 |     elif struc.isJellyFish() :
155 |         model_cost2 += LjellyFish(struc,M,G,E);
156 |     elif struc.isBiPartiteCore() :
157 |         model_cost2 += LbiPartiteCore(struc,M,G,E);
158 |     elif struc.isNearBiPartiteCore() :
159 |         model_cost2 += LnearBiPartiteCore(struc,M,G,E);
160 |     elif struc.isFullOffDiagonal() :
161 |         model_cost2 += LfullOffDiagonal(struc,M,G,E);
162 |     elif struc.isNearOffDiagonal() :
163 |         model_cost2 += LnearOffDiagonal(struc,M,G,E);
164 |     
165 |     # encode the error
166 |     error_cost += 0 if E.numCellsCovered == 0 else log(E.numCellsCovered, 2);    # encode number of additive Errors
167 |     if ((G.numNodes * G.numNodes - G.numNodes) / 2) - E.numCellsCovered > 0 :
168 |         error_cost += log(((G.numNodes * G.numNodes - G.numNodes) / 2) - E.numCellsCovered, 2);    # encode number of Errors
169 |  
170 |     if errorEnc == "NP" :
171 |         error_cost += LErrorNaivePrefix(G,M,E);
172 |     elif errorEnc == "NB" :
173 |         error_cost += LErrorNaiveBinom(G,M,E);
174 |     elif errorEnc == "TP" :
175 |         error_cost += LErrorTypedPrefix(G,M,E);
176 |     elif errorEnc == "TB" :
177 |         error_cost += LErrorTypedBinom(G,M,E);
178 |     
179 |     total_cost = model_cost + model_cost2 + error_cost;
180 |     model_cost_total = model_cost + model_cost2;    
181 | 
182 |     return (total_cost, model_cost_total, model_cost2, error_cost, E);
183 | 
184 |     
185 |     
186 |     
187 |     
188 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/SlashBurnEncode.m:
--------------------------------------------------------------------------------
  1 | %  Author: Danai Koutra
  2 | %  Adaptation and extension of U Kang's code for SlashBurn 
  3 | %   (http://www.cs.cmu.edu/~ukang/papers/sb_icdm2011.pdf)
  4 | %
  5 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  6 | %                                                                           %
  7 | % SlashBurn Encode: encode graph using SlashBurn                            %
  8 | %                                                                           %
  9 | % Parameter                                                                 %
 10 | %   AOrig : adjacency matrix of a graph. We assume symmetric matrix with    %
 11 | %           both upper- and lower- diagonal elements are set.               %
 12 | %   k : # of nodes to cut in SlashBurn                                      %
 13 | %   outFolder : folder where the output model files are written             %
 14 | %   info : true for detailed output (encoding gain reported)                %
 15 | %          false for brief output (no encoding gain reported)               %
 16 | %   starOption: true for encoding the vicinities of top degree nodes as     %
 17 | %                     stars                                                 %
 18 | %               false for encoding these vicinities as stars, nc or fc      %
 19 | %               (depending on the smallest mdl cost)                        %
 20 | %   minSize: minimum size of structure that we want to encode               %
 21 | %   graphFile: path to the edge file                                        %
 22 | %                                                                           %
 23 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 24 | 
 25 | function [ ] = SlashBurnEncode(AOrig, k, outFolder, info, starOption, minSize, graphFile )
 26 | 
 27 | %addpath('./VariablePrecisionIntegers/VariablePrecisionIntegers');
 28 | 
 29 | %% Definition of global variables:
 30 | %  model:
 31 | global model;
 32 | global model_idx;
 33 | 
 34 | dir=0;
 35 | % cost of encoding all the structures
 36 | cost_ALLencoded_struct = 0;
 37 | % if greedy is selected, all_costs has all the costs by adding one extra
 38 | % structure for encoding
 39 | all_costs = 0;
 40 | 
 41 | %if nargin < 3
 42 | %    info = false;
 43 | %end
 44 | [~, fname, ~] = fileparts(graphFile);
 45 | allOutFile = sprintf('%s/%s_ALL.model', outFolder, fname);
 46 | outfile_ordered = sprintf('%s/%s_orderedALL.model', outFolder, fname);
 47 | % Open 'outfile' for writing
 48 | out_fid = fopen(allOutFile, 'w');
 49 | 
 50 | % Initialize variables
 51 | gccsize = zeros(0,0);
 52 | niter=0;
 53 | n = max(size(AOrig,1),size(AOrig,2));
 54 | AOrig(n,n)=0;
 55 | totalind = zeros(1,n);
 56 | cur_lpos = 1;
 57 | cur_rpos = n;
 58 | gccind = [1:n];
 59 | cur_gccsize = n;
 60 | total_SB_stars = 0;
 61 | encoded_SB_stars = 0;
 62 | total_cost = 0;
 63 | 
 64 | if info == true
 65 |     info = false
 66 |     changingYourOption = 'Setting info to false, so that we can compute the encoding cost of all the found structures'
 67 | end
 68 | 
 69 | tic 
 70 | 
 71 | while niter == 0 || cur_gccsize > k
 72 |     niter = niter+1;
 73 |     fprintf('Iteration %d...\n', niter);
 74 |     
 75 |     A = AOrig(gccind,gccind);
 76 |     [disind,newgccind,topind] = RemHdegreeGccEncode(A, k, dir, out_fid, gccind, n, info, minSize);
 77 |     % save 'star' structures
 78 |     star_cores = topind;
 79 |     for i=1:size(star_cores, 2)
 80 |         E = zeros(1,2);
 81 |         cur_center = star_cores(i);
 82 |         
 83 |         satellites = find(A(cur_center, :)>0);
 84 |         
 85 |         % If the structure has less than minSize nodes, do not report it in the
 86 |         % model file
 87 |         if length(satellites) < minSize
 88 |             continue;
 89 |         end
 90 |         
 91 |         n_star = length(satellites) + 1;
 92 |         Asmall = A([cur_center, satellites],[cur_center, satellites]);
 93 |         MDLcostNC = compute_encodingCost( 'nc', n, n_star, Asmall);
 94 |         % 1s in the error matrix
 95 |         % missing edges in star + extra edges not in star
 96 |         E(1) = 2* (n_star-1-nnz(A(cur_center,satellites))) + nnz(A(satellites, satellites));
 97 |         % 0s in the error matrix
 98 |         E(2) = n_star^2 - E(1);
 99 |         
100 |         cost_notEnc = compute_encodingCost( 'err', 0, 0, [nnz(Asmall) n_star^2-nnz(Asmall)]);
101 |         
102 |         test_error_edges(E);
103 |         
104 |         % MDL cost of encoding given substructure as a star
105 |         MDLcostST = compute_encodingCost( 'st', n, n_star, E);
106 |         total_SB_stars = total_SB_stars + 1;
107 |         
108 |         if isinf(MDLcostNC) || isinf(MDLcostST)
109 |             costGain = 0;
110 |             costGain_notEnc = 0;
111 |         else
112 |             costGain = MDLcostNC - MDLcostST;
113 |             costGain_notEnc = cost_notEnc - MDLcostST;
114 |         end
115 |         
116 |         % encode the vicinities of high-deg nodes as stars
117 |         if starOption == true
118 |             fprintf( out_fid, 'st %d,', gccind(cur_center));
119 |             fprintf( out_fid, ' %d', gccind(satellites) );
120 |             encoded_SB_stars = encoded_SB_stars + 1;
121 |             
122 |             
123 |             if info == false
124 |                 fprintf( out_fid, '\n');
125 |             else
126 |                 fprintf( out_fid, ', %f | %f -- SB \n', costGain, costGain_notEnc);
127 |             end
128 |             model_idx = model_idx + 1;
129 |             model(model_idx) = struct('code', 'st', 'edges', 0, 'nodes1', gccind(cur_center), 'nodes2', gccind(satellites), 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
130 |             % check which of the structures is best for encoding: star, fc, nc
131 |         elseif starOption == false
132 |             % 0s in the error matrix  --- edges included in the structure (full clique)
133 |             E(2) = nnz(Asmall);
134 |             % 1s in the error matrix  --- edges excluded from the structure (full clique)
135 |             E(1) = n_star^2 - n_star - E(2);
136 |             
137 |             % MDL cost of encoding given substructure as a full clique
138 |             MDLcostFC = compute_encodingCost( 'fc', n, n_star, E);
139 |             MDLcosts = [ MDLcostFC, MDLcostNC, MDLcostST ];
140 |             [minCost minIdx] = min(MDLcosts);
141 |             top_gccind = sort([gccind(cur_center), gccind(satellites)]);
142 |             curind = 1:size(top_gccind,2);
143 |             switch minIdx
144 |                 case 1
145 |                     costGain = MDLcostNC - MDLcostFC;
146 |                     costGain_notEnc = cost_notEnc - MDLcostFC;
147 |                     encodeAsFClique( curind, top_gccind, costGain, costGain_notEnc, out_fid, info );
148 |                 case 2
149 |                     costGain = MDLcostNC - MDLcostNC;
150 |                     costGain_notEnc = cost_notEnc - MDLcostNC;
151 |                     m = nnz(Asmall);
152 |                     encodeAsNClique( curind, top_gccind, m, costGain, costGain_notEnc, out_fid, info );
153 |                 case 3
154 |                     fprintf( out_fid, 'st %d,', gccind(cur_center));
155 |                     for j=1:size(satellites,2)
156 |                         fprintf( out_fid, ' %d', gccind(satellites(j)) );
157 |                     end
158 |                     
159 |                     if info == false
160 |                         fprintf( out_fid, '\n');
161 |                     else
162 |                         fprintf( out_fid, ', %f | %f -- SB \n', costGain, costGain_notEnc);
163 |                     end
164 |                     model_idx = model_idx + 1;
165 |                     model(model_idx) = struct('code', 'st', 'edges', 0, 'nodes1', gccind(cur_center), 'nodes2', gccind(satellites), 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
166 |                     encoded_SB_stars = encoded_SB_stars + 1;
167 |             end
168 |             
169 |         else
170 |             wrongMessage = 'starOption should be true or false. Invalid value given.'
171 |             return
172 |         end
173 |     end
174 |     
175 |     % save structures on the disconnected components
176 |     
177 |     % reorganize the matrix
178 |     topind_size = size(topind, 2);
179 |     
180 |     totalind(cur_lpos:cur_lpos + topind_size - 1) = gccind(topind);
181 |     cur_lpos = cur_lpos + topind_size;
182 |     totalind(cur_rpos - size(disind,2) + 1:cur_rpos) = gccind(disind);
183 |     cur_rpos = cur_rpos - size(disind,2);
184 |     
185 |     gccind = gccind(newgccind);
186 |     cur_gccsize = size(gccind, 2);
187 |     
188 | end
189 | 
190 | if k > 1 && cur_gccsize >= 2
191 |     EncodeSubgraph(AOrig(gccind,gccind), [1:size(gccind,2)], gccind, n, out_fid, info, minSize);
192 | end
193 | 
194 | %% Selection of structures:
195 | % Method 1: top 10
196 | % Method 2: greedy selection
197 | 
198 | 
199 | [~, order] = sort([model(:).benefit_notEnc], 'descend');
200 | model_ordered = model(order);
201 | printModel(model_ordered, outfile_ordered);
202 | all_costs = 0;
203 | all_costs_incStruct = 0;
204 | 
205 | runtime = toc
206 | time_stored = sprintf('%s/%s_runtime.txt', outFolder, fname);
207 | save(time_stored, 'runtime', '-ascii');
208 | 
209 | disp('=== Graph decomposition and structure labeling: finished! ===')
210 | 
211 | fclose(out_fid);
212 | 
213 | end
214 | 


--------------------------------------------------------------------------------
/MDL/greedySearch_nStop.py:
--------------------------------------------------------------------------------
  1 | #!/usr/local/bin/python2.6
  2 | 
  3 | #########################################################################
  4 | #                                                                       #
  5 | # Implementation of the GreedyNForget heuristic described in the paper  #
  6 | # VOG: Summarizing and Understanding Large Graphs                       #
  7 | # by Danai Koutra, U Kang, Jilles Vreeken, Christos Faloutsos           #
  8 | # http://www.cs.cmu.edu/~dkoutra/papers/VoG.pdf                         #
  9 | #                                                                       #
 10 | # v1.1: faster implementation using dynamic programming and the         #
 11 | #       technique of memoization. Now, we can add to the summary many   #
 12 | #       more structures much faster (structLim = 10000+)                #
 13 | #                                                                       #
 14 | # v1: very slow, naive implementation, computing the MDL encoding cost  #
 15 | #     from scratch for each structure addition (structLim = 200)        #
 16 | #                                                                       #
 17 | #########################################################################
 18 | 
 19 | 
 20 | import sys
 21 | import os
 22 | import config
 23 | import copy
 24 | 
 25 | from time import time
 26 | 
 27 | from mdl import *;
 28 | from error import Error;
 29 | from graph import Graph;
 30 | from model import *;
 31 | from random import shuffle;
 32 | #from description_length import *;
 33 | 
 34 | if len(sys.argv) <= 1 :
 35 |     print 'at least: <graph.graph> [model.model] [-pC] [-lC] [-pE] [-lE] [-e{NP,NB,TP,TB}]';
 36 |     print ' optional argument model = file to read model from, otherwise only empty model';
 37 |     print ' optional argument -vX    = verbosity (1, 2, or 3)';
 38 |     print ' optional argument -pG    = plot Graph adjacency matrix';
 39 |     print ' optional argument -pC    = plot Cover matrix';
 40 |     print ' optional argument -pE    = plot Error matrix';
 41 |     print ' optional argument -lC    = list Cover entries';
 42 |     print ' optional argument -lE    = list Error entries';
 43 |     print ' optional argument -eXX   = encode error resp. untyped using prefix (NP), or';
 44 |     print '                            binomial (NB) codes, or using typed';
 45 |     print '                            prefix (TP) or binomial (TB, default) codes';
 46 |     exit();
 47 | 
 48 | if (len(sys.argv) > 1 and ("-v1" in sys.argv)) :
 49 |     config.optVerbosity = 1;
 50 | elif (len(sys.argv) > 1 and ("-v2" in sys.argv)) :
 51 |     config.optVerbosity = 2;
 52 | if (len(sys.argv) > 1 and ("-v3" in sys.argv)) :
 53 |     config.optVerbosity = 3;
 54 | 
 55 | t0 = time()
 56 | 
 57 | gFilename = sys.argv[1];
 58 | g = Graph();
 59 | g.load(gFilename);
 60 | 
 61 | 
 62 | if config.optVerbosity > 1 : print "- graph loaded."
 63 | 
 64 | m = Model();
 65 | 
 66 | 
 67 | errorEnc = config.optDefaultError;
 68 | if (len(sys.argv) > 1 and ("-eNP" in sys.argv or "-NP'" in sys.argv)) :
 69 |     errorEnc = "NP";
 70 | elif (len(sys.argv) > 1 and ("-eNB" in sys.argv or "-NB" in sys.argv)) :
 71 |     errorEnc = "NB";
 72 | elif (len(sys.argv) > 1 and ("-eTP" in sys.argv or "-TP" in sys.argv)) :
 73 |     errorEnc = "TP";
 74 | elif (len(sys.argv) > 1 and ("-eTB" in sys.argv or "-TB" in sys.argv)) :
 75 |     errorEnc = "TB";
 76 |         
 77 | if config.optVerbosity > 1 : print "- calculating L(M_0,G)"
 78 | (l_total_0, l_model_0, l_error_0, E_0) = L(g,m, errorEnc);
 79 | if config.optVerbosity > 1 : print "- calculated L(M_0,G)"
 80 | print "   \t" + "L(G,M)" + "\tL(M)" + "\tL(E)" + "\t#E+" + "\t#E-" + "\t\t#Ex";
 81 | print "M_0:\t" + '%.0f' % l_total_0 + "\t" + '%.0f' % l_model_0 + "\t" + '%.0f' %  l_error_0 + "\t" + str(E_0.numModellingErrors) + '/' + str(E_0.numCellsCovered) + '\t' + str(E_0.numUnmodelledErrors)  + '/' + str(((E_0.numNodes * E_0.numNodes)-E_0.numNodes) - E_0.numCellsCovered) + '\t' + str(E_0.numCellsExcluded);
 82 | 
 83 | 
 84 | if len(sys.argv) > 2 and sys.argv[2][0] != '-' :
 85 |     mFilename = sys.argv[2];
 86 |     m.load(mFilename);
 87 |     print "Number of structures in the model: %.0f" % m.numStructs;
 88 |     if config.optVerbosity > 1 : print "- M_x loaded."
 89 |     (l_total_x, l_model_x, l_error_x, E_x) = L(g,m, errorEnc);
 90 |     print "M_x:\t" + '%.0f' % l_total_x + "\t" + '%.0f' % l_model_x + "\t" + '%.0f' % l_error_x + "\t" + str(E_x.numModellingErrors) + '/' + str(E_x.numCellsCovered) + '\t' + str(E_x.numUnmodelledErrors)  + '/' + str(((E_x.numNodes * E_x.numNodes)-E_x.numNodes) - E_x.numCellsCovered) + '\t\t' + str(E_x.numCellsExcluded);
 91 |     
 92 |     # reinitialize the model for the greedy approach
 93 |     m = Model();
 94 | 
 95 |     # maximum number of structures considered
 96 |     structLim = 10000;
 97 |     # read maxStructs structures from the model file and save it in modelContent
 98 |     mHandle = open(mFilename, 'r')
 99 |     mContent = mHandle.readlines();  #(structLim);
100 | #    print mContent[0]
101 | #    print "length of model file: %.0f" % len(mContent);
102 |     maxStructs = len(mContent);
103 | #    print "length of model file: %.0f" % maxStructs;
104 |     lines_all = [];
105 | #    shuffle(mContent);
106 | #    print mContent[0];
107 | 
108 |     l_total_prev = l_total_0;
109 |     # the encoding costper structure is 0 initially
110 |     lmodel_struct_prev = 0;
111 |     E_x = E_0; 
112 |     E_x_old = E_0;
113 |     structsInSummary = [];
114 |     times = 1;
115 |     
116 |     mFilename_list = mFilename.split('/');
117 |     mFilename_main = mFilename_list[len(mFilename_list) - 1];
118 |     print '%s' % mFilename_main
119 |     mFilenameGreedy = 'heuristicSelection_nStop_ALL_' + mFilename_main;
120 |     fgreedy = open(mFilenameGreedy,'w')
121 |     mFilenameGreedyCost = 'heuristic_Selection_costs_ALL_' + mFilename_main;
122 |     fgreedyCost = open(mFilenameGreedyCost,'w')
123 | 
124 |     fgreedyCost.write("l_total_0: %.0f\n" % l_total_0 )
125 | 
126 |     # number of structures in the summary
127 |     kept_struct = 0;
128 | 
129 |     while times <= maxStructs : #min(structLim, maxStructs) :  # add upto structLim structures or as many as there are in the model file  
130 |        print "time\t" + '%.0f' % times;
131 |        # add to the model the new structure
132 |        newStruct = m.loadLine(mContent, times-1);
133 |        (l_total_x, l_model_x, l_model_struct, l_error_x, E_x) = Lgreedy(g, m, errorEnc, times, newStruct, l_total_prev, E_x_old, lmodel_struct_prev);
134 |        print "M_x:\t" + '%.0f' % l_total_x + "\t" + '%.0f' % l_model_x + "\t" + '%.0f' % l_error_x + "\t" + str(E_x.numModellingErrors) + '/' + str(E_x.numCellsCovered) + '\t' + str(E_x.numUnmodelledErrors)  + '/' + str(((E_x.numNodes * E_x.numNodes)-E_x.numNodes) - E_x.numCellsCovered) + '\t\t' + str(E_x.numCellsExcluded);
135 |        # print "l_total_x %.0f" % l_total_x + "l_total_prev %.0f" % l_total_prev;
136 |        if l_total_x > l_total_prev :
137 |           print "dropped the structure";
138 |           l_total_x = l_total_prev;
139 | 	  # remove the last added structure
140 |           # print "structs in model %.0f " % m.numStructs;
141 |           m.rmStructure(newStruct);
142 |           # print "structs in model %.0f " % m.numStructs;
143 |           #E_x = copy.deepcopy(); #E_x.recoverOld(); 
144 |           print "-----------------------------------------------------------"
145 |        else : 
146 |           # print "kept the structure";
147 |           kept_struct += 1;
148 |           # print "structs in model %.0f " % m.numStructs;
149 |           # save the Error matrix to this point
150 |           #E_x.saveOld();
151 |           #E_x_old = copy.deepcopy(E_x);
152 |           E_x_old = E_x; 
153 |           #E_x_old = Error(g);
154 |           #E_x_old.deepish_copy(E_x, g);
155 |           
156 |           l_total_prev = l_total_x;
157 |           # update the up-to-now cost per structure
158 |           lmodel_struct_prev = l_model_struct;
159 |           structsInSummary.append(times);
160 |           fgreedyCost.write("Time %.0f" % times + "\t%.0f\n" % l_total_x )
161 |           print "-----------------------------------------------------------"
162 |           if kept_struct == structLim :
163 |              break;
164 |        if times == 50 or times % 100 == 0 :
165 |           mFilenameGreedyTemp = 'greedySelection_' + str(times) + '_' + mFilename_main;
166 |           fgreedyTemp = open(mFilenameGreedyTemp, 'w');
167 |           fgreedyTemp.write("Structures of model in the summary (each number is the corresponding line number of the structure in the model file)\n");
168 |           for line in structsInSummary:
169 |               # fgreedyTemp.write("%s" % line + "\t%s" % mContent[line]);
170 |               fgreedyTemp.write("%s\n" % line);
171 |        times += 1; 
172 | 	
173 |     print "structs in model %.0f " % m.numStructs;
174 | 
175 |     for line in structsInSummary:
176 |        fgreedy.write("%s\n" % line)
177 |     
178 |     fgreedy.close();
179 |     fgreedyCost.close();
180 | 
181 | 
182 | if (len(sys.argv) > 3 and "-pG" in sys.argv) :
183 |     print "Adjacency matrix:";
184 |     g.plot();
185 | 
186 | if (len(sys.argv) > 3 and "-pC" in sys.argv) :
187 |     print "Cover matrix:";
188 |     E_x.plotCover();
189 | 
190 | if (len(sys.argv) > 3 and "-pE" in sys.argv) :
191 |     print "Error matrix:";    
192 |     E_x.plotError();
193 | 
194 | if (len(sys.argv) > 3 and "-lC" in sys.argv) :
195 |     print "Cover list:";
196 |     E_x.listCover();
197 | 
198 | if (len(sys.argv) > 3 and "-lE" in sys.argv) :
199 |     print "Error list:";    
200 |     E_x.listError();
201 | 
202 | print time()-t0    
203 | print "Total running time %.2f" % (time()-t0);
204 | 
205 | mHandle.close()
206 | 


--------------------------------------------------------------------------------
/STRUCTURE_DISCOVERY/ExactStructure.m:
--------------------------------------------------------------------------------
  1 | % Encode the connected component from SlashBurn.
  2 | function [ exact_found ] = ExactStructure( Asmall, curind, top_gccind, N_tot, out_fid, info, minSize )
  3 | 
  4 | global model;
  5 | global model_idx;
  6 | 
  7 | % Asmall = B(curind,curind);
  8 | 
  9 | exact_found = false;
 10 | n = size(curind, 2);
 11 | m = nnz(Asmall);
 12 | 
 13 | if n==1
 14 |     return;
 15 | end
 16 | %fprintf('n=%d, m=%d\n', n, m);
 17 | 
 18 | % cost of encoding the structure as near-clique
 19 | MDLcost_nc = compute_encodingCost( 'nc', N_tot, n, Asmall);
 20 | % cost of not encoding the structure at all (noise)
 21 | cost_notEnc = compute_encodingCost( 'err', 0, 0, [nnz(Asmall) n^2-nnz(Asmall)]);
 22 | 
 23 | if ( m == n*n - n )      % full clique
 24 |     if n ~= 2
 25 |         MDLcost_fc = compute_encodingCost( 'fc', N_tot, n, zeros(n,n));
 26 |         costGain = MDLcost_nc - MDLcost_fc;
 27 |         costGain_notEnc = cost_notEnc - MDLcost_fc;
 28 |         fprintf(out_fid, 'fc');
 29 |         for i=1:size(curind, 2)
 30 |             fprintf(out_fid, ' %d', top_gccind( curind(i) ) );
 31 |         end
 32 |         if info == false
 33 |             fprintf(out_fid, '\n');
 34 |         else
 35 |             fprintf(out_fid, ', %f | %f -- exact \n', costGain, costGain_notEnc);
 36 |         end
 37 |         exact_found = true;
 38 |         model_idx = model_idx + 1;
 39 |         model(model_idx) = struct('code', 'fc', 'edges', 0, 'nodes1', top_gccind(curind), 'nodes2', [], 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
 40 |         %entries = size(model, 2);
 41 |         %model(entries+1) = struct('code', 'fc', 'nodes1', top_gccind(curind), 'nodes2', [], 'benefit', costGain);
 42 |     elseif n==2
 43 |         MDLcost_ch = compute_encodingCost( 'ch', N_tot, n, zeros(n,n));
 44 |         costGain = MDLcost_nc - MDLcost_ch;
 45 |         costGain_notEnc = cost_notEnc - MDLcost_ch;
 46 |         fprintf(out_fid, 'ch');
 47 |         fprintf(out_fid, ' %d', top_gccind( curind(1:2) ));
 48 |         if info == false
 49 |             fprintf(out_fid, '\n');
 50 |         else
 51 |             fprintf(out_fid, ', %f | %f -- exact \n', costGain, costGain_notEnc);
 52 |         end
 53 |         exact_found = true;
 54 |         model_idx = model_idx + 1;
 55 |         model(model_idx) = struct('code', 'ch', 'edges', 0, 'nodes1', top_gccind(curind), 'nodes2', [], 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
 56 |         %entries = size(model, 2);
 57 |         %model(entries+1) = struct('code', 'ch', 'nodes1', top_gccind(curind), 'nodes2', [], 'benefit', costGain);
 58 |     end
 59 | elseif (m == 2*(n-1))   % chain or star
 60 |     degree = sum(Asmall);
 61 |     ind = find(degree > 0);
 62 |     d1count=0;
 63 |     d2count=0;
 64 |     dn1count=0;
 65 |     
 66 |     for i=1:size(degree, 2)
 67 |         if( degree(i) == 1 )
 68 |             d1count = d1count + 1;
 69 |         elseif degree(i) == 2
 70 |             d2count = d2count + 1;
 71 |         elseif degree(i) == n-1
 72 |             dn1count = dn1count + 1;
 73 |         end
 74 |     end
 75 |     
 76 |     %fprintf('d1count=%d, d2count=%d, dn1count=%d\n', d1count, d2count, dn1count);
 77 |     
 78 |     if d1count == 2 && d2count == n-2     % chain
 79 |         MDLcost_ch = compute_encodingCost( 'ch', N_tot, n, zeros(n,n));
 80 |         costGain = MDLcost_nc - MDLcost_ch;
 81 |         costGain_notEnc = cost_notEnc - MDLcost_ch;
 82 |         fprintf(out_fid, 'ch');
 83 |         d1ind = find( degree == 1);
 84 |         fprintf(out_fid, ' %d', top_gccind( curind(d1ind(1)) ) );
 85 |         
 86 |         d2ind = find(degree==2);
 87 |         %for i=1:size(d2ind, 2)
 88 |         fprintf(out_fid, ' %d', top_gccind( curind(d2ind(1:size(d2ind, 2))) ) );
 89 |         %end
 90 |         
 91 |         fprintf(out_fid, ' %d', top_gccind( curind(d1ind(2) )) );
 92 |         
 93 |         if info == false
 94 |             fprintf(out_fid, '\n');
 95 |         else
 96 |             fprintf(out_fid, ', %f | %f -- exact \n', costGain, costGain_notEnc);
 97 |         end
 98 |         exact_found = true;
 99 |         model_idx = model_idx + 1;
100 |         model(model_idx) = struct('code', 'ch', 'edges', 0, 'nodes1', [top_gccind(curind(d1ind(1))) top_gccind(curind(d2ind(1:size(d2ind, 2)))) top_gccind(curind(d1ind(2)))], 'nodes2', [], 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
101 |         %entries = size(model, 2);
102 |         %model(entries+1) = struct('code', 'ch', 'nodes1', top_gccind(curind), 'nodes2', [], 'benefit', costGain);
103 |         
104 |         %     elseif d1count==n-1 & dn1count==1   % star
105 |         %         fprintf(out_fid, 'st');
106 |         %         dn1ind = find( degree == n-1);
107 |         %         fprintf(out_fid, ' %d,', top_gccind( dn1ind(1) ) );
108 |         %
109 |         %         d1ind = find(degree==2);
110 |         %         for i=1:size(d1ind, 2)
111 |         %             fprintf(out_fid, ' %d', top_gccind( d1ind(i) ) );
112 |         %         end
113 |         %
114 |         %         fprintf(out_fid, '\n');
115 |         %         exact_found = true;
116 |         %     end
117 |         % else            % near clique
118 |         %     fprintf(out_fid, 'nc %d,', m/2);
119 |         %     for i=1:size(curind, 2)
120 |         %         fprintf(out_fid, ' %d', top_gccind( curind(i) ) );
121 |         %     end
122 |         %     fprintf(out_fid, '\n');
123 |     end
124 | else
125 |     %evalmax = eigs( Asmall,1, 'LA' );
126 |     %evalmin = eigs( Asmall,1, 'SA' );
127 |     opts.tol = 1e-2; 
128 |     evals = eigs(Asmall, 2, 'lm', opts); % the eigenvalues with maximum magnitude
129 |     
130 |     if ( max(evals) == - min(evals) )  % bipartite graph (special case: star)
131 |         [ set1, set2 ] = BFScoloring( Asmall );
132 |         if length(set1)+length(set2) < minSize
133 |             exact_found = true;
134 |             return;
135 |         end
136 |         if length(set1) == 1 && length(set2) == 1
137 |             MDLcost_ch = compute_encodingCost( 'ch', N_tot, n, zeros(n,n));
138 |             costGain = MDLcost_nc - MDLcost_ch;
139 |             costGain_notEnc = cost_notEnc - MDLcost_ch;
140 |             fprintf(out_fid, 'ch');
141 |             fprintf(out_fid, ' %d', top_gccind( curind([set1, set2]) ));
142 |             if info == false
143 |                 fprintf(out_fid, '\n');
144 |             else
145 |                 fprintf(out_fid, ', %f | %f -- exact \n', costGain, costGain_notEnc);
146 |             end
147 |             exact_found = true;
148 |             model_idx = model_idx + 1;
149 |             model(model_idx) = struct('code', 'ch', 'edges', 0, 'nodes1', top_gccind(curind([set1, set2])), 'nodes2', [], 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
150 |         elseif length(set1) == 1
151 |             MDLcost_st = compute_encodingCost( 'st', N_tot, n, zeros(n,n));
152 |             costGain = MDLcost_nc - MDLcost_st;
153 |             costGain_notEnc = cost_notEnc - MDLcost_st;
154 |             fprintf(out_fid, 'st %d,', top_gccind( curind(set1) ));
155 |             fprintf(out_fid, ' %d', top_gccind( curind(set2) ) );
156 |             if info == false
157 |                 fprintf(out_fid, '\n');
158 |             else
159 |                 fprintf(out_fid, ', %f | %f -- exact \n', costGain, costGain_notEnc);
160 |             end
161 |             exact_found = true;
162 |             model_idx = model_idx + 1;
163 |             model(model_idx) = struct('code', 'fc', 'edges', 0, 'nodes1', top_gccind(curind), 'nodes2', [], 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
164 |             %entries = size(model, 2);
165 |             %model(entries+1) = struct('code', 'st', 'nodes1', top_gccind(curind(set1)), 'nodes2', top_gccind(curind(set2)), 'benefit', costGain);
166 |         elseif length(set2) == 1
167 |             MDLcost_st = compute_encodingCost( 'st', N_tot, n, zeros(n,n));
168 |             costGain = MDLcost_nc - MDLcost_st;
169 |             costGain_notEnc = cost_notEnc - MDLcost_st;
170 |             fprintf(out_fid, 'st %d,', top_gccind( curind(set2) ));
171 |             fprintf(out_fid, ' %d', top_gccind( curind(set1) ) );
172 |             if info == false
173 |                 fprintf(out_fid, '\n');
174 |             else
175 |                 fprintf(out_fid, ', %f | %f -- exact \n', costGain, costGain_notEnc);
176 |             end
177 |             exact_found = true;
178 |             model_idx = model_idx + 1;
179 |             model(model_idx) = struct('code', 'st', 'edges', 0, 'nodes1', top_gccind(curind(set1)), 'nodes2', top_gccind(curind(set1)), 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
180 |             %entries = size(model, 2);
181 |             %model(entries+1) = struct('code', 'st', 'nodes1', top_gccind(curind(set2)), 'nodes2', top_gccind(curind(set1)), 'benefit', costGain);
182 |         else % bipartite graph
183 |             degrees = sum(Asmall,2);
184 |             % First check if it is bipartite core: The degrees of the nodes
185 |             % in the first set should be equal to the number of nodes in
186 |             % the second set, and vice versa.
187 |             if sum(full(degrees(set1)) ~= length(set2)*ones(length(set1),1)) && ...
188 |                     sum(full(degrees(set2)) ~= length(set1)*ones(length(set2),1)) == 0
189 |                 MDLcost_bc = compute_encodingCost( 'bc', N_tot, length(set1), zeros(n,n), length(set2));
190 |                 costGain = MDLcost_nc - MDLcost_bc;
191 |                 costGain_notEnc = cost_notEnc - MDLcost_bc;
192 |                 fprintf(out_fid, 'bc');
193 |                 fprintf(out_fid, ' %d', top_gccind( curind(set1) ));
194 |                 fprintf(out_fid, ',');
195 |                 fprintf(out_fid, ' %d', top_gccind( curind(set2) ) );
196 |                 if info == false
197 |                     fprintf(out_fid, '\n');
198 |                 else
199 |                     fprintf(out_fid, ', %f | %f -- exact \n', costGain, costGain_notEnc);
200 |                 end
201 |                 exact_found = true;
202 |                 model_idx = model_idx + 1;
203 |                 model(model_idx) = struct('code', 'bc', 'edges', 0, 'nodes1', top_gccind(curind(set1)), 'nodes2', top_gccind(curind(set2)), 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
204 |             else
205 |                 % it's not a bipartite core (full bipartite graph) -
206 |                 % However, it is a bipartite graph. Let's see if we should
207 |                 % encode it as a bipartite core or a near bipartite core.
208 |                 MDLcost_bc = compute_encodingCost( 'bc', N_tot, length(set1), zeros(n,n), length(set2));
209 |                 MDLcost_nb = compute_encodingCost( 'nb', N_tot, length(set1), zeros(n,n), length(set2));
210 |                 if MDLcost_bc <= MDLcost_nb
211 |                     costGain = MDLcost_nc - MDLcost_bc;
212 |                     costGain_notEnc = cost_notEnc - MDLcost_bc;
213 |                     fprintf(out_fid, 'bc');
214 |                     fprintf(out_fid, ' %d', top_gccind( curind(set1) ));
215 |                     fprintf(out_fid, ',');
216 |                     fprintf(out_fid, ' %d', top_gccind( curind(set2) ) );
217 |                     if info == false
218 |                         fprintf(out_fid, '\n');
219 |                     else
220 |                         fprintf(out_fid, ', %f | %f -- not exact \n', costGain, costGain_notEnc);
221 |                     end
222 |                     exact_found = true;
223 |                     model_idx = model_idx + 1;
224 |                     model(model_idx) = struct('code', 'bc', 'edges', 0, 'nodes1', top_gccind(curind(set1)), 'nodes2', top_gccind(curind(set2)), 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
225 |                 else % better to encode it as near-bipartite core
226 |                     costGain = MDLcost_nc - MDLcost_nb;
227 |                     costGain_notEnc = cost_notEnc - MDLcost_nb;
228 |                     fprintf(out_fid, 'nb');
229 |                     fprintf(out_fid, ' %d', top_gccind( curind(set1) ));
230 |                     fprintf(out_fid, ',');
231 |                     fprintf(out_fid, ' %d', top_gccind( curind(set2) ) );
232 |                     if info == false
233 |                         fprintf(out_fid, '\n');
234 |                     else
235 |                         fprintf(out_fid, ', %f | %f -- not exact \n', costGain, costGain_notEnc);
236 |                     end
237 |                     exact_found = true;
238 |                     model_idx = model_idx + 1;
239 |                     model(model_idx) = struct('code', 'nb', 'edges', 0, 'nodes1', top_gccind(curind(set1)), 'nodes2', top_gccind(curind(set2)), 'benefit', costGain, 'benefit_notEnc', costGain_notEnc);
240 |                 end
241 |             end
242 |             
243 |             %entries = size(model, 2);
244 |             %model(entries+1) = struct('code', 'bc', 'nodes1', top_gccind(curind(set1)), 'nodes2', top_gccind(curind(set2)), 'benefit', costGain);
245 |         end
246 |         
247 |         
248 |     end
249 | end
250 | end
251 | 
252 | 
253 | 


--------------------------------------------------------------------------------
/MDL/model.py:
--------------------------------------------------------------------------------
  1 | #from math import log;
  2 | 
  3 | class Model :
  4 |     strucTypes = [];
  5 |     numStrucTypes = 0;
  6 |     structs = [];
  7 |     numStructs = 0;
  8 |     
  9 |     numFullCliques = 0;
 10 |     numNearCliques = 0;
 11 |     numFullOffDiagonals = 0;
 12 |     numNearOffDiagonals = 0;
 13 |     numChains = 0;
 14 |     numStars = 0;
 15 |     numBiPartiteCores = 0;
 16 |     numNearBiPartiteCores = 0;
 17 |     numCorePeripheries = 0;
 18 |     numJellyFishes = 0;
 19 |     
 20 |     def __init__(self):
 21 |         self.strucTypes = ["fc","nc","ch","st","bc","nb"]; #,"cp","jf","fod","nod"];
 22 |         self.numStrucTypes = len(self.strucTypes);
 23 |         self.structs = [];        
 24 |         self.numStructs = 0;
 25 | 
 26 |     def setStrucTypes(self, st) :
 27 |         self.strucTypes = st;
 28 |         self.numStrucTypes = len(self.strucTypes);
 29 |         
 30 |     # struct of type Struct
 31 |     def addStructure(self, struct) :
 32 |         self.structs.append(struct);
 33 |         self.numStructs += 1;
 34 |         
 35 |         if struct.getType() not in self.strucTypes :
 36 |             print "structure type not declared";
 37 |             
 38 |         if struct.isFullClique() :
 39 |             self.numFullCliques += 1;
 40 |         elif struct.isNearClique() :
 41 |             self.numNearCliques += 1;            
 42 |         if struct.isFullOffDiagonal() :
 43 |             self.numFullOffDiagonals+= 1;
 44 |         elif struct.isNearOffDiagonal() :
 45 |             self.numNearOffDiagonals += 1;            
 46 |         elif struct.isChain() :
 47 |             self.numChains += 1;
 48 |         elif struct.isStar() :
 49 |             self.numStars += 1;
 50 |         elif struct.isBiPartiteCore() :
 51 |             self.numBiPartiteCores += 1;        
 52 |         elif struct.isNearBiPartiteCore() :
 53 |             self.numNearBiPartiteCores += 1;
 54 |         elif struct.isCorePeriphery() :
 55 |             self.numCorePeripheries += 1;        
 56 |         elif struct.isJellyFish() :
 57 |             self.numJellyFishes += 1;        
 58 | 
 59 |     # remove structure struct
 60 |     def rmStructure(self, struct) :
 61 |         self.structs.remove(struct);
 62 |         self.numStructs -= 1;
 63 |         
 64 |         if struct.getType() not in self.strucTypes :
 65 |             print "structure type not declared";
 66 |             
 67 |         if struct.isFullClique() :
 68 |             self.numFullCliques -= 1;
 69 |         elif struct.isNearClique() :
 70 |             self.numNearCliques -= 1;            
 71 |         if struct.isFullOffDiagonal() :
 72 |             self.numFullOffDiagonals-= 1;
 73 |         elif struct.isNearOffDiagonal() :
 74 |             self.numNearOffDiagonals -= 1;            
 75 |         elif struct.isChain() :
 76 |             self.numChains -= 1;
 77 |         elif struct.isStar() :
 78 |             self.numStars -= 1;
 79 |         elif struct.isBiPartiteCore() :
 80 |             self.numBiPartiteCores -= 1;        
 81 |         elif struct.isNearBiPartiteCore() :
 82 |             self.numNearBiPartiteCores -= 1;
 83 |         elif struct.isCorePeriphery() :
 84 |             self.numCorePeripheries -= 1;        
 85 |         elif struct.isJellyFish() :
 86 |             self.numJellyFishes -= 1;        
 87 | 
 88 |     def load(self, fullpath):
 89 |         fg = open(fullpath);
 90 |         for line in fg :
 91 |             if len(line) < 4 or line[0] == "#" :
 92 |                 continue;
 93 |             struct = Structure.load(line);
 94 |             if struct != 0 :
 95 |                 self.addStructure(struct);
 96 |         return;
 97 |      
 98 |     def loadLine(self, content, lineNo):
 99 |         line = content[lineNo]; # line of the model to be added
100 |         if len(line) < 4 or line[0] == "#":
101 |             return -1;
102 |         struct = Structure.load(line);
103 |         if struct != 0 :
104 |             self.addStructure(struct);
105 | 	return struct;
106 | 
107 |     def loadLines(self, fullpath, lineList):
108 |         fg = open(fullpath);
109 |         lineNo = 0;
110 |         for line in fg :
111 |             lineNo = lineNo + 1;
112 |             if lineNo > lineList[len(lineList) - 1] :
113 |                 break;
114 |             if lineNo in lineList :
115 |             	if len(line) < 4 or line[0] == "#":
116 |                 	continue;
117 |             	struct = Structure.load(line);
118 |             	if struct != 0 :
119 |                 	self.addStructure(struct);
120 |         return;
121 | 
def _parseNodeIds(text):
    """Parse a whitespace-separated list of node ids into a list of ints.

    Each token is either a single integer or an inclusive range written as
    "first-last" (for example "5-7" yields 5, 6, 7).  The ids are returned
    in the order they appear; any run of whitespace separates tokens.
    Raises ValueError when a token is not an integer or a valid range.
    """
    nodes = []
    for token in text.split() :
        if token.find('-') > 0 :
            bounds = token.split('-')
            nodes.extend(range(int(bounds[0]), int(bounds[1]) + 1))
        else :
            nodes.append(int(token))
    return nodes


def _parseNodeListPair(text):
    """Parse "<left ids>, <right ids>" into two sorted lists of ints.

    Raises IndexError when ``text`` contains no comma.
    """
    parts = text.strip().split(',')
    return sorted(_parseNodeIds(parts[0])), sorted(_parseNodeIds(parts[1]))


class Structure :
    """Base class of all model structures.

    Every is*() predicate answers False here; each concrete subclass
    overrides exactly the one that names it.
    """

    @staticmethod
    def getType(self=None):
        # The optional argument keeps calls that pass one positional
        # argument working while also allowing the zero-argument call that
        # every subclass supports.
        return "?"

    def isFullClique(self):
        return False
    def isNearClique(self):
        return False

    def isFullOffDiagonal(self):
        return False
    def isNearOffDiagonal(self):
        return False

    def isChain(self):
        return False
    def isStar(self):
        return False

    def isBiPartiteCore(self):
        return False

    def isNearBiPartiteCore(self):
        return False

    def isCorePeriphery(self):
        return False

    def isJellyFish(self):
        return False

    @staticmethod
    def load(line) :
        """Build the structure described by ``line``.

        The line is handed to the first structure class whose type code is
        a prefix of it.  Returns 0 when no type code matches, which is the
        same "nothing loaded" sentinel the subclass loaders use.
        """
        loaders = (FullClique, NearClique,
                   FullOffDiagonal, NearOffDiagonal,
                   Chain, Star,
                   BiPartiteCore, NearBiPartiteCore,
                   CorePeriphery, JellyFish)
        for cls in loaders :
            if line.startswith(cls.getType()) :
                return cls.load(line)
        return 0


class Clique(Structure) :
    # class-level defaults, replaced per instance by the subclasses' __init__
    nodes = []
    numNodes = 0


class FullClique(Clique) :
    def __init__(self, nodes):
        self.nodes = nodes
        self.numNodes = len(nodes)

    @staticmethod
    def getType():
        return "fc"

    def isFullClique(self):
        return True

    @staticmethod
    def load(line) :
        """Parse "fc 1 2 3 4 .." ; returns 0 if ``line`` is not an fc line."""
        if line[:2] != FullClique.getType() :
            return 0
        return FullClique(sorted(_parseNodeIds(line[3:])))


class NearClique(Clique) :
    numEdges = 0

    def __init__(self, nodes, numEdges):
        self.nodes = nodes
        self.numNodes = len(nodes)
        self.numEdges = numEdges

    @staticmethod
    def getType():
        return "nc"

    def isNearClique(self):
        return True

    @staticmethod
    def load(line) :
        """Parse "nc <edge count>, 1 2 3 4 .." ; returns 0 for other lines."""
        if line[:2] != NearClique.getType() :
            return 0
        cParts = line[3:].strip().split(',')
        # the edge count may be written as a float, e.g. "12.0"
        numEdges = int(float(cParts[0].strip()))
        return NearClique(sorted(_parseNodeIds(cParts[1])), numEdges)


class Rectangle(Structure) :
    # class-level defaults, replaced per instance by __init__
    lNodeList = []
    rNodeList = []
    numNodesLeft = 0
    numNodesRight = 0

    def __init__(self, left, right):
        self.lNodeList = left
        self.rNodeList = right
        self.numNodesLeft = len(left)
        self.numNodesRight = len(right)


class FullOffDiagonal(Rectangle) :
    def __init__(self, left, right):
        Rectangle.__init__(self, left, right)

    @staticmethod
    def getType():
        return "fod"

    def isFullOffDiagonal(self):
        return True

    @staticmethod
    def load(line) :
        """Parse "fod [left ids], [right ids]" ; returns 0 for other lines."""
        if line[:3] != FullOffDiagonal.getType() :
            return 0
        left, right = _parseNodeListPair(line[4:])
        return FullOffDiagonal(left, right)


class NearOffDiagonal(Rectangle) :
    def __init__(self, left, right):
        Rectangle.__init__(self, left, right)

    @staticmethod
    def getType():
        return "nod"

    def isNearOffDiagonal(self):
        return True

    @staticmethod
    def load(line) :
        """Parse "nod [left ids], [right ids]" ; returns 0 for other lines."""
        if line[:3] != NearOffDiagonal.getType() :
            return 0
        left, right = _parseNodeListPair(line[4:])
        return NearOffDiagonal(left, right)


class Chain(Structure) :
    # class-level defaults, replaced per instance by __init__
    nodes = []
    numNodes = 0

    def __init__(self, nodes):
        self.nodes = nodes
        self.numNodes = len(nodes)

    @staticmethod
    def getType():
        return "ch"

    def isChain(self):
        return True

    @staticmethod
    def load(line) :
        """Parse "ch 1 2 3 4 .." ; returns 0 if ``line`` is not a ch line.

        Unlike the other loaders the node order of the line is kept.
        """
        if line[:2] != Chain.getType() :
            return 0
        return Chain(_parseNodeIds(line[3:]))


class Star(Structure) :
    # class-level defaults, replaced per instance by __init__
    cNode = -1
    sNodes = []
    numSpokes = 0

    def __init__(self, hub, spokes):
        self.cNode = hub
        self.sNodes = spokes
        self.numSpokes = len(spokes)

    @staticmethod
    def getType():
        return "st"

    def isStar(self):
        return True

    @staticmethod
    def load(line) :
        """Parse "st <hubid>, [spoke ids ...]" ; returns 0 for other lines.

        Only the first id before the comma is used as the hub.
        """
        if line[:2] != Star.getType() :
            return 0
        parts = line[3:].strip().split(',')
        hubs = _parseNodeIds(parts[0])
        if not hubs :
            raise ValueError("star line has no hub id: %r" % (line,))
        return Star(hubs[0], sorted(_parseNodeIds(parts[1])))


class BiPartiteCore(Structure) :
    # class-level defaults, replaced per instance by __init__
    lNodes = []
    numNodesLeft = 0
    rNodes = []
    numNodesRight = 0

    def __init__(self, left, right):
        self.lNodes = left
        self.numNodesLeft = len(left)
        self.rNodes = right
        self.numNodesRight = len(right)

    @staticmethod
    def getType():
        return "bc"

    def isBiPartiteCore(self):
        return True

    @staticmethod
    def load(line) :
        """Parse "bc [left ids], [right ids]" ; returns 0 for other lines."""
        if line[:2] != BiPartiteCore.getType() :
            return 0
        left, right = _parseNodeListPair(line[3:])
        return BiPartiteCore(left, right)


class NearBiPartiteCore(Structure) :
    # class-level defaults, replaced per instance by __init__
    lNodes = []
    numNodesLeft = 0
    rNodes = []
    numNodesRight = 0

    def __init__(self, left, right):
        self.lNodes = left
        self.numNodesLeft = len(left)
        self.rNodes = right
        self.numNodesRight = len(right)
        # Older spelling of the same count, kept so existing readers of
        # this attribute continue to work.
        self.numRightNodes = self.numNodesRight

    @staticmethod
    def getType():
        return "nb"

    def isNearBiPartiteCore(self):
        return True

    @staticmethod
    def load(line) :
        """Parse "nb [left ids], [right ids]" ; returns 0 for other lines."""
        if line[:2] != NearBiPartiteCore.getType() :
            return 0
        left, right = _parseNodeListPair(line[3:])
        return NearBiPartiteCore(left, right)


class CorePeriphery(Structure) :
    # class-level defaults, replaced per instance by __init__
    cNodes = []
    numCores = 0
    sNodes = []
    numSpokes = 0

    def __init__(self, cores, spokes):
        self.cNodes = cores
        self.numCores = len(cores)
        self.sNodes = spokes
        self.numSpokes = len(spokes)

    @staticmethod
    def getType():
        return "cp"

    def isCorePeriphery(self):
        return True

    @staticmethod
    def load(line) :
        """Parse "cp [hubids], [spoke ids]" ; returns 0 for other lines."""
        if line[:2] != CorePeriphery.getType() :
            return 0
        cores, spokes = _parseNodeListPair(line[3:])
        return CorePeriphery(cores, spokes)


class JellyFish(Structure) :
    # class-level defaults, replaced per instance by __init__
    cNodes = []
    numCores = 0
    sNodes = [[]]
    numSpokes = []
    numSpokeSum = 0

    def __init__(self, cores, spokes):
        self.cNodes = cores
        self.numCores = len(cores)
        self.sNodes = spokes
        self.numSpokes = [len(s) for s in spokes]
        self.numSpokeSum = sum(self.numSpokes)

    @staticmethod
    def getType():
        return "jf"

    def isJellyFish(self):
        return True

    @staticmethod
    def load(line) :
        """Parse "jf [hubids], [spoke ids of hub 1], [spoke ids of hub 2], .."

        Returns 0 if ``line`` is not a jf line.  One comma-separated spoke
        group is read per hub; IndexError is raised when groups are missing.
        """
        if line[:2] != JellyFish.getType() :
            return 0
        parts = line[3:].strip().split(',')
        cores = _parseNodeIds(parts[0])
        spokes = [sorted(_parseNodeIds(parts[i + 1]))
                  for i in range(len(cores))]
        # Sort hubs and spoke groups together so that sNodes[i] still
        # belongs to cNodes[i] after the hubs have been put in order.
        paired = sorted(zip(cores, spokes), key=lambda pair: pair[0])
        return JellyFish([hub for hub, _ in paired],
                         [group for _, group in paired])
583 | 
584 | 
585 | 
586 | 


--------------------------------------------------------------------------------
/MDL/mdl_structs.py:
--------------------------------------------------------------------------------
  1 | import config;
  2 | import mdl_base;
  3 | 
  4 | from math import log,factorial;
  5 | from error import Error;
  6 | from graph import Graph;
  7 | from model import Model;
  8 | 
  9 | from mdl_base import *;
 10 | 
 11 | # Encoded Size of a Full-Clique
 12 | def LfullClique(c, M, G, E):
 13 |     # update Error
 14 |     coverFullClique(G, E, c);
 15 |     
 16 |     cost = LN(c.numNodes);          # encode number of nodes
 17 |     if G.numNodes > 0 and c.numNodes > 0 :
 18 |         cost += LU(G.numNodes, c.numNodes);     # encode node ids
 19 |     return cost;
 20 | 
 21 | def coverFullClique(G, E, c):
 22 |     # c.nodes is ordered
 23 |     for i_idx in range(c.numNodes) :
 24 |         i = c.nodes[i_idx];
 25 |         for j_idx in range(i_idx+1,c.numNodes) :
 26 |             j = c.nodes[j_idx];
 27 |             
 28 |             if not E.isExcluded(i,j) :
 29 |                 # only if (i,j) is not modelled perfectly
 30 |                 
 31 |                 if not E.isCovered(i,j) :
 32 |                     # edge is not modelled yet
 33 |                     if G.hasEdge(i,j) :
 34 |                         # yet there is a real edge, so now we undo an error
 35 |                         E.delUnmodelledError(i,j);
 36 |                     else :
 37 |                         # there is no real edge, but now we say there is, so we introduce error
 38 |                         E.addModellingError(i,j);
 39 |                     E.cover(i,j);
 40 | 
 41 |                 else :
 42 |                     # edge is already modelled
 43 |                     if G.hasEdge(i,j) and E.isModellingError(i,j) :
 44 |                         # edge exists, but model denied
 45 |                         E.delModellingError(i,j);
 46 |                     elif not G.hasEdge(i,j) and not E.isModellingError(i,j) :
 47 |                         # edge does not exist, but now we say it does
 48 |                         E.addModellingError(i,j);
 49 |     return;
 50 | 
 51 | 
 52 | # Encoded Size of a Near-Clique  
 53 | def LnearClique(c, M, G, E) :
 54 |     # update Error, count coverage
 55 |     (cnt0,cnt1) = coverNearClique(G, E, c)
 56 |     
 57 |     cost = LN(c.numNodes);              # encode number of nodes
 58 |     cost += LU(G.numNodes, c.numNodes);  # encode node ids
 59 |     if cnt0+cnt1 > 0 :
 60 |         cost += log(cnt0+cnt1, 2);     # encode probability of a 1 (cnt0+cnt1 is number of cells we describe, upperbounded by numnodes 2)
 61 |         cost += LnU(cnt0+cnt1, cnt1);       # encode the edges
 62 |     return cost;
 63 | 	  
 64 | def coverNearClique(G, E, c) :
 65 |     # c.nodes is ordered    
 66 |     cnt0 = 0;
 67 |     cnt1 = 0;
 68 |     for i_idx in range(c.numNodes) :
 69 |         i = c.nodes[i_idx];
 70 |         for j_idx in range(i_idx+1, c.numNodes) :
 71 |             j = c.nodes[j_idx];
 72 |             
 73 |             if not E.isExcluded(i,j) :
 74 |                 # only if (i,j) is not already modelled perfectly
 75 |                 
 76 |                 if not E.isCovered(i,j) :
 77 |                     # edge is not modelled yet
 78 |                     if G.hasEdge(i,j) :
 79 |                         # yet there is a real edge, so now we undo an error
 80 |                         E.delUnmodelledError(i,j);
 81 |                     E.coverAndExclude(i,j);
 82 | 
 83 |                 else :
 84 |                     # edge is already modelled
 85 |                     if E.isModellingError(i,j) :
 86 |                         # but wrongly, we undo that error
 87 |                         E.delModellingError(i,j);
 88 |                     E.exclude(i,j)
 89 |                             
 90 |                 if G.hasEdge(i,j) :
 91 |                     cnt1 += 1;
 92 |                 else:
 93 |                     cnt0 += 1;
 94 |                 
 95 |     return (cnt0,cnt1);
 96 | 
 97 | ## Off Diagonals
 98 | # Encoded Size of a Full-Clique
 99 | def LfullOffDiagonal(c, M, G, E):
100 |     # update Error
101 |     coverFullOffDiagonal(G, E, c);
102 |     
103 |     cost = LN(c.numNodesLeft) + LN(c.numNodesRight);          # encode number of nodes
104 |     cost += LU(G.numNodes, c.numNodesLeft);     # encode node ids
105 |     cost += LU(G.numNodes-c.numNodesLeft, c.numNodesRight);     # encode node ids
106 |     return cost;
107 | 
def coverFullOffDiagonal(G, E, c):
    """Update `E` for modelling every (left, right) node pair of `c` as an edge.

    Left nodes come from c.lNodeList, right nodes from c.rNodeList. Pairs for
    which E.isExcluded holds are skipped. Returns None.
    """
    for a in range(c.numNodesLeft):
        i = c.lNodeList[a]
        for b in range(c.numNodesRight):
            j = c.rNodeList[b]

            if E.isExcluded(i, j):
                continue

            if G.hasEdge(i, j):
                if not E.isCovered(i, j):
                    # real edge that was not modelled yet: the error is undone
                    E.delUnmodelledError(i, j)
                    E.cover(i, j)
                elif E.isModellingError(i, j):
                    # edge exists but the model denied it so far
                    E.delModellingError(i, j)
            else:
                if not E.isCovered(i, j):
                    # no real edge, but this structure claims one
                    E.addModellingError(i, j)
                    E.cover(i, j)
                elif not E.isModellingError(i, j):
                    # cell was modelled without error; now an edge is claimed
                    E.addModellingError(i, j)
    return
137 | 
138 | 
139 | # Encoded Size of a Near-Off Diagonal
def LnearOffDiagonal(c, M, G, E):
    """Return the encoded size of near off-diagonal `c` and update `E` for its cells.

    `M` is accepted for a uniform signature but not used here.
    """
    # counts of described cells that are 0 and 1 respectively
    absent, present = coverNearOffDiagonal(G, E, c)
    described = absent + present

    left = c.numNodesLeft
    right = c.numNodesRight
    total = LN(left) + LN(right)              # sizes of both node sets
    total += LU(G.numNodes, left)             # ids of the left set
    total += LU(G.numNodes - left, right)     # ids of the right set

    if described > 0:
        total += log(described, 2)        # probability of a 1 among the described cells
        total += LnU(described, present)  # the edges themselves
    return total
152 | 	  
def coverNearOffDiagonal(G, E, c):
    """Update `E` for describing every (left, right) node pair of `c` exactly.

    Left nodes come from c.lNodeList, right nodes from c.rNodeList; pairs for
    which E.isExcluded holds are skipped and not counted.

    Returns (cnt0, cnt1): the number of visited pairs without / with an edge in G.
    """
    absent = 0
    present = 0
    for a in range(c.numNodesLeft):
        i = c.lNodeList[a]
        for b in range(c.numNodesRight):
            j = c.rNodeList[b]

            if E.isExcluded(i, j):
                continue

            if E.isCovered(i, j):
                # already modelled; drop a wrong earlier claim, then exclude the cell
                if E.isModellingError(i, j):
                    E.delModellingError(i, j)
                E.exclude(i, j)
            else:
                # not modelled yet; a real edge stops being an unmodelled error
                if G.hasEdge(i, j):
                    E.delUnmodelledError(i, j)
                E.coverAndExclude(i, j)

            if G.hasEdge(i, j):
                present += 1
            else:
                absent += 1

    return (absent, present)
185 | 
186 | 
187 | 
188 | # Encoded Size of a Chain
def Lchain(ch, M, G, E):
    """Return the encoded size of chain `ch` and update `E` for its cells.

    `M` is accepted for a uniform signature but not used here.
    """
    coverChain(G, E, ch)

    length = ch.numNodes
    total = LN(length - 1)              # a chain has at least 2 nodes
    total += LU(G.numNodes, length)     # which nodes are on the chain
    total += log(factorial(length), 2)  # in which order they appear
    # (LU + log(factorial) equals identifying the node ids one by one, in order)
    return total
201 | 
def coverChain(G, E, ch):
    """Update `E` for modelling `ch` as a chain.

    Consecutive entries of ch.nodes are modelled as connected. When
    config.optModelZeroes is True, every not-yet-covered pair of chain nodes
    that are at least two positions apart is additionally modelled as
    unconnected. Pairs for which E.isExcluded holds are never touched.
    Returns None.
    """
    # 1. the links between neighbouring chain nodes
    for pos in range(ch.numNodes - 1):
        i = ch.nodes[pos]
        j = ch.nodes[pos + 1]

        if E.isExcluded(i, j):
            continue

        if G.hasEdge(i, j):
            if not E.isCovered(i, j):
                E.delUnmodelledError(i, j)
                E.cover(i, j)
            elif E.isModellingError(i, j):
                # model was wrong in saying there is no edge
                E.delModellingError(i, j)
        else:
            if not E.isCovered(i, j):
                E.addModellingError(i, j)
                E.cover(i, j)
            elif not E.isModellingError(i, j):
                # there is no edge, but now we say there is
                E.addModellingError(i, j)

    if not (config.optModelZeroes == True):
        return

    # 2. the non-shortcuts: chain nodes that are not direct neighbours
    for a in range(ch.numNodes):
        i = ch.nodes[a]
        for b in range(a + 2, ch.numNodes):
            j = ch.nodes[b]

            # covered cells keep whatever the earlier structures said
            if E.isExcluded(i, j) or E.isCovered(i, j):
                continue

            if G.hasEdge(i, j):
                # there is an edge, but the chain says there is none
                # NOTE(review): coverBiPartiteCore and coverJellyFish call
                # E.delUnmodelledError before E.addModellingError in this
                # situation; confirm against Error whether it is needed here.
                E.addModellingError(i, j)
            E.cover(i, j)
    return
258 | 
259 | 
260 | 
261 | # Encoded Size of a Star
def Lstar(star, M, G, E):
    """Return the encoded size of `star` and update `E` for its cells.

    `M` is accepted for a uniform signature but not used here.
    """
    coverStar(G, E, star)

    spokes = star.numSpokes
    total = LN(spokes)                   # number of spokes (there is exactly one hub)
    total += log(G.numNodes, 2)          # which node is the hub
    total += LU(G.numNodes - 1, spokes)  # which of the other nodes are spokes
    return total
273 | 
def coverStar(G, E, st) :
    """Update `E` for modelling `st` as a star.

    Every (hub, spoke) pair is modelled as an edge; hub is st.cNode and the
    spokes are st.sNodes. When config.optModelZeroes is True, every
    not-yet-covered pair of spokes is additionally modelled as unconnected.
    Pairs for which E.isExcluded holds are never touched. Returns None.
    """
    hub = st.cNode;
    for spoke in st.sNodes:
        # address the cell as (smaller id, larger id)
        x = min(hub,spoke);
        y = max(hub,spoke);
        # use the same ordered pair for the exclusion test as for all other
        # bookkeeping calls below (the original tested the unordered pair)
        if not E.isExcluded(x,y) :
            # only if (x,y) is not already modelled perfectly
            
            if G.hasEdge(x,y) :
                if E.isCovered(x,y) :
                    if E.isModellingError(x,y) :
                        # previously modelled as 0, we fix the error
                        E.delModellingError(x,y);
                else :
                    E.delUnmodelledError(x,y);
                    E.cover(x,y);
            else :
                if E.isCovered(x,y) :
                    if not E.isModellingError(x,y) :
                        E.addModellingError(x,y);
                else :
                    E.addModellingError(x,y);
                    E.cover(x,y)

    if config.optModelZeroes == True :
        # model non-shortcuts: spokes are not connected to each other
        # NOTE(review): pairs are passed in list order; presumably st.sNodes is
        # sorted by the loader -- confirm.
        for i_idx in range(st.numSpokes) :
            i = st.sNodes[i_idx];
            for j_idx in range(i_idx+1, st.numSpokes) :
                j = st.sNodes[j_idx];
                    
                if not E.isExcluded(i,j) :
                    # only if (i,j) is not already modelled perfectly
                    
                    if not E.isCovered(i,j) :
                        # edge not yet modelled
                        if G.hasEdge(i,j) :
                            # there is an edge, but the star says there is none
                            E.addModellingError(i,j);
                        E.cover(i,j);
                    # covered cells keep whatever the earlier structures said
            
    return;
327 |     
328 | # Encoded Size of a bi-partite core
def LbiPartiteCore(bc, M, G, E):
    """Return the encoded size of bi-partite core `bc` and update `E` for its cells.

    `M` is accepted for a uniform signature but not used here.
    """
    coverBiPartiteCore(G, E, bc)

    left = bc.numNodesLeft
    right = bc.numNodesRight
    total = LN(left) + LN(right)           # sizes of both node sets
    total += LU(G.numNodes, left)          # ids of the left set
    total += LU(G.numNodes - left, right)  # ids of the right set, chosen from the remaining nodes
    return total
337 |     
def coverBiPartiteCore(G, E, bc):
    """Update `E` for modelling `bc` as a bi-partite core.

    Every (left, right) pair of bc.lNodes x bc.rNodes is modelled as an edge;
    every not-yet-covered pair inside the left set and inside the right set is
    modelled as a non-edge. Pairs for which E.isExcluded holds are skipped.
    Returns None.
    """

    def zeroFill(side):
        # model the not-yet-covered cells inside one side as 0
        for a in range(len(side) - 1):
            i = side[a]
            for b in range(a + 1, len(side)):
                j = side[b]
                if E.isExcluded(i, j) or E.isCovered(i, j):
                    continue
                if E.isUnmodelledError(i, j):
                    # the edge exists: the cell is now modelled, but wrongly
                    E.delUnmodelledError(i, j)
                    E.addModellingError(i, j)
                E.cover(i, j)

    # 1. fill in the 1s between the parts
    for i in bc.lNodes:
        for j in bc.rNodes:
            if E.isExcluded(i, j):
                continue

            if E.isCovered(i, j):
                if G.hasEdge(i, j):
                    if E.isModellingError(i, j):
                        # model said 0, we fix it to 1
                        E.delModellingError(i, j)
                elif not E.isModellingError(i, j):
                    # no edge, and claiming one introduces an error
                    E.addModellingError(i, j)
            elif G.hasEdge(i, j):
                # model did not say anything yet, and the edge is there
                E.delUnmodelledError(i, j)
                E.cover(i, j)
            else:
                # cell is not modelled yet, and there is no edge
                E.addModellingError(i, j)
                E.cover(i, j)

    # 2. fill in the 0s in the left part
    zeroFill(bc.lNodes)
    # 3. fill in the 0s in the right part
    zeroFill(bc.rNodes)
    return
397 | 
398 | 
399 | # Encoded Size of a near bi-partite core
def LnearBiPartiteCore(nb, M, G, E):
    """Return the encoded size of near bi-partite core `nb` and update `E` for its cells.

    `M` is accepted for a uniform signature but not used here.
    """
    # counts of described cells between the sets that are 0 and 1 respectively
    absent, present = coverNearBiPartiteCore(G, E, nb)
    described = absent + present

    left = nb.numNodesLeft
    right = nb.numNodesRight
    total = LN(left) + LN(right)           # number of nodes in sets A and B
    total += LU(G.numNodes, left)          # node ids of set A
    total += LU(G.numNodes - left, right)  # node ids of set B

    if described > 0:
        total += log(described, 2)        # probability of a 1 between sets A and B
        total += LnU(described, present)  # the actual edges between A and B
    return total
416 |     
417 | 	  
def coverNearBiPartiteCore(G, E, nb):
    """Update `E` for modelling `nb` as a near bi-partite core.

    Every (left, right) pair of nb.lNodes x nb.rNodes is described exactly;
    every not-yet-covered pair inside the left set and inside the right set is
    modelled as a non-edge. Pairs for which E.isExcluded holds are skipped.

    Returns (cnt0, cnt1): the number of visited (left, right) pairs without /
    with an edge in G.
    """

    def zeroFill(side):
        # model the not-yet-covered cells inside one side as 0
        for a in range(len(side) - 1):
            i = side[a]
            for b in range(a + 1, len(side)):
                j = side[b]
                if E.isExcluded(i, j) or E.isCovered(i, j):
                    continue
                if E.isUnmodelledError(i, j):
                    # the edge exists: the cell is now modelled, but wrongly
                    E.delUnmodelledError(i, j)
                    E.addModellingError(i, j)
                E.cover(i, j)

    # 1. describe the cells between the parts
    absent = 0
    present = 0
    for a in range(nb.numNodesLeft):
        i = nb.lNodes[a]
        for b in range(nb.numNodesRight):
            j = nb.rNodes[b]

            if E.isExcluded(i, j):
                continue

            if E.isCovered(i, j):
                # already modelled; drop a wrong earlier claim, then exclude the cell
                if E.isModellingError(i, j):
                    E.delModellingError(i, j)
                E.exclude(i, j)
            else:
                # not modelled yet; a real edge stops being an unmodelled error
                if G.hasEdge(i, j):
                    E.delUnmodelledError(i, j)
                E.coverAndExclude(i, j)

            if G.hasEdge(i, j):
                present += 1
            else:
                absent += 1

    # 2. fill in the 0s in the left part
    zeroFill(nb.lNodes)
    # 3. fill in the 0s in the right part
    zeroFill(nb.rNodes)

    return (absent, present)
479 | 
480 | 
481 | # Encoded Size of a jellyfish structure
def LjellyFish(jf, M, G, E):
    """Return the encoded size of jellyfish `jf` and update `E` for its cells.

    `M` is accepted for a uniform signature but not used here.
    """
    coverJellyFish(G, E, jf)

    cores = jf.numCores
    spokes = jf.numSpokeSum
    total = LN(cores)                        # number of core nodes
    total += LU(G.numNodes, cores)           # core node ids
    total += (LN(spokes) + LC(spokes, cores))  # number of spokes per core node
    total += LU(G.numNodes - cores, spokes)  # spoke ids (no overlap between the sets)
    return total
492 |     
def coverJellyFish(G, E, jf):
    """Update `E` for modelling `jf` as a jellyfish.

    All pairs of core nodes (jf.cNodes) are modelled as edges, and every core
    jf.cNodes[k] is modelled as connected to each of its spokes jf.sNodes[k].
    When config.optModelZeroes is True, every pair of spokes of the same core
    for which E.isModelled is false is additionally modelled as unconnected.
    Pairs for which E.isExcluded holds are never touched. Returns None.
    """

    def linkUp(i, j):
        # model cell (i,j) as an edge, unless it is already modelled perfectly
        if E.isExcluded(i, j):
            return
        if E.isCovered(i, j):
            if G.hasEdge(i, j):
                if E.isModellingError(i, j):
                    # model said 0, we fix it to 1
                    E.delModellingError(i, j)
            elif not E.isModellingError(i, j):
                # no edge, and claiming one introduces an error
                E.addModellingError(i, j)
        elif G.hasEdge(i, j):
            # edge is there, but was not covered: we fix it
            E.delUnmodelledError(i, j)
            E.cover(i, j)
        else:
            E.addModellingError(i, j)
            E.cover(i, j)

    coreCount = len(jf.cNodes)

    # 1. link up the nodes in the core
    for a in range(coreCount):
        i = jf.cNodes[a]
        for b in range(a + 1, coreCount):
            linkUp(i, jf.cNodes[b])

    # 2. link the core nodes up to their respective spokes
    for a in range(coreCount):
        i = jf.cNodes[a]
        for j in jf.sNodes[a]:
            linkUp(i, j)

    if not (config.optModelZeroes == True):
        return

    # 3. model that the spokes within a set are not connected
    for a in range(coreCount):
        spokes = jf.sNodes[a]
        for p in range(len(spokes) - 1):
            j = spokes[p]
            for q in range(p + 1, len(spokes)):
                k = spokes[q]

                # previously modelled cells are left as they are
                if E.isExcluded(j, k) or E.isModelled(j, k):
                    continue

                if G.hasEdge(j, k):
                    # should be a 0 but has a 1: becomes a modelling error
                    E.delUnmodelledError(j, k)
                    E.addModellingError(j, k)
                E.cover(j, k)
    return
573 |     
574 | 
575 | # Encoded Size of a core periphery
def LcorePeriphery(cp, M, G, E):
    """Return the encoded size of core periphery `cp` and update `E` for its cells.

    `M` is accepted for a uniform signature but not used here.
    """
    coverCorePeriphery(G, E, cp)

    cores = cp.numCores
    spokes = cp.numSpokes
    total = LN(cores)                              # number of core nodes
    total += LN(spokes)                            # number of spoke nodes
    total += cores * log(G.numNodes, 2)            # identify each core node
    total += spokes * log(G.numNodes - cores, 2)   # identify each spoke node
    return total
585 |     
586 | # check whether ok
def coverCorePeriphery(G, E, cp):
    """Update `E` for modelling every (core, spoke) pair of `cp` as an edge.

    Only pairs for which E.isModelled is false are touched; unlike the other
    cover functions there is no E.isExcluded test here. Returns None.
    """
    for core in cp.cNodes:
        for spoke in cp.sNodes:
            if E.isModelled(core, spoke):
                continue
            if G.hasEdge(core, spoke):
                E.delUnmodelledError(core, spoke)
            else:
                E.addModellingError(core, spoke)
            E.cover(core, spoke)
    return
597 |     
598 | # Encoded Size of a core periphery (a bit smarter)
def LcorePeripheryA(cp, M, G, E) :
    """Return the encoded size of core periphery `cp`, identifying node sets with LU.

    `M` and `E` are accepted for a uniform signature but not used here.
    NOTE(review): unlike LcorePeriphery this does not call coverCorePeriphery,
    so `E` is not updated -- confirm whether that is intended.
    """
    # LcorePeriphery reads the core count from cp.numCores; fall back to the
    # attribute name this function used originally when numCores is absent
    numCores = getattr(cp, 'numCores', None);
    if numCores is None :
        numCores = cp.numCoreNodes;

    cost = LN(numCores);     # number of core-nodes
    cost += LN(cp.numSpokes);       # number of spoke-nodes
    cost += LU(G.numNodes, numCores);    # identify core-nodes
    cost += LU(G.numNodes - numCores, cp.numSpokes); # identify spoke-nodes
    return cost;
605 | 


--------------------------------------------------------------------------------
/MDL/description_length.py:
--------------------------------------------------------------------------------
  1 | import config;
  2 | 
  3 | from math import log,factorial;
  4 | from error import Error;
  5 | from graph import Graph;
  6 | from model import Model;
  7 | 
  8 | ### basic functions
  9 | # determine possible number of edges between `numEdges' nodes
 10 | def CalcCliqueNumPosEdges(numEdges):
 11 |   # directed graph, no self-loops
 12 |   # (|n|^2)-n
 13 |   return numEdges*numEdges - numEdges;
 14 | 
 15 | # (n choose k)
 16 | def choose(n, k):
 17 |  if 0 <= k <= n:
 18 |    p = 1
 19 |    for t in xrange(min(k, n - k)):
 20 |      p = (p * (n - t)) // (t + 1)
 21 |    return p;
 22 |  else:
 23 |    return 0;
 24 | 
 25 | def composition(n,k) :
 26 |     return choose(n-1,k-1);
 27 | 
 28 | def LC(n,k) :
 29 |     return log(composition(n,k),2);
 30 | 
 31 | def weakcomposition(n,k) :
 32 |     return choose(n+k-1,k-1);
 33 |     
 34 | def LwC(n,k) :
 35 |     return log(weakcomposition(n,k),2);
 36 | 
 37 | # Encoded length of `n` 0/1 entries with `k` 1s (aka, Naive Uniform)
 38 | def LnU(n,k):
 39 |     #print 'LnU', n, k
 40 |     if n==0 or k==0 or k==n:
 41 |         return 0;    
 42 |     x = -log(k / float(n),2);
 43 |     y = -log((n-k)/float(n),2);
 44 |     return k * x + (n-k) * y;
 45 |     
 46 | # Encoded length of `n` 0/1 entries with `k` 1s (aka, Uniform)
 47 | def LU(n,k) :
 48 |     if n==0 or k==0 :
 49 |         return 0;   
 50 |     return log(choose(n,k),2);
 51 | 
 52 | # encoded size of an integer >=1 as by Rissanen's 1983 Universal code for integers
 53 | def LN(z) :
 54 |   if z <= 0 :
 55 |     return 0;
 56 |   c = log(2.865064,2);
 57 |   i = log(z,2);
 58 |   while i > 0 :
 59 |     c = c + i;
 60 |     i = log(i,2);
 61 |   return c;
 62 |  
 63 | 
 64 | 
 65 | ### Our Encoding Starts Here ###
 66 | 
 67 | ### Total Encoded Size
 68 | def L(G, M, errorEnc): 
 69 |     E = Error(G); # initially, everything is error, nothing is covered
 70 |     error_cost = 0;
 71 |     
 72 |     model_cost = LN(M.numStructs+1);    # encode number of structures we're encoding with
 73 |     model_cost += LwC(M.numStructs, M.numStrucTypes);            # encode the number per structure
 74 | 
 75 |     # encode the structure-type identifier per type
 76 |     if M.numFullCliques > 0 :
 77 |         model_cost += M.numFullCliques * log(M.numFullCliques / float(M.numStructs), 2);
 78 |     if M.numNearCliques  > 0 :
 79 |         model_cost += M.numNearCliques * log(M.numNearCliques / float(M.numStructs), 2);
 80 |     if M.numChains > 0 :
 81 |         model_cost += M.numChains * log(M.numChains / float(M.numStructs), 2);
 82 |     if M.numStars > 0 :
 83 |         model_cost += M.numStars * log(M.numStars / float(M.numStructs), 2);
 84 |     if M.numBiPartiteCores > 0 :
 85 |         model_cost += M.numBiPartiteCores * log(M.numBiPartiteCores / float(M.numStructs), 2);
 86 |     if M.numNearBiPartiteCores > 0 :
 87 |         model_cost += M.numNearBiPartiteCores * log(M.numNearBiPartiteCores / float(M.numStructs), 2);
 88 |     if M.numJellyFishes > 0 :
 89 |         model_cost += M.numJellyFishes * log(M.numJellyFishes / float(M.numStructs), 2);
 90 |     if M.numCorePeripheries > 0 :
 91 |         model_cost += M.numCorePeripheries * log(M.numCorePeripheries / float(M.numStructs), 2);
 92 | 
 93 |     
 94 |     # encode the structures
 95 |     for struc in M.structs :
 96 |         if struc.isFullClique() :
 97 |             model_cost += LfullClique(struc,M,G,E);
 98 |         elif struc.isNearClique() :
 99 |             model_cost += LnearClique(struc,M,G,E);
100 |         elif struc.isChain() :
101 |             model_cost += Lchain(struc,M,G,E);
102 |         elif struc.isStar() :
103 |             model_cost += Lstar(struc,M,G,E);
104 |         elif struc.isCorePeriphery() :
105 |             model_cost += LcorePeriphery(struc,M,G,E);
106 |         elif struc.isBiPartiteCore() :
107 |             model_cost += LbiPartiteCore(struc,M,G,E);
108 |         elif struc.isNearBiPartiteCore() :
109 |             model_cost += LnearBiPartiteCore(struc,M,G,E);
110 |         elif struc.isJellyFish() :
111 |             model_cost += LjellyFish(struc,M,G,E);
112 |     
113 |     
114 |     # encode the error
115 |     error_cost += 0 if E.numCellsCovered == 0 else log(E.numCellsCovered, 2);    # encode number of additive Errors
116 |     if ((G.numNodes * G.numNodes - G.numNodes) / 2) - E.numCellsCovered > 0 :
117 |         error_cost += log(((G.numNodes * G.numNodes - G.numNodes) / 2) - E.numCellsCovered, 2);    # encode number of Errors
118 |         
119 |     if errorEnc == "NP" :
120 |         error_cost += LErrorNaivePrefix(G,M,E);
121 |     elif errorEnc == "NB" :
122 |         error_cost += LErrorNaiveBinom(G,M,E);
123 |     elif errorEnc == "TP" :
124 |         error_cost += LErrorTypedPrefix(G,M,E);
125 |     elif errorEnc == "TB" :
126 |         error_cost += LErrorTypedBinom(G,M,E);
127 |     
128 |     total_cost = model_cost + error_cost;
129 |     
130 |     return (total_cost, model_cost, error_cost, E);
131 | 
132 | # Encoded Size of a Full-Clique
def LfullClique(c, M, G, E):
    """Return the encoded size of the full clique ``c``.

    Side effect: the clique's node pairs are registered in the error
    bookkeeping ``E`` through ``coverFullClique`` before the cost is computed.

    c -- full-clique structure (``numNodes`` is read here)
    M -- the model; unused, kept so all L* encoders share one signature
    G -- the graph (``numNodes`` is read here)
    E -- error bookkeeping, updated in place
    """
    coverFullClique(G, E, c)

    clique_size = c.numNodes
    bits = LN(clique_size)  # number of nodes in the clique
    if G.numNodes > 0 and clique_size > 0:
        bits += LU(G.numNodes, clique_size)  # ids of those nodes
    return bits
141 | 
def coverFullClique(G, E, c):
    """Update ``E`` so that every node pair of clique ``c`` is modelled as an edge.

    Pairs are visited in index order over ``c.nodes`` (the original code notes
    that ``c.nodes`` is ordered).  Pairs that ``E`` reports as excluded are
    left untouched.  Returns nothing; ``E`` is changed in place.
    """
    size = c.numNodes
    for a in range(size):
        u = c.nodes[a]
        for b in range(a + 1, size):
            v = c.nodes[b]

            # excluded cells are already described exactly -- hands off
            if E.isExcluded(u, v):
                continue

            if not E.isCovered(u, v):
                # first structure to say anything about this cell
                if G.hasEdge(u, v):
                    # the real edge is now explained, so the error goes away
                    E.delUnmodelledError(u, v)
                else:
                    # we claim an edge that does not exist
                    E.addModellingError(u, v)
                E.cover(u, v)
                continue

            # the cell was modelled before; reconcile with "there is an edge"
            has_edge = G.hasEdge(u, v)
            flagged = E.isModellingError(u, v)
            if has_edge and flagged:
                # an earlier structure denied the edge; we repair that
                E.delModellingError(u, v)
            elif not (has_edge or flagged):
                # no edge, and nobody claimed one until now
                E.addModellingError(u, v)
    return
171 | 
172 | 
173 | # Encoded Size of a Near-Clique  
def LnearClique(c, M, G, E):
    """Return the encoded size of the near clique ``c``.

    ``coverNearClique`` is run first; it updates ``E`` in place and reports
    how many non-edge cells (cnt0) and edge cells (cnt1) this structure
    describes.  ``M`` is unused and only kept for the common L* signature.
    """
    zeros, ones = coverNearClique(G, E, c)
    described = zeros + ones  # number of cells this structure spells out

    bits = LN(c.numNodes)                 # number of nodes
    bits += LU(G.numNodes, c.numNodes)    # node ids
    if described > 0:
        bits += log(described, 2)         # probability of a 1
        bits += LnU(described, ones)      # the edges themselves
    return bits
184 | 	  
def coverNearClique(G, E, c):
    """Mark every not-yet-excluded node pair of ``c`` as exactly described.

    For each such pair any recorded error is removed and the cell is
    excluded in ``E`` (and covered, if it was not already).  Returns the
    tuple ``(cnt0, cnt1)``: the number of processed pairs without and with
    an edge in ``G``.  Pairs are visited in index order over ``c.nodes``.
    """
    tally = [0, 0]  # tally[0]: non-edges described, tally[1]: edges described
    size = c.numNodes
    for a in range(size):
        u = c.nodes[a]
        for b in range(a + 1, size):
            v = c.nodes[b]

            # already described exactly by an earlier structure
            if E.isExcluded(u, v):
                continue

            if E.isCovered(u, v):
                # modelled before; drop a wrong claim if there was one
                if E.isModellingError(u, v):
                    E.delModellingError(u, v)
                E.exclude(u, v)
            else:
                # untouched so far; a real edge stops being an error
                if G.hasEdge(u, v):
                    E.delUnmodelledError(u, v)
                E.coverAndExclude(u, v)

            tally[1 if G.hasEdge(u, v) else 0] += 1

    return (tally[0], tally[1])
217 | 
218 | 
219 | # Encoded Size of a Chain
def Lchain(ch, M, G, E):
    """Return the encoded size of the chain ``ch``.

    ``coverChain`` is run first and updates ``E`` in place.  The cost is the
    sum of: the chain length (encoded as ``numNodes - 1`` because a chain has
    at least two nodes), the set of node ids, and the order of those nodes.
    The original author notes that the last two terms together equal picking
    the node ids one by one in order.  ``M`` is unused.
    """
    coverChain(G, E, ch)

    length = ch.numNodes
    return (LN(length - 1)                 # chain length
            + LU(G.numNodes, length)       # which nodes
            + log(factorial(length), 2))   # in which order
232 | 
def coverChain(G, E, ch):
    """Update the error bookkeeping ``E`` for the chain ``ch``.

    1. Every pair of consecutive nodes in ``ch.nodes`` is modelled as an
       edge.
    2. If ``config.optModelZeroes`` is True, every pair of non-consecutive
       chain nodes that is still uncovered is modelled as a non-edge.

    Pairs that ``E`` reports as excluded are never touched.  Returns nothing;
    ``E`` is changed in place.

    Fix: in step 2 an existing edge between non-adjacent chain nodes is now
    moved from "unmodelled error" to "modelling error" (delete + add), the
    same way coverBiPartiteCore and coverJellyFish handle a cell that is
    modelled as 0 but holds an edge.  Previously only the modelling error was
    added, leaving the cell covered yet still counted as unmodelled.
    NOTE(review): the Error class is not visible from here -- confirm that
    addModellingError() does not already drop the unmodelled error itself.
    """
    # 1. links of the chain: consecutive nodes are claimed to be connected
    for pos in range(ch.numNodes - 1):
        i = ch.nodes[pos]
        j = ch.nodes[pos + 1]

        if E.isExcluded(i, j):
            # already described exactly elsewhere
            continue

        if not E.isCovered(i, j):
            # nobody modelled this cell yet
            if G.hasEdge(i, j):
                E.delUnmodelledError(i, j)
            else:
                E.addModellingError(i, j)
            E.cover(i, j)
        elif G.hasEdge(i, j):
            # modelled before; if it was modelled as 0 that was wrong
            if E.isModellingError(i, j):
                E.delModellingError(i, j)
        elif not E.isModellingError(i, j):
            # no edge, and until now nobody claimed one
            E.addModellingError(i, j)

    if config.optModelZeroes == True:
        # 2. non-shortcuts: nodes two or more steps apart are claimed
        #    to be unconnected
        for pos in range(ch.numNodes):
            i = ch.nodes[pos]
            for far in range(pos + 2, ch.numNodes):  # skip the direct neighbour
                j = ch.nodes[far]

                # cells that are excluded or already modelled stay as they are
                if E.isExcluded(i, j) or E.isCovered(i, j):
                    continue

                if G.hasEdge(i, j):
                    # there is an edge but we say there is none: the edge is
                    # no longer unmodelled, it is modelled wrongly
                    E.delUnmodelledError(i, j)
                    E.addModellingError(i, j)
                E.cover(i, j)
    return
289 | 
290 | 
291 | 
292 | # Encoded Size of a Star
def Lstar(star, M, G, E):
    """Return the encoded size of the star ``star``.

    ``coverStar`` is run first and updates ``E`` in place.  The cost covers
    the number of spokes (there is exactly one hub), the identity of the hub
    among all nodes, and the identities of the spokes among the remaining
    ``G.numNodes - 1`` nodes.  ``M`` is unused.
    """
    coverStar(G, E, star)

    spokes = star.numSpokes
    return (LN(spokes)                     # number of spokes
            + log(G.numNodes, 2)           # which node is the hub
            + LU(G.numNodes - 1, spokes))  # which nodes are the spokes
304 | 
def coverStar(G, E, st):
    """Update the error bookkeeping ``E`` for the star ``st``.

    1. Every hub-spoke pair (``st.cNode`` with each node of ``st.sNodes``) is
       modelled as an edge.  The pair is handed to ``G`` and ``E`` as
       ``(min, max)``.
    2. If ``config.optModelZeroes`` is True, every spoke-spoke pair that is
       still uncovered is modelled as a non-edge.

    Pairs that ``E`` reports as excluded are never touched.  Returns nothing;
    ``E`` is changed in place.

    Fixes:
    * step 1 now asks ``E.isExcluded`` about the ordered pair ``(x, y)``,
      like every other call in that loop; it used the unordered ``(i, j)``.
    * step 2 now moves an existing spoke-spoke edge from "unmodelled error"
      to "modelling error" (delete + add), matching coverBiPartiteCore and
      coverJellyFish; before, the edge stayed counted as unmodelled although
      its cell was covered.
      NOTE(review): the Error class is not visible from here -- confirm that
      addModellingError() does not already drop the unmodelled error itself.
    """
    hub = st.cNode
    for spoke in st.sNodes:
        x = min(hub, spoke)
        y = max(hub, spoke)

        if E.isExcluded(x, y):
            # already described exactly elsewhere
            continue

        if G.hasEdge(x, y):
            if E.isCovered(x, y):
                if E.isModellingError(x, y):
                    # previously modelled as 0, we fix the error
                    E.delModellingError(x, y)
            else:
                E.delUnmodelledError(x, y)
                E.cover(x, y)
        else:
            if E.isCovered(x, y):
                if not E.isModellingError(x, y):
                    E.addModellingError(x, y)
            else:
                E.addModellingError(x, y)
                E.cover(x, y)

    if config.optModelZeroes == True:
        # spokes are claimed not to be connected to each other
        for a in range(st.numSpokes):
            i = st.sNodes[a]
            for b in range(a + 1, st.numSpokes):
                j = st.sNodes[b]

                # cells that are excluded or already modelled stay as they are
                if E.isExcluded(i, j) or E.isCovered(i, j):
                    continue

                if G.hasEdge(i, j):
                    # there is an edge but we say there is none: the edge is
                    # no longer unmodelled, it is modelled wrongly
                    E.delUnmodelledError(i, j)
                    E.addModellingError(i, j)
                E.cover(i, j)

    return
358 |     
359 | # Encoded Size of a bi-partite core
def LbiPartiteCore(bc, M, G, E):
    """Return the encoded size of the bi-partite core ``bc``.

    ``coverBiPartiteCore`` is run first and updates ``E`` in place.  The cost
    covers the sizes of the left and right node sets and their node ids; the
    right set is chosen among the nodes left over after the left set.
    ``M`` is unused.
    """
    coverBiPartiteCore(G, E, bc)

    left = bc.numLeftNodes
    right = bc.numRightNodes
    return (LN(left) + LN(right)              # sizes of both sides
            + LU(G.numNodes, left)            # ids of the left side
            + LU(G.numNodes - left, right))   # ids of the right side
368 |     
def coverBiPartiteCore(G, E, bc):
    """Update the error bookkeeping ``E`` for the bi-partite core ``bc``.

    1. Every (left, right) pair is modelled as an edge.
    2. Every still-unmodelled pair inside the left set is modelled as a
       non-edge.
    3. The same for the right set.

    Pairs that ``E`` reports as excluded are never touched.  Returns nothing;
    ``E`` is changed in place.
    """

    def model_side_as_empty(side):
        # Cells within one side are claimed to be 0.  Only cells that are
        # neither excluded nor covered yet are affected.
        count = len(side)
        for a in range(count):
            u = side[a]
            for b in range(a + 1, count):
                v = side[b]
                if E.isExcluded(u, v) or E.isCovered(u, v):
                    continue
                if E.isUnmodelledError(u, v):
                    # an edge exists: it is now modelled, but wrongly
                    E.delUnmodelledError(u, v)
                    E.addModellingError(u, v)
                E.cover(u, v)

    # 1. the 1s between the two sides
    for left in bc.lNodes:
        for right in bc.rNodes:
            if E.isExcluded(left, right):
                continue

            has_edge = G.hasEdge(left, right)
            if E.isCovered(left, right):
                flagged = E.isModellingError(left, right)
                if has_edge and flagged:
                    # an earlier structure said 0; we say 1, which is right
                    E.delModellingError(left, right)
                elif not has_edge and not flagged:
                    # we say 1 where there is nothing
                    E.addModellingError(left, right)
            elif has_edge:
                # nothing was said about this edge so far
                E.delUnmodelledError(left, right)
                E.cover(left, right)
            else:
                # untouched cell without an edge, claimed to be 1
                E.addModellingError(left, right)
                E.cover(left, right)

    # 2. and 3. the 0s inside each side
    model_side_as_empty(bc.lNodes)
    model_side_as_empty(bc.rNodes)
    return
425 | 
426 | 
427 | # Encoded Size of a near bi-partite core
def LnearBiPartiteCore(nb, M, G, E):
    """Return the encoded size of the near bi-partite core ``nb``.

    ``coverNearBiPartiteCore`` is run first; it updates ``E`` in place and
    reports how many cells between the two sides hold no edge (cnt0) and an
    edge (cnt1).  ``M`` is unused.
    """
    zeros, ones = coverNearBiPartiteCore(G, E, nb)
    described = zeros + ones

    left = nb.numLeftNodes
    right = nb.numRightNodes

    bits = LN(left) + LN(right)             # sizes of sets A and B
    bits += LU(G.numNodes, left)            # node ids of set A
    bits += LU(G.numNodes - left, right)    # node ids of set B

    if described > 0:
        bits += log(described, 2)           # probability of a 1 between A and B
        bits += LnU(described, ones)        # the actual edges between A and B
    return bits
444 |     
445 | 	  
def coverNearBiPartiteCore(G, E, nb):
    """Update the error bookkeeping ``E`` for the near bi-partite core ``nb``.

    1. Every not-yet-excluded (left, right) pair is marked as described
       exactly: recorded errors are dropped and the cell is excluded.
    2. Every still-unmodelled pair inside the left set is modelled as a
       non-edge.
    3. The same for the right set.

    Returns ``(cnt0, cnt1)``: how many of the pairs processed in step 1 have
    no edge / an edge in ``G``.
    """

    def model_side_as_empty(side):
        # Cells within one side are claimed to be 0.  Only cells that are
        # neither excluded nor covered yet are affected.
        count = len(side)
        for a in range(count):
            u = side[a]
            for b in range(a + 1, count):
                v = side[b]
                if E.isExcluded(u, v) or E.isCovered(u, v):
                    continue
                if E.isUnmodelledError(u, v):
                    # an edge exists: it is now modelled, but wrongly
                    E.delUnmodelledError(u, v)
                    E.addModellingError(u, v)
                E.cover(u, v)

    tally = [0, 0]  # tally[0]: non-edges described, tally[1]: edges described

    # 1. the cells between the two sides
    for a in range(nb.numLeftNodes):
        left = nb.lNodes[a]
        for b in range(nb.numRightNodes):
            right = nb.rNodes[b]

            if E.isExcluded(left, right):
                continue

            if E.isCovered(left, right):
                # modelled before; drop a wrong claim if there was one
                if E.isModellingError(left, right):
                    E.delModellingError(left, right)
                E.exclude(left, right)
            else:
                # untouched so far; a real edge stops being an error
                if G.hasEdge(left, right):
                    E.delUnmodelledError(left, right)
                E.coverAndExclude(left, right)

            tally[1 if G.hasEdge(left, right) else 0] += 1

    # 2. and 3. the 0s inside each side
    model_side_as_empty(nb.lNodes)
    model_side_as_empty(nb.rNodes)

    return (tally[0], tally[1])
507 | 
508 | 
509 | # Encoded Size of a jellyfish structure
def LjellyFish(jf, M, G, E):
    """Return the encoded size of the jellyfish structure ``jf``.

    ``coverJellyFish`` is run first and updates ``E`` in place.  The cost
    covers the number and ids of the core nodes, the number of spokes per
    core node, and the spoke ids (chosen among the non-core nodes; the
    original author notes the spoke sets do not overlap).  ``M`` is unused.
    """
    coverJellyFish(G, E, jf)

    cores = jf.numCores
    spokes = jf.numSpokeSum

    bits = LN(cores)                      # number of core nodes
    bits += LU(G.numNodes, cores)         # core node ids
    # number of spokes per core node (summed first, as in the original,
    # so the floating-point result is unchanged)
    bits += LN(spokes) + LC(spokes, cores)
    bits += LU(G.numNodes - cores, spokes)  # spoke ids
    return bits
520 |     
def coverJellyFish(G, E, jf):
    """Update the error bookkeeping ``E`` for the jellyfish structure ``jf``.

    1. Every pair of core nodes (``jf.cNodes``) is modelled as an edge.
    2. Every core node is modelled as connected to each of its own spokes
       (``jf.sNodes[k]`` belongs to ``jf.cNodes[k]``).
    3. If ``config.optModelZeroes`` is True, spokes of the same core node
       are modelled as not connected to each other, for cells that
       ``E.isModelled`` reports as not modelled yet.

    Pairs that ``E`` reports as excluded are never touched.  Returns nothing;
    ``E`` is changed in place.
    """

    def model_as_edge(u, v):
        # Claim that (u, v) is an edge and adjust the recorded errors.
        if E.isExcluded(u, v):
            return

        has_edge = G.hasEdge(u, v)
        if E.isCovered(u, v):
            flagged = E.isModellingError(u, v)
            if has_edge and flagged:
                # an earlier structure said 0; we say 1, which is right
                E.delModellingError(u, v)
            elif not has_edge and not flagged:
                # we say 1 where there is nothing
                E.addModellingError(u, v)
        elif has_edge:
            # the edge is there but was not covered; now it is explained
            E.delUnmodelledError(u, v)
            E.cover(u, v)
        else:
            # untouched cell without an edge, claimed to be 1
            E.addModellingError(u, v)
            E.cover(u, v)

    cores = jf.cNodes

    # 1. link up the nodes in the core
    for a in range(len(cores)):
        for b in range(a + 1, len(cores)):
            model_as_edge(cores[a], cores[b])

    # 2. link each core node to its own spokes
    for a, core in enumerate(cores):
        for spoke in jf.sNodes[a]:
            model_as_edge(core, spoke)

    if config.optModelZeroes == True:
        # 3. spokes hanging off the same core node are not connected
        for a in range(len(cores)):
            spokes = jf.sNodes[a]
            for p in range(len(spokes)):
                j = spokes[p]
                for q in range(p + 1, len(spokes)):
                    k = spokes[q]

                    # earlier modelling of this cell is left as it is
                    if E.isExcluded(j, k) or E.isModelled(j, k):
                        continue

                    if G.hasEdge(j, k):
                        # should be a 0 but holds a 1: becomes a modelling error
                        E.delUnmodelledError(j, k)
                        E.addModellingError(j, k)
                    E.cover(j, k)
    return
601 |     
602 | 
603 | # Encoded Size of a core periphery
def LcorePeriphery(cp, M, G, E):
    """Return the encoded size of the core-periphery structure ``cp``.

    ``coverCorePeriphery`` is run first and updates ``E`` in place.  The cost
    covers the number of core and spoke nodes, plus one fixed-length id per
    core node (out of all nodes) and per spoke node (out of the non-core
    nodes).  ``M`` is unused.
    """
    coverCorePeriphery(G, E, cp)

    cores = cp.numCores
    spokes = cp.numSpokes

    bits = LN(cores)                                # number of core nodes
    bits += LN(spokes)                              # number of spoke nodes
    bits += cores * log(G.numNodes, 2)              # identify core nodes
    bits += spokes * log(G.numNodes - cores, 2)     # identify spoke nodes
    return bits
613 |     
614 | # check whether ok
def coverCorePeriphery(G, E, cp):
    """Update ``E`` so that every (core, spoke) pair of ``cp`` is modelled as an edge.

    Only cells that ``E.isModelled`` reports as not modelled yet are changed:
    an existing edge stops being an unmodelled error, a missing edge becomes
    a modelling error, and the cell is covered.  Returns nothing.

    NOTE(review): unlike the other cover* functions this one does not consult
    ``E.isExcluded``; the original author left a "check whether ok" remark.
    """
    for core in cp.cNodes:
        for spoke in cp.sNodes:
            if E.isModelled(core, spoke):
                continue
            if G.hasEdge(core, spoke):
                E.delUnmodelledError(core, spoke)
            else:
                E.addModellingError(core, spoke)
            E.cover(core, spoke)
    return
625 |     
626 | # Encoded Size of a core periphery (a bit smarter)
def LcorePeripheryA(cp, M, G, E):
    """Return an alternative ("a bit smarter") encoded size of ``cp``.

    Core and spoke node ids are priced with ``LU`` rather than with one
    fixed-length id per node.  ``E`` is not updated here and ``M`` is unused.

    NOTE(review): this reads ``cp.numCoreNodes`` whereas ``LcorePeriphery``
    reads ``cp.numCores`` -- confirm which attribute the structure provides.
    """
    cores = cp.numCoreNodes
    spokes = cp.numSpokes
    return (LN(cores)                         # number of core nodes
            + LN(spokes)                      # number of spoke nodes
            + LU(G.numNodes, cores)           # identify core nodes
            + LU(G.numNodes - cores, spokes)) # identify spoke nodes
633 |     
634 |     
635 | ### Encoding the Error
636 | 
637 | # here I encode all errors uniformly by a binomial -- hence, not yet the typed advanced stuff yet!
638 | def LErrorNaiveBinom(G, M, E) :
639 |     # possible number of edges in an undirected, non-self-connected graph of N nodes
640 |     posNumEdges = (G.numNodes * G.numNodes - G.numNodes) / 2
641 |     cost = LU(posNumEdges - E.numCellsExcluded, E.numUnmodelledErrors + E.numModellingErrors);
642 |     if config.optVerbosity > 1 : print ' - L_nb(E)', cost;
643 |     return cost;
644 | 
645 | def LErrorNaivePrefix(G, M, E) :
646 |     # possible number of edges in an undirected, non-self-connected graph of N nodes
647 |     posNumEdges = (G.numNodes * G.numNodes - G.numNodes) / 2
648 |     cost = LnU(posNumEdges - E.numCellsExcluded, E.numModellingErrors + E.numUnmodelledErrors);
649 |     if config.optVerbosity > 1 : print ' - L_np(E)', cost;
650 |     return cost;
651 | 
652 | # here I encode all errors uniformly by a binomial -- hence, not yet the typed advanced stuff yet!
653 | def LErrorTypedBinom(G, M, E) :
654 |     # possible number of edges in an undirected, non-self-connected graph of N nodes
655 |     posNumEdges = (G.numNodes * G.numNodes - G.numNodes) / 2
656 |     
657 |     # First encode the modelling errors
658 |     #print 'First encode the modelling errors'
659 |     #print 'E.numCellsCovered, E.numCellsExcluded, E.numModellingErrors;'
660 |     #print E.numCellsCovered, E.numCellsExcluded, E.numModellingErrors;
661 |     costM = LU(E.numCellsCovered - E.numCellsExcluded, E.numModellingErrors);
662 |     if config.optVerbosity > 1 : print ' - L_tb(E+)', costM;
663 | 
664 |     # Second encode the unmodelled errors
665 |     #print 'Second encode the unmodelled errors' (excluded cells are always covered!)
666 |     #print posNumEdges - E.numCellsCovered, E.numUnmodelledErrors;
667 |     costU = LU(posNumEdges - E.numCellsCovered, E.numUnmodelledErrors);
668 |     if config.optVerbosity > 1 : print ' - L_tb(E-)', costU;
669 |     return costM + costU;
670 | 
671 | def LErrorTypedPrefix(G, M, E) :
672 |     # possible number of edges in an undirected, non-self-connected graph of N nodes
673 |     posNumEdges = (G.numNodes * G.numNodes - G.numNodes) / 2
674 |     costM = LnU(E.numCellsCovered - E.numCellsExcluded, E.numModellingErrors);
675 |     if config.optVerbosity > 1 : print ' - L_tp(E+)', costM;
676 |     costU = LnU(posNumEdges - E.numCellsCovered, E.numUnmodelledErrors);
677 |     if config.optVerbosity > 1 : print ' - L_tp(E-)', costU;
678 |     return costM + costU;
679 | 


--------------------------------------------------------------------------------