├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── gmatch4py ├── __init__.py ├── bag_of_cliques.pyx ├── base.pxd ├── base.pyx ├── bon.pyx ├── embedding │ ├── __init__.py │ ├── deepwalk.pyx │ ├── graph.pyx │ ├── graph2vec.pyx │ ├── node2vec.pyx │ ├── skipgram.pyx │ └── walks.pyx ├── ged │ ├── __init__.py │ ├── abstract_graph_edit_dist.pxd │ ├── abstract_graph_edit_dist.pyx │ ├── bipartite_graph_matching_2.pyx │ ├── graph_edit_dist.pxd │ ├── graph_edit_dist.pyx │ ├── greedy_edit_distance.pyx │ └── hausdorff_edit_distance.pyx ├── graph.pxd ├── graph.pyx ├── helpers │ ├── __init__.py │ ├── general.pyx │ └── reader.pyx ├── jaccard.pyx ├── kernels │ ├── __init__.py │ ├── adjacency.pyx │ ├── random_walk_kernel.pyx │ ├── shortest_path_kernel.pyx │ └── weisfeiler_lehman.pyx ├── mcs.pyx ├── vertex_edge_overlap.pyx └── vertex_ranking.pyx ├── logo2.png ├── requirements.txt ├── setup.py └── test ├── gmatch4py_performance_test.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | *.cpp 106 | *.c 107 | .DS_Store 108 | .idea 109 | .vscode -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | 5 | notifications: 6 | email: false 7 | 8 | install: 9 | - pip install cython numpy networkx scipy scikit-learn pandas gensim joblib gensim psutil --upgrade 10 | - pip install . 
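  # A local equivalent of the install step above, assuming a Python 3 environment with pip:
  #   pip install cython numpy networkx scipy scikit-learn pandas gensim joblib psutil --upgrade
  #   pip install .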
11 | 
12 | script:
13 |   - echo "1"
14 | 
15 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2018 Jacques Fize
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![](logo2.png)
2 | 
3 | 
4 | [![Build Status](https://travis-ci.com/Jacobe2169/GMatch4py.svg?branch=master)](https://travis-ci.com/Jacobe2169/GMatch4py)
5 | # GMatch4py, a graph matching library for Python
6 | 
7 | 
8 | GMatch4py is a library dedicated to graph matching. Graph structures are stored in NetworkX graph objects.
9 | GMatch4py algorithms are implemented in Cython to enhance performance.
10 | 
11 | ## Requirements
12 | 
13 | * Python 3
14 | * Numpy and Cython installed (if not: `(sudo) pip(3) install numpy cython`)
15 | 
16 | ## Installation
17 | 
18 | To install `GMatch4py`, run the following commands:
19 | 
20 | ```bash
21 | git clone https://github.com/Jacobe2169/GMatch4py.git
22 | cd GMatch4py
23 | (sudo) pip(3) install .
24 | ```
25 | 
26 | ## Get Started
27 | ### Graph input format
28 | 
29 | In `GMatch4py`, algorithms manipulate `networkx.Graph`, a complete graph model that
30 | comes with a large spectrum of parsers to load your graphs from various formats: `*.graphml`, `*.gexf`, ... (check [here](https://networkx.github.io/documentation/stable/reference/readwrite/index.html) to see all the accepted formats)
31 | 
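For instance, a graph serialized as GraphML can be loaded with `networkx` and handed directly to GMatch4py. A minimal sketch, assuming a file named `my_graph.graphml` (the path is a placeholder):

```python
import networkx as nx

# Any networkx reader (read_graphml, read_gexf, ...) returns a ready-to-use Graph
g = nx.read_graphml("my_graph.graphml")  # placeholder path
print(g.number_of_nodes(), g.number_of_edges())
```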
32 | ### Use GMatch4py
33 | If you want to use algorithms like *graph edit distances*, here is an example:
34 | 
35 | ```python
36 | # Gmatch4py uses networkx graphs
37 | import networkx as nx
38 | # import the GED using the munkres algorithm
39 | import gmatch4py as gm
40 | ```
41 | 
42 | In this example, we use graphs generated with `networkx` helpers:
43 | ```python
44 | g1=nx.complete_bipartite_graph(5,4)
45 | g2=nx.complete_bipartite_graph(6,4)
46 | ```
47 | 
48 | All graph matching algorithms in `Gmatch4py` work this way:
49 | * Each algorithm is associated with an object, and each object has its own specific parameters. In this case, the parameters are the edit costs (delete a vertex, add a vertex, ...)
50 | * Each object exposes a `compare()` method that takes two parameters. The first parameter is **a list of the graphs** you want to **compare**, i.e. measure the distance/similarity between (which of the two depends on the algorithm). You can also compare a sample of graphs to all the other graphs: in that case, the second parameter should be **a list containing the indices** of these graphs (based on the first parameter list). If you would rather compute the distance/similarity **between all graphs**, just pass the `None` value.
51 | 
52 | ```python
53 | ged=gm.GraphEditDistance(1,1,1,1) # all edit costs are equal to 1
54 | result=ged.compare([g1,g2],None)
55 | print(result)
56 | ```
57 | 
58 | The output is a similarity/distance matrix:
59 | ```python
60 | array([[0., 14.],
61 |        [10., 0.]])
62 | ```
63 | This output is "raw"; if you wish to have normalized results in terms of distance (or similarity), you can use:
64 | 
65 | ```python
66 | ged.similarity(result)
67 | # or
68 | ged.distance(result)
69 | ```
70 | 
71 | ## Exploit node and edge attributes
72 | 
73 | In this latest version, we added the possibility to exploit graph attributes! To do so, the `base.Base` class is extended with the `set_attr_graph_used(node_attr, edge_attr)` method.
74 | 
75 | ```python
76 | import networkx as nx
77 | import gmatch4py as gm
78 | ged = gm.GraphEditDistance(1,1,1,1)
79 | ged.set_attr_graph_used("theme","color") # The node "theme" and edge "color" attributes will be used.
80 | ```
81 | 
82 | ## List of algorithms
83 | 
84 | * Graph Embedding
85 |     * Graph2Vec [1]
86 | * Node Embedding
87 |     * DeepWalk [7]
88 |     * Node2vec [8]
89 | * Graph kernels
90 |     * Random Walk Kernel (*debug needed*) [3]
91 |         * Geometrical
92 |         * K-Step
93 |     * Shortest Path Kernel [3]
94 |     * Weisfeiler-Lehman Kernel [4]
95 |         * Subtree Kernel
96 | * Graph Edit Distance [5]
97 |     * Approximated Graph Edit Distance
98 |     * Hausdorff Graph Edit Distance
99 |     * Bipartite Graph Edit Distance
100 |     * Greedy Edit Distance
101 | * Vertex Ranking [2]
102 | * Vertex Edge Overlap [2]
103 | * Bag of Nodes (a bag of words model using nodes as vocabulary)
104 | * Bag of Cliques (a bag of words model using cliques as vocabulary)
105 | * MCS [6]
106 | 
107 | 
108 | ## Publications associated
109 | 
110 | * [1] Narayanan, Annamalai and Chandramohan, Mahinthan and Venkatesan, Rajasekar and Chen, Lihui and Liu, Yang. Graph2vec: Learning distributed representations of graphs. MLG 2017, 13th International Workshop on Mining and Learning with Graphs (MLGWorkshop 2017).
111 | * [2] Papadimitriou, P., Dasdan, A., & Garcia-Molina, H. (2010). Web graph similarity for anomaly detection. Journal of Internet Services and Applications, 1(1), 19-30.
112 | * [3] Vishwanathan, S. V. N., Schraudolph, N. N., Kondor, R., & Borgwardt, K. M. (2010). Graph kernels. Journal of Machine Learning Research, 11(Apr), 1201-1242.
113 | * [4] Shervashidze, N., Schweitzer, P., Leeuwen, E. J. V., Mehlhorn, K., & Borgwardt, K. M. (2011). Weisfeiler-lehman graph kernels. Journal of Machine Learning Research, 12(Sep), 2539-2561.
114 | * [5] Fischer, A., Riesen, K., & Bunke, H. (2017). Improved quadratic time approximation of graph edit distance by combining Hausdorff matching and greedy assignment. Pattern Recognition Letters, 87, 55-62.
115 | * [6] Bunke, H., & Shearer, K. (1998). A graph distance metric based on the maximal common subgraph. Pattern Recognition Letters, 19(3-4), 255-259.
116 | * [7] Perozzi, B., Al-Rfou, R., & Skiena, S. (2014, August). Deepwalk: Online learning of social representations.
In Proceedings of the 20th ACM SIGKDD international conference on Knowledge discovery and data mining (pp. 701-710). ACM. 117 | * [8] node2vec: Scalable Feature Learning for Networks. Aditya Grover and Jure Leskovec. Knowledge Discovery and Data Mining, 2016. 118 | 119 | ## Author(s) 120 | 121 | Jacques Fize, *jacques[dot]fize[at]cirad[dot]fr* 122 | 123 | Some algorithms from other projects were integrated to Gmatch4py. **Be assured that 124 | each code is associated with a reference to the original.** 125 | 126 | 127 | ## CHANGELOG 128 | 129 | ### 18.06.2022 130 | * Debug the `skipgram` import 131 | * Gmatch4py should work with new gensim version 132 | 133 | 134 | ### 7.05.2019 135 | 136 | * Debug (problems with float edge weight) 137 | * Add the `AbstractEditDistance.edit_path(G,H)` method that return the edit path, the cost matrix and the selected cost index in the cost matrix 138 | * Add a tqdm progress bar for the `gmatch4py.helpers.reader.import_dir()` function 139 | 140 | ### 12.03.2019 141 | 142 | * Add Node2vec 143 | 144 | ### 05.03.2019 145 | 146 | * Add Graph Embedding algorithms 147 | * Remove depreciated methods and classes 148 | * Add logo 149 | * Update documentation 150 | 151 | 152 | ### 25.02.2019 153 | * Add New Graph Class. Features : Cython Extensions, precomputed values (degrees, neighbor info), hash representation of edges and nodes for a faster comparison 154 | * Some algorithms are parallelized such as graph edit distances or Jaccard 155 | 156 | ## TODO List 157 | 158 | * Debug algorithms --> Random Walk Kernel, Deltacon 159 | * Optimize algorithms --> Vertex Ranking 160 | 161 | -------------------------------------------------------------------------------- /gmatch4py/__init__.py: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | 3 | # Graph Edit Distance algorithms import 4 | from .ged.graph_edit_dist import * 5 | from .ged.greedy_edit_distance import * 6 | from .ged.bipartite_graph_matching_2 import * 7 | from .ged.hausdorff_edit_distance import * 8 | 9 | # Kernels algorithms import 10 | from .kernels.weisfeiler_lehman import * 11 | from .kernels.shortest_path_kernel import * 12 | 13 | # Graph Embedding import 14 | from .embedding.graph2vec import * 15 | from .embedding.deepwalk import * 16 | from .embedding.node2vec import * 17 | # Helpers import 18 | from .helpers.reader import * 19 | from .helpers.general import * 20 | 21 | # Basic algorithms import 22 | from .bag_of_cliques import * 23 | from .mcs import * 24 | from .vertex_edge_overlap import * 25 | from .vertex_ranking import * 26 | from .jaccard import * 27 | from .bon import * 28 | -------------------------------------------------------------------------------- /gmatch4py/bag_of_cliques.pyx: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | 3 | import copy 4 | from typing import Sequence 5 | 6 | import networkx as nx 7 | import numpy as np 8 | cimport numpy as np 9 | from scipy.sparse import csr_matrix,lil_matrix 10 | import sys 11 | 12 | from .base cimport Base 13 | 14 | 15 | cdef class BagOfCliques(Base): 16 | """ 17 | The Bag of Cliques is representation of a graph corpus using the well-known *bag of words* model. Here, instead of 18 | word, we use unique cliques found in the graphs as a vocabulary. A clique is a highly connected graph where all the vertices are connected by an edge. 19 | 20 | The resulting representation is then use to compute similarity value between graphs. 
For this purpose, we use the cosine 21 | similarity. 22 | """ 23 | 24 | def __init__(self): 25 | """ 26 | Constructor of Bag Of Cliques. 27 | """ 28 | Base.__init__(self,0,True) 29 | 30 | 31 | cpdef np.ndarray compare(self,list listgs, list selected): 32 | b=BagOfCliques() 33 | bog=b.get_bag_of_cliques(listgs).astype(np.float32) 34 | cdef int n=bog.shape[0] 35 | cdef np.ndarray scores = np.zeros((n,n)) 36 | cdef int i 37 | for i in range(len(scores)): 38 | if selected: 39 | if not i in selected: 40 | continue 41 | bog_i=bog[i] 42 | for j in range(i,len(scores)): 43 | bog_j=bog[j] 44 | scores[i,j]=(np.dot(bog_i,bog_j.T))/(np.sqrt(np.sum(bog_i**2))*np.sqrt(np.sum(bog_j**2))) # Can be computed in one line 45 | scores[j,i]=scores[i,j] 46 | return scores 47 | 48 | def get_unique_cliques(self, graphs): 49 | """ 50 | Return a cliques found in a set of graphs 51 | Parameters 52 | ---------- 53 | graphs: networkx.Graph array 54 | list of graphs 55 | 56 | Returns 57 | ------- 58 | list 59 | Cliques set 60 | """ 61 | t = {} 62 | c_ = 0 63 | cdef list clique_vocab = [] 64 | cdef list cli_temp 65 | cdef list cliques 66 | cdef int len_graphs=len(graphs) 67 | cdef int km= -1 68 | for g in graphs: 69 | km+=1 70 | if not g: 71 | continue 72 | cliques = list(nx.find_cliques(nx.Graph(g))) 73 | for clique in cliques: 74 | cli_temp = copy.deepcopy(clique) 75 | new_clique = False 76 | for i in range(len(clique)): 77 | flag = False 78 | v = None # vertex deleted 79 | for vertex in cli_temp: 80 | if vertex in t: 81 | v = vertex 82 | flag = True 83 | 84 | if not flag in t: 85 | v = cli_temp[0] 86 | t[v] = {} 87 | new_clique = True 88 | t = t[v] 89 | cli_temp.remove(v) 90 | 91 | if new_clique: 92 | c_ += 1 93 | clique_vocab.append(clique) 94 | return clique_vocab 95 | 96 | 97 | def clique2str(self,cliques): 98 | """ 99 | Return a "hash" string of a clique 100 | 101 | Parameters 102 | ---------- 103 | cliques: array 104 | 105 | Returns 106 | ------- 107 | str 108 | hash of a clique 109 | """ 110 | try: 111 | return "".join(sorted(cliques)) 112 | except: 113 | return "".join(sorted(list(map(str,cliques)))) 114 | 115 | def transform_clique_vocab(self,clique_vocab): 116 | """ 117 | Transform cliques found in `get_unique_cliques()` in a proper format to build the "Bag of Cliques" 118 | 119 | Parameters 120 | ---------- 121 | clique_vocab : array 122 | contains cliques 123 | Returns 124 | ------- 125 | dict 126 | new clique vocab format 127 | """ 128 | cdef dict new_vocab={} 129 | cdef int len_voc=len(clique_vocab) 130 | for c in range(len_voc): 131 | #print(c) 132 | new_vocab[self.clique2str(clique_vocab[c])]=c 133 | return new_vocab 134 | 135 | def get_bag_of_cliques(self, graphs): 136 | """ 137 | Return a the Bag of Cliques representation from a graph set. 
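        Each row of the returned matrix is a binary vector indicating which vocabulary cliques occur in the corresponding graph.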
138 | 
139 |         Parameters
140 |         ----------
141 |         graphs : networkx.Graph array
142 |             list of graphs
143 | 
144 |         Returns
145 |         -------
146 |         np.ndarray
147 |             bag of cliques
148 |         """
149 |         cdef list clique_vocab=self.get_unique_cliques(graphs)
150 |         cdef dict map_str_cliques=self.transform_clique_vocab(clique_vocab)
151 |         cdef int l_v=len(clique_vocab)
152 |         boc = np.zeros((len(graphs), l_v))
153 |         cdef np.ndarray vector
154 |         cdef list cliques
155 |         cdef str hash
156 | 
157 |         for g in range(len(graphs)):
158 |             #sys.stdout.write("\r{0}/{1}".format(g,len(graphs)))
159 |             gr = graphs[g]
160 |             vector = np.zeros(l_v)
161 |             cliques = list(nx.find_cliques(nx.Graph(gr)))
162 |             for clique in cliques:
163 |                 hash=self.clique2str(clique)
164 |                 if hash in map_str_cliques:
165 |                     vector[map_str_cliques[hash]] = 1
166 |             boc[g] = vector
167 |         return boc
168 | 
--------------------------------------------------------------------------------
/gmatch4py/base.pxd:
--------------------------------------------------------------------------------
1 | cimport numpy as np
2 | 
3 | cdef class Base:
4 |     ## Attribute(s)
5 |     cdef int type_alg
6 |     cdef bint normalized
7 |     cdef int cpu_count
8 |     cdef str node_attr_key
9 |     cdef str edge_attr_key
10 |     ## Methods
11 |     cpdef np.ndarray compare(self,list graph_list, list selected)
12 |     cpdef np.ndarray compare_old(self,list listgs, list selected)
13 |     cpdef np.ndarray distance(self, np.ndarray matrix)
14 |     cpdef np.ndarray similarity(self, np.ndarray matrix)
15 |     cpdef bint isAccepted(self,G,index,selected)
16 |     cpdef np.ndarray get_selected_array(self,selected,size_corpus)
17 | 
18 |     cpdef set_attr_graph_used(self, str node_attr_key, str edge_attr_key)
19 | 
20 | 
--------------------------------------------------------------------------------
/gmatch4py/base.pyx:
--------------------------------------------------------------------------------
1 | # coding = utf-8
2 | 
3 | import numpy as np
4 | cimport numpy as np
5 | import networkx as nx
6 | cimport cython
7 | import multiprocessing
8 | 
9 | 
10 | 
11 | cpdef np.ndarray minmax_scale(np.ndarray matrix):
12 |     """
13 |     Scale the matrix by its maximum value (invalid entries are masked first). Optimized so it works with Cython.
14 |     :param matrix: matrix to scale
15 |     :return: scaled matrix
16 |     """
17 |     cdef double min_,max_
18 |     cdef np.ndarray x
19 |     x=np.ma.masked_invalid(matrix)
20 |     max_=np.max(x)
21 |     return x/(max_)
22 | 
23 | 
24 | cdef class Base:
25 |     """
26 |     This class defines the methods common to all graph matching algorithms.
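    Concrete matchers (graph edit distances, kernels, embedding-based measures, ...) subclass it and implement `compare()`.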
27 | 28 | Attributes 29 | ---------- 30 | type_alg : int 31 | Indicate the type of measure returned by the algorithm : 32 | 33 | * 0 : similarity 34 | * 1 : distance 35 | normalized : bool 36 | Indicate if the algorithm return normalized results (between 0 and 1) 37 | 38 | """ 39 | def __cinit__(self): 40 | self.type_alg=0 41 | self.normalized=False 42 | 43 | def __init__(self,type_alg,normalized,node_attr_key="",edge_attr_key=""): 44 | """ 45 | Constructor of Base 46 | 47 | Parameters 48 | ---------- 49 | type_alg : int 50 | Indicate the type of measure returned by the algorithm : 51 | 52 | * **0** : similarity 53 | * **1** : distance 54 | normalized : bool 55 | Indicate if the algorithm return normalized results (between 0 and 1) 56 | """ 57 | if type_alg <0: 58 | self.type_alg=0 59 | elif type_alg >1 : 60 | self.type_alg=1 61 | else: 62 | self.type_alg=type_alg 63 | self.normalized=normalized 64 | self.cpu_count=multiprocessing.cpu_count() 65 | self.node_attr_key=node_attr_key 66 | self.edge_attr_key=edge_attr_key 67 | 68 | cpdef set_attr_graph_used(self, str node_attr_key, str edge_attr_key): 69 | """ 70 | Set graph attribute used by the algorithm to compare graphs. 71 | Parameters 72 | ---------- 73 | node_attr_key : str 74 | key of the node attribute 75 | edge_attr_key: str 76 | key of the edge attribute 77 | 78 | """ 79 | self.node_attr_key=node_attr_key 80 | self.edge_attr_key=edge_attr_key 81 | 82 | cpdef np.ndarray get_selected_array(self,selected,size_corpus): 83 | """ 84 | Return an array which define which graph will be compared in the algorithms. 85 | Parameters 86 | ---------- 87 | selected : list 88 | indices of graphs you wish to compare 89 | size_corpus : 90 | size of your dataset 91 | 92 | Returns 93 | ------- 94 | np.ndarray 95 | selected vector (1 -> selected, 0 -> not selected) 96 | """ 97 | cdef double[:] selected_test = np.zeros(size_corpus) 98 | if not selected == None: 99 | for ix in range(len(selected)): 100 | selected_test[selected[ix]]=1 101 | return np.array(selected_test) 102 | else: 103 | return np.array(selected_test)+1 104 | 105 | 106 | cpdef np.ndarray compare_old(self,list listgs, list selected): 107 | """ 108 | Soon will be depreciated ! To store the old version of an algorithm. 109 | Parameters 110 | ---------- 111 | listgs : list 112 | list of graphs 113 | selected 114 | selected graphs 115 | 116 | Returns 117 | ------- 118 | np.ndarray 119 | distance/similarity matrix 120 | """ 121 | pass 122 | 123 | @cython.boundscheck(False) 124 | cpdef np.ndarray compare(self,list graph_list, list selected): 125 | """ 126 | Return the similarity/distance matrix using the current algorithm. 127 | 128 | >>>Base.compare([nx.Graph(),nx.Graph()],None) 129 | >>>Base.compare([nx.Graph(),nx.Graph()],[0,1]) 130 | 131 | Parameters 132 | ---------- 133 | graph_list : networkx.Graph array 134 | Contains the graphs to compare 135 | selected : int array 136 | Sometimes, you only wants to compute similarity of some graphs to every graphs. If so, indicate their indices in 137 | `graph_list`, else, put the None value. 
138 | the None value 139 | Returns 140 | ------- 141 | np.ndarray 142 | distance/similarity matrix 143 | 144 | """ 145 | pass 146 | 147 | cpdef np.ndarray distance(self, np.ndarray matrix): 148 | """ 149 | Return a normalized distance matrix 150 | Parameters 151 | ---------- 152 | matrix : np.ndarray 153 | Similarity/distance matrix you wish to transform 154 | 155 | Returns 156 | ------- 157 | np.ndarray 158 | distance matrix 159 | """ 160 | if self.type_alg == 1: 161 | if not self.normalized: 162 | matrix=np.ma.getdata(minmax_scale(matrix)) 163 | return matrix 164 | else: 165 | if not self.normalized: 166 | matrix=np.ma.getdata(minmax_scale(matrix)) 167 | return 1-matrix 168 | 169 | cpdef np.ndarray similarity(self, np.ndarray matrix): 170 | """ 171 | Return a normalized similarity matrix 172 | Parameters 173 | ---------- 174 | matrix : np.ndarray 175 | Similarity/distance matrix you wish to transform 176 | 177 | Returns 178 | ------- 179 | np.array 180 | similarity matrix 181 | """ 182 | if self.type_alg == 0: 183 | return matrix 184 | else: 185 | if not self.normalized: 186 | matrix=np.ma.getdata(minmax_scale(matrix)) 187 | return 1-matrix 188 | 189 | 190 | cpdef bint isAccepted(self,G,index,selected): 191 | """ 192 | Indicate if the graph will be compared to the other. A graph is "accepted" if : 193 | * G exists(!= None) and not empty (|vertices(G)| >0) 194 | * If selected graph to compare were indicated, check if G exists in selected 195 | 196 | Parameters 197 | ---------- 198 | G : networkx.Graph 199 | Graph 200 | index : int 201 | index in the graph list parameter in `Base.compare()` 202 | selected : int array 203 | `selected` parameter value in `Base.compare()` 204 | 205 | Returns 206 | ------- 207 | bool : 208 | if is accepted 209 | """ 210 | f=True 211 | if not G: 212 | f=False 213 | elif len(G)== 0: 214 | f=False 215 | if selected: 216 | if not index in selected: 217 | f=False 218 | return f 219 | -------------------------------------------------------------------------------- /gmatch4py/bon.pyx: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | 3 | import networkx as nx 4 | import numpy as np 5 | cimport numpy as np 6 | from sklearn.metrics.pairwise import cosine_similarity 7 | from .base cimport Base 8 | 9 | cdef class BagOfNodes(Base): 10 | """ 11 | We could call this algorithm Bag of nodes 12 | """ 13 | def __init__(self): 14 | Base.__init__(self,0,True) 15 | 16 | cpdef np.ndarray compare(self,list graph_list, list selected): 17 | nodes = list() 18 | for g in graph_list: 19 | nodes.extend(list(g.nodes())) 20 | 21 | vocabulary = list(set(nodes)) 22 | hash_voc = {} 23 | i = 0 24 | for se in vocabulary: 25 | hash_voc[se] = i 26 | i += 1 27 | n, m = len(graph_list), len(hash_voc) 28 | bow_matrix = np.zeros((n, m)) 29 | i = 0 30 | for g in range(len(graph_list)): 31 | graph = graph_list[g] 32 | nodes = list(graph.nodes()) 33 | for nod in nodes: 34 | j = hash_voc[nod] 35 | bow_matrix[i, j] = 1 36 | i += 1 37 | 38 | sim_matrix = cosine_similarity(bow_matrix) 39 | np.fill_diagonal(sim_matrix, 1) 40 | return sim_matrix 41 | -------------------------------------------------------------------------------- /gmatch4py/embedding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacquesfize/GMatch4py/4fc0a822514c65c0d8b12d090b5b89c0af50ef2a/gmatch4py/embedding/__init__.py -------------------------------------------------------------------------------- 
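As a quick illustration of the workflow shared by the matchers above (instantiate, call `compare()`, then normalize), here is a minimal sketch using the `BagOfNodes` class defined just above; the two toy graphs are placeholders:

```python
import networkx as nx
import gmatch4py as gm

g1 = nx.path_graph(4)   # toy graphs, for illustration only
g2 = nx.cycle_graph(4)

bon = gm.BagOfNodes()
raw = bon.compare([g1, g2], None)  # cosine similarity between node indicator vectors
print(bon.similarity(raw))         # normalized similarity matrix
print(bon.distance(raw))           # 1 - similarity
```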
/gmatch4py/embedding/deepwalk.pyx: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import sys 6 | import random 7 | 8 | from io import open 9 | from argparse import ArgumentParser, FileType, ArgumentDefaultsHelpFormatter 10 | from collections import Counter 11 | from concurrent.futures import ProcessPoolExecutor 12 | import logging 13 | from multiprocessing import cpu_count 14 | 15 | import networkx as nx 16 | import numpy as np 17 | cimport numpy as np 18 | from six import text_type as unicode 19 | from six import iteritems 20 | from six.moves import range 21 | 22 | from gensim.models import Word2Vec 23 | from sklearn.metrics.pairwise import cosine_similarity 24 | from joblib import Parallel, delayed 25 | import psutil 26 | 27 | cimport cython 28 | from ..base cimport Base 29 | import gmatch4py.embedding.graph as graph2 30 | import gmatch4py.embedding.walks as serialized_walks 31 | from .skipgram import Skipgram 32 | 33 | 34 | p = psutil.Process(os.getpid()) 35 | try: 36 | p.set_cpu_affinity(list(range(cpu_count()))) 37 | except AttributeError: 38 | try: 39 | p.cpu_affinity(list(range(cpu_count()))) 40 | except AttributeError: 41 | pass 42 | 43 | 44 | def process(gr, number_walks = 10, walk_length = 40, window_size = 5, vertex_freq_degree = False, workers = 1, representation_size = 64, max_memory_data_size = 1000000000, seed = 0): 45 | """ 46 | Return a DeepWalk embedding for a graph 47 | 48 | Parameters 49 | ---------- 50 | gr : nx.Graph 51 | graph 52 | number_walks : int, optional 53 | Number of walk (the default is 10) 54 | walk_length : int, optional 55 | Length of the random walk started at each node (the default is 40) 56 | window_size : int, optional 57 | Window size of skipgram model. (the default is 5) 58 | vertex_freq_degree : bool, optional 59 | Use vertex degree to estimate the frequency of nodes (the default is False) 60 | workers : int, optional 61 | Number of parallel processes (the default is 1) 62 | representation_size : int, optional 63 | Number of latent dimensions to learn for each node (the default is 64) 64 | max_memory_data_size : int, optional 65 | 'Size to start dumping walks to disk, instead of keeping them in memory. (the default is 1000000000) 66 | seed : int, optional 67 | Seed for random walk generator (the default is 0) 68 | 69 | Returns 70 | ------- 71 | np.array 72 | DeepWalk embedding 73 | """ 74 | 75 | if len(gr.edges())<1: 76 | return np.zeros((1,representation_size)) 77 | G = graph2.from_networkx(gr.copy(), undirected=gr.is_directed()) 78 | num_walks = len(G.nodes()) * number_walks 79 | 80 | data_size = num_walks * walk_length 81 | 82 | #print("Data size (walks*length): {}".format(data_size)) 83 | 84 | if data_size < max_memory_data_size: 85 | #print("Walking...") 86 | walks = graph2.build_deepwalk_corpus(G, num_paths=number_walks, 87 | path_length=walk_length, alpha=0, rand=random.Random(seed)) 88 | #print("Training...") 89 | model = Word2Vec(walks, vector_size=representation_size, 90 | window=window_size, min_count=0, sg=1, hs=1, workers=workers) 91 | else: 92 | #print("Data size {} is larger than limit (max-memory-data-size: {}). 
Dumping walks to disk.".format( 93 | # data_size, max_memory_data_size)) 94 | #print("Walking...") 95 | 96 | walks_filebase = "temp.walks" 97 | walk_files = serialized_walks.write_walks_to_disk(G, walks_filebase, num_paths=number_walks, 98 | path_length=walk_length, alpha=0, rand=random.Random(seed), 99 | num_workers=workers) 100 | 101 | #print("Counting vertex frequency...") 102 | if not vertex_freq_degree: 103 | vertex_counts = serialized_walks.count_textfiles( 104 | walk_files, workers) 105 | else: 106 | # use degree distribution for frequency in tree 107 | vertex_counts = G.degree(nodes=G.iterkeys()) 108 | 109 | #print("Training...") 110 | walks_corpus = serialized_walks.WalksCorpus(walk_files) 111 | model = Skipgram(sentences=walks_corpus, vocabulary_counts=vertex_counts, 112 | size=representation_size, 113 | window=window_size, min_count=0, trim_rule=None, workers=workers) 114 | 115 | return model.wv.vectors 116 | 117 | 118 | cdef class DeepWalk(Base): 119 | """ 120 | Based on : 121 | @inproceedings{Perozzi:2014:DOL:2623330.2623732, 122 | author = {Perozzi, Bryan and Al-Rfou, Rami and Skiena, Steven}, 123 | title = {DeepWalk: Online Learning of Social Representations}, 124 | booktitle = {Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, 125 | series = {KDD '14}, 126 | year = {2014}, 127 | isbn = {978-1-4503-2956-9}, 128 | location = {New York, New York, USA}, 129 | pages = {701--710}, 130 | numpages = {10}, 131 | url = {http://doi.acm.org/10.1145/2623330.2623732}, 132 | doi = {10.1145/2623330.2623732}, 133 | acmid = {2623732}, 134 | publisher = {ACM}, 135 | address = {New York, NY, USA}, 136 | keywords = {deep learning, latent representations, learning with partial labels, network classification, online learning, social networks}, 137 | } 138 | 139 | Original Code : https://github.com/phanein/deepwalk 140 | 141 | Modified by : Jacques Fize 142 | """ 143 | 144 | def __init__(self): 145 | Base.__init__(self,0,False) 146 | 147 | def extract_embedding(self, listgs): 148 | """ 149 | Extract DeepWalk embedding of each graph in `listgs` 150 | 151 | Parameters 152 | ---------- 153 | listgs : list 154 | list of graphs 155 | 156 | Returns 157 | ------- 158 | list 159 | list of embeddings 160 | """ 161 | 162 | from tqdm import tqdm 163 | models = Parallel(n_jobs = cpu_count())(delayed(process)(nx.Graph(g)) for g in tqdm(listgs,desc="Extracting Embeddings...")) 164 | return models 165 | 166 | @cython.boundscheck(False) 167 | cpdef np.ndarray compare(self,list listgs, list selected): 168 | # Selected is ignored 169 | models = self.extract_embedding(listgs) 170 | vector_matrix = np.array([mod.mean(axis=0) for mod in models]) # Average nodes representations 171 | cs = cosine_similarity(vector_matrix) 172 | return cs 173 | 174 | -------------------------------------------------------------------------------- /gmatch4py/embedding/graph.pyx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Graph utilities.""" 5 | 6 | import logging 7 | import sys 8 | from io import open 9 | from os import path 10 | from time import time 11 | from glob import glob 12 | from six.moves import range, zip, zip_longest 13 | from six import iterkeys 14 | 15 | try: 16 | from collections.abc import Iterable 17 | except ImportError: 18 | from collections import Iterable 19 | from collections import defaultdict 20 | 21 | import random 22 | from random import shuffle 23 | from 
itertools import product,permutations 24 | from scipy.io import loadmat 25 | from scipy.sparse import issparse 26 | 27 | logger = logging.getLogger("deepwalk") 28 | 29 | 30 | __author__ = "Bryan Perozzi" 31 | __email__ = "bperozzi@cs.stonybrook.edu" 32 | 33 | LOGFORMAT = "%(asctime).19s %(levelname)s %(filename)s: %(lineno)s %(message)s" 34 | 35 | class Graph(defaultdict): 36 | """Efficient basic implementation of nx `Graph' – Undirected graphs with self loops""" 37 | def __init__(self): 38 | super(Graph, self).__init__(list) 39 | 40 | def nodes(self): 41 | return self.keys() 42 | 43 | def adjacency_iter(self): 44 | return self.iteritems() 45 | 46 | def subgraph(self, nodes={}): 47 | subgraph = Graph() 48 | 49 | for n in nodes: 50 | if n in self: 51 | subgraph[n] = [x for x in self[n] if x in nodes] 52 | 53 | return subgraph 54 | 55 | def make_undirected(self): 56 | 57 | t0 = time() 58 | 59 | for v in self.keys(): 60 | for other in self[v]: 61 | if v != other: 62 | self[other].append(v) 63 | 64 | t1 = time() 65 | logger.info('make_directed: added missing edges {}s'.format(t1-t0)) 66 | 67 | self.make_consistent() 68 | return self 69 | 70 | def make_consistent(self): 71 | t0 = time() 72 | for k in iterkeys(self): 73 | self[k] = list(sorted(set(self[k]))) 74 | 75 | t1 = time() 76 | logger.info('make_consistent: made consistent in {}s'.format(t1-t0)) 77 | 78 | self.remove_self_loops() 79 | 80 | return self 81 | 82 | def remove_self_loops(self): 83 | 84 | removed = 0 85 | t0 = time() 86 | 87 | for x in self: 88 | if x in self[x]: 89 | self[x].remove(x) 90 | removed += 1 91 | 92 | t1 = time() 93 | 94 | logger.info('remove_self_loops: removed {} loops in {}s'.format(removed, (t1-t0))) 95 | return self 96 | 97 | def check_self_loops(self): 98 | for x in self: 99 | for y in self[x]: 100 | if x == y: 101 | return True 102 | 103 | return False 104 | 105 | def has_edge(self, v1, v2): 106 | if v2 in self[v1] or v1 in self[v2]: 107 | return True 108 | return False 109 | 110 | def degree(self, nodes=None): 111 | if isinstance(nodes, Iterable): 112 | return {v:len(self[v]) for v in nodes} 113 | else: 114 | return len(self[nodes]) 115 | 116 | def order(self): 117 | "Returns the number of nodes in the graph" 118 | return len(self) 119 | 120 | def number_of_edges(self): 121 | "Returns the number of nodes in the graph" 122 | return sum([self.degree(x) for x in self.keys()])/2 123 | 124 | def number_of_nodes(self): 125 | "Returns the number of nodes in the graph" 126 | return self.order() 127 | 128 | def random_walk(self, path_length, alpha=0, rand=random.Random(), start=None): 129 | """ Returns a truncated random walk. 130 | 131 | path_length: Length of the random walk. 132 | alpha: probability of restarts. 133 | start: the start node of the random walk. 
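        Each step continues to a uniformly random neighbor with probability 1 - alpha and restarts at the first node of the path otherwise; nodes are returned as strings.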
134 | """ 135 | G = self 136 | if start: 137 | path = [start] 138 | else: 139 | # Sampling is uniform w.r.t V, and not w.r.t E 140 | path = [rand.choice(list(G.keys()))] 141 | 142 | while len(path) < path_length: 143 | cur = path[-1] 144 | if len(G[cur]) > 0: 145 | if rand.random() >= alpha: 146 | path.append(rand.choice(G[cur])) 147 | else: 148 | path.append(path[0]) 149 | else: 150 | break 151 | return [str(node) for node in path] 152 | 153 | # TODO add build_walks in here 154 | 155 | def build_deepwalk_corpus(G, num_paths, path_length, alpha=0, 156 | rand=random.Random(0)): 157 | walks = [] 158 | 159 | nodes = list(G.nodes()) 160 | 161 | for cnt in range(num_paths): 162 | rand.shuffle(nodes) 163 | for node in nodes: 164 | walks.append(G.random_walk(path_length, rand=rand, alpha=alpha, start=node)) 165 | 166 | return walks 167 | 168 | def build_deepwalk_corpus_iter(G, num_paths, path_length, alpha=0, 169 | rand=random.Random(0)): 170 | walks = [] 171 | 172 | nodes = list(G.nodes()) 173 | 174 | for cnt in range(num_paths): 175 | rand.shuffle(nodes) 176 | for node in nodes: 177 | yield G.random_walk(path_length, rand=rand, alpha=alpha, start=node) 178 | 179 | 180 | def clique(size): 181 | return from_adjlist(permutations(range(1,size+1))) 182 | 183 | 184 | # http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks-in-python 185 | def grouper(n, iterable, padvalue=None): 186 | "grouper(3, 'abcdefg', 'x') --> ('a','b','c'), ('d','e','f'), ('g','x','x')" 187 | return zip_longest(*[iter(iterable)]*n, fillvalue=padvalue) 188 | 189 | def parse_adjacencylist(f): 190 | adjlist = [] 191 | for l in f: 192 | if l and l[0] != "#": 193 | introw = [int(x) for x in l.strip().split()] 194 | row = [introw[0]] 195 | row.extend(set(sorted(introw[1:]))) 196 | adjlist.extend([row]) 197 | 198 | return adjlist 199 | 200 | def parse_adjacencylist_unchecked(f): 201 | adjlist = [] 202 | for l in f: 203 | if l and l[0] != "#": 204 | adjlist.extend([[int(x) for x in l.strip().split()]]) 205 | 206 | return adjlist 207 | 208 | def load_adjacencylist(file_, undirected=False, chunksize=10000, unchecked=True): 209 | 210 | if unchecked: 211 | parse_func = parse_adjacencylist_unchecked 212 | convert_func = from_adjlist_unchecked 213 | else: 214 | parse_func = parse_adjacencylist 215 | convert_func = from_adjlist 216 | 217 | adjlist = [] 218 | 219 | t0 = time() 220 | 221 | total = 0 222 | with open(file_) as f: 223 | for idx, adj_chunk in enumerate(map(parse_func, grouper(int(chunksize), f))): 224 | adjlist.extend(adj_chunk) 225 | total += len(adj_chunk) 226 | 227 | t1 = time() 228 | 229 | logger.info('Parsed {} edges with {} chunks in {}s'.format(total, idx, t1-t0)) 230 | 231 | t0 = time() 232 | G = convert_func(adjlist) 233 | t1 = time() 234 | 235 | logger.info('Converted edges to graph in {}s'.format(t1-t0)) 236 | 237 | if undirected: 238 | t0 = time() 239 | G = G.make_undirected() 240 | t1 = time() 241 | logger.info('Made graph undirected in {}s'.format(t1-t0)) 242 | 243 | return G 244 | 245 | 246 | def load_edgelist(file_, undirected=True): 247 | G = Graph() 248 | with open(file_) as f: 249 | for l in f: 250 | x, y = l.strip().split()[:2] 251 | x = int(x) 252 | y = int(y) 253 | G[x].append(y) 254 | if undirected: 255 | G[y].append(x) 256 | 257 | G.make_consistent() 258 | return G 259 | 260 | 261 | def load_matfile(file_, variable_name="network", undirected=True): 262 | mat_varables = loadmat(file_) 263 | mat_matrix = mat_varables[variable_name] 264 | 265 | return from_numpy(mat_matrix, 
undirected) 266 | 267 | 268 | def from_networkx(G_input, undirected=True): 269 | G = Graph() 270 | 271 | for _, x in enumerate(G_input): 272 | for y in iterkeys(G_input[x]): 273 | G[x].append(y) 274 | 275 | if undirected: 276 | G.make_undirected() 277 | 278 | return G 279 | 280 | 281 | def from_numpy(x, undirected=True): 282 | G = Graph() 283 | 284 | if issparse(x): 285 | cx = x.tocoo() 286 | for i,j,v in zip(cx.row, cx.col, cx.data): 287 | G[i].append(j) 288 | else: 289 | raise Exception("Dense matrices not yet supported.") 290 | 291 | if undirected: 292 | G.make_undirected() 293 | 294 | G.make_consistent() 295 | return G 296 | 297 | 298 | def from_adjlist(adjlist): 299 | G = Graph() 300 | 301 | for row in adjlist: 302 | node = row[0] 303 | neighbors = row[1:] 304 | G[node] = list(sorted(set(neighbors))) 305 | 306 | return G 307 | 308 | 309 | def from_adjlist_unchecked(adjlist): 310 | G = Graph() 311 | 312 | for row in adjlist: 313 | node = row[0] 314 | neighbors = row[1:] 315 | G[node] = neighbors 316 | 317 | return G 318 | 319 | 320 | -------------------------------------------------------------------------------- /gmatch4py/embedding/graph2vec.pyx: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import json 3 | import glob 4 | 5 | import pandas as pd 6 | import networkx as nx 7 | from tqdm import tqdm 8 | cimport numpy as np 9 | import numpy.distutils.system_info as sysinfo 10 | 11 | from joblib import Parallel, delayed 12 | from gensim.models.doc2vec import Doc2Vec, TaggedDocument 13 | from sklearn.metrics.pairwise import cosine_similarity 14 | 15 | from ..base cimport Base 16 | cimport cython 17 | 18 | 19 | class WeisfeilerLehmanMachine: 20 | """ 21 | Weisfeiler Lehman feature extractor class. 22 | """ 23 | def __init__(self, graph, features, iterations): 24 | """ 25 | Initialization method which executes feature extraction. 26 | 27 | Parameters 28 | ---------- 29 | graph : nx.Graph 30 | graph 31 | features : dict 32 | Feature hash table. 33 | iterations : int 34 | number of WL iteration 35 | 36 | """ 37 | 38 | self.iterations = iterations 39 | self.graph = graph 40 | self.features = features 41 | self.nodes = self.graph.nodes() 42 | self.extracted_features = [str(v) for k,v in features.items()] 43 | self.do_recursions() 44 | 45 | def do_a_recursion(self): 46 | """ 47 | The method does a single WL recursion. 48 | 49 | Returns 50 | ------- 51 | dict 52 | The hash table with extracted WL features. 53 | """ 54 | 55 | new_features = {} 56 | for node in self.nodes: 57 | nebs = self.graph.neighbors(node) 58 | degs = [self.features[neb] for neb in nebs] 59 | features = "_".join([str(self.features[node])]+list(set(sorted([str(deg) for deg in degs])))) 60 | hash_object = hashlib.md5(features.encode()) 61 | hashing = hash_object.hexdigest() 62 | new_features[node] = hashing 63 | self.extracted_features = self.extracted_features + list(new_features.values()) 64 | return new_features 65 | 66 | def do_recursions(self): 67 | """ 68 | The method does a series of WL recursions. 69 | """ 70 | for iteration in range(self.iterations): 71 | self.features = self.do_a_recursion() 72 | 73 | 74 | def dataset_reader(graph): 75 | """ 76 | Function to extract features from a networkx graph 77 | 78 | Parameters 79 | ---------- 80 | graph : nx.Graph 81 | graph 82 | 83 | Returns 84 | ------- 85 | dict 86 | Features hash table. 
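    For instance, on a 3-node path graph the extracted features are the node degrees:

    >>> g, feats = dataset_reader(nx.path_graph(3))
    >>> feats
    {0: 1, 1: 2, 2: 1}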
87 | """ 88 | 89 | features = dict(nx.degree(graph)) 90 | 91 | features = {k:v for k,v, in features.items()} 92 | return graph, features 93 | 94 | 95 | def feature_extractor(graph, ix, rounds): 96 | """ 97 | Function to extract WL features from a graph 98 | 99 | Parameters 100 | ---------- 101 | graph : nx.Graph 102 | graph 103 | ix : int 104 | index of the graph in the dataset 105 | rounds : int 106 | number of WL iterations 107 | 108 | Returns 109 | ------- 110 | TaggedDocument 111 | random walks 112 | """ 113 | 114 | graph, features = dataset_reader(graph) 115 | machine = WeisfeilerLehmanMachine(graph,features,rounds) 116 | doc = TaggedDocument(words = machine.extracted_features , tags = ["g_{0}".format(ix)]) 117 | return doc 118 | 119 | 120 | 121 | def generate_model(graphs, iteration = 2, dimensions = 64, min_count = 5, down_sampling = 0.0001, learning_rate = 0.0001, epochs = 10, workers = 4 ): 122 | """ 123 | Main function to read the graph list, extract features, learn the embedding and save it. 124 | 125 | Parameters 126 | ---------- 127 | graphs : nx.Graph 128 | Input graph 129 | iteration : int, optional 130 | number of iteration (the default is 2) 131 | dimensions : int, optional 132 | output vector dimension (the default is 64) 133 | min_count : int, optional 134 | min count parameter of Doc2vec model (the default is 5) 135 | down_sampling : float, optional 136 | Down sampling rate for frequent features. (the default is 0.0001) 137 | learning_rate : float, optional 138 | Initial learning rate (the default is 0.0001, which [default_description]) 139 | epochs : int, optional 140 | Number of epochs (the default is 10) 141 | workers : int, optional 142 | Number of workers (the default is 4) 143 | 144 | Returns 145 | ------- 146 | [type] 147 | [description] 148 | """ 149 | 150 | document_collections = Parallel(n_jobs = workers)(delayed(feature_extractor)(g, ix,iteration) for ix,g in tqdm(enumerate(graphs),desc="Extracting Features...")) 151 | graphs=[nx.relabel_nodes(g,{node:str(node) for node in list(g.nodes)},copy=True) for g in graphs] 152 | model = Doc2Vec(document_collections, 153 | vector_size = dimensions, 154 | window = 0, 155 | min_count = min_count, 156 | dm = 0, 157 | sample = down_sampling, 158 | workers = workers, 159 | epochs = epochs, 160 | alpha = learning_rate) 161 | return model 162 | 163 | cdef class Graph2Vec(Base): 164 | """ 165 | Based on : 166 | graph2vec: Learning distributed representations of graphs. 
167 | Narayanan, Annamalai and Chandramohan, Mahinthan and Venkatesan, Rajasekar and Chen, Lihui and Liu, Yang 168 | MLG 2017, 13th International Workshop on Mining and Learning with Graphs (MLGWorkshop 2017) 169 | 170 | Original Code : https://github.com/benedekrozemberczki/graph2vec 171 | 172 | Modified by : Jacques Fize 173 | """ 174 | 175 | def __init__(self): 176 | Base.__init__(self,0,False) 177 | 178 | @cython.boundscheck(False) 179 | cpdef np.ndarray compare(self,list listgs, list selected): 180 | # Selected is ignored 181 | model = generate_model(listgs) 182 | vector_matrix = model.docvecs.vectors_docs 183 | cs = cosine_similarity(vector_matrix) 184 | return cs 185 | -------------------------------------------------------------------------------- /gmatch4py/embedding/node2vec.pyx: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | from gensim.models import Word2Vec 6 | from sklearn.metrics.pairwise import cosine_similarity 7 | 8 | from ..base cimport Base 9 | cimport cython 10 | from joblib import Parallel, delayed 11 | import networkx as nx 12 | 13 | class Graph(): 14 | def __init__(self, nx_G, is_directed, p, q): 15 | self.G = nx_G 16 | self.is_directed = is_directed 17 | self.p = p 18 | self.q = q 19 | 20 | def node2vec_walk(self, walk_length, start_node): 21 | ''' 22 | Simulate a random walk starting from start node. 23 | ''' 24 | G = self.G 25 | alias_nodes = self.alias_nodes 26 | alias_edges = self.alias_edges 27 | 28 | walk = [start_node] 29 | 30 | while len(walk) < walk_length: 31 | cur = walk[-1] 32 | cur_nbrs = sorted(G.neighbors(cur)) 33 | if len(cur_nbrs) > 0: 34 | if len(walk) == 1: 35 | walk.append( 36 | cur_nbrs[alias_draw(alias_nodes[cur][0], alias_nodes[cur][1])]) 37 | else: 38 | prev = walk[-2] 39 | next = cur_nbrs[alias_draw(alias_edges[(prev, cur)][0], 40 | alias_edges[(prev, cur)][1])] 41 | walk.append(next) 42 | else: 43 | break 44 | 45 | return walk 46 | 47 | def simulate_walks(self, num_walks, walk_length): 48 | ''' 49 | Repeatedly simulate random walks from each node. 50 | ''' 51 | # sys.stdout.write("\r") 52 | G = self.G 53 | walks = [] 54 | nodes = list(G.nodes) 55 | for walk_iter in range(num_walks): 56 | # sys.stdout.write( 57 | # '\rWalk iteration: {0}/{1}'.format(walk_iter + 1, num_walks)) 58 | random.shuffle(nodes) 59 | for node in nodes: 60 | walks.append(self.node2vec_walk( 61 | walk_length=walk_length, start_node=node)) 62 | 63 | return walks 64 | 65 | def get_alias_edge(self, src, dst): 66 | ''' 67 | Get the alias edge setup lists for a given edge. 68 | ''' 69 | G = self.G 70 | p = self.p 71 | q = self.q 72 | 73 | unnormalized_probs = [] 74 | for dst_nbr in sorted(G.neighbors(dst)): 75 | if dst_nbr == src: 76 | unnormalized_probs.append(G[dst][dst_nbr]['weight'] / p) 77 | elif G.has_edge(dst_nbr, src): 78 | unnormalized_probs.append(G[dst][dst_nbr]['weight']) 79 | else: 80 | unnormalized_probs.append(G[dst][dst_nbr]['weight'] / q) 81 | norm_const = sum(unnormalized_probs) 82 | normalized_probs = [ 83 | float(u_prob) / norm_const for u_prob in unnormalized_probs] 84 | 85 | return alias_setup(normalized_probs) 86 | 87 | def preprocess_transition_probs(self): 88 | ''' 89 | Preprocessing of transition probabilities for guiding the random walks. 
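        Assumes every edge carries a 'weight' attribute; Node2Vec.compare() sets weight=1 on all edges before building this object.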
90 |         '''
91 |         G = self.G
92 |         is_directed = self.is_directed
93 | 
94 |         alias_nodes = {}
95 |         for node in list(G.nodes):
96 |             unnormalized_probs = [G[node][nbr]['weight']
97 |                                   for nbr in sorted(G.neighbors(node))]
98 |             norm_const = sum(unnormalized_probs)
99 |             normalized_probs = [
100 |                 float(u_prob) / norm_const for u_prob in unnormalized_probs]
101 |             alias_nodes[node] = alias_setup(normalized_probs)
102 | 
103 |         alias_edges = {}
104 |         triads = {}
105 | 
106 |         if is_directed:
107 |             for edge in list(G.edges()):
108 |                 alias_edges[edge] = self.get_alias_edge(edge[0], edge[1])
109 |         else:
110 |             for edge in list(G.edges()):
111 |                 alias_edges[edge] = self.get_alias_edge(edge[0], edge[1])
112 |                 alias_edges[(edge[1], edge[0])] = self.get_alias_edge(
113 |                     edge[1], edge[0])
114 | 
115 |         self.alias_nodes = alias_nodes
116 |         self.alias_edges = alias_edges
117 | 
118 |         return
119 | 
120 | 
121 | def alias_setup(probs):
122 |     '''
123 |     Compute utility lists for non-uniform sampling from discrete distributions.
124 |     Refer to https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/
125 |     for details
126 |     '''
127 |     K = len(probs)
128 |     q = np.zeros(K)
129 |     J = np.zeros(K, dtype=int)
130 | 
131 |     smaller = []
132 |     larger = []
133 |     for kk, prob in enumerate(probs):
134 |         q[kk] = K * prob
135 |         if q[kk] < 1.0:
136 |             smaller.append(kk)
137 |         else:
138 |             larger.append(kk)
139 | 
140 |     while len(smaller) > 0 and len(larger) > 0:
141 |         small = smaller.pop()
142 |         large = larger.pop()
143 | 
144 |         J[small] = large
145 |         q[large] = q[large] + q[small] - 1.0
146 |         if q[large] < 1.0:
147 |             smaller.append(large)
148 |         else:
149 |             larger.append(large)
150 | 
151 |     return J, q
152 | 
153 | 
154 | def alias_draw(J, q):
155 |     '''
156 |     Draw sample from a non-uniform discrete distribution using alias sampling.
157 |     '''
158 |     K = len(J)
159 | 
160 |     kk = int(np.floor(np.random.rand() * K))
161 |     if np.random.rand() < q[kk]:
162 |         return kk
163 |     else:
164 |         return J[kk]
165 | 
166 | 
167 | def learn_embeddings(walks, dimensions, window_size, nb_workers, nb_iter):
168 |     '''
169 |     Learn embeddings by optimizing the Skipgram objective using SGD.
170 |     '''
171 |     walks_ = [list(map(str, walk)) for walk in walks]
172 |     model = Word2Vec(walks_, vector_size=dimensions, window=window_size,
173 |                      min_count=0, sg=1, workers=nb_workers, epochs=nb_iter)
174 |     return model
175 | 
176 | 
177 | def compute_graph_model(nx_graph, **kwargs):
178 |     '''
179 |     Pipeline for representational learning for all nodes in a graph.
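    Returns the raw node-embedding matrix (the underlying Word2Vec model's `wv.vectors`), one row per node.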
180 | @param nx_graph 181 | @kwarg p: int 182 | @kwarg q: int 183 | ''' 184 | p = kwargs.get("p", 1) 185 | q = kwargs.get("q", 1) 186 | dimensions = kwargs.get("dimensions", 128) 187 | window_size = kwargs.get("window_size", 10) 188 | nb_workers = kwargs.get("nb_workers", 8) 189 | nb_iter = kwargs.get("nb_iter", 1) 190 | num_walks = kwargs.get("num_walks", 10) 191 | walk_length = kwargs.get("walk_length", 80) 192 | directed = kwargs.get("directed", False) 193 | 194 | G = Graph(nx_graph, directed, p, q) 195 | G.preprocess_transition_probs() 196 | walks = G.simulate_walks(num_walks, walk_length) 197 | return learn_embeddings(walks, dimensions, window_size, nb_workers, nb_iter).wv.vectors 198 | 199 | cdef class Node2Vec(Base): 200 | """ 201 | Based on : 202 | Extract Node2vec embedding of each graph in `listgs` 203 | @inproceedings{Grover:2016:NSF:2939672.2939754, 204 | author = {Grover, Aditya and Leskovec, Jure}, 205 | title = {Node2Vec: Scalable Feature Learning for Networks}, 206 | booktitle = {Proceedings of the 22Nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, 207 | series = {KDD '16}, 208 | year = {2016}, 209 | isbn = {978-1-4503-4232-2}, 210 | location = {San Francisco, California, USA}, 211 | pages = {855--864}, 212 | numpages = {10}, 213 | url = {http://doi.acm.org/10.1145/2939672.2939754}, 214 | doi = {10.1145/2939672.2939754}, 215 | acmid = {2939754}, 216 | publisher = {ACM}, 217 | address = {New York, NY, USA}, 218 | keywords = {feature learning, graph representations, information networks, node embeddings}, 219 | } 220 | 221 | Original code : https://github.com/aditya-grover/node2vec 222 | 223 | Modified by : Jacques Fize 224 | """ 225 | 226 | def __init__(self): 227 | Base.__init__(self,0,False) 228 | 229 | def extract_embedding(self, listgs): 230 | """ 231 | Extract Node2vec embedding of each graph in `listgs` 232 | 233 | Parameters 234 | ---------- 235 | listgs : list 236 | list of graphs 237 | 238 | Returns 239 | ------- 240 | list 241 | list of embeddings 242 | """ 243 | 244 | from tqdm import tqdm 245 | models = Parallel(n_jobs = self.cpu_count)(delayed(compute_graph_model)(g,directed=g.is_directed()) for g in tqdm(listgs,desc="Extracting Embeddings...")) 246 | return models 247 | 248 | @cython.boundscheck(False) 249 | cpdef np.ndarray compare(self,list listgs, list selected): 250 | # Selected is ignored 251 | [nx.set_edge_attributes(g,1,'weight') for g in listgs] 252 | models = self.extract_embedding(listgs) 253 | vector_matrix = np.array([mod.mean(axis=0) for mod in models]) # Average nodes representations 254 | cs = cosine_similarity(vector_matrix) 255 | return cs 256 | -------------------------------------------------------------------------------- /gmatch4py/embedding/skipgram.pyx: -------------------------------------------------------------------------------- 1 | import logging 2 | from multiprocessing import cpu_count 3 | 4 | from gensim.models import Word2Vec 5 | 6 | logger = logging.getLogger("deepwalk") 7 | 8 | class Skipgram(Word2Vec): 9 | """A subclass to allow more customization of the Word2Vec internals.""" 10 | 11 | def __init__(self, vocabulary_counts=None, **kwargs): 12 | 13 | self.vocabulary_counts = None 14 | 15 | kwargs["min_count"] = kwargs.get("min_count", 0) 16 | kwargs["workers"] = kwargs.get("workers", cpu_count()) 17 | kwargs["size"] = kwargs.get("size", 128) 18 | kwargs["sentences"] = kwargs.get("sentences", None) 19 | kwargs["window"] = kwargs.get("window", 10) 20 | kwargs["sg"] = 1 21 | kwargs["hs"] = 1 22 | 23 
| if vocabulary_counts != None: 24 | self.vocabulary_counts = vocabulary_counts 25 | 26 | super(Skipgram, self).__init__(**kwargs) 27 | -------------------------------------------------------------------------------- /gmatch4py/embedding/walks.pyx: -------------------------------------------------------------------------------- 1 | import logging 2 | from io import open 3 | from os import path 4 | from time import time 5 | from multiprocessing import cpu_count 6 | import random 7 | from concurrent.futures import ProcessPoolExecutor 8 | from collections import Counter 9 | 10 | from six.moves import zip 11 | 12 | from . import graph 13 | 14 | logger = logging.getLogger("deepwalk") 15 | 16 | __current_graph = None 17 | 18 | # speed up the string encoding 19 | __vertex2str = None 20 | 21 | def count_words(file): 22 | """ Counts the word frequences in a list of sentences. 23 | 24 | Note: 25 | This is a helper function for parallel execution of `Vocabulary.from_text` 26 | method. 27 | """ 28 | c = Counter() 29 | with open(file, 'r') as f: 30 | for l in f: 31 | words = l.strip().split() 32 | c.update(words) 33 | return c 34 | 35 | 36 | def count_textfiles(files, workers=1): 37 | c = Counter() 38 | with ProcessPoolExecutor(max_workers=workers) as executor: 39 | for c_ in executor.map(count_words, files): 40 | c.update(c_) 41 | return c 42 | 43 | 44 | def count_lines(f): 45 | if path.isfile(f): 46 | num_lines = sum(1 for line in open(f)) 47 | return num_lines 48 | else: 49 | return 0 50 | 51 | def _write_walks_to_disk(args): 52 | num_paths, path_length, alpha, rand, f = args 53 | G = __current_graph 54 | t_0 = time() 55 | with open(f, 'w') as fout: 56 | for walk in graph.build_deepwalk_corpus_iter(G=G, num_paths=num_paths, path_length=path_length, 57 | alpha=alpha, rand=rand): 58 | fout.write(u"{}\n".format(u" ".join(v for v in walk))) 59 | logger.debug("Generated new file {}, it took {} seconds".format(f, time() - t_0)) 60 | return f 61 | 62 | def write_walks_to_disk(G, filebase, num_paths, path_length, alpha=0, rand=random.Random(0), num_workers=cpu_count(), 63 | always_rebuild=True): 64 | global __current_graph 65 | __current_graph = G 66 | files_list = ["{}.{}".format(filebase, str(x)) for x in list(range(num_paths))] 67 | expected_size = len(G) 68 | args_list = [] 69 | files = [] 70 | 71 | if num_paths <= num_workers: 72 | paths_per_worker = [1 for x in range(num_paths)] 73 | else: 74 | paths_per_worker = [len(list(filter(lambda z: z!= None, [y for y in x]))) 75 | for x in graph.grouper(int(num_paths / num_workers)+1, range(1, num_paths+1))] 76 | 77 | with ProcessPoolExecutor(max_workers=num_workers) as executor: 78 | for size, file_, ppw in zip(executor.map(count_lines, files_list), files_list, paths_per_worker): 79 | if always_rebuild or size != (ppw*expected_size): 80 | args_list.append((ppw, path_length, alpha, random.Random(rand.randint(0, 2**31)), file_)) 81 | else: 82 | files.append(file_) 83 | 84 | with ProcessPoolExecutor(max_workers=num_workers) as executor: 85 | for file_ in executor.map(_write_walks_to_disk, args_list): 86 | files.append(file_) 87 | 88 | return files 89 | 90 | class WalksCorpus(object): 91 | def __init__(self, file_list): 92 | self.file_list = file_list 93 | def __iter__(self): 94 | for file in self.file_list: 95 | with open(file, 'r') as f: 96 | for line in f: 97 | yield line.split() 98 | 99 | def combine_files_iter(file_list): 100 | for file in file_list: 101 | with open(file, 'r') as f: 102 | for line in f: 103 | yield line.split() 104 | 
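A minimal sketch of how these helpers chain together when walks are spilled to disk, mirroring the large-corpus branch of `deepwalk.pyx`; the edge-list path and file base name are placeholders:

```python
import random
from gmatch4py.embedding import graph, walks

# Build the lightweight adjacency-list Graph from an edge list (placeholder path)
G = graph.load_edgelist("edges.txt", undirected=True)

# Write one walk file per pass over the nodes, then count vertex frequencies
files = walks.write_walks_to_disk(G, "temp.walks", num_paths=10,
                                  path_length=40, alpha=0,
                                  rand=random.Random(0), num_workers=2)
freqs = walks.count_textfiles(files, workers=2)
corpus = walks.WalksCorpus(files)   # iterable of sentences for Word2Vec/Skipgram
```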
-------------------------------------------------------------------------------- /gmatch4py/ged/__init__.py: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | 3 | -------------------------------------------------------------------------------- /gmatch4py/ged/abstract_graph_edit_dist.pxd: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | from ..base cimport Base 4 | 5 | cdef class AbstractGraphEditDistance(Base): 6 | cdef double node_del 7 | cdef double node_ins 8 | cdef double edge_del 9 | cdef double edge_ins 10 | cdef np.ndarray cost_matrix 11 | cdef bint weighted 12 | 13 | cpdef double distance_ged(self,G,H) 14 | cdef list edit_costs(self,G,H) 15 | cpdef np.ndarray create_cost_matrix(self,G,H) 16 | cdef double insert_cost(self, int i, int j, nodesH, H) 17 | cdef double delete_cost(self, int i, int j, nodesG, G) 18 | cpdef double substitute_cost(self, node1, node2, G, H) 19 | 20 | -------------------------------------------------------------------------------- /gmatch4py/ged/abstract_graph_edit_dist.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | from __future__ import print_function 3 | 4 | import sys 5 | import warnings 6 | 7 | import numpy as np 8 | cimport numpy as np 9 | import networkx as nx 10 | from cython.parallel cimport prange,parallel 11 | 12 | try: 13 | from munkres import munkres 14 | except ImportError: 15 | warnings.warn("To obtain optimal results install the Cython 'munkres' module at https://github.com/jfrelinger/cython-munkres-wrapper") 16 | from scipy.optimize import linear_sum_assignment as munkres 17 | 18 | from ..base cimport Base 19 | from ..helpers.general import parsenx2graph 20 | 21 | 22 | 23 | cdef class AbstractGraphEditDistance(Base): 24 | 25 | 26 | def __init__(self, node_del,node_ins,edge_del,edge_ins): 27 | Base.__init__(self,1,False) 28 | 29 | self.node_del = node_del 30 | self.node_ins = node_ins 31 | self.edge_del = edge_del 32 | self.edge_ins = edge_ins 33 | 34 | 35 | cpdef double distance_ged(self,G,H): 36 | """ 37 | Return the distance value between G and H 38 | 39 | Parameters 40 | ---------- 41 | G : gmatch4py.Graph 42 | graph 43 | H : gmatch4py.Graph 44 | graph 45 | 46 | Returns 47 | ------- 48 | int 49 | distance 50 | """ 51 | cdef list opt_path = self.edit_costs(G,H) 52 | return np.sum(opt_path) 53 | 54 | def edit_path(self,G,H): 55 | """ 56 | Return the edit path along with the cost matrix and the selected indices from the Munkres Algorithm 57 | 58 | Parameters 59 | ---------- 60 | G : nx.Graph 61 | first graph 62 | H : nx.Graph 63 | second graph 64 | 65 | Returns 66 | ------- 67 | np.array(1D), np.array(2D), (np.array(2D) if munkres) or (np.array(1,2) if scipy) 68 | edit_path, cost_matrix, munkres results 69 | """ 70 | cost_matrix = self.create_cost_matrix(G,H).astype(float) 71 | index_path= munkres(cost_matrix) 72 | return cost_matrix[index_path], cost_matrix, index_path 73 | 74 | 75 | cdef list edit_costs(self, G, H): 76 | """ 77 | Return the optimal path edit cost list, to transform G into H 78 | 79 | Parameters 80 | ---------- 81 | G : gmatch4py.Graph 82 | graph 83 | H : gmatch4py.Graph 84 | graph 85 | 86 | Returns 87 | ------- 88 | np.array 89 | edit path 90 | """ 91 | cdef np.ndarray cost_matrix = self.create_cost_matrix(G,H).astype(float) 92 | return cost_matrix[munkres(cost_matrix)].tolist() 93 | 94 | cpdef np.ndarray 
create_cost_matrix(self, G, H): 95 | """ 96 | Creates a |N+M| X |N+M| cost matrix between all nodes in 97 | graphs G and H 98 | Each cost represents the cost of substituting, 99 | deleting or inserting a node 100 | The cost matrix consists of four regions: 101 | 102 | substitute | insert costs 103 | ------------------------------- 104 | delete | delete -> delete 105 | 106 | The delete -> delete region is filled with zeros 107 | 108 | Parameters 109 | ---------- 110 | G : gmatch4py.Graph 111 | graph 112 | H : gmatch4py.Graph 113 | graph 114 | 115 | Returns 116 | ------- 117 | np.array 118 | cost matrix 119 | """ 120 | cdef int n,m 121 | try: 122 | n = G.number_of_nodes() 123 | m = H.number_of_nodes() 124 | except: 125 | n = G.size() 126 | m = H.size() 127 | cdef np.ndarray cost_matrix = np.zeros((n+m,n+m)) 128 | cdef list nodes1 = list(G.nodes()) 129 | cdef list nodes2 = list(H.nodes()) 130 | cdef int i,j 131 | for i in range(n): 132 | for j in range(m): 133 | cost_matrix[i,j] = self.substitute_cost(nodes1[i], nodes2[j], G, H) 134 | 135 | for i in range(m): 136 | for j in range(m): 137 | cost_matrix[i+n,j] = self.insert_cost(i, j, nodes2, H) 138 | 139 | for i in range(n): 140 | for j in range(n): 141 | cost_matrix[j,i+m] = self.delete_cost(i, j, nodes1, G) 142 | 143 | return cost_matrix 144 | 145 | cdef double insert_cost(self, int i, int j, nodesH, H): 146 | """ 147 | Return the insert cost of the ith nodes in H 148 | 149 | Returns 150 | ------- 151 | int 152 | insert cost 153 | """ 154 | raise NotImplementedError 155 | 156 | cdef double delete_cost(self, int i, int j, nodesG, G): 157 | """ 158 | Return the delete cost of the ith nodes in H 159 | 160 | Returns 161 | ------- 162 | int 163 | delete cost 164 | """ 165 | raise NotImplementedError 166 | 167 | cpdef double substitute_cost(self, node1, node2, G, H): 168 | """ 169 | Return the substitute cost of between the node1 in G and the node2 in H 170 | 171 | Returns 172 | ------- 173 | int 174 | substitution cost 175 | """ 176 | raise NotImplementedError 177 | 178 | 179 | cpdef np.ndarray compare(self,list listgs, list selected): 180 | cdef int n = len(listgs) 181 | cdef double[:,:] comparison_matrix = np.zeros((n, n)) 182 | listgs=parsenx2graph(listgs,self.node_attr_key,self.edge_attr_key) 183 | cdef long[:] n_nodes = np.array([g.size() for g in listgs]) 184 | cdef double[:] selected_test = np.array(self.get_selected_array(selected,n)) 185 | cdef int i,j 186 | cdef float inf=np.inf 187 | 188 | with nogil, parallel(num_threads=self.cpu_count): 189 | for i in prange(n,schedule='static'): 190 | for j in range(n): 191 | if n_nodes[i]>0 and n_nodes[j]>0 and selected_test[i] == 1 : 192 | with gil: 193 | comparison_matrix[i][j] = self.distance_ged(listgs[i],listgs[j]) 194 | else: 195 | comparison_matrix[i][j] = inf 196 | #comparison_matrix[j, i] = comparison_matrix[i, j] 197 | return np.array(comparison_matrix) 198 | -------------------------------------------------------------------------------- /gmatch4py/ged/bipartite_graph_matching_2.pyx: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | import numpy as np 3 | cimport numpy as np 4 | from ..base cimport Base 5 | from cython.parallel cimport prange,parallel 6 | from ..helpers.general import parsenx2graph 7 | cimport cython 8 | 9 | cdef class BP_2(Base): 10 | 11 | 12 | cdef int node_del 13 | cdef int node_ins 14 | cdef int edge_del 15 | cdef int edge_ins 16 | 17 | def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int 
edge_ins=1): 18 | """ 19 | BP_2 Constructor 20 | 21 | Parameters 22 | ---------- 23 | node_del :int 24 | Node deletion cost 25 | node_ins : int 26 | Node insertion cost 27 | edge_del : int 28 | Edge Deletion cost 29 | edge_ins : int 30 | Edge Insertion cost 31 | """ 32 | Base.__init__(self,1,False) 33 | self.node_del = node_del 34 | self.node_ins = node_ins 35 | self.edge_del = edge_del 36 | self.edge_ins = edge_ins 37 | 38 | 39 | @cython.boundscheck(False) 40 | cpdef np.ndarray compare(self,list listgs, list selected): 41 | cdef int n = len(listgs) 42 | cdef list new_gs=parsenx2graph(listgs) 43 | cdef double[:,:] comparison_matrix = np.zeros((n, n)) 44 | cdef double[:] selected_test = self.get_selected_array(selected,n) 45 | cdef int i,j 46 | cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) 47 | cdef long[:] n_edges = np.array([g.density() for g in new_gs]) 48 | 49 | with nogil, parallel(num_threads=self.cpu_count): 50 | for i in prange(n,schedule='static'): 51 | for j in range(i,n): 52 | if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1: 53 | with gil: 54 | comparison_matrix[i, j] = self.bp2(new_gs[i], new_gs[j]) 55 | else: 56 | comparison_matrix[i, j] = 0 57 | comparison_matrix[j, i] = comparison_matrix[i, j] 58 | 59 | return np.array(comparison_matrix) 60 | 61 | 62 | cdef double bp2(self, g1, g2): 63 | """ 64 | Compute the BP2 similarity value between two `networkx.Graph` 65 | 66 | Parameters 67 | ---------- 68 | g1 : gmatch4py.Graph 69 | First Graph 70 | g2 : gmatch4py.Graph 71 | Second Graph 72 | 73 | Returns 74 | ------- 75 | float 76 | similarity value 77 | """ 78 | return np.min([self.distance_bp2(self.psi(g1,g2)),self.distance_bp2(self.psi(g2,g1))]) 79 | 80 | cdef double distance_bp2(self,e): 81 | """ 82 | Return the distance based on the edit path found. 83 | Parameters 84 | ---------- 85 | e : list 86 | Contains the edit path costs 87 | 88 | Returns 89 | ------- 90 | double 91 | Return sum of the costs from the edit path 92 | """ 93 | return np.sum(e) 94 | 95 | cdef list psi(self,g1,g2): 96 | """ 97 | Return the optimal edit path :math:`\psi` based on BP2 algorithm. 98 | 99 | 100 | Parameters 101 | ---------- 102 | g1 : networkx.Graph 103 | First Graph 104 | g2 : networkx.Graph 105 | Second Graph 106 | 107 | Returns 108 | ------- 109 | list 110 | list containing costs from the optimal edit path 111 | """ 112 | cdef list psi_=[] 113 | cdef list nodes1 = list(g1.nodes()) 114 | cdef list nodes2 = list(g2.nodes()) 115 | for u in nodes1: 116 | v=None 117 | for w in nodes2: 118 | if 2*self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,None) + self.fuv(g1,g2,None,w)\ 119 | and self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,v): 120 | v=w 121 | psi_.append(self.fuv(g1,g2,u,v)) 122 | if u: 123 | nodes1= list(set(nodes1).difference(set([u]))) 124 | if v: 125 | nodes2= list(set(nodes2).difference(set([v]))) 126 | for v in nodes2: 127 | psi_.append(self.fuv(g1,g2,None,v)) 128 | return psi_ 129 | 130 | 131 | 132 | cdef float fuv(self, g1, g2, str n1, str n2): 133 | """ 134 | Compute the Node Distance function 135 | Parameters 136 | ---------- 137 | g1 : gmatch4py.Graph 138 | First graph 139 | g2 : gmatch4py.Graph 140 | Second graph 141 | n1 : int or str 142 | identifier of the first node 143 | n2 : int or str 144 | identifier of the second node 145 | 146 | Returns 147 | ------- 148 | float 149 | node distance 150 | """ 151 | if n2 == None: # Del 152 | return self.node_del + ((self.edge_del / 2.) 
* g1.degree(n1))
153 |         if n1 == None:  # Insert
154 |             return self.node_ins + ((self.edge_ins / 2.) * g2.degree(n2))
155 |         else:
156 |             if n1 == n2:
157 |                 return 0
158 |         return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2
159 | 
160 |     cdef float hed_edge(self, g1, g2, str n1, str n2):
161 |         """
162 |         Compute HEDistance between edges of n1 and n2, respectively in g1 and g2
163 |         Parameters
164 |         ----------
165 |         g1 : gmatch4py.Graph
166 |             First graph
167 |         g2 : gmatch4py.Graph
168 |             Second graph
169 |         n1 : int or str
170 |             identifier of the first node
171 |         n2 : int or str
172 |             identifier of the second node
173 | 
174 |         Returns
175 |         -------
176 |         float
177 |             HEDistance between g1 and g2
178 |         """
179 |         return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g2, n2, g1, n1) # sum over both directions (the original summed the same direction twice)
180 | 
181 | 
182 |     cdef float sum_gpq(self, g1, str n1, g2, str n2):
183 |         """
184 |         Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2
185 |         Parameters
186 |         ----------
187 |         g1 : gmatch4py.Graph
188 |             First graph
189 |         g2 : gmatch4py.Graph
190 |             Second graph
191 |         n1 : int or str
192 |             identifier of the first node
193 |         n2 : int or str
194 |             identifier of the second node
195 | 
196 |         Returns
197 |         -------
198 |         float
199 |             Nearest Neighbour Distance
200 |         """
201 | 
202 |         #if isinstance(g1, nx.MultiDiGraph):
203 |         cdef list edges1 = g1.get_edges_no(n1) if n1 else []
204 |         cdef list edges2 = g2.get_edges_no(n2) if n2 else []
205 | 
206 |         cdef np.ndarray min_sum = np.zeros(len(edges1))
207 |         edges2.extend([None])
208 |         cdef np.ndarray min_i
209 |         for i in range(len(edges1)):
210 |             min_i = np.zeros(len(edges2))
211 |             for j in range(len(edges2)):
212 |                 min_i[j] = self.gpq(edges1[i], edges2[j])
213 |             min_sum[i] = np.min(min_i)
214 |         return np.sum(min_sum)
215 | 
216 |     cdef float gpq(self, str e1, str e2):
217 |         """
218 |         Compute the edge distance function
219 |         Parameters
220 |         ----------
221 |         e1 : str
222 |             first edge identifier
223 |         e2 : str
224 |             second edge identifier
225 |         Returns
226 |         -------
227 |         float
228 |             edge distance
229 |         """
230 | 
231 |         if e2 == None:  # Del
232 |             return self.edge_del
233 |         if e1 == None:  # Insert
234 |             return self.edge_ins
235 |         else:
236 |             if e1 == e2:
237 |                 return 0
238 |         return (self.edge_del + self.edge_ins) / 2.
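239 | 
240 | # --- Illustrative usage sketch (added; not part of the original module) ---
241 | # Comparing NetworkX graphs with BP_2 and unit edit costs. The graphs below
242 | # are arbitrary examples, and BP_2 is assumed to be exported at the
243 | # gmatch4py package level.
244 | #
245 | # import networkx as nx
246 | # import gmatch4py as gm
247 | #
248 | # graphs = [nx.complete_bipartite_graph(5, 4), nx.complete_bipartite_graph(6, 4)]
249 | # bp2 = gm.BP_2(node_del=1, node_ins=1, edge_del=1, edge_ins=1)
250 | # result = bp2.compare(graphs, None)  # None = compare every pair of graphs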
--------------------------------------------------------------------------------
/gmatch4py/ged/graph_edit_dist.pxd:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 | from .abstract_graph_edit_dist cimport AbstractGraphEditDistance
4 | 
5 | 
6 | cdef class GraphEditDistance(AbstractGraphEditDistance):
7 |     cpdef object relabel_cost(self, node1, node2, G, H)
8 |     cpdef double substitute_cost(self, node1, node2, G, H)
9 |     cdef double delete_cost(self, int i, int j, nodesG, G)
10 |     cdef double insert_cost(self, int i, int j, nodesH, H)
--------------------------------------------------------------------------------
/gmatch4py/ged/graph_edit_dist.pyx:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | 
3 | import sys
4 | 
5 | import networkx as nx
6 | import numpy as np
7 | cimport numpy as np
8 | from .abstract_graph_edit_dist cimport AbstractGraphEditDistance
9 | 
10 | 
11 | 
12 | cdef class GraphEditDistance(AbstractGraphEditDistance):
13 | 
14 |     def __init__(self,node_del,node_ins,edge_del,edge_ins,weighted=False):
15 |         AbstractGraphEditDistance.__init__(self,node_del,node_ins,edge_del,edge_ins)
16 |         self.weighted=weighted
17 | 
18 |     cpdef double substitute_cost(self, node1, node2, G, H):
19 |         return self.relabel_cost(node1, node2, G, H)
20 | 
21 |     cpdef object relabel_cost(self, node1, node2, G, H):
22 |         ## If the two nodes are equal
23 |         if node1 == node2 and G.degree(node1) == H.degree(node2):
24 |             return 0.0
25 |         elif node1 == node2 and G.degree(node1) != H.degree(node2):
26 |             #R = Graph(self.add_edges(node1,node2,G),G.get_node_key(),G.get_egde_key())
27 |             #R2 = Graph(self.add_edges(node1,node2,H),H.get_node_key(),H.get_egde_key())
28 |             #inter_= R.size_edge_intersect(R2)
29 |             R=set(G.get_edges_no(node1))
30 |             R2=set(H.get_edges_no(node2))
31 |             inter_=R.intersection(R2)
32 |             add_diff=abs(len(R2)-len(inter_))#abs(R2.density()-inter_)
33 |             del_diff=abs(len(R)-len(inter_))#abs(R.density()-inter_)
34 |             return (add_diff*self.edge_ins)+(del_diff*self.edge_del)
35 | 
36 | 
37 |         # if the two nodes are connected
38 |         if G.has_edge(node1,node2) or G.has_edge(node2,node1):
39 |             return self.node_ins+self.node_del
40 |         if not node2 in G.nodes():
41 |             nodesH=H.nodes()
42 |             index=list(nodesH).index(node2)
43 |             return self.node_del+self.node_ins+self.insert_cost(index,index,nodesH,H)
44 |         return sys.maxsize
45 | 
46 |     cdef double delete_cost(self, int i, int j, nodesG, G):
47 |         if i == j:
48 |             return self.node_del+(G.degree(nodesG[i],weight=True)*self.edge_del) # Deleting a node implies deleting its in and out edges
49 |         return sys.maxsize
50 | 
51 |     cdef double insert_cost(self, int i, int j, nodesH, H):
52 |         if i == j:
53 |             deg=H.degree(nodesH[j],weight=True)
54 |             if isinstance(deg,dict):deg=0
55 |             return self.node_ins+(deg*self.edge_ins)
56 |         else:
57 |             return sys.maxsize
--------------------------------------------------------------------------------
/gmatch4py/ged/greedy_edit_distance.pyx:
--------------------------------------------------------------------------------
1 | # coding = utf-8
2 | import sys
3 | 
4 | from .graph_edit_dist cimport GraphEditDistance
5 | import numpy as np
6 | cimport numpy as np
7 | from cython.parallel cimport prange,parallel
8 | 
9 | cdef class GreedyEditDistance(GraphEditDistance):
10 |     """
11 |     Implementation of the Greedy Edit Distance presented in :
12 | 
13 |     Improved quadratic time approximation of graph edit distance by Hausdorff matching and
greedy assignment
14 |     Andreas Fischer, Kaspar Riesen, Horst Bunke
15 |     2016
16 |     """
17 | 
18 |     def __init__(self,node_del,node_ins,edge_del,edge_ins):
19 |         GraphEditDistance.__init__(self,node_del,node_ins,edge_del,edge_ins)
20 | 
21 | 
22 |     cdef list edit_costs(self, G, H):
23 |         cdef np.ndarray cost_matrix=self.create_cost_matrix(G,H)
24 |         cdef np.ndarray cost_matrix_2=cost_matrix.copy().astype(np.double)
25 |         cdef list psi=[]
26 |         for i in range(len(cost_matrix)):
27 |             phi_i=np.argmin(cost_matrix_2[i])
28 |             cost_matrix_2[:,phi_i]=sys.maxsize # mask the assigned column so it cannot be selected again
29 |             psi.append([i,phi_i])
30 |         return [cost_matrix[psi[i][0]][psi[i][1]] for i in range(len(psi))]
--------------------------------------------------------------------------------
/gmatch4py/ged/hausdorff_edit_distance.pyx:
--------------------------------------------------------------------------------
1 | # coding = utf-8
2 | 
3 | import numpy as np
4 | cimport numpy as np
5 | from ..base cimport Base
6 | from cython.parallel cimport prange,parallel
7 | from ..helpers.general import parsenx2graph
8 | cimport cython
9 | 
10 | cdef class HED(Base):
11 |     """
12 |     Implementation of Hausdorff Edit Distance described in
13 | 
14 |     Improved quadratic time approximation of graph edit distance by Hausdorff matching and greedy assignment
15 |     Andreas Fischer, Kaspar Riesen, Horst Bunke
16 |     2016
17 |     """
18 | 
19 |     cdef int node_del
20 |     cdef int node_ins
21 |     cdef int edge_del
22 |     cdef int edge_ins
23 | 
24 |     def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1):
25 |         """
26 |         HED Constructor
27 | 
28 |         Parameters
29 |         ----------
30 |         node_del : int
31 |             Node deletion cost
32 |         node_ins : int
33 |             Node insertion cost
34 |         edge_del : int
35 |             Edge deletion cost
36 |         edge_ins : int
37 |             Edge insertion cost
38 |         """
39 |         Base.__init__(self,1,False)
40 |         self.node_del = node_del
41 |         self.node_ins = node_ins
42 |         self.edge_del = edge_del
43 |         self.edge_ins = edge_ins
44 | 
45 | 
46 |     @cython.boundscheck(False)
47 |     cpdef np.ndarray compare(self,list listgs, list selected):
48 |         cdef int n = len(listgs)
49 |         cdef list new_gs=parsenx2graph(listgs,self.node_attr_key,self.edge_attr_key)
50 |         cdef double[:,:] comparison_matrix = np.zeros((n, n))
51 |         cdef double[:] selected_test = np.array(self.get_selected_array(selected,n))
52 |         cdef int i,j
53 |         cdef long[:] n_nodes = np.array([g.size() for g in new_gs])
54 |         cdef long[:] n_edges = np.array([g.density() for g in new_gs])
55 | 
56 |         with nogil, parallel(num_threads=self.cpu_count):
57 |             for i in prange(n,schedule='static'):
58 |                 for j in range(i,n):
59 |                     if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1:
60 |                         with gil:
61 |                             comparison_matrix[i, j] = self.hed(new_gs[i], new_gs[j])
62 |                     else:
63 |                         comparison_matrix[i, j] = 0
64 |                     comparison_matrix[j, i] = comparison_matrix[i, j]
65 | 
66 |         return np.array(comparison_matrix)
67 | 
68 | 
69 |     cdef float hed(self, g1, g2):
70 |         """
71 |         Compute the HED similarity value between two `gmatch4py.Graph`
72 | 
73 |         Parameters
74 |         ----------
75 |         g1 : gmatch4py.Graph
76 |             First Graph
77 |         g2 : gmatch4py.Graph
78 |             Second Graph
79 | 
80 |         Returns
81 |         -------
82 |         float
83 |             similarity value
84 |         """
85 |         return self.sum_fuv(g1, g2) + self.sum_fuv(g2, g1)
86 | 
87 |     cdef float sum_fuv(self, g1, g2):
88 |         """
89 |         Compute Nearest Neighbour Distance between G1 and G2
90 |         Parameters
91 |         ----------
92 |         g1 : gmatch4py.Graph
93 |             First graph
94 |         g2 : gmatch4py.Graph
95 |             Second graph
96 | 
Returns
98 |         -------
99 |         float
100 |             Nearest Neighbour Distance
101 |         """
102 | 
103 |         cdef np.ndarray min_sum = np.zeros(g1.size())
104 |         cdef list nodes1 = list(g1.nodes())
105 |         cdef list nodes2 = list(g2.nodes())
106 |         nodes2.extend([None])
107 |         cdef np.ndarray min_i
108 |         for i in range(g1.size()):
109 |             min_i = np.zeros(len(nodes2))
110 |             for j in range(len(nodes2)): # len(nodes2) includes the deletion option (None); the original stopped at g2.size() and never considered it
111 |                 min_i[j] = self.fuv(g1, g2, nodes1[i], nodes2[j])
112 |             min_sum[i] = np.min(min_i)
113 |         return np.sum(min_sum)
114 | 
115 |     cdef float fuv(self, g1, g2, str n1, str n2):
116 |         """
117 |         Compute the Node Distance function
118 |         Parameters
119 |         ----------
120 |         g1 : gmatch4py.Graph
121 |             First graph
122 |         g2 : gmatch4py.Graph
123 |             Second graph
124 |         n1 : int or str
125 |             identifier of the first node
126 |         n2 : int or str
127 |             identifier of the second node
128 | 
129 |         Returns
130 |         -------
131 |         float
132 |             node distance
133 |         """
134 |         if n2 == None:  # Del
135 |             return self.node_del + ((self.edge_del / 2.) * g1.degree(n1))
136 |         if n1 == None:  # Insert
137 |             return self.node_ins + ((self.edge_ins / 2.) * g2.degree(n2))
138 |         else:
139 |             if n1 == n2:
140 |                 return 0
141 |         return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2
142 | 
143 |     cdef float hed_edge(self, g1, g2, str n1, str n2):
144 |         """
145 |         Compute HEDistance between edges of n1 and n2, respectively in g1 and g2
146 |         Parameters
147 |         ----------
148 |         g1 : gmatch4py.Graph
149 |             First graph
150 |         g2 : gmatch4py.Graph
151 |             Second graph
152 |         n1 : int or str
153 |             identifier of the first node
154 |         n2 : int or str
155 |             identifier of the second node
156 | 
157 |         Returns
158 |         -------
159 |         float
160 |             HEDistance between g1 and g2
161 |         """
162 |         return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g2, n2, g1, n1) # sum over both directions (the original summed the same direction twice)
163 | 
164 | 
165 |     cdef float sum_gpq(self, g1, str n1, g2, str n2):
166 |         """
167 |         Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2
168 |         Parameters
169 |         ----------
170 |         g1 : gmatch4py.Graph
171 |             First graph
172 |         g2 : gmatch4py.Graph
173 |             Second graph
174 |         n1 : int or str
175 |             identifier of the first node
176 |         n2 : int or str
177 |             identifier of the second node
178 | 
179 |         Returns
180 |         -------
181 |         float
182 |             Nearest Neighbour Distance
183 |         """
184 | 
185 |         #if isinstance(g1, nx.MultiDiGraph):
186 |         cdef list edges1 = g1.get_edges_no(n1) if n1 else [] # rename method ...
187 |         cdef list edges2 = g2.get_edges_no(n2) if n2 else []
188 | 
189 |         cdef np.ndarray min_sum = np.zeros(len(edges1))
190 |         edges2.extend([None])
191 |         cdef np.ndarray min_i
192 |         for i in range(len(edges1)):
193 |             min_i = np.zeros(len(edges2))
194 |             for j in range(len(edges2)):
195 |                 min_i[j] = self.gpq(edges1[i], edges2[j])
196 |             min_sum[i] = np.min(min_i)
197 |         return np.sum(min_sum)
198 | 
199 |     cdef float gpq(self, str e1, str e2):
200 |         """
201 |         Compute the edge distance function
202 |         Parameters
203 |         ----------
204 |         e1 : str
205 |             first edge identifier
206 |         e2 : str
207 |             second edge identifier
208 |         Returns
209 |         -------
210 |         float
211 |             edge distance
212 |         """
213 |         if e2 == None:  # Del
214 |             return self.edge_del
215 |         if e1 == None:  # Insert
216 |             return self.edge_ins
217 |         else:
218 |             if e1 == e2:
219 |                 return 0
220 |         return (self.edge_del + self.edge_ins) / 2.
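221 | 
222 | # --- Illustrative usage sketch (added; not part of the original module) ---
223 | # HED produces an approximate graph edit distance matrix: lower values mean
224 | # more similar graphs. The example graphs are arbitrary, and HED is assumed
225 | # to be exported at the gmatch4py package level.
226 | #
227 | # import networkx as nx
228 | # import gmatch4py as gm
229 | #
230 | # graphs = [nx.path_graph(4), nx.cycle_graph(4), nx.star_graph(3)]
231 | # hed = gm.HED(node_del=1, node_ins=1, edge_del=1, edge_ins=1)
232 | # distances = hed.compare(graphs, None)  # n x n matrix of HED values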
--------------------------------------------------------------------------------
/gmatch4py/graph.pxd:
--------------------------------------------------------------------------------
1 | cimport numpy as np
2 | 
3 | cdef class Graph:
4 |     ##################################
5 |     # ATTRIBUTES
6 |     ##################################
7 | 
8 |     # GRAPH PROPERTY ATTRIBUTES
9 |     ###########################
10 |     cdef bint is_directed  # If the graph is directed
11 |     cdef bint is_multi  # If the graph is a Multi-Graph
12 |     cdef bint is_node_attr
13 |     cdef bint is_edge_attr
14 | 
15 |     # ATTR VAL ATTRIBUTES
16 |     #####################
17 |     cdef str node_attr_key  # Key that contains the main attr value for a node
18 |     cdef str edge_attr_key  # Key that contains the main attr value for an edge
19 |     cdef set unique_node_attr_vals  # set of unique node attr values
20 |     cdef set unique_edge_attr_vals  # set of unique edge attr values
21 | 
22 | 
23 |     ## NODE ATTRIBUTES
24 |     #################
25 | 
26 |     cdef list nodes_list  # list of node ids
27 |     cdef list nodes_attr_list  # list of attr value for each node (following nodes_list order)
28 |     cdef list nodes_hash  # hash representation of every node
29 |     cdef set nodes_hash_set  # hash representation of every node (set version for intersection and union operations)
30 |     cdef dict nodes_idx  # index of each node in `nodes_list`
31 |     cdef list nodes_weight  # list that contains each node's weight (following nodes_list order)
32 |     cdef long[:] nodes_degree  # degree list
33 |     cdef long[:] nodes_degree_in  # in-degree list
34 |     cdef long[:] nodes_degree_out  # out-degree list
35 |     cdef double[:] nodes_degree_weighted  # weighted vers. of nodes_degree
36 |     cdef double[:] nodes_degree_in_weighted  # weighted vers. of nodes_degree_in
37 |     cdef double[:] nodes_degree_out_weighted  # weighted vers. of nodes_degree_out
38 |     cdef dict degree_per_attr  # degree information per attr val
39 |     cdef dict degree_per_attr_weighted  # degree information per attr val
40 |     cdef list attr_nodes  # list of attr(dict) values for each node
41 |     cdef dict edges_of_nodes  # list of edges connected to each node
42 | 
43 |     # EDGES ATTRIBUTES
44 |     ##################
45 | 
46 |     cdef list edges_list  # edge list
47 |     cdef list edges_attr_list  # list of attr value for each edge (following edges_list order)
48 |     cdef dict edges_hash_idx  # index of hash in edges_list and edges_attr_list
49 |     cdef list edges_hash  # hash representation of every edge ## TO REVIEW
50 | cdef set edges_hash_set # set of hash representation of every edges (set version for intersection and union operation) 51 | cdef dict edges_weight # list that contains each node's weight (following nodes_list order) 52 | cdef dict edges_hash_map #[id1,[id2,hash]] 53 | cdef list attr_edges # list of attr(dict) values for each edge 54 | 55 | # SIZE ATTTRIBUTE 56 | ############### 57 | 58 | cdef long number_of_nodes # number of nodes 59 | cdef long number_of_edges # number of edges 60 | 61 | cdef dict number_of_edges_per_attr # number of nodes per attr value 62 | cdef dict number_of_nodes_per_attr # number of edges per attr value 63 | 64 | cdef object nx_g 65 | 66 | ################################## 67 | # METHODS 68 | ################################## 69 | 70 | # DIMENSION GETTER 71 | ################## 72 | cpdef long size(self) 73 | cpdef int size_attr(self, attr_val) 74 | 75 | cpdef long density(self) 76 | cpdef int density_attr(self, str attr_val) 77 | 78 | # HASH FUNCTION 79 | ############### 80 | cpdef str hash_node(self,str n1) 81 | cpdef str hash_edge(self,str n1,str n2) 82 | cpdef str hash_node_attr(self,str n1, str attr_value) 83 | cpdef str hash_edge_attr(self,str n1,str n2, str attr_value) 84 | 85 | ## EXIST FUNCTION 86 | ############### 87 | cpdef bint has_node(self,str n_id) 88 | cpdef bint has_edge(self,str n_id1,str n_id2) 89 | 90 | ## LEN FUNCTION 91 | ############### 92 | cpdef int size_node_intersect(self,Graph G) 93 | cpdef int size_node_union(self,Graph G) 94 | 95 | cpdef int size_edge_intersect(self,Graph G) 96 | cpdef int size_edge_union(self,Graph G) 97 | 98 | # DEGREE FUNCTION 99 | ################# 100 | cpdef double degree(self,str n_id, bint weight=*) 101 | cpdef double in_degree(self,str n_id, bint weight=*) 102 | cpdef double out_degree(self,str n_id, bint weight=*) 103 | 104 | cpdef double in_degree_attr(self,str n_id,str attr_val, bint weight=*) 105 | cpdef double out_degree_attr(self,str n_id,str attr_val, bint weight=*) 106 | cpdef double degree_attr(self,str n_id,str attr_val, bint weight=*) 107 | 108 | ## GETTER 109 | ######### 110 | 111 | cpdef list get_edges_ed(self,str e1, str e2) 112 | cpdef list get_edges_no(self,str n) 113 | cpdef set get_edges_hash(self) 114 | cpdef set get_nodes_hash(self) 115 | 116 | cpdef str get_node_key(self) 117 | cpdef str get_egde_key(self) 118 | 119 | cpdef dict get_edge_attrs(self,edge_hash) 120 | cpdef dict get_node_attrs(self, node_hash) 121 | cpdef dict get_node_attr(self, node_hash) 122 | cpdef dict get_edge_attr(self,edge_hash) -------------------------------------------------------------------------------- /gmatch4py/graph.pyx: -------------------------------------------------------------------------------- 1 | from libcpp.map cimport map 2 | from libcpp.utility cimport pair 3 | from libcpp.string cimport string 4 | from libcpp.vector cimport vector 5 | import numpy as np 6 | cimport numpy as np 7 | import networkx as nx 8 | 9 | cdef class Graph: 10 | 11 | def __init__(self,G, node_attr_key="",edge_attr_key=""): 12 | self.nx_g=G 13 | 14 | #GRAPH PROPERTY INIT 15 | self.is_directed = G.is_directed() 16 | self.is_multi = G.is_multigraph() 17 | self.is_node_attr=(True if node_attr_key else False) 18 | self.is_edge_attr=(True if edge_attr_key else False) 19 | if self.is_multi and not self.is_edge_attr: 20 | if not len(nx.get_edge_attributes(G,"id")) == len(G.edges(data=True)): 21 | i=0 22 | for id1 in G.adj: 23 | for id2 in G.adj[id1]: 24 | for id3 in G.adj[id1][id2]: 25 | G._adj[id1][id2][id3]["id"]=str(i) 26 | 
i+=1
27 |             self.is_edge_attr = True
28 |             edge_attr_key = "id"
29 | 
30 |         # for ed in
31 | 
32 |         #len(nx.get_edge_attributes(G1,"id")) == len(G1.edges(data=True))
33 | 
34 |         if len(G) ==0:
35 |             self.__init_empty__()
36 | 
37 |         else:
38 |             a,b=list(zip(*list(G.nodes(data=True))))
39 |             self.nodes_list,self.attr_nodes=list(a),list(b)
40 |             if G.number_of_edges()>0:
41 |                 e1,e2,d=zip(*list(G.edges(data=True)))
42 |                 self.attr_edges=list(d)
43 |                 self.edges_list=list(zip(e1,e2))
44 |             else:
45 |                 self.edges_list=[]
46 |                 self.attr_edges=[]
47 | 
48 |             if self.is_node_attr:
49 |                 self.node_attr_key = node_attr_key
50 |                 self.nodes_attr_list = [attr_dict[node_attr_key] for attr_dict in self.attr_nodes]
51 |                 self.unique_node_attr_vals=set(self.nodes_attr_list)
52 | 
53 |             if self.is_edge_attr:
54 |                 self.edge_attr_key = edge_attr_key
55 |                 self.edges_attr_list = [attr_dict[edge_attr_key] for attr_dict in self.attr_edges]
56 |                 self.unique_edge_attr_vals=set(self.edges_attr_list)
57 | 
58 |             # NODE Information init
59 |             #######################
60 | 
61 |             self.nodes_hash=[self.hash_node_attr(node,self.nodes_attr_list[ix]) if self.is_node_attr else self.hash_node(node) for ix, node in enumerate(self.nodes_list) ]
62 |             self.nodes_hash_set=set(self.nodes_hash)
63 |             self.nodes_idx={node:ix for ix, node in enumerate(self.nodes_list)}
64 |             self.nodes_weight=[attr_dict["weight"] if "weight" in attr_dict else 1 for attr_dict in self.attr_nodes]
65 |             degree_all=[]
66 |             degree_in=[]
67 |             degree_out=[]
68 | 
69 |             degree_all_weighted=[]
70 |             degree_in_weighted=[]
71 |             degree_out_weighted=[]
72 |             if self.is_edge_attr:
73 |                 self.degree_per_attr={attr_v:{n:{"in":0,"out":0} for n in self.nodes_list} for attr_v in self.unique_edge_attr_vals}
74 |                 self.degree_per_attr_weighted={attr_v:{n:{"in":0,"out":0} for n in self.nodes_list} for attr_v in self.unique_edge_attr_vals}
75 |             # Retrieving Degree Information
76 |             self.edges_of_nodes={}
77 |             for n in self.nodes_list:
78 |                 self.edges_of_nodes[n]=[self.hash_edge_attr(e1,e2,attr_dict[self.edge_attr_key]) if self.is_edge_attr else self.hash_edge(e1,e2) for e1,e2,attr_dict in G.edges(n,data=True)]
79 |                 degree_all.append(G.degree(n))
80 |                 degree_all_weighted.append(G.degree(n,weight="weight"))
81 |                 if self.is_directed:
82 |                     degree_in.append(G.in_degree(n))
83 |                     degree_in_weighted.append(G.in_degree(n,weight="weight"))
84 |                     degree_out.append(G.out_degree(n))
85 |                     degree_out_weighted.append(G.out_degree(n,weight="weight")) # the original omitted weight="weight" here, so the weighted out-degree was unweighted
86 |                 else:
87 |                     degree_in.append(degree_all[-1])
88 |                     degree_in_weighted.append(degree_all_weighted[-1])
89 |                     degree_out.append(degree_all[-1])
90 |                     degree_out_weighted.append(degree_all_weighted[-1])
91 |                 if self.is_edge_attr:
92 |                     if self.is_directed:
93 |                         in_edge=list(G.in_edges(n,data=True))
94 |                         out_edge=list(G.out_edges(n,data=True))
95 |                         for n1,n2,attr_dict in in_edge:
96 |                             self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["in"]+=1
97 |                             self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["in"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 )
98 | 
99 |                         for n1,n2,attr_dict in out_edge:
100 |                             self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["out"]+=1
101 |                             self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["out"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 )
102 | 
103 |                     else:
104 |                         edges=G.edges(n,data=True)
105 |                         for n1,n2,attr_dict in edges:
106 |                             self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["in"]+=1
107 |                             self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["out"]+=1
108 |
self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["in"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 ) 109 | self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["out"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 ) 110 | 111 | self.nodes_degree=np.array(degree_all) 112 | self.nodes_degree_in=np.array(degree_in) 113 | self.nodes_degree_out=np.array(degree_out) 114 | 115 | self.nodes_degree_weighted=np.array(degree_all_weighted).astype(np.double) 116 | self.nodes_degree_in_weighted=np.array(degree_in_weighted).astype(np.double) 117 | self.nodes_degree_out_weighted=np.array(degree_out_weighted).astype(np.double) 118 | 119 | 120 | # EDGE INFO INIT 121 | ################# 122 | 123 | self.edges_hash=[] 124 | self.edges_hash_map = {} 125 | self.edges_hash_idx = {} 126 | for ix, ed in enumerate(self.edges_list): 127 | e1,e2=ed 128 | if not e1 in self.edges_hash_map:self.edges_hash_map[e1]={} 129 | 130 | hash_=self.hash_edge_attr(e1,e2,self.edges_attr_list[ix]) if self.is_edge_attr else self.hash_edge(e1,e2) 131 | if self.is_multi and self.is_edge_attr: 132 | if not e2 in self.edges_hash_map[e1]:self.edges_hash_map[e1][e2]={} 133 | self.edges_hash_map[e1][e2][self.edges_attr_list[ix]]=hash_ 134 | else: 135 | self.edges_hash_map[e1][e2]=hash_ 136 | self.edges_hash_idx[hash_]=ix 137 | self.edges_hash.append(hash_) 138 | self.edges_hash_set=set(self.edges_hash) 139 | 140 | self.edges_weight={} 141 | for e1,e2,attr_dict in list(G.edges(data=True)): 142 | hash_=self.hash_edge_attr(e1,e2,attr_dict[self.edge_attr_key]) if self.is_edge_attr else self.hash_edge(e1,e2) 143 | self.edges_weight[hash_]=attr_dict["weight"] if "weight" in attr_dict else 1 144 | 145 | self.number_of_edges = len(self.edges_list) 146 | self.number_of_nodes = len(self.nodes_list) 147 | 148 | if self.is_edge_attr and self.number_of_edges >0: 149 | self.number_of_edges_per_attr={attr:0 for attr in self.unique_edge_attr_vals} 150 | for _,_,attr_dict in list(G.edges(data=True)): 151 | self.number_of_edges_per_attr[attr_dict[self.edge_attr_key]]+=1 152 | 153 | if self.is_node_attr and self.number_of_nodes >0: 154 | self.number_of_nodes_per_attr={attr:0 for attr in self.unique_node_attr_vals} 155 | for _,attr_dict in list(G.nodes(data=True)): 156 | self.number_of_nodes_per_attr[attr_dict[self.node_attr_key]]+=1 157 | 158 | 159 | # HASH FUNCTION 160 | cpdef str hash_node(self,str n1): 161 | return "{0}".format(n1) 162 | 163 | cpdef str hash_edge(self,str n1,str n2): 164 | if not self.is_directed: 165 | return "_".join(sorted([n1,n2])) 166 | return "_".join([n1,n2]) 167 | 168 | cpdef str hash_node_attr(self,str n1, str attr_value): 169 | return "_".join([n1,attr_value]) 170 | 171 | cpdef str hash_edge_attr(self,str n1,str n2, str attr_value): 172 | if self.is_directed: 173 | return "_".join([n1,n2,attr_value]) 174 | ed=sorted([n1,n2]) 175 | ed.extend([attr_value]) 176 | return "_".join(ed) 177 | 178 | ## EXIST FUNCTION 179 | cpdef bint has_node(self,str n_id): 180 | if n_id in self.nodes_list: 181 | return True 182 | return False 183 | 184 | cpdef bint has_edge(self,str n_id1,str n_id2): 185 | if self.number_of_edges == 0: 186 | return False 187 | if self.is_directed: 188 | if n_id1 in self.edges_hash_map and n_id2 in self.edges_hash_map[n_id1]: 189 | return True 190 | else: 191 | if n_id1 in self.edges_hash_map and n_id2 in self.edges_hash_map[n_id1]: 192 | return True 193 | if n_id2 in self.edges_hash_map and n_id1 in self.edges_hash_map[n_id2]: 194 | return True 195 | return False 196 | 197 
| ## LEN FUNCTION 198 | cpdef int size_node_intersect(self,Graph G): 199 | if self.number_of_nodes == 0: 200 | return 0 201 | return len(self.nodes_hash_set.intersection(G.nodes_hash_set)) 202 | cpdef int size_node_union(self,Graph G): 203 | return len(self.nodes_hash_set.union(G.nodes_hash_set)) 204 | 205 | cpdef int size_edge_intersect(self,Graph G): 206 | if self.number_of_edges == 0: 207 | return 0 208 | return len(self.edges_hash_set.intersection(G.edges_hash_set)) 209 | cpdef int size_edge_union(self,Graph G): 210 | return len(self.edges_hash_set.union(G.edges_hash_set)) 211 | 212 | ## GETTER 213 | 214 | def get_nx(self): 215 | return self.nx_g 216 | 217 | def nodes(self,data=False): 218 | if data: 219 | if self.number_of_nodes == 0: 220 | return [],[] 221 | return self.nodes_list,self.attr_nodes 222 | 223 | if self.number_of_nodes == 0: 224 | return [] 225 | return self.nodes_list 226 | 227 | 228 | def edges(self,data=False): 229 | if data: 230 | if self.number_of_edges == 0: 231 | return [],[] 232 | return self.edges_list,self.attr_edges 233 | 234 | if self.number_of_edges == 0: 235 | return [] 236 | return self.edges_list 237 | 238 | cpdef list get_edges_ed(self,str e1,str e2): 239 | if self.is_edge_attr: 240 | hashes=self.edges_hash_map[e1][e2] 241 | return [(e1,e2,self.edges_attr_list[self.edges_hash_idx[hash_]])for hash_ in hashes] 242 | 243 | return [(e1,e2,None)] 244 | 245 | cpdef list get_edges_no(self,str n): 246 | return self.edges_of_nodes[n] 247 | 248 | cpdef dict get_edge_attr(self,edge_hash): 249 | return self.edges_attr_list[self.edges_hash_idx[edge_hash]] 250 | 251 | cpdef dict get_node_attr(self, node_hash): 252 | return self.edges_attr_list[self.edges_hash_idx[node_hash]] 253 | 254 | cpdef dict get_edge_attrs(self,edge_hash): 255 | return self.attr_edges[self.edges_hash_idx[edge_hash]] 256 | 257 | cpdef dict get_node_attrs(self, node_hash): 258 | return self.attr_nodes[self.edges_hash_idx[node_hash]] 259 | 260 | cpdef set get_edges_hash(self): 261 | return self.edges_hash_set 262 | 263 | cpdef set get_nodes_hash(self): 264 | return self.nodes_hash_set 265 | 266 | cpdef str get_node_key(self): 267 | return self.node_attr_key 268 | 269 | cpdef str get_egde_key(self): 270 | return self.edge_attr_key 271 | ##### 272 | 273 | cpdef long size(self): 274 | return self.number_of_nodes 275 | 276 | cpdef int size_attr(self, attr_val): 277 | return self.number_of_nodes_per_attr[attr_val] 278 | 279 | cpdef long density(self): 280 | return self.number_of_edges 281 | 282 | cpdef int density_attr(self, str attr_val): 283 | return self.number_of_edges_per_attr[attr_val] 284 | 285 | cpdef double degree(self,str n_id, bint weight=False): 286 | if weight: 287 | return self.nodes_degree_weighted[self.nodes_idx[n_id]] 288 | return self.nodes_degree[self.nodes_idx[n_id]] 289 | 290 | cpdef double in_degree(self,str n_id, bint weight=False): 291 | if weight: 292 | return self.nodes_degree_in_weighted[self.nodes_idx[n_id]] 293 | return self.nodes_degree_in[self.nodes_idx[n_id]] 294 | 295 | cpdef double out_degree(self,str n_id, bint weight=False): 296 | if weight: 297 | return self.nodes_degree_out_weighted[self.nodes_idx[n_id]] 298 | return self.nodes_degree_out[self.nodes_idx[n_id]] 299 | 300 | cpdef double in_degree_attr(self,str n_id,str attr_val, bint weight=False): 301 | if not self.is_edge_attr and not self.is_directed: 302 | raise AttributeError("No edge attribute have been defined") 303 | if weight: 304 | return self.degree_per_attr_weighted[attr_val][n_id]["in"] 305 | return 
self.degree_per_attr[attr_val][n_id]["in"] 306 | 307 | cpdef double out_degree_attr(self,str n_id,str attr_val, bint weight=False): 308 | if not self.is_edge_attr and not self.is_directed: 309 | raise AttributeError("No edge attribute have been defined") 310 | if weight: 311 | return self.degree_per_attr_weighted[attr_val][n_id]["out"] 312 | return self.degree_per_attr[attr_val][n_id]["out"] 313 | 314 | cpdef double degree_attr(self,str n_id,str attr_val, bint weight=False): 315 | if not self.is_edge_attr: 316 | raise AttributeError("No edge attribute have been defined") 317 | if not self.is_directed: 318 | if weight: 319 | return self.degree_per_attr_weighted[attr_val][n_id]["out"] 320 | return self.degree_per_attr[attr_val][n_id]["out"] 321 | if weight: 322 | return self.degree_per_attr_weighted[attr_val][n_id]["in"] + self.degree_per_attr_weighted[attr_val][n_id]["out"] 323 | return self.degree_per_attr[attr_val][n_id]["out"] + self.degree_per_attr[attr_val][n_id]["in"] 324 | 325 | #GRAPH SETTER 326 | def add_node(self,str id_,**kwargs): 327 | if not self.node_attr_key in kwargs: 328 | print("Node not added because information lacks") 329 | return self 330 | if id_ in self.nodes_idx: 331 | print("Already in G") 332 | return self 333 | G=self.nx_g.copy() 334 | G.add_node(id_,**kwargs) 335 | return Graph(G,self.node_attr_key,self.edge_attr_key) 336 | 337 | 338 | def add_edge(self,str n1,str n2,**kwargs): 339 | G=self.nx_g.copy() 340 | G.add_edge(n1,n2,**kwargs) 341 | return Graph(G,self.node_attr_key,self.edge_attr_key) 342 | 343 | def remove_node(self,str id_): 344 | if not id_ in self.nodes_idx: 345 | print("Already removed in G") 346 | return self 347 | G=self.nx_g.copy() 348 | G.remove_node(id_) 349 | return Graph(G,self.node_attr_key,self.edge_attr_key) 350 | 351 | def remove_edge(self,str n1,str n2,**kwargs): 352 | G=self.nx_g.copy() 353 | edges=G.edges([n1,n2],data=True) 354 | if len(edges) == 0: 355 | return self 356 | elif len(edges)<2: 357 | G.remove_edge(n1,n2) 358 | else: 359 | if not self.edge_attr_key in kwargs: 360 | for i in range(len(edges)): 361 | G.remove_edge(n1,n2,i) 362 | else: 363 | key,val,i=self.edge_attr_key, kwargs[self.edge_attr_key],0 364 | for e1,ed2,attr_dict in edges: 365 | if attr_dict[key] == val: 366 | G.remove_edge(n1,n2,i) 367 | break 368 | i+=1 369 | 370 | return Graph(G,self.node_attr_key,self.edge_attr_key) 371 | 372 | def __init_empty__(self): 373 | self.nodes_list,self.nodes_attr_list,self.nodes_hash,self.nodes_weight,self.attr_nodes=[],[],[],[],[] 374 | self.nodes_degree,self.nodes_degree_in,self.nodes_degree_out,self.nodes_degree_weighted,self.nodes_degree_in_weighted,self.nodes_degree_out_weighted=np.array([],dtype=np.long),np.array([],dtype=np.long),np.array([],dtype=np.long),np.array([],dtype=np.double),np.array([],dtype=np.double),np.array([],dtype=np.double) 375 | self.nodes_idx,self.degree_per_attr,self.degree_per_attr_weighted={},{},{} 376 | self.nodes_hash_set=set([]) 377 | self.number_of_nodes = 0 378 | 379 | self.number_of_edges = 0 380 | self.edges_list=[] 381 | self.edges_attr_list =[] 382 | self.edges_hash_idx = {} 383 | self.edges_hash = [] 384 | self.edges_hash_set= set([]) 385 | self.edges_weight={} 386 | self.edges_hash_map={} 387 | self.attr_edges=[] 388 | 389 | -------------------------------------------------------------------------------- /gmatch4py/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | # coding = utf-8 
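2 | 
3 | # --- Illustrative usage sketch (added; not part of the original package) ---
4 | # The helpers submodule groups the graph readers and converters used across
5 | # GMatch4py. A typical flow (paths and keys below are arbitrary examples):
6 | #
7 | # from gmatch4py.helpers.reader import import_dir
8 | # from gmatch4py.helpers.general import parsenx2graph
9 | #
10 | # graphs = import_dir("data/graphs", format="gexf")        # list of nx.Graph
11 | # internal = parsenx2graph(graphs, node_attr_key="label")  # list of gmatch4py.Graph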
-------------------------------------------------------------------------------- /gmatch4py/helpers/general.pyx: -------------------------------------------------------------------------------- 1 | from ..graph cimport Graph 2 | import networkx as nx 3 | 4 | def parsenx2graph(list_gs,node_attr_key="",edge_attr_key=""): 5 | """ 6 | Parse list of Networkx graphs into Gmatch4py graph format 7 | Parameters 8 | ---------- 9 | list_gs : list 10 | list of graph 11 | node_attr_key : str 12 | node attribute used for the hash 13 | edge_attr_key: str 14 | edge attribute used for the hash 15 | 16 | Returns 17 | ------- 18 | list 19 | list of gmatch4py.Graph 20 | """ 21 | new_gs=[nx.relabel_nodes(g,{node:str(node) for node in list(g.nodes)},copy=True) for g in list_gs] 22 | new_gs=[Graph(g,node_attr_key,edge_attr_key) for g in new_gs] 23 | return new_gs 24 | -------------------------------------------------------------------------------- /gmatch4py/helpers/reader.pyx: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | import sys, os, glob, json, re 3 | import networkx as nx 4 | from tqdm import tqdm 5 | 6 | 7 | """ 8 | The reader submodule contains high-level function to read and store graphs from various files. 9 | """ 10 | 11 | 12 | 13 | methods_read_graph={ 14 | "gexf":nx.read_gexf, 15 | "gml":nx.read_gml, 16 | "graphml":nx.read_graphml 17 | } 18 | 19 | def extract_index(fn): 20 | """ 21 | Extract index from filename 22 | Parameters 23 | ---------- 24 | fn : str 25 | filename 26 | 27 | Returns 28 | ------- 29 | int 30 | index 31 | """ 32 | try: 33 | return int(re.findall("\d+",fn)[-1]) 34 | except: 35 | print("No number found !") 36 | return 0 37 | 38 | 39 | def import_dir(directory,format="gexf",numbered=True): 40 | """ 41 | Based on a given directory, import all graphs and store them in a list/array 42 | 43 | Parameters 44 | ---------- 45 | directory : str 46 | directory path where graphs are stored 47 | format : str 48 | graph file format 49 | numbered 50 | if graph filename are numbered 51 | Returns 52 | ------- 53 | array 54 | graphs 55 | """ 56 | if not os.path.exists(directory): 57 | raise FileNotFoundError("{0} does not exists".format(directory)) 58 | if not format in methods_read_graph: 59 | raise NotImplementedError("{0} is not implemented !".format(format)) 60 | 61 | # Retrieve filename 62 | fns = glob.glob(os.path.join(directory, "*.{0}".format(format))) 63 | 64 | graphs=[] 65 | if numbered: 66 | n=max([extract_index(fn) for fn in fns]) 67 | graphs= [nx.Graph()]*(n+1) 68 | 69 | association_map, i = {}, 0 70 | for fn in tqdm(fns,desc="Loading Graphs from {0}".format(directory)): 71 | if not numbered: 72 | graphs.append(methods_read_graph[format](fn)) 73 | association_map[fn]=i 74 | i+=1 75 | else: 76 | graphs[extract_index(fn)]=methods_read_graph[format](fn) 77 | if not numbered: 78 | return association_map,graphs 79 | return graphs 80 | -------------------------------------------------------------------------------- /gmatch4py/jaccard.pyx: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | 6 | from .base cimport Base 7 | from .helpers.general import parsenx2graph 8 | from cython.parallel cimport prange,parallel 9 | cimport cython 10 | 11 | cdef class Jaccard(Base): 12 | 13 | def __init__(self): 14 | Base.__init__(self,0,True) 15 | 16 | 17 | @cython.boundscheck(False) 18 | cpdef np.ndarray compare(self,list listgs, list selected): 
19 | cdef int n = len(listgs) 20 | cdef list new_gs=parsenx2graph(listgs,self.node_attr_key,self.edge_attr_key) 21 | cdef double[:,:] comparison_matrix = np.zeros((n, n)) 22 | cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) 23 | cdef long[:] n_edges = np.array([g.density() for g in new_gs]) 24 | cdef int i,j 25 | 26 | cdef double[:] selected_test = np.array(self.get_selected_array(selected,n)) 27 | 28 | cdef double[:,:] intersect_len_nodes = np.zeros((n, n)) 29 | cdef double[:,:] intersect_len_edges = np.zeros((n, n)) 30 | cdef double[:,:] union_len_nodes = np.zeros((n, n)) 31 | cdef double[:,:] union_len_edges = np.zeros((n, n)) 32 | for i in range(n): 33 | for j in range(i,n): 34 | intersect_len_nodes[i][j]=new_gs[i].size_node_intersect(new_gs[j]) 35 | intersect_len_edges[i][j]=new_gs[i].size_edge_intersect(new_gs[j])#len(set(hash_edges[i]).intersection(hash_edges[j])) 36 | union_len_nodes[i][j]=new_gs[i].size_node_union(new_gs[j]) 37 | union_len_edges[i][j]=new_gs[i].size_edge_union(new_gs[j]) 38 | with nogil, parallel(num_threads=self.cpu_count): 39 | for i in prange(n,schedule='static'): 40 | for j in range(i,n): 41 | if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1: 42 | if union_len_edges[i][j] >0 and union_len_nodes[i][j] >0: 43 | comparison_matrix[i][j]= \ 44 | (intersect_len_edges[i][j]/union_len_edges[i][j])*\ 45 | (intersect_len_nodes[i][j]/union_len_nodes[i][j]) 46 | 47 | else: 48 | comparison_matrix[i][j] = 0. 49 | 50 | comparison_matrix[j][i] = comparison_matrix[i][j] 51 | 52 | return np.array(comparison_matrix) 53 | -------------------------------------------------------------------------------- /gmatch4py/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | # coding = utf-8 -------------------------------------------------------------------------------- /gmatch4py/kernels/adjacency.pyx: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | 4 | def get_adjacency(G1,G2): 5 | """ 6 | Return adjacency matrices of two graph based on nodes present in both of them. 
7 | 
8 |     Parameters
9 |     ----------
10 |     G1 : nx.Graph
11 |         first graph
12 |     G2 : nx.Graph
13 |         second graph
14 | 
15 |     Returns
16 |     -------
17 |     tuple of np.array
18 |         adjacency matrices of G1 and G2
19 |     """
20 | 
21 |     # Extract nodes
22 |     nodes_G1=list(G1.nodes())
23 |     nodes_G2=list(G2.nodes())
24 | 
25 |     # Get Adjacency Matrix for each graph
26 |     adj_original_G1 = nx.convert_matrix.to_numpy_matrix(G1,nodes_G1)
27 |     adj_original_G2 = nx.convert_matrix.to_numpy_matrix(G2,nodes_G2)
28 | 
29 |     # Get old index
30 |     index_node_G1={node: ix for ix,node in enumerate(nodes_G1)}
31 |     index_node_G2={node: ix for ix,node in enumerate(nodes_G2)}
32 | 
33 |     # Building new indices
34 |     nodes_unique = list(set(nodes_G1).union(nodes_G2))
35 |     new_node_index = {node:i for i,node in enumerate(nodes_unique)}
36 | 
37 |     n=len(nodes_unique)
38 | 
39 |     # Generate new adjacency matrices
40 |     new_adj_G1= np.zeros((n,n))
41 |     new_adj_G2= np.zeros((n,n))
42 | 
43 |     # Filling old values
44 |     for n1 in nodes_unique:
45 |         for n2 in nodes_unique:
46 |             if n1 in G1.nodes() and n2 in G1.nodes():
47 |                 new_adj_G1[new_node_index[n1],new_node_index[n2]]=adj_original_G1[index_node_G1[n1],index_node_G1[n2]]
48 |             if n1 in G2.nodes() and n2 in G2.nodes():
49 |                 new_adj_G2[new_node_index[n1],new_node_index[n2]]=adj_original_G2[index_node_G2[n1],index_node_G2[n2]]
50 | 
51 |     return new_adj_G1,new_adj_G2
52 | 
53 | 
--------------------------------------------------------------------------------
/gmatch4py/kernels/random_walk_kernel.pyx:
--------------------------------------------------------------------------------
1 | # coding = utf-8
2 | 
3 | import networkx as nx
4 | import numpy as np
5 | 
6 | class GeometricRandomWalkKernel():
7 |     __type__ = "sim"
8 |     @staticmethod
9 |     def maxDegree(G):
10 |         degree_sequence = sorted(dict(nx.degree(G)).values(), reverse=True)  # degree sequence; dict() keeps this working with networkx 2.x DegreeView objects
11 | 
12 |         # print "Degree sequence", degree_sequence
13 |         dmax = max(degree_sequence)
14 |         return dmax
15 |     @staticmethod
16 |     def compare(listgs):
17 | 
18 |         n = len(listgs)
19 |         comparison_matrix=np.zeros((n,n))
20 |         for i in range(n):
21 |             for j in range(i,n):
22 |                 if len(listgs[i]) <1 or len(listgs[j]) <1:
23 |                     comparison_matrix[i, j] = 0
24 |                     comparison_matrix[j, i] = 0
25 |                     continue
26 |                 direct_product_graph=nx.tensor_product(listgs[i],listgs[j])
27 |                 Ax = nx.adjacency_matrix(direct_product_graph).todense()
28 |                 try:
29 |                     la = 1/ ((GeometricRandomWalkKernel.maxDegree(direct_product_graph)**2)+1) # lambda value
30 |                 except:
31 |                     la = pow(10,-6) # fallback decay value (the original `pow(1,-6)` always evaluated to 1)
32 |                 eps = pow(10,-10)
33 |                 I=np.identity(Ax.shape[0])
34 |                 I_vec=np.ones(Ax.shape[0])
35 |                 x=I_vec.copy()
36 |                 x_pre=np.zeros(Ax.shape[0])
37 |                 c=0
38 | 
39 |                 while (np.linalg.norm(x-x_pre)) > eps:
40 |                     if c > 100:
41 |                         break
42 |                     x_pre=x
43 | 
44 |                     x= I_vec + la*np.dot(Ax,x_pre.T)
45 |                     c+=1
46 |                 comparison_matrix[i,j]=np.sum(x)
47 |                 comparison_matrix[j,i]=comparison_matrix[i,j]
48 |         # print(comparison_matrix)  # debug output, disabled
49 |         for i in range(n):
50 |             for j in range(i,n):
51 |                 comparison_matrix[i,j] = (comparison_matrix[i,j]/np.sqrt(comparison_matrix[i,i]*comparison_matrix[j,j]))
52 |                 comparison_matrix[j,i]=comparison_matrix[i,j]
53 |         return comparison_matrix
54 | 
55 | class KStepRandomWalkKernel():
56 |     __type__ = "sim"
57 |     @staticmethod
58 |     def maxDegree(G):
59 |         degree_sequence = sorted(dict(nx.degree(G)).values(), reverse=True)  # degree sequence
60 |         # print "Degree sequence", degree_sequence
61 |         dmax = max(degree_sequence)
62 |         return dmax
63 |     @staticmethod
64 |     def compare(listgs,lambda_list=[1,1,1]):
65 |         k=len(lambda_list)
66 |         if k == 0: # guard against an empty lambda_list (the original check compared len(lambda_list) to itself and could never fire)
raise AttributeError("lambda_list must not be empty")
68 |         n = len(listgs)
69 |         comparison_matrix=np.zeros((n,n))
70 |         for i in range(n):
71 |             for j in range(i,n):
72 |                 if len(listgs[i]) <1 or len(listgs[j]) <1:
73 |                     comparison_matrix[i, j] = 0
74 |                     comparison_matrix[j, i] = 0
75 |                     continue
76 |                 direct_product_graph=nx.tensor_product(listgs[i],listgs[j])
77 |                 Ax = nx.adjacency_matrix(direct_product_graph).todense()
78 |                 eps = pow(10,-10)
79 |                 I=np.identity(Ax.shape[0])
80 |                 ax_pow = I.copy()
81 |                 sum_ = lambda_list[0] * I
82 |                 for kk in range(1, k):
83 |                     ax_pow *= Ax
84 |                     sum_ += lambda_list[kk] * ax_pow
85 | 
86 |                 comparison_matrix[i, j] = np.sum(sum_)/(len(listgs[i])**2 * len(listgs[j])**2)
87 |                 comparison_matrix[j,i] = comparison_matrix[i,j]
88 | 
89 |         for i in range(n):
90 |             for j in range(i,n):
91 |                 comparison_matrix[i,j] = comparison_matrix[i,j]/np.sqrt(comparison_matrix[i,i]*comparison_matrix[j,j])
92 |                 comparison_matrix[j,i]=comparison_matrix[i,j]
93 |         return comparison_matrix
--------------------------------------------------------------------------------
/gmatch4py/kernels/shortest_path_kernel.pyx:
--------------------------------------------------------------------------------
1 | # coding = utf-8
2 | 
3 | """
4 | Shortest-Path graph kernel.
5 | Python implementation based on: "Shortest-path kernels on graphs", by
6 | Borgwardt, K.M.; Kriegel, H.-P., in Data Mining, Fifth IEEE
7 | International Conference on , vol., no., pp.8 pp.-, 27-30 Nov. 2005
8 | doi: 10.1109/ICDM.2005.132
9 | Author : Sandro Vega-Pons, Emanuele Olivetti
10 | Modified by : Jacques Fize
11 | """
12 | 
13 | import networkx as nx
14 | import numpy as np
15 | cimport numpy as np
16 | from scipy.sparse.csgraph import floyd_warshall
17 | from .adjacency import get_adjacency
18 | from cython.parallel cimport prange,parallel
19 | from ..helpers.general import parsenx2graph
20 | from ..base cimport Base
21 | cimport cython
22 | 
23 | cdef class ShortestPathGraphKernel(Base):
24 |     """
25 |     Shortest path graph kernel.
26 |     """
27 |     def __init__(self):
28 |         Base.__init__(self,0,False)
29 | 
30 |     def compare_two(self,g_1, g_2):
31 |         """Compute the kernel value (similarity) between two graphs.
32 |         Parameters
33 |         ----------
34 |         g1 : networkx.Graph
35 |             First graph.
36 |         g2 : networkx.Graph
37 |             Second graph.
38 |         Returns
39 |         -------
40 |         k : The similarity value between g1 and g2.
41 |         """
42 |         # Diagonal superior matrix of the floyd warshall shortest
43 |         # paths:
44 |         if isinstance(g_1,nx.Graph) and isinstance(g_2,nx.Graph):
45 |             g_1,g_2= get_adjacency(g_1,g_2)
46 | 
47 |         fwm1 = np.array(floyd_warshall(g_1))
48 |         fwm1[np.isinf(fwm1)] = 0
49 |         fwm1[np.isnan(fwm1)] = 0
50 |         fwm1 = np.triu(fwm1, k=1)
51 |         bc1 = np.bincount(fwm1.reshape(-1).astype(int))
52 | 
53 |         fwm2 = np.array(floyd_warshall(g_2))
54 |         fwm2[np.isinf(fwm2)] = 0
55 |         fwm2[np.isnan(fwm2)] = 0
56 |         fwm2 = np.triu(fwm2, k=1)
57 |         bc2 = np.bincount(fwm2.reshape(-1).astype(int))
58 | 
59 |         # Copy the non-zero shortest paths into arrays of the
60 |         # same length:
61 |         v1 = np.zeros(max(len(bc1), len(bc2)) - 1)
62 |         v1[range(0, len(bc1)-1)] = bc1[1:]
63 | 
64 |         v2 = np.zeros(max(len(bc1), len(bc2)) - 1)
65 |         v2[range(0, len(bc2)-1)] = bc2[1:]
66 | 
67 |         return np.sum(v1 * v2)
68 | 
69 |     @cython.boundscheck(False)
70 |     cpdef np.ndarray compare(self,list graph_list, list selected):
71 |         """Compute the all-pairs kernel values for a list of graphs.
72 |         This function can be used to directly compute the kernel
73 |         matrix for a list of graphs.
The direct computation of the
74 |         kernel matrix is faster than the computation of all individual
75 |         pairwise kernel values.
76 |         Parameters
77 |         ----------
78 |         graph_list: list
79 |             A list of graphs (list of networkx graphs)
80 |         Return
81 |         ------
82 |         K: numpy.array, shape = (len(graph_list), len(graph_list))
83 |             The similarity matrix of all graphs in graph_list.
84 |         """
85 |         cdef int n = len(graph_list)
86 |         cdef double[:,:] k = np.zeros((n, n))
87 |         cdef int cpu_count = self.cpu_count
88 |         cdef int i,j
89 |         cdef list adjacency_matrices = [[None for i in range(n)]for j in range(n)]
90 | 
91 |         for i in range(n):
92 |             for j in range(i, n):
93 |                 adjacency_matrices[i][j] = get_adjacency(graph_list[i],graph_list[j])
94 |                 adjacency_matrices[j][i] = adjacency_matrices[i][j]
95 | 
96 |         with nogil, parallel(num_threads=cpu_count):
97 |             for i in prange(n,schedule='static'):
98 |                 for j in range(i, n):
99 |                     with gil:
100 |                         if len(graph_list[i]) > 0 and len(graph_list[j]) >0:
101 |                             a,b=adjacency_matrices[i][j]
102 |                             k[i][j] = self.compare_two(a,b)
103 |                             k[j][i] = k[i][j]
104 | 
105 |         k_norm = np.zeros((n,n))
106 |         for i in range(n):
107 |             for j in range(i,n):
108 |                 k_norm[i, j] = k[i][j] / np.sqrt(k[i][i] * k[j][j])
109 |                 k_norm[j, i] = k_norm[i, j]
110 | 
111 |         return np.nan_to_num(k_norm)
112 | 
113 | 
114 | 
115 | cdef class ShortestPathGraphKernelDotCostMatrix(ShortestPathGraphKernel):
116 |     """
117 |     Instead of just multiplying the counts of distance values found between the nodes of each graph, this version multiplies the node distance matrices generated from each graph.
118 |     """
119 |     def __init__(self):
120 |         ShortestPathGraphKernel.__init__(self)
121 | 
122 |     def compare_two(self,g_1, g_2):
123 |         """Compute the kernel value (similarity) between two graphs.
124 |         Parameters
125 |         ----------
126 |         g1 : networkx.Graph
127 |             First graph.
128 |         g2 : networkx.Graph
129 |             Second graph.
130 |         Returns
131 |         -------
132 |         k : The similarity value between g1 and g2.
133 |         """
134 |         # Diagonal superior matrix of the floyd warshall shortest
135 |         # paths:
136 |         if isinstance(g_1,nx.Graph) and isinstance(g_2,nx.Graph):
137 |             g_1,g_2= get_adjacency(g_1,g_2)
138 | 
139 |         fwm1 = np.array(floyd_warshall(g_1))
140 |         fwm1[np.isinf(fwm1)] = 0
141 |         fwm1[np.isnan(fwm1)] = 0
142 |         fwm1 = np.triu(fwm1, k=1)
143 | 
144 |         fwm2 = np.array(floyd_warshall(g_2))
145 |         fwm2[np.isinf(fwm2)] = 0
146 |         fwm2[np.isnan(fwm2)] = 0
147 |         fwm2 = np.triu(fwm2, k=1)
148 | 
149 |         return np.sum(fwm1 * fwm2)
--------------------------------------------------------------------------------
/gmatch4py/kernels/weisfeiler_lehman.pyx:
--------------------------------------------------------------------------------
1 | # coding = utf-8
2 | 
3 | """Weisfeiler-Lehman graph kernel.
4 | 
5 | Python implementation based on: "Weisfeiler-Lehman Graph Kernels", by:
6 | Nino Shervashidze, Pascal Schweitzer, Erik J. van Leeuwen, Kurt
7 | Mehlhorn, Karsten M. Borgwardt, JMLR, 2012.
8 | http://jmlr.csail.mit.edu/papers/v12/shervashidze11a.html 9 | 10 | Author : Sandro Vega-Pons, Emanuele Olivetti 11 | Source : https://github.com/emanuele/jstsp2015/blob/master/gk_weisfeiler_lehman.py 12 | Modified by : Jacques Fize 13 | """ 14 | 15 | import copy 16 | 17 | import networkx as nx 18 | import numpy as np 19 | cimport numpy as np 20 | from ..base cimport Base 21 | from ..base import minmax_scale 22 | from scipy.sparse import csc_matrix,lil_matrix 23 | 24 | cdef class WeisfeleirLehmanKernel(Base): 25 | 26 | cdef int h 27 | 28 | def __init__(self,h=2): 29 | Base.__init__(self,0,True) 30 | self.h=h 31 | 32 | 33 | cpdef np.ndarray compare(self,list graph_list, list selected): 34 | """Compute the all-pairs kernel values for a list of graphs. 35 | This function can be used to directly compute the kernel 36 | matrix for a list of graphs. The direct computation of the 37 | kernel matrix is faster than the computation of all individual 38 | pairwise kernel values. 39 | Parameters 40 | ---------- 41 | graph_list: list 42 | A list of graphs (list of networkx graphs) 43 | h : integer 44 | Number of iterations. 45 | node_label : boolean 46 | Whether to use original node labels. True for using node labels 47 | saved in the attribute 'node_label'. False for using the node 48 | degree of each node as node attribute. 49 | Return 50 | ------ 51 | K: numpy.array, shape = (len(graph_list), len(graph_list)) 52 | The similarity matrix of all graphs in graph_list. 53 | """ 54 | 55 | cdef int n = len(graph_list) 56 | cdef int n_nodes = 0 57 | cdef int n_max = 0 58 | cdef int i,j 59 | # Compute adjacency lists and n_nodes, the total number of 60 | # nodes in the dataset. 61 | for i in range(n): 62 | n_nodes += graph_list[i].number_of_nodes() 63 | 64 | # Computing the maximum number of nodes in the graphs. It 65 | # will be used in the computation of vectorial 66 | # representation. 67 | if n_max < graph_list[i].number_of_nodes(): 68 | n_max = graph_list[i].number_of_nodes() 69 | 70 | phi = np.zeros((n_nodes, n), dtype=np.uint64) 71 | phi=lil_matrix(phi) 72 | 73 | # INITIALIZATION: initialize the nodes labels for each graph 74 | # with their labels or with degrees (for unlabeled graphs) 75 | 76 | cdef list labels = [0] * n 77 | cdef dict label_lookup = {} 78 | cdef int label_counter = 0 79 | 80 | 81 | # label_lookup is an associative array, which will contain the 82 | # mapping from multiset labels (strings) to short labels 83 | # (integers) 84 | 85 | cdef list nodes 86 | for i in range(n): 87 | nodes = list(graph_list[i].nodes) 88 | # Node identifiers themselves are used as the 89 | # initial labels 90 | labels[i] = np.zeros(len(nodes), dtype=np.int32) 91 | 92 | for j in range(len(nodes)): 93 | if not (nodes[j] in label_lookup): 94 | label_lookup[nodes[j]] = str(label_counter) 95 | labels[i][j] = label_counter 96 | label_counter += 1 97 | else: 98 | labels[i][j] = label_lookup[nodes[j]] 99 | # labels are associated to a natural number 100 | # starting with 0. 101 | 102 | phi[labels[i][j], i] += 1 103 | 104 | graph_list[i]=nx.relabel_nodes(graph_list[i],label_lookup) 105 | 106 | # cdef np.ndarray[np.float64_t] k 107 | k = np.dot(phi.transpose(), phi) 108 | # MAIN LOOP 109 | cdef int it = 0 110 | 111 | new_labels = copy.deepcopy(labels) # deep copy needed: the label arrays are rewritten in place in the main loop
112 | 113 | while it < self.h: 114 | # create an empty lookup table 115 | label_lookup = {} 116 | label_counter = 0 117 | 118 | phi = np.zeros((n_nodes, n)) 119 | for i in range(n): 120 | nodes = list(graph_list[i].nodes) 121 | for v in range(len(nodes)): 122 | # form a multiset label of the node v of the i'th graph 123 | # and convert it to a string 124 | 125 | long_label = [] 126 | long_label.extend(nx.neighbors(graph_list[i],nodes[v])) 127 | long_label = sorted(long_label) # sort so the multiset label is order-invariant 128 | long_label_string = "".join(long_label) 129 | # if the multiset label has not yet occurred, add it to the 130 | # lookup table and assign a number to it 131 | if not (long_label_string in label_lookup): 132 | label_lookup[long_label_string] = str(label_counter) 133 | new_labels[i][v] = label_counter 134 | label_counter += 1 135 | else: 136 | new_labels[i][v] = label_lookup[long_label_string] 137 | # fill the column for i'th graph in phi 138 | aux = np.bincount(new_labels[i]) 139 | phi[new_labels[i], i] += aux[new_labels[i]] 140 | 141 | k += np.dot(phi.transpose(), phi) 142 | it = it + 1 143 | 144 | return np.ma.getdata(minmax_scale(k)) -------------------------------------------------------------------------------- /gmatch4py/mcs.pyx: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | import numpy as np 3 | cimport numpy as np 4 | from .graph cimport Graph 5 | from .base cimport Base 6 | from cython.parallel cimport prange,parallel 7 | from .helpers.general import parsenx2graph 8 | cimport cython 9 | 10 | cdef class MCS(Base): 11 | """ 12 | *A graph distance metric based on the maximal common subgraph, H. Bunke and K. Shearer, 13 | Pattern Recognition Letters, 1998* 14 | """ 15 | def __init__(self): 16 | Base.__init__(self,0,True) 17 | 18 | @cython.boundscheck(False) 19 | cpdef np.ndarray compare(self,list listgs, list selected): 20 | cdef int n = len(listgs) 21 | cdef double [:,:] comparison_matrix = np.zeros((n, n)) 22 | cdef double[:] selected_test = np.array(self.get_selected_array(selected,n)) 23 | cdef list new_gs=parsenx2graph(listgs,self.node_attr_key,self.edge_attr_key) 24 | cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) 25 | cdef double [:,:] intersect_len_nodes = np.zeros((n, n)) 26 | cdef int i,j 27 | for i in range(n): 28 | for j in range(i,n): 29 | intersect_len_nodes[i][j]=new_gs[i].size_node_intersect(new_gs[j]) 30 | 31 | with nogil, parallel(num_threads=self.cpu_count): 32 | for i in prange(n,schedule='static'): 33 | for j in range(i, n): 34 | if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1: 35 | comparison_matrix[i][j] = intersect_len_nodes[i][j]/max(n_nodes[i],n_nodes[j]) 36 | else: 37 | comparison_matrix[i][j] = 0. 38 | if i==j: 39 | comparison_matrix[i][j]=1 40 | comparison_matrix[j][i] = comparison_matrix[i][j] 41 | 42 | 43 | return np.array(comparison_matrix) 44 | 45 | 46 | -------------------------------------------------------------------------------- /gmatch4py/vertex_edge_overlap.pyx: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | 6 | from .graph cimport Graph 7 | from cython.parallel cimport prange,parallel 8 | from .helpers.general import parsenx2graph 9 | cimport cython 10 | from .base cimport Base 11 | 12 | cdef class VertexEdgeOverlap(Base): 13 | 14 | """ 15 | Vertex/Edge Overlap Algorithm 16 | presented in Web graph similarity for anomaly detection, Journal of Internet Services and Applications, 2008 17 | by P.
Papadimitriou, A. Dasdan and H. Garcia-Molina 18 | 19 | Code Author : Jacques Fize 20 | """ 21 | def __init__(self): 22 | Base.__init__(self,0,True) 23 | 24 | @cython.boundscheck(False) 25 | cpdef np.ndarray compare(self,list listgs, list selected): 26 | cdef int n = len(listgs) 27 | cdef list new_gs=parsenx2graph(listgs,self.node_attr_key,self.edge_attr_key) 28 | cdef double[:,:] comparison_matrix = np.zeros((n, n)) 29 | cdef int denom,i,j 30 | cdef long[:] n_nodes = np.array([g.size() for g in new_gs]) 31 | cdef long[:] n_edges = np.array([g.density() for g in new_gs]) 32 | 33 | cdef double[:] selected_test = np.array(self.get_selected_array(selected,n)) 34 | 35 | cdef double[:,:] intersect_len_nodes = np.zeros((n, n)) 36 | cdef double[:,:] intersect_len_edges = np.zeros((n, n)) 37 | for i in range(n): 38 | for j in range(i,n): 39 | intersect_len_nodes[i][j]=new_gs[i].size_node_intersect(new_gs[j]) 40 | intersect_len_edges[i][j]=new_gs[i].size_edge_intersect(new_gs[j])#len(set(hash_edges[i]).intersection(hash_edges[j])) 41 | 42 | with nogil, parallel(num_threads=self.cpu_count): 43 | for i in prange(n,schedule='static'): 44 | for j in range(i,n): 45 | if n_nodes[i] > 0 and n_nodes[j] > 0 and selected_test[i] == 1: 46 | denom=n_nodes[i]+n_nodes[j]+\ 47 | n_edges[i]+n_edges[j] 48 | if denom > 0: 49 | comparison_matrix[i][j]=(2*(intersect_len_nodes[i][j] 50 | +intersect_len_edges[i][j]))/denom # Data = True --> For nx.MultiDiGraph 51 | if i==j: 52 | comparison_matrix[i][j]=1 53 | comparison_matrix[j][i] = comparison_matrix[i][j] 54 | return np.array(comparison_matrix) 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /gmatch4py/vertex_ranking.pyx: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | 3 | import networkx as nx 4 | import numpy as np 5 | cimport numpy as np 6 | from scipy.stats import spearmanr 7 | 8 | from .base cimport Base 9 | 10 | cdef class VertexRanking(Base): 11 | """ 12 | Vertex Ranking 13 | presented in Web graph similarity for anomaly detection, Journal of Internet Services and Applications, 2008 # Maybe not ?? 14 | by P.
Dasdan and H. Garcia-Molina 15 | 16 | Code Author : Jacques Fize 17 | 18 | """ 19 | def __init__(self): 20 | Base.__init__(self,0,True) 21 | 22 | cpdef np.ndarray compare(self,list listgs, list selected): 23 | cdef int n,i,j # number of graphs 24 | n = len(listgs) 25 | 26 | cdef np.ndarray comparison_matrix = np.zeros((n,n)) #similarity matrix 27 | cdef list X,Y,pager_i,pager_j,page_r,node_intersection #temp data (page rank data for the most part) 28 | page_r=[nx.pagerank(nx.DiGraph(g)) for g in listgs] 29 | for i in range(n): 30 | pager_i=list(page_r[i]) 31 | for j in range(i,n): 32 | g1,g2=listgs[i],listgs[j] 33 | f=self.isAccepted(g1,i,selected) 34 | pager_j=list(page_r[j]) 35 | node_intersection=list(set(pager_i) & set(pager_j)) 36 | X,Y=[],[] 37 | for node in node_intersection: 38 | X.append(page_r[i][node]) 39 | Y.append(page_r[j][node]) 40 | comparison_matrix[i,j] = spearmanr(X,Y)[0] 41 | comparison_matrix[j,i] = comparison_matrix[i,j] 42 | return np.nan_to_num(comparison_matrix) 43 | -------------------------------------------------------------------------------- /logo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacquesfize/GMatch4py/4fc0a822514c65c0d8b12d090b5b89c0af50ef2a/logo2.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | decorator 2 | scipy 3 | networkx==2.1 4 | numpy 5 | cython 6 | 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #import setuptools 2 | import sys, os, shutil 3 | from distutils.core import setup 4 | from distutils.extension import Extension 5 | import numpy as np 6 | import platform 7 | try: 8 | from Cython.Build import cythonize 9 | from Cython.Distutils import build_ext 10 | except: 11 | print("You don't seem to have Cython installed.
Please get a") 12 | print("copy from www.cython.org and install it") 13 | sys.exit(1) 14 | 15 | is_linux = sys.platform == 'linux' 16 | libs=[] 17 | if is_linux: # Issue #42 18 | libs.append('rt') # -lrt for clock_gettime 19 | 20 | def scandir(dir, files=[]): 21 | for file in os.listdir(dir): 22 | path = os.path.join(dir, file) 23 | if os.path.isfile(path) and path.endswith(".pyx"): 24 | files.append(path.replace(os.path.sep, ".")[:-4]) 25 | elif os.path.isdir(path): 26 | scandir(path, files) 27 | return files 28 | 29 | # generate an Extension object from its dotted name 30 | def makeExtension(extName): 31 | global libs 32 | extPath = extName.replace(".", os.path.sep)+".pyx" 33 | 34 | ## For Mojave Users 35 | if platform.system() == "Darwin": 36 | if "10.14" in platform.mac_ver()[0]: 37 | return Extension( 38 | extName, 39 | [extPath],include_dirs=[np.get_include()],language='c++',libraries=libs, 40 | extra_compile_args=["-stdlib=libc++"] 41 | ) 42 | 43 | return Extension( 44 | extName, 45 | [extPath],include_dirs=[np.get_include()],language='c++',libraries=libs, 46 | #extra_compile_args = ["-O0", "-fopenmp"],extra_link_args=['-fopenmp'] 47 | 48 | ) 49 | 50 | # get the list of extensions 51 | extNames = scandir("gmatch4py") 52 | 53 | # and build up the set of Extension objects 54 | extensions = cythonize([makeExtension(name) for name in extNames]) 55 | 56 | from os import path 57 | this_directory = path.abspath(path.dirname(__file__)) 58 | with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f: 59 | long_description = f.read() 60 | 61 | requirements=["numpy","networkx","scipy",'scikit-learn','tqdm','pandas',"joblib","gensim","psutil"] 62 | setup( 63 | name="GMatch4py", 64 | author="Jacques Fize", 65 | description="A python module for graph matching (use Cython)", 66 | long_description=long_description, 67 | long_description_content_type='text/markdown', 68 | url="https://github.com/Jacobe2169/GMatch4py", 69 | packages=["gmatch4py"], 70 | ext_modules=extensions, 71 | cmdclass={'build_ext': build_ext}, 72 | setup_requires=requirements, 73 | install_requires=requirements, 74 | version="0.2.5b", 75 | classifiers=[ 76 | "Programming Language :: Python :: 3", 77 | "License :: OSI Approved :: MIT License", 78 | "Operating System :: OS Independent", 79 | ] 80 | ) 81 | #Clean cpp and compiled file 82 | f=True 83 | if f: 84 | if os.path.exists("build"): 85 | shutil.rmtree("build") 86 | if os.path.exists("dist"): 87 | shutil.rmtree("dist") 88 | os.system("find . -name \*.c -delete ; find . 
-name \*.cpp -delete ;") -------------------------------------------------------------------------------- /test/gmatch4py_performance_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.chdir(os.environ["HOME"]) 3 | 4 | def test_mesure(): 5 | import gmatch4py as gm 6 | import networkx as nx 7 | import time 8 | from tqdm import tqdm 9 | import pandas as pd 10 | 11 | 12 | max_=100 13 | size_g=10 14 | graphs_all=[nx.random_tree(size_g) for i in range(max_)] 15 | result_compiled=[] 16 | for size_ in tqdm(range(50,max_,50)): 17 | graphs=graphs_all[:size_] 18 | comparator=None 19 | for class_ in [gm.BagOfNodes,gm.WeisfeleirLehmanKernel, gm.GraphEditDistance, gm.GreedyEditDistance, gm.HED, gm.BP_2, gm.Jaccard, gm.MCS, gm.VertexEdgeOverlap]: 20 | deb=time.time() 21 | if class_ in (gm.GraphEditDistance, gm.BP_2, gm.GreedyEditDistance, gm.HED): 22 | comparator = class_(1, 1, 1, 1) 23 | elif class_ == gm.WeisfeleirLehmanKernel: 24 | comparator = class_(h=2) 25 | else: 26 | comparator=class_() 27 | matrix = comparator.compare(graphs,None) 28 | print([class_.__name__,size_,time.time()-deb]) 29 | result_compiled.append([class_.__name__,size_,time.time()-deb]) 30 | 31 | df = pd.DataFrame(result_compiled,columns="algorithm size_data time_exec_s".split()) 32 | df.to_csv("new_gmatch4py_res_{0}graphs_{1}size.csv".format(max_,size_g)) -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | import networkx as nx 4 | 5 | def __import(): 6 | # Gmatch4py use networkx graph 7 | import networkx as nx 8 | import gmatch4py as gm 9 | 10 | 11 | def test_import(): 12 | os.chdir(os.environ["HOME"] ) 13 | __import() 14 | 15 | def test_graph(): 16 | os.chdir(os.environ["HOME"]) 17 | import networkx as nx 18 | import gmatch4py as gm 19 | 20 | # Simple Graph 21 | G1 = nx.Graph() 22 | G2 = nx.Graph() 23 | G1.add_edge("1","2") 24 | G1.add_edge("1","3") 25 | 26 | gm.graph.Graph(G1) 27 | 28 | # Digraph Graph 29 | G1 = nx.DiGraph() 30 | G1.add_edge("1","2") 31 | G1.add_edge("1","3") 32 | assert list(G1.edges()) == gm.graph.Graph(G1).edges() 33 | 34 | G1 = nx.DiGraph() 35 | G1.add_edge("1","2",color="blue") 36 | G1.add_edge("1","2",color="red") 37 | G1.add_edge("1","3",color="green") 38 | assert gm.graph.Graph(G1,edge_attr_key="color").density() == 2 39 | assert gm.graph.Graph(G1).density() == 2 40 | 41 | # Multi Graph 42 | G1 = nx.MultiGraph() 43 | G1.add_edge("1","2",color="blue") 44 | G1.add_edge("1","3",color="green") 45 | assert list(G1.edges()) == gm.graph.Graph(G1).edges() 46 | G1 = nx.MultiGraph() 47 | G1.add_edge("1","2",color="blue") 48 | G1.add_edge("1","3",color="green") 49 | assert len(set([gm.graph.Graph(G1).hash_edge_attr(ed[0],ed[1],ed[2]["color"]) for ed in list(G1.edges(data=True))]).intersection(gm.graph.Graph(G1,edge_attr_key="color").get_edges_hash())) == 2 50 | 51 | G1 = nx.MultiGraph() 52 | G1.add_edge("1","2",color="blue") 53 | G1.add_edge("1","2",color="red") 54 | G1.add_edge("1","3",color="green") 55 | assert gm.graph.Graph(G1,edge_attr_key="color").density() == len(G1.edges(data=True)) 56 | assert gm.graph.Graph(G1).density() == len(G1.edges(data=True)) 57 | 58 | # Multi DiGraph 59 | G1 = nx.MultiDiGraph() 60 | G1.add_edge("1","2",color="blue") 61 | G1.add_edge("1","2",color="red") 62 | G1.add_edge("1","3",color="green") 63 | assert gm.graph.Graph(G1,edge_attr_key="color").density() == 
len(G1.edges(data=True)) 64 | assert gm.graph.Graph(G1).density() == len(G1.edges(data=True)) 65 | 66 | def test_hash(): 67 | os.chdir(os.environ["HOME"]) 68 | import networkx as nx 69 | import gmatch4py as gm 70 | 71 | # Basic HASH 72 | G1 = nx.Graph() 73 | G_gm = gm.graph.Graph(G1) 74 | assert G_gm.hash_edge("1","2") == "1_2" 75 | assert G_gm.hash_edge("2","1") == "1_2" 76 | 77 | # IF directed 78 | G1 = nx.DiGraph() 79 | G1.add_edge("1","2") 80 | G_gm = gm.graph.Graph(G1) 81 | assert G_gm.hash_edge("3","2") == "3_2" 82 | assert G_gm.hash_edge("2","1") == "2_1" 83 | 84 | # IF color and directed 85 | G1 = nx.DiGraph() 86 | G1.add_edge("1","2",color="blue") 87 | G_gm = gm.graph.Graph(G1,edge_attr_key="color") 88 | assert G_gm.hash_edge_attr("3","2","blue") == "3_2_blue" 89 | assert G_gm.get_edges_hash() == {"1_2_blue"} 90 | 91 | # if color and not directed 92 | G1 = nx.Graph() 93 | G1.add_edge("1","2",color="blue") 94 | G_gm = gm.graph.Graph(G1,edge_attr_key="color") 95 | assert G_gm.hash_edge_attr("3","2","blue") == "2_3_blue" 96 | 97 | def test_intersect_union(): 98 | os.chdir(os.environ["HOME"]) 99 | import networkx as nx 100 | import gmatch4py as gm 101 | 102 | # Basic 103 | G1 = nx.Graph() 104 | G1.add_edge("1","2") 105 | G1.add_edge("1","3") 106 | G2 = G1.copy() 107 | G2.add_edge("3","4") 108 | GM1 = gm.graph.Graph(G1) 109 | GM2 = gm.graph.Graph(G2) 110 | 111 | assert GM1.size_edge_union(GM2) == 3 112 | assert GM1.size_node_union(GM2) == 4 113 | 114 | assert GM1.size_edge_intersect(GM2) == 2 115 | assert GM1.size_node_intersect(GM2) == 3 116 | 117 | # BASIC and noised for hash 118 | G1 = nx.Graph() 119 | G1.add_edge("1","2") 120 | G1.add_edge("1","3") 121 | G2 = nx.Graph() 122 | G2.add_edge("1","2") 123 | G2.add_edge("3","1") # Changing the direction (no impact if working) 124 | G2.add_edge("3","4") 125 | GM1 = gm.graph.Graph(G1) 126 | GM2 = gm.graph.Graph(G2) 127 | 128 | assert GM1.size_edge_union(GM2) == 3 129 | assert GM1.size_node_union(GM2) == 4 130 | 131 | assert GM1.size_edge_intersect(GM2) == 2 132 | assert GM1.size_node_intersect(GM2) == 3 133 | 134 | 135 | # Directed 136 | G1 = nx.DiGraph() 137 | G1.add_edge("1","2") 138 | G1.add_edge("1","3") 139 | G2 = nx.DiGraph() 140 | G2.add_edge("1","2") 141 | G2.add_edge("3","1") # Changing the direction (no impact if working) 142 | G2.add_edge("3","4") 143 | GM1 = gm.graph.Graph(G1) 144 | GM2 = gm.graph.Graph(G2) 145 | 146 | assert GM1.size_edge_union(GM2) == 4 147 | assert GM1.size_node_union(GM2) == 4 148 | 149 | assert GM1.size_edge_intersect(GM2) == 1 150 | assert GM1.size_node_intersect(GM2) == 3 151 | 152 | 153 | # IF COLOR 154 | G1 = nx.DiGraph(); G1.add_node("1",color="blue") 155 | G2 = nx.DiGraph(); G2.add_node("1",color="red") 156 | 157 | GM1,GM2 = gm.graph.Graph(G1),gm.graph.Graph(G2) 158 | assert GM1.size_node_intersect(GM2) == 1 159 | GM1,GM2 = gm.graph.Graph(G1,node_attr_key="color"),gm.graph.Graph(G2,node_attr_key="color") 160 | assert GM1.size_node_intersect(GM2) == 0 161 | 162 | 163 | G1 = nx.DiGraph(); G1.add_edge("1","2",color="blue") 164 | G2 = nx.DiGraph(); G2.add_edge("1","2",color="red") 165 | 166 | GM1,GM2 = gm.graph.Graph(G1),gm.graph.Graph(G2) 167 | assert GM1.size_edge_intersect(GM2) == 1 168 | assert GM1.size_edge_union(GM2) == 1 169 | GM1,GM2 = gm.graph.Graph(G1,edge_attr_key="color"),gm.graph.Graph(G2,edge_attr_key="color") 170 | assert GM1.size_edge_intersect(GM2) == 0 171 | assert GM1.size_edge_union(GM2) == 2 172 | 173 | def test_degree(): 174 | os.chdir(os.environ["HOME"]) 175 | import networkx as nx 
176 | import gmatch4py as gm 177 | 178 | # Not DIRECTED and no attr 179 | G1 = nx.Graph() 180 | G1.add_edge("1","2") 181 | G1.add_edge("1","3") 182 | GM1 = gm.graph.Graph(G1) 183 | assert GM1.degree('1') == 2 184 | 185 | G1 = nx.DiGraph() 186 | G1.add_edge("1","2") 187 | G1.add_edge("3","1") 188 | GM1 = gm.graph.Graph(G1) 189 | assert GM1.degree('1') == 2 190 | assert GM1.in_degree('1') == 1 191 | assert GM1.out_degree('1') == 1 192 | 193 | G1 = nx.MultiGraph() 194 | G1.add_edge("1","2",color="blue") 195 | G1.add_edge("1","2",color="red") 196 | G1.add_edge("1","3",color="blue") 197 | GM1 = gm.graph.Graph(G1,edge_attr_key ="color") 198 | 199 | assert GM1.degree_attr('1',"blue") == 2 200 | assert GM1.degree('1') == 3 201 | 202 | G1 = nx.MultiDiGraph() 203 | G1.add_edge("1","2",color="blue") 204 | G1.add_edge("1","2",color="red") 205 | G1.add_edge("1","3",color="green") 206 | GM1 = gm.graph.Graph(G1,edge_attr_key ="color") 207 | assert GM1.in_degree_attr('2','red') == 1 208 | assert GM1.in_degree('2') == 2 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | --------------------------------------------------------------------------------
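The random-walk kernel at the head of this section (random_walk_kernel.pyx) evaluates a truncated series over the tensor-product graph: it sums, for each walk length k up to a cutoff, a weight lambda_k times the total number of k-step walks, then normalises by the squared graph sizes. Here is a minimal NumPy sketch of that series; the function name and the `lambda_list` weight argument are illustrative stand-ins, not part of the library API:

```python
import networkx as nx
import numpy as np

def truncated_walk_kernel(g1, g2, lambda_list):
    # Walks in the tensor (direct) product graph correspond to
    # simultaneous walks in g1 and g2.
    gp = nx.tensor_product(g1, g2)
    A = np.asarray(nx.adjacency_matrix(gp).todense())
    acc = lambda_list[0] * np.identity(A.shape[0])
    a_pow = np.identity(A.shape[0])
    for lam in lambda_list[1:]:
        a_pow = a_pow @ A  # the k-th matrix power counts walks of length k
        acc += lam * a_pow
    # Size normalisation, mirroring the library code
    return np.sum(acc) / (len(g1) ** 2 * len(g2) ** 2)

g1 = nx.complete_bipartite_graph(3, 2)
g2 = nx.complete_bipartite_graph(3, 3)
print(truncated_walk_kernel(g1, g2, [1.0, 0.5, 0.25]))
```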
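Both shortest-path kernel classes follow the library-wide `compare()` contract: a list of `networkx` graphs in, a normalised similarity matrix out. A short usage sketch, assuming `ShortestPathGraphKernel` is exported at the package top level like the classes exercised in test/gmatch4py_performance_test.py:

```python
import networkx as nx
import gmatch4py as gm

graphs = [nx.random_tree(10) for _ in range(5)]

sp_kernel = gm.ShortestPathGraphKernel()
k_sp = sp_kernel.compare(graphs, None)       # (5, 5) matrix with unit diagonal

wl_kernel = gm.WeisfeleirLehmanKernel(h=2)   # class name as spelled in the source
k_wl = wl_kernel.compare(graphs, None)
```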
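The computation in vertex_edge_overlap.pyx reduces to VEO(G, G') = 2(|V ∩ V'| + |E ∩ E'|) / (|V| + |V'| + |E| + |E'|). A hand-checkable sketch of that arithmetic, with plain Python sets standing in for the library's hashed node and edge sets:

```python
# Toy graphs: G has edges a-b and a-c; G' additionally has c-d.
V1, E1 = {"a", "b", "c"}, {("a", "b"), ("a", "c")}
V2, E2 = {"a", "b", "c", "d"}, {("a", "b"), ("a", "c"), ("c", "d")}

overlap = 2 * (len(V1 & V2) + len(E1 & E2))    # 2 * (3 + 2) = 10
denom = len(V1) + len(V2) + len(E1) + len(E2)  # 3 + 4 + 2 + 3 = 12
print(overlap / denom)                         # ~0.83: strongly overlapping graphs
```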
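`MCS` scores a pair as |V ∩ V'| / max(|V|, |V'|) over hashed node sets, and `VertexRanking` computes Spearman's rank correlation between the PageRank vectors of the two graphs restricted to their shared nodes. A sketch of both, assuming `VertexRanking` is exported at the top level (`MCS` and `VertexEdgeOverlap` are confirmed by test/gmatch4py_performance_test.py):

```python
import networkx as nx
import gmatch4py as gm

g1 = nx.complete_bipartite_graph(4, 3)
g2 = nx.complete_bipartite_graph(5, 3)

for cls in (gm.MCS, gm.VertexEdgeOverlap, gm.VertexRanking):
    matrix = cls().compare([g1, g2], None)  # 2x2 similarity matrix
    print(cls.__name__, matrix[0][1])
```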