├── .gitignore ├── LICENSE ├── README.md ├── ged4py ├── __init__.py ├── algorithm │ ├── __init__.py │ ├── abstract_graph_edit_dist.py │ ├── edge_edit_dist.py │ └── graph_edit_dist.py ├── data │ ├── source │ │ └── source1.txt │ └── suspicious │ │ └── test.txt └── graph │ ├── __init__.py │ └── edge_graph.py ├── requirements.txt ├── setup.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | #Mac OS 104 | /.DS_Store 105 | .DS_Store 106 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jacques Fize 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ged4py 2 | 3 | **ged4py** is an implementation of the graph edit distance for **Python3** and **NetworkX** users. 4 | 5 | ## Depreciated 6 | 7 | Hi everyone, `ged4py` was the first part of a larger project: `gmatch4py`. Gmatch4py is a module that regroup Python implementations -- more particularly using Cython -- of GraphMatching algorithms. Algorithms such as the GED (graph edit distance) using the Munkres algorithms, are included in this new module and is more efficient thanks to Cython. For these reasons, this module won't be maintained anymore ! 8 | 9 | Feel free to check Gmatch4py at this address : https://github.com/Jacobe2169/GMatch4py 10 | 11 | 12 | # How-to use it ? 13 | First, you need to create (or load) your graphs using NetworkX. In the following example, we built two simple graphs. 14 | 15 | import networkx as nx 16 | g=nx.Graph() 17 | g.add_edge("A","B") 18 | 19 | g.add_node("C",weight=1) 20 | g2=g.copy() 21 | g.add_edge("A","C") 22 | 23 | Then, use the `compare` function available in the `ged4py.algorithm` 24 | 25 | from ged4py.algorithm import graph_edit_dist 26 | print(graph_edit_dist.compare(g,g2)) 27 | 28 | 29 | # Acknowledgments 30 | 31 | This library is a modification of the code available at [**haakondr/graph-edit-distance-python**](https://github.com/haakondr/graph-edit-distance-python). The core of the code was implemented by him, thus we'd like to thank him ! 32 | -------------------------------------------------------------------------------- /ged4py/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacquesfize/ged4py/8cd3918df43eb5a36d283a9f8674f3b145054ab7/ged4py/__init__.py -------------------------------------------------------------------------------- /ged4py/algorithm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacquesfize/ged4py/8cd3918df43eb5a36d283a9f8674f3b145054ab7/ged4py/algorithm/__init__.py -------------------------------------------------------------------------------- /ged4py/algorithm/abstract_graph_edit_dist.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | from __future__ import print_function 3 | 4 | from scipy.optimize import linear_sum_assignment 5 | import sys 6 | import numpy as np 7 | from networkx import __version__ as nxv 8 | 9 | 10 | class AbstractGraphEditDistance(object): 11 | def __init__(self, g1, g2): 12 | self.g1 = g1 13 | self.g2 = g2 14 | 15 | def normalized_distance(self): 16 | """ 17 | Returns the graph edit distance between graph g1 & g2 18 | The distance is normalized on the size of the two graphs. 19 | This is done to avoid favorisation towards smaller graphs 20 | """ 21 | avg_graphlen = (len(self.g1) + len(self.g2)) / 2 22 | return self.distance() / avg_graphlen 23 | 24 | def distance(self): 25 | return sum(self.edit_costs()) 26 | 27 | def edit_costs(self): 28 | cost_matrix = self.create_cost_matrix() 29 | row_ind,col_ind = linear_sum_assignment(cost_matrix) 30 | return [cost_matrix[row_ind[i]][col_ind[i]] for i in range(len(row_ind))] 31 | 32 | def create_cost_matrix(self): 33 | """ 34 | Creates a |N+M| X |N+M| cost matrix between all nodes in 35 | graphs g1 and g2 36 | Each cost represents the cost of substituting, 37 | deleting or inserting a node 38 | The cost matrix consists of four regions: 39 | 40 | substitute | insert costs 41 | ------------------------------- 42 | delete | delete -> delete 43 | 44 | The delete -> delete region is filled with zeros 45 | """ 46 | n = len(self.g1) 47 | m = len(self.g2) 48 | cost_matrix = np.zeros((n+m,n+m)) 49 | #cost_matrix = [[0 for i in range(n + m)] for j in range(n + m)] 50 | nodes1 = self.g1.nodes() if float(nxv) < 2 else list(self.g1.nodes()) 51 | nodes2 = self.g2.nodes() if float(nxv) < 2 else list(self.g2.nodes()) 52 | 53 | for i in range(n): 54 | for j in range(m): 55 | cost_matrix[i,j] = self.substitute_cost(nodes1[i], nodes2[j]) 56 | 57 | for i in range(m): 58 | for j in range(m): 59 | cost_matrix[i+n,j] = self.insert_cost(i, j, nodes2) 60 | 61 | for i in range(n): 62 | for j in range(n): 63 | cost_matrix[j,i+m] = self.delete_cost(i, j, nodes1) 64 | 65 | self.cost_matrix = cost_matrix 66 | return cost_matrix 67 | 68 | def insert_cost(self, i, j): 69 | raise NotImplementedError 70 | 71 | def delete_cost(self, i, j): 72 | raise NotImplementedError 73 | 74 | def substitute_cost(self, nodes1, nodes2): 75 | raise NotImplementedError 76 | 77 | def print_matrix(self): 78 | print("cost matrix:") 79 | for column in self.create_cost_matrix(): 80 | for row in column: 81 | if row == sys.maxsize: 82 | print ("inf\t") 83 | else: 84 | print ("%.2f\t" % float(row)) 85 | print("") 86 | -------------------------------------------------------------------------------- /ged4py/algorithm/edge_edit_dist.py: -------------------------------------------------------------------------------- 1 | from ged4py.algorithm.abstract_graph_edit_dist import AbstractGraphEditDistance 2 | import sys 3 | 4 | 5 | class EdgeEditDistance(AbstractGraphEditDistance): 6 | """ 7 | Calculates the graph edit distance between two edges. 8 | A node in this context is interpreted as a graph, 9 | and edges are interpreted as nodes. 10 | """ 11 | 12 | def __init__(self, g1, g2): 13 | AbstractGraphEditDistance.__init__(self, g1, g2) 14 | 15 | def insert_cost(self, i, j, nodes2): 16 | if i == j: 17 | return 1 18 | return sys.maxsize 19 | 20 | def delete_cost(self, i, j, nodes1): 21 | if i == j: 22 | return 1 23 | return sys.maxsize 24 | 25 | def substitute_cost(self, edge1, edge2): 26 | if edge1 == edge2: 27 | return 0. 28 | return 1 29 | -------------------------------------------------------------------------------- /ged4py/algorithm/graph_edit_dist.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | from __future__ import print_function 3 | from ged4py.algorithm.abstract_graph_edit_dist import AbstractGraphEditDistance 4 | from ged4py.algorithm.edge_edit_dist import EdgeEditDistance 5 | from ged4py.graph.edge_graph import EdgeGraph 6 | import sys 7 | from networkx import __version__ as nxv 8 | 9 | 10 | 11 | def compare(g1, g2, print_details=False): 12 | ged = GraphEditDistance(g1, g2) 13 | 14 | if print_details: 15 | ged.print_matrix() 16 | 17 | return ged.normalized_distance() 18 | 19 | 20 | class GraphEditDistance(AbstractGraphEditDistance): 21 | 22 | def __init__(self, g1, g2): 23 | AbstractGraphEditDistance.__init__(self, g1, g2) 24 | 25 | def substitute_cost(self, node1, node2): 26 | return self.relabel_cost(node1, node2) + self.edge_diff(node1, node2) 27 | 28 | def relabel_cost(self, node1, node2): 29 | if node1 == node2: 30 | return 0. 31 | else: 32 | return 1. 33 | 34 | def delete_cost(self, i, j, nodes1): 35 | if i == j: 36 | return 1 37 | return sys.maxsize 38 | 39 | def insert_cost(self, i, j, nodes2): 40 | if i == j: 41 | return 1 42 | else: 43 | return sys.maxsize 44 | 45 | def pos_insdel_weight(self, node): 46 | return 1 47 | 48 | def edge_diff(self, node1, node2): 49 | edges1 = list(self.g1.edge[node1].keys()) if float(nxv) < 2 else list(self.g1.edges(node1)) 50 | edges2 = list(self.g2.edge[node2].keys()) if float(nxv) < 2 else list(self.g2.edges(node2)) 51 | if len(edges1) == 0 or len(edges2) == 0: 52 | return max(len(edges1), len(edges2)) 53 | 54 | edit_edit_dist = EdgeEditDistance(EdgeGraph(node1,edges1), EdgeGraph(node2,edges2)) 55 | return edit_edit_dist.normalized_distance() 56 | -------------------------------------------------------------------------------- /ged4py/data/source/source1.txt: -------------------------------------------------------------------------------- 1 | { 2 | "id": "source1.txt-1", 3 | "sentenceNumber": 1, 4 | "length": 17, 5 | "tokens": [{ 6 | "id": "1", 7 | "lemma": "Haakon", 8 | "deprel": "nsubj", 9 | "word": "Haakon", 10 | "rel": "4", 11 | "pos": "NNP" 12 | }, { 13 | "id": "2", 14 | "lemma": "be", 15 | "deprel": "cop", 16 | "word": "is", 17 | "rel": "4", 18 | "pos": "VBZ" 19 | }, { 20 | "id": "3", 21 | "lemma": "my", 22 | "deprel": "poss", 23 | "word": "my", 24 | "rel": "4", 25 | "pos": "PRP$" 26 | }, { 27 | "id": "4", 28 | "lemma": "name", 29 | "deprel": "null", 30 | "word": "name", 31 | "rel": "0", 32 | "pos": "NN" 33 | }], 34 | "filename": "source1.txt", 35 | "offset": 0 36 | } 37 | -------------------------------------------------------------------------------- /ged4py/data/suspicious/test.txt: -------------------------------------------------------------------------------- 1 | {"id":"test.txt-1","sentenceNumber":1,"length":17,"tokens":[{"id":"1","lemma":"my","deprel":"poss","word":"My","rel":"2","pos":"PRP$"},{"id":"2","lemma":"name","deprel":"nsubj","word":"name","rel":"4","pos":"NN"},{"id":"3","lemma":"be","deprel":"cop","word":"is","rel":"4","pos":"VBZ"},{"id":"4","lemma":"Haakon","deprel":"null","word":"Haakon","rel":"0","pos":"NNP"}],"filename":"test.txt","offset":0} -------------------------------------------------------------------------------- /ged4py/graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacquesfize/ged4py/8cd3918df43eb5a36d283a9f8674f3b145054ab7/ged4py/graph/__init__.py -------------------------------------------------------------------------------- /ged4py/graph/edge_graph.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | 3 | 4 | class EdgeGraph(): 5 | 6 | def __init__(self, init_node, nodes): 7 | self.init_node=init_node 8 | self.nodes_ = nodes 9 | 10 | def nodes(self): 11 | return self.nodes_ 12 | 13 | def size(self): 14 | return len(self.nodes) 15 | def __len__(self): 16 | return len(self.nodes_) 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.13.1 2 | scipy==0.19.1 3 | networkx>=1.11,~2 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='ged4py', 5 | vAersion='0.1dev-a', 6 | packages=find_packages(), 7 | author='Jacques Fize', 8 | license='MIT', 9 | long_description=open('README.md').read(), 10 | classifiers=[ 11 | 'Development Status :: 3 - Alpha', 12 | 'Intended Audience :: Developers', 13 | 'License :: OSI Approved :: MIT License', 14 | 'Programming Language :: Python :: 3', 15 | 'Programming Language :: Python :: 3.3', 16 | 'Programming Language :: Python :: 3.4', 17 | 'Programming Language :: Python :: 3.5', 18 | ] 19 | ) 20 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import networkx as nx 4 | g=nx.Graph() 5 | g.add_edge("A","B") 6 | 7 | g.add_node("C",weight=1) 8 | g2=g.copy() 9 | g.add_edge("A","C") 10 | from ged4py.algorithm import graph_edit_dist 11 | print(graph_edit_dist.compare(g,g2, True)) 12 | --------------------------------------------------------------------------------