├── .gitignore
├── .idea
├── encodings.xml
├── misc.xml
├── modules.xml
├── vcs.xml
└── wl-graph-kernels.iml
├── LICENSE
├── README.md
├── data
├── .gitkeep
├── Lexicon_NamedRockUnit.nt
└── download_datasets.sh
├── example_graphs
├── 07-Graph.dot
├── 07-Graph.pdf
├── 07-almost_relabeled.dot
├── 07-almost_relabeled.pdf
├── 07-relabeled.dot
├── 07-relabeled.pdf
├── 07-relabeled_vertical.dot
├── 07-relabeled_vertical.pdf
├── 07-subGraph_A1_B1.dot
├── 07-subGraph_A1_B1.pdf
├── 07-subGraph_A1_B1_vertical.dot
├── 07-subGraph_A1_B1_vertical.pdf
└── \
├── notebooks
├── affiliation_scores.ipynb
├── affiliation_timing.ipynb
├── lithogenesis_scores.ipynb
├── lithogenesis_timing.ipynb
└── no_labels_scores.ipynb
├── presentation
├── img
│ ├── 07-Graph.pdf
│ ├── 07-almost_relabeled.pdf
│ ├── 07-relabeled.pdf
│ ├── 07-relabeled_vertical.pdf
│ ├── 07-subGraph_A1_B1.pdf
│ ├── 07-subGraph_A1_B1_vertical.pdf
│ ├── wl_iteration_total.png
│ └── wl_iteration_upper.png
├── presentation.nav
├── presentation.pdf
└── presentation.tex
├── report
├── RefereeReport.pdf
├── RefereeReport.tex
└── img
│ ├── affiliation_timing.png
│ └── lithogenesis_timing.png
├── requirements.txt
├── results
├── affiliation_timing.png
├── csv_to_latex.py
├── lithogenesis_timing.png
├── wl_affiliation_results.csv
├── wl_affiliation_results_with_normalization.csv
├── wl_affiliation_results_with_normalization.tex
├── wl_lithogenesis_results_with_normalization.csv
├── wl_lithogenesis_results_with_normalization.tex
├── wl_no_labels.csv
├── wl_no_labels.tex
├── wlrdf_affiliation_results.csv
├── wlrdf_affiliation_results_with_normalization.csv
├── wlrdf_affiliation_results_with_normalization.tex
├── wlrdf_lithogenesis_results.csv
├── wlrdf_lithogenesis_results_with_normalization.csv
├── wlrdf_lithogenesis_results_with_normalization.tex
├── wlrdf_no_labels.csv
└── wlrdf_no_labels.tex
├── setup.py
├── tests
├── __init__.py
├── resources
│ ├── __init__.py
│ └── example.ttl
└── wlkernel_test.py
└── wlkernel
├── .idea
├── encodings.xml
├── misc.xml
├── modules.xml
├── vcs.xml
├── wlkernel.iml
└── workspace.xml
├── __init__.py
└── _wlkernel.py
/.gitignore:
--------------------------------------------------------------------------------
1 | */aifbfixed_complete.n3
2 | */Lexicon_NamedRockUnit.nt
3 | /report/*.aux
4 | /report/*.out
5 | /report/*.synctex.gz
6 | /report/*.err
7 | /report/*.log
8 | # Created by https://www.gitignore.io/api/python,pycharm,jupyternotebook,jupyternotebooks
9 | # Edit at https://www.gitignore.io/?templates=python,pycharm,jupyternotebook,jupyternotebooks
10 |
11 | ### JupyterNotebook ###
12 | .ipynb_checkpoints
13 | */.ipynb_checkpoints/*
14 |
15 | # Remove previous ipynb_checkpoints
16 | # git rm -r .ipynb_checkpoints/
17 | #
18 |
19 | ### JupyterNotebooks ###
20 | # gitignore template for Jupyter Notebooks
21 | # website: http://jupyter.org/
22 |
23 |
24 | # Remove previous ipynb_checkpoints
25 | # git rm -r .ipynb_checkpoints/
26 |
27 | ### PyCharm ###
28 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
29 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
30 |
31 | # User-specific stuff
32 | .idea/**/workspace.xml
33 | .idea/**/tasks.xml
34 | .idea/**/usage.statistics.xml
35 | .idea/**/dictionaries
36 | .idea/**/shelf
37 |
38 | # Generated files
39 | .idea/**/contentModel.xml
40 |
41 | # Sensitive or high-churn files
42 | .idea/**/dataSources/
43 | .idea/**/dataSources.ids
44 | .idea/**/dataSources.local.xml
45 | .idea/**/sqlDataSources.xml
46 | .idea/**/dynamic.xml
47 | .idea/**/uiDesigner.xml
48 | .idea/**/dbnavigator.xml
49 |
50 | # Gradle
51 | .idea/**/gradle.xml
52 | .idea/**/libraries
53 |
54 | # Gradle and Maven with auto-import
55 | # When using Gradle or Maven with auto-import, you should exclude module files,
56 | # since they will be recreated, and may cause churn. Uncomment if using
57 | # auto-import.
58 | # .idea/modules.xml
59 | # .idea/*.iml
60 | # .idea/modules
61 |
62 | # CMake
63 | cmake-build-*/
64 |
65 | # Mongo Explorer plugin
66 | .idea/**/mongoSettings.xml
67 |
68 | # File-based project format
69 | *.iws
70 |
71 | # IntelliJ
72 | out/
73 |
74 | # mpeltonen/sbt-idea plugin
75 | .idea_modules/
76 |
77 | # JIRA plugin
78 | atlassian-ide-plugin.xml
79 |
80 | # Cursive Clojure plugin
81 | .idea/replstate.xml
82 |
83 | # Crashlytics plugin (for Android Studio and IntelliJ)
84 | com_crashlytics_export_strings.xml
85 | crashlytics.properties
86 | crashlytics-build.properties
87 | fabric.properties
88 |
89 | # Editor-based Rest Client
90 | .idea/httpRequests
91 |
92 | # Android studio 3.1+ serialized cache file
93 | .idea/caches/build_file_checksums.ser
94 |
95 | # JetBrains templates
96 | **___jb_tmp___
97 |
98 | ### PyCharm Patch ###
99 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
100 |
101 | # *.iml
102 | # modules.xml
103 | # .idea/misc.xml
104 | # *.ipr
105 |
106 | # Sonarlint plugin
107 | .idea/sonarlint
108 |
109 | ### Python ###
110 | # Byte-compiled / optimized / DLL files
111 | __pycache__/
112 | *.py[cod]
113 | *$py.class
114 |
115 | # C extensions
116 | *.so
117 |
118 | # Distribution / packaging
119 | .Python
120 | build/
121 | develop-eggs/
122 | dist/
123 | downloads/
124 | eggs/
125 | .eggs/
126 | lib/
127 | lib64/
128 | parts/
129 | sdist/
130 | var/
131 | wheels/
132 | pip-wheel-metadata/
133 | share/python-wheels/
134 | *.egg-info/
135 | .installed.cfg
136 | *.egg
137 | MANIFEST
138 |
139 | # PyInstaller
140 | # Usually these files are written by a python script from a template
141 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
142 | *.manifest
143 | *.spec
144 |
145 | # Installer logs
146 | pip-log.txt
147 | pip-delete-this-directory.txt
148 |
149 | # Unit test / coverage reports
150 | htmlcov/
151 | .tox/
152 | .nox/
153 | .coverage
154 | .coverage.*
155 | .cache
156 | nosetests.xml
157 | coverage.xml
158 | *.cover
159 | .hypothesis/
160 | .pytest_cache/
161 |
162 | # Translations
163 | *.mo
164 | *.pot
165 |
166 | # Django stuff:
167 | *.log
168 | local_settings.py
169 | db.sqlite3
170 |
171 | # Flask stuff:
172 | instance/
173 | .webassets-cache
174 |
175 | # Scrapy stuff:
176 | .scrapy
177 |
178 | # Sphinx documentation
179 | docs/_build/
180 |
181 | # PyBuilder
182 | target/
183 |
184 | # Jupyter Notebook
185 |
186 | # IPython
187 | profile_default/
188 | ipython_config.py
189 |
190 | # pyenv
191 | .python-version
192 |
193 | # pipenv
194 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
195 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
196 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not
197 | # install all needed dependencies.
198 | #Pipfile.lock
199 |
200 | # celery beat schedule file
201 | celerybeat-schedule
202 |
203 | # SageMath parsed files
204 | *.sage.py
205 |
206 | # Environments
207 | .env
208 | .venv
209 | env/
210 | venv/
211 | ENV/
212 | env.bak/
213 | venv.bak/
214 |
215 | # Spyder project settings
216 | .spyderproject
217 | .spyproject
218 |
219 | # Rope project settings
220 | .ropeproject
221 |
222 | # mkdocs documentation
223 | /site
224 |
225 | # mypy
226 | .mypy_cache/
227 | .dmypy.json
228 | dmypy.json
229 |
230 | # Pyre type checker
231 | .pyre/
232 |
233 | # End of https://www.gitignore.io/api/python,pycharm,jupyternotebook,jupyternotebooks
234 |
235 | *.aux
236 | *.log
237 | *.out
238 | *synctex.gz
239 | *.toc
240 | *.vrb
241 | *.snm
242 |
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/wl-graph-kernels.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 lorenzo palloni
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Weisfeiler-Lehman Graph Kernels
2 |
3 | ## Installation
4 |
5 | Python >= 3.6 is supported.
6 |
7 | $ git clone https://github.com/deeplego/wl-graph-kernels.git
8 | $ cd wl-graph-kernels
9 | $ pip install -r requirements.txt
10 | $ pip install .
11 |
12 | ## Usage
13 |
14 | To download the datasets of the experiments:
15 |
16 | $ cd data
17 | $ ./download_datasets.sh
18 |
19 | The experiments are replicated in the Jupyter notebooks in the `notebooks`
20 | directory.
21 |
--------------------------------------------------------------------------------
/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/data/.gitkeep
--------------------------------------------------------------------------------
/data/download_datasets.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Download the RDF datasets used by the experiments into the current
4 | # directory. Must be run from inside the "data" folder. Files that already
5 | # exist are left untouched; any failed download aborts with a non-zero status.
6 |
7 | if [[ "$(basename "$(pwd)")" != "data" ]]
8 | then
9 |     echo "This script must be run from ./data folder." >&2
10 |     exit 1
11 | fi
12 |
13 | # download URL TARGET
14 | # Fetch URL into TARGET unless TARGET is already present.
15 | download() {
16 |     local url="$1"
17 |     local target="$2"
18 |
19 |     if [[ -e "./${target}" ]]
20 |     then
21 |         return 0
22 |     fi
23 |
24 |     echo ">>> Downloading ${target}"
25 |     # Write to a temporary name first so an interrupted transfer never leaves
26 |     # a truncated file that a later run would mistake for a complete dataset.
27 |     if ! wget -q -O "${target}.part" "${url}"
28 |     then
29 |         rm -f "${target}.part"
30 |         echo "Failed to download ${target} from ${url}" >&2
31 |         exit 1
32 |     fi
33 |     mv "${target}.part" "${target}"
34 | }
35 |
36 | download https://ndownloader.figshare.com/files/1118822 aifbfixed_complete.n3
37 | download http://data.bgs.ac.uk/downloads/Lexicon_NamedRockUnit.nt Lexicon_NamedRockUnit.nt
38 |
39 | exit 0
40 |
--------------------------------------------------------------------------------
/example_graphs/07-Graph.dot:
--------------------------------------------------------------------------------
1 | digraph G {
2 | rankdir = LR;
3 | color = "blue2";
4 | node [color = "blue2", fontcolor = "blue2", style = "bold"];
5 | edge [fontsize = 12, style = "bold"];
6 | // The two nodes labelled "class A" / "class B", drawn filled in light grey.
7 | A [label = "class A", style = "filled", color = "lightgrey"]
8 | B [label = "class B", style = "filled", color = "lightgrey"]
9 | // P1 edges: A1, A2 point to A and B1, B2 point to B.
10 | A1 -> A [label = "P1"]
11 | A2 -> A [label = "P1"]
12 | B2 -> B [label = "P1"]
13 | B1 -> B [label = "P1"]
14 | // P2 / P3 edges from A1, A2, B2, B1 to the nodes C..G.
15 | A1 -> C [label = "P2"]
16 | A1 -> D [label = "P3"]
17 | A2 -> D [label = "P2"]
18 | A2 -> E [label = "P3"]
19 | B2 -> E [label = "P3"]
20 | B2 -> F [label = "P2"]
21 | B1 -> F [label = "P3"]
22 | B1 -> G [label = "P2"]
23 | // P4 edges converge on H, P5 edges converge on I.
24 | C -> H [label = "P4"]
25 | D -> H [label = "P4"]
26 | F -> I [label = "P5"]
27 | G -> I [label = "P5"]
28 | // P6 edges lead from H and I back to A2 and B2, closing cycles.
29 | H -> A2 [label = "P6"]
30 | I -> B2 [label = "P6"]
31 | // Rank constraints: four columns in the left-to-right layout.
32 | {rank = min; A; B;}
33 | {rank = same; A1; A2; B2; B1;}
34 | {rank = same; C; D; E; F; G;}
35 | {rank = max; H; I;}
36 | }
37 |
--------------------------------------------------------------------------------
/example_graphs/07-Graph.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-Graph.pdf
--------------------------------------------------------------------------------
/example_graphs/07-almost_relabeled.dot:
--------------------------------------------------------------------------------
1 | digraph G {
2 |
3 | newrank = true;
4 | rankdir = LR;
5 | node[style="bold"]
6 | edge[style="bold"]
7 |
8 | color = "aquamarine4";
9 | node [color = "aquamarine4", fontcolor = "aquamarine4"];
10 | root [label="ϵ (A1)"];
11 | right_root [label="ϵ (B1)"];
12 | depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];
13 | {rank = same; depth_4_node; root; right_root};
14 |
15 | color = "blue4";
16 | node [color = "blue4", fontcolor = "blue4"];
17 | edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
18 | C [label = "C,P2"];
19 | D [label = "D,P3"];
20 | root -> C [label = "P2,ϵ"];
21 | root -> D [label = "P3,ϵ"];
22 | right_F [label = "F,P3"];
23 | right_G [label = "G,P2"];
24 | right_root -> right_F [label = "P3,ϵ"];
25 | right_root -> right_G [label = "P2,ϵ"];
26 | depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
27 | {rank = same; depth_3_node; C; D; right_F; right_G};
28 |
29 | color = "cyan4";
30 | node [color = "cyan4", fontcolor = "cyan4"];
31 | edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
32 | H [label = "H,P4P4"];
33 | C -> H [label = "P4,C"];
34 | D -> H [label = "P4,D"];
35 | right_I [label = "I,P5P5"];
36 | right_F -> right_I [label = "P5,F"];
37 | right_G -> right_I [label = "P5,G"];
38 | depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
39 | {rank = same; depth_2_node; H; right_I};
40 |
41 |
42 | color = "darkorchid4";
43 | node [color = "darkorchid4", fontcolor = "darkorchid4"];
44 | edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
45 | A2 [label = "A2,P6"]
46 | H -> A2 [label = "P6,H"];
47 | right_B2 [label = "B2,P6"];
48 | right_I -> right_B2 [label = "P6,I"];
49 | depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
50 | {rank = same; depth_1_node; right_B2; A2};
51 |
52 | color = "green4";
53 | node [color = "green4", fontcolor = "green4"];
54 | edge [color = "green4", fontsize = 10, fontcolor = "green4"];
55 | D_0 [label = "D,P2", style = "dotted"];
56 | E_0 [label = "E,P3"];
57 | F_0 [label = "F,P2", style = "dotted"];
58 | A2 -> D_0 [label = "P2,A2"];
59 | A2 -> E_0 [label = "P3,A2"];
60 | right_B2 -> E_0 [label = "P3,B2"];
61 | right_B2 -> F_0 [label = "P2,B2"];
62 |
63 | depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
64 | {rank = same; depth_0_node; D_0; E_0; F_0};
65 | }
66 |
--------------------------------------------------------------------------------
/example_graphs/07-almost_relabeled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-almost_relabeled.pdf
--------------------------------------------------------------------------------
/example_graphs/07-relabeled.dot:
--------------------------------------------------------------------------------
1 | digraph G {
2 | // Integer-relabeled graphs rooted at A1 and B1, one colour and one rank per depth (d = 4 .. 0).
3 | newrank = true;
4 | rankdir = LR;
5 | node [style = "bold"]
6 | edge [style = "bold"]
7 | // Depth 4: the two roots.
8 | color = "aquamarine4";
9 | node [color = "aquamarine4", fontcolor = "aquamarine4"];
10 | root [label = "ϵ (A1)", fontcolor = "aquamarine4"];
11 | right_root [label = "ϵ (B1)", fontcolor = "aquamarine4"];
12 | depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];
13 | {rank = same; depth_4_node; root; right_root};
14 | // Depth 3: edge labels shared by both sides (1, 2) are highlighted in red.
15 | color = "blue4";
16 | node [color = "blue4", fontcolor = "blue4"];
17 | edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
18 | C [label = "3"];
19 | D [label = "4"];
20 | root -> C [label = "1", fontcolor = "red2"];
21 | root -> D [label = "2", fontcolor = "red2"];
22 | right_F [label = "5"];
23 | right_G [label = "6"];
24 | right_root -> right_F [label = "2", fontcolor = "red2"];
25 | right_root -> right_G [label = "1", fontcolor = "red2"];
26 | depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
27 | {rank = same; depth_3_node; C; D; right_F; right_G};
28 | // Depth 2.
29 | color = "cyan4";
30 | node [color = "cyan4", fontcolor = "cyan4"];
31 | edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
32 | H [label = "11"];
33 | C -> H [label = "7"];
34 | D -> H [label = "8"];
35 | right_I [label = "12"];
36 | right_F -> right_I [label = "9"];
37 | right_G -> right_I [label = "10"];
38 | depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
39 | {rank = same; depth_2_node; H; right_I};
40 |
41 | // Depth 1.
42 | color = "darkorchid4";
43 | node [color = "darkorchid4", fontcolor = "darkorchid4"];
44 | edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
45 | A2 [label = "15"]
46 | H -> A2 [label = "13"];
47 | right_B2 [label = "16"];
48 | right_I -> right_B2 [label = "14"];
49 | depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
50 | {rank = same; depth_1_node; right_B2; A2};
51 | // Depth 0: node 22 is reached from both A2 and B2 and is highlighted in red.
52 | color = "green4";
53 | node [color = "green4", fontcolor = "green4"];
54 | edge [color = "green4", fontsize = 10, fontcolor = "green4"];
55 | D_0 [label = "21", style = "dotted"];
56 | E_0 [label = "22", fontcolor = "red2"];
57 | F_0 [label = "23", style = "dotted"];
58 | A2 -> D_0 [label = "17"];
59 | A2 -> E_0 [label = "18"];
60 | right_B2 -> E_0 [label = "19"];
61 | right_B2 -> F_0 [label = "20"];
62 |
63 | depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
64 | {rank = same; depth_0_node; D_0; E_0; F_0};
65 | }
66 |
--------------------------------------------------------------------------------
/example_graphs/07-relabeled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-relabeled.pdf
--------------------------------------------------------------------------------
/example_graphs/07-relabeled_vertical.dot:
--------------------------------------------------------------------------------
1 | digraph G {
2 | // Integer-relabeled graphs for the two roots, default top-to-bottom layout, one colour and one rank per depth (d = 4 .. 0).
3 | newrank = true;
4 | node [style = "bold"]
5 | edge [style = "bold"]
6 | // Depth 4: the two roots.
7 | color = "aquamarine4";
8 | node [color = "aquamarine4", fontcolor = "aquamarine4"];
9 | root [label = "ϵ", fontcolor = "aquamarine4"];
10 | right_root [label = "ϵ", fontcolor = "aquamarine4"];
11 | depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];
12 | {rank = same; depth_4_node; root; right_root};
13 | // Depth 3: edge labels shared by both sides (1, 2) are highlighted in red.
14 | color = "blue4";
15 | node [color = "blue4", fontcolor = "blue4"];
16 | edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
17 | C [label = "3"];
18 | D [label = "4"];
19 | root -> C [label = "1", fontcolor = "red2"];
20 | root -> D [label = "2", fontcolor = "red2"];
21 | right_F [label = "5"];
22 | right_G [label = "6"];
23 | right_root -> right_F [label = "2", fontcolor = "red2"];
24 | right_root -> right_G [label = "1", fontcolor = "red2"];
25 | depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
26 | {rank = same; depth_3_node; C; D; right_F; right_G};
27 | // Depth 2.
28 | color = "cyan4";
29 | node [color = "cyan4", fontcolor = "cyan4"];
30 | edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
31 | H [label = "11"];
32 | C -> H [label = "7"];
33 | D -> H [label = "8"];
34 | right_I [label = "12"];
35 | right_F -> right_I [label = "9"];
36 | right_G -> right_I [label = "10"];
37 | depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
38 | {rank = same; depth_2_node; H; right_I};
39 |
40 | // Depth 1.
41 | color = "darkorchid4";
42 | node [color = "darkorchid4", fontcolor = "darkorchid4"];
43 | edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
44 | A2 [label = "15"]
45 | H -> A2 [label = "13"];
46 | right_B2 [label = "16"];
47 | right_I -> right_B2 [label = "14"];
48 | depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
49 | {rank = same; depth_1_node; right_B2; A2};
50 | // Depth 0: node 22 is reached from both A2 and B2 and is highlighted in red.
51 | color = "green4";
52 | node [color = "green4", fontcolor = "green4"];
53 | edge [color = "green4", fontsize = 10, fontcolor = "green4"];
54 | D_0 [label = "21", style = "dotted"];
55 | E_0 [label = "22", fontcolor = "red2"];
56 | F_0 [label = "23", style = "dotted"];
57 | A2 -> D_0 [label = "17"];
58 | A2 -> E_0 [label = "18"];
59 | right_B2 -> E_0 [label = "19"];
60 | right_B2 -> F_0 [label = "20"];
61 |
62 | depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
63 | {rank = same; depth_0_node; D_0; E_0; F_0};
64 | }
65 |
--------------------------------------------------------------------------------
/example_graphs/07-relabeled_vertical.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-relabeled_vertical.pdf
--------------------------------------------------------------------------------
/example_graphs/07-subGraph_A1_B1.dot:
--------------------------------------------------------------------------------
1 | digraph G {
2 |
3 | newrank = true;
4 | rankdir = LR;
5 | node[style = "bold"]
6 | edge[style = "bold"]
7 | color = "aquamarine4";
8 | node [color = "aquamarine4", fontcolor = "aquamarine4"];
9 | root [label="ϵ (A1)"];
10 | o_root [label="ϵ (B1)"];
11 | depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];
12 | {rank = same; depth_4_node; root; o_root};
13 |
14 | color = "blue4";
15 | node [color = "blue4", fontcolor = "blue4"];
16 | edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
17 | root -> C [label = "P2", fontcolor = "red2"];
18 | root -> D [label = "P3", fontcolor = "red2"];
19 | o_F [label = "F"];
20 | o_G [label = "G"];
21 | o_root -> o_F [label = "P3", fontcolor = "red2"];
22 | o_root -> o_G [label = "P2", fontcolor = "red2"];
23 | depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
24 | {rank = same; depth_3_node; C; D; o_F; o_G};
25 |
26 | color = "cyan4";
27 | node [color = "cyan4", fontcolor = "cyan4"];
28 | edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
29 | C -> H [label = "P4"];
30 | D -> H [label = "P4"];
31 | o_I [label = "I"];
32 | o_F -> o_I [label = "P5"];
33 | o_G -> o_I [label = "P5"];
34 | depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
35 | {rank = same; depth_2_node; H; o_I};
36 |
37 |
38 | color = "darkorchid4";
39 | node [color = "darkorchid4", fontcolor = "darkorchid4"];
40 | edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
41 | H -> A2 [label = "P6", fontcolor = "red2"];
42 | o_B2 [label = "B2"];
43 | o_I -> o_B2 [label = "P6", fontcolor = "red2"];
44 | depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
45 | {rank = same; depth_1_node; o_B2; A2};
46 |
47 | color = "green4";
48 | node [color = "green4", fontcolor = "green4"];
49 | edge [color = "green4", fontsize = 10, fontcolor = "green4"];
50 | D_0 [label = "D", style = "dotted"];
51 | E_0 [label = "E", fontcolor = "red2"];
52 | F_0 [label = "F", style = "dotted"];
53 | A2 -> D_0 [label = "P2", fontcolor = "red2"];
54 | A2 -> E_0 [label = "P3", fontcolor = "red2"];
55 | o_B2 -> E_0 [label = "P3", fontcolor = "red2"];
56 | o_B2 -> F_0 [label = "P2", fontcolor = "red2"];
57 |
58 | depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
59 | {rank = same; depth_0_node; D_0; E_0; F_0};
60 | }
61 |
--------------------------------------------------------------------------------
/example_graphs/07-subGraph_A1_B1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-subGraph_A1_B1.pdf
--------------------------------------------------------------------------------
/example_graphs/07-subGraph_A1_B1_vertical.dot:
--------------------------------------------------------------------------------
1 | digraph G {
2 |
3 | newrank = true;
4 | node[style = "bold"]
5 | edge[style = "bold"]
6 | color = "aquamarine4";
7 | node [color = "aquamarine4", fontcolor = "aquamarine4"];
8 | root [label="ϵ"];
9 | o_root [label="ϵ"];
10 | depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];
11 | {rank = same; depth_4_node; root; o_root};
12 |
13 | color = "blue4";
14 | node [color = "blue4", fontcolor = "blue4"];
15 | edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
16 | root -> C [label = "P2", fontcolor = "red2"];
17 | root -> D [label = "P3", fontcolor = "red2"];
18 | o_F [label = "F"];
19 | o_G [label = "G"];
20 | o_root -> o_F [label = "P3", fontcolor = "red2"];
21 | o_root -> o_G [label = "P2", fontcolor = "red2"];
22 | depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
23 | {rank = same; depth_3_node; C; D; o_F; o_G};
24 |
25 | color = "cyan4";
26 | node [color = "cyan4", fontcolor = "cyan4"];
27 | edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
28 | C -> H [label = "P4"];
29 | D -> H [label = "P4"];
30 | o_I [label = "I"];
31 | o_F -> o_I [label = "P5"];
32 | o_G -> o_I [label = "P5"];
33 | depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
34 | {rank = same; depth_2_node; H; o_I};
35 |
36 |
37 | color = "darkorchid4";
38 | node [color = "darkorchid4", fontcolor = "darkorchid4"];
39 | edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
40 | H -> A2 [label = "P6", fontcolor = "red2"];
41 | o_B2 [label = "B2"];
42 | o_I -> o_B2 [label = "P6", fontcolor = "red2"];
43 | depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
44 | {rank = same; depth_1_node; o_B2; A2};
45 |
46 | color = "green4";
47 | node [color = "green4", fontcolor = "green4"];
48 | edge [color = "green4", fontsize = 10, fontcolor = "green4"];
49 | D_0 [label = "D", style = "dotted"];
50 | E_0 [label = "E", fontcolor = "red2"];
51 | F_0 [label = "F", style = "dotted"];
52 | A2 -> D_0 [label = "P2", fontcolor = "red2"];
53 | A2 -> E_0 [label = "P3", fontcolor = "red2"];
54 | o_B2 -> E_0 [label = "P3", fontcolor = "red2"];
55 | o_B2 -> F_0 [label = "P2", fontcolor = "red2"];
56 |
57 | depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
58 | {rank = same; depth_0_node; D_0; E_0; F_0};
59 | }
60 |
--------------------------------------------------------------------------------
/example_graphs/07-subGraph_A1_B1_vertical.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-subGraph_A1_B1_vertical.pdf
--------------------------------------------------------------------------------
/example_graphs/\:
--------------------------------------------------------------------------------
1 | digraph G {
2 |
3 | newrank = true;
4 | rankdir = LR;
5 | node[style="bold"]
6 | edge[style="bold"]
7 |
8 | color = "aquamarine4";
9 | node [color = "aquamarine4", fontcolor = "aquamarine4"];
10 | root [label="ϵ (A1)"];
11 | right_root [label="ϵ (B1)"];
12 | depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];
13 | {rank = same; depth_4_node; root; right_root};
14 |
15 | color = "blue4";
16 | node [color = "blue4", fontcolor = "blue4"];
17 | edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
18 | C [label = "C,P2"];
19 | D [label = "D,P3"];
20 | root -> C [label = "P2,ϵ"];
21 | root -> D [label = "P3,ϵ"];
22 | right_F [label = "F,P3"];
23 | right_G [label = "G,P2"];
24 | right_root -> right_F [label = "P3,ϵ"];
25 | right_root -> right_G [label = "P2,ϵ"];
26 | depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
27 | {rank = same; depth_3_node; C; D; right_F; right_G};
28 |
29 | color = "cyan4";
30 | node [color = "cyan4", fontcolor = "cyan4"];
31 | edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
32 | H [label = "H,P4P4"];
33 | C -> H [label = "P4,C"];
34 | D -> H [label = "P4,D"];
35 | right_I [label = "I,P5P5"];
36 | right_F -> right_I [label = "P5,F"];
37 | right_G -> right_I [label = "P5,G"];
38 | depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
39 | {rank = same; depth_2_node; H; right_I};
40 |
41 |
42 | color = "darkorchid4";
43 | node [color = "darkorchid4", fontcolor = "darkorchid4"];
44 | edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
45 | A2 [label = "A2,P6"]
46 | H -> A2 [label = "P6,H"];
47 | right_B2 [label = "B2,P6"];
48 | right_I -> right_B2 [label = "P6,I"];
49 | depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
50 | {rank = same; depth_1_node; right_B2; A2};
51 |
52 | color = "green4";
53 | node [color = "green4", fontcolor = "green4"];
54 | edge [color = "green4", fontsize = 10, fontcolor = "green4"];
55 | D_0 [label = "D,P2", style = "dotted"];
56 | E_0 [label = "E,P3"];
57 | F_0 [label = "F,P2", style = "dotted"];
58 | A2 -> D_0 [label = "P2,A2"];
59 | A2 -> E_0 [label = "P3,A2"];
60 | right_B2 -> E_0 [label = "P3,B2"];
61 | right_B2 -> F_0 [label = "P2,B2"];
62 |
63 | depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
64 | {rank = same; depth_0_node; D_0; E_0; F_0};
65 | }
66 |
--------------------------------------------------------------------------------
/notebooks/affiliation_scores.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import sys\n",
10 | "sys.path.insert(0, '../')"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "from collections import Counter, OrderedDict\n",
20 | "import warnings\n",
21 | "\n",
22 | "import rdflib\n",
23 | "import numpy as np\n",
24 | "import pandas as pd\n",
25 | "from pprint import pprint\n",
26 | "from sklearn import svm\n",
27 | "from sklearn.model_selection import cross_validate\n",
28 | "\n",
29 | "import wlkernel"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 3,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "warnings.simplefilter('ignore')"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 4,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "rdf_graph = rdflib.Graph().parse('../data/aifbfixed_complete.n3', format='n3')"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 5,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "Most common classes with predicate equal to 'affiliation':\n",
60 | "[('http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id1instance',\n",
61 | " 73),\n",
62 | " ('http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id3instance',\n",
63 | " 60),\n",
64 | " ('http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id2instance',\n",
65 | " 28),\n",
66 | " ('http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id4instance',\n",
67 | " 16),\n",
68 | " ('http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id5instance',\n",
69 | " 1)]\n"
70 | ]
71 | }
72 | ],
73 | "source": [
74 | "affiliation_most_common = Counter(\n",
75 | " str(o) \n",
76 | " for s, p, o in rdf_graph\n",
77 | " if 'affiliation' in str(p)\n",
78 | ").most_common()\n",
79 | "print(\"Most common classes with predicate equal to 'affiliation':\")\n",
80 | "pprint(affiliation_most_common)"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 6,
86 | "metadata": {},
87 | "outputs": [],
88 | "source": [
89 | "instances_class_map = {\n",
90 | " str(s): str(o) for s, p, o in rdf_graph \n",
91 | " if 'affiliation' in str(p)\n",
92 | " and 'id5instance' not in str(o)\n",
93 | "}\n",
94 | "instances = list(instances_class_map.keys())\n",
95 | "y = list(instances_class_map.values())"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 7,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "name": "stdout",
105 | "output_type": "stream",
106 | "text": [
107 | "number of triples: 28699\n"
108 | ]
109 | }
110 | ],
111 | "source": [
112 | "triples = list(\n",
113 | " (str(s), str(p), str(o)) for s, p, o in rdf_graph\n",
114 | " if 'affiliation' not in str(p)\n",
115 | " and 'employs' not in str(p)\n",
116 | " and 'member' not in str(p)\n",
117 | " and 'head' not in str(p)\n",
118 | ")\n",
119 | "print('number of triples:', len(triples))"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 44,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=1)"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 45,
134 | "metadata": {},
135 | "outputs": [],
136 | "source": [
137 | "for i in range(len(wlrdf_graph.labels)):\n",
138 | " for k in wlrdf_graph.labels[i].keys():\n",
139 | " wlrdf_graph.labels[i][k] = 'banana'"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 48,
145 | "metadata": {},
146 | "outputs": [],
147 | "source": [
148 | "kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=0)\n",
149 | "kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)"
150 | ]
151 | },
152 | {
153 | "cell_type": "markdown",
154 | "metadata": {},
155 | "source": [
156 | "### Weisfeiler-Lehman RDF"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": 11,
162 | "metadata": {},
163 | "outputs": [],
164 | "source": [
165 | "RANDOM_STATE = 42\n",
166 | "\n",
167 | "depth_values = [1, 2, 3]\n",
168 | "iteration_values = [0, 2, 4, 6]\n",
169 | "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
170 | "\n",
171 | "results = OrderedDict()\n",
172 | "\n",
173 | "for d in depth_values:\n",
174 | " for it in iteration_values:\n",
175 | " wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=d)\n",
176 | " kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=it)\n",
177 | " kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
178 | " \n",
179 | " results[(d, it)] = [0, 0, 0]\n",
180 | " for c in C_values:\n",
181 | " classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
182 | " scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
183 | " \n",
184 | " acc_mean = scores['test_accuracy'].mean()\n",
185 | " f1_mean = scores['test_f1_macro'].mean()\n",
186 | " \n",
187 | " if acc_mean > results[(d, it)][0]:\n",
188 | " results[(d, it)] = [acc_mean, f1_mean, c]"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 12,
194 | "metadata": {},
195 | "outputs": [
196 | {
197 | "data": {
198 | "text/html": [
199 | "
\n",
200 | "\n",
213 | "
\n",
214 | " \n",
215 | " \n",
216 | " | \n",
217 | " | \n",
218 | " accuracy | \n",
219 | " f1 | \n",
220 | " C | \n",
221 | "
\n",
222 | " \n",
223 | " depth | \n",
224 | " iterations | \n",
225 | " | \n",
226 | " | \n",
227 | " | \n",
228 | "
\n",
229 | " \n",
230 | " \n",
231 | " \n",
232 | " 1 | \n",
233 | " 0 | \n",
234 | " 0.881955 | \n",
235 | " 0.795756 | \n",
236 | " 100.0 | \n",
237 | "
\n",
238 | " \n",
239 | " 2 | \n",
240 | " 0.881955 | \n",
241 | " 0.795756 | \n",
242 | " 100.0 | \n",
243 | "
\n",
244 | " \n",
245 | " 4 | \n",
246 | " 0.881955 | \n",
247 | " 0.795756 | \n",
248 | " 100.0 | \n",
249 | "
\n",
250 | " \n",
251 | " 6 | \n",
252 | " 0.881955 | \n",
253 | " 0.795756 | \n",
254 | " 100.0 | \n",
255 | "
\n",
256 | " \n",
257 | " 2 | \n",
258 | " 0 | \n",
259 | " 0.892114 | \n",
260 | " 0.826007 | \n",
261 | " 100.0 | \n",
262 | "
\n",
263 | " \n",
264 | " 2 | \n",
265 | " 0.880057 | \n",
266 | " 0.812488 | \n",
267 | " 100.0 | \n",
268 | "
\n",
269 | " \n",
270 | " 4 | \n",
271 | " 0.874501 | \n",
272 | " 0.803701 | \n",
273 | " 100.0 | \n",
274 | "
\n",
275 | " \n",
276 | " 6 | \n",
277 | " 0.874501 | \n",
278 | " 0.800821 | \n",
279 | " 100.0 | \n",
280 | "
\n",
281 | " \n",
282 | " 3 | \n",
283 | " 0 | \n",
284 | " 0.879579 | \n",
285 | " 0.812187 | \n",
286 | " 100.0 | \n",
287 | "
\n",
288 | " \n",
289 | " 2 | \n",
290 | " 0.913751 | \n",
291 | " 0.867388 | \n",
292 | " 100.0 | \n",
293 | "
\n",
294 | " \n",
295 | " 4 | \n",
296 | " 0.908196 | \n",
297 | " 0.863829 | \n",
298 | " 100.0 | \n",
299 | "
\n",
300 | " \n",
301 | " 6 | \n",
302 | " 0.908196 | \n",
303 | " 0.863829 | \n",
304 | " 100.0 | \n",
305 | "
\n",
306 | " \n",
307 | "
\n",
308 | "
"
309 | ],
310 | "text/plain": [
311 | " accuracy f1 C\n",
312 | "depth iterations \n",
313 | "1 0 0.881955 0.795756 100.0\n",
314 | " 2 0.881955 0.795756 100.0\n",
315 | " 4 0.881955 0.795756 100.0\n",
316 | " 6 0.881955 0.795756 100.0\n",
317 | "2 0 0.892114 0.826007 100.0\n",
318 | " 2 0.880057 0.812488 100.0\n",
319 | " 4 0.874501 0.803701 100.0\n",
320 | " 6 0.874501 0.800821 100.0\n",
321 | "3 0 0.879579 0.812187 100.0\n",
322 | " 2 0.913751 0.867388 100.0\n",
323 | " 4 0.908196 0.863829 100.0\n",
324 | " 6 0.908196 0.863829 100.0"
325 | ]
326 | },
327 | "execution_count": 12,
328 | "metadata": {},
329 | "output_type": "execute_result"
330 | }
331 | ],
332 | "source": [
333 | "fn = 'wlrdf_affiliation_results_with_normalization'\n",
334 | "\n",
335 | "df_res = pd.DataFrame(index=list(results.keys()))\n",
336 | "df_res['accuracy'] = [t[0] for t in results.values()]\n",
337 | "df_res['f1'] = [t[1] for t in results.values()]\n",
338 | "df_res['C'] = [t[2] for t in results.values()]\n",
339 | "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
340 | "df_res.to_csv(f'../results/{fn}.csv')\n",
341 | "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
342 | "df_res_test.to_html(f'../results/{fn}.html')\n",
343 | "df_res_test"
344 | ]
345 | },
346 | {
347 | "cell_type": "markdown",
348 | "metadata": {},
349 | "source": [
350 | "### Weisfeiler-Lehman"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": 13,
356 | "metadata": {},
357 | "outputs": [],
358 | "source": [
359 | "RANDOM_STATE = 42\n",
360 | "\n",
361 | "depth_values = [1, 2, 3]\n",
362 | "iteration_values = [0, 2, 4, 6]\n",
363 | "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
364 | "\n",
365 | "results = OrderedDict()\n",
366 | "\n",
367 | "for d in depth_values:\n",
368 | " for it in iteration_values:\n",
369 | " wl_graphs = [wlkernel.WLGraph(triples, instance, max_depth=d) for instance in instances]\n",
370 | " kernel_matrix = wlkernel.wl_kernel_matrix(wl_graphs, iterations=it)\n",
371 | " kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
372 | " \n",
373 | " results[(d, it)] = [0, 0, 0]\n",
374 | " for c in C_values:\n",
375 | " classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
376 | " scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
377 | " \n",
378 | " acc_mean = scores['test_accuracy'].mean()\n",
379 | " f1_mean = scores['test_f1_macro'].mean()\n",
380 | " \n",
381 | " if acc_mean > results[(d, it)][0]:\n",
382 | " results[(d, it)] = [acc_mean, f1_mean, c]"
383 | ]
384 | },
385 | {
386 | "cell_type": "code",
387 | "execution_count": 14,
388 | "metadata": {},
389 | "outputs": [
390 | {
391 | "data": {
392 | "text/html": [
393 | "\n",
394 | "\n",
407 | "
\n",
408 | " \n",
409 | " \n",
410 | " | \n",
411 | " | \n",
412 | " accuracy | \n",
413 | " f1 | \n",
414 | " C | \n",
415 | "
\n",
416 | " \n",
417 | " depth | \n",
418 | " iterations | \n",
419 | " | \n",
420 | " | \n",
421 | " | \n",
422 | "
\n",
423 | " \n",
424 | " \n",
425 | " \n",
426 | " 1 | \n",
427 | " 0 | \n",
428 | " 0.881955 | \n",
429 | " 0.795756 | \n",
430 | " 100.0 | \n",
431 | "
\n",
432 | " \n",
433 | " 2 | \n",
434 | " 0.868761 | \n",
435 | " 0.788673 | \n",
436 | " 100.0 | \n",
437 | "
\n",
438 | " \n",
439 | " 4 | \n",
440 | " 0.868761 | \n",
441 | " 0.788673 | \n",
442 | " 100.0 | \n",
443 | "
\n",
444 | " \n",
445 | " 6 | \n",
446 | " 0.868761 | \n",
447 | " 0.788673 | \n",
448 | " 100.0 | \n",
449 | "
\n",
450 | " \n",
451 | " 2 | \n",
452 | " 0 | \n",
453 | " 0.886851 | \n",
454 | " 0.819787 | \n",
455 | " 100.0 | \n",
456 | "
\n",
457 | " \n",
458 | " 2 | \n",
459 | " 0.858127 | \n",
460 | " 0.781563 | \n",
461 | " 100.0 | \n",
462 | "
\n",
463 | " \n",
464 | " 4 | \n",
465 | " 0.770446 | \n",
466 | " 0.604246 | \n",
467 | " 100.0 | \n",
468 | "
\n",
469 | " \n",
470 | " 6 | \n",
471 | " 0.752758 | \n",
472 | " 0.579145 | \n",
473 | " 100.0 | \n",
474 | "
\n",
475 | " \n",
476 | " 3 | \n",
477 | " 0 | \n",
478 | " 0.884843 | \n",
479 | " 0.818408 | \n",
480 | " 100.0 | \n",
481 | "
\n",
482 | " \n",
483 | " 2 | \n",
484 | " 0.890800 | \n",
485 | " 0.824622 | \n",
486 | " 100.0 | \n",
487 | "
\n",
488 | " \n",
489 | " 4 | \n",
490 | " 0.897343 | \n",
491 | " 0.840694 | \n",
492 | " 100.0 | \n",
493 | "
\n",
494 | " \n",
495 | " 6 | \n",
496 | " 0.896356 | \n",
497 | " 0.821343 | \n",
498 | " 100.0 | \n",
499 | "
\n",
500 | " \n",
501 | "
\n",
502 | "
"
503 | ],
504 | "text/plain": [
505 | " accuracy f1 C\n",
506 | "depth iterations \n",
507 | "1 0 0.881955 0.795756 100.0\n",
508 | " 2 0.868761 0.788673 100.0\n",
509 | " 4 0.868761 0.788673 100.0\n",
510 | " 6 0.868761 0.788673 100.0\n",
511 | "2 0 0.886851 0.819787 100.0\n",
512 | " 2 0.858127 0.781563 100.0\n",
513 | " 4 0.770446 0.604246 100.0\n",
514 | " 6 0.752758 0.579145 100.0\n",
515 | "3 0 0.884843 0.818408 100.0\n",
516 | " 2 0.890800 0.824622 100.0\n",
517 | " 4 0.897343 0.840694 100.0\n",
518 | " 6 0.896356 0.821343 100.0"
519 | ]
520 | },
521 | "execution_count": 14,
522 | "metadata": {},
523 | "output_type": "execute_result"
524 | }
525 | ],
526 | "source": [
527 | "fn = 'wl_affiliation_results_with_normalization'\n",
528 | "\n",
529 | "df_res = pd.DataFrame(index=list(results.keys()))\n",
530 | "df_res['accuracy'] = [t[0] for t in results.values()]\n",
531 | "df_res['f1'] = [t[1] for t in results.values()]\n",
532 | "df_res['C'] = [t[2] for t in results.values()]\n",
533 | "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
534 | "df_res.to_csv(f'../results/{fn}.csv')\n",
535 | "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
536 | "df_res_test.to_html(f'../results/{fn}.html')\n",
537 | "df_res_test"
538 | ]
539 | }
540 | ],
541 | "metadata": {
542 | "kernelspec": {
543 | "display_name": "Python 3",
544 | "language": "python",
545 | "name": "python3"
546 | },
547 | "language_info": {
548 | "codemirror_mode": {
549 | "name": "ipython",
550 | "version": 3
551 | },
552 | "file_extension": ".py",
553 | "mimetype": "text/x-python",
554 | "name": "python",
555 | "nbconvert_exporter": "python",
556 | "pygments_lexer": "ipython3",
557 | "version": "3.7.3"
558 | }
559 | },
560 | "nbformat": 4,
561 | "nbformat_minor": 2
562 | }
563 |
--------------------------------------------------------------------------------
/notebooks/affiliation_timing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import sys\n",
10 | "sys.path.insert(0, '../')"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "from collections import Counter\n",
20 | "import time\n",
21 | "import random\n",
22 | "\n",
23 | "import rdflib\n",
24 | "import numpy as np\n",
25 | "from pprint import pprint\n",
26 | "from sklearn import svm\n",
27 | "\n",
28 | "import wlkernel"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 3,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "rdf_graph = rdflib.Graph().parse('../data/aifbfixed_complete.n3', format='n3')"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 4,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "all_triples = [\n",
47 | " (str(subj), str(pred), str(obj))\n",
48 | " for subj, pred, obj in rdf_graph\n",
49 | "]"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 5,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "quantiles = np.linspace(0.1, 1, 10) # [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]\n",
59 | "results_wlrdf = []\n",
60 | "results_wl = []\n",
61 | "n = len(all_triples)\n",
62 | "RANDOM_STATE = 42"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 6,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "for q in quantiles:\n",
72 | " n_sub = int(n * q)\n",
73 | " random.seed(RANDOM_STATE)\n",
74 | " triples = random.sample(all_triples, n_sub)\n",
75 | " \n",
76 | " instances_class_map = {\n",
77 | " subj: obj\n",
78 | " for subj, pred, obj in triples\n",
79 | " if 'affiliation' in pred\n",
80 | " and 'id5instance' not in obj\n",
81 | " }\n",
82 | " instances = list(instances_class_map.keys())\n",
83 | " y = list(instances_class_map.values())\n",
84 | " \n",
85 | " triples = [\n",
86 | " (subj, pred, obj)\n",
87 | " for subj, pred, obj in triples\n",
88 | " if 'affiliation' not in pred\n",
89 | " and 'employs' not in pred\n",
90 | " and 'member' not in pred\n",
91 | " and 'head' not in pred\n",
92 | " ]\n",
93 | " t0 = time.time()\n",
94 | " wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=3)\n",
95 | " kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=0)\n",
96 | " t1 = time.time()\n",
97 | "\n",
98 | " results_wlrdf.append(t1 - t0)"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 7,
104 | "metadata": {},
105 | "outputs": [],
106 | "source": [
107 | "rdf_graph = rdflib.Graph().parse('../data/aifbfixed_complete.n3', format='n3')"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 8,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "all_triples = [\n",
117 | " (str(subj), str(pred), str(obj))\n",
118 | " for subj, pred, obj in rdf_graph\n",
119 | "]"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 9,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "for q in quantiles:\n",
129 | " n_sub = int(n * q)\n",
130 | " random.seed(RANDOM_STATE)\n",
131 | " triples = random.sample(all_triples, n_sub)\n",
132 | " \n",
133 | " instances_class_map = {\n",
134 | " subj: obj\n",
135 | " for subj, pred, obj in triples\n",
136 | " if 'affiliation' in pred\n",
137 | " and 'id5instance' not in obj\n",
138 | " }\n",
139 | " instances = list(instances_class_map.keys())\n",
140 | " y = list(instances_class_map.values())\n",
141 | " \n",
142 | " triples = [\n",
143 | " (subj, pred, obj)\n",
144 | " for subj, pred, obj in triples\n",
145 | " if 'affiliation' not in pred\n",
146 | " and 'employs' not in pred\n",
147 | " and 'member' not in pred\n",
148 | " and 'head' not in pred\n",
149 | " ]\n",
150 | " t0 = time.time()\n",
151 | " wl_graphs = [wlkernel.WLGraph(triples, instance, max_depth=3) for instance in instances]\n",
152 | " kernel_matrix = wlkernel.wl_kernel_matrix(wl_graphs, iterations=0)\n",
153 | " t1 = time.time()\n",
154 | "\n",
155 | " results_wl.append(t1 - t0)"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 10,
161 | "metadata": {},
162 | "outputs": [],
163 | "source": [
164 | "import matplotlib.pyplot as plt\n",
165 | "\n",
166 | "class Result:\n",
167 | " def __init__(self, values = None, color = 'red', name = ''):\n",
168 | " if values is not None:\n",
169 | " self.values = values\n",
170 | " self.color = color\n",
171 | " self.name = name\n",
172 | "\n",
173 | "x = quantiles\n",
174 | "y = Result(results_wlrdf, 'orange', 'WL RDF')\n",
175 | "y1 = Result(results_wl, 'purple', 'WL')\n",
176 | "n = len(x)\n",
177 | "\n",
178 | "fig, ax = plt.subplots(figsize=(15, 8))\n",
179 | "for i in range(n - 1):\n",
180 | " plt.plot(x[i: i+2], y.values[i: i+2],\n",
181 | " 'o-', color=y.color, markersize=8)\n",
182 | " plt.plot(x[i: i+2], y1.values[i: i+2],\n",
183 | " 'o-', color= y1.color, markersize=8)\n",
184 | "\n",
185 | "ax.xaxis.label.set_text('fraction of the dataset')\n",
186 | "ax.yaxis.label.set_text('running time (s)')\n",
187 | "\n",
188 | "custom_lines = [plt.Line2D([0], [0], color=y.color, lw=4),\n",
189 | " plt.Line2D([0], [0], color=y1.color, lw=4)]\n",
190 | "ax.legend(custom_lines, [y.name, y1.name])\n",
191 | "plt.savefig('../results/affiliation_timing.png', format='png')"
192 | ]
193 | }
194 | ],
195 | "metadata": {
196 | "kernelspec": {
197 | "display_name": "Python 3",
198 | "language": "python",
199 | "name": "python3"
200 | },
201 | "language_info": {
202 | "codemirror_mode": {
203 | "name": "ipython",
204 | "version": 3
205 | },
206 | "file_extension": ".py",
207 | "mimetype": "text/x-python",
208 | "name": "python",
209 | "nbconvert_exporter": "python",
210 | "pygments_lexer": "ipython3",
211 | "version": "3.7.3"
212 | }
213 | },
214 | "nbformat": 4,
215 | "nbformat_minor": 2
216 | }
217 |
--------------------------------------------------------------------------------
/notebooks/lithogenesis_scores.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import sys\n",
10 | "sys.path.insert(0, '../')"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "from collections import Counter, OrderedDict\n",
20 | "import warnings\n",
21 | "\n",
22 | "import rdflib\n",
23 | "import numpy as np\n",
24 | "from pprint import pprint\n",
25 | "from sklearn import svm\n",
26 | "from sklearn.model_selection import cross_validate\n",
27 | "\n",
28 | "import wlkernel"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 3,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "warnings.simplefilter('ignore')"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 4,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "rdf_graph = rdflib.Graph().parse('../data/Lexicon_NamedRockUnit.nt', format='nt')"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 5,
52 | "metadata": {},
53 | "outputs": [
54 | {
55 | "name": "stdout",
56 | "output_type": "stream",
57 | "text": [
58 | "Most common classes with predicate equal to 'hasLithogenesis':\n",
59 | "[('http://data.bgs.ac.uk/id/Lexicon/LithogeneticType/FLUV', 93),\n",
60 | " ('http://data.bgs.ac.uk/id/Lexicon/LithogeneticType/GLACI', 53)]\n"
61 | ]
62 | }
63 | ],
64 | "source": [
65 | "hasLithogenesis_most_common = Counter(\n",
66 | " str(o)\n",
67 | " for s, p, o in rdf_graph\n",
68 | " if 'hasLithogenesis' in str(p)\n",
69 | ").most_common(2)\n",
70 | "print(\"Most common classes with predicate equal to 'hasLithogenesis':\")\n",
71 | "pprint(hasLithogenesis_most_common)\n",
72 | "classes = { c for c, _ in hasLithogenesis_most_common }"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 6,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "instances_class_map = {\n",
82 | " str(s): str(o)\n",
83 | " for s, p, o in rdf_graph\n",
84 | " if str(o) in classes\n",
85 | "}\n",
86 | "assert len(instances_class_map) == 146\n",
87 | "instances = list(instances_class_map.keys())\n",
88 | "assert len(instances) == len(set(instances))\n",
89 | "y = np.array(list(instances_class_map.values()))"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": 7,
95 | "metadata": {},
96 | "outputs": [
97 | {
98 | "name": "stdout",
99 | "output_type": "stream",
100 | "text": [
101 | "number of tripes: 313901\n"
102 | ]
103 | }
104 | ],
105 | "source": [
106 | "triples = list(\n",
107 | " (str(s), str(p), str(o))\n",
108 | " for s, p, o in rdf_graph\n",
109 | " if 'hasLithogenesis' not in str(p)\n",
110 | ")\n",
111 | "print('number of triples: ', len(triples))"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 9,
117 | "metadata": {},
118 | "outputs": [],
119 | "source": [
120 | "RANDOM_STATE = 42\n",
121 | "\n",
122 | "depth_values = [1, 2, 3]\n",
123 | "iteration_values = [0, 2, 4, 6]\n",
124 | "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
125 | "\n",
126 | "results = OrderedDict()\n",
127 | "\n",
128 | "for d in depth_values:\n",
129 | " for it in iteration_values:\n",
130 | " wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=d)\n",
131 | " kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=it)\n",
132 | " kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
133 | " \n",
134 | " results[(d, it)] = [0, 0, 0]\n",
135 | " for c in C_values:\n",
136 | " classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
137 | " scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
138 | " \n",
139 | " acc_mean = scores['test_accuracy'].mean()\n",
140 | " f1_mean = scores['test_f1_macro'].mean()\n",
141 | " \n",
142 | " if acc_mean > results[(d, it)][0]:\n",
143 | " results[(d, it)] = [acc_mean, f1_mean, c]"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": 11,
149 | "metadata": {},
150 | "outputs": [
151 | {
152 | "data": {
153 | "text/html": [
154 | "\n",
155 | "\n",
168 | "
\n",
169 | " \n",
170 | " \n",
171 | " | \n",
172 | " | \n",
173 | " accuracy | \n",
174 | " f1 | \n",
175 | " C | \n",
176 | "
\n",
177 | " \n",
178 | " depth | \n",
179 | " iterations | \n",
180 | " | \n",
181 | " | \n",
182 | " | \n",
183 | "
\n",
184 | " \n",
185 | " \n",
186 | " \n",
187 | " 1 | \n",
188 | " 0 | \n",
189 | " 0.795536 | \n",
190 | " 0.763739 | \n",
191 | " 10.0 | \n",
192 | "
\n",
193 | " \n",
194 | " 2 | \n",
195 | " 0.795536 | \n",
196 | " 0.763739 | \n",
197 | " 10.0 | \n",
198 | "
\n",
199 | " \n",
200 | " 4 | \n",
201 | " 0.795536 | \n",
202 | " 0.763739 | \n",
203 | " 10.0 | \n",
204 | "
\n",
205 | " \n",
206 | " 6 | \n",
207 | " 0.795536 | \n",
208 | " 0.763739 | \n",
209 | " 10.0 | \n",
210 | "
\n",
211 | " \n",
212 | " 2 | \n",
213 | " 0 | \n",
214 | " 0.906250 | \n",
215 | " 0.891229 | \n",
216 | " 100.0 | \n",
217 | "
\n",
218 | " \n",
219 | " 2 | \n",
220 | " 0.892857 | \n",
221 | " 0.874092 | \n",
222 | " 1.0 | \n",
223 | "
\n",
224 | " \n",
225 | " 4 | \n",
226 | " 0.892857 | \n",
227 | " 0.874092 | \n",
228 | " 1.0 | \n",
229 | "
\n",
230 | " \n",
231 | " 6 | \n",
232 | " 0.885714 | \n",
233 | " 0.866606 | \n",
234 | " 1.0 | \n",
235 | "
\n",
236 | " \n",
237 | " 3 | \n",
238 | " 0 | \n",
239 | " 0.891071 | \n",
240 | " 0.875862 | \n",
241 | " 100.0 | \n",
242 | "
\n",
243 | " \n",
244 | " 2 | \n",
245 | " 0.891964 | \n",
246 | " 0.873422 | \n",
247 | " 1.0 | \n",
248 | "
\n",
249 | " \n",
250 | " 4 | \n",
251 | " 0.906250 | \n",
252 | " 0.890104 | \n",
253 | " 1.0 | \n",
254 | "
\n",
255 | " \n",
256 | " 6 | \n",
257 | " 0.907143 | \n",
258 | " 0.888829 | \n",
259 | " 1.0 | \n",
260 | "
\n",
261 | " \n",
262 | "
\n",
263 | "
"
264 | ],
265 | "text/plain": [
266 | " accuracy f1 C\n",
267 | "depth iterations \n",
268 | "1 0 0.795536 0.763739 10.0\n",
269 | " 2 0.795536 0.763739 10.0\n",
270 | " 4 0.795536 0.763739 10.0\n",
271 | " 6 0.795536 0.763739 10.0\n",
272 | "2 0 0.906250 0.891229 100.0\n",
273 | " 2 0.892857 0.874092 1.0\n",
274 | " 4 0.892857 0.874092 1.0\n",
275 | " 6 0.885714 0.866606 1.0\n",
276 | "3 0 0.891071 0.875862 100.0\n",
277 | " 2 0.891964 0.873422 1.0\n",
278 | " 4 0.906250 0.890104 1.0\n",
279 | " 6 0.907143 0.888829 1.0"
280 | ]
281 | },
282 | "execution_count": 11,
283 | "metadata": {},
284 | "output_type": "execute_result"
285 | }
286 | ],
287 | "source": [
288 | "import pandas as pd\n",
289 | "\n",
290 | "fn = 'wlrdf_lithogenesis_results_with_normalization'\n",
291 | "\n",
292 | "df_res = pd.DataFrame(index=list(results.keys()))\n",
293 | "df_res['accuracy'] = [t[0] for t in results.values()]\n",
294 | "df_res['f1'] = [t[1] for t in results.values()]\n",
295 | "df_res['C'] = [t[2] for t in results.values()]\n",
296 | "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
297 | "df_res.to_csv(f'../results/{fn}.csv')\n",
298 | "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
299 | "df_res_test.to_html(f'../results/{fn}.html')\n",
300 | "df_res_test"
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": 13,
306 | "metadata": {},
307 | "outputs": [],
308 | "source": [
309 | "RANDOM_STATE = 42\n",
310 | "\n",
311 | "depth_values = [1, 2, 3]\n",
312 | "iteration_values = [0, 2, 4, 6]\n",
313 | "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
314 | "\n",
315 | "results = OrderedDict()\n",
316 | "\n",
317 | "for d in depth_values:\n",
318 | " for it in iteration_values:\n",
319 | " wl_graphs = [wlkernel.WLGraph(triples, instance, max_depth=d) for instance in instances]\n",
320 | " kernel_matrix = wlkernel.wl_kernel_matrix(wl_graphs, iterations=it)\n",
321 | " kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
322 | " \n",
323 | " results[(d, it)] = [0, 0, 0]\n",
324 | " for c in C_values:\n",
325 | " classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
326 | " scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
327 | " \n",
328 | " acc_mean = scores['test_accuracy'].mean()\n",
329 | " f1_mean = scores['test_f1_macro'].mean()\n",
330 | " \n",
331 | " if acc_mean > results[(d, it)][0]:\n",
332 | " results[(d, it)] = [acc_mean, f1_mean, c]"
333 | ]
334 | },
335 | {
336 | "cell_type": "code",
337 | "execution_count": 14,
338 | "metadata": {},
339 | "outputs": [
340 | {
341 | "data": {
342 | "text/html": [
343 | "\n",
344 | "\n",
357 | "
\n",
358 | " \n",
359 | " \n",
360 | " | \n",
361 | " | \n",
362 | " accuracy | \n",
363 | " f1 | \n",
364 | " C | \n",
365 | "
\n",
366 | " \n",
367 | " depth | \n",
368 | " iterations | \n",
369 | " | \n",
370 | " | \n",
371 | " | \n",
372 | "
\n",
373 | " \n",
374 | " \n",
375 | " \n",
376 | " 1 | \n",
377 | " 0 | \n",
378 | " 0.802679 | \n",
379 | " 0.774383 | \n",
380 | " 10.0 | \n",
381 | "
\n",
382 | " \n",
383 | " 2 | \n",
384 | " 0.796429 | \n",
385 | " 0.768842 | \n",
386 | " 10.0 | \n",
387 | "
\n",
388 | " \n",
389 | " 4 | \n",
390 | " 0.796429 | \n",
391 | " 0.768842 | \n",
392 | " 10.0 | \n",
393 | "
\n",
394 | " \n",
395 | " 6 | \n",
396 | " 0.796429 | \n",
397 | " 0.768842 | \n",
398 | " 10.0 | \n",
399 | "
\n",
400 | " \n",
401 | " 2 | \n",
402 | " 0 | \n",
403 | " 0.891964 | \n",
404 | " 0.877311 | \n",
405 | " 100.0 | \n",
406 | "
\n",
407 | " \n",
408 | " 2 | \n",
409 | " 0.892857 | \n",
410 | " 0.874092 | \n",
411 | " 1.0 | \n",
412 | "
\n",
413 | " \n",
414 | " 4 | \n",
415 | " 0.873214 | \n",
416 | " 0.854485 | \n",
417 | " 1.0 | \n",
418 | "
\n",
419 | " \n",
420 | " 6 | \n",
421 | " 0.865179 | \n",
422 | " 0.841353 | \n",
423 | " 1.0 | \n",
424 | "
\n",
425 | " \n",
426 | " 3 | \n",
427 | " 0 | \n",
428 | " 0.883929 | \n",
429 | " 0.871406 | \n",
430 | " 100.0 | \n",
431 | "
\n",
432 | " \n",
433 | " 2 | \n",
434 | " 0.913393 | \n",
435 | " 0.898291 | \n",
436 | " 1.0 | \n",
437 | "
\n",
438 | " \n",
439 | " 4 | \n",
440 | " 0.906250 | \n",
441 | " 0.890922 | \n",
442 | " 1.0 | \n",
443 | "
\n",
444 | " \n",
445 | " 6 | \n",
446 | " 0.906250 | \n",
447 | " 0.890922 | \n",
448 | " 1.0 | \n",
449 | "
\n",
450 | " \n",
451 | "
\n",
452 | "
"
453 | ],
454 | "text/plain": [
455 | " accuracy f1 C\n",
456 | "depth iterations \n",
457 | "1 0 0.802679 0.774383 10.0\n",
458 | " 2 0.796429 0.768842 10.0\n",
459 | " 4 0.796429 0.768842 10.0\n",
460 | " 6 0.796429 0.768842 10.0\n",
461 | "2 0 0.891964 0.877311 100.0\n",
462 | " 2 0.892857 0.874092 1.0\n",
463 | " 4 0.873214 0.854485 1.0\n",
464 | " 6 0.865179 0.841353 1.0\n",
465 | "3 0 0.883929 0.871406 100.0\n",
466 | " 2 0.913393 0.898291 1.0\n",
467 | " 4 0.906250 0.890922 1.0\n",
468 | " 6 0.906250 0.890922 1.0"
469 | ]
470 | },
471 | "execution_count": 14,
472 | "metadata": {},
473 | "output_type": "execute_result"
474 | }
475 | ],
476 | "source": [
477 | "fn = 'wl_lithogenesis_results_with_normalization'\n",
478 | "\n",
479 | "df_res = pd.DataFrame(index=list(results.keys()))\n",
480 | "df_res['accuracy'] = [t[0] for t in results.values()]\n",
481 | "df_res['f1'] = [t[1] for t in results.values()]\n",
482 | "df_res['C'] = [t[2] for t in results.values()]\n",
483 | "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
484 | "df_res.to_csv(f'../results/{fn}.csv')\n",
485 | "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
486 | "df_res_test.to_html(f'../results/{fn}.html')\n",
487 | "df_res_test"
488 | ]
489 | }
490 | ],
491 | "metadata": {
492 | "kernelspec": {
493 | "display_name": "Python 3",
494 | "language": "python",
495 | "name": "python3"
496 | },
497 | "language_info": {
498 | "codemirror_mode": {
499 | "name": "ipython",
500 | "version": 3
501 | },
502 | "file_extension": ".py",
503 | "mimetype": "text/x-python",
504 | "name": "python",
505 | "nbconvert_exporter": "python",
506 | "pygments_lexer": "ipython3",
507 | "version": "3.7.3"
508 | }
509 | },
510 | "nbformat": 4,
511 | "nbformat_minor": 2
512 | }
513 |
--------------------------------------------------------------------------------
/notebooks/lithogenesis_timing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import sys\n",
10 | "sys.path.insert(0, '../')"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "from collections import Counter\n",
20 | "import time\n",
21 | "import random\n",
22 | "\n",
23 | "import rdflib\n",
24 | "import numpy as np\n",
25 | "from pprint import pprint\n",
26 | "from sklearn import svm\n",
27 | "\n",
28 | "import wlkernel"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 3,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "rdf_graph = rdflib.Graph().parse('../data/Lexicon_NamedRockUnit.nt', format='nt')"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 4,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "all_triples = [\n",
47 | " (str(subj), str(pred), str(obj))\n",
48 | " for subj, pred, obj in rdf_graph\n",
49 | "]"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 5,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "quantiles = np.linspace(0.1, 1, 10) # [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]\n",
59 | "results_wlrdf = []\n",
60 | "results_wl = []\n",
61 | "n = len(all_triples)\n",
62 | "RANDOM_STATE = 42"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 6,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "for q in quantiles:\n",
72 | " n_sub = int(n * q)\n",
73 | " random.seed(RANDOM_STATE)\n",
74 | " triples = random.sample(all_triples, n_sub)\n",
75 | " \n",
76 | " instances_class_map = {\n",
77 | " subj: obj\n",
78 | " for subj, pred, obj in triples\n",
79 | " if 'hasLithogenesis' in pred\n",
80 | " }\n",
81 | " instances = list(instances_class_map.keys())\n",
82 | " y = list(instances_class_map.values())\n",
83 | " \n",
84 | " triples = [\n",
85 | " (subj, pred, obj)\n",
86 | " for subj, pred, obj in triples\n",
87 | " if 'hasLithogenesis' not in pred\n",
88 | " ]\n",
89 | " t0 = time.time()\n",
90 | " wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=3)\n",
91 | " kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=0)\n",
92 | " t1 = time.time()\n",
93 | "\n",
94 | " results_wlrdf.append(t1 - t0)"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 7,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "rdf_graph = rdflib.Graph().parse('../data/Lexicon_NamedRockUnit.nt', format='nt')\n",
104 | "all_triples = [\n",
105 | " (str(subj), str(pred), str(obj))\n",
106 | " for subj, pred, obj in rdf_graph\n",
107 | "]"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 8,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "for q in quantiles:\n",
117 | " n_sub = int(n * q)\n",
118 | " random.seed(RANDOM_STATE)\n",
119 | " triples = random.sample(all_triples, n_sub)\n",
120 | " \n",
121 | " instances_class_map = {\n",
122 | " subj: obj\n",
123 | " for subj, pred, obj in triples\n",
124 | " if 'hasLithogenesis' in pred\n",
125 | " }\n",
126 | " instances = list(instances_class_map.keys())\n",
127 | " y = list(instances_class_map.values())\n",
128 | " \n",
129 | " triples = [\n",
130 | " (subj, pred, obj)\n",
131 | " for subj, pred, obj in triples\n",
132 | " if 'hasLithogenesis' not in pred\n",
133 | " ]\n",
134 | " t0 = time.time()\n",
135 | " wl_graphs = [wlkernel.WLGraph(triples, instance, max_depth=3) for instance in instances]\n",
136 | " kernel_matrix = wlkernel.wl_kernel_matrix(wl_graphs, iterations=0)\n",
137 | " t1 = time.time()\n",
138 | "\n",
139 | " results_wl.append(t1 - t0)"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 10,
145 | "metadata": {},
146 | "outputs": [
147 | {
148 | "data": {
149 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA3sAAAHgCAYAAAAc83RKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzdeXRV1eH28e9OwiwyKzKDoqiISEERVBQUFRUFERAFnKpvqx3Q2tb+Os+Dtba2ta21AirIJCKIOIEKBgcUBBFRHJhBZogMCcl+/8hViTIE5OYk4ftZKyv3nL3v5QlL1srjPufsEGNEkiRJklS+ZCQdQJIkSZJ08Fn2JEmSJKkcsuxJkiRJUjlk2ZMkSZKkcsiyJ0mSJEnlkGVPkiRJksqhrKQDfBV169aNzZo1SzqGJEmSJCXi9ddfXxtjrLe7sTJd9po1a8asWbOSjiFJkiRJiQghLN7TmJdxSpIkSVI5ZNmTJEmSpHLIsidJkiRJ5VCZvmdvd/Ly8li2bBnbt29POkqpVrlyZRo1akSFChWSjiJJkiQpDcpd2Vu2bBnVq1enWbNmhBCSjlMqxRhZt24dy5Yto3nz5knHkSRJkpQG5e4yzu3bt1OnTh2L3l6EEKhTp46rn5IkSVI5Vu7KHmDRKwb/jiRJkqTyrVyWvSQNGTKEu++++7Pj888/nxtuuOGz49tuu4277rqLjz76iNatW+/1s37+85/TsGFD2rZtywknnMDIkSM/G7vmmmto3rw5J598MsceeyyDBg1i2bJln403a9aMk046ibZt29K2bVuys7MP4k8pSZIkqbSz7B1knTt3/qxYFRQUsHbtWubPn//ZeHZ2Np06dSr25w0ZMoQ5c+YwYcIEbrrpJvLy8j4b+9Of/sSbb77JwoULOeWUU+jatSu5ubmfjU+bNo05c+YwZ86c/fozJUmSJJV95e4BLZ8ZkebLFAfE3Z7u1KkTQ4YMAWD+/Pm0bt2alStXsmHDBqpWrcqCBQto164dK1as2K8/rmXLllStWpUNGzZwxBFHFBkLITBkyBDGjx/Pk08+yaWXXnpgP5MkSZKkcqP8lr2ENGjQgKysLJYsWUJ2djann346y5cvZ+bMmdSoUYOTTjqJihUr7vfnvvHGG7Rs2fJLRW9X7dq145133vms7J1zzjlkZmZSqVIlXnnllQP+mSRJkiSVPZa9NOjUqRPZ2dlkZ2dz6623snz5crKzs6lRowadO3fer8/6y1/+wgMPPMC7777LxIkT9zo3xqKrjdOmTaNu3br7nV+SJElS2ec9e2nw6X178+bNo3Xr1nTs2JGZM2fu9/16UHjP3vz58xk3bhzXX3/9XrdLmD17Nscff/xXjS9JkiSpHCi/K3t7uKeuJHTq1Ik777yTFi1akJmZSe3atdm4cSPz58/nvvvuO6DP7NmzJ/fffz/Dhg3jpptuKjIWY+See+5h5cqVXHDBBQfjR5AkSZJUxrmylwYnnXQSa9eupWPHjkXO1ahRo8hllQsXLqRRo0affY0ZM2avn/vTn/6Uu+66i4KCAgBuv/32z7ZeeO2115g2bdoB3Q8oSZIkaTfycmDuz2BcPRiRUfh97s8Kz5cB4Yv3eZUl7du3j7NmzSpybsGCBV7KWEz+XUmSJEl7kJcDT3eEnPchf5dbqTIrw2FHQ/eXocJhyeVLCSG8HmNsv7sxV/YkSZIk6YsW/OnLRQ8Kj3PeLxwv5crvPXuSJEmSdKDe+ye5nxTw0qSzmfVsB7ZuqUrV6ltpf+5rdL44m4rv3QttfpF0yr2y7EmSJEnSF+Ru2sx/f3Y9G1bXZmdeBQC2bqlG9qTOLHjteG74xf8o7U/L8DJOSZIkSfqCl6Z0K1L0PrUzrwIbVtfmpSndEkpWfJY9SZIkSfqCWc+c+qWi96mdeRWY9WyHEk60/yx7kiRJkrSrGNm6ae9Ttm0KJZPlK7DsHWRDhgzh
7rvv/uz4/PPP54Ybbvjs+LbbbuOuu+6idevWScSTJEmStC8fDqfqYVv3OqVKnaolFObAWfYOss6dO5OdnQ1AQUEBa9euZf78+Z+NZ2dn06lTp6TiSZIkSdqbLYtg1s207rZuj1OyKmfR/hu73dquVCm3T+P8RUjvY1B/Fn+22/OdOnViyJAhAMyfP5/WrVuzcuVKNmzYQNWqVVmwYAG1a9dOazZJkiRJByA/F166kp07q7D4g7aEjA1kVMggf0f+Z1OyKmdR6+hadL69c4JBi6fclr2kNGjQgKysLJYsWUJ2djann346y5cvZ+bMmdSoUYOTTjqJihVL+0NaJUmSpEPQvJ/C+lk8OemvrJ67nt4je7N2wVpm3TuLbeu2UaVOFdp/oz2db+9MxcNK/+/0lr006NSpE9nZ2WRnZ3PrrbeyfPlysrOzqVGjBp07l/7/AyBJkiQdclY9B2//kTfmf4c3Rm7gjB+dwUn9TwLgnF+ck3C4A+M9e2nw6X178+bNo3Xr1nTs2JGZM2d6v54kSZJUGm1fAzMHsuLjjkz+c11anNuCc35ZNgversrtyt6e7qkrCZ06deLOO++kRYsWZGZmUrt2bTZu3Mj8+fO57777yMnJSSybJEmSpF3ECK9cz9a1Wxnzl15UOyKL3iN6k5FZ9tfFyv5PUAqddNJJrF27lo4dOxY5V6NGDerWrQvAwoULadSo0WdfY8aMSSquJEmSdOh6758ULJ3Eo8NvZ8uqHfQd25dq9aolneqgKLcre0nKzMxk8+bNRc4NHTr0s9fNmjUjLy+vhFNJkiRJKmLjW/DGbbzwzPW8P2MnF/3rIhqe2jDpVAdNWlf2Qgg1QwhjQwjvhBAWhBBODyHUDiE8E0J4L/W9VmpuCCH8LYSwKIQwN4TQLp3ZJEmSJB3Cdm6Dl/rz7rw2vDi8EW2vacvXbvxa0qkOqnRfxvlXYEqMsRVwMrAA+CHwXIyxJfBc6hjgQqBl6utG4N40Z5MkSZJ0qJr9PTa8t4Lx/+hJ/bb16fHPHoQQkk51UKWt7IUQagBnAfcDxBhzY4wbgUuBYalpw4DLUq8vBYbHQi8DNUMIR6UrnyRJkqRD1LLHyZv/H0b/6xYIFeg7ri8VqlRIOtVBl86VvebAGuCBEMLsEMJ/QwjVgCNjjCtTc1YBR6ZeNwSW7vL+Zalz+y3GeICRDx3+HUmSJOmQtHU58eXreOLhQaxamEnvh3tTq0WtpFOlRTrLXhbQDrg3xngK8AmfX7IJQCxsHPvVOkIIN4YQZoUQZq1Zs+ZL45UrV2bdunWWmb2IMbJu3ToqV66cdBRJkiSp5MQCmDmI1586ljefbcJZPz2Llj1aJp0qbdL5NM5lwLIY4yup47EUlr3VIYSjYowrU5dpfpwaXw403uX9jVLniogx/gf4D0D79u2/1OgaNWrEsmXL2F0R1OcqV65Mo0aNko4hSZIklZwFf2J59kKmDPs6x1xwDF1+2iXpRGmVtrIXY1wVQlgaQjguxrgQ6Aa8nfoaDPw+9X1C6i2PA7eEEB4BTgM27XK5Z7FVqFCB5s2bH5SfQZIkSVI5sfZVPpn+W0b//RaqN6xJ74fLx8bpe5Puffa+BTwcQqgIfABcS+Glo6NDCNcDi4G+qbmTgR7AImBraq4kSZIkfTV5WyiYfhWP3tuPTzZV4fon+1KldpWkU6VdWstejHEO0H43Q912MzcCN6czjyRJkqRD0KxbmDasKR+82ZCe91/EUe0OjYf+l+91S0mSJEmHto9G8M6YV5gx4UxOueEUTrnulKQTlRjLniRJkqTyKecD1k34IY/9uw9Hfa0+Pe7pkXSiEmXZkyRJklT+FOSR+9wgRt/Vk4xKh9F3XD+yKqf7kSWly6H100qSJEk6JMS5v2TSn47k46X1uHrKFdRsWjPpSCXOlT1JkiRJ5cvqF3jt7qeZ91IbzvnlORzd/eikEyXCsidJkiSp/NixnqUP3MpTD13AsT1acOaPzkw6UWIs
e5IkSZLKhxjJmfINxvzxHGo0PoxeD19ByAhJp0qM9+xJkiRJKhcKFv6HcXdUYdvW6lw/bSCVa1ZOOlKiXNmTJEmSVPZtWsBz35/ARwuac/G/e1L/5PpJJ0qcZU+SJElS2Za/nbf/eBvZE0+j/ddP4OTBh87G6Xtj2ZMkSZJUpq197EdMuKstDU+pxvn39Eo6Tqlh2ZMkSZJUZuW+O5FR384lq3IFrpjwdbIq+ViST1n2JEmSJJVJcetKHr92FOtW1eXyUVdSo3GNpCOVKpY9SZIkSWVPLOCV2+9gfnZLuv60DS26t0o6Ualj2ZMkSZJU5ix+6E6e/ldTWp1bic4/9T693bHsSZIkSSpTtrydzdhvr6FWg1wuHfMdQjh0N07fG8ueJEmSpDIjf9tmxl7xMDu2VaLfhGupXLNK0pFKLR9VI0mSJKnMePa6X7Lk7SPofW8rjmh3bNJxSjVX9iRJkiSVCW/947+8/Eh1Tr1yJyf9v35Jxyn1XNmTJEmSVOqtmfUmj9/2IY1PyKH7//6QdJwywbInSZIkqVTbsfETRvV6mIqV4YrxN5BZuXLSkcoEy54kSZKkUivGyIQ+d7J+RRUGPdyI6seelHSkMsN79iRJkiSVWtk/H8GC5+Dcb66nWf+vJx2nTHFlT5IkSVKp9OHT83ju1+9yQqclnP7nu5OOU+ZY9iRJkiSVOpuXbWJc/9HUqb+Rng/fQKhYI+lIZY6XcUqSJEkqVfJz8xnT85/kbcun7731qdSsc9KRyiRX9iRJkiSVKk99axTLZufS58eLqHfxyKTjlFmWPUmSJEmlxtwH3+C1/7xHx4vf4MQ7/gEZmUlHKrMse5IkSZJKhdVzVzPxxsdp2mox5/71GqjaKOlIZZr37EmSJElK3PaN2xl92VAqV86hz11VyWzRK+lIZZ4re5IkSZISFQsijw0cxcYlnzD4d69y2HlTko5ULriyJ0mSJClRM34/g4WTPqL71VNpcsPfIatq0pHKBcueJEmSpMR88OwHTPvJVFqfPo9Tf9gXarVJOlK5YdmTJEmSlIhNSzYxrv8o6jZYwyU/ySUc962kI5Ur3rMnSZIkqcTt3LGT0Zc/ws6tn9Dv589S8ZzpEELSscoVV/YkSZIklbgp35nCilmruOymcdS5/B6oXC/pSOWOZU+SJElSiZozdA6v//t1Ol8yg+MHXQxHnZd0pHLJsidJkiSpxKycvZInvjGJ5q2X0vXGDdDmN0lHKre8Z0+SJElSidi2YRujLx9NlcO2cvm3HyfjrBmQWTHpWOWWK3uSJEmS0i4WRMZfPZ7NSzfS95YHqdb1D3D4sUnHKtcse5IkSZLS7sVfv8h7k9/jgoGTadStM7S4JulI5Z6XcUqSJElKq0VTFvH8z5+nzdnv077najj1KbdZKAGWPUmSJElps+HDDYwbMI4jj97BxYNHETpPhYo1k451SPAyTkmSJElpsXP7Tsb0GUPcmUvfb/6LCl/7P6jXOelYhwzLniRJkqS0mHzLZFa+sZJe3xhL7dYnwok/SjrSIcXLOCVJkiQddG/89w1m3z+bM698j+PaL4ZOcyHD+lGS/NuWJEmSdFCtmLWCyTdP5ujTCzi7xwg4bRRUa5J0rEOOl3FKkiRJOmi2rt3K6MtHc1i9LHoPvpOMltdBkyuSjnVIsuxJkiRJOigK8gt49KpHyVm1hSu+/QhVGzSCr/016ViHLC/jlCRJknRQvPCLF3j/6fe5+HtLaNjkbej8MmRVSzrWIcuyJ0mSJOkre3fSu7z4qxdp26cy7dr+D07+M9Rul3SsQ5qXcUqSJEn6Sta/v55Hr36U+m1q0qPnrwkNzodW30061iHPsidJkiTpgOVtzWP05aMJGYG+3xlJhWpVoeNQCFaNpHkZpyRJkqQDEmNk0v+bxOq5qxnwl83UqvwKnPYEVKmfdDSR5pW9EMJHIYR5IYQ5IYRZqXO1QwjPhBDeS32vlTofQgh/CyEsCiHMDSF4ga8kSZJU
is361yzmPjiXLkOOomW9u+C470DDHknHUkpJrK2eE2NsG2Nsnzr+IfBcjLEl8FzqGOBCoGXq60bg3hLIJkmSJOkALHtlGVO+M4WW5zemS8f/g5onQ9s/JB1Lu0jiQtpLgWGp18OAy3Y5PzwWehmoGUI4KoF8kiRJkvbikzWfMKbPGA5veDi9/t8YQsEn0HkkZFZKOpp2ke6yF4GnQwivhxBuTJ07Msa4MvV6FXBk6nVDYOku712WOidJkiSplCjYWcC4/uPYunYrfe/cTpWtU+Brd0ON45OOpi9I9wNazogxLg8hHAE8E0J4Z9fBGGMMIcT9+cBUabwRoEmTJgcvqSRJkqR9mvqTqXw49UMuvacNR+X2hca94eivJx1Lu5HWlb0Y4/LU94+B8cCpwOpPL89Mff84NX050HiXtzdKnfviZ/4nxtg+xti+Xr166YwvSZIkaRfvPPYOL/3+Jdrd0Ia2TX9Q+NTNU++DEJKOpt1IW9kLIVQLIVT/9DXQHXgLeBwYnJo2GJiQev04MCj1VM6OwKZdLveUJEmSlKB1767jscGP0aBDAy4cOBG2LILTH4JKtZOOpj1I52WcRwLjQ2HLzwJGxBinhBBeA0aHEK4HFgN9U/MnAz2ARcBW4No0ZpMkSZJUTLmf5DKq9ygyKmTQ9+4qZH30AJz4YziyS9LRtBdpK3sxxg+Ak3dzfh3QbTfnI3BzuvJIkiRJ2n8xRiZ+fSJr3l7D1Y+dR43lF0KdjnDST5OOpn1IYusFSZIkSWXEq39/lbdGvkXXX53N0ZVvLzzZeQRkVEg2mPbJsidJkiRpt5a8tISnb32aYy85ljMumgprs6HDv+Cw5klHUzGke+sFSZIkSWVQzqocxvYdS42mNej15yMJs66G5oOg2ZVJR1MxWfYkSZIkFZGfl8/YfmPZtmEbNzzel8rzu0G1FtD+70lH036w7EmSJEkq4rk7nmPxi4vpNfwyjtz6I9i2ErpnQ4XqSUfTfvCePUmSJEmfeXvs28z880w63NyBNp1mwdKxcPKvoU6HpKNpP7myJ0mSJAmANQvWMOHaCTTq2Ijzf9YUpl4BR3aF429POpoOgGVPkiRJEju27GB079FkVcniikd6kjnrfMiqAqc/CMELAssiy54kSZJ0iIsx8vj1j7Pu3XUMfHYgh6/7PWyYDWc9DlUbJB1PB8iKLkmSJB3iXv7Ly7w95m26/a4bzY9bCO/cBS1vhkaXJB1NX4Ere5IkSdIhbPGLi3nm+8/QqlcrOt1yNEw5GWq0hlP+lHQ0fUWWPUmSJOkQtWXFFsb0HUPto2tz2QM9Ca9cDrmboOtzhffrqUyz7EmSJEmHoPy8fMb0HUPullwGPTeISiv/DSufLNw4vWbrpOPpIPCePUmSJOkQ9Mztz7D0paX0vL8nRzRYAXN+AA17QstvJh1NB4kre5IkSdIhZt7Iebzy11c47bun0bpPc5jSHirVgdPuhxCSjqeDxLInSZIkHUI+futjJt4wkSZnNOG8P54Hb3wTNi+Ers9A5bpJx9NB5GWckiRJ0iFi+6btjL58NJUOr0Sf0X3IXDUBFv0HTvg+1O+WdDwdZJY9SZIk6RAQY2TCtRNY//56+ozuQ/XDN8IrN0DtDtDmV0nHUxp4GackSZJ0CMj+UzbvjH+H7nd1p2nnRjC1KxTkQecRkFEh6XhKA8ueJEmSVM59OPVDnrvjOU7seyIdv9sR5v8GPn4ROg6D6sckHU9p4mWckiRJUjm2edlmxvYfS53j6tDz/p6EtS/DvJ9D0yuh+cCk4ymNLHuSJElSObVzx05G9xnNzm076fdoPypW3AbZA6BqY+hwr9sslHNexilJkiSVAzmrchjTbwxLpi+BCAQ4rP5h5KzM4YqxV1D3uDqQfRVsXQrnToeKNZKOrDSz7EmSJEllXM6qHO5udjf5O/I/PxkhZ2UOITPQpHMT+PBBWDyy8Mmb9U5PLqxKjJdxSpIkSWXcmH5jiha9XcT8yGNX
/g9m3QxHdIET7ijhdEqKZU+SJEkq45ZMX7LX8eWvrSjcXuH0ByEjs4RSKWmWPUmSJKmsi3sf3rG1Epz2X6jWuGTyqFSw7EmSJEll3T4eqlmhUh407l0yWVRqWPYkSZKkMq7hqQ3Z0/JeZlYeR53avGQDqVSw7EmSJEll2Kalm9ixYS21jlxPVoW8ImNZFfKoWXcTfYZ2TSidkuTWC5IkSVIZtXHxRoadM4yvnfYk7bu9zMzJnZj1bAe25VShymHbaH/ua3S6KJtKi+ZC8xeSjqsSZtmTJEmSyqANH2xgWNdh7Ni0g9N7vERmZj7n9Hmec/o8/+XJa6aXeD4lz8s4JUmSpDJm3XvrGNplKLlbchn03CAyM3e/x97n9vG4TpVLlj1JkiSpDFn7zlqGdhnKzu07GTxtMEe1O4p9Po5zn+Mqjyx7kiRJUhnx8fyPGdplKLEgMvj5wRzZ5sjCgVrt9v7GememP5xKHe/ZkyRJksqAVW+u4sFzHySjQgaDpw6mbqu6hQObFsAnH+35jRmV4IxRJZJRpYsre5IkSVIpt/KNlQzvOpysyllc88I1nxe9ze/Cc10hIwvOnQ71zuLzSzZD4fGlH0GV+gklV5Jc2ZMkSZJKseWvLueh8x+iUo1KDJ42mFrNaxUObHm/sOjFfDj3eahxApzn9gr6nCt7kiRJUim1NHspw88dTpXaVbjmhWs+L3o5HxUWvfxt0PXZwqInfYEre5IkSVIptHj6Ykb0GMFhRx3G4KmDObzR4YUDnywtLHp5m6HbVKjVJtmgKrUse5IkSVIp8+G0Dxl58UhqNKnBoOcGUb1B9cKBrcsLi17uusIVvdqnJBtUpZqXcUqSJEmlyPtPv8+IHiOo2bwmg58f/HnR27YKpnaD7avgnKegTodkg6rUc2VPkiRJKiXem/weo3qPom6rugx8ZiDV6lUrHNi+prDofbK0sOjV7ZhsUJUJlj1JkiSpFFj4+EJG9xnNkW2OZODTA6lSu0rhwI51MPVcyPkQzp4MR5yRbFCVGZY9SZIkKWFvj3ubcf3HcVS7o7j6qaupXLNy4UDuBph6HmxeCGdPgiPPTjSnyhbv2ZMkSZIS9NaotxjbbywNT23I1U/vWvQ2wdTzYdN8OOsxqH9uskFV5riyJ0mSJCVk7kNzeWzwYzQ5owlXTrqSStUrFQ7kbYFpF8DGOXDmo9DggmSDqkyy7EmSJEkJmP3AbB6//nGan9Oc/o/3p2K1ioUDOz+B53vA+tfgjDHQ8OJkg6rMsuxJkiRJJez1/7zOpJsmcXT3o+n3WD8qVKlQOLBzKzx/MazNhk4joXGvZIOqTLPsSZIkSSXo1X+8ypO3PEnLHi3pO64vWZVTv5Lnb4cXL4WPX4BOD0HTvskGVZnnA1okSZKkEjLzLzN58pYnOe7S4+j76K5Fbwe82AtWPQcdH4BmA5INqnLBlT1JkiSpBLz0x5d49gfPckKfE+g9ojeZFTILB/JzYcYVsHIKnHoftBicbFCVG5Y9SZIkKc1e/PWLTPvJNFpf2Zpew3uRkZW6wK4gD17qD8snQod/wjE3JBtU5YplT5IkSUqTGCPP//x5Xvzli7QZ2IZLH7iUjMxPi95OyL4alo2Hr/0VWn4j2bAqd9J+z14IITOEMDuEMCl13DyE8EoIYVEIYVQIoWLqfKXU8aLUeLN0Z5MkSZLSJcbI1P+byou/fJG217X9QtHLh5evgSWj4ZQ74bhvJ5pV5VNJPKDlO8CCXY7/APwlxngMsAG4PnX+emBD6vxfUvMkSZKkMifGyDO3P8OM383gazd9jZ739fy86MUCePUG+OhhOPm3cPxtyYZVuZXWshdCaARcBPw3dRyArsDY1JRhwGWp15emjkmNd0vNlyRJksqMGCNTvjOFmX+eyanfOpWL7r2IkJH6tTYWwKv/Dz4YCif9Ak68I9GsKt/SvbJ3N/B9oCB1XAfYGGPcmTpeBjRMvW4ILAVIjW9KzZckSZLKhFgQeeKb
T/DqPa/S8daOXPDXC/hs/SJGmPUteP8+OPH/oPVPkg2rci9tZS+EcDHwcYzx9YP8uTeGEGaFEGatWbPmYH60JEmSdMBiQWTijRN5/V+v0/mHnel+Z/eiRe+NIfDeP+H426HNr8CL2JRm6VzZ6wz0DCF8BDxC4eWbfwVqhhA+fQpoI2B56vVyoDFAarwGsO6LHxpj/E+MsX2MsX29evXSGF+SJEkqnoL8AiZcO4HZ98/mrJ+cRbffdita9OZ8Hxb+FY77LrT9g0VPJSJtZS/GeEeMsVGMsRnQH5gaY7wKmAb0SU0bDExIvX48dUxqfGqMMaYrnyRJknQwFOwsYPzA8bw5/E3O/uXZnPPLc4oWvbk/hgV3Qsubod1dFj2VmJJ4GucX/QC4NYSwiMJ78u5Pnb8fqJM6fyvwwwSySZIkScWWn5fPuCvH8dbIt+j2+250+UmXohPe+iXM/y0ccyO0/5tFTyWqRDZVjzE+Dzyfev0BcOpu5mwHriiJPJIkSdJXlZ+bz9h+Y3nnsXfofld3Th9yetEJ838L834OLa6BDvdCSGKdRYeyEil7kiRJUnmyc/tORvcZzXtPvMeF91zIqbd8YS1jwZ3w5v9Bs6vh1P9a9JQIy54kSZK0H/K25TGq1yjef+p9LvrXRbS/qX3RCe/8FWbfDk36QccHICMzmaA65Fn2JEmSpGLK/SSXR3o+wofTPqTn/T055bpTik5495/wxneh8eXQ6UHI8NdtJcf/+iRJkqRiyM3JZcRFI1gyYwm9hveizdVtik5YdB/Muhka9oROIyCjQjJBpRTLniRJkrQPOzbv4OEeD7Ps5WX0frg3rfu3Ljrhg6Hw6k1w1IVwxmjIrJhITmlXlj1JkiRpL7Zv3M5DFzzEytdX0mdUH064/ISiEz58GF6+DuqfC2c9CpmVkgkqfYFlT5IkSdqDbeu38WD3B1k9dzVXjL2CVpe2Kjph8Wh4eRAceTac9RhkVk4kp7Q7xSp7IYRaQANgG/BRjLEgrakkSZKkhG1du5Xh5w5n7Ttr6f9Yf1r2aFl0wtLxkD0A6naGLhMhq2oyQaU92GPZCyHUAG4GrgQqAmuAysCRIYSXgX/GGKeVSEpJkiSpBH3y8ScM7zac9YvWc+XjV0wxemEAACAASURBVHJ096OLTlg2EV7qB3VOhbOfgKxqyQSV9mJvK3tjgeHAmTHGjbsOhBC+BgwMIbSIMd6fzoCSJElSSdqycgvDuw1n0+JNDHhiAM27Ni86YcWTMKMP1GwLZz8JFaonE1Tahz2WvRjjeXsZex14PS2JJEmSpIRsXraZYV2HsWXFFq568iqantW06IRVz8KLvaBGa+j6FFSskUxQqRgy9jUhhNA5hFAt9frqEMJdIYSm+3qfJEmSVJZsXLyRoV2GkrMqh4FPD/xy0Vv9PLzQEw4/Dro+DRVrJZJTKq59lj3gXmBrCOFk4DbgfQov75QkSZLKhQ0fbmBol6FsXbeVQc8OonGnxkUnfDwdnr8IDmsBXZ+FSnWSCSrth+KUvZ0xxghcCvw9xvgPwAuTJUmSVC6sX7SeoV2Gkrsll8FTB9Pw1IZFJ6yZCc/3gGpNoOtzULleMkGl/VScrRe2hBDuAK4GzgohZAAV0htLkiRJSr+1C9cyvOtw8nPzGTR1EPVPrl90wrrX4PkLoHL9wqJX5chkgkoHoDgre/2AHcD1McZVQCPgT2lNJUmSJKXZmrfXMLTLUAp2FjB42uAvF731s2Fqd6hYB7pNhaoNkgkqHaC97bMXYqFVwF2fno8xLiF1z96nc9IfU5IkSTp4Vs9dzfBzh5ORlcHgqYOp26pu0Qkb5sLUc6HC4XDuNKjWePcfJJVie1vZmxZC+FYIocmuJ0MIFUMIXUMIw4DB6Y0nSZIkHVwrZ69k2DnDyKqUxTUvXPPlordxPkztBllVU0XPB9GrbNrbPXsXANcBI0MIzYGNQGUgE3gauDvGODv9ESVJkqSDY/lry3mo+0NUOrwS
g6cNplaLL2yfsOmdwqKXUQG6Ti18+qZURu1tU/XtwD+Bf4YQKgB1gW0xxo0lFU6SJEk6WJbOXMrDFzxMlTpVGDxtMDWb1iw6YfN7MLUrEAuL3uEtE8kpHSzFeRonMcY8YGWas0iSJElpsWTGEh6+8GEOq38Yg6YOokbjGkUn5HxYWPQK8qDb81CjVSI5pYOpWGVPkiRJKqs+ev4jRlw0gsMbH87gqYOp3uALW0Z/shieOwd2bi186mbNE5MJKh1klj1JkiSVWx88+wEje46kVotaDHpuEIcdeVjRCVuXwXNdIXcTdHsOap2cTFApDYqzzx4hhKYhhHNTr6uEEKrv6z2SJElSkhZNWcSIi0dQp2UdBk8bvJuit6Kw6O1YC12fhtrtkgkqpck+y14I4evAWODfqVONgMfSGUqSJEn6KhZOXMgjlz5CvRPqMWjqIKrVq1Z0wrbVhU/d3LYSzn4S6nRIJqiURsVZ2bsZ6AxsBogxvgcckc5QkiRJ0oFa8OgCRvcezZEnH8mg5wZRtU7VohO2ryksep8sgbMnQ71OyQSV0qw4ZW9HjDH304MQQhYQ0xdJkiRJOjDzR89nTN8xNOjQgIHPDKRKrSpFJ+xYD1PPg5z34exJcMSZyQSVSkBxyt4LIYQfAVVCCOcBY4CJ6Y0lSZIk7Z+5D89l3JXjaNypMVc/dTWVa1QuOiF3Y2HR2/wOnPU4HHlOMkGlElKcsvdDYA0wD7gJmAz8OJ2hJEmSpP0xZ9gcxg8cT9MuTbnqyauoVL1S0Ql5m2Ha+bDpLThrPBx1XjJBpRK0z60XYowFwH2pL0mSJKlUef2+15l00yRanNuC/o/1p0LVCkUn5G2BaRfC+jfgzHHQ4MJkgkolrDhP47w4hDA7hLA+hLA5hLAlhLC5JMJJkiRJe/PaP19j0o2TaHlhS658/MovF72dn8ALF8O6V6DzI9CoZzJBpQQUZ1P1u4HewLwYow9mkSRJUqnw8l9f5qnvPsVxPY+jz+g+ZFX6wq+2O7fCCz1hzQzoNAKaXJ5MUCkhxSl7S4G3LHqSJEkqLbLvzOaZ25/h+MuP5/IRl5NZMbPohPzt8GIvWD0NTh8OTfslE1RKUHHK3veBySGEF4Adn56MMd6VtlSSJEnSHkz/7XSm/t9UWvdvTa8He5GR9YU7k/J3wPTLYdXT0PEBaH51MkGlhBWn7P0GyAEqAxXTG0eSJEnavRgjL/ziBV74xQu0uboNlz5w6ZeLXkEevNQPVkyGU/8NLa5JJKtUGhSn7DWIMbZOexJJkiRpD2KMTP3xVGb8dgZtr23LJfddQkbmF4veTnjpSlg2Adr/HY65MZmwUilRnH32JocQuqc9iSRJkrQbMUae/cGzzPjtDNrd2I6e/+25+6I3cyAsHQft/gLH3pxMWKkUKU7Z+wYwJYSwza0XJEmSVJJijDw15Cmy/5RNh1s6cPG/LiZkhKKTCvLh5etg8SPQ9o/Q6rvJhJVKmeJsql69JIJIkiRJu4oFkcm3TGbWvbPoOKQj3f/cnRDCFyfBqzfCRw/Cyb+BE25PJqxUCu2x7IUQWsUY3wkhtNvdeIzxjfTFkiRJ0qEsFkQm3jSR2f+dTafvd+Lc35+7m6IX4bVvwgf/g9Y/gxN/lExYqZTa28rercCNwJ93MxaBrmlJJEmSpENaQX4Bj1//OG8Oe5Mzf3wm5/zynN0Xvde/DYv+DSfcASf9LJmwUim2x7IXY/z08UUXxhi37zoWQqic1lSSJEk6JBXsLOCxwY8xb8Q8zv7l2XT5SZcvT4oR3rgN3v07HP+9wss3v1gGJRVr64Vs4IuXcu7unCRJklRsOatyGNNvDEumLym8bixA1TpV2bp2K91+140zfnjGl98UI8z5ISz8Cxz3ncIHslj0pN3a2z179YGGQJUQwinAp/+KDgeqlkA2SZIklVM5q3K4u9nd5O/I//xkhK1rtxIyA22vabv7N877GSz4I7T8RuEWCxY9aY/2trJ3PnAN
0IjC+/Y+/Ze0GfDuV0mSJB2wMf3GFC16u4j5kTH9xnDtC9cWHZj3K3jrV3D0DYWbplv0pL3a2z17w4BhIYTLY4zjSjCTJEmSyrkl05fs3/j838O8n0LzwXDqvyEUZ7to6dC2z38lFj1JkiQddHE/xhfcBW/eAU0HwGn3W/SkYvJfiiRJkkrevq7A/HR84T0w+zZo0hdOHwYZmelOJpUblj1JkiSVqIWPL9znnCZnNoH3/lW4l16jXtDpIcgozoPkJX1qn/9iQgi9d3N6EzAvxvjxwY8kSZKk8uqVv73ClO9O4YiTjmDdO+vIz/3yQ1oyK2XS787t8No3ocHF0PkRyKiQQFqpbCvO/x65HjgdmJY6Pht4HWgeQvhljPHBNGWTJElSOVGQX8BTQ57i1XtepdVlrej1UC9yt+R+aZ+9Jmc2od+f86n67o1w1AVw5ljIrJh0fKlMKk7ZywKOjzGuBgghHAkMB04DXgQse5IkSdqj3Jxcxg0Yx7sT36XjrR0574/nkZGZQcWM9Vz746GwZjqftb3qx8G7C6F+NzjzUcislHB6qewqTtlr/GnRS/k4dW59CCEvTbkkSZJUDmxZsYURF49g9Zur6fGPHnT4ZofCgW2rYEIzKNixy+wIW94BAnT4D2RVSSCxVH4Up+w9H0KYBIxJHV+eOlcN2Ji2ZJIkSSrTVs9dzYiLRrBtwzb6P96fYy869vPBGf2+UPR2FeHla+C8F0oiplRuFedpnDcDQ4G2qa/hwM0xxk9ijOfs6U0hhMohhFdDCG+GEOaHEH6ROt88hPBKCGFRCGFUCKFi6nyl1PGi1Hizr/izSZIkKSGLpizif2f8j1gQuW7GdUWLHqQu3dyLfY1L2qd9ruzFGCMwNvW1P3YAXWOMOSGECsCMEMKTwK3AX2KMj4QQ/kXhA2DuTX3fEGM8JoTQH/gD0G8//0xJkiQlbNa/ZzH55skc0foIBkwawOGNDt/NrP3ZVV3Sgdjnyl4IoXcI4b0QwqYQwuYQwpYQwuZ9vS8WykkdVkh9RaArnxfHYcBlqdeXpo5JjXcLIexru01JkiSVErEg8vTtT/PE/3uCY84/hmunX7uHogfF31Vd0oEqzj17fwQuiTEu2N8PDyFkUrhNwzHAP4D3gY0xxp2pKcuAhqnXDYGlADHGnSGETUAdYO3+/rmSJEkqWXnb8hg/cDwLxi2g/Tfbc+FfLyQjaw/rChvmQkYlKNi+5w+sd2Z6gkqHkOKUvdUHUvQAYoz5QNsQQk1gPNDqQD5nVyGEG4EbAZo0afJVP06SJElfUc7qHB659BGWv7qc7nd1p+N3O7LHC7Q+fAhevREqVIfcfIi7ebh7RiU4Y1R6Q0uHgOKUvVkhhFHAYxTehwdAjPHR4v4hMcaNIYRpFG7OXjOEkJVa3WsELE9NWw40BpaFELKAGsC63XzWf4D/ALRv396LuSVJkhK05u01jLhoBDmrc+j3aD9aXbaH/7efnwtvDIH3/glHdIHOjxSen9Gv6D579c4sLHpV6pfUjyCVW8Upe4cDW4Huu5yLwF7LXgihHpCXKnpVgPMofOjKNKAP8AgwGJiQesvjqeOZqfGpqYfDSJIkqRT6cOqHjOo9iqzKWVzzwjU07NBw9xO3LoPpV8C6l+H478HJv4OM1K+hbq8gpU1xnsZ57QF+9lHAsNR9exnA6BjjpBDC28AjIYRfA7OB+1Pz7wceDCEsAtYD/Q/wz5UkSVKazX5gNpNunESd4+ow4IkB1Gxac/cTV02Fl/pD/jY4Yww06VOyQaVD2B7LXgjh+zHGP4YQ7mE3z76NMX57bx8cY5wLnLKb8x8Ap+7m/HbgiuKEliRJUjJijEz7yTSm/2Y6Lc5twRVjr6Byjcq7mwgL/ghv/giqHwdnPgo1vvLjGyTth72t7H36UJZZJRFEkiRJpdvO7TuZcN0E3hr5FqdcfwoX3XsRmRUyvzwxdxO8fA0sewya9IXT7ocKh5V4XulQt8eyF2OcmPo+
bE9zJEmSdGjYunYro3qNYsmMJXT7XTc6/6Dz7p+4uXEeTL8ccj6Adn+B474Dbp0sJWKf9+yFEI4Fvgc023V+jLFr+mJJkiSptFj33jpG9BjBpqWb6DOqDyf2PXH3Ez8aAa98HSocDt2mwRHulSclqThP4xwD/Av4L5Cf3jiSJEkqTRZPX8yoy0YRMgKDpw6mcafGX56Unwuzvwfv3rPL1glHlXxYSUUUp+ztjDHem/YkkiRJKlXmjZjHhGsnULN5TQY8MYDaR9f+8qSty2HGFbB2Jhw3BE75A2RUKPmwkr6kOGVvYgjhm8B4im6qvj5tqSRJkpSYGCPTfzOdaT+ZRtMuTen3aD+q1K7y5YmrpxVuq7DzE+g8Cpr2LfmwkvaoOGVvcOr77buci0CLgx9HkiRJScrPzWfijRN5c9ibtBnYhkvuu4SsSl/4lTFGWHAnvPlDqH4sdHseahyfSF5Je1acTdWbl0QQSZIkJWvbhm2Mvnw0H037iC4/70KXn3b58hM38zbDy9fC0kehcR/o+D+oUD2ZwJL2qjgre4QQOvHlp3EOT1MmSZIklbANH25gRI8RrH9/Pb0e7EWbq9t8edLG+TC9N+S8D6f8GVoNcVsFqRQrztYLDwJHA3P4/GmcEbDsSZIklQPLXl7GyJ4jKdhZwMBnBtKsS7MvT/roEXjl+sJVvG5T4YizSjynpP1TnJW99sAJMcaY7jCSJEkqWW+PfZvxA8dTvUF1BkweQN3j6hadkJ8Ls2+Hd/8G9TpD59FQtUEyYSXtl+KUvbeA+sDKNGeRJElSCYkxkn1nNs9+/1kand6I/hP6U61etaKTtq6Al/rCmpfguO/AKX9yWwWpDClO2asLvB1CeJWiWy/0TFsqSZIkpU1+Xj6Tb5nMG/95gxP7nshlwy4jq/IXfi1c/QK81A925kCnkdCsfzJhJR2w4pS9n6c7hCRJkkrGjs07GHPFGN5/+n3OuOMMuv66KyFjl4esxAjv3AVzfgDVj4Guz0HNE5MLLOmAFWfrhRdKIogkSZLSa9PSTYy4aARrF6zlkv9eQrvr2xWdkLcFXr4Olo6Fxr2h4wNQ4fBkwkr6yorzNM7ewB+AI4CQ+ooxRv/lS5IklRErXl/ByEtGkvdJHlc9eRUtzm1RdMKmBYXbKmx5t/DevFa3ua2CVMYV5zLOPwKXxBgXpDuMJEmSDr6FExcyrv84qtatysDsgRxx4hFFJyweDa9cB1nVCi/bPPLsRHJKOriKU/ZWW/QkSZLKplf+9gpTvjuFBl9rwJUTr+Sw+od9PliQB7O/Dwvvhrqd4IzRULVhcmElHVTFKXuzQgijgMco+jTOR9OWSpIkSV9JQX4BTw15ilfveZVWl7Wi98O9qVB1l20Ttq2EGX1hzQw49tuFl25mVkwusKSDrjhl73BgK9B9l3MRsOxJkiSVQrk5uYwbMI53J75Lx1s7ct4fzyMjM+PzCR9PLyx6eZuh0whodmVyYSWlTXGexnltSQSRJEnSV7dlxRZGXDyC1W+upsc/etDhmx0+H4yx8JLN2bfDYS2g6zNQs3VyYSWlVXGexvkAhSt5RcQYr0tLIkmSJB2Q1XNXM+KiEWzfuJ0rJ15Jyx4tPx/M2wKv3ABLRkOjXoXbKlSskVxYSWlXnMs4J+3yujLQC1iRnjiSJEk6EIueWsSYK8ZQ6fBKXDv9Wuq3rf/54KYFMP1y2LIQ2v4Bjr/dbRWkQ0BxLuMct+txCGEkMCNtiSRJkrRfZv17FpNvnswRrY9gwKQBHN5ol+2Ql4yFl6+FzCpwzjNQv2tyQSWVqOKs7H1RSwo3WJckSVKCYkHkmR88w8w7Z9KyR0suf+RyKlWvVDhYsBPm/BDe+TPU6QhnjoGqjZINLKlEFeeevS0UvWdvFfCDtCWSJEnSPuVty2P8wPEsGLeADjd34IK7LyAjK/XEzW2r4KV+8PGL0PJmaHeX2ypIh6C9lr0QQgBOjDEuKaE8kiRJ2oec1Tk8cukj
LH91Od3v6k7H73YkfHoP3scz4KW+kLsRTn8Iml+VbFhJidlr2YsxxhDCE8BJJZRHkiRJe7Hm7TWMuGgEOatz6PdoP1pd1qpwIEZY+DeY/T2o1gzOfwpq+iucdCgrzj17b4QQOsQYX0t7GkmSJO3Rh1M/ZFTvUWRVzuLaF6+lQfsGhQN5OaltFUZBo0uh4zC3VZBUrLJ3GnBVCGEx8AkQKFz0a5PWZJIkSfrMnKFzmPj1idQ5rg4DnhhAzaY1Cwc2L4TpvWHzO3Dyb+GEH0DISDaspFKhOGXv/LSnkCRJ0m7FGJn202lM//V0WpzbgivGXkHlGpULB5eMS22rUAnOeRrqd0s2rKRSpTj77C0uiSCSJEkqauf2nUy4bgJvjXyLU64/hYvuvYjMCpmF2yq8eQcsuBPqnAZnjIFqjZOOK6mUOZB99iRJkpRmW9duZVSvUSyZsYRuv+tG5x90Lnzi5rbV8FJ/+Ph5aPnN1LYKlZKOK6kUsuxJkiSVMuveW8eIHiPYtHQTfUb14cS+JxYOrMmGGVdA7gY4fTg0H5hsUEmlmmVPkiSpFFk8fTGjLhtFyAgMnjqYxp0aF26r8O7f4Y1boVpT6D4Zap2cdFRJpZxlT5IkqZSYN2IeE66dQM3mNRnwxABqH10bdn4Cr3wdFo+EhpcUruhVrJl0VEllgGVPkiQpYTFGpv9mOtN+Mo2mXZrS79F+VKldBTa/m9pWYQGc/Bs44YduqyCp2Cx7kiRJCcrPzWfSTZOYM3QObQa24ZL7LiGrUhYsHQ8zB0NmRTh7Chx1XtJRJZUxlj1JkqSEbNuwjdGXj+ajaR/R5edd6PLTLoSYD3N+CG//AWp3gDPHQrUmSUeVVAZZ9iRJkhKw4cMNjOgxgvXvr6fXg71oc3Ub2P5x4bYKq6fBMTfB1/7qtgqSDphlT5IkqYQte3kZI3uOpGBnAQOfGUizLs1gzczUtgrroONQaDE46ZiSyjjLniRJUgl6e+zbjB84nuoNqjNg8gDqHlsHFv4dZt8KVRpB95lQq23SMSWVA5Y9SZKkEhBjJPvObJ79/rM07tSYfo/1o1otYOZA+OhhaHARdHoQKtZKOqqkcsKyJ0mSlGYFOwuYfMtkXv/365zY90QuG3YZWbkfwtOXw8a3oM2v4MQfua2CpIPKsidJkpRGOzbvYEzfMbz/1PuccccZdP11V8KKx2HmIAhZcPaT0OD8pGNKKocse5IkSWmyaekmRlw0grUL1nLJfy+h3bVtYO6P4O3fQ+32qW0VmiYdU1I5ZdmTJElKgxWvr2DkJSPJ+ySPq568ihZnVIdpF8Dq5+CYG1PbKlROOqakcsyyJ0mSdJAtnLiQcf3HUbVuVQZmD+SIIz+EKV1g+xo47X9w9LVJR5R0CPAuYEmSpIPolb+9wiOXPkK9E+pxw8vXc0TFcfDsmYX353XPtuhJKjGu7EmSJB0EBfkFPDXkKV6951VaXdaK3sMuoMJbN8NHD0KDHnD6g1CpdtIxJR1CLHuSJElfUW5OLuMGjOPdie/S8daOnPeT5mTMOAs2zoOTfgGtf+y2CpJKnGVPkiTpK9iyYgsjLxnJqjmr6PGPHnTouQqeObWw3J09GRpckHRESYcoy54kSVIx5KzKYUy/MSyZvgQiEKB+u/rkrMghd0suV07oR8vGD8KLv4Fa7eDMcXBYs6RjSzqEWfYkSZL2IWdVDnc3u5v8Hfmfn4yw6vVVAAyc3JMWlb4F85+Bo6+H9n93WwVJiUvbxeMhhMYhhGkhhLdDCPNDCN9Jna8dQngmhPBe6nut1PkQQvhbCGFRCGFuCKFdurJJkiTtjzH9xpC/Y+dux0Io4P1//xg+fhFOvQ9O+69FT1KpkM47hXcCt8UYTwA6AjeHEP5/e3ceH1V973/89c1sScg6YZUQCBAFBEQIihFURKgWlVq3q1drsa23i61rW/3VWltvb5W23mp396Wt27WttLUisokJW5BVdgw7SYDsJJkl8/39MQOE
JSEomZkk7+fjkcecOec753xOPCa88/2e7xkGPADMsdbmAXMi7wGuAPIiX3cAv2/H2kRERETabOfCHYA54TZrE1gxZzhMKYTBX41uYSIirWi3sGet3Wut/SiyXAusB/oC04CXIs1eAr4QWZ4GvGzDFgMZxpg+7VWfiIiISFtZa1vd3lCXBN4xUapGRKRtojIHsDFmAHAusAToZa3dG9lUCvSKLPcFdjb72K7IOhEREZGYsCHLypdWkpTS0Gq7pJT6KFUkItJ27R72jDEpwFvA3dbamubbbPjPZK3/qez4/d1hjCk2xhTv27fvNFYqIiIicsTOop08O+5Z3v7y24ydvBSnK3DCdk5XgLGTl0W5OhGRk2vXsGeMcREOen+21v41srrs0PDMyGt5ZP1uoF+zj2dH1h3FWvu0tTbfWpvfo0eP9iteREREuqTqndW8dfNbPH/h89TuruULL15FwdQiMntVHBf4nK4Amb0qKJi6KEbVioi0rN0evWCMMcBzwHpr7RPNNs0EbgMei7y+3Wz9ncaY14Dzgepmwz1FRERE2lWgPkDhjEIKZxSChQkPTWD8bT7cG26BGj9feeQ5iv5VQPH7Y2moSyIppYH8y5ZRMLUIT864WJcvInIcc7Ibjj/1jo0ZDywE1gChyOr/R/i+vTeAHGA7cIO1tiISDn8DXA7UA9OttcWtHSM/P98WF7faRERERKRV1lrWvraW97/3PjW7ajj7hrO57IeDyTjwMOz+B6QMguEPwdKvQ8h3/A4SPDBtGyT1jnrtIiLGmOXW2vwTbWu3nj1r7Ye0NEcxTDpBewt8q73qERERETnW7mW7mXX3LHYW7aTP6D588eXP0T/jRVh7czjEjXoczroLHB7oczl8eCPsW0h4ygEDPSbA+NcV9EQkLrVb2BMRERGJV7V7apnz4BxWvbyKbr26cfUzV3LORStIWDsR9pbDwOlwzv8cHeKSesPkBbErWkTkFCnsiYiISJcRaAiw6IlFfPizDwkFQlz4/QuZ8DUnnk13QPFH0P0CuPifkDU21qWKiHxmCnsiIiLS6VlrWf/Wet67/z2qt1cz5JohTP7x2XhrfgpLXoPkbCj4M/S/CUxLd6GIiHQsCnsiIiLSqe1dsZdZd89i+wfb6TWyF9NmX05ur9dh3XTAwvCHYdj3wNkt1qWKiJxWCnsiIiLSKdWV1TH3obmseG4FyVnJTP39VEZP3kzCmilQvhNyboBzZ0C3/rEuVUSkXSjsiYiISKcS9AVZ8tQSPnj0A4INQcbdM46L70wnccvdsKQQMkdBwZ+g50WxLlVEpF0p7ImIiEinYK1l48yNvHffe1RureTMK89kyk9Hk9Xwc1j0PHi6w3lPw8DbIcER63JFRNqdwp6IiIh0eGVryph1zyxK5pTQY1gPbnnnRgYN+Ces+S9oaoAh98LwH4I7PdaliohEjcKeiIiIdFj1++uZ9/A8lv9xOZ50D1c8dTljrirHseZqWLEZzpgKo38JaWfFulQRkahT2BMREZEOpynQxLLfLmPBjxfgq/Ux9ltjueTePiR98n0oei8c7i55B864ItaliojEjMKeiIiIdCib39nMrHtncWDjAQZNGcTnZoyjR+DXsOi34EyB0f8LZ34LElyxLlVEJKYU9kRERKRD2Ld+H+/d+x5b3t2CN8/LTW/fQN6QeZg148BfCYPugJE/gcQesS5VRCQuKOyJiIhIXGuoaGD+j+ez7LfLcKe4mfLLKZx33UEca2+E4jXQ8xIY8yvIPCfWpYqIxBWFPREREYlLoWCI4j8WM//h+TRWNTL6a6OZ+P2BdNv5EHz4N+g2AMb/H/T7IhgT63JFROKOwp6IiIjEna2ztzLrnlns+3gfAyYO4PKfT6CX41lYci0YJ4z87/DjFJxJsS5VRCRuKeyJiIhI3Diw+QDv3fcem/6xicyBmdzw1vUMOWcZZvV4aNgLA26FUT+D5L6xLlVEJO4p7ImIiEjMNVY38sGjH7DkqSU4
PU4mPTaJcbck4Fw7HZYshazzYMJfofu4WJcqItJhKOyJiIhIzISaQqx4bgVzH5pL/f56Rk0fxaQfUWa20AAAIABJREFUDCOl9FFY8Aok9YFxL0HuLWASYl2uiEiHorAnIiIiMbFt/jbevftdylaVkTM+h8v/eQl9kv8Cy24GG4RhD8LZD4IrNdalioh0SAp7IiIiElWVn1Qy+7uzWf/X9aTnpHPda9cybNwGzMpL4eC28Oya5/4cUgbGulQRkQ5NYU9ERESiwlfrY+H/LGTxE4tJcCYw8dGJXHB7Cq5134bC+ZAxAi6dA70vjXWpIiKdgsKeiIiItCsbsqx6eRVzHpxDXWkdI28dyaQfjSLtwAxY8DS4M2Hs72DQ1yBB/zQRETld9BNVRERE2s2Owh3MunsWe4r3kD0umxv/ei3ZmX+Hj6ZDsA7y7oQRPwKPN9alioh0Ogp7IiIictpV76jm/e+/z9rX1pLaN5Vr/nQNIy7ejVl5BZRsgN5TYMz/QvqwWJcqItJpKeyJiIjIaeM/6Kfw8UKKfl4EwEUPX8SFX++Fe+P34IN/QcpguPgfcMZUMCbG1YqIdG4KeyIiIvKZWWtZ85c1vP/996ndXcvw/xjOZY+eR3rNkzD/SXAkhWfYPPPb4PDEulwRkS5BYU9EREQ+k91Ld/PuXe+ya/Eu+ozpw3WvfpGcPrNh1Rjw7YdBt8PIn0JSr1iXKiLSpSjsiYiIyKdSs7uGOQ/OYfUrq0npncK0F6ZxzuXVmBXXwNKV0GM8jHkXvKNjXaqISJeksCciIiKnJNAQYNEvF/Hhzz4k1BRi/IPjGf+d/ni2/ADmvgHJ/eDC1yDnBt2XJyISQwp7IiIi0ibWWta9uY7Z35tN9fZqhl47lMk/u5DMhqdhwQzAwIhHYOh3wZkc63JFRLo8hT0RERE5qb0f7eXdu99lx8Id9DqnF194YRoDchfBynFQvwv6/weMehy65cS6VBERiVDYExERkRbVldYx5wdzWPnCSpK7J3PlH6/k3C+ESFh5CxQVQeZoKHgVeo6PdakiInIMhT0RERE5TtAXZPGvFrPwpwsJNga54L4LuOi+s0jc9mN4/wVI7AnnPwe5t0GCI9bliojICSjsiYiIyGHWWjb8fQOz759N5SeVnHnVmUyZcQlZoVfgg2sh5Avfkzf8IXClxbpcERFphcKeiIiIAFC2uox3736XbfO20ePsHtwy6z8ZNOxj+GgC1G2FvlfBub+EtLxYlyoiIm2gsCciItLFHdx3kHk/nMdHz3xEYkYiV/zmCvJvTCRh9Tfgg/chbShMnAV9psS6VBEROQUKeyIiIl1Uk7+Jpb9ZyoKfLMBf52fsnWO55IFzSNr9M5j9e3CmwpinIO/rkOCKdbkiInKKFPZEREQ6qbrSOt688U12LNwBFjCQMyGH6167jr3L9/Lefe9xYNMBBl8+mCm/mEQP11tQeAsEqmDw12HEjyGxe6xPQ0REPiWFPRERkU6orrSOXw34FU2+piMrLez4YAdP9H0CLGSdlcXN/7qZvNHbYfllUP0x9LoUxvwKMkbErngRETktFPZEREQ6oTdvfPPooNechYyBGXyj6HM41n4X5r4N3XJhwt8gexoYE91iRUSkXSjsiYiIdEI7Fu5odXv9nnIc7w0P34t3zs9gyN3gSIxSdSIiEg0KeyIiIp2Rjdyk14KAzwn9b4Jz/geSz4heXSIiEjUKeyIiIp1IxdYKFj2xCJc7QMDvbrGdyxOAC16MXmEiIhJ1CnsiIiKdwJ7iPRTOKGT9W+tJcCZw/pSlLJ11PsHA8Y9McLoCjJ2yNAZViohINCnsiYiIdFDWWrbO2krhjEK2zduGJ81DwXcLOP+bI3DPfpTNK/OoLPMeFficrgCZvSqYcPXCGFYuIiLRoLAnIiLSwTQFmvj49Y8pnFFI+ZpyUvumMvnnkxnzH+l4yp6FJddBkp+v/vg5Cv9ZQPH7Y2moSyIppYH8y5Zx4ZVFuBMDsT4N
ERFpZwp7IiIiHYS/zs9Hz37EoicWUbOzhh7DejDt+asYcfF2HNsegQ9mg3FCznVQvR531SomXjefidfNP35nPS6KdvkiIhJlCnsiIiJxrq6sjiVPLaH4d8U0VjXS/6L+TH1yPHmDZ2O2XgOLd0BSXxjxExj8NUjqDQ2l8PYACPmO32GCB8a/HvXzEBGR6FLYExERiVMHNh2g6JdFrHppFU3+JoZeM4SCr6WTnfZn2PFfsMYPvSbC6CfCD0NPaPZrPak3TNsGH94I+xYCkUcx9JgQDnpJvWN0ViIiEi0KeyIiInFm15JdFM0oYv3f1uNwOxj1peFccEMpWYHHoWIF1KTC4Dsg75uQPrTlHSX1hskLole4iIjEFYU9ERGROGBDls3vbKZwRiE7Fu4gMSORCfedzXlTFpNSfTuUV0L62TD2dzDgFnClxrpkERGJcwp7IiIiMdTkb2LNX9ZQ9PMi9q3bR3pOGp97JJvRY/+Ku+oB2O+EftdA3reg50VgTKxLFhGRDkJhT0REJAYaqxtZ/vRylvxqCbV7auk1IotrHndx9qBf4/CVgK8PjHgEBn0Nks+IdbkiItIBKeyJiIhEUe2eWhY/uZjlf1iOr8ZH7oQsrr6/lEG9f4axPki/GM58HLK/AAmuk+9QRESkBe0W9owxzwNXAuXW2uGRdV7gdWAAsA24wVpbaYwxwJPA54F64MvW2o/aqzYREZFo27d+H0W/KGL1K6uxTZZhlydR8LmFnNF9DjhTIPf28IQrGcNjXaqIiHQS7dmz9yLwG+DlZuseAOZYax8zxjwQef994AogL/J1PvD7yKuIiEiHtqNwB0Uzitg4cyPOJAdjrvFxwUV/JjNzG6QNhbxfw8AvgSst1qWKiEgn025hz1r7gTFmwDGrpwGXRJZfAuYTDnvTgJettRZYbIzJMMb0sdbuba/6RERE2osNWTbO3EjhjEJ2LdpFUqaTi2/bzdhxf6FbemN4iGbec+Fn5GnCFRERaSfRvmevV7MAVwr0iiz3BXY2a7crsk5hT0REOoxgY5DVf1pN0S+KOLDxABnZhivuWMKo89/HneGFwfeGn4+XnB3rUkVEpAuI2QQt1lprjLGn+jljzB3AHQA5OTmnvS4REZFT1VjVSPEfilny5BLqSuvoc5aPa7/zb4blryahdwHkvQz9vggOd6xLFRGRLiTaYa/s0PBMY0wfoDyyfjfQr1m77Mi641hrnwaeBsjPzz/lsCgiInK6VO+sZvGvFvPR0x/hr/MzaPQ+rpn+DrkjyzADb4W8lyBzZKzLFBGRLiraYW8mcBvwWOT17Wbr7zTGvEZ4YpZq3a8nIiLxqmxNGYt+sYg1f1mNtSGGF2yi4PJ59B6ZDnl3Q+5t4E6PdZkiItLFteejF14lPBlLd2PMLuBHhEPeG8aYrwDbgRsizd8h/NiFLYQfvTC9veoSERH5NKy1bF+wncIZhWz59xZciU2MvWwZ465YQsaoi+HMV6DXJE24IiIicaM9Z+O8qYVNk07Q1gLfaq9aREREPq1QU4gNf9tA4eMfsKe4jOT0RiZeV0T+lSUkj/oSDP4jdNM95CIiEn9iNkGLiIhIPAs0BFj10iqKZsyjsqQeb+8Kpk4v4pxrEnGN+Ab0uxYcnliXKSIi0iKFPRERkWYaKhpY9ptFLHmqiPoDTZwxcDeX3bOUIdeNJWHoM5A5KtYlioiItInCnoiICFC1rYpFM95jxQvrCDQa8kZtouDebfS/5lrMoMfBnRnrEkVERE6Jwp6IiHRppSv2UvToX1k7cx+GECMuXEPBl930/Nzt0GcymIRYlygiIvKpKOyJiEiXY62lZNZaiv57JlsLg7gTfYz7/DrO/+ZQ0sf/L6QMiHWJIiIin5nCnoiIdBmhYIh1L75L0S8K2bvRTUpGLZOm7yH/rikkDn8YHImxLlFEROS0UdgTEZFOz19Tx8r/fYVFv99OVVkSWWfUctWDMPLbt+Dsc16syxMREWkXCnsiItJp
Hdy+hWWPvcLSP/toqE0i+6xaPvdQJmd95UeYpKxYlyciItKuFPZERKRzsZaKpf9m0WPvsvJfaQQDLs4qaKTge6PJufqHmnBFRES6DIU9ERHpHAI17Hn7eYqeXMO6wmxMQgYjrwxR8NDV9Mg/N9bViYiIRJ3CnoiIdGi2ci1bX3mWomdqKFnbH09yHwruyOD8//efpOb0jHV5IiIiMaOwJyIiHU8oQFPJ3/j4mbcoetVL2Y7epPZIZfIjQxhzzxfwpHliXaGIiEjMKeyJiEh8aCiFD2+EfQsBCxjoMQHGvw5JvcNt6vfgX/MMH/2xiEUzz6bmwDB6DDJMe+ZSRnzpAhxuRyzPQEREJK4o7ImISOw1lNL4ah5FM8eyfO791Ncmk5xaz5hLiynYmUfiZS9Tt/INlr6wn2Wzx9B4sID+56cw9YWp5E09C5NgYn0GIiIiccdYa2Ndw6eWn59vi4uLY12GiIh8RvVvXcoLdw2lqjyTYMB1eL3TFSDNW0O/s7azdtFImoIOhl6VQ8GDU8gelx3DikVEROKDMWa5tTb/RNvUsyciIjFX9LI5LugBBAMuKsq8VJZnMPqro7ng/glknann44mIiLSFwp6IiMROKAAHilkxb8xxQe8IQ2I3H1c+/YWoliYiItLRKeyJiEj0hJqgaiWUzoWyedRu+oiSVb2or72m1Y81HEyKUoEiIiKdh8KeiIi0HxuC6o+hbB6UzaXxk8VsW53JJx/nUrJ+CPt3ng+Ay+0n4He3uBuXOxCtikVERDoNhT0RETl9rIXazVA2F8rmEtixkJ1rk/lkbS4lG4ayd+sobMjgSnaSM6E/o76dy8BJA3nv7r+zc9EemoLH/1pyOIP0PU+TsYiIiJwqhT0REfls6rZFwt08QnvmsedjE+m5G8rOTXfQ5E8gwWnIHtePi27OJXdSLtnnZx/1TLxr37iVJwc8QUIoSCh0ZH1CQhPG4eLaN26NwYmJiIh0bAp7IiJyaur3HB6WaUvnUb7hICUfD6Rk/RC2rb8df304rPUe1Zvzvh0Od/0n9Med0vIwzZTeKdy17V7evPFNdizccfiZ6tnjc7n+9etJ6Z0SpZMTERHpPBT2RESkdY37oHx+eFKV8nlUbimLhLuzKFl3Mwcrw7Noegd7GXFrONzlTswluXvyKR0mpXcK0xdMb4cTEBER6ZoU9kRE5Gj+KihfcLj37uD2TyhZl8sn686kZP01VO1NBMLhbODnw+Fu4KSBpOekx7hwERERaU5hT0SkqwvUwb4PD99359u9lu3rc/hkXR4lGy6nvKQbAJ50D7kTc7lgUi65l+bSfWh3jDExLl5ERERaorAnItLVBBtg/6LD4S5YupydG/tQsn4wJRsnsHvjVGyTwZnoJGd8DiPuCPfe9RndhwRHQqyrFxERkTZS2BMR6eya/HBg6eFhmaHyxezd6qVk3SBKNo5mx7rJBH0G4zD0HduX8Q+Ew12/C/rhTNSvCRERkY5Kv8VFRDqbUBAqV4R77krnYss/ZP/O5PCkKpvOZdvai2msDQ+/7Dm8J2O+Hg53Ay4egCfNE+PiRURE5HRR2BMR6ehsCKrWHB6WSfkCqkuhZO1ASjafS8na+6jdF34cQsaADIbeEJkx89JcUnrpkQYiIiKdlcKeiEhHYy3UbDg8LJPy+dTvr2fbulw+2TSKko+/Q8XO8I/35B7J5F56ZMbMzIGZMS5eREREokVhT0Qk3lkLdZ8cCXdl8/BXVbB9Qw4lm86hZN0dlG7xgAV3qpsBFw9g7L3hgNfz7J6YBM2YKSIi0hUp7ImIxKP6XYcfYk7pXJpqdrNrS19KNo6kZMOX2LU+mVAAHG4H/Qr6MfFL4XB3Rv4ZOFyOWFcvIiIicUBhT0QkHjSUQfn8I5Oq1GyldEcvSjacTcmm69m+JpVAPWDgjDFncEGk5y7nwhxcya5YVy8iIiJxSGFPRCQWfBVQvuDwsExb9TEVZV5K1g+jZPNUSlZ5aaiyAHQf0p1R
0yMzZl4ygKTMpBgXLyIiIh2Bwp6IyKfVUAof3gj7FgIWMNBjAox/HZJ6H902UAPlC4/cd1e5ktrKFErWn0XJlov5ZNX11JSGm6b1S+OsLwxkwKUDyL00l7S+adE+MxEREekEFPZERD6NhlIaX82jaOZYls+9n/raZJJT6xlzaTEFO/NIvH41HNwavu+ubC5UFNNQ52LbhjxKtoyjZM009peEd5XkTTo8Y2bupFy8g70Yo0lVRERE5LNR2BMR+RTq37mZFx7+ElXlmQQD4Xvm6mu7seidAjYUD+ErDCHBWHZsHkDJ1gJK1n6OvRsSsCFwJbvof1F/zv1mONz1Pqe3ZswUERGR005hT0TkVISa4OA2il42RwW9Q4IBFwf2duf33/sGdbVemvyWBGcC2eOyueiH4XCXfX42DrdmzBQREZH2pbAnInIsa8F3AGo3Qs1GqN0Ufq3ZCHVbIeRnxbzvHhf0DgmFHNRUpjHunnHkTsql/4T+uFPcUT4JERER6eoU9kSk62pqhNotkUC3EWo2HVn2VxIMOKjal8mBsp5UVOZxYN+lVJZdx4Fdbuprg63u2toEpvxiSpROREREROR4Cnsi0rnZUPgB5c175w4tH9xOUzCByvIMKkqzOFCRS8X+EVSUTaRidyLVey02dGRXiZmJZOVlkXOJlw1vLifgb7m3zuUOROHkRERERFqmsCcinYO/+uggd6inrnYTTT4/VfsyOFDqpaK8DxUVuVSUj+TA7mSq95qjA11GIt48L/0uzmLk4Eyy8rLw5nnxDvaSnJV8uN1LO8rYuWgPTcHjf4w6nEH6npcdjbMWERERaZHCnoh0HKEA1H1y/H10tZtoqttH1f5ID11pFhUVA6gon0jFnqupKnVgm47sxpPuISsvi+wJXkbmefHmecOhbrCXpKykNj324No3buXJAU+QEAoSCh2ZbCUhoQnjcHHtG7e2x3dAREREpM0U9kQkvlgLjaWRXrlImIssh6pLqCpPC/fQlXmp2JdNxb5zOLD3IqpKXUcHujQP3jwvZ4z3MrxZmPPmeUnunvyZn2OX0juFu7bdy5s3vsmOhTsOP1M9e3wu179+PSm9Uz7b90FERETkM1LYE5HYCB48PMyyeQ9dqGozVXsTqCjNoqLMG54cZX82FXtHUFXmJhQ8EtLcKe5woLswi+GRHjrv4HCwS+7x2QPdyaT0TmH6guntegwRERGRT0thT0TaT6gJ6rcf1TtHzUZCVZup3lkX7qEr9VJRlkXFvr5UlE2isnTq8YFusJfeBV6GHdND161nt3YPdCIiIiIdlcKeiHx2vgPHzXQZqtpE9bZyKvamRu6j81JR3ouK8tFUlk44KtC5urnwDvbSa1wWQyO9c4fuo+vWS4FORERE5NNQ2BPpIupKtjL7Kz9h87LeNNQlkZTSQN7YUiY/9zApuYNOvoOmRqjdetSDxkOVG6kp2UPFLkekly6LirLuVJQPoLJsBE2BhMMfdyU78Q7Oouf5XoY0G27pzfOS0jtFgU5ERETkNFPYE+kC6kq28sIFT1JT0Y9gwAVAQ10y6z7sx64LnmT6orvCgc/a455JZ6s3UVOyiwMlDVTszeRAqZfKMi8HyntRWTb5qEDnTHLgHZxFj/OyOOuYHrqUPgp0IiIiItGksCfSBcz+yk+OCnqHBAMuairSKLzz24y7sYqKzZUc2NMtPNPloV66srE0BcYd/owzMQHvoEy6j+3Bmc3CnDfPS2qfVEyCAp2IiIhIPFDYE+moQgHwVx7/5asAfyWhhkr81dX4qmvYvGzgcUHvkGDAxeJ3zmPxO0dCmsNj8A7MwDumJ4ObhTnvYC9pfdMU6EREREQ6AIU9kVgKBcFfBf4KbGMF/sr9+Ksq8VVW4q+uwVdVg7/mIL6aBvw1jfjq/Phrg/gONuGvT8Df4MbX6MHf6MbfGFmOrAv604H0yIHsSUuZ+oeph2e6TMtWoBMRERHp6BT2JG595glF2pm1lkB9AH91A77K
/fgrDuCrrMBfWYWvugZ/dR3+2khQq/Xhqw3grwviPxjCdxD89Qn4GpzhkNbgIeBzn+Ao3SJfR3MnW9zdDJ5uCbhTXHi8btLSEnGnJuFOT8GTloI7PQlPqgd3qps5975BQ11yi+eSlFJP/n/ln75vjoiIiIjEnMKexKU2TyhyCqy1BBuD+Gv9+Ov8+Gp9+Gsjr9WN+Kqq8FdV46uuxV9dh6+mHn9tYySoBfHXN+GvC+GrN+FetUYHNpRw8gPjweVJwJ3kxpMcwt0N3MmGlDOceFNduFM9eFITcacl40lPwZ2RhicjHXdGJp6MFNwpbtyp7sPBzd3Nfcq9bjtefYp1Hx5/zx6A0xUgb2zZKe1PREREROKfwt5p4q+soPDBJyn+i4/62kSSUxvJv9nDhT+7C3emN9bldTitTShStT+d/7vql5xz39fw1/mPBLYaH/6aOnzVB/HXNOCrbcRfG8BXF8B/MIS/3h71bLfWOF0B3Il+3Il+PEk+3EkBkpMtGZngTknAk+LEneLGk5aIOy0Jd2q3I0EtMxN3phdPZhburJ6401NJcLQlFLafyc89zK4LnqSmIu2o76nTFSDNW8Pk5x6OYXUiIiIi0h6MtSe/lydajDGXA08CDuBZa+1jrbXPz8+3xcXFUamtNf7KCp4d9d9U7k0+7h/SmX3q+erKh+I68FlrCQVCBH1BmnxNx702+Y9f1+qrv+nIcmOQJl+AYGMg/OoL0uQLEmyMtPc3HWnvDx1+dbsOtjrs8FgJjqZwKEv040n04U6KvB4Ka4l+3MlB3N0ceFJckZ6yQ71p3XCnp+LJyMCdmYE7IwtHNy+4MyNfXnAmteN/geg4Miy2Fw0Hk0jq1kDe2LK4GRYrIiIiIqfOGLPcWnvC+3HiJuwZYxzAJmAysAtYBtxkrV3X0mfiJezN+/qPKHq+qcUhcgW3O5j4hx8DJwlWjUGCjX6aGn00+Xzh5QYfQV+ApkZ/JDAdeg1G1gePClhN/iBBXyQ0+UI0+UPhEOUPEfRZmvyWoB+aApamAOFl/2mciMNYnM4gDldTi68O54nWBXE2ey38x4VAK3UZy10vLMCdnoI7PR1nSkazcBb58niPfu9IBj3nTUREREQ6kdbCXjwN4zwP2GKt/QTAGPMaMA1oMezFi+K/+AgGTtzzEwy4+ODpEEteeYhgIIGmgOP0HfgEwepEQSrRGcSR1IQz9Zhg5bI43RaH0+Jwg9MFDrfF6Tbh926Dw21wuhNweCKvbgdOTwIOTwIOlwNnogOHx4nT48ThcZLgcmEcLkhwgfGEXxNcYFxHlpu/N86j10e2fTT4ndYnFOlWT8Zt807f91JEREREpJOJp7DXF9jZ7P0u4PxjGxlj7gDuAMjJyYlOZSdRX5vYegNrGPX5qkhwMjg8CZHgZHC6HeH3HgcOj+NwaHK4nTgTnUeCVKILp8cdfk1040j0RIKV+8Sh6dhw1byNcUHCaQyd7SBv7POaUERERERE5DOIp7DXJtbap4GnITyMM8blAJCc2kh9bcv3dCWnNXD5m7+JYkUdnyYUERERERH5bGI7ReDRdgP9mr3PjqyLe/k3e3C6Aifc5nQFyL/pJD1/cpyU3EFMX3QXw8bvJCnlIJgQSSkHGTZ+56d67IKIiIiISFcTTxO0OAlP0DKJcMhbBtxsrf24pc/EywQtHX02ThERERER6Zham6Albnr2rLVB4E5gFrAeeKO1oBdP3JlevrryIQpud5CcVo8xIZLT6im43aGgJyIiIiIiMRE3PXufRrz07ImIiIiIiMRCh+jZExERERERkdNHYU9ERERERKQTUtgTERERERHphBT2REREREREOiGFPRERERERkU5IYU9ERERERKQTUtgTERERERHphBT2REREREREOiGFPRERERERkU5IYU9ERERERKQTUtgTERERERHphBT2REREREREOiGFPRERERERkU5IYU9ERERERKQTMtbaWNfwqRlj9gHbY12HREV3YH+s
ixBpha5RiXe6RiXe6RqVeBev12h/a22PE23o0GFPug5jTLG1Nj/WdYi0RNeoxDtdoxLvdI1KvOuI16iGcYqIiIiIiHRCCnsiIiIiIiKdkMKedBRPx7oAkZPQNSrxTteoxDtdoxLvOtw1qnv2REREREREOiH17ImIiIiIiHRCCnsSN4wxlxtjNhpjthhjHjjB9nuNMeuMMauNMXOMMf1jUad0bSe7Tpu1u9YYY40xHWrWLun42nKNGmNuiPw8/dgY85do1yhdWxt+3+cYY+YZY1ZEfud/PhZ1StdkjHneGFNujFnbwnZjjHkqcv2uNsaMjnaNp0JhT+KCMcYB/Ba4AhgG3GSMGXZMsxVAvrV2JPB/wIzoVildXRuvU4wxqcBdwJLoVihdXVuuUWNMHvAgcKG19mzg7qgXKl1WG3+OPgS8Ya09F/gP4HfRrVK6uBeBy1vZfgWQF/m6A/h9FGr61BT2JF6cB2yx1n5irfUDrwHTmjew1s6z1tZH3i4GsqNco8hJr9OIR4HHgcZoFidC267RrwG/tdZWAlhry6Nco3RtbblGLZAWWU4H9kSxPunirLUfABWtNJkGvGzDFgMZxpg+0anu1CnsSbzoC+xs9n5XZF1LvgL8u10rEjneSa/TyHCOftbaf0WzMJGItvwsPRM40xhTaIxZbIxp7S/YIqdbW67RR4BbjDG7gHeAb0enNJE2OdV/s8aUM9YFiJwqY8wtQD5wcaxrEWnOGJMAPAF8OcaliLTGSXj40SWER0h8YIwZYa2timlVIkfcBLxorf2lMeYC4BVjzHBrbSjWhYl0NOrZk3ixG+jX7H12ZN1RjDGXAT8ArrbW+qJUm8ghJ7tOU4HhwHxjzDZgHDBTk7RIFLXlZ+kuYKa1NmCtLQE2EQ5/ItHQlmv0K8AbANbaRUAi0D0q1YmcXJv+zRovFPYkXiwD8owxucYYN+Ebsmc2b2CMORf4I+Ggp3tMJBZavU6ttdXW2u7W2gHW2gGE7y292lpbHJtypQuxQmdWAAAFbUlEQVQ66c9S4O+Ee/UwxnQnPKzzk2gWKV1aW67RHcAkAGPMUMJhb19UqxRp2UzgS5FZOccB1dbavbEuqiUaxilxwVobNMbcCcwCHMDz1tqPjTE/AYqttTOBnwMpwJvGGIAd1tqrY1a0dDltvE5FYqaN1+gsYIoxZh3QBHzXWnsgdlVLV9LGa/Q+4BljzD2EJ2v5srXWxq5q6UqMMa8S/oNY98h9oz8CXADW2j8Qvo/088AWoB6YHptK28bo/x0REREREZHOR8M4RUREREREOiGFPRERERERkU5IYU9ERERERKQTUtgTERERERHphBT2REREREREOiGFPRERaXfGmO8YY9YbY/58Gvb1ZWPMGc3eP2uMGfZZ99vK8TzGmPeNMSuNMTeepJZtkWfXnY7jvmiMue4kbY46/mk67ihjzOdP5z5FRCQ29Jw9ERGJhm8Cl1lrdzVfaYxxWmuDp7ivLwNrgT0A1tqvnpYKW3Zu5DijTlZLDLTH8UcB+YSfJSUiIh2YevZERKRdGWP+AAwE/m2MuccY84gx5hVjTCHwijFmgDFmoTHmo8hXQbPPft8Ys8YYs8oY81ikpysf+HOkpy3JGDPfGJMfaX9TpP1aY8zjzfZTZ4z5aWQ/i40xvU5Qp9cY83djzOpIm5HGmJ7An4CxkeMNatb+uFoim74dOY81xpghkbbdjDHPG2OWGmNWGGOmneD4xhjzG2PMRmPM+0DPZtseNsYsi5zX05G2J/peHNcu8vnvGGPWRc7ttZZqMsa4gZ8AN56oJ1NERDoWPVRdRETanTFmG5Bvrd1vjHkEuAoYb61tMMYkAyFrbaMxJg941Vqbb4y5Avgh4R7BemOM11pbYYyZD9xvrS2O7Hs+cD/h3q3FwBigEngPeMpa+3djjAWuttb+wxgzA6ix1v73MTX+Gthvrf2xMeZS4Alr7ShjzCWR4115gvM6tpZtwC+ttb82xnwTGG2t
/aox5n+AddbaPxljMoClwLnW2oPN9vVF4BvA5UAvYB3wVWvt/x0690i7V4A3Iudy7PFbarcHyLXW+owxGdbaqpZqAq6P/Le6s+3/hUVEJB6pZ09ERGJhprW2IbLsAp4xxqwB3gQO3X93GfCCtbYe4FCIacVYYL61dl9kaOifgYsi2/zAPyPLy4EBJ/j8eOCVyLHmAlnGmLRTPTHgryc4zhTgAWPMSmA+kAjkHPO5iwgH3SZr7R5gbrNtE40xSyLfo0uBs1s4dkvtVhPuAbwFODRsti01iYhIB6Z79kREJBYONlu+BygDziH8R8jGdjhewB4ZytJE+/7+853gOAa41lq78VR3ZoxJBH5HuLdtZ6RnNPEU200lHCavAn5gjBnRUk3GmPNPtUYREYlP6tkTEZFYSwf2WmtDwK2AI7J+NjA9MswTY4w3sr4WSD3BfpYCFxtjuhtjHMBNwIJTqGMh8J+RY11CeEhnzUk+01Itx5pF+F6+Q/fQnXuCNh8QvlfOYYzpA0yMrD8U2PYbY1KA5jN0Nj/+CdsZYxKAftbaecD3CX+/U1qpqa3nJCIicU5hT0REYu13wG3GmFXAECK9ftbad4GZQHFkqOH9kfYvAn84ZlIUrLV7gQeAecAqYLm19u1TqOMRYIwxZjXwGHBbGz5zwlpO4FHCw1VXG2M+jrw/1t+AzYTv1XsZWARgra0CniE86+YsYNmJjk+4R/FE7RzAnyJDO1cQvo+xqpWa5gHDNEGLiEjHpwlaREREREREOiH17ImIiIiIiHRCCnsiIiIiIiKdkMKeiIiIiIhIJ6SwJyIiIiIi0gkp7ImIiIiIiHRCCnsiIiIiIiKdkMKeiIiIiIhIJ6SwJyIiIiIi0gn9f4h8kfduiiwdAAAAAElFTkSuQmCC\n",
150 | "text/plain": [
151 | ""
152 | ]
153 | },
154 | "metadata": {
155 | "needs_background": "light"
156 | },
157 | "output_type": "display_data"
158 | }
159 | ],
160 | "source": [
161 | "import matplotlib.pyplot as plt\n",
162 | "\n",
163 | "class Result:\n",
164 | " def __init__(self, values = None, color = 'red', name = ''):\n",
165 | " if values is not None:\n",
166 | " self.values = values\n",
167 | " self.color = color\n",
168 | " self.name = name\n",
169 | "\n",
170 | "x = quantiles\n",
171 | "y = Result(results_wlrdf, 'orange', 'WL RDF')\n",
172 | "y1 = Result(results_wl, 'purple', 'WL')\n",
173 | "n = len(x)\n",
174 | "\n",
175 | "fig, ax = plt.subplots(figsize=(15, 8))\n",
176 | "for i in range(n - 1):\n",
177 | " plt.plot(x[i: i+2], y.values[i: i+2],\n",
178 | " 'o-', color=y.color, markersize=8)\n",
179 | " plt.plot(x[i: i+2], y1.values[i: i+2],\n",
180 | " 'o-', color= y1.color, markersize=8)\n",
181 | "\n",
182 | "ax.xaxis.label.set_text('fraction of the dataset')\n",
183 | "ax.yaxis.label.set_text('running time (s)')\n",
184 | "\n",
185 | "custom_lines = [plt.Line2D([0], [0], color=y.color, lw=4),\n",
186 | " plt.Line2D([0], [0], color=y1.color, lw=4)]\n",
187 | "ax.legend(custom_lines, [y.name, y1.name])\n",
188 | "plt.savefig('../results/lithogenesis_timing.png', format='png')"
189 | ]
190 | }
191 | ],
192 | "metadata": {
193 | "kernelspec": {
194 | "display_name": "Python 3",
195 | "language": "python",
196 | "name": "python3"
197 | },
198 | "language_info": {
199 | "codemirror_mode": {
200 | "name": "ipython",
201 | "version": 3
202 | },
203 | "file_extension": ".py",
204 | "mimetype": "text/x-python",
205 | "name": "python",
206 | "nbconvert_exporter": "python",
207 | "pygments_lexer": "ipython3",
208 | "version": "3.7.3"
209 | }
210 | },
211 | "nbformat": 4,
212 | "nbformat_minor": 2
213 | }
214 |
--------------------------------------------------------------------------------
/notebooks/no_labels_scores.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import sys\n",
10 | "sys.path.insert(0, '../')"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "from typing import Union\n",
20 | "from collections import Counter, OrderedDict\n",
21 | "import warnings\n",
22 | "\n",
23 | "import rdflib\n",
24 | "import numpy as np\n",
25 | "import pandas as pd\n",
26 | "from pprint import pprint\n",
27 | "from sklearn import svm\n",
28 | "from sklearn.model_selection import cross_validate\n",
29 | "\n",
30 | "import wlkernel"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 3,
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "warnings.simplefilter('ignore')"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 4,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "rdf_graph = rdflib.Graph().parse('../data/aifbfixed_complete.n3', format='n3')"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 5,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "triples = [\n",
58 | " (str(subj), str(pred), str(obj))\n",
59 | " for subj, pred, obj in rdf_graph\n",
60 | "]"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 6,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "instances_class_map = {\n",
70 | " subj: obj\n",
71 | " for subj, pred, obj in triples\n",
72 | " if 'affiliation' in pred\n",
73 | " and 'id5instance' not in obj\n",
74 | "}\n",
75 | "instances = list(instances_class_map.keys())\n",
76 | "y = list(instances_class_map.values())"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 7,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "triples = [\n",
86 | " (subj, pred, obj)\n",
87 | " for subj, pred, obj in triples\n",
88 | " if 'affiliation' not in pred\n",
89 | " and 'employs' not in pred\n",
90 | " and 'member' not in pred\n",
91 | " and 'head' not in pred\n",
92 | "]"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 | "### Weisfeiler-Lehman RDF"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 1,
105 | "metadata": {},
106 | "outputs": [
107 | {
108 | "ename": "NameError",
109 | "evalue": "name 'Union' is not defined",
110 | "output_type": "error",
111 | "traceback": [
112 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
113 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
114 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0mbananize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mg\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mwlkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mWLRDFGraph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwlkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mWLGraph\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mwlkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mWLRDFGraph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwlkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mWLGraph\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m'All the label in the WLRDFGraph are replaced with the same label'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34m'banana'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
115 | "\u001b[0;31mNameError\u001b[0m: name 'Union' is not defined"
116 | ]
117 | }
118 | ],
119 | "source": [
120 | "def bananize(g: Union[wlkernel.WLRDFGraph, wlkernel.WLGraph]) -> Union[wlkernel.WLRDFGraph, wlkernel.WLGraph]:\n",
121 | " 'All the labels in the graph are replaced with the same label'\n",
122 | " for i in range(len(g.labels)):\n",
123 | " for k in g.labels[i].keys():\n",
124 | " g.labels[i][k] = 'banana'\n",
125 | " return g"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 9,
131 | "metadata": {},
132 | "outputs": [],
133 | "source": [
134 | "RANDOM_STATE = 42\n",
135 | "\n",
136 | "depth_values = [1, 2, 3]\n",
137 | "iteration_values = [0, 2, 4, 6]\n",
138 | "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
139 | "\n",
140 | "results = OrderedDict()\n",
141 | "\n",
142 | "for d in depth_values:\n",
143 | " for it in iteration_values:\n",
144 | " wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=d)\n",
145 | " bananize(wlrdf_graph)\n",
146 | " kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=it)\n",
147 | " kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
148 | " \n",
149 | " results[(d, it)] = [0, 0, 0]\n",
150 | " for c in C_values:\n",
151 | " classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
152 | " scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
153 | " \n",
154 | " acc_mean = scores['test_accuracy'].mean()\n",
155 | " f1_mean = scores['test_f1_macro'].mean()\n",
156 | " \n",
157 | " if acc_mean > results[(d, it)][0]:\n",
158 | " results[(d, it)] = [acc_mean, f1_mean, c]"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 10,
164 | "metadata": {},
165 | "outputs": [
166 | {
167 | "data": {
168 | "text/html": [
169 | "\n",
170 | "\n",
183 | "
\n",
184 | " \n",
185 | " \n",
186 | " | \n",
187 | " | \n",
188 | " accuracy | \n",
189 | " f1 | \n",
190 | " C | \n",
191 | "
\n",
192 | " \n",
193 | " depth | \n",
194 | " iterations | \n",
195 | " | \n",
196 | " | \n",
197 | " | \n",
198 | "
\n",
199 | " \n",
200 | " \n",
201 | " \n",
202 | " 1 | \n",
203 | " 0 | \n",
204 | " 0.524847 | \n",
205 | " 0.305547 | \n",
206 | " 100.0 | \n",
207 | "
\n",
208 | " \n",
209 | " 2 | \n",
210 | " 0.647536 | \n",
211 | " 0.566394 | \n",
212 | " 100.0 | \n",
213 | "
\n",
214 | " \n",
215 | " 4 | \n",
216 | " 0.670780 | \n",
217 | " 0.591060 | \n",
218 | " 100.0 | \n",
219 | "
\n",
220 | " \n",
221 | " 6 | \n",
222 | " 0.677030 | \n",
223 | " 0.594329 | \n",
224 | " 100.0 | \n",
225 | "
\n",
226 | " \n",
227 | " 2 | \n",
228 | " 0 | \n",
229 | " 0.565936 | \n",
230 | " 0.340732 | \n",
231 | " 10.0 | \n",
232 | "
\n",
233 | " \n",
234 | " 2 | \n",
235 | " 0.681422 | \n",
236 | " 0.622212 | \n",
237 | " 100.0 | \n",
238 | "
\n",
239 | " \n",
240 | " 4 | \n",
241 | " 0.740048 | \n",
242 | " 0.663960 | \n",
243 | " 100.0 | \n",
244 | "
\n",
245 | " \n",
246 | " 6 | \n",
247 | " 0.762597 | \n",
248 | " 0.688069 | \n",
249 | " 100.0 | \n",
250 | "
\n",
251 | " \n",
252 | " 3 | \n",
253 | " 0 | \n",
254 | " 0.407394 | \n",
255 | " 0.293320 | \n",
256 | " 1.0 | \n",
257 | "
\n",
258 | " \n",
259 | " 2 | \n",
260 | " 0.898914 | \n",
261 | " 0.861681 | \n",
262 | " 100.0 | \n",
263 | "
\n",
264 | " \n",
265 | " 4 | \n",
266 | " 0.892079 | \n",
267 | " 0.854304 | \n",
268 | " 100.0 | \n",
269 | "
\n",
270 | " \n",
271 | " 6 | \n",
272 | " 0.893066 | \n",
273 | " 0.851358 | \n",
274 | " 10.0 | \n",
275 | "
\n",
276 | " \n",
277 | "
\n",
278 | "
"
279 | ],
280 | "text/plain": [
281 | " accuracy f1 C\n",
282 | "depth iterations \n",
283 | "1 0 0.524847 0.305547 100.0\n",
284 | " 2 0.647536 0.566394 100.0\n",
285 | " 4 0.670780 0.591060 100.0\n",
286 | " 6 0.677030 0.594329 100.0\n",
287 | "2 0 0.565936 0.340732 10.0\n",
288 | " 2 0.681422 0.622212 100.0\n",
289 | " 4 0.740048 0.663960 100.0\n",
290 | " 6 0.762597 0.688069 100.0\n",
291 | "3 0 0.407394 0.293320 1.0\n",
292 | " 2 0.898914 0.861681 100.0\n",
293 | " 4 0.892079 0.854304 100.0\n",
294 | " 6 0.893066 0.851358 10.0"
295 | ]
296 | },
297 | "execution_count": 10,
298 | "metadata": {},
299 | "output_type": "execute_result"
300 | }
301 | ],
302 | "source": [
303 | "fn = 'wlrdf_no_labels'\n",
304 | "\n",
305 | "df_res = pd.DataFrame(index=list(results.keys()))\n",
306 | "df_res['accuracy'] = [t[0] for t in results.values()]\n",
307 | "df_res['f1'] = [t[1] for t in results.values()]\n",
308 | "df_res['C'] = [t[2] for t in results.values()]\n",
309 | "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
310 | "df_res.to_csv(f'../results/{fn}.csv')\n",
311 | "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
312 | "df_res_test.to_html(f'../results/{fn}.html')\n",
313 | "df_res_test"
314 | ]
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {},
319 | "source": [
320 | "### Weisfeiler-Lehman"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": 11,
326 | "metadata": {},
327 | "outputs": [],
328 | "source": [
329 | "RANDOM_STATE = 42\n",
330 | "\n",
331 | "depth_values = [1, 2, 3]\n",
332 | "iteration_values = [0, 2, 4, 6]\n",
333 | "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
334 | "\n",
335 | "results = OrderedDict()\n",
336 | "\n",
337 | "for d in depth_values:\n",
338 | " for it in iteration_values:\n",
339 | " wl_graphs = [bananize(\n",
340 | " wlkernel.WLGraph(triples, instance, max_depth=d)\n",
341 | " ) for instance in instances]\n",
342 | " kernel_matrix = wlkernel.wl_kernel_matrix(wl_graphs, iterations=it)\n",
343 | " kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
344 | " \n",
345 | " results[(d, it)] = [0, 0, 0]\n",
346 | " for c in C_values:\n",
347 | " classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
348 | " scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
349 | " \n",
350 | " acc_mean = scores['test_accuracy'].mean()\n",
351 | " f1_mean = scores['test_f1_macro'].mean()\n",
352 | " \n",
353 | " if acc_mean > results[(d, it)][0]:\n",
354 | " results[(d, it)] = [acc_mean, f1_mean, c]"
355 | ]
356 | },
357 | {
358 | "cell_type": "code",
359 | "execution_count": 12,
360 | "metadata": {},
361 | "outputs": [
362 | {
363 | "data": {
364 | "text/html": [
365 | "\n",
366 | "\n",
379 | "
\n",
380 | " \n",
381 | " \n",
382 | " | \n",
383 | " | \n",
384 | " accuracy | \n",
385 | " f1 | \n",
386 | " C | \n",
387 | "
\n",
388 | " \n",
389 | " depth | \n",
390 | " iterations | \n",
391 | " | \n",
392 | " | \n",
393 | " | \n",
394 | "
\n",
395 | " \n",
396 | " \n",
397 | " \n",
398 | " 1 | \n",
399 | " 0 | \n",
400 | " 0.322153 | \n",
401 | " 0.194477 | \n",
402 | " 100.0 | \n",
403 | "
\n",
404 | " \n",
405 | " 2 | \n",
406 | " 0.530111 | \n",
407 | " 0.348672 | \n",
408 | " 10.0 | \n",
409 | "
\n",
410 | " \n",
411 | " 4 | \n",
412 | " 0.530111 | \n",
413 | " 0.347049 | \n",
414 | " 10.0 | \n",
415 | "
\n",
416 | " \n",
417 | " 6 | \n",
418 | " 0.530111 | \n",
419 | " 0.347049 | \n",
420 | " 10.0 | \n",
421 | "
\n",
422 | " \n",
423 | " 2 | \n",
424 | " 0 | \n",
425 | " 0.564547 | \n",
426 | " 0.355253 | \n",
427 | " 10.0 | \n",
428 | "
\n",
429 | " \n",
430 | " 2 | \n",
431 | " 0.503724 | \n",
432 | " 0.343148 | \n",
433 | " 1.0 | \n",
434 | "
\n",
435 | " \n",
436 | " 4 | \n",
437 | " 0.481437 | \n",
438 | " 0.392366 | \n",
439 | " 100.0 | \n",
440 | "
\n",
441 | " \n",
442 | " 6 | \n",
443 | " 0.502999 | \n",
444 | " 0.383461 | \n",
445 | " 1.0 | \n",
446 | "
\n",
447 | " \n",
448 | " 3 | \n",
449 | " 0 | \n",
450 | " 0.491697 | \n",
451 | " 0.343404 | \n",
452 | " 100.0 | \n",
453 | "
\n",
454 | " \n",
455 | " 2 | \n",
456 | " 0.641333 | \n",
457 | " 0.527556 | \n",
458 | " 100.0 | \n",
459 | "
\n",
460 | " \n",
461 | " 4 | \n",
462 | " 0.724551 | \n",
463 | " 0.602677 | \n",
464 | " 10.0 | \n",
465 | "
\n",
466 | " \n",
467 | " 6 | \n",
468 | " 0.713474 | \n",
469 | " 0.557335 | \n",
470 | " 100.0 | \n",
471 | "
\n",
472 | " \n",
473 | "
\n",
474 | "
"
475 | ],
476 | "text/plain": [
477 | " accuracy f1 C\n",
478 | "depth iterations \n",
479 | "1 0 0.322153 0.194477 100.0\n",
480 | " 2 0.530111 0.348672 10.0\n",
481 | " 4 0.530111 0.347049 10.0\n",
482 | " 6 0.530111 0.347049 10.0\n",
483 | "2 0 0.564547 0.355253 10.0\n",
484 | " 2 0.503724 0.343148 1.0\n",
485 | " 4 0.481437 0.392366 100.0\n",
486 | " 6 0.502999 0.383461 1.0\n",
487 | "3 0 0.491697 0.343404 100.0\n",
488 | " 2 0.641333 0.527556 100.0\n",
489 | " 4 0.724551 0.602677 10.0\n",
490 | " 6 0.713474 0.557335 100.0"
491 | ]
492 | },
493 | "execution_count": 12,
494 | "metadata": {},
495 | "output_type": "execute_result"
496 | }
497 | ],
498 | "source": [
499 | "fn = 'wl_no_labels'\n",
500 | "\n",
501 | "df_res = pd.DataFrame(index=list(results.keys()))\n",
502 | "df_res['accuracy'] = [t[0] for t in results.values()]\n",
503 | "df_res['f1'] = [t[1] for t in results.values()]\n",
504 | "df_res['C'] = [t[2] for t in results.values()]\n",
505 | "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
506 | "df_res.to_csv(f'../results/{fn}.csv')\n",
507 | "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
508 | "df_res_test.to_html(f'../results/{fn}.html')\n",
509 | "df_res_test"
510 | ]
511 | }
512 | ],
513 | "metadata": {
514 | "kernelspec": {
515 | "display_name": "Python 3",
516 | "language": "python",
517 | "name": "python3"
518 | },
519 | "language_info": {
520 | "codemirror_mode": {
521 | "name": "ipython",
522 | "version": 3
523 | },
524 | "file_extension": ".py",
525 | "mimetype": "text/x-python",
526 | "name": "python",
527 | "nbconvert_exporter": "python",
528 | "pygments_lexer": "ipython3",
529 | "version": "3.7.3"
530 | }
531 | },
532 | "nbformat": 4,
533 | "nbformat_minor": 2
534 | }
535 |
--------------------------------------------------------------------------------
/presentation/img/07-Graph.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-Graph.pdf
--------------------------------------------------------------------------------
/presentation/img/07-almost_relabeled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-almost_relabeled.pdf
--------------------------------------------------------------------------------
/presentation/img/07-relabeled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-relabeled.pdf
--------------------------------------------------------------------------------
/presentation/img/07-relabeled_vertical.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-relabeled_vertical.pdf
--------------------------------------------------------------------------------
/presentation/img/07-subGraph_A1_B1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-subGraph_A1_B1.pdf
--------------------------------------------------------------------------------
/presentation/img/07-subGraph_A1_B1_vertical.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-subGraph_A1_B1_vertical.pdf
--------------------------------------------------------------------------------
/presentation/img/wl_iteration_total.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/wl_iteration_total.png
--------------------------------------------------------------------------------
/presentation/img/wl_iteration_upper.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/wl_iteration_upper.png
--------------------------------------------------------------------------------
/presentation/presentation.nav:
--------------------------------------------------------------------------------
1 | \headcommand {\slideentry {0}{0}{1}{1/1}{}{0}}
2 | \headcommand {\beamer@framepages {1}{1}}
3 | \headcommand {\slideentry {0}{0}{2}{2/2}{}{0}}
4 | \headcommand {\beamer@framepages {2}{2}}
5 | \headcommand {\slideentry {0}{0}{3}{3/3}{}{0}}
6 | \headcommand {\beamer@framepages {3}{3}}
7 | \headcommand {\slideentry {0}{0}{4}{4/4}{}{0}}
8 | \headcommand {\beamer@framepages {4}{4}}
9 | \headcommand {\slideentry {0}{0}{5}{5/5}{}{0}}
10 | \headcommand {\beamer@framepages {5}{5}}
11 | \headcommand {\slideentry {0}{0}{6}{6/6}{}{0}}
12 | \headcommand {\beamer@framepages {6}{6}}
13 | \headcommand {\slideentry {0}{0}{7}{7/7}{}{0}}
14 | \headcommand {\beamer@framepages {7}{7}}
15 | \headcommand {\slideentry {0}{0}{8}{8/8}{}{0}}
16 | \headcommand {\beamer@framepages {8}{8}}
17 | \headcommand {\slideentry {0}{0}{9}{9/9}{}{0}}
18 | \headcommand {\beamer@framepages {9}{9}}
19 | \headcommand {\slideentry {0}{0}{10}{10/10}{}{0}}
20 | \headcommand {\beamer@framepages {10}{10}}
21 | \headcommand {\slideentry {0}{0}{11}{11/11}{}{0}}
22 | \headcommand {\beamer@framepages {11}{11}}
23 | \headcommand {\slideentry {0}{0}{12}{12/12}{}{0}}
24 | \headcommand {\beamer@framepages {12}{12}}
25 | \headcommand {\slideentry {0}{0}{13}{13/13}{}{0}}
26 | \headcommand {\beamer@framepages {13}{13}}
27 | \headcommand {\slideentry {0}{0}{14}{14/14}{}{0}}
28 | \headcommand {\beamer@framepages {14}{14}}
29 | \headcommand {\slideentry {0}{0}{15}{15/15}{}{0}}
30 | \headcommand {\beamer@framepages {15}{15}}
31 | \headcommand {\slideentry {0}{0}{16}{16/16}{}{0}}
32 | \headcommand {\beamer@framepages {16}{16}}
33 | \headcommand {\slideentry {0}{0}{17}{17/17}{}{0}}
34 | \headcommand {\beamer@framepages {17}{17}}
35 | \headcommand {\slideentry {0}{0}{18}{18/18}{}{0}}
36 | \headcommand {\beamer@framepages {18}{18}}
37 | \headcommand {\slideentry {0}{0}{19}{19/19}{}{0}}
38 | \headcommand {\beamer@framepages {19}{19}}
39 | \headcommand {\slideentry {0}{0}{20}{20/20}{}{0}}
40 | \headcommand {\beamer@framepages {20}{20}}
41 | \headcommand {\slideentry {0}{0}{21}{21/21}{}{0}}
42 | \headcommand {\beamer@framepages {21}{21}}
43 | \headcommand {\beamer@partpages {1}{21}}
44 | \headcommand {\beamer@subsectionpages {1}{21}}
45 | \headcommand {\beamer@sectionpages {1}{21}}
46 | \headcommand {\beamer@documentpages {21}}
47 | \headcommand {\gdef \inserttotalframenumber {21}}
48 |
--------------------------------------------------------------------------------
/presentation/presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/presentation.pdf
--------------------------------------------------------------------------------
/presentation/presentation.tex:
--------------------------------------------------------------------------------
1 | \documentclass{beamer}
2 |
3 | \mode<presentation> {
4 |
5 | % The Beamer class comes with a number of default slide themes
6 | % which change the colors and layouts of slides. Below this is a list
7 | % of all the themes, uncomment each in turn to see what they look like.
8 |
9 | %\usetheme{default}
10 | %\usetheme{AnnArbor}
11 | %\usetheme{Antibes}
12 | %\usetheme{Bergen}
13 | %\usetheme{Berkeley}
14 | %\usetheme{Berlin}
15 | %\usetheme{Boadilla}
16 | %\usetheme{CambridgeUS}
17 | %\usetheme{Copenhagen}
18 | %\usetheme{Darmstadt}
19 | %\usetheme{Dresden}
20 | %\usetheme{Frankfurt}
21 | %\usetheme{Goettingen}
22 | %\usetheme{Hannover}
23 | %\usetheme{Ilmenau}
24 | %\usetheme{JuanLesPins}
25 | %\usetheme{Luebeck}
26 | \usetheme{Madrid}
27 | %\usetheme{Malmoe}
28 | %\usetheme{Marburg}
29 | %\usetheme{Montpellier}
30 | %\usetheme{PaloAlto}
31 | %\usetheme{Pittsburgh}
32 | %\usetheme{Rochester}
33 | %\usetheme{Singapore}
34 | %\usetheme{Szeged}
35 | %\usetheme{Warsaw}
36 |
37 | % As well as themes, the Beamer class has a number of color themes
38 | % for any slide theme. Uncomment each of these in turn to see how it
39 | % changes the colors of your current slide theme.
40 |
41 | %\usecolortheme{albatross}
42 | %\usecolortheme{beaver}
43 | %\usecolortheme{beetle}
44 | %\usecolortheme{crane}
45 | %\usecolortheme{dolphin}
46 | %\usecolortheme{dove}
47 | %\usecolortheme{fly}
48 | %\usecolortheme{lily}
49 | %\usecolortheme{orchid}
50 | %\usecolortheme{rose}
51 | %\usecolortheme{seagull}
52 | %\usecolortheme{seahorse}
53 | %\usecolortheme{whale}
54 | %\usecolortheme{wolverine}
55 |
56 | %\setbeamertemplate{footline} % To remove the footer line in all slides uncomment this line
57 | %\setbeamertemplate{footline}[page number] % To replace the footer line in all slides with a simple slide count uncomment this line
58 |
59 | %\setbeamertemplate{navigation symbols}{} % To remove the navigation symbols from the bottom of all slides uncomment this line
60 | }
61 |
62 | \usepackage{graphicx} % Allows including images
63 | \usepackage{booktabs} % Allows the use of \toprule, \midrule and \bottomrule in tables
64 | \usepackage[utf8]{inputenc}
65 | \usepackage{float}
66 | \usepackage{subcaption}
67 |
68 | %----------------------------------------------------------------------------------------
69 | % TITLE PAGE
70 | %----------------------------------------------------------------------------------------
71 |
72 | \title[A Fast Approximation of WL RDF kernel]{A Fast Approximation of the Weisfeiler-Lehman Graph Kernel for RDF Data}
73 | \subtitle{Advanced Algorithms and Graph Mining}
74 | \author{Lorenzo Palloni \and Emilio Cecchini}
75 | \institute[]{
76 | Università Degli Studi di Firenze \\
77 | \medskip
78 | \textit{lorenzo.palloni@stud.unifi.it \and emilio.cecchini@stud.unifi.it}
79 | }
80 | \date{\today}
81 |
82 | \begin{document}
83 |
84 | \begin{frame}
85 | \titlepage % Print the title page as the first slide
86 | \end{frame}
87 |
88 | %----------------------------------------------------------------------------------------
89 | % PRESENTATION SLIDES
90 | %----------------------------------------------------------------------------------------
91 |
92 | \begin{frame}
93 | \frametitle{Introduction}
94 |
95 | \begin{itemize}
96 | \item
97 | In order to apply machine learning algorithms on graphs it is necessary to develop algorithms to compute how similar two graphs are.
98 |
99 | \item
100 | Starting from the well-known Weisfeiler-Lehman isomorphism test, kernel methods to measure the similarity between graphs have been developed.
101 |
102 | \item
103 | This paper proposes a fast approximation of a Weisfeiler-Lehman kernel applied to RDF data.
104 |
105 |
106 | \end{itemize}
107 |
108 | \end{frame}
109 |
110 | \begin{frame}
111 | \frametitle{Graph Kernels}
112 |
113 | Kernel-based machine learning algorithms abandon the explicit vector representations of data items by means of the \textit{kernel function}.
114 |
115 | \begin{definition}[Graph Kernel]
116 | Let $\mathbb{G}$ be a non-empty set of graphs. Any function $k: \mathbb{G} \times \mathbb{G} \rightarrow \mathbb{R}$ that takes as input two graphs $G$ and $G^\prime$ and returns a real number that is equal to the scalar product between $G$ and $G^\prime$ in a (possibly unknown) feature space is a valid kernel function.
117 | \end{definition}
118 |
119 | \end{frame}
120 |
121 | %------------------------------------------------
122 |
123 | \begin{frame}
124 | \frametitle{Graph isomorphism}
125 |
126 | \begin{itemize}
127 | \item
128 | Two graphs $G$ and $G^\prime$ are isomorphic if there exists a bijective mapping between the nodes of $G$ and the nodes of $G^\prime$ that preserves the edges and the labels.
129 |
130 | \item
131 | The graph isomorphism problem is in NP, but it is neither known to be solvable in polynomial time nor known to be NP-complete.
132 |
133 | \item
134 | The graph kernel introduced in this paper uses concepts from the \textit{Weisfeiler-Lehman test} of isomorphism.
135 | \end{itemize}
136 |
137 | \end{frame}
138 |
139 | %------------------------------------------------
140 |
141 | \begin{frame}
142 | \frametitle{Weisfeiler-Lehman test}
143 |
144 | \begin{itemize}
145 | \item
146 | Assume we are given two graphs $G$ and $G^\prime$ and we would like to test whether they are isomorphic.
147 | \item
148 | The Weisfeiler-Lehman test performs $h$ iterations.
149 | \item
150 | The key idea of the algorithm is to augment the node labels by the sorted set of node labels of neighbouring nodes, and compress these augmented labels into new, short labels.
151 | \item
152 | These steps are then repeated until the node label sets of $G$ and $G^\prime$ differ, or the number of iterations reaches $h$.
153 | \item
154 | The runtime complexity of the Weisfeiler-Lehman algorithm with $h$ iterations is $O(hk)$, where $k$ is the number of labels in $G$ and $G^\prime$.
155 | \end{itemize}
156 |
157 | \end{frame}
158 |
159 | %------------------------------------------------
160 |
161 | \begin{frame}
162 | \frametitle{Weisfeiler-Lehman test}
163 |
164 | \begin{center}
165 | \begin{figure}
166 | \end{figure}
167 | \includegraphics[width=\textwidth]{img/wl_iteration_upper.png}
168 | \end{center}
169 |
170 | \end{frame}
171 |
172 | %------------------------------------------------
173 |
174 | \begingroup
175 | \small
176 | \begin{frame}
177 | \frametitle{Weisfeiler-Lehman kernel}
178 |
179 | \begin{definition}[Weisfeiler-Lehman kernel]
180 | Let $G_i = (V, E, \ell_i)$ and $G_i^\prime = (V^\prime , E^\prime , \ell_i)$ be the $i$-th iteration rewriting of the graphs $G$ and $G^\prime$ with the Weisfeiler-Lehman algorithm and $h$ the number of iterations. Then the Weisfeiler-Lehman kernel is defined as:
181 |
182 | \begin{align}
183 | k_{\mathrm{WL}}^{h}\left(G, G^\prime\right)=\sum_{i=0}^h k_\delta\left(G_i, G_i^\prime\right)
184 | \end{align}
185 |
186 | where
187 |
188 | \begin{align}
189 | k_\delta\left((V, E, \ell),\left(V^\prime, E^\prime, \ell^\prime\right)\right)=\sum_{v \in V} \sum_{v^{\prime} \in V^{\prime}} \delta\left(\ell(v), \ell^{\prime}\left(v^{\prime}\right)\right)
190 | \end{align}
191 |
192 | Here $\delta$ is the Dirac kernel, which tests for equality: it is 1 if its arguments are equal, and 0 otherwise.
193 | \end{definition}
194 |
195 | \end{frame}
196 | \endgroup
197 |
198 | %------------------------------------------------
199 |
200 | \begin{frame}
201 | \frametitle{Weisfeiler-Lehman Subtree Kernel}
202 |
203 | \begin{center}
204 | \begin{figure}
205 | \end{figure}
206 | \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{img/wl_iteration_total.png}
207 | \end{center}
208 |
209 | \end{frame}
210 |
211 | %------------------------------------------------
212 |
213 | \begin{frame}
214 | \frametitle{The Resource Description Framework}
215 |
216 | \begin{itemize}
217 | \item
218 | The Resource Description Framework (RDF) is the foundation for knowledge representation on the semantic web.
219 |
220 | \item
221 | It is based on the idea of making statements about resources in a \textit{subject-predicate-object} form, called \textit{triples}
222 |
223 | \item
224 | A set of triples represents a graph that has subjects and objects as nodes and predicates as edges (note that it is a \textit{directed multigraph with labeled edges}).
225 | \end{itemize}
226 |
227 | \end{frame}
228 |
229 | %------------------------------------------------
230 |
231 | \begin{frame}
232 | \frametitle{The Resource Description Framework}
233 |
234 | \begin{center}
235 | \begin{figure}
236 | \includegraphics[scale=0.55,keepaspectratio]{img/07-Graph}
237 | \end{figure}
238 | \end{center}
239 |
240 | \end{frame}
241 |
242 | %------------------------------------------------
243 |
244 | \begin{frame}
245 | \frametitle{Fast Weisfeiler-Lehman for RDF}
246 |
247 | \begin{itemize}
248 | \item
249 | The most immediate approach to apply graph kernels to RDF is to extract subgraphs for the instances that we are interested in and to compute the kernel on these subgraphs.
250 |
251 | \item
252 | Potentially it can be more efficient to do the kernel computation directly on the larger underlying RDF graph, instead of extracting many subgraphs.
253 |
254 | \item
255 | This paper proposes an approximation of the Weisfeiler-Lehman kernel designed for RDF data.
256 | \end{itemize}
257 |
258 | \end{frame}
259 |
260 | %------------------------------------------------
261 |
262 | \begin{frame}
263 | \frametitle{Weisfeiler-Lehman RDF graph}
264 |
265 | \begin{definition}[Weisfeiler-Lehman RDF graph]
266 | A Weisfeiler-Lehman RDF graph is a graph $G = (V, E, \ell)$, where $V$ is a set of vertices, $E$ a set of directed edges, and $\ell:(V \cup E) \times \mathbb{N} \rightarrow \Sigma$ a labeling function from vertices $V$ or edges $E$ and a depth index $j \in \mathbb{N}$ to a set of labels $\Sigma$.
267 | \end{definition}
268 |
269 | \begin{definition}[Neighborhood]
270 | The neighborhood $N(v) = \{(v^\prime, v) \in E\}$ of a vertex is the set of edges going to the vertex $v$ and the neighborhood $N((v, v^\prime)) = \{v\}$ of an edge is the vertex that the edge comes from.
271 | \end{definition}
272 |
273 | \end{frame}
274 |
275 |
276 |
277 | \begin{frame}
278 | \frametitle{Graph extraction from RDF}
279 |
280 | \begin{itemize}
281 | \item
282 | Given a set of RDF triples and a set of instances $I$, there is an algorithm to build a Weisfeiler-Lehman RDF graph.
283 |
284 | \item
285 | For each instance $i$ a subgraph up to depth $d$ is extracted from the RDF dataset and this subgraph is added to the total graph $G$ that the algorithm is building. Thus, vertices and edges are only added if they have not been added to the graph already.
286 |
287 | \item
288 | Next to the graph $G$ we also construct mappings $\mathcal{V}_i$ and $\mathcal{E}_i$ for each instance $i$, which record which vertices and edges belong to the subgraph of instance $i$ at which depth.
289 | \end{itemize}
290 |
291 | \end{frame}
292 |
293 |
294 | \begingroup
295 | \small
296 | \begin{frame}
297 | \frametitle{Graph extraction from RDF}
298 |
299 | Extraction of the instances A1 and B1.
300 |
301 | \begin{center}
302 | \begin{figure}
303 | \includegraphics[width=\textwidth,keepaspectratio]{img/07-subGraph_A1_B1}
304 | \end{figure}
305 | \end{center}
306 |
307 | \end{frame}
308 | \endgroup
309 |
310 |
311 | \begin{frame}
312 | \frametitle{Relabeling of the Weisfeiler-Lehman RDF graph}
313 |
314 | \begin{itemize}
315 | \item
316 | The relabeling process is quite similar to the standard one.
317 |
318 | \item
319 | It is extended to directed and labeled edges.
320 |
321 | \item
322 | The augmented labels are constructed taking into account the new definition of neighborhood and the depths.
323 | \end{itemize}
324 |
325 | \end{frame}
326 |
327 | %----------------------------------------------------------------------------
328 |
329 | \begingroup
330 | \small
331 | \begin{frame}
332 | \frametitle{Relabeling of the Weisfeiler-Lehman RDF graph}
333 |
334 | Label propagation.
335 |
336 | \begin{center}
337 | \begin{figure}
338 | \includegraphics[width=\textwidth,keepaspectratio]{img/07-almost_relabeled}
339 | \end{figure}
340 | \end{center}
341 |
342 | \end{frame}
343 | \endgroup
344 |
345 | %----------------------------------------------------------------------------
346 |
347 | \begingroup
348 | \small
349 | \begin{frame}
350 | \frametitle{Relabeling of the Weisfeiler-Lehman RDF graph}
351 |
352 | Relabeling.
353 |
354 | \begin{center}
355 | \begin{figure}
356 | \includegraphics[width=\textwidth,keepaspectratio]{img/07-relabeled}
357 | \end{figure}
358 | \end{center}
359 |
360 | \end{frame}
361 | \endgroup
362 |
363 | %----------------------------------------------------------------------------
364 |
365 | \begingroup
366 | \small
367 | \begin{frame}
368 | \frametitle{Weisfeiler-Lehman kernel for RDF}
369 |
370 | \begin{definition}[Weisfeiler-Lehman kernel for RDF]
371 | Let $G$ be a Weisfeiler-Lehman RDF graph rewritten for $h$ iterations, and $\ell_0$ to $\ell_h$ the resulting label functions. Then we compute a kernel between two instances $i$, $i^\prime \in I$, as:
372 |
373 | \begin{align}
374 | k_{\mathrm{WLRDF}}^{h}\left(i, i^{\prime}\right)=\sum_{n=0}^{h} \frac{n+1}{h+1} k_{\delta, \mathrm{RDF}}^{n}\left(\left(\mathcal{V}_{i}, \mathcal{E}_{i}\right),\left(\mathcal{V}_{i^{\prime}}, \mathcal{E}_{i^{\prime}}\right)\right)
375 | \end{align}
376 |
377 | where
378 |
379 | \begin{align}
380 | k_{\delta, \mathrm{RDF}}^{n}\left(\left(\mathcal{V}_{i}, \mathcal{E}_{i}\right),\left(\mathcal{V}_{i^{\prime}}, \mathcal{E}_{i^{\prime}}\right)\right) &=\sum_{(v, d) \in \mathcal{V}_{i}} \sum_{\left(v^{\prime}, d^{\prime}\right) \in \mathcal{V}_{i^{\prime}}} \delta\left(\ell_{n}(v, d), \ell_{n}\left(v^{\prime}, d^{\prime}\right)\right) \\
381 | &+\sum_{(e, d) \in \mathcal{E}_{i}} \sum_{\left(e^{\prime}, d^{\prime}\right) \in \mathcal{E}_{i^{\prime}}} \delta\left(\ell_{n}(e, d), \ell_{n}\left(e^{\prime}, d^{\prime}\right)\right)
382 | \end{align}
383 |
384 | \end{definition}
385 |
386 | \end{frame}
387 | \endgroup
388 |
389 | %------------------------------------------------
390 |
391 | \begingroup
392 | \footnotesize
393 | \begin{frame}
394 | \frametitle{Weisfeiler-Lehman kernel for RDF}
395 |
396 | \begin{exampleblock}{Example}
397 | \begin{align*}
398 | k_{\mathrm{WLRDF}}^h\left(A1, B1\right) &= \sum_{n=0}^h \frac{n+1}{h+1} k_{\delta, \mathrm{RDF}}^{n}\left(\left(\mathcal{V}_{A1}, \mathcal{E}_{A1}\right),\left(\mathcal{V}_{B1}, \mathcal{E}_{B1}\right)\right) = \dfrac{1}{2} \cdot 10 + \dfrac{2}{2} \cdot 3 = 8
399 | \end{align*}
400 | \end{exampleblock}
401 |
402 | \begin{figure}
403 | \centering
404 | \begin{subfigure}{.5\textwidth}
405 | \centering
406 | \includegraphics[width=0.9\linewidth]{img/07-subGraph_A1_B1_vertical}
407 | \end{subfigure}%
408 | \begin{subfigure}{.5\textwidth}
409 | \centering
410 | \includegraphics[width=0.9\linewidth]{img/07-relabeled_vertical}
411 | \end{subfigure}
412 | \end{figure}
413 |
414 | \end{frame}
415 | \endgroup
416 | %------------------------------------------------
417 |
418 | \begin{frame}
419 | \frametitle{Complexity}
420 |
421 | \begin{itemize}
422 | \item
423 | The complexity of the standard relabeling algorithm on a set of graphs is $O(Nh(n + m))$, where $N$ is the number of graphs, $h$ is the number of iterations and $n$ and $m$ are the number of vertices and edges per graph.
424 | \item
425 | This new relabeling method does not have $N$ graphs, but it introduces $d$ labels per vertex/edge, where $d$ is the extraction depth.
426 | \item
427 | If the WL RDF graph has $k$ nodes and edges the complexity of this new algorithm is $O(dhk)$
428 | \item
429 | The new proposed method is faster than the regular one if $dk < N(n+m)$.
430 | \end{itemize}
431 |
432 | \end{frame}
433 |
434 | %------------------------------------------------
435 |
436 | \begingroup
437 | \footnotesize
438 | \begin{frame}
439 | \frametitle{References}
440 |
441 | \begin{thebibliography}{99} % Beamer does not support BibTeX so references must be inserted manually as below
442 |
443 | \bibitem{lamport94}
444 | de Vries, Gerben Klaas Dirk,
445 | A Fast Approximation of the Weisfeiler-Lehman Graph Kernel for RDF Data,
446 | 2013
447 |
448 | \bibitem{wl-kernels}
449 | Shervashidze, N., Schweitzer, P., van Leeuwen, E.J., Mehlhorn, K., Borgwardt, K.M.
450 | Weisfeiler-lehman graph kernels,
451 | 2011
452 | \end{thebibliography}
453 |
454 | \end{frame}
455 | \endgroup
456 |
457 | %------------------------------------------------
458 |
459 | \end{document}
460 |
--------------------------------------------------------------------------------
/report/RefereeReport.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/report/RefereeReport.pdf
--------------------------------------------------------------------------------
/report/RefereeReport.tex:
--------------------------------------------------------------------------------
1 | \documentclass[12pt]{scrartcl}
2 | \usepackage[utf8]{inputenc}
3 | \usepackage{hyperref}
4 | \usepackage{booktabs}
5 | \usepackage{caption}
6 | \usepackage{graphicx}
7 |
8 | \begin{document}
9 |
10 |
11 | \title{A Fast Approximation of the Weisfeiler-Lehman Graph Kernel for RDF Data}
12 | \subtitle{Referee report}
13 | \author{
14 | Emilio Cecchini \\ \href{mailto:emilio.cecchini@stud.unifi.it}{emilio.cecchini@stud.unifi.it}
15 | \and
16 | Lorenzo Palloni \\ \href{mailto:lorenzo.palloni@stud.unifi.it}{lorenzo.palloni@stud.unifi.it}
17 | }
18 |
19 | \maketitle
20 |
21 | \section{Summary}
22 |
23 | The goal of this paper is to introduce a faster version of the Weisfeiler-Lehman graph kernel algorithm when applied to Resource Description Framework (RDF) data.
24 |
25 | The \textit{Resource Description Framework} (RDF) is the foundation for knowledge representation on the semantic web. A resource is described by a set of \textit{triples} which are of the form \textit{subject-predicate-object}. The entire collection of triples form a graph where the subjects and the objects are the nodes and the predicates are the edges.
26 |
27 | The \textit{Weisfeiler-Lehman test} is an algorithm that is used to compute graph isomorphism. The test proceeds in iterations where the key idea is to augment the node labels by the sorted set of node labels of neighbouring nodes, and compress these augmented labels into new, short labels. These steps are then repeated until the node label sets of the two graphs differ, or the number of iterations reaches the prefixed maximum.
28 |
29 | The \textit{Weisfeiler-Lehman kernel} is the state-of-the-art for graph kernels. It computes the number of subtrees shared between two graphs by using the Weisfeiler-Lehman test of graph isomorphism.
30 |
31 | This paper introduces an approximation of the Weisfeiler-Lehman kernel, which first extracts a set of subgraphs from the entire RDF graph and then the kernels are computed. For each instance a subgraph up to a certain depth is extracted from the RDF dataset and this subgraph is added to a total graph that the extraction algorithm is building. Thus, vertices and edges are only added if they have not been added to the graph already. For each node and edge, together with their labels, their extraction depth is stored. The relabeling process is the same as that of the standard Weisfeiler-Lehman test with the extension of the labels on the edges. Finally the kernel is computed by counting the number of common labels at each depth.
32 |
33 | \section{Evaluation}
34 |
35 | In this paper there is no formal theorem or proof. The author states that this kernel yields an approximation of the standard Weisfeiler-Lehman graph kernel, but he never actually gives any formal proof of the accuracy of that approximation. The comparison of the results with the standard Weisfeiler-Lehman graph kernel can be found only in the experiments section. However there is a good explanation on how the complexity of the algorithm is improved with this approximation.
36 |
37 | In the first experiment, where a classification on the SWRC ontology \cite{swrc} is performed, the author specifies that the \textit{affiliation} relation and its inverse (the \textit{employs} relation) were removed from the dataset for training purposes. We instead discovered that there are two other relationships that must be removed because they link the instances to their corresponding class: \textit{member} and \textit{head}. The fact that these two predicates were not removed from the training dataset led to a higher accuracy than the real one.
38 |
39 | The plots of the runtime experiments are inverted: the lithogenesis dataset is about ten times larger than the affiliation dataset, but the reported runtimes of the lithogenesis classification are ten times smaller than the runtimes of the affiliation prediction.
40 |
41 | This paper proposes a new method for computing graph kernels, but it is limited only to RDF data. This method exploits the fact that usually, in the RDF graphs, the extracted subgraphs share many nodes and edges. This fact limits the number of scenarios in which the method is applicable with good results.
42 |
43 | The algorithm described in this paper is an approximation of the Weisfeiler-Lehman graph kernel proposed in \cite{wl-kernels}. The approximation algorithm is very similar to the standard one described in \cite{wl-kernels}; the only differences are that the label expansion process is also extended to the edges and that the concept of \textit{depth} is introduced in order to have bigger graphs without storing duplicated nodes or edges. These two simple modifications seem to lead to a faster version, but there is not much innovation in this new proposed method.
44 |
45 | The proposed kernel method is a tool to perform machine learning algorithms on RDF data. There is a small section in the paper where the author introduces the \textit{Resource Description Framework}, but it is never clearly explained what it means to perform a classification on that kind of data.
46 |
47 | During the extraction process of the subgraphs of the instances, the algorithm keeps track of the extraction depth at which each node and edge was extracted. In the paper there is confusion about the order of the index of the depth. In the pseudocode of the algorithm the depth is counted backward, that is, the root has index equal to the maximum extraction depth while the leaves of the tree have depth equal to zero. In the explanation of the algorithm, instead, the author describes the process with the indexes inverted.
48 |
49 | The datasets used in the experiments are still available online. There is a GitHub repository that contains the source code of the experiments but it is quite old and we were not able to compile and to run it.
50 |
51 |
52 | \section{Replication of the experiments}
53 |
54 | Since we were not able to compile and to run the experiments done by the author, we have implemented a small part of the experiments in order to assess the validity of the results in the paper. We have implemented the standard Weisfeiler-Lehman graph kernel and its approximation proposed in the paper. The source code for the kernels and the experiments is available online\footnote{\url{https://github.com/deeplego/wl-graph-kernels}}. We have performed a classification on the AIFB dataset \cite{swrc} and the ``Named Rock Units'' dataset of the British Geological Survey. We have used the C-Support Vector Machine algorithm found in the scikit-learn Python package. We have tried to compute the accuracy of the classification with the same method described in the paper, that is a 10-fold cross-validation; however, we slightly simplified the process of computing the accuracy of the model in relation to the C parameter of the SVM. We executed a 10-fold cross-validation for each value of C in $\{10^{-3}, 10^{-2}, 10^{-1}, 1, 10^1, 10^2, 10^3\}$ and then we took the best accuracy value. The results of the classifications are reported in the tables below.
55 |
56 | \newpage
57 |
58 | \begin{center}
59 | \captionof{table}{Affiliation prediction with the standard Weisfeiler-Lehman kernel}
60 | \begin{tabular}{ccccc}
61 | \toprule
62 | depth & iterations & accuracy & f1 & C \\
63 | \midrule
64 | 1 & 0 & 0.842337 & 0.772552 & 100.0 \\
65 | & 2 & 0.836782 & 0.755789 & 100.0 \\
66 | & 4 & 0.836782 & 0.755789 & 100.0 \\
67 | & 6 & 0.836782 & 0.755789 & 100.0 \\
68 | \hline
69 | 2 & 0 & 0.892516 & 0.836455 & 100.0 \\
70 | & 2 & 0.826180 & 0.742251 & 100.0 \\
71 | & 4 & 0.774069 & 0.618519 & 100.0 \\
72 | & 6 & 0.740048 & 0.568392 & 100.0 \\
73 | \hline
74 | 3 & 0 & 0.892591 & 0.850147 & 100.0 \\
75 | & 2 & 0.897779 & 0.848919 & 100.0 \\
76 | & 4 & 0.909258 & 0.860964 & 100.0 \\
77 | & 6 & 0.881044 & 0.796105 & 100.0 \\
78 | \bottomrule
79 | \end{tabular}
80 | \end{center}
81 |
82 | \begin{center}
83 | \captionof{table}{Affiliation prediction with the Weisfeiler-Lehman kernel for RDF}
84 | \begin{tabular}{ccccc}
85 | \toprule
86 | depth & iterations & accuracy & f1 & C \\
87 | \midrule
88 | 1 & 0 & 0.881955 & 0.795756 & 100.0 \\
89 | & 2 & 0.881955 & 0.795756 & 100.0 \\
90 | & 4 & 0.881955 & 0.795756 & 100.0 \\
91 | & 6 & 0.881955 & 0.795756 & 100.0 \\
92 | \hline
93 | 2 & 0 & 0.892114 & 0.826007 & 100.0 \\
94 | & 2 & 0.880057 & 0.812488 & 100.0 \\
95 | & 4 & 0.874501 & 0.803701 & 100.0 \\
96 | & 6 & 0.874501 & 0.800821 & 100.0 \\
97 | \hline
98 | 3 & 0 & 0.879579 & 0.812187 & 100.0 \\
99 | & 2 & 0.913751 & 0.867388 & 100.0 \\
100 | & 4 & 0.908196 & 0.863829 & 100.0 \\
101 | & 6 & 0.908196 & 0.863829 & 100.0 \\
102 | \bottomrule
103 | \end{tabular}
104 | \end{center}
105 |
106 | \newpage
107 |
108 | \begin{center}
109 | \captionof{table}{Lithogenesis prediction with the standard Weisfeiler-Lehman kernel}
110 | \begin{tabular}{ccccc}
111 | \toprule
112 | depth & iterations & accuracy & f1 & C \\
113 | \midrule
114 | 1 & 0 & 0.802679 & 0.774383 & 10.0 \\
115 | & 2 & 0.796429 & 0.768842 & 10.0 \\
116 | & 4 & 0.796429 & 0.768842 & 10.0 \\
117 | & 6 & 0.796429 & 0.768842 & 10.0 \\
118 | \hline
119 | 2 & 0 & 0.891964 & 0.877311 & 100.0 \\
120 | & 2 & 0.892857 & 0.874092 & 1.0 \\
121 | & 4 & 0.873214 & 0.854485 & 1.0 \\
122 | & 6 & 0.865179 & 0.841353 & 1.0 \\
123 | \hline
124 | 3 & 0 & 0.883929 & 0.871406 & 100.0 \\
125 | & 2 & 0.913393 & 0.898291 & 1.0 \\
126 | & 4 & 0.906250 & 0.890922 & 1.0 \\
127 | & 6 & 0.906250 & 0.890922 & 1.0 \\
128 | \bottomrule
129 | \end{tabular}
130 | \end{center}
131 |
132 | \begin{center}
133 | \captionof{table}{Lithogenesis prediction with the Weisfeiler-Lehman kernel for RDF}
134 | \begin{tabular}{ccccc}
135 | \toprule
136 | depth & iterations & accuracy & f1 & C \\
137 | \midrule
138 | 1 & 0 & 0.795536 & 0.763739 & 10.0 \\
139 | & 2 & 0.795536 & 0.763739 & 10.0 \\
140 | & 4 & 0.795536 & 0.763739 & 10.0 \\
141 | & 6 & 0.795536 & 0.763739 & 10.0 \\
142 | \hline
143 | 2 & 0 & 0.906250 & 0.891229 & 100.0 \\
144 | & 2 & 0.892857 & 0.874092 & 1.0 \\
145 | & 4 & 0.892857 & 0.874092 & 1.0 \\
146 | & 6 & 0.885714 & 0.866606 & 1.0 \\
147 | \hline
148 | 3 & 0 & 0.891071 & 0.875862 & 100.0 \\
149 | & 2 & 0.891964 & 0.873422 & 1.0 \\
150 | & 4 & 0.906250 & 0.890104 & 1.0 \\
151 | & 6 & 0.907143 & 0.888829 & 1.0 \\
152 | \bottomrule
153 | \end{tabular}
154 | \end{center}
155 |
156 | The accuracy values are almost the same as those reported in the paper. There is not much difference between the standard Weisfeiler-Lehman algorithm and its approximation in terms of accuracy.
157 |
158 | We have also replicated the experiment of the affiliation prediction where all the labels were removed from the graph. The results are given in the two tables below. This is the best scenario for the Weisfeiler-Lehman kernel for RDF data. As reported in the paper, these results are very similar to the performance on labeled graphs.
159 |
160 | \newpage
161 |
162 | \begin{center}
163 | \captionof{table}{Affiliation prediction with the standard Weisfeiler-Lehman kernel with all labels removed}
164 | \begin{tabular}{ccccc}
165 | \toprule
166 | depth & iterations & accuracy & f1 & C \\
167 | \midrule
168 | 1 & 0 & 0.322153 & 0.194477 & 100.0 \\
169 | & 2 & 0.530111 & 0.348672 & 10.0 \\
170 | & 4 & 0.530111 & 0.347049 & 10.0 \\
171 | & 6 & 0.530111 & 0.347049 & 10.0 \\
172 | \hline
173 | 2 & 0 & 0.564547 & 0.355253 & 10.0 \\
174 | & 2 & 0.503724 & 0.343148 & 1.0 \\
175 | & 4 & 0.481437 & 0.392366 & 100.0 \\
176 | & 6 & 0.502999 & 0.383461 & 1.0 \\
177 | \hline
178 | 3 & 0 & 0.491697 & 0.343404 & 100.0 \\
179 | & 2 & 0.641333 & 0.527556 & 100.0 \\
180 | & 4 & 0.724551 & 0.602677 & 10.0 \\
181 | & 6 & 0.713474 & 0.557335 & 100.0 \\
182 | \bottomrule
183 | \end{tabular}
184 | \end{center}
185 |
186 | \begin{center}
187 | \captionof{table}{Affiliation prediction with the Weisfeiler-Lehman kernel for RDF with all labels removed}
188 | \begin{tabular}{ccccc}
189 | \toprule
190 | depth & iterations & accuracy & f1 & C \\
191 | \midrule
192 | 1 & 0 & 0.524847 & 0.305547 & 100.0 \\
193 | & 2 & 0.647536 & 0.566394 & 100.0 \\
194 | & 4 & 0.670780 & 0.591060 & 100.0 \\
195 | & 6 & 0.677030 & 0.594329 & 100.0 \\
196 | \hline
197 | 2 & 0 & 0.565936 & 0.340732 & 10.0 \\
198 | & 2 & 0.681422 & 0.622212 & 100.0 \\
199 | & 4 & 0.740048 & 0.663960 & 100.0 \\
200 | & 6 & 0.762597 & 0.688069 & 100.0 \\
201 | \hline
202 | 3 & 0 & 0.407394 & 0.293320 & 1.0 \\
203 | & 2 & 0.898914 & 0.861681 & 100.0 \\
204 | & 4 & 0.892079 & 0.854304 & 100.0 \\
205 | & 6 & 0.893066 & 0.851358 & 10.0 \\
206 | \bottomrule
207 | \end{tabular}
208 | \end{center}
209 |
210 | \newpage
211 |
212 | Since this new method is supposed to be faster, we also replicated the experiments on the runtimes. The Weisfeiler-Lehman for RDF method is slightly faster than the regular one, but we were not able to see such a large improvement in the runtime as reported in the paper.
213 |
214 | \begin{center}
215 | \begin{figure}[h]
216 | \caption{Runtimes of the two kernels on the affiliation dataset}
217 | \includegraphics[width=\textwidth]{img/affiliation_timing.png}
218 | \end{figure}
219 | \end{center}
220 |
221 | \begin{center}
222 | \begin{figure}[h]
223 | \caption{Runtimes of the two kernels on the lithogenesis dataset}
224 | \includegraphics[width=\textwidth]{img/lithogenesis_timing.png}
225 | \end{figure}
226 | \end{center}
227 |
228 | \newpage
229 |
230 | \begin{thebibliography}{9}
231 |
232 | \bibitem{lamport94}
233 | de Vries, Gerben Klaas Dirk,
234 | A Fast Approximation of the Weisfeiler-Lehman Graph Kernel for RDF Data,
235 | 2013
236 |
237 | \bibitem{swrc}
238 | Sure, Y., Bloehdorn, S., Haase, P., Hartmann, J., Oberle, D.,
239 | The swrc ontology - semantic web for research communities.
240 | Volume 3803 of LNCS., Covilha,
241 | Portugal, Springer (December 2005) 218 – 231
242 |
243 | \bibitem{wl-kernels}
244 | Shervashidze, N., Schweitzer, P., van Leeuwen, E.J., Mehlhorn, K., Borgwardt, K.M.
245 | Weisfeiler-lehman graph kernels,
246 | 2011
247 |
248 | \end{thebibliography}
249 |
250 | \end{document}
251 |
--------------------------------------------------------------------------------
/report/img/affiliation_timing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/report/img/affiliation_timing.png
--------------------------------------------------------------------------------
/report/img/lithogenesis_timing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/report/img/lithogenesis_timing.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-learn
2 | numpy
3 | nptyping
4 | rdflib
5 | path.py
6 | pytest
7 | pytest-cov
8 |
--------------------------------------------------------------------------------
/results/affiliation_timing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/results/affiliation_timing.png
--------------------------------------------------------------------------------
/results/csv_to_latex.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 |
3 | from path import Path
4 | import pandas as pd
5 |
6 |
7 | def convert(fn):  # Read the CSV file `fn` and write it out as a LaTeX table; `fn` must provide .stripext() (the script passes a path.Path)
8 | df = pd.read_csv(fn, index_col=['depth', 'iterations'])  # 'depth' and 'iterations' become the row index rather than data columns
9 | df.to_latex(f'{fn.stripext()}.tex')  # output name is fn.stripext() + '.tex' -- presumably the input path with its extension removed; verify against path.py
10 |
11 |
12 | if __name__ == '__main__':  # command-line entry point: convert the CSV given with --file/-f
13 | parser = ArgumentParser()
14 | parser.add_argument('--file', '-f', type=str)  # path of the CSV to convert; not marked required, so flags.file is None when omitted
15 | flags = parser.parse_args()
16 | convert(Path(flags.file))  # wrapped in path.Path because convert() calls .stripext() on its argument
17 |
--------------------------------------------------------------------------------
/results/lithogenesis_timing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/results/lithogenesis_timing.png
--------------------------------------------------------------------------------
/results/wl_affiliation_results.csv:
--------------------------------------------------------------------------------
1 | depth,iterations,accuracy,f1,C
2 | 1,0,0.7564907980736154,0.6433234000261864,1.0
3 | 1,2,0.7735251117991055,0.6534813180885163,1.0
4 | 1,4,0.7735251117991055,0.6534813180885163,1.0
5 | 1,6,0.7735251117991055,0.6534813180885163,1.0
6 | 2,0,0.8082408840729274,0.7098104342338087,1.0
7 | 2,2,0.7577700378396972,0.6579990189549013,1.0
8 | 2,4,0.7183350533195736,0.5632661696748384,1.0
9 | 2,6,0.6957451840385277,0.5284068915727786,0.1
10 | 3,0,0.8545042139662883,0.7886428236795884,10.0
11 | 3,2,0.8531217750257998,0.8065749405822935,0.001
12 | 3,4,0.832546439628483,0.7681329409754333,1.0
13 | 3,6,0.79578173374613,0.7226295853269538,0.001
14 |
--------------------------------------------------------------------------------
/results/wl_affiliation_results_with_normalization.csv:
--------------------------------------------------------------------------------
1 | depth,iterations,accuracy,f1,C
2 | 1,0,0.881955194358445,0.795756261282577,100.0
3 | 1,2,0.8687607499140008,0.7886729279492436,100.0
4 | 1,4,0.8687607499140008,0.7886729279492436,100.0
5 | 1,6,0.8687607499140008,0.7886729279492436,100.0
6 | 2,0,0.8868507051943585,0.8197871572871573,100.0
7 | 2,2,0.8581269349845201,0.7815634647000745,100.0
8 | 2,4,0.7704463364293086,0.6042463799100796,100.0
9 | 2,6,0.7527584279325765,0.5791451324733059,100.0
10 | 3,0,0.8848426212590299,0.8184077034077035,100.0
11 | 3,2,0.8908002235982112,0.8246224200635967,100.0
12 | 3,4,0.8973426212590299,0.840693848635025,100.0
13 | 3,6,0.8963557791537667,0.8213432686594452,100.0
14 |
--------------------------------------------------------------------------------
/results/wl_affiliation_results_with_normalization.tex:
--------------------------------------------------------------------------------
1 | \begin{tabular}{llrrr}
2 | \toprule
3 | & & accuracy & f1 & C \\
4 | depth & iterations & & & \\
5 | \midrule
6 | 1 & 0 & 0.842337 & 0.772552 & 100.0 \\
7 | & 2 & 0.836782 & 0.755789 & 100.0 \\
8 | & 4 & 0.836782 & 0.755789 & 100.0 \\
9 | & 6 & 0.836782 & 0.755789 & 100.0 \\
10 | 2 & 0 & 0.892516 & 0.836455 & 100.0 \\
11 | & 2 & 0.826180 & 0.742251 & 100.0 \\
12 | & 4 & 0.774069 & 0.618519 & 100.0 \\
13 | & 6 & 0.740048 & 0.568392 & 100.0 \\
14 | 3 & 0 & 0.892591 & 0.850147 & 100.0 \\
15 | & 2 & 0.897779 & 0.848919 & 100.0 \\
16 | & 4 & 0.909258 & 0.860964 & 100.0 \\
17 | & 6 & 0.881044 & 0.796105 & 100.0 \\
18 | \bottomrule
19 | \end{tabular}
20 |
--------------------------------------------------------------------------------
/results/wl_lithogenesis_results_with_normalization.csv:
--------------------------------------------------------------------------------
1 | depth,iterations,accuracy,f1,C
2 | 1,0,0.8026785714285714,0.7743826999976886,10.0
3 | 1,2,0.7964285714285714,0.7688415744565631,10.0
4 | 1,4,0.7964285714285714,0.7688415744565631,10.0
5 | 1,6,0.7964285714285714,0.7688415744565631,10.0
6 | 2,0,0.8919642857142858,0.8773113283868433,100.0
7 | 2,2,0.8928571428571429,0.8740923537433837,1.0
8 | 2,4,0.8732142857142857,0.8544845106061286,1.0
9 | 2,6,0.8651785714285714,0.841352739556401,1.0
10 | 3,0,0.8839285714285715,0.8714062451136755,100.0
11 | 3,2,0.9133928571428571,0.8982908605505859,1.0
12 | 3,4,0.90625,0.8909224394979542,1.0
13 | 3,6,0.90625,0.8909224394979542,1.0
14 |
--------------------------------------------------------------------------------
/results/wl_lithogenesis_results_with_normalization.tex:
--------------------------------------------------------------------------------
1 | \begin{tabular}{llrrr}
2 | \toprule
3 | & & accuracy & f1 & C \\
4 | depth & iterations & & & \\
5 | \midrule
6 | 1 & 0 & 0.802679 & 0.774383 & 10.0 \\
7 | & 2 & 0.796429 & 0.768842 & 10.0 \\
8 | & 4 & 0.796429 & 0.768842 & 10.0 \\
9 | & 6 & 0.796429 & 0.768842 & 10.0 \\
10 | 2 & 0 & 0.891964 & 0.877311 & 100.0 \\
11 | & 2 & 0.892857 & 0.874092 & 1.0 \\
12 | & 4 & 0.873214 & 0.854485 & 1.0 \\
13 | & 6 & 0.865179 & 0.841353 & 1.0 \\
14 | 3 & 0 & 0.883929 & 0.871406 & 100.0 \\
15 | & 2 & 0.913393 & 0.898291 & 1.0 \\
16 | & 4 & 0.906250 & 0.890922 & 1.0 \\
17 | & 6 & 0.906250 & 0.890922 & 1.0 \\
18 | \bottomrule
19 | \end{tabular}
20 |
--------------------------------------------------------------------------------
/results/wl_no_labels.csv:
--------------------------------------------------------------------------------
1 | depth,iterations,accuracy,f1,C
2 | 1,0,0.3221534227726178,0.19447672210830105,100.0
3 | 1,2,0.530110509115927,0.34867194939563356,10.0
4 | 1,4,0.530110509115927,0.3470491423780897,10.0
5 | 1,6,0.530110509115927,0.3470491423780897,10.0
6 | 2,0,0.564546783625731,0.35525302548328863,10.0
7 | 2,2,0.5037237702098383,0.3431478203169379,1.0
8 | 2,4,0.481437048503612,0.3923659673659673,100.0
9 | 2,6,0.502999226006192,0.38346129360835246,1.0
10 | 3,0,0.49169676642586857,0.34340422713681223,100.0
11 | 3,2,0.6413334193326453,0.527556055056055,100.0
12 | 3,4,0.7245506535947712,0.6026768084856319,10.0
13 | 3,6,0.7134739422084623,0.5573347090645852,100.0
14 |
--------------------------------------------------------------------------------
/results/wl_no_labels.tex:
--------------------------------------------------------------------------------
1 | \begin{tabular}{llrrr}
2 | \toprule
3 | & & accuracy & f1 & C \\
4 | depth & iterations & & & \\
5 | \midrule
6 | 1 & 0 & 0.322153 & 0.194477 & 100.0 \\
7 | & 2 & 0.530111 & 0.348672 & 10.0 \\
8 | & 4 & 0.530111 & 0.347049 & 10.0 \\
9 | & 6 & 0.530111 & 0.347049 & 10.0 \\
10 | 2 & 0 & 0.564547 & 0.355253 & 10.0 \\
11 | & 2 & 0.503724 & 0.343148 & 1.0 \\
12 | & 4 & 0.481437 & 0.392366 & 100.0 \\
13 | & 6 & 0.502999 & 0.383461 & 1.0 \\
14 | 3 & 0 & 0.491697 & 0.343404 & 100.0 \\
15 | & 2 & 0.641333 & 0.527556 & 100.0 \\
16 | & 4 & 0.724551 & 0.602677 & 10.0 \\
17 | & 6 & 0.713474 & 0.557335 & 100.0 \\
18 | \bottomrule
19 | \end{tabular}
20 |
--------------------------------------------------------------------------------
/results/wlrdf_affiliation_results.csv:
--------------------------------------------------------------------------------
1 | depth,iterations,accuracy,f1,C
2 | 1,0,0.7848877708978328,0.657970231522863,10.0
3 | 1,2,0.7848877708978328,0.657970231522863,10.0
4 | 1,4,0.7848877708978328,0.657970231522863,1.0
5 | 1,6,0.7848877708978328,0.657970231522863,1.0
6 | 2,0,0.8262555899552803,0.7310782851049878,1.0
7 | 2,2,0.7898929308565531,0.6821006728050072,1.0
8 | 2,4,0.7960676814585483,0.684156578500619,1.0
9 | 2,6,0.8023176814585483,0.6868838512278915,0.1
10 | 3,0,0.8416430168558652,0.7626665813546618,0.01
11 | 3,2,0.897703818369453,0.8631294273322137,0.001
12 | 3,4,0.8924406604747162,0.8590949650624573,0.001
13 | 3,6,0.8806759545923633,0.839124102591595,0.001
14 |
--------------------------------------------------------------------------------
/results/wlrdf_affiliation_results_with_normalization.csv:
--------------------------------------------------------------------------------
1 | depth,iterations,accuracy,f1,C
2 | 1,0,0.881955194358445,0.795756261282577,100.0
3 | 1,2,0.881955194358445,0.795756261282577,100.0
4 | 1,4,0.881955194358445,0.795756261282577,100.0
5 | 1,6,0.881955194358445,0.795756261282577,100.0
6 | 2,0,0.8921138630890952,0.8260073953823953,100.0
7 | 2,2,0.8800567595459237,0.8124879573041339,100.0
8 | 2,4,0.8745012039903681,0.8037011925982516,100.0
9 | 2,6,0.8745012039903681,0.8008212906374672,100.0
10 | 3,0,0.879579463364293,0.8121874653124653,100.0
11 | 3,2,0.9137512899896801,0.8673881673881674,100.0
12 | 3,4,0.9081957344341245,0.8638286754095578,100.0
13 | 3,6,0.9081957344341245,0.8638286754095578,100.0
14 |
--------------------------------------------------------------------------------
/results/wlrdf_affiliation_results_with_normalization.tex:
--------------------------------------------------------------------------------
1 | \begin{tabular}{llrrr}
2 | \toprule
3 | & & accuracy & f1 & C \\
4 | depth & iterations & & & \\
5 | \midrule
6 | 1 & 0 & 0.881955 & 0.795756 & 100.0 \\
7 | & 2 & 0.881955 & 0.795756 & 100.0 \\
8 | & 4 & 0.881955 & 0.795756 & 100.0 \\
9 | & 6 & 0.881955 & 0.795756 & 100.0 \\
10 | 2 & 0 & 0.892114 & 0.826007 & 100.0 \\
11 | & 2 & 0.880057 & 0.812488 & 100.0 \\
12 | & 4 & 0.874501 & 0.803701 & 100.0 \\
13 | & 6 & 0.874501 & 0.800821 & 100.0 \\
14 | 3 & 0 & 0.879579 & 0.812187 & 100.0 \\
15 | & 2 & 0.913751 & 0.867388 & 100.0 \\
16 | & 4 & 0.908196 & 0.863829 & 100.0 \\
17 | & 6 & 0.908196 & 0.863829 & 100.0 \\
18 | \bottomrule
19 | \end{tabular}
20 |
--------------------------------------------------------------------------------
/results/wlrdf_lithogenesis_results.csv:
--------------------------------------------------------------------------------
1 | depth,iterations,accuracy,f1,C
2 | 1,0,0.7946428571428571,0.7668365041391357,0.001
3 | 1,2,0.7946428571428571,0.7668365041391357,0.001
4 | 1,4,0.7946428571428571,0.7668365041391357,0.001
5 | 1,6,0.7946428571428571,0.7668365041391357,0.001
6 | 2,0,0.8991071428571429,0.8823099993065668,0.001
7 | 2,2,0.8571428571428571,0.8347124068405533,0.001
8 | 2,4,0.8571428571428571,0.8347124068405533,0.001
9 | 2,6,0.8571428571428571,0.8347124068405533,0.001
10 | 3,0,0.8866071428571429,0.8704565801079465,0.001
11 | 3,2,0.8928571428571427,0.8762965244773024,0.001
12 | 3,4,0.8857142857142856,0.8681093899743786,0.001
13 | 3,6,0.8857142857142856,0.8681093899743786,0.001
14 |
--------------------------------------------------------------------------------
/results/wlrdf_lithogenesis_results_with_normalization.csv:
--------------------------------------------------------------------------------
1 | depth,iterations,accuracy,f1,C
2 | 1,0,0.7955357142857142,0.7637394251438875,10.0
3 | 1,2,0.7955357142857142,0.7637394251438875,10.0
4 | 1,4,0.7955357142857142,0.7637394251438875,10.0
5 | 1,6,0.7955357142857142,0.7637394251438875,10.0
6 | 2,0,0.90625,0.891229457041814,100.0
7 | 2,2,0.8928571428571429,0.8740923537433837,1.0
8 | 2,4,0.8928571428571429,0.8740923537433837,1.0
9 | 2,6,0.8857142857142858,0.8666057227273407,1.0
10 | 3,0,0.8910714285714286,0.8758615567439098,100.0
11 | 3,2,0.8919642857142858,0.8734224394979544,1.0
12 | 3,4,0.90625,0.8901037260476619,1.0
13 | 3,6,0.9071428571428571,0.8888291958486466,1.0
14 |
--------------------------------------------------------------------------------
/results/wlrdf_lithogenesis_results_with_normalization.tex:
--------------------------------------------------------------------------------
1 | \begin{tabular}{llrrr}
2 | \toprule
3 | & & accuracy & f1 & C \\
4 | depth & iterations & & & \\
5 | \midrule
6 | 1 & 0 & 0.795536 & 0.763739 & 10.0 \\
7 | & 2 & 0.795536 & 0.763739 & 10.0 \\
8 | & 4 & 0.795536 & 0.763739 & 10.0 \\
9 | & 6 & 0.795536 & 0.763739 & 10.0 \\
10 | 2 & 0 & 0.906250 & 0.891229 & 100.0 \\
11 | & 2 & 0.892857 & 0.874092 & 1.0 \\
12 | & 4 & 0.892857 & 0.874092 & 1.0 \\
13 | & 6 & 0.885714 & 0.866606 & 1.0 \\
14 | 3 & 0 & 0.891071 & 0.875862 & 100.0 \\
15 | & 2 & 0.891964 & 0.873422 & 1.0 \\
16 | & 4 & 0.906250 & 0.890104 & 1.0 \\
17 | & 6 & 0.907143 & 0.888829 & 1.0 \\
18 | \bottomrule
19 | \end{tabular}
20 |
--------------------------------------------------------------------------------
/results/wlrdf_no_labels.csv:
--------------------------------------------------------------------------------
1 | depth,iterations,accuracy,f1,C
2 | 1,0,0.5248473512211902,0.30554684499217744,100.0
3 | 1,2,0.6475361197110423,0.5663935370185369,100.0
4 | 1,4,0.6707795837633299,0.5910602591852591,100.0
5 | 1,6,0.6770295837633299,0.59432948995449,100.0
6 | 2,0,0.5659356725146198,0.34073240549440237,10.0
7 | 2,2,0.6814219986240111,0.622212370962371,100.0
8 | 2,4,0.7400477296181631,0.6639600024158847,100.0
9 | 2,6,0.7625967492260062,0.6880690877749702,100.0
10 | 3,0,0.4073937908496732,0.2933196589272441,1.0
11 | 3,2,0.8989142586859306,0.8616813859944665,100.0
12 | 3,4,0.8920794633642931,0.8543043879924686,100.0
13 | 3,6,0.8930663054695562,0.85135795942104,10.0
14 |
--------------------------------------------------------------------------------
/results/wlrdf_no_labels.tex:
--------------------------------------------------------------------------------
1 | \begin{tabular}{llrrr}
2 | \toprule
3 | & & accuracy & f1 & C \\
4 | depth & iterations & & & \\
5 | \midrule
6 | 1 & 0 & 0.524847 & 0.305547 & 100.0 \\
7 | & 2 & 0.647536 & 0.566394 & 100.0 \\
8 | & 4 & 0.670780 & 0.591060 & 100.0 \\
9 | & 6 & 0.677030 & 0.594329 & 100.0 \\
10 | 2 & 0 & 0.565936 & 0.340732 & 10.0 \\
11 | & 2 & 0.681422 & 0.622212 & 100.0 \\
12 | & 4 & 0.740048 & 0.663960 & 100.0 \\
13 | & 6 & 0.762597 & 0.688069 & 100.0 \\
14 | 3 & 0 & 0.407394 & 0.293320 & 1.0 \\
15 | & 2 & 0.898914 & 0.861681 & 100.0 \\
16 | & 4 & 0.892079 & 0.854304 & 100.0 \\
17 | & 6 & 0.893066 & 0.851358 & 10.0 \\
18 | \bottomrule
19 | \end{tabular}
20 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import re
from pathlib import Path

from setuptools import setup, find_packages


def _read_version() -> str:
    """Return the ``__version__`` string declared in ``wlkernel/__init__.py``.

    The file is read as text instead of importing the package: importing
    ``wlkernel`` executes ``wlkernel/_wlkernel.py``, which needs numpy and
    nptyping, and those may not be installed yet while the package itself
    is being built or installed.

    Raises:
        RuntimeError: if no ``__version__ = '...'`` assignment is found.
    """
    init_py = Path(__file__).resolve().parent / 'wlkernel' / '__init__.py'
    match = re.search(
        r"^__version__\s*=\s*['\"]([^'\"]+)['\"]",
        init_py.read_text(encoding='utf-8'),
        re.MULTILINE,
    )
    if match is None:
        raise RuntimeError(f'__version__ not found in {init_py}')
    return match.group(1)


setup(
    name='wlkernel',
    version=_read_version(),
    description='Weisfeiler-Lehman kernel for RDF graphs',
    # 'tests.*' is required as well: excluding only 'tests' would still
    # ship the 'tests.resources' sub-package.
    packages=find_packages(exclude=['tests', 'tests.*']),
)
12 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/tests/__init__.py
--------------------------------------------------------------------------------
/tests/resources/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/tests/resources/__init__.py
--------------------------------------------------------------------------------
/tests/resources/example.ttl:
--------------------------------------------------------------------------------
1 | 'A1' 'P2' 'C' .
2 | 'A1' 'P3' 'D' .
3 | 'A2' 'P2' 'D' .
4 | 'A2' 'P3' 'E' .
5 | 'B2' 'P3' 'E' .
6 | 'B2' 'P2' 'F' .
7 | 'B1' 'P3' 'F' .
8 | 'B1' 'P2' 'G' .
9 | 'C' 'P4' 'H' .
10 | 'D' 'P4' 'H' .
11 | 'F' 'P5' 'I' .
12 | 'G' 'P5' 'I' .
13 | 'H' 'P6' 'A2' .
14 | 'I' 'P6' 'B2' .
15 |
--------------------------------------------------------------------------------
/tests/wlkernel_test.py:
--------------------------------------------------------------------------------
1 | from os.path import abspath
2 | from pkg_resources import resource_filename
3 |
4 | import pytest
5 | import rdflib
6 |
7 | import wlkernel
8 |
9 |
# Absolute path of the `example.ttl` fixture, looked up inside the
# `tests.resources` package; every test below parses this file.
example_data = abspath(resource_filename('tests.resources', 'example.ttl'))
11 |
12 |
def test_node_hash():
    '''Distinct nodes hash differently; an alias hashes like its node.'''
    first = wlkernel.Node()
    second = wlkernel.Node()
    first_alias, second_alias = first, second

    assert hash(first) != hash(second)
    assert hash(first_alias) == hash(first)
    assert hash(second_alias) == hash(second)
21 |
22 |
def test_edge_hash():
    '''Distinct edges hash differently; an alias hashes like its edge.'''
    first = wlkernel.Edge()
    second = wlkernel.Edge()
    first_alias, second_alias = first, second

    assert hash(first) != hash(second)
    assert hash(first_alias) == hash(first)
    assert hash(second_alias) == hash(second)
31 |
32 |
def test_wlgraph_depth_0():
    '''
    Depth 0 keeps only the root:

        ######
        # A1 #
        ######
    '''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in source)

    graph = wlkernel.WLGraph(triples, 'A1', 0)
    node_count = len(graph.nodes)
    edge_count = len(graph.edges)

    assert node_count == 1
    assert edge_count == 0
    assert len(graph.labels) == 1
    # Every node and every edge owns exactly one initial label.
    assert len(graph.labels[0]) == node_count + edge_count
46 |
47 |
def test_wlgraph_depth_1():
    r'''
    Depth 1 reaches the direct successors of the root:

              ######
              # A1 #
              ######
              /    \
          P2 /      \ P3
            /        \
        #####        #####
        # C #        # D #
        #####        #####
    '''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in source)

    graph = wlkernel.WLGraph(triples, 'A1', 1)
    node_count = len(graph.nodes)
    edge_count = len(graph.edges)

    assert node_count == 3
    assert edge_count == 2
    assert len(graph.labels) == 1
    # Every node and every edge owns exactly one initial label.
    assert len(graph.labels[0]) == node_count + edge_count
67 |
68 |
def test_wlgraph_depth_2():
    r'''
    Depth 2 adds H, which is reached from both C and D:

              ######
              # A1 #
              ######
              /    \
          P2 /      \ P3
            /        \
        #####        #####
        # C #        # D #
        #####        #####
            \        /
          P4 \      / P4
              \    /
              #####
              # H #
              #####
    '''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in source)

    graph = wlkernel.WLGraph(triples, 'A1', 2)
    node_count = len(graph.nodes)
    edge_count = len(graph.edges)

    assert node_count == 4
    assert edge_count == 4
    assert len(graph.labels) == 1
    # Every node and every edge owns exactly one initial label.
    assert len(graph.labels[0]) == node_count + edge_count
94 |
95 |
def test_wlgraph_depth_4():
    r'''
    Depth 4 follows H -> A2 and then A2's own edges; A2 -P2-> D points
    back to a node that is already part of the graph:

              ######
              # A1 #
              ######
              /    \
          P2 /      \ P3
            /        \
        #####        #####
        # C #        # D #<----
        #####        #####     |
            \        /         |
          P4 \      / P4       |
              \    /           |
              #####            |
              # H #            | P2
              #####            |
                |              |
                | P6           |
                |              |
              ######           |
              # A2 #-----------
              ######
                |
                | P3
                |
              #####
              # E #
              #####
    '''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in source)

    graph = wlkernel.WLGraph(triples, 'A1', 4)
    node_count = len(graph.nodes)
    edge_count = len(graph.edges)

    assert node_count == 6
    assert edge_count == 7
    assert len(graph.labels) == 1
    # Every node and every edge owns exactly one initial label.
    assert len(graph.labels[0]) == node_count + edge_count
133 |
134 |
def test_wl_relabel():
    '''Each joint relabeling call appends one label layer to both graphs.'''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = [(str(s), str(p), str(o)) for s, p, o in source]
    graph_a1 = wlkernel.WLGraph(triples, 'A1', 4)
    graph_b1 = wlkernel.WLGraph(triples, 'B1', 4)
    pair = [graph_a1, graph_b1]

    initial_a1 = set(graph_a1.labels[0].values())
    initial_b1 = set(graph_b1.labels[0].values())

    # First pass: the refined layer must distinguish more classes than the
    # initial labeling did.
    wlkernel.wl_relabel(pair)
    refined_a1 = set(graph_a1.labels[1].values())
    refined_b1 = set(graph_b1.labels[1].values())
    assert len(graph_a1.labels) == len(graph_b1.labels) == 2
    assert len(initial_a1) < len(refined_a1)
    assert len(initial_b1) < len(refined_b1)

    # Second and third pass: only the number of layers is checked; the new
    # layer of each graph is read once to make sure it exists.
    for expected_layers in (3, 4):
        wlkernel.wl_relabel(pair)
        set(graph_a1.labels[expected_layers - 1].values())
        set(graph_b1.labels[expected_layers - 1].values())
        assert (
            len(graph_a1.labels) == len(graph_b1.labels) == expected_layers
        )
160 |
161 |
def test_wl_kernel():
    '''Check the kernel of the A1 and B1 subgraphs for 0 and 1 iterations.'''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = [(str(s), str(p), str(o)) for s, p, o in source]
    graph_a1 = wlkernel.WLGraph(triples, 'A1', 4)
    graph_b1 = wlkernel.WLGraph(triples, 'B1', 4)

    # No relabeling: a single layer with weight 1.
    without_relabeling = wlkernel.wl_kernel(graph_a1, graph_b1)
    assert without_relabeling == 11*1

    # One relabeling: layer 0 weighs 1/2, layer 1 weighs 1.
    with_one_iteration = wlkernel.wl_kernel(graph_a1, graph_b1, 1)
    assert with_one_iteration == 11*0.5 + 4*1
170 |
171 |
def test_wl_kernel_matrix():
    '''The kernel matrix is square, symmetric and agrees with wl_kernel.'''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = [(str(s), str(p), str(o)) for s, p, o in source]
    graph_a1 = wlkernel.WLGraph(triples, 'A1', 4)
    graph_b1 = wlkernel.WLGraph(triples, 'B1', 4)
    graph_a2 = wlkernel.WLGraph(triples, 'A2', 4)

    matrix = wlkernel.wl_kernel_matrix(
        [graph_a1, graph_b1, graph_a2], iterations=1
    )

    assert len(matrix) == len(matrix[0]) == 3

    a1_vs_b1 = wlkernel.wl_kernel(graph_a1, graph_b1, iterations=1)
    assert matrix[0][1] == a1_vs_b1

    a1_vs_a2 = wlkernel.wl_kernel(graph_a1, graph_a2, iterations=1)
    assert matrix[0][2] == a1_vs_a2

    # The lower triangle mirrors the upper one.
    a1_vs_b1_again = wlkernel.wl_kernel(graph_a1, graph_b1, iterations=1)
    assert matrix[1][0] == a1_vs_b1_again
193 |
194 |
def test_wlrdfgraph_depth_0():
    '''
    Depth 0 keeps only the root:

        ######
        # A1 #
        ######
    '''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in source)

    graph = wlkernel.WLRDFGraph(triples, ['A1'], 0)

    assert len(graph.nodes) == 1
    assert len(graph.edges) == 0
    assert len(graph.labels) == 1
    assert len(graph.labels[0]) == 1

    # One (empty) bookkeeping entry per instance.
    nodes_by_instance = graph.instance_nodes
    edges_by_instance = graph.instance_edges
    assert len(nodes_by_instance) == 1
    assert len(nodes_by_instance['A1']) == 0
    assert len(edges_by_instance) == 1
    assert len(edges_by_instance['A1']) == 0
212 |
213 |
def test_wlrdfgraph_depth_1():
    r'''
    Depth 1 reaches the direct successors of the root:

              ######
              # A1 #
              ######
              /    \
          P2 /      \ P3
            /        \
        #####        #####
        # C #        # D #
        #####        #####
    '''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in source)

    graph = wlkernel.WLRDFGraph(triples, ['A1'], 1)

    assert len(graph.nodes) == 3
    assert len(graph.edges) == 2
    assert len(graph.labels) == 1
    assert len(graph.labels[0]) == 5

    nodes_by_instance = graph.instance_nodes
    edges_by_instance = graph.instance_edges
    assert len(nodes_by_instance) == 1
    assert len(nodes_by_instance['A1']) == 2
    assert len(edges_by_instance) == 1
    assert len(edges_by_instance['A1']) == 2
237 |
238 |
def test_wlrdfgraph_depth_2():
    r'''
    Depth 2 adds H, which is reached from both C and D:

              ######
              # A1 #
              ######
              /    \
          P2 /      \ P3
            /        \
        #####        #####
        # C #        # D #
        #####        #####
            \        /
          P4 \      / P4
              \    /
              #####
              # H #
              #####
    '''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in source)

    graph = wlkernel.WLRDFGraph(triples, ['A1'], 2)

    assert len(graph.nodes) == 4
    assert len(graph.edges) == 4
    assert len(graph.labels) == 1
    assert len(graph.labels[0]) == 8

    nodes_by_instance = graph.instance_nodes
    edges_by_instance = graph.instance_edges
    assert len(nodes_by_instance) == 1
    assert len(nodes_by_instance['A1']) == 3
    assert len(edges_by_instance) == 1
    assert len(edges_by_instance['A1']) == 4
268 |
269 |
def test_wlrdfgraph_depth_4():
    r'''
    Depth 4 follows H -> A2 and then A2's own edges; D is reached a second
    time, through A2:

              ######
              # A1 #
              ######
              /    \
          P2 /      \ P3
            /        \
        #####        #####
        # C #        # D #
        #####        #####
            \        /
          P4 \      / P4
              \    /
              #####
              # H #
              #####
                |
                | P6
                |
              ######
              # A2 #
              ######
              /    \
          P3 /      \ P2
            /        \
        #####        #####
        # E #        # D #
        #####        #####
    '''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in source)

    graph = wlkernel.WLRDFGraph(triples, ['A1'], 4)

    assert len(graph.nodes) == 6
    assert len(graph.edges) == 7
    assert len(graph.labels) == 1
    assert len(graph.labels[0]) == 14

    nodes_by_instance = graph.instance_nodes
    edges_by_instance = graph.instance_edges
    assert len(nodes_by_instance) == 1
    assert len(nodes_by_instance['A1']) == 5
    assert len(edges_by_instance) == 1
    assert len(edges_by_instance['A1']) == 7
311 |
312 |
def test_wlrdf_relabel():
    '''Two relabeling passes add two layers; the second adds no classes.'''
    rdf_graph = rdflib.Graph()
    rdf_graph.parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in rdf_graph)
    wlrdf_graph = wlkernel.WLRDFGraph(triples, ['A1', 'B1'], 4)

    uniq_labels_0 = set(wlrdf_graph.labels[0].values())

    wlrdf_graph.relabel()
    uniq_labels_1 = set(wlrdf_graph.labels[1].values())

    wlrdf_graph.relabel()
    # Read the layer produced by the SECOND pass. Reading labels[1] again
    # would compare the first layer with itself and always succeed.
    uniq_labels_2 = set(wlrdf_graph.labels[2].values())

    assert len(wlrdf_graph.labels) == 3
    # Every pass relabels the same (element, depth) pairs.
    assert wlrdf_graph.labels[2].keys() == wlrdf_graph.labels[1].keys()
    assert len(uniq_labels_0) < len(uniq_labels_1)
    # On this fixture the first pass already separates everything the
    # structure can tell apart, so a further pass must not report more
    # classes than the first one did.
    assert len(uniq_labels_2) <= len(uniq_labels_1)
329 |
330 |
def test_wlrdf_kernel():
    '''Check the kernel between instances A1 and B1 of one shared graph.'''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in source)
    graph = wlkernel.WLRDFGraph(triples, ['A1', 'B1'], 4)

    without_relabeling = wlkernel.wlrdf_kernel(graph, 'A1', 'B1')
    assert without_relabeling == 10*1

    with_one_iteration = wlkernel.wlrdf_kernel(graph, 'A1', 'B1', 1)
    assert with_one_iteration == 10*0.5 + 3
337 |
338 |
def test_wlrdf_kernel_matrix():
    '''The kernel matrix is square, symmetric and agrees with wlrdf_kernel.'''
    source = rdflib.Graph()
    source.parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in source)
    graph = wlkernel.WLRDFGraph(triples, ['A1', 'B1'], 4)

    matrix = wlkernel.wlrdf_kernel_matrix(graph, ['A1', 'B1'])

    assert len(matrix) == len(matrix[0]) == 2

    upper = wlkernel.wlrdf_kernel(graph, 'A1', 'B1')
    assert matrix[0][1] == upper

    # The lower triangle mirrors the upper one.
    lower = wlkernel.wlrdf_kernel(graph, 'A1', 'B1')
    assert matrix[1][0] == lower
353 |
--------------------------------------------------------------------------------
/wlkernel/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/wlkernel/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/wlkernel/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/wlkernel/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/wlkernel/.idea/wlkernel.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/wlkernel/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 | 1559639650080
105 |
106 |
107 | 1559639650080
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
--------------------------------------------------------------------------------
/wlkernel/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.1'
2 |
3 |
4 | from ._wlkernel import (
5 | Node,
6 | Edge,
7 | WLGraph,
8 | wl_relabel,
9 | wl_kernel,
10 | wl_kernel_matrix,
11 | WLRDFGraph,
12 | wlrdf_kernel,
13 | wlrdf_kernel_matrix,
14 | )
15 |
--------------------------------------------------------------------------------
/wlkernel/_wlkernel.py:
--------------------------------------------------------------------------------
1 | from typing import (
2 | List,
3 | Dict,
4 | Tuple,
5 | Iterable,
6 | Union,
7 | Set,
8 | )
9 | from collections import Counter
10 | from itertools import chain
11 |
12 | from nptyping import Array
13 | import numpy as np
14 |
15 |
class Node:
    """A node of a Weisfeiler-Lehman RDF graph.

    ``neighbors`` is the set of objects registered through
    :meth:`add_neighbor`.
    """

    def __init__(self):
        self.neighbors = set()

    def add_neighbor(self, edge):
        """Insert *edge* into this node's neighbor set."""
        self.neighbors.update((edge,))

    def __hash__(self):
        # Hash on object identity: two nodes are the same key only when
        # they are the very same object.
        identity = id(self)
        return hash(identity)
27 |
28 |
class Edge:
    """An edge of a Weisfeiler-Lehman RDF graph.

    ``neighbor`` starts as ``None``; whoever builds the graph assigns the
    node this edge is attached to.
    """

    def __init__(self):
        self.neighbor = None

    def __hash__(self):
        # Hash on object identity: two edges are the same key only when
        # they are the very same object.
        identity = id(self)
        return hash(identity)
37 |
38 |
class WLGraph:
    """Standard Weisfeiler-Lehman graph with directed labeled edges.

    Attributes:
        max_depth: number of hops followed from the instance.
        nodes: every ``Node`` reached, the root included.
        edges: one ``Edge`` per distinct ``(subject, predicate, object)``
            triple that was traversed.
        labels: list of label layers; ``labels[0]`` maps every node and
            edge to its initial label (the object name for nodes, the
            predicate for edges, ``'root'`` for the instance).
    """

    def __init__(self, triples: Iterable[Tuple[str, str, str]],
                 instance: str, max_depth: int):
        """Build a Weisfeiler-Lehman graph from a list of RDF triples.

        Starting at *instance*, outgoing triples are followed breadth-first
        for *max_depth* hops.

        Links are stored against the RDF direction: the edge is added to
        the ``neighbors`` of its *object* node, and ``edge.neighbor`` is
        its *subject* node.

        NOTE: a triple whose object is *instance* itself replaces the
        ``'root'`` label of the root node with the instance name.
        """
        self.max_depth = max_depth
        self.nodes: Set[Node] = set()
        self.edges: Set[Edge] = set()
        self.labels: List[Dict[Union[Node, Edge], str]] = [dict()]

        # Index the triples by subject once, so each frontier node is a
        # dictionary lookup instead of a scan over the whole triple list.
        # Input order is preserved inside every bucket.
        outgoing: Dict[str, List[Tuple[str, str, str]]] = dict()
        for sub, pred, obj in triples:
            outgoing.setdefault(sub, []).append((sub, pred, obj))

        v_map: Dict[str, Node] = dict()
        e_map: Dict[Tuple[str, str, str], Edge] = dict()

        root = Node()
        self.nodes.add(root)
        self.labels[0][root] = 'root'
        v_map[instance] = root

        search_front = {instance}
        for _ in range(max_depth):
            if not search_front:
                break  # nothing left to expand
            new_search_front = set()
            for r in search_front:
                for t in outgoing.get(r, ()):
                    sub, pred, obj = t
                    new_search_front.add(obj)

                    if obj not in v_map:
                        v = Node()
                        self.nodes.add(v)
                        v_map[obj] = v
                    self.labels[0][v_map[obj]] = obj

                    if t not in e_map:
                        e = Edge()
                        self.edges.add(e)
                        e_map[t] = e
                    self.labels[0][e_map[t]] = pred

                    v_map[obj].add_neighbor(e_map[t])
                    e_map[t].neighbor = v_map[sub]

            search_front = new_search_front
84 |
85 |
def wl_relabel(wl_graphs: Iterable['WLGraph'], iterations: int = 1):
    """Run *iterations* joint Weisfeiler-Lehman relabeling passes in place.

    Each pass appends one new layer to ``labels`` of every graph. The
    compressed labels are shared by all graphs given in the same call, so
    layers of these graphs can be compared with one another.

    Args:
        wl_graphs: graphs to relabel together. A graph listed more than
            once is relabeled only once. An empty iterable is a no-op.
        iterations: number of passes to run.

    Raises:
        ValueError: if the graphs do not all hold the same number of label
            layers (they would not be at the same iteration).
    """
    # De-duplicate by identity: relabeling the same object twice in one
    # call would append two layers to it per pass.
    unique_graphs = list({id(g): g for g in wl_graphs}.values())
    if not unique_graphs:
        return

    layer_counts = {len(wl_graph.labels) for wl_graph in unique_graphs}
    if len(layer_counts) != 1:
        raise ValueError(
            'all graphs must have the same number of label layers, got '
            + str(sorted(layer_counts))
        )
    m = layer_counts.pop()

    for i in range(m, m + iterations):
        # 1.+2. Signature of every element: its previous label plus the
        # sorted previous labels of its neighbors. A tuple is used rather
        # than string concatenation, which is ambiguous:
        # '1' + '23' and '12' + '3' would both give '123'.
        signatures_list: List[Dict[Union[Node, Edge], tuple]] = []
        for wl_graph in unique_graphs:
            previous = wl_graph.labels[i - 1]
            signatures: Dict[Union[Node, Edge], tuple] = dict()
            for v in wl_graph.nodes:
                if v in wl_graph.labels[0]:
                    multiset = sorted(
                        previous[u] for u in v.neighbors if u in previous
                    )
                    signatures[v] = (previous[v], tuple(multiset))
            for e in wl_graph.edges:
                if e in wl_graph.labels[0]:
                    signatures[e] = (previous[e], (previous[e.neighbor],))
            signatures_list.append(signatures)

        # 3. Label compression, shared by all graphs. Sorting makes the
        # numbering independent of set iteration order.
        distinct = set(
            chain.from_iterable(s.values() for s in signatures_list)
        )
        f = {
            signature: str(compressed_label)
            for compressed_label, signature in enumerate(sorted(distinct))
        }

        # 4. Relabeling
        for wl_graph, signatures in zip(unique_graphs, signatures_list):
            wl_graph.labels.append({
                k: f[signature] for k, signature in signatures.items()
            })
137 |
138 |
def wl_kernel(wl_graph_1: WLGraph, wl_graph_2: WLGraph,
              iterations: int = 0) -> float:
    """Compute the Weisfeiler-Lehman kernel for two WLGraphs.

    Both graphs must hold the same number of label layers. Missing layers
    up to *iterations* are created by relabeling the two graphs together.
    For every layer ``it`` in ``0..iterations`` the matching node labels
    and the matching edge labels are counted separately, and their sum is
    weighted by ``(it + 1) / (iterations + 1)``.
    """
    layer_count = len(wl_graph_1.labels)
    assert layer_count == len(wl_graph_2.labels)

    missing_layers = iterations - layer_count + 1
    if missing_layers > 0:
        wl_relabel([wl_graph_1, wl_graph_2], missing_layers)

    total = 0.0
    for it in range(iterations + 1):
        layer_1 = wl_graph_1.labels[it]
        layer_2 = wl_graph_2.labels[it]

        shared_nodes = count_commons(
            [layer_1[node] for node in wl_graph_1.nodes],
            [layer_2[node] for node in wl_graph_2.nodes],
        )
        shared_edges = count_commons(
            [layer_1[edge] for edge in wl_graph_1.edges],
            [layer_2[edge] for edge in wl_graph_2.edges],
        )

        # Later layers weigh more; the last one has weight 1.
        weight = (it + 1) / (iterations + 1)
        total += weight * (shared_nodes + shared_edges)
    return total
167 |
168 |
def wl_kernel_matrix(wl_graphs: Iterable['WLGraph'],
                     iterations: int = 0) -> List[List[float]]:
    """Compute the matrix of kernel values between each pair of WLGraphs.

    All graphs are relabeled together first, so their compressed labels
    are comparable across every pair. The upper triangle is then computed
    with ``wl_kernel`` and mirrored into the lower one.

    Args:
        wl_graphs: the graphs to compare; may be any iterable.
        iterations: number of relabeling iterations the kernel uses.

    Returns:
        A symmetric ``n x n`` list of lists, or ``[]`` for no graphs.
    """
    wl_graphs = list(wl_graphs)
    if not wl_graphs:
        # Nothing to compare; avoids indexing wl_graphs[0] below.
        return []

    m = len(wl_graphs[0].labels)
    if iterations > m - 1:
        wl_relabel(wl_graphs, iterations - m + 1)

    n = len(wl_graphs)
    kernel_matrix = [[0.0]*n for _ in range(n)]
    for i in range(n):
        for j in range(i, n):
            value = wl_kernel(wl_graphs[i], wl_graphs[j], iterations)
            kernel_matrix[i][j] = value
            kernel_matrix[j][i] = value
    return kernel_matrix
189 |
190 |
class WLRDFGraph:
    """Weisfeiler-Lehman RDF graph shared by several instances.

    Attributes:
        max_depth: number of hops followed from every instance.
        nodes: every ``Node`` of the graph, the instance roots included.
        edges: one ``Edge`` per distinct traversed triple.
        labels: list of label layers keyed by ``(element, depth)``. Roots
            sit at depth ``max_depth``; an element reached after ``k``
            hops sits at depth ``max_depth - k``.
        instance_nodes: per instance, the nodes reached from it mapped to
            the depth of their first visit (the root itself is only
            listed if it is reached again as an object).
        instance_edges: per instance, the traversed edges mapped to the
            depth of their first visit.
    """

    def __init__(self, triples: Iterable[Tuple[str, str, str]],
                 instances: Iterable[str], max_depth: int):
        """Build a Weisfeiler-Lehman RDF graph from a list of RDF triples.

        Links are stored against the RDF direction: the edge is added to
        the ``neighbors`` of its *object* node, and ``edge.neighbor`` is
        its *subject* node.
        """
        # `instances` is walked several times below; materialize it so a
        # generator is not exhausted by the first walk.
        instances = list(instances)
        self.max_depth = max_depth
        self.nodes: Set[Node] = set()
        self.edges: Set[Edge] = set()
        self.labels: List[Dict[Tuple[Union[Node, Edge], int], str]] = [dict()]
        self.instance_nodes: Dict[str, Dict[Node, int]] = {
            instance: dict() for instance in instances
        }
        self.instance_edges: Dict[str, Dict[Edge, int]] = {
            instance: dict() for instance in instances
        }

        # Index the triples by subject once, so each frontier node is a
        # dictionary lookup instead of a scan over the whole triple list.
        # Input order is preserved inside every bucket.
        outgoing: Dict[str, List[Tuple[str, str, str]]] = dict()
        for sub, pred, obj in triples:
            outgoing.setdefault(sub, []).append((sub, pred, obj))

        v_map: Dict[str, Node] = dict()
        e_map: Dict[Tuple[str, str, str], Edge] = dict()

        # 1. Initialization
        for instance in instances:
            root = Node()
            self.nodes.add(root)
            self.labels[0][(root, max_depth)] = 'root'
            v_map[instance] = root

        # 2. Subgraph Extraction
        for instance in instances:
            reached_nodes = self.instance_nodes[instance]
            reached_edges = self.instance_edges[instance]
            search_front = {instance}
            for j in reversed(range(0, max_depth)):
                new_search_front = set()
                for r in search_front:
                    for t in outgoing.get(r, ()):
                        sub, pred, obj = t
                        new_search_front.add(obj)

                        if obj not in v_map:
                            v = Node()
                            self.nodes.add(v)
                            v_map[obj] = v
                        self.labels[0][(v_map[obj], j)] = obj
                        if v_map[obj] not in reached_nodes:
                            reached_nodes[v_map[obj]] = j

                        if t not in e_map:
                            e = Edge()
                            self.edges.add(e)
                            e_map[t] = e
                        self.labels[0][(e_map[t], j)] = pred
                        if e_map[t] not in reached_edges:
                            reached_edges[e_map[t]] = j

                        v_map[obj].add_neighbor(e_map[t])
                        e_map[t].neighbor = v_map[sub]

                search_front = new_search_front

    def relabel(self, iterations: int = 1):
        """Run *iterations* relabeling passes, appending one layer each.

        Every ``(element, depth)`` pair present in ``labels[0]`` gets a new
        compressed label built from its previous label and the previous
        labels of its neighbors.
        """
        for _ in range(iterations):
            previous = self.labels[-1]

            # 1.+2. Signature of every (element, depth) pair. A tuple is
            # used rather than string concatenation, which is ambiguous:
            # '1' + '23' and '12' + '3' would both give '123'.
            signatures: Dict[Tuple[Union[Node, Edge], int], tuple] = dict()
            for v in self.nodes:
                for j in range(self.max_depth + 1):
                    if (v, j) in self.labels[0]:
                        multiset = sorted(
                            previous[(u, j)] for u in v.neighbors
                            if (u, j) in previous
                        )
                        signatures[(v, j)] = (
                            previous[(v, j)], tuple(multiset)
                        )
            for e in self.edges:
                for j in range(self.max_depth):
                    if (e, j) in self.labels[0]:
                        # The node an edge hangs from sits one level up.
                        signatures[(e, j)] = (
                            previous[(e, j)],
                            (previous[(e.neighbor, j + 1)],),
                        )

            # 3. Label compression. Sorting makes the numbering
            # independent of set iteration order.
            f = {
                signature: str(index)
                for index, signature in enumerate(
                    sorted(set(signatures.values()))
                )
            }

            # 4. Relabeling
            self.labels.append({
                key: f[signature] for key, signature in signatures.items()
            })
292 |
293 |
def count_commons(a: Iterable, b: Iterable) -> int:
    """Return the number of matching pairs between the two iterables.

    For every distinct value present in both ``a`` and ``b`` the number of
    occurrences in ``a`` is multiplied by the number of occurrences in
    ``b``; the result is the sum of those products.

    Each argument is consumed exactly once, so one-shot iterators and
    generators are supported.
    """
    counter_a = Counter(a)
    counter_b = Counter(b)
    # A Counter yields 0 for a missing key, so values that appear in only
    # one of the inputs contribute nothing to the sum.
    return sum(
        occurrences * counter_b[value]
        for value, occurrences in counter_a.items()
    )
303 |
304 |
def wlrdf_kernel(graph: WLRDFGraph, instance_1: str, instance_2: str,
                 iterations: int = 0) -> float:
    """Compute the Weisfeiler-Lehman kernel for two instances.

    The graph is relabeled first if it does not yet hold labels for the
    requested number of iterations. For each iteration ``it`` from 0 to
    ``iterations`` the node labels and the edge labels of the two instances
    are compared with ``count_commons``; the sum of the two counts is
    weighted by ``(it + 1) / (iterations + 1)`` and accumulated.
    """
    available = len(graph.labels) - 1
    if available < iterations:
        graph.relabel(iterations - available)

    def collect(members, step):
        # ``members`` maps an element to its depth, so each item is
        # exactly the (element, depth) key used by the label dictionaries.
        current = graph.labels[step]
        return [current[pair] for pair in members.items()]

    total = 0.0
    denominator = iterations + 1
    for step in range(denominator):
        nodes_a = collect(graph.instance_nodes[instance_1], step)
        nodes_b = collect(graph.instance_nodes[instance_2], step)
        edges_a = collect(graph.instance_edges[instance_1], step)
        edges_b = collect(graph.instance_edges[instance_2], step)
        shared = (count_commons(nodes_a, nodes_b)
                  + count_commons(edges_a, edges_b))
        total += (step + 1) / denominator * shared
    return total
335 |
336 |
def wlrdf_kernel_matrix(graph: WLRDFGraph, instances: List[str],
                        iterations: int = 0) -> Array[float]:
    """Compute the matrix of the kernel values between each couple of instances.

    Entry ``[i][j]`` holds ``wlrdf_kernel`` evaluated on ``instances[i]``
    and ``instances[j]``. The kernel is evaluated once per unordered pair
    (upper triangle, diagonal included) and mirrored to the lower triangle.
    """
    size = len(instances)
    gram = np.zeros((size, size))
    for row, first in enumerate(instances):
        for col in range(row, size):
            value = wlrdf_kernel(graph, first, instances[col], iterations)
            gram[row, col] = value
            gram[col, row] = value
    return gram
351 |
352 |
def kernel_normalization(kernel_matrix: Array[float]) -> Array[float]:
    """Return the normalized version of a square kernel matrix.

    Each entry is computed as ``K[i][j] / sqrt(K[i][i] * K[j][j])``. The
    input matrix is not modified.

    Entries whose denominator is zero (an element whose kernel value with
    itself is 0) are set to 0.0 instead of producing NaN or infinity.

    Raises:
        ValueError: if ``kernel_matrix`` is not a square 2-D matrix.
    """
    matrix = np.asarray(kernel_matrix, dtype=float)
    if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
        raise ValueError(
            'kernel_matrix must be a square 2-D matrix, got shape '
            + str(matrix.shape)
        )

    diagonal = np.diagonal(matrix)
    # scale[i][j] == sqrt(K[i][i] * K[j][j])
    scale = np.sqrt(np.outer(diagonal, diagonal))

    res = np.zeros(matrix.shape)
    # Divide only where the denominator is non-zero; the remaining entries
    # keep the 0.0 they were initialized with.
    np.divide(matrix, scale, out=res, where=scale != 0)
    return res
363 |
--------------------------------------------------------------------------------