├── .gitignore
├── .idea
    ├── encodings.xml
    ├── misc.xml
    ├── modules.xml
    ├── vcs.xml
    └── wl-graph-kernels.iml
├── LICENSE
├── README.md
├── data
    ├── .gitkeep
    ├── Lexicon_NamedRockUnit.nt
    └── download_datasets.sh
├── example_graphs
    ├── 07-Graph.dot
    ├── 07-Graph.pdf
    ├── 07-almost_relabeled.dot
    ├── 07-almost_relabeled.pdf
    ├── 07-relabeled.dot
    ├── 07-relabeled.pdf
    ├── 07-relabeled_vertical.dot
    ├── 07-relabeled_vertical.pdf
    ├── 07-subGraph_A1_B1.dot
    ├── 07-subGraph_A1_B1.pdf
    ├── 07-subGraph_A1_B1_vertical.dot
    ├── 07-subGraph_A1_B1_vertical.pdf
    └── \
├── notebooks
    ├── affiliation_scores.ipynb
    ├── affiliation_timing.ipynb
    ├── lithogenesis_scores.ipynb
    ├── lithogenesis_timing.ipynb
    └── no_labels_scores.ipynb
├── presentation
    ├── img
    │   ├── 07-Graph.pdf
    │   ├── 07-almost_relabeled.pdf
    │   ├── 07-relabeled.pdf
    │   ├── 07-relabeled_vertical.pdf
    │   ├── 07-subGraph_A1_B1.pdf
    │   ├── 07-subGraph_A1_B1_vertical.pdf
    │   ├── wl_iteration_total.png
    │   └── wl_iteration_upper.png
    ├── presentation.nav
    ├── presentation.pdf
    └── presentation.tex
├── report
    ├── RefereeReport.pdf
    ├── RefereeReport.tex
    └── img
    │   ├── affiliation_timing.png
    │   └── lithogenesis_timing.png
├── requirements.txt
├── results
    ├── affiliation_timing.png
    ├── csv_to_latex.py
    ├── lithogenesis_timing.png
    ├── wl_affiliation_results.csv
    ├── wl_affiliation_results_with_normalization.csv
    ├── wl_affiliation_results_with_normalization.tex
    ├── wl_lithogenesis_results_with_normalization.csv
    ├── wl_lithogenesis_results_with_normalization.tex
    ├── wl_no_labels.csv
    ├── wl_no_labels.tex
    ├── wlrdf_affiliation_results.csv
    ├── wlrdf_affiliation_results_with_normalization.csv
    ├── wlrdf_affiliation_results_with_normalization.tex
    ├── wlrdf_lithogenesis_results.csv
    ├── wlrdf_lithogenesis_results_with_normalization.csv
    ├── wlrdf_lithogenesis_results_with_normalization.tex
    ├── wlrdf_no_labels.csv
    └── wlrdf_no_labels.tex
├── setup.py
├── tests
    ├── __init__.py
    ├── resources
    │   ├── __init__.py
    │   └── example.ttl
    └── wlkernel_test.py
└── wlkernel
    ├── .idea
        ├── encodings.xml
        ├── misc.xml
        ├── modules.xml
        ├── vcs.xml
        ├── wlkernel.iml
        └── workspace.xml
    ├── __init__.py
    └── _wlkernel.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | */aifbfixed_complete.n3
  2 | */Lexicon_NamedRockUnit.nt
  3 | report/*.aux
  4 | report/*.out
  5 | report/*.synctex.gz
  6 | report/*.err
  7 | report/*.log
  8 | # Created by https://www.gitignore.io/api/python,pycharm,jupyternotebook,jupyternotebooks
  9 | # Edit at https://www.gitignore.io/?templates=python,pycharm,jupyternotebook,jupyternotebooks
 10 | 
 11 | ### JupyterNotebook ###
 12 | .ipynb_checkpoints
 13 | */.ipynb_checkpoints/*
 14 | 
 15 | # Remove previous ipynb_checkpoints
 16 | #   git rm -r .ipynb_checkpoints/
 17 | #
 18 | 
 19 | ### JupyterNotebooks ###
 20 | # gitignore template for Jupyter Notebooks
 21 | # website: http://jupyter.org/
 22 | 
 23 | 
 24 | # Remove previous ipynb_checkpoints
 25 | #   git rm -r .ipynb_checkpoints/
 26 | 
 27 | ### PyCharm ###
 28 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
 29 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
 30 | 
 31 | # User-specific stuff
 32 | .idea/**/workspace.xml
 33 | .idea/**/tasks.xml
 34 | .idea/**/usage.statistics.xml
 35 | .idea/**/dictionaries
 36 | .idea/**/shelf
 37 | 
 38 | # Generated files
 39 | .idea/**/contentModel.xml
 40 | 
 41 | # Sensitive or high-churn files
 42 | .idea/**/dataSources/
 43 | .idea/**/dataSources.ids
 44 | .idea/**/dataSources.local.xml
 45 | .idea/**/sqlDataSources.xml
 46 | .idea/**/dynamic.xml
 47 | .idea/**/uiDesigner.xml
 48 | .idea/**/dbnavigator.xml
 49 | 
 50 | # Gradle
 51 | .idea/**/gradle.xml
 52 | .idea/**/libraries
 53 | 
 54 | # Gradle and Maven with auto-import
 55 | # When using Gradle or Maven with auto-import, you should exclude module files,
 56 | # since they will be recreated, and may cause churn.  Uncomment if using
 57 | # auto-import.
 58 | # .idea/modules.xml
 59 | # .idea/*.iml
 60 | # .idea/modules
 61 | 
 62 | # CMake
 63 | cmake-build-*/
 64 | 
 65 | # Mongo Explorer plugin
 66 | .idea/**/mongoSettings.xml
 67 | 
 68 | # File-based project format
 69 | *.iws
 70 | 
 71 | # IntelliJ
 72 | out/
 73 | 
 74 | # mpeltonen/sbt-idea plugin
 75 | .idea_modules/
 76 | 
 77 | # JIRA plugin
 78 | atlassian-ide-plugin.xml
 79 | 
 80 | # Cursive Clojure plugin
 81 | .idea/replstate.xml
 82 | 
 83 | # Crashlytics plugin (for Android Studio and IntelliJ)
 84 | com_crashlytics_export_strings.xml
 85 | crashlytics.properties
 86 | crashlytics-build.properties
 87 | fabric.properties
 88 | 
 89 | # Editor-based Rest Client
 90 | .idea/httpRequests
 91 | 
 92 | # Android studio 3.1+ serialized cache file
 93 | .idea/caches/build_file_checksums.ser
 94 | 
 95 | # JetBrains templates
 96 | **___jb_tmp___
 97 | 
 98 | ### PyCharm Patch ###
 99 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
100 | 
101 | # *.iml
102 | # modules.xml
103 | # .idea/misc.xml
104 | # *.ipr
105 | 
106 | # Sonarlint plugin
107 | .idea/sonarlint
108 | 
109 | ### Python ###
110 | # Byte-compiled / optimized / DLL files
111 | __pycache__/
112 | *.py[cod]
113 | *$py.class
114 | 
115 | # C extensions
116 | *.so
117 | 
118 | # Distribution / packaging
119 | .Python
120 | build/
121 | develop-eggs/
122 | dist/
123 | downloads/
124 | eggs/
125 | .eggs/
126 | lib/
127 | lib64/
128 | parts/
129 | sdist/
130 | var/
131 | wheels/
132 | pip-wheel-metadata/
133 | share/python-wheels/
134 | *.egg-info/
135 | .installed.cfg
136 | *.egg
137 | MANIFEST
138 | 
139 | # PyInstaller
140 | #  Usually these files are written by a python script from a template
141 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
142 | *.manifest
143 | *.spec
144 | 
145 | # Installer logs
146 | pip-log.txt
147 | pip-delete-this-directory.txt
148 | 
149 | # Unit test / coverage reports
150 | htmlcov/
151 | .tox/
152 | .nox/
153 | .coverage
154 | .coverage.*
155 | .cache
156 | nosetests.xml
157 | coverage.xml
158 | *.cover
159 | .hypothesis/
160 | .pytest_cache/
161 | 
162 | # Translations
163 | *.mo
164 | *.pot
165 | 
166 | # Django stuff:
167 | *.log
168 | local_settings.py
169 | db.sqlite3
170 | 
171 | # Flask stuff:
172 | instance/
173 | .webassets-cache
174 | 
175 | # Scrapy stuff:
176 | .scrapy
177 | 
178 | # Sphinx documentation
179 | docs/_build/
180 | 
181 | # PyBuilder
182 | target/
183 | 
184 | # Jupyter Notebook
185 | 
186 | # IPython
187 | profile_default/
188 | ipython_config.py
189 | 
190 | # pyenv
191 | .python-version
192 | 
193 | # pipenv
194 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
195 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
196 | #   having no cross-platform support, pipenv may install dependencies that don’t work, or not
197 | #   install all needed dependencies.
198 | #Pipfile.lock
199 | 
200 | # celery beat schedule file
201 | celerybeat-schedule
202 | 
203 | # SageMath parsed files
204 | *.sage.py
205 | 
206 | # Environments
207 | .env
208 | .venv
209 | env/
210 | venv/
211 | ENV/
212 | env.bak/
213 | venv.bak/
214 | 
215 | # Spyder project settings
216 | .spyderproject
217 | .spyproject
218 | 
219 | # Rope project settings
220 | .ropeproject
221 | 
222 | # mkdocs documentation
223 | /site
224 | 
225 | # mypy
226 | .mypy_cache/
227 | .dmypy.json
228 | dmypy.json
229 | 
230 | # Pyre type checker
231 | .pyre/
232 | 
233 | # End of https://www.gitignore.io/api/python,pycharm,jupyternotebook,jupyternotebooks
234 | 
235 | *.aux
236 | *.log
237 | *.out
238 | *synctex.gz
239 | *.toc
240 | *.vrb
241 | *.snm
242 | 


--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="Encoding" addBOMForNewFiles="with NO BOM" />
4 | </project>


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="JavaScriptSettings">
4 |     <option name="languageLevel" value="ES6" />
5 |   </component>
6 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (py37)" project-jdk-type="Python SDK" />
7 | </project>


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/wl-graph-kernels.iml" filepath="$PROJECT_DIR$/.idea/wl-graph-kernels.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/.idea/wl-graph-kernels.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="jdk" jdkName="Python 3.7 (py37)" jdkType="Python SDK" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 lorenzo palloni
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Weisfeiler-Lehman Graph Kernels
 2 | 
 3 | ## Installation
 4 | 
 5 | Python >= 3.6 is supported.
 6 | 
 7 |     $ git clone https://github.com/deeplego/wl-graph-kernels.git
 8 |     $ cd wl-graph-kernels
 9 |     $ pip install -r requirements.txt
10 |     $ pip install .
11 | 
12 | ## Usage
13 | 
14 | To download the datasets of the experiments:
15 | 
16 |     $ cd data
17 |     $ ./download_datasets.sh
18 | 
19 | The experiments are replicated in the jupyter notebooks in the `notebooks`
20 | directory.
21 | 


--------------------------------------------------------------------------------
/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/data/.gitkeep


--------------------------------------------------------------------------------
/data/download_datasets.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Download the datasets used by the experiments into the current directory.
 4 | # Must be run from the ./data folder. Files that already exist are skipped,
 5 | # so the script can be re-run after a partial failure.
 6 | 
 7 | # Stop on the first failing command or unset variable, so a failed step is
 8 | # never followed by "exit 0".
 9 | set -euo pipefail
10 | 
11 | # Quote both substitutions: a checkout path containing spaces would otherwise
12 | # be split into several arguments to basename.
13 | if [[ "$(basename "$(pwd)")" != "data" ]]
14 | then
15 |     echo "This script must be run from ./data folder."
16 |     exit 1
17 | fi
18 | 
19 | # download URL TARGET
20 | # Fetch URL into ./TARGET unless it already exists. wget writes to a
21 | # "TARGET.part" file that is renamed only on success, so an interrupted
22 | # download never leaves a truncated TARGET that later runs would skip.
23 | download() {
24 |     local url="$1"
25 |     local target="$2"
26 | 
27 |     if [[ -e "./${target}" ]]
28 |     then
29 |         return 0
30 |     fi
31 | 
32 |     echo ">>> Downloading ${target}"
33 |     if ! wget -q -O "${target}.part" "${url}"
34 |     then
35 |         rm -f "${target}.part"
36 |         echo "Download of ${target} failed." >&2
37 |         exit 1
38 |     fi
39 |     mv "${target}.part" "${target}"
40 | }
41 | 
42 | download "https://ndownloader.figshare.com/files/1118822" "aifbfixed_complete.n3"
43 | download "http://data.bgs.ac.uk/downloads/Lexicon_NamedRockUnit.nt" "Lexicon_NamedRockUnit.nt"
44 | 
45 | exit 0
46 | 


--------------------------------------------------------------------------------
/example_graphs/07-Graph.dot:
--------------------------------------------------------------------------------
 1 | digraph G {  // Example graph: A1/A2 point to "class A", B1/B2 to "class B"; edges are labelled P1-P6
 2 |     rankdir = LR;  // lay ranks out left to right
 3 |     color = "blue2";
 4 |     node [color = "blue2", fontcolor = "blue2", style = "bold"];  // defaults for every node declared below
 5 |     edge [fontsize = 12, style = "bold"];  // defaults for every edge declared below
 6 |     
 7 |     A [label = "class A", style = "filled", color = "lightgrey"]  // class nodes override the blue node defaults
 8 |     B [label = "class B", style = "filled", color = "lightgrey"]
 9 | 
10 |     A1 -> A [label = "P1"]  // P1 edges link each instance node to its class node
11 |     A2 -> A [label = "P1"]
12 |     B2 -> B [label = "P1"]
13 |     B1 -> B [label = "P1"]
14 | 
15 |     A1 -> C [label = "P2"]  // P2/P3 edges go from the instance nodes to C..G
16 |     A1 -> D [label = "P3"]
17 |     A2 -> D [label = "P2"]
18 |     A2 -> E [label = "P3"]
19 |     B2 -> E [label = "P3"]
20 |     B2 -> F [label = "P2"]
21 |     B1 -> F [label = "P3"]
22 |     B1 -> G [label = "P2"]
23 | 
24 |     C -> H [label = "P4"]  // P4 edges converge on H, P5 edges on I
25 |     D -> H [label = "P4"]
26 |     F -> I [label = "P5"]
27 |     G -> I [label = "P5"]
28 | 
29 |     H -> A2 [label = "P6"]  // P6 edges lead back to A2 and B2, closing the cycles A2->D->H and B2->F->I
30 |     I -> B2 [label = "P6"]
31 |     
32 |     {rank = min; A; B;}  // rank constraints: class nodes on the minimum rank
33 |     {rank = same; A1; A2; B2; B1;}  // instance nodes share one rank
34 |     {rank = same; C; D; E; F; G;}
35 |     {rank = max; H; I;}  // H and I on the maximum rank
36 | }
37 | 


--------------------------------------------------------------------------------
/example_graphs/07-Graph.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-Graph.pdf


--------------------------------------------------------------------------------
/example_graphs/07-almost_relabeled.dot:
--------------------------------------------------------------------------------
 1 | digraph G {  // Depth-layered trees rooted at "ϵ (A1)" and "ϵ (B1)"; labels are comma-joined pairs such as "C,P2"
 2 |     
 3 |     newrank = true;
 4 |     rankdir = LR;  // lay ranks out left to right
 5 |     node[style="bold"]
 6 |     edge[style="bold"]
 7 | 
 8 |     color = "aquamarine4";  // depth-4 layer: the two roots
 9 |     node [color = "aquamarine4", fontcolor = "aquamarine4"];  // defaults apply to nodes declared from here on
10 |     root [label="ϵ (A1)"];
11 |     right_root [label="ϵ (B1)"];
12 |     depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];  // borderless marker naming the layer
13 |     {rank = same; depth_4_node; root; right_root};  // keep the marker and the layer's nodes on one rank
14 |     
15 |     color = "blue4";  // depth-3 layer
16 |     node [color = "blue4", fontcolor = "blue4"];
17 |     edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
18 |     C [label = "C,P2"];
19 |     D [label = "D,P3"];
20 |     root -> C [label = "P2,ϵ"];
21 |     root -> D [label = "P3,ϵ"];
22 |     right_F [label = "F,P3"];  // "right_" nodes belong to the tree rooted at B1
23 |     right_G [label = "G,P2"];
24 |     right_root -> right_F [label = "P3,ϵ"];
25 |     right_root -> right_G [label = "P2,ϵ"];
26 |     depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
27 |     {rank = same; depth_3_node; C; D; right_F; right_G};
28 |     
29 |     color = "cyan4";  // depth-2 layer
30 |     node [color = "cyan4", fontcolor = "cyan4"];
31 |     edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
32 |     H [label = "H,P4P4"];
33 |     C -> H [label = "P4,C"];
34 |     D -> H [label = "P4,D"];
35 |     right_I [label = "I,P5P5"];
36 |     right_F -> right_I [label = "P5,F"];
37 |     right_G -> right_I [label = "P5,G"];
38 |     depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
39 |     {rank = same; depth_2_node; H; right_I};
40 | 
41 |     
42 |     color = "darkorchid4";  // depth-1 layer
43 |     node [color = "darkorchid4", fontcolor = "darkorchid4"];
44 |     edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
45 |     A2 [label = "A2,P6"]
46 |     H -> A2 [label = "P6,H"];
47 |     right_B2 [label = "B2,P6"];
48 |     right_I -> right_B2 [label = "P6,I"];
49 |     depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
50 |     {rank = same; depth_1_node; right_B2; A2};
51 | 
52 |     color = "green4";  // depth-0 layer: E_0 is reached from both A2 and right_B2
53 |     node [color = "green4", fontcolor = "green4"];
54 |     edge [color = "green4", fontsize = 10, fontcolor = "green4"];
55 |     D_0 [label = "D,P2", style = "dotted"];  // dotted outline; NOTE(review): presumably marks nodes already seen at a higher depth - confirm
56 |     E_0 [label = "E,P3"];
57 |     F_0 [label = "F,P2", style = "dotted"];
58 |     A2 -> D_0 [label = "P2,A2"];
59 |     A2 -> E_0 [label = "P3,A2"];
60 |     right_B2 -> E_0 [label = "P3,B2"];
61 |     right_B2 -> F_0 [label = "P2,B2"];
62 | 
63 |     depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
64 |     {rank = same; depth_0_node; D_0; E_0; F_0};
65 | }
66 | 


--------------------------------------------------------------------------------
/example_graphs/07-almost_relabeled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-almost_relabeled.pdf


--------------------------------------------------------------------------------
/example_graphs/07-relabeled.dot:
--------------------------------------------------------------------------------
 1 | digraph G {  // Depth-layered trees rooted at "ϵ (A1)" and "ϵ (B1)" with integer labels on nodes and edges
 2 |     
 3 |     newrank = true;
 4 |     rankdir = LR;  // lay ranks out left to right
 5 |     node [style = "bold"]
 6 |     edge [style = "bold"]
 7 | 
 8 |     color = "aquamarine4";  // depth-4 layer; spelled as the X11 colour name used by the node defaults below
 9 |     node [color = "aquamarine4", fontcolor = "aquamarine4"];  // defaults apply to nodes declared from here on
10 |     root [label = "ϵ (A1)", fontcolor = "aquamarine4"];
11 |     right_root [label = "ϵ (B1)", fontcolor = "aquamarine4"];
12 |     depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];  // borderless marker naming the layer
13 |     {rank = same; depth_4_node; root; right_root};  // keep the marker and the layer's nodes on one rank
14 |     
15 |     color = "blue4";  // depth-3 layer
16 |     node [color = "blue4", fontcolor = "blue4"];
17 |     edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
18 |     C [label = "3"];
19 |     D [label = "4"];
20 |     root -> C [label = "1", fontcolor = "red2"];  // red labels "1"/"2" occur in both trees
21 |     root -> D [label = "2", fontcolor = "red2"];
22 |     right_F [label = "5"];  // "right_" nodes belong to the tree rooted at B1
23 |     right_G [label = "6"];
24 |     right_root -> right_F [label = "2", fontcolor = "red2"];
25 |     right_root -> right_G [label = "1", fontcolor = "red2"];
26 |     depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
27 |     {rank = same; depth_3_node; C; D; right_F; right_G};
28 |     
29 |     color = "cyan4";  // depth-2 layer
30 |     node [color = "cyan4", fontcolor = "cyan4"];
31 |     edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
32 |     H [label = "11"];
33 |     C -> H [label = "7"];
34 |     D -> H [label = "8"];
35 |     right_I [label = "12"];
36 |     right_F -> right_I [label = "9"];
37 |     right_G -> right_I [label = "10"];
38 |     depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
39 |     {rank = same; depth_2_node; H; right_I};
40 | 
41 |     
42 |     color = "darkorchid4";  // depth-1 layer
43 |     node [color = "darkorchid4", fontcolor = "darkorchid4"];
44 |     edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
45 |     A2 [label = "15"]
46 |     H -> A2 [label = "13"];
47 |     right_B2 [label = "16"];
48 |     right_I -> right_B2 [label = "14"];
49 |     depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
50 |     {rank = same; depth_1_node; right_B2; A2};
51 | 
52 |     color = "green4";  // depth-0 layer
53 |     node [color = "green4", fontcolor = "green4"];
54 |     edge [color = "green4", fontsize = 10, fontcolor = "green4"];
55 |     D_0 [label = "21", style = "dotted"];
56 |     E_0 [label = "22", fontcolor = "red2"];  // E_0 is the only depth-0 node reached from both A2 and right_B2
57 |     F_0 [label = "23", style = "dotted"];
58 |     A2 -> D_0 [label = "17"];
59 |     A2 -> E_0 [label = "18"];
60 |     right_B2 -> E_0 [label = "19"];
61 |     right_B2 -> F_0 [label = "20"];
62 | 
63 |     depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
64 |     {rank = same; depth_0_node; D_0; E_0; F_0};
65 | }
66 | 


--------------------------------------------------------------------------------
/example_graphs/07-relabeled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-relabeled.pdf


--------------------------------------------------------------------------------
/example_graphs/07-relabeled_vertical.dot:
--------------------------------------------------------------------------------
 1 | digraph G {  // Same integer-labelled trees as 07-relabeled.dot, without rankdir (default top-to-bottom layout)
 2 |     
 3 |     newrank = true;
 4 |     node [style = "bold"]
 5 |     edge [style = "bold"]
 6 | 
 7 |     color = "aquamarine4";  // depth-4 layer; spelled as the X11 colour name used by the node defaults below
 8 |     node [color = "aquamarine4", fontcolor = "aquamarine4"];  // defaults apply to nodes declared from here on
 9 |     root [label = "ϵ", fontcolor = "aquamarine4"];
10 |     right_root [label = "ϵ", fontcolor = "aquamarine4"];
11 |     depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];  // borderless marker naming the layer
12 |     {rank = same; depth_4_node; root; right_root};  // keep the marker and the layer's nodes on one rank
13 |     
14 |     color = "blue4";  // depth-3 layer
15 |     node [color = "blue4", fontcolor = "blue4"];
16 |     edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
17 |     C [label = "3"];
18 |     D [label = "4"];
19 |     root -> C [label = "1", fontcolor = "red2"];  // red labels "1"/"2" occur in both trees
20 |     root -> D [label = "2", fontcolor = "red2"];
21 |     right_F [label = "5"];  // "right_" nodes belong to the second tree
22 |     right_G [label = "6"];
23 |     right_root -> right_F [label = "2", fontcolor = "red2"];
24 |     right_root -> right_G [label = "1", fontcolor = "red2"];
25 |     depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
26 |     {rank = same; depth_3_node; C; D; right_F; right_G};
27 |     
28 |     color = "cyan4";  // depth-2 layer
29 |     node [color = "cyan4", fontcolor = "cyan4"];
30 |     edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
31 |     H [label = "11"];
32 |     C -> H [label = "7"];
33 |     D -> H [label = "8"];
34 |     right_I [label = "12"];
35 |     right_F -> right_I [label = "9"];
36 |     right_G -> right_I [label = "10"];
37 |     depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
38 |     {rank = same; depth_2_node; H; right_I};
39 | 
40 |     
41 |     color = "darkorchid4";  // depth-1 layer
42 |     node [color = "darkorchid4", fontcolor = "darkorchid4"];
43 |     edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
44 |     A2 [label = "15"]
45 |     H -> A2 [label = "13"];
46 |     right_B2 [label = "16"];
47 |     right_I -> right_B2 [label = "14"];
48 |     depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
49 |     {rank = same; depth_1_node; right_B2; A2};
50 | 
51 |     color = "green4";  // depth-0 layer
52 |     node [color = "green4", fontcolor = "green4"];
53 |     edge [color = "green4", fontsize = 10, fontcolor = "green4"];
54 |     D_0 [label = "21", style = "dotted"];
55 |     E_0 [label = "22", fontcolor = "red2"];  // E_0 is the only depth-0 node reached from both A2 and right_B2
56 |     F_0 [label = "23", style = "dotted"];
57 |     A2 -> D_0 [label = "17"];
58 |     A2 -> E_0 [label = "18"];
59 |     right_B2 -> E_0 [label = "19"];
60 |     right_B2 -> F_0 [label = "20"];
61 | 
62 |     depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
63 |     {rank = same; depth_0_node; D_0; E_0; F_0};
64 | }
65 | 


--------------------------------------------------------------------------------
/example_graphs/07-relabeled_vertical.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-relabeled_vertical.pdf


--------------------------------------------------------------------------------
/example_graphs/07-subGraph_A1_B1.dot:
--------------------------------------------------------------------------------
 1 | digraph G {  // Depth-layered trees rooted at "ϵ (A1)" and "ϵ (B1)", keeping the original node names and P-labels
 2 |     
 3 |     newrank = true;
 4 |     rankdir = LR;  // lay ranks out left to right
 5 |     node[style = "bold"]
 6 |     edge[style = "bold"]
 7 |     color = "aquamarine4";  // depth-4 layer: the two roots
 8 |     node [color = "aquamarine4", fontcolor = "aquamarine4"];  // defaults apply to nodes declared from here on
 9 |     root [label="ϵ (A1)"];
10 |     o_root [label="ϵ (B1)"];  // "o_" nodes belong to the tree rooted at B1
11 |     depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];  // borderless marker naming the layer
12 |     {rank = same; depth_4_node; root; o_root};  // keep the marker and the layer's nodes on one rank
13 |     
14 |     color = "blue4";  // depth-3 layer
15 |     node [color = "blue4", fontcolor = "blue4"];
16 |     edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
17 |     root -> C [label = "P2", fontcolor = "red2"];  // C and D are first mentioned here, so they take the blue4 node defaults
18 |     root -> D [label = "P3", fontcolor = "red2"];
19 |     o_F [label = "F"];
20 |     o_G [label = "G"];
21 |     o_root -> o_F [label = "P3", fontcolor = "red2"];
22 |     o_root -> o_G [label = "P2", fontcolor = "red2"];
23 |     depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
24 |     {rank = same; depth_3_node; C; D; o_F; o_G};
25 |     
26 |     color = "cyan4";  // depth-2 layer
27 |     node [color = "cyan4", fontcolor = "cyan4"];
28 |     edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
29 |     C -> H [label = "P4"];  // H is first mentioned here, so it takes the cyan4 node defaults
30 |     D -> H [label = "P4"];
31 |     o_I [label = "I"];
32 |     o_F -> o_I [label = "P5"];
33 |     o_G -> o_I [label = "P5"];
34 |     depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
35 |     {rank = same; depth_2_node; H; o_I};
36 | 
37 |     
38 |     color = "darkorchid4";  // depth-1 layer
39 |     node [color = "darkorchid4", fontcolor = "darkorchid4"];
40 |     edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
41 |     H -> A2 [label = "P6", fontcolor = "red2"];
42 |     o_B2 [label = "B2"];
43 |     o_I -> o_B2 [label = "P6", fontcolor = "red2"];
44 |     depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
45 |     {rank = same; depth_1_node; o_B2; A2};
46 | 
47 |     color = "green4";  // depth-0 layer: E_0 is reached from both A2 and o_B2
48 |     node [color = "green4", fontcolor = "green4"];
49 |     edge [color = "green4", fontsize = 10, fontcolor = "green4"];
50 |     D_0 [label = "D", style = "dotted"];  // "_0" suffix keeps these distinct from the depth-3 nodes that show the same letters
51 |     E_0 [label = "E", fontcolor = "red2"];
52 |     F_0 [label = "F", style = "dotted"];
53 |     A2 -> D_0 [label = "P2", fontcolor = "red2"];
54 |     A2 -> E_0 [label = "P3", fontcolor = "red2"];
55 |     o_B2 -> E_0 [label = "P3", fontcolor = "red2"];
56 |     o_B2 -> F_0 [label = "P2", fontcolor = "red2"];
57 | 
58 |     depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
59 |     {rank = same; depth_0_node; D_0; E_0; F_0};
60 | }
61 | 


--------------------------------------------------------------------------------
/example_graphs/07-subGraph_A1_B1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-subGraph_A1_B1.pdf


--------------------------------------------------------------------------------
/example_graphs/07-subGraph_A1_B1_vertical.dot:
--------------------------------------------------------------------------------
 1 | digraph G {  // Same two trees as 07-subGraph_A1_B1.dot, without rankdir (default top-to-bottom layout); roots labelled "ϵ"
 2 |     
 3 |     newrank = true;
 4 |     node[style = "bold"]
 5 |     edge[style = "bold"]
 6 |     color = "aquamarine4";  // depth-4 layer: the two roots
 7 |     node [color = "aquamarine4", fontcolor = "aquamarine4"];  // defaults apply to nodes declared from here on
 8 |     root [label="ϵ"];
 9 |     o_root [label="ϵ"];  // "o_" nodes belong to the second tree
10 |     depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];  // borderless marker naming the layer
11 |     {rank = same; depth_4_node; root; o_root};  // keep the marker and the layer's nodes on one rank
12 |     
13 |     color = "blue4";  // depth-3 layer
14 |     node [color = "blue4", fontcolor = "blue4"];
15 |     edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
16 |     root -> C [label = "P2", fontcolor = "red2"];  // C and D are first mentioned here, so they take the blue4 node defaults
17 |     root -> D [label = "P3", fontcolor = "red2"];
18 |     o_F [label = "F"];
19 |     o_G [label = "G"];
20 |     o_root -> o_F [label = "P3", fontcolor = "red2"];
21 |     o_root -> o_G [label = "P2", fontcolor = "red2"];
22 |     depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
23 |     {rank = same; depth_3_node; C; D; o_F; o_G};
24 |     
25 |     color = "cyan4";  // depth-2 layer
26 |     node [color = "cyan4", fontcolor = "cyan4"];
27 |     edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
28 |     C -> H [label = "P4"];  // H is first mentioned here, so it takes the cyan4 node defaults
29 |     D -> H [label = "P4"];
30 |     o_I [label = "I"];
31 |     o_F -> o_I [label = "P5"];
32 |     o_G -> o_I [label = "P5"];
33 |     depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
34 |     {rank = same; depth_2_node; H; o_I};
35 | 
36 |     
37 |     color = "darkorchid4";  // depth-1 layer
38 |     node [color = "darkorchid4", fontcolor = "darkorchid4"];
39 |     edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
40 |     H -> A2 [label = "P6", fontcolor = "red2"];
41 |     o_B2 [label = "B2"];
42 |     o_I -> o_B2 [label = "P6", fontcolor = "red2"];
43 |     depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
44 |     {rank = same; depth_1_node; o_B2; A2};
45 | 
46 |     color = "green4";  // depth-0 layer: E_0 is reached from both A2 and o_B2
47 |     node [color = "green4", fontcolor = "green4"];
48 |     edge [color = "green4", fontsize = 10, fontcolor = "green4"];
49 |     D_0 [label = "D", style = "dotted"];  // "_0" suffix keeps these distinct from the depth-3 nodes that show the same letters
50 |     E_0 [label = "E", fontcolor = "red2"];
51 |     F_0 [label = "F", style = "dotted"];
52 |     A2 -> D_0 [label = "P2", fontcolor = "red2"];
53 |     A2 -> E_0 [label = "P3", fontcolor = "red2"];
54 |     o_B2 -> E_0 [label = "P3", fontcolor = "red2"];
55 |     o_B2 -> F_0 [label = "P2", fontcolor = "red2"];
56 | 
57 |     depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
58 |     {rank = same; depth_0_node; D_0; E_0; F_0};
59 | }
60 | 


--------------------------------------------------------------------------------
/example_graphs/07-subGraph_A1_B1_vertical.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/example_graphs/07-subGraph_A1_B1_vertical.pdf


--------------------------------------------------------------------------------
/example_graphs/\:
--------------------------------------------------------------------------------
 1 | digraph G {
 2 |     
 3 |     newrank = true;
 4 |     rankdir = LR;
 5 |     node[style="bold"]
 6 |     edge[style="bold"]
 7 | 
 8 |     color = "aquamarine4";
 9 |     node [color = "aquamarine4", fontcolor = "aquamarine4"];
10 |     root [label="ϵ (A1)"];
11 |     right_root [label="ϵ (B1)"];
12 |     depth_4_node[label = "d = 4", shape = plaintext, fontcolor = "aquamarine4"];
13 |     {rank = same; depth_4_node; root; right_root};
14 |     
15 |     color = "blue4";
16 |     node [color = "blue4", fontcolor = "blue4"];
17 |     edge [color = "blue4", fontsize = 10, fontcolor = "blue4"];
18 |     C [label = "C,P2"];
19 |     D [label = "D,P3"];
20 |     root -> C [label = "P2,ϵ"];
21 |     root -> D [label = "P3,ϵ"];
22 |     right_F [label = "F,P3"];
23 |     right_G [label = "G,P2"];
24 |     right_root -> right_F [label = "P3,ϵ"];
25 |     right_root -> right_G [label = "P2,ϵ"];
26 |     depth_3_node[label = "d = 3", shape = plaintext, fontcolor = "blue4"];
27 |     {rank = same; depth_3_node; C; D; right_F; right_G};
28 |     
29 |     color = "cyan4";
30 |     node [color = "cyan4", fontcolor = "cyan4"];
31 |     edge [color = "cyan4", fontsize = 10, fontcolor = "cyan4"];
32 |     H [label = "H,P4P4"];
33 |     C -> H [label = "P4,C"];
34 |     D -> H [label = "P4,D"];
35 |     right_I [label = "I,P5P5"];
36 |     right_F -> right_I [label = "P5,F"];
37 |     right_G -> right_I [label = "P5,G"];
38 |     depth_2_node[label = "d = 2", shape = plaintext, fontcolor = "cyan4"];
39 |     {rank = same; depth_2_node; H; right_I};
40 | 
41 |     
42 |     color = "darkorchid4";
43 |     node [color = "darkorchid4", fontcolor = "darkorchid4"];
44 |     edge [color = "darkorchid4", fontsize = 10, fontcolor = "darkorchid4"];
45 |     A2 [label = "A2,P6"]
46 |     H -> A2 [label = "P6,H"];
47 |     right_B2 [label = "B2,P6"];
48 |     right_I -> right_B2 [label = "P6,I"];
49 |     depth_1_node[label = "d = 1", shape = plaintext, fontcolor = "darkorchid4"];
50 |     {rank = same; depth_1_node; right_B2; A2};
51 | 
52 |     color = "green4";
53 |     node [color = "green4", fontcolor = "green4"];
54 |     edge [color = "green4", fontsize = 10, fontcolor = "green4"];
55 |     D_0 [label = "D,P2", style = "dotted"];
56 |     E_0 [label = "E,P3"];
57 |     F_0 [label = "F,P2", style = "dotted"];
58 |     A2 -> D_0 [label = "P2,A2"];
59 |     A2 -> E_0 [label = "P3,A2"];
60 |     right_B2 -> E_0 [label = "P3,B2"];
61 |     right_B2 -> F_0 [label = "P2,B2"];
62 | 
63 |     depth_0_node[label = "d = 0", shape = plaintext, fontcolor = "green4"];
64 |     {rank = same; depth_0_node; D_0; E_0; F_0};
65 | }
66 | 


--------------------------------------------------------------------------------
/notebooks/affiliation_scores.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import sys\n",
 10 |     "sys.path.insert(0, '../')"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "from collections import Counter, OrderedDict\n",
 20 |     "import warnings\n",
 21 |     "\n",
 22 |     "import rdflib\n",
 23 |     "import numpy as np\n",
 24 |     "import pandas as pd\n",
 25 |     "from pprint import pprint\n",
 26 |     "from sklearn import svm\n",
 27 |     "from sklearn.model_selection import cross_validate\n",
 28 |     "\n",
 29 |     "import wlkernel"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 3,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "warnings.simplefilter('ignore')"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 4,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "rdf_graph = rdflib.Graph().parse('../data/aifbfixed_complete.n3', format='n3')"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 5,
 53 |    "metadata": {},
 54 |    "outputs": [
 55 |     {
 56 |      "name": "stdout",
 57 |      "output_type": "stream",
 58 |      "text": [
 59 |       "Most common classes with predicate equal to 'affiliation':\n",
 60 |       "[('http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id1instance',\n",
 61 |       "  73),\n",
 62 |       " ('http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id3instance',\n",
 63 |       "  60),\n",
 64 |       " ('http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id2instance',\n",
 65 |       "  28),\n",
 66 |       " ('http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id4instance',\n",
 67 |       "  16),\n",
 68 |       " ('http://www.aifb.uni-karlsruhe.de/Forschungsgruppen/viewForschungsgruppeOWL/id5instance',\n",
 69 |       "  1)]\n"
 70 |      ]
 71 |     }
 72 |    ],
 73 |    "source": [
 74 |     "affiliation_most_common = Counter(\n",
 75 |     "    str(o) \n",
 76 |     "    for s, p, o in rdf_graph\n",
 77 |     "    if 'affiliation' in str(p)\n",
 78 |     ").most_common()\n",
 79 |     "print(\"Most common classes with predicate equal to 'affiliation':\")\n",
 80 |     "pprint(affiliation_most_common)"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 6,
 86 |    "metadata": {},
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "instances_class_map = {\n",
 90 |     "    str(s): str(o) for s, p, o in rdf_graph \n",
 91 |     "    if 'affiliation' in str(p)\n",
 92 |     "    and 'id5instance' not in str(o)\n",
 93 |     "}\n",
 94 |     "instances = list(instances_class_map.keys())\n",
 95 |     "y = list(instances_class_map.values())"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 7,
101 |    "metadata": {},
102 |    "outputs": [
103 |     {
104 |      "name": "stdout",
105 |      "output_type": "stream",
106 |      "text": [
107 |       "number of triples: 28699\n"
108 |      ]
109 |     }
110 |    ],
111 |    "source": [
112 |     "triples = list(\n",
113 |     "    (str(s), str(p), str(o)) for s, p, o in rdf_graph\n",
114 |     "    if 'affiliation' not in str(p)\n",
115 |     "    and 'employs' not in str(p)\n",
116 |     "    and 'member' not in str(p)\n",
117 |     "    and 'head' not in str(p)\n",
118 |     ")\n",
119 |     "print('number of triples:', len(triples))"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": 44,
125 |    "metadata": {},
126 |    "outputs": [],
127 |    "source": [
128 |     "wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=1)"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 45,
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": [
137 |     "for i in range(len(wlrdf_graph.labels)):\n",
138 |     "    for k in wlrdf_graph.labels[i].keys():\n",
139 |     "        wlrdf_graph.labels[i][k] = 'banana'"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 48,
145 |    "metadata": {},
146 |    "outputs": [],
147 |    "source": [
148 |     "kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=0)\n",
149 |     "kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "### Weisfeiler-Lehman RDF"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": 11,
162 |    "metadata": {},
163 |    "outputs": [],
164 |    "source": [
165 |     "RANDOM_STATE = 42\n",
166 |     "\n",
167 |     "depth_values = [1, 2, 3]\n",
168 |     "iteration_values =  [0, 2, 4, 6]\n",
169 |     "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
170 |     "\n",
171 |     "results = OrderedDict()\n",
172 |     "\n",
173 |     "for d in depth_values:\n",
174 |     "    for it in iteration_values:\n",
175 |     "        wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=d)\n",
176 |     "        kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=it)\n",
177 |     "        kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
178 |     "        \n",
179 |     "        results[(d, it)] = [0, 0, 0]\n",
180 |     "        for c in C_values:\n",
181 |     "            classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
182 |     "            scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
183 |     "            \n",
184 |     "            acc_mean = scores['test_accuracy'].mean()\n",
185 |     "            f1_mean = scores['test_f1_macro'].mean()\n",
186 |     "            \n",
187 |     "            if acc_mean > results[(d, it)][0]:\n",
188 |     "                results[(d, it)] = [acc_mean, f1_mean, c]"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 12,
194 |    "metadata": {},
195 |    "outputs": [
196 |     {
197 |      "data": {
198 |       "text/html": [
199 |        "<div>\n",
200 |        "<style scoped>\n",
201 |        "    .dataframe tbody tr th:only-of-type {\n",
202 |        "        vertical-align: middle;\n",
203 |        "    }\n",
204 |        "\n",
205 |        "    .dataframe tbody tr th {\n",
206 |        "        vertical-align: top;\n",
207 |        "    }\n",
208 |        "\n",
209 |        "    .dataframe thead th {\n",
210 |        "        text-align: right;\n",
211 |        "    }\n",
212 |        "</style>\n",
213 |        "<table border=\"1\" class=\"dataframe\">\n",
214 |        "  <thead>\n",
215 |        "    <tr style=\"text-align: right;\">\n",
216 |        "      <th></th>\n",
217 |        "      <th></th>\n",
218 |        "      <th>accuracy</th>\n",
219 |        "      <th>f1</th>\n",
220 |        "      <th>C</th>\n",
221 |        "    </tr>\n",
222 |        "    <tr>\n",
223 |        "      <th>depth</th>\n",
224 |        "      <th>iterations</th>\n",
225 |        "      <th></th>\n",
226 |        "      <th></th>\n",
227 |        "      <th></th>\n",
228 |        "    </tr>\n",
229 |        "  </thead>\n",
230 |        "  <tbody>\n",
231 |        "    <tr>\n",
232 |        "      <th rowspan=\"4\" valign=\"top\">1</th>\n",
233 |        "      <th>0</th>\n",
234 |        "      <td>0.881955</td>\n",
235 |        "      <td>0.795756</td>\n",
236 |        "      <td>100.0</td>\n",
237 |        "    </tr>\n",
238 |        "    <tr>\n",
239 |        "      <th>2</th>\n",
240 |        "      <td>0.881955</td>\n",
241 |        "      <td>0.795756</td>\n",
242 |        "      <td>100.0</td>\n",
243 |        "    </tr>\n",
244 |        "    <tr>\n",
245 |        "      <th>4</th>\n",
246 |        "      <td>0.881955</td>\n",
247 |        "      <td>0.795756</td>\n",
248 |        "      <td>100.0</td>\n",
249 |        "    </tr>\n",
250 |        "    <tr>\n",
251 |        "      <th>6</th>\n",
252 |        "      <td>0.881955</td>\n",
253 |        "      <td>0.795756</td>\n",
254 |        "      <td>100.0</td>\n",
255 |        "    </tr>\n",
256 |        "    <tr>\n",
257 |        "      <th rowspan=\"4\" valign=\"top\">2</th>\n",
258 |        "      <th>0</th>\n",
259 |        "      <td>0.892114</td>\n",
260 |        "      <td>0.826007</td>\n",
261 |        "      <td>100.0</td>\n",
262 |        "    </tr>\n",
263 |        "    <tr>\n",
264 |        "      <th>2</th>\n",
265 |        "      <td>0.880057</td>\n",
266 |        "      <td>0.812488</td>\n",
267 |        "      <td>100.0</td>\n",
268 |        "    </tr>\n",
269 |        "    <tr>\n",
270 |        "      <th>4</th>\n",
271 |        "      <td>0.874501</td>\n",
272 |        "      <td>0.803701</td>\n",
273 |        "      <td>100.0</td>\n",
274 |        "    </tr>\n",
275 |        "    <tr>\n",
276 |        "      <th>6</th>\n",
277 |        "      <td>0.874501</td>\n",
278 |        "      <td>0.800821</td>\n",
279 |        "      <td>100.0</td>\n",
280 |        "    </tr>\n",
281 |        "    <tr>\n",
282 |        "      <th rowspan=\"4\" valign=\"top\">3</th>\n",
283 |        "      <th>0</th>\n",
284 |        "      <td>0.879579</td>\n",
285 |        "      <td>0.812187</td>\n",
286 |        "      <td>100.0</td>\n",
287 |        "    </tr>\n",
288 |        "    <tr>\n",
289 |        "      <th>2</th>\n",
290 |        "      <td>0.913751</td>\n",
291 |        "      <td>0.867388</td>\n",
292 |        "      <td>100.0</td>\n",
293 |        "    </tr>\n",
294 |        "    <tr>\n",
295 |        "      <th>4</th>\n",
296 |        "      <td>0.908196</td>\n",
297 |        "      <td>0.863829</td>\n",
298 |        "      <td>100.0</td>\n",
299 |        "    </tr>\n",
300 |        "    <tr>\n",
301 |        "      <th>6</th>\n",
302 |        "      <td>0.908196</td>\n",
303 |        "      <td>0.863829</td>\n",
304 |        "      <td>100.0</td>\n",
305 |        "    </tr>\n",
306 |        "  </tbody>\n",
307 |        "</table>\n",
308 |        "</div>"
309 |       ],
310 |       "text/plain": [
311 |        "                  accuracy        f1      C\n",
312 |        "depth iterations                           \n",
313 |        "1     0           0.881955  0.795756  100.0\n",
314 |        "      2           0.881955  0.795756  100.0\n",
315 |        "      4           0.881955  0.795756  100.0\n",
316 |        "      6           0.881955  0.795756  100.0\n",
317 |        "2     0           0.892114  0.826007  100.0\n",
318 |        "      2           0.880057  0.812488  100.0\n",
319 |        "      4           0.874501  0.803701  100.0\n",
320 |        "      6           0.874501  0.800821  100.0\n",
321 |        "3     0           0.879579  0.812187  100.0\n",
322 |        "      2           0.913751  0.867388  100.0\n",
323 |        "      4           0.908196  0.863829  100.0\n",
324 |        "      6           0.908196  0.863829  100.0"
325 |       ]
326 |      },
327 |      "execution_count": 12,
328 |      "metadata": {},
329 |      "output_type": "execute_result"
330 |     }
331 |    ],
332 |    "source": [
333 |     "fn = 'wlrdf_affiliation_results_with_normalization'\n",
334 |     "\n",
335 |     "df_res = pd.DataFrame(index=list(results.keys()))\n",
336 |     "df_res['accuracy'] = [t[0] for t in results.values()]\n",
337 |     "df_res['f1'] = [t[1] for t in results.values()]\n",
338 |     "df_res['C'] = [t[2] for t in results.values()]\n",
339 |     "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
340 |     "df_res.to_csv(f'../results/{fn}.csv')\n",
341 |     "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
342 |     "df_res_test.to_html(f'../results/{fn}.html')\n",
343 |     "df_res_test"
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "markdown",
348 |    "metadata": {},
349 |    "source": [
350 |     "### Weisfeiler-Lehman"
351 |    ]
352 |   },
353 |   {
354 |    "cell_type": "code",
355 |    "execution_count": 13,
356 |    "metadata": {},
357 |    "outputs": [],
358 |    "source": [
359 |     "RANDOM_STATE = 42\n",
360 |     "\n",
361 |     "depth_values = [1, 2, 3]\n",
362 |     "iteration_values =  [0, 2, 4, 6]\n",
363 |     "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
364 |     "\n",
365 |     "results = OrderedDict()\n",
366 |     "\n",
367 |     "for d in depth_values:\n",
368 |     "    for it in iteration_values:\n",
369 |     "        wl_graphs = [wlkernel.WLGraph(triples, instance, max_depth=d) for instance in instances]\n",
370 |     "        kernel_matrix = wlkernel.wl_kernel_matrix(wl_graphs, iterations=it)\n",
371 |     "        kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
372 |     "        \n",
373 |     "        results[(d, it)] = [0, 0, 0]\n",
374 |     "        for c in C_values:\n",
375 |     "            classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
376 |     "            scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
377 |     "            \n",
378 |     "            acc_mean = scores['test_accuracy'].mean()\n",
379 |     "            f1_mean = scores['test_f1_macro'].mean()\n",
380 |     "            \n",
381 |     "            if acc_mean > results[(d, it)][0]:\n",
382 |     "                results[(d, it)] = [acc_mean, f1_mean, c]"
383 |    ]
384 |   },
385 |   {
386 |    "cell_type": "code",
387 |    "execution_count": 14,
388 |    "metadata": {},
389 |    "outputs": [
390 |     {
391 |      "data": {
392 |       "text/html": [
393 |        "<div>\n",
394 |        "<style scoped>\n",
395 |        "    .dataframe tbody tr th:only-of-type {\n",
396 |        "        vertical-align: middle;\n",
397 |        "    }\n",
398 |        "\n",
399 |        "    .dataframe tbody tr th {\n",
400 |        "        vertical-align: top;\n",
401 |        "    }\n",
402 |        "\n",
403 |        "    .dataframe thead th {\n",
404 |        "        text-align: right;\n",
405 |        "    }\n",
406 |        "</style>\n",
407 |        "<table border=\"1\" class=\"dataframe\">\n",
408 |        "  <thead>\n",
409 |        "    <tr style=\"text-align: right;\">\n",
410 |        "      <th></th>\n",
411 |        "      <th></th>\n",
412 |        "      <th>accuracy</th>\n",
413 |        "      <th>f1</th>\n",
414 |        "      <th>C</th>\n",
415 |        "    </tr>\n",
416 |        "    <tr>\n",
417 |        "      <th>depth</th>\n",
418 |        "      <th>iterations</th>\n",
419 |        "      <th></th>\n",
420 |        "      <th></th>\n",
421 |        "      <th></th>\n",
422 |        "    </tr>\n",
423 |        "  </thead>\n",
424 |        "  <tbody>\n",
425 |        "    <tr>\n",
426 |        "      <th rowspan=\"4\" valign=\"top\">1</th>\n",
427 |        "      <th>0</th>\n",
428 |        "      <td>0.881955</td>\n",
429 |        "      <td>0.795756</td>\n",
430 |        "      <td>100.0</td>\n",
431 |        "    </tr>\n",
432 |        "    <tr>\n",
433 |        "      <th>2</th>\n",
434 |        "      <td>0.868761</td>\n",
435 |        "      <td>0.788673</td>\n",
436 |        "      <td>100.0</td>\n",
437 |        "    </tr>\n",
438 |        "    <tr>\n",
439 |        "      <th>4</th>\n",
440 |        "      <td>0.868761</td>\n",
441 |        "      <td>0.788673</td>\n",
442 |        "      <td>100.0</td>\n",
443 |        "    </tr>\n",
444 |        "    <tr>\n",
445 |        "      <th>6</th>\n",
446 |        "      <td>0.868761</td>\n",
447 |        "      <td>0.788673</td>\n",
448 |        "      <td>100.0</td>\n",
449 |        "    </tr>\n",
450 |        "    <tr>\n",
451 |        "      <th rowspan=\"4\" valign=\"top\">2</th>\n",
452 |        "      <th>0</th>\n",
453 |        "      <td>0.886851</td>\n",
454 |        "      <td>0.819787</td>\n",
455 |        "      <td>100.0</td>\n",
456 |        "    </tr>\n",
457 |        "    <tr>\n",
458 |        "      <th>2</th>\n",
459 |        "      <td>0.858127</td>\n",
460 |        "      <td>0.781563</td>\n",
461 |        "      <td>100.0</td>\n",
462 |        "    </tr>\n",
463 |        "    <tr>\n",
464 |        "      <th>4</th>\n",
465 |        "      <td>0.770446</td>\n",
466 |        "      <td>0.604246</td>\n",
467 |        "      <td>100.0</td>\n",
468 |        "    </tr>\n",
469 |        "    <tr>\n",
470 |        "      <th>6</th>\n",
471 |        "      <td>0.752758</td>\n",
472 |        "      <td>0.579145</td>\n",
473 |        "      <td>100.0</td>\n",
474 |        "    </tr>\n",
475 |        "    <tr>\n",
476 |        "      <th rowspan=\"4\" valign=\"top\">3</th>\n",
477 |        "      <th>0</th>\n",
478 |        "      <td>0.884843</td>\n",
479 |        "      <td>0.818408</td>\n",
480 |        "      <td>100.0</td>\n",
481 |        "    </tr>\n",
482 |        "    <tr>\n",
483 |        "      <th>2</th>\n",
484 |        "      <td>0.890800</td>\n",
485 |        "      <td>0.824622</td>\n",
486 |        "      <td>100.0</td>\n",
487 |        "    </tr>\n",
488 |        "    <tr>\n",
489 |        "      <th>4</th>\n",
490 |        "      <td>0.897343</td>\n",
491 |        "      <td>0.840694</td>\n",
492 |        "      <td>100.0</td>\n",
493 |        "    </tr>\n",
494 |        "    <tr>\n",
495 |        "      <th>6</th>\n",
496 |        "      <td>0.896356</td>\n",
497 |        "      <td>0.821343</td>\n",
498 |        "      <td>100.0</td>\n",
499 |        "    </tr>\n",
500 |        "  </tbody>\n",
501 |        "</table>\n",
502 |        "</div>"
503 |       ],
504 |       "text/plain": [
505 |        "                  accuracy        f1      C\n",
506 |        "depth iterations                           \n",
507 |        "1     0           0.881955  0.795756  100.0\n",
508 |        "      2           0.868761  0.788673  100.0\n",
509 |        "      4           0.868761  0.788673  100.0\n",
510 |        "      6           0.868761  0.788673  100.0\n",
511 |        "2     0           0.886851  0.819787  100.0\n",
512 |        "      2           0.858127  0.781563  100.0\n",
513 |        "      4           0.770446  0.604246  100.0\n",
514 |        "      6           0.752758  0.579145  100.0\n",
515 |        "3     0           0.884843  0.818408  100.0\n",
516 |        "      2           0.890800  0.824622  100.0\n",
517 |        "      4           0.897343  0.840694  100.0\n",
518 |        "      6           0.896356  0.821343  100.0"
519 |       ]
520 |      },
521 |      "execution_count": 14,
522 |      "metadata": {},
523 |      "output_type": "execute_result"
524 |     }
525 |    ],
526 |    "source": [
527 |     "fn = 'wl_affiliation_results_with_normalization'\n",
528 |     "\n",
529 |     "df_res = pd.DataFrame(index=list(results.keys()))\n",
530 |     "df_res['accuracy'] = [t[0] for t in results.values()]\n",
531 |     "df_res['f1'] = [t[1] for t in results.values()]\n",
532 |     "df_res['C'] = [t[2] for t in results.values()]\n",
533 |     "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
534 |     "df_res.to_csv(f'../results/{fn}.csv')\n",
535 |     "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
536 |     "df_res_test.to_html(f'../results/{fn}.html')\n",
537 |     "df_res_test"
538 |    ]
539 |   }
540 |  ],
541 |  "metadata": {
542 |   "kernelspec": {
543 |    "display_name": "Python 3",
544 |    "language": "python",
545 |    "name": "python3"
546 |   },
547 |   "language_info": {
548 |    "codemirror_mode": {
549 |     "name": "ipython",
550 |     "version": 3
551 |    },
552 |    "file_extension": ".py",
553 |    "mimetype": "text/x-python",
554 |    "name": "python",
555 |    "nbconvert_exporter": "python",
556 |    "pygments_lexer": "ipython3",
557 |    "version": "3.7.3"
558 |   }
559 |  },
560 |  "nbformat": 4,
561 |  "nbformat_minor": 2
562 | }
563 | 


--------------------------------------------------------------------------------
/notebooks/affiliation_timing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import sys\n",
 10 |     "sys.path.insert(0, '../')"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "from collections import Counter\n",
 20 |     "import time\n",
 21 |     "import random\n",
 22 |     "\n",
 23 |     "import rdflib\n",
 24 |     "import numpy as np\n",
 25 |     "from pprint import pprint\n",
 26 |     "from sklearn import svm\n",
 27 |     "\n",
 28 |     "import wlkernel"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 3,
 34 |    "metadata": {},
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "rdf_graph = rdflib.Graph().parse('../data/aifbfixed_complete.n3', format='n3')"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 4,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "all_triples = [\n",
 47 |     "    (str(subj), str(pred), str(obj))\n",
 48 |     "     for subj, pred, obj in rdf_graph\n",
 49 |     "]"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 5,
 55 |    "metadata": {},
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "quantiles = np.linspace(0.1, 1, 10)  # [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]\n",
 59 |     "results_wlrdf = []\n",
 60 |     "results_wl = []\n",
 61 |     "n = len(all_triples)\n",
 62 |     "RANDOM_STATE = 42"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 6,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "for q in quantiles:\n",
 72 |     "    n_sub = int(n * q)\n",
 73 |     "    random.seed(RANDOM_STATE)\n",
 74 |     "    triples = random.sample(all_triples, n_sub)\n",
 75 |     "    \n",
 76 |     "    instances_class_map = {\n",
 77 |     "        subj: obj\n",
 78 |     "        for subj, pred, obj in triples\n",
 79 |     "        if 'affiliation' in pred\n",
 80 |     "        and 'id5instance' not in obj\n",
 81 |     "    }\n",
 82 |     "    instances = list(instances_class_map.keys())\n",
 83 |     "    y = list(instances_class_map.values())\n",
 84 |     "    \n",
 85 |     "    triples = [\n",
 86 |     "        (subj, pred, obj)\n",
 87 |     "        for subj, pred, obj in triples\n",
 88 |     "        if 'affiliation' not in pred\n",
 89 |     "        and 'employs' not in pred\n",
 90 |     "        and 'member' not in pred\n",
 91 |     "        and 'head' not in pred\n",
 92 |     "    ]\n",
 93 |     "    t0 = time.time()\n",
 94 |     "    wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=3)\n",
 95 |     "    kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=0)\n",
 96 |     "    t1 = time.time()\n",
 97 |     "\n",
 98 |     "    results_wlrdf.append(t1 - t0)"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 7,
104 |    "metadata": {},
105 |    "outputs": [],
106 |    "source": [
107 |     "rdf_graph = rdflib.Graph().parse('../data/aifbfixed_complete.n3', format='n3')"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 8,
113 |    "metadata": {},
114 |    "outputs": [],
115 |    "source": [
116 |     "all_triples = [\n",
117 |     "    (str(subj), str(pred), str(obj))\n",
118 |     "     for subj, pred, obj in rdf_graph\n",
119 |     "]"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": 9,
125 |    "metadata": {},
126 |    "outputs": [],
127 |    "source": [
128 |     "for q in quantiles:\n",
129 |     "    n_sub = int(n * q)\n",
130 |     "    random.seed(RANDOM_STATE)\n",
131 |     "    triples = random.sample(all_triples, n_sub)\n",
132 |     "    \n",
133 |     "    instances_class_map = {\n",
134 |     "        subj: obj\n",
135 |     "        for subj, pred, obj in triples\n",
136 |     "        if 'affiliation' in pred\n",
137 |     "        and 'id5instance' not in obj\n",
138 |     "    }\n",
139 |     "    instances = list(instances_class_map.keys())\n",
140 |     "    y = list(instances_class_map.values())\n",
141 |     "    \n",
142 |     "    triples = [\n",
143 |     "        (subj, pred, obj)\n",
144 |     "        for subj, pred, obj in triples\n",
145 |     "        if 'affiliation' not in pred\n",
146 |     "        and 'employs' not in pred\n",
147 |     "        and 'member' not in pred\n",
148 |     "        and 'head' not in pred\n",
149 |     "    ]\n",
150 |     "    t0 = time.time()\n",
151 |     "    wl_graphs = [wlkernel.WLGraph(triples, instance, max_depth=3) for instance in instances]\n",
152 |     "    kernel_matrix = wlkernel.wl_kernel_matrix(wl_graphs, iterations=0)\n",
153 |     "    t1 = time.time()\n",
154 |     "\n",
155 |     "    results_wl.append(t1 - t0)"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": 10,
161 |    "metadata": {},
162 |    "outputs": [],
163 |    "source": [
164 |     "import matplotlib.pyplot as plt\n",
165 |     "\n",
166 |     "class Result:\n",
167 |     "    def __init__(self, values = None, color = 'red', name = ''):\n",
168 |     "        if values is not None:\n",
169 |     "            self.values = values\n",
170 |     "        self.color = color\n",
171 |     "        self.name = name\n",
172 |     "\n",
173 |     "x = quantiles\n",
174 |     "y = Result(results_wlrdf, 'orange', 'WL RDF')\n",
175 |     "y1 = Result(results_wl, 'purple', 'WL')\n",
176 |     "n = len(x)\n",
177 |     "\n",
178 |     "fig, ax = plt.subplots(figsize=(15, 8))\n",
179 |     "for i in range(n - 1):\n",
180 |     "    plt.plot(x[i: i+2], y.values[i: i+2],\n",
181 |     "             'o-', color=y.color, markersize=8)\n",
182 |     "    plt.plot(x[i: i+2], y1.values[i: i+2],\n",
183 |     "             'o-', color= y1.color, markersize=8)\n",
184 |     "\n",
185 |     "ax.xaxis.label.set_text('fraction of the dataset')\n",
186 |     "ax.yaxis.label.set_text('running time (s)')\n",
187 |     "\n",
188 |     "custom_lines = [plt.Line2D([0], [0], color=y.color, lw=4),\n",
189 |     "                plt.Line2D([0], [0], color=y1.color, lw=4)]\n",
190 |     "ax.legend(custom_lines, [y.name, y1.name])\n",
191 |     "plt.savefig('../results/affiliation_timing.png', format='png')"
192 |    ]
193 |   }
194 |  ],
195 |  "metadata": {
196 |   "kernelspec": {
197 |    "display_name": "Python 3",
198 |    "language": "python",
199 |    "name": "python3"
200 |   },
201 |   "language_info": {
202 |    "codemirror_mode": {
203 |     "name": "ipython",
204 |     "version": 3
205 |    },
206 |    "file_extension": ".py",
207 |    "mimetype": "text/x-python",
208 |    "name": "python",
209 |    "nbconvert_exporter": "python",
210 |    "pygments_lexer": "ipython3",
211 |    "version": "3.7.3"
212 |   }
213 |  },
214 |  "nbformat": 4,
215 |  "nbformat_minor": 2
216 | }
217 | 


--------------------------------------------------------------------------------
/notebooks/lithogenesis_scores.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import sys\n",
 10 |     "sys.path.insert(0, '../')"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "from collections import Counter, OrderedDict\n",
 20 |     "import warnings\n",
 21 |     "\n",
 22 |     "import rdflib\n",
 23 |     "import numpy as np\n",
 24 |     "from pprint import pprint\n",
 25 |     "from sklearn import svm\n",
 26 |     "from sklearn.model_selection import cross_validate\n",
 27 |     "\n",
 28 |     "import wlkernel"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 3,
 34 |    "metadata": {},
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "warnings.simplefilter('ignore')"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 4,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "rdf_graph = rdflib.Graph().parse('../data/Lexicon_NamedRockUnit.nt', format='nt')"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 5,
 52 |    "metadata": {},
 53 |    "outputs": [
 54 |     {
 55 |      "name": "stdout",
 56 |      "output_type": "stream",
 57 |      "text": [
 58 |       "Most common classes with predicate equal to 'hasLithogenesis':\n",
 59 |       "[('http://data.bgs.ac.uk/id/Lexicon/LithogeneticType/FLUV', 93),\n",
 60 |       " ('http://data.bgs.ac.uk/id/Lexicon/LithogeneticType/GLACI', 53)]\n"
 61 |      ]
 62 |     }
 63 |    ],
 64 |    "source": [
 65 |     "hasLithogenesis_most_common = Counter(\n",
 66 |     "    str(o)\n",
 67 |     "    for s, p, o in rdf_graph\n",
 68 |     "    if 'hasLithogenesis' in str(p)\n",
 69 |     ").most_common(2)\n",
 70 |     "print(\"Most common classes with predicate equal to 'hasLithogenesis':\")\n",
 71 |     "pprint(hasLithogenesis_most_common)\n",
 72 |     "classes = { c for c, _ in hasLithogenesis_most_common }"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 6,
 78 |    "metadata": {},
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "instances_class_map = {\n",
 82 |     "    str(s): str(o)\n",
 83 |     "    for s, p, o in rdf_graph\n",
 84 |     "    if str(o) in classes\n",
 85 |     "}\n",
 86 |     "assert len(instances_class_map) == 146\n",
 87 |     "instances = list(instances_class_map.keys())\n",
 88 |     "assert len(instances) == len(set(instances))\n",
 89 |     "y = np.array(list(instances_class_map.values()))"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": 7,
 95 |    "metadata": {},
 96 |    "outputs": [
 97 |     {
 98 |      "name": "stdout",
 99 |      "output_type": "stream",
100 |      "text": [
101 |       "number of triples:  313901\n"
102 |      ]
103 |     }
104 |    ],
105 |    "source": [
106 |     "triples = list(\n",
107 |     "    (str(s), str(p), str(o))\n",
108 |     "    for s, p, o in rdf_graph\n",
109 |     "    if 'hasLithogenesis' not in str(p)\n",
110 |     ")\n",
111 |     "print('number of triples: ', len(triples))"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 9,
117 |    "metadata": {},
118 |    "outputs": [],
119 |    "source": [
120 |     "RANDOM_STATE = 42\n",
121 |     "\n",
122 |     "depth_values = [1, 2, 3]\n",
123 |     "iteration_values =  [0, 2, 4, 6]\n",
124 |     "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
125 |     "\n",
126 |     "results = OrderedDict()\n",
127 |     "\n",
128 |     "for d in depth_values:\n",
129 |     "    for it in iteration_values:\n",
130 |     "        wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=d)\n",
131 |     "        kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=it)\n",
132 |     "        kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
133 |     "        \n",
134 |     "        results[(d, it)] = [0, 0, 0]\n",
135 |     "        for c in C_values:\n",
136 |     "            classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
137 |     "            scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
138 |     "            \n",
139 |     "            acc_mean = scores['test_accuracy'].mean()\n",
140 |     "            f1_mean = scores['test_f1_macro'].mean()\n",
141 |     "            \n",
142 |     "            if acc_mean > results[(d, it)][0]:\n",
143 |     "                results[(d, it)] = [acc_mean, f1_mean, c]"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 11,
149 |    "metadata": {},
150 |    "outputs": [
151 |     {
152 |      "data": {
153 |       "text/html": [
154 |        "<div>\n",
155 |        "<style scoped>\n",
156 |        "    .dataframe tbody tr th:only-of-type {\n",
157 |        "        vertical-align: middle;\n",
158 |        "    }\n",
159 |        "\n",
160 |        "    .dataframe tbody tr th {\n",
161 |        "        vertical-align: top;\n",
162 |        "    }\n",
163 |        "\n",
164 |        "    .dataframe thead th {\n",
165 |        "        text-align: right;\n",
166 |        "    }\n",
167 |        "</style>\n",
168 |        "<table border=\"1\" class=\"dataframe\">\n",
169 |        "  <thead>\n",
170 |        "    <tr style=\"text-align: right;\">\n",
171 |        "      <th></th>\n",
172 |        "      <th></th>\n",
173 |        "      <th>accuracy</th>\n",
174 |        "      <th>f1</th>\n",
175 |        "      <th>C</th>\n",
176 |        "    </tr>\n",
177 |        "    <tr>\n",
178 |        "      <th>depth</th>\n",
179 |        "      <th>iterations</th>\n",
180 |        "      <th></th>\n",
181 |        "      <th></th>\n",
182 |        "      <th></th>\n",
183 |        "    </tr>\n",
184 |        "  </thead>\n",
185 |        "  <tbody>\n",
186 |        "    <tr>\n",
187 |        "      <th rowspan=\"4\" valign=\"top\">1</th>\n",
188 |        "      <th>0</th>\n",
189 |        "      <td>0.795536</td>\n",
190 |        "      <td>0.763739</td>\n",
191 |        "      <td>10.0</td>\n",
192 |        "    </tr>\n",
193 |        "    <tr>\n",
194 |        "      <th>2</th>\n",
195 |        "      <td>0.795536</td>\n",
196 |        "      <td>0.763739</td>\n",
197 |        "      <td>10.0</td>\n",
198 |        "    </tr>\n",
199 |        "    <tr>\n",
200 |        "      <th>4</th>\n",
201 |        "      <td>0.795536</td>\n",
202 |        "      <td>0.763739</td>\n",
203 |        "      <td>10.0</td>\n",
204 |        "    </tr>\n",
205 |        "    <tr>\n",
206 |        "      <th>6</th>\n",
207 |        "      <td>0.795536</td>\n",
208 |        "      <td>0.763739</td>\n",
209 |        "      <td>10.0</td>\n",
210 |        "    </tr>\n",
211 |        "    <tr>\n",
212 |        "      <th rowspan=\"4\" valign=\"top\">2</th>\n",
213 |        "      <th>0</th>\n",
214 |        "      <td>0.906250</td>\n",
215 |        "      <td>0.891229</td>\n",
216 |        "      <td>100.0</td>\n",
217 |        "    </tr>\n",
218 |        "    <tr>\n",
219 |        "      <th>2</th>\n",
220 |        "      <td>0.892857</td>\n",
221 |        "      <td>0.874092</td>\n",
222 |        "      <td>1.0</td>\n",
223 |        "    </tr>\n",
224 |        "    <tr>\n",
225 |        "      <th>4</th>\n",
226 |        "      <td>0.892857</td>\n",
227 |        "      <td>0.874092</td>\n",
228 |        "      <td>1.0</td>\n",
229 |        "    </tr>\n",
230 |        "    <tr>\n",
231 |        "      <th>6</th>\n",
232 |        "      <td>0.885714</td>\n",
233 |        "      <td>0.866606</td>\n",
234 |        "      <td>1.0</td>\n",
235 |        "    </tr>\n",
236 |        "    <tr>\n",
237 |        "      <th rowspan=\"4\" valign=\"top\">3</th>\n",
238 |        "      <th>0</th>\n",
239 |        "      <td>0.891071</td>\n",
240 |        "      <td>0.875862</td>\n",
241 |        "      <td>100.0</td>\n",
242 |        "    </tr>\n",
243 |        "    <tr>\n",
244 |        "      <th>2</th>\n",
245 |        "      <td>0.891964</td>\n",
246 |        "      <td>0.873422</td>\n",
247 |        "      <td>1.0</td>\n",
248 |        "    </tr>\n",
249 |        "    <tr>\n",
250 |        "      <th>4</th>\n",
251 |        "      <td>0.906250</td>\n",
252 |        "      <td>0.890104</td>\n",
253 |        "      <td>1.0</td>\n",
254 |        "    </tr>\n",
255 |        "    <tr>\n",
256 |        "      <th>6</th>\n",
257 |        "      <td>0.907143</td>\n",
258 |        "      <td>0.888829</td>\n",
259 |        "      <td>1.0</td>\n",
260 |        "    </tr>\n",
261 |        "  </tbody>\n",
262 |        "</table>\n",
263 |        "</div>"
264 |       ],
265 |       "text/plain": [
266 |        "                  accuracy        f1      C\n",
267 |        "depth iterations                           \n",
268 |        "1     0           0.795536  0.763739   10.0\n",
269 |        "      2           0.795536  0.763739   10.0\n",
270 |        "      4           0.795536  0.763739   10.0\n",
271 |        "      6           0.795536  0.763739   10.0\n",
272 |        "2     0           0.906250  0.891229  100.0\n",
273 |        "      2           0.892857  0.874092    1.0\n",
274 |        "      4           0.892857  0.874092    1.0\n",
275 |        "      6           0.885714  0.866606    1.0\n",
276 |        "3     0           0.891071  0.875862  100.0\n",
277 |        "      2           0.891964  0.873422    1.0\n",
278 |        "      4           0.906250  0.890104    1.0\n",
279 |        "      6           0.907143  0.888829    1.0"
280 |       ]
281 |      },
282 |      "execution_count": 11,
283 |      "metadata": {},
284 |      "output_type": "execute_result"
285 |     }
286 |    ],
287 |    "source": [
288 |     "import pandas as pd\n",
289 |     "\n",
290 |     "fn = 'wlrdf_lithogenesis_results_with_normalization'\n",
291 |     "\n",
292 |     "df_res = pd.DataFrame(index=list(results.keys()))\n",
293 |     "df_res['accuracy'] = [t[0] for t in results.values()]\n",
294 |     "df_res['f1'] = [t[1] for t in results.values()]\n",
295 |     "df_res['C'] = [t[2] for t in results.values()]\n",
296 |     "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
297 |     "df_res.to_csv(f'../results/{fn}.csv')\n",
298 |     "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
299 |     "df_res_test.to_html(f'../results/{fn}.html')\n",
300 |     "df_res_test"
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "code",
305 |    "execution_count": 13,
306 |    "metadata": {},
307 |    "outputs": [],
308 |    "source": [
309 |     "RANDOM_STATE = 42\n",
310 |     "\n",
311 |     "depth_values = [1, 2, 3]\n",
312 |     "iteration_values =  [0, 2, 4, 6]\n",
313 |     "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
314 |     "\n",
315 |     "results = OrderedDict()\n",
316 |     "\n",
317 |     "for d in depth_values:\n",
318 |     "    for it in iteration_values:\n",
319 |     "        wl_graphs = [wlkernel.WLGraph(triples, instance, max_depth=d) for instance in instances]\n",
320 |     "        kernel_matrix = wlkernel.wl_kernel_matrix(wl_graphs, iterations=it)\n",
321 |     "        kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
322 |     "        \n",
323 |     "        results[(d, it)] = [0, 0, 0]\n",
324 |     "        for c in C_values:\n",
325 |     "            classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
326 |     "            scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
327 |     "            \n",
328 |     "            acc_mean = scores['test_accuracy'].mean()\n",
329 |     "            f1_mean = scores['test_f1_macro'].mean()\n",
330 |     "            \n",
331 |     "            if acc_mean > results[(d, it)][0]:\n",
332 |     "                results[(d, it)] = [acc_mean, f1_mean, c]"
333 |    ]
334 |   },
335 |   {
336 |    "cell_type": "code",
337 |    "execution_count": 14,
338 |    "metadata": {},
339 |    "outputs": [
340 |     {
341 |      "data": {
342 |       "text/html": [
343 |        "<div>\n",
344 |        "<style scoped>\n",
345 |        "    .dataframe tbody tr th:only-of-type {\n",
346 |        "        vertical-align: middle;\n",
347 |        "    }\n",
348 |        "\n",
349 |        "    .dataframe tbody tr th {\n",
350 |        "        vertical-align: top;\n",
351 |        "    }\n",
352 |        "\n",
353 |        "    .dataframe thead th {\n",
354 |        "        text-align: right;\n",
355 |        "    }\n",
356 |        "</style>\n",
357 |        "<table border=\"1\" class=\"dataframe\">\n",
358 |        "  <thead>\n",
359 |        "    <tr style=\"text-align: right;\">\n",
360 |        "      <th></th>\n",
361 |        "      <th></th>\n",
362 |        "      <th>accuracy</th>\n",
363 |        "      <th>f1</th>\n",
364 |        "      <th>C</th>\n",
365 |        "    </tr>\n",
366 |        "    <tr>\n",
367 |        "      <th>depth</th>\n",
368 |        "      <th>iterations</th>\n",
369 |        "      <th></th>\n",
370 |        "      <th></th>\n",
371 |        "      <th></th>\n",
372 |        "    </tr>\n",
373 |        "  </thead>\n",
374 |        "  <tbody>\n",
375 |        "    <tr>\n",
376 |        "      <th rowspan=\"4\" valign=\"top\">1</th>\n",
377 |        "      <th>0</th>\n",
378 |        "      <td>0.802679</td>\n",
379 |        "      <td>0.774383</td>\n",
380 |        "      <td>10.0</td>\n",
381 |        "    </tr>\n",
382 |        "    <tr>\n",
383 |        "      <th>2</th>\n",
384 |        "      <td>0.796429</td>\n",
385 |        "      <td>0.768842</td>\n",
386 |        "      <td>10.0</td>\n",
387 |        "    </tr>\n",
388 |        "    <tr>\n",
389 |        "      <th>4</th>\n",
390 |        "      <td>0.796429</td>\n",
391 |        "      <td>0.768842</td>\n",
392 |        "      <td>10.0</td>\n",
393 |        "    </tr>\n",
394 |        "    <tr>\n",
395 |        "      <th>6</th>\n",
396 |        "      <td>0.796429</td>\n",
397 |        "      <td>0.768842</td>\n",
398 |        "      <td>10.0</td>\n",
399 |        "    </tr>\n",
400 |        "    <tr>\n",
401 |        "      <th rowspan=\"4\" valign=\"top\">2</th>\n",
402 |        "      <th>0</th>\n",
403 |        "      <td>0.891964</td>\n",
404 |        "      <td>0.877311</td>\n",
405 |        "      <td>100.0</td>\n",
406 |        "    </tr>\n",
407 |        "    <tr>\n",
408 |        "      <th>2</th>\n",
409 |        "      <td>0.892857</td>\n",
410 |        "      <td>0.874092</td>\n",
411 |        "      <td>1.0</td>\n",
412 |        "    </tr>\n",
413 |        "    <tr>\n",
414 |        "      <th>4</th>\n",
415 |        "      <td>0.873214</td>\n",
416 |        "      <td>0.854485</td>\n",
417 |        "      <td>1.0</td>\n",
418 |        "    </tr>\n",
419 |        "    <tr>\n",
420 |        "      <th>6</th>\n",
421 |        "      <td>0.865179</td>\n",
422 |        "      <td>0.841353</td>\n",
423 |        "      <td>1.0</td>\n",
424 |        "    </tr>\n",
425 |        "    <tr>\n",
426 |        "      <th rowspan=\"4\" valign=\"top\">3</th>\n",
427 |        "      <th>0</th>\n",
428 |        "      <td>0.883929</td>\n",
429 |        "      <td>0.871406</td>\n",
430 |        "      <td>100.0</td>\n",
431 |        "    </tr>\n",
432 |        "    <tr>\n",
433 |        "      <th>2</th>\n",
434 |        "      <td>0.913393</td>\n",
435 |        "      <td>0.898291</td>\n",
436 |        "      <td>1.0</td>\n",
437 |        "    </tr>\n",
438 |        "    <tr>\n",
439 |        "      <th>4</th>\n",
440 |        "      <td>0.906250</td>\n",
441 |        "      <td>0.890922</td>\n",
442 |        "      <td>1.0</td>\n",
443 |        "    </tr>\n",
444 |        "    <tr>\n",
445 |        "      <th>6</th>\n",
446 |        "      <td>0.906250</td>\n",
447 |        "      <td>0.890922</td>\n",
448 |        "      <td>1.0</td>\n",
449 |        "    </tr>\n",
450 |        "  </tbody>\n",
451 |        "</table>\n",
452 |        "</div>"
453 |       ],
454 |       "text/plain": [
455 |        "                  accuracy        f1      C\n",
456 |        "depth iterations                           \n",
457 |        "1     0           0.802679  0.774383   10.0\n",
458 |        "      2           0.796429  0.768842   10.0\n",
459 |        "      4           0.796429  0.768842   10.0\n",
460 |        "      6           0.796429  0.768842   10.0\n",
461 |        "2     0           0.891964  0.877311  100.0\n",
462 |        "      2           0.892857  0.874092    1.0\n",
463 |        "      4           0.873214  0.854485    1.0\n",
464 |        "      6           0.865179  0.841353    1.0\n",
465 |        "3     0           0.883929  0.871406  100.0\n",
466 |        "      2           0.913393  0.898291    1.0\n",
467 |        "      4           0.906250  0.890922    1.0\n",
468 |        "      6           0.906250  0.890922    1.0"
469 |       ]
470 |      },
471 |      "execution_count": 14,
472 |      "metadata": {},
473 |      "output_type": "execute_result"
474 |     }
475 |    ],
476 |    "source": [
477 |     "fn = 'wl_lithogenesis_results_with_normalization'\n",
478 |     "\n",
479 |     "df_res = pd.DataFrame(index=list(results.keys()))\n",
480 |     "df_res['accuracy'] = [t[0] for t in results.values()]\n",
481 |     "df_res['f1'] = [t[1] for t in results.values()]\n",
482 |     "df_res['C'] = [t[2] for t in results.values()]\n",
483 |     "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
484 |     "df_res.to_csv(f'../results/{fn}.csv')\n",
485 |     "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
486 |     "df_res_test.to_html(f'../results/{fn}.html')\n",
487 |     "df_res_test"
488 |    ]
489 |   }
490 |  ],
491 |  "metadata": {
492 |   "kernelspec": {
493 |    "display_name": "Python 3",
494 |    "language": "python",
495 |    "name": "python3"
496 |   },
497 |   "language_info": {
498 |    "codemirror_mode": {
499 |     "name": "ipython",
500 |     "version": 3
501 |    },
502 |    "file_extension": ".py",
503 |    "mimetype": "text/x-python",
504 |    "name": "python",
505 |    "nbconvert_exporter": "python",
506 |    "pygments_lexer": "ipython3",
507 |    "version": "3.7.3"
508 |   }
509 |  },
510 |  "nbformat": 4,
511 |  "nbformat_minor": 2
512 | }
513 | 


--------------------------------------------------------------------------------
/notebooks/lithogenesis_timing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import sys\n",
 10 |     "sys.path.insert(0, '../')"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "from collections import Counter\n",
 20 |     "import time\n",
 21 |     "import random\n",
 22 |     "\n",
 23 |     "import rdflib\n",
 24 |     "import numpy as np\n",
 25 |     "from pprint import pprint\n",
 26 |     "from sklearn import svm\n",
 27 |     "\n",
 28 |     "import wlkernel"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 3,
 34 |    "metadata": {},
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "rdf_graph = rdflib.Graph().parse('../data/Lexicon_NamedRockUnit.nt', format='nt')"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 4,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "all_triples = [\n",
 47 |     "    (str(subj), str(pred), str(obj))\n",
 48 |     "     for subj, pred, obj in rdf_graph\n",
 49 |     "]"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 5,
 55 |    "metadata": {},
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "quantiles = np.linspace(0.1, 1, 10)  # [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]\n",
 59 |     "results_wlrdf = []\n",
 60 |     "results_wl = []\n",
 61 |     "n = len(all_triples)\n",
 62 |     "RANDOM_STATE = 42"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 6,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "for q in quantiles:\n",
 72 |     "    n_sub = int(n * q)\n",
 73 |     "    random.seed(RANDOM_STATE)\n",
 74 |     "    triples = random.sample(all_triples, n_sub)\n",
 75 |     "    \n",
 76 |     "    instances_class_map = {\n",
 77 |     "        subj: obj\n",
 78 |     "        for subj, pred, obj in triples\n",
 79 |     "        if 'hasLithogenesis' in pred\n",
 80 |     "    }\n",
 81 |     "    instances = list(instances_class_map.keys())\n",
 82 |     "    y = list(instances_class_map.values())\n",
 83 |     "    \n",
 84 |     "    triples = [\n",
 85 |     "        (subj, pred, obj)\n",
 86 |     "        for subj, pred, obj in triples\n",
 87 |     "        if 'hasLithogenesis' not in pred\n",
 88 |     "    ]\n",
 89 |     "    t0 = time.time()\n",
 90 |     "    wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=3)\n",
 91 |     "    kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=0)\n",
 92 |     "    t1 = time.time()\n",
 93 |     "\n",
 94 |     "    results_wlrdf.append(t1 - t0)"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": 7,
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": [
103 |     "rdf_graph = rdflib.Graph().parse('../data/Lexicon_NamedRockUnit.nt', format='nt')\n",
104 |     "all_triples = [\n",
105 |     "    (str(subj), str(pred), str(obj))\n",
106 |     "     for subj, pred, obj in rdf_graph\n",
107 |     "]"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 8,
113 |    "metadata": {},
114 |    "outputs": [],
115 |    "source": [
116 |     "for q in quantiles:\n",
117 |     "    n_sub = int(n * q)\n",
118 |     "    random.seed(RANDOM_STATE)\n",
119 |     "    triples = random.sample(all_triples, n_sub)\n",
120 |     "    \n",
121 |     "    instances_class_map = {\n",
122 |     "        subj: obj\n",
123 |     "        for subj, pred, obj in triples\n",
124 |     "        if 'hasLithogenesis' in pred\n",
125 |     "    }\n",
126 |     "    instances = list(instances_class_map.keys())\n",
127 |     "    y = list(instances_class_map.values())\n",
128 |     "    \n",
129 |     "    triples = [\n",
130 |     "        (subj, pred, obj)\n",
131 |     "        for subj, pred, obj in triples\n",
132 |     "        if 'hasLithogenesis' not in pred\n",
133 |     "    ]\n",
134 |     "    t0 = time.time()\n",
135 |     "    wl_graphs = [wlkernel.WLGraph(triples, instance, max_depth=3) for instance in instances]\n",
136 |     "    kernel_matrix = wlkernel.wl_kernel_matrix(wl_graphs, iterations=0)\n",
137 |     "    t1 = time.time()\n",
138 |     "\n",
139 |     "    results_wl.append(t1 - t0)"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 10,
145 |    "metadata": {},
146 |    "outputs": [
147 |     {
148 |      "data": {
149 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAA3sAAAHgCAYAAAAc83RKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzdeXRV1eH28e9OwiwyKzKDoqiISEERVBQUFRUFERAFnKpvqx3Q2tb+Os+Dtba2ta21AirIJCKIOIEKBgcUBBFRHJhBZogMCcl+/8hViTIE5OYk4ftZKyv3nL3v5QlL1srjPufsEGNEkiRJklS+ZCQdQJIkSZJ08Fn2JEmSJKkcsuxJkiRJUjlk2ZMkSZKkcsiyJ0mSJEnlkGVPkiRJksqhrKQDfBV169aNzZo1SzqGJEmSJCXi9ddfXxtjrLe7sTJd9po1a8asWbOSjiFJkiRJiQghLN7TmJdxSpIkSVI5ZNmTJEmSpHLIsidJkiRJ5VCZvmdvd/Ly8li2bBnbt29POkqpVrlyZRo1akSFChWSjiJJkiQpDcpd2Vu2bBnVq1enWbNmhBCSjlMqxRhZt24dy5Yto3nz5knHkSRJkpQG5e4yzu3bt1OnTh2L3l6EEKhTp46rn5IkSVI5Vu7KHmDRKwb/jiRJkqTyrVyWvSQNGTKEu++++7Pj888/nxtuuOGz49tuu4277rqLjz76iNatW+/1s37+85/TsGFD2rZtywknnMDIkSM/G7vmmmto3rw5J598MsceeyyDBg1i2bJln403a9aMk046ibZt29K2bVuys7MP4k8pSZIkqbSz7B1knTt3/qxYFRQUsHbtWubPn//ZeHZ2Np06dSr25w0ZMoQ5c+YwYcIEbrrpJvLy8j4b+9Of/sSbb77JwoULOeWUU+jatSu5ubmfjU+bNo05c+YwZ86c/fozJUmSJJV95e4BLZ8ZkebLFAfE3Z7u1KkTQ4YMAWD+/Pm0bt2alStXsmHDBqpWrcqCBQto164dK1as2K8/rmXLllStWpUNGzZwxBFHFBkLITBkyBDGjx/Pk08+yaWXXnpgP5MkSZKkcqP8lr2ENGjQgKysLJYsWUJ2djann346y5cvZ+bMmdSoUYOTTjqJihUr7vfnvvHGG7Rs2fJLRW9X7dq145133vms7J1zzjlkZmZSqVIlXnnllQP+mSRJkiSVPZa9NOjUqRPZ2dlkZ2dz6623snz5crKzs6lRowadO3fer8/6y1/+wgMPPMC7777LxIkT9zo3xqKrjdOmTaNu3br7nV+SJElS2ec9e2nw6X178+bNo3Xr1nTs2JGZM2fu9/16UHjP3vz58xk3bhzXX3/9XrdLmD17Nscff/xXjS9JkiSpHCi/K3t7uKeuJHTq1Ik777yTFi1akJmZSe3atdm4cSPz58/nvvvuO6DP7NmzJ/fffz/Dhg3jpptuKjIWY+See+5h5cqVXHDBBQfjR5AkSZJUxrmylwYnnXQSa9eupWPHjkXO1ahRo8hllQsXLqRRo0affY0ZM2avn/vTn/6Uu+66i4KCAgBuv/32z7ZeeO2115g2bdoB3Q8oSZIkaTfycmDuz2BcPRiRUfh97s8Kz5cB4Yv3eZUl7du3j7NmzSpybsGCBV7KWEz+XUmSJEl7kJcDT3eEnPchf5dbqTIrw2FHQ/eXocJhyeVLCSG8HmNsv7sxV/YkSZIk6YsW/OnLRQ8Kj3PeLxwv5crvPXuSJEmSdKDe+ye5nxTw0qSzmfVsB7ZuqUrV6ltpf+5rdL44m4rv3QttfpF0yr2y7EmSJEnSF+Ru2sx/f3Y9G1bXZmdeBQC2bqlG9qTOLHjteG74xf8o7U/L8DJOSZIkSfqCl6Z0K1L0PrUzrwIbVtfmpSndEkpWfJY9SZIkSfqCWc+c+qWi96mdeRWY9WyHEk60/yx7kiRJkrSrGNm6ae9Ttm0KJZPlK7DsHW
RDhgzh7rvv/uz4/PPP54Ybbvjs+LbbbuOuu+6idevWScSTJEmStC8fDqfqYVv3OqVKnaolFObAWfYOss6dO5OdnQ1AQUEBa9euZf78+Z+NZ2dn06lTp6TiSZIkSdqbLYtg1s207rZuj1OyKmfR/hu73dquVCm3T+P8RUjvY1B/Fn+22/OdOnViyJAhAMyfP5/WrVuzcuVKNmzYQNWqVVmwYAG1a9dOazZJkiRJByA/F166kp07q7D4g7aEjA1kVMggf0f+Z1OyKmdR6+hadL69c4JBi6fclr2kNGjQgKysLJYsWUJ2djann346y5cvZ+bMmdSoUYOTTjqJihVL+0NaJUmSpEPQvJ/C+lk8OemvrJ67nt4je7N2wVpm3TuLbeu2UaVOFdp/oz2db+9MxcNK/+/0lr006NSpE9nZ2WRnZ3PrrbeyfPlysrOzqVGjBp07l/7/AyBJkiQdclY9B2//kTfmf4c3Rm7gjB+dwUn9TwLgnF+ck3C4A+M9e2nw6X178+bNo3Xr1nTs2JGZM2d6v54kSZJUGm1fAzMHsuLjjkz+c11anNuCc35ZNgversrtyt6e7qkrCZ06deLOO++kRYsWZGZmUrt2bTZu3Mj8+fO57777yMnJSSybJEmSpF3ECK9cz9a1Wxnzl15UOyKL3iN6k5FZ9tfFyv5PUAqddNJJrF27lo4dOxY5V6NGDerWrQvAwoULadSo0WdfY8aMSSquJEmSdOh6758ULJ3Eo8NvZ8uqHfQd25dq9aolneqgKLcre0nKzMxk8+bNRc4NHTr0s9fNmjUjLy+vhFNJkiRJKmLjW/DGbbzwzPW8P2MnF/3rIhqe2jDpVAdNWlf2Qgg1QwhjQwjvhBAWhBBODyHUDiE8E0J4L/W9VmpuCCH8LYSwKIQwN4TQLp3ZJEmSJB3Cdm6Dl/rz7rw2vDi8EW2vacvXbvxa0qkOqnRfxvlXYEqMsRVwMrAA+CHwXIyxJfBc6hjgQqBl6utG4N40Z5MkSZJ0qJr9PTa8t4Lx/+hJ/bb16fHPHoQQkk51UKWt7IUQagBnAfcDxBhzY4wbgUuBYalpw4DLUq8vBYbHQi8DNUMIR6UrnyRJkqRD1LLHyZv/H0b/6xYIFeg7ri8VqlRIOtVBl86VvebAGuCBEMLsEMJ/QwjVgCNjjCtTc1YBR6ZeNwSW7vL+Zalz+y3GeICRDx3+HUmSJOmQtHU58eXreOLhQaxamEnvh3tTq0WtpFOlRTrLXhbQDrg3xngK8AmfX7IJQCxsHPvVOkIIN4YQZoUQZq1Zs+ZL45UrV2bdunWWmb2IMbJu3ToqV66cdBRJkiSp5MQCmDmI1586ljefbcJZPz2Llj1aJp0qbdL5NM5lwLIY4yup47EUlr3VIYSjYowrU5dpfpwaXw403uX9jVLniogx/gf4D0D79u2/1OgaNWrEsmXL2F0R1OcqV65Mo0aNko4hSZIklZwFf2J59kKmDPs6x1xwDF1+2iXpRGmVtrIXY1wVQlgaQjguxrgQ6Aa8nfoaDPw+9X1C6i2PA7eEEB4BTgM27XK5Z7FVqFCB5s2bH5SfQZIkSVI5sfZVPpn+W0b//RaqN6xJ74fLx8bpe5Puffa+BTwcQqgIfABcS+Glo6NDCNcDi4G+qbmTgR7AImBraq4kSZIkfTV5WyiYfhWP3tuPTzZV4fon+1KldpWkU6VdWstejHEO0H43Q912MzcCN6czjyRJkqRD0KxbmDasKR+82ZCe91/EUe0OjYf+l+91S0mSJEmHto9G8M6YV5gx4UxOueEUTrnulKQTlRjLniRJkqTyKecD1k34IY/9uw9Hfa0+Pe7pkXSiEmXZkyRJklT+FOSR+9wgRt/Vk4xKh9F3XD+yKqf7kSWly6H100qSJEk6JMS5v2TSn47k46X1uHrKFdRsWjPpSCXOlT1JkiRJ5cvqF3jt7qeZ91IbzvnlORzd/eikEyXCsidJkiSp/NixnqUP3MpTD13AsT1acOaPzk
w6UWIse5IkSZLKhxjJmfINxvzxHGo0PoxeD19ByAhJp0qM9+xJkiRJKhcKFv6HcXdUYdvW6lw/bSCVa1ZOOlKiXNmTJEmSVPZtWsBz35/ARwuac/G/e1L/5PpJJ0qcZU+SJElS2Za/nbf/eBvZE0+j/ddP4OTBh87G6Xtj2ZMkSZJUpq197EdMuKstDU+pxvn39Eo6Tqlh2ZMkSZJUZuW+O5FR384lq3IFrpjwdbIq+ViST1n2JEmSJJVJcetKHr92FOtW1eXyUVdSo3GNpCOVKpY9SZIkSWVPLOCV2+9gfnZLuv60DS26t0o6Ualj2ZMkSZJU5ix+6E6e/ldTWp1bic4/9T693bHsSZIkSSpTtrydzdhvr6FWg1wuHfMdQjh0N07fG8ueJEmSpDIjf9tmxl7xMDu2VaLfhGupXLNK0pFKLR9VI0mSJKnMePa6X7Lk7SPofW8rjmh3bNJxSjVX9iRJkiSVCW/947+8/Eh1Tr1yJyf9v35Jxyn1XNmTJEmSVOqtmfUmj9/2IY1PyKH7//6QdJwywbInSZIkqVTbsfETRvV6mIqV4YrxN5BZuXLSkcoEy54kSZKkUivGyIQ+d7J+RRUGPdyI6seelHSkMsN79iRJkiSVWtk/H8GC5+Dcb66nWf+vJx2nTHFlT5IkSVKp9OHT83ju1+9yQqclnP7nu5OOU+ZY9iRJkiSVOpuXbWJc/9HUqb+Rng/fQKhYI+lIZY6XcUqSJEkqVfJz8xnT85/kbcun7731qdSsc9KRyiRX9iRJkiSVKk99axTLZufS58eLqHfxyKTjlFmWPUmSJEmlxtwH3+C1/7xHx4vf4MQ7/gEZmUlHKrMse5IkSZJKhdVzVzPxxsdp2mox5/71GqjaKOlIZZr37EmSJElK3PaN2xl92VAqV86hz11VyWzRK+lIZZ4re5IkSZISFQsijw0cxcYlnzD4d69y2HlTko5ULriyJ0mSJClRM34/g4WTPqL71VNpcsPfIatq0pHKBcueJEmSpMR88OwHTPvJVFqfPo9Tf9gXarVJOlK5YdmTJEmSlIhNSzYxrv8o6jZYwyU/ySUc962kI5Ur3rMnSZIkqcTt3LGT0Zc/ws6tn9Dv589S8ZzpEELSscoVV/YkSZIklbgp35nCilmruOymcdS5/B6oXC/pSOWOZU+SJElSiZozdA6v//t1Ol8yg+MHXQxHnZd0pHLJsidJkiSpxKycvZInvjGJ5q2X0vXGDdDmN0lHKre8Z0+SJElSidi2YRujLx9NlcO2cvm3HyfjrBmQWTHpWOWWK3uSJEmS0i4WRMZfPZ7NSzfS95YHqdb1D3D4sUnHKtcse5IkSZLS7sVfv8h7k9/jgoGTadStM7S4JulI5Z6XcUqSJElKq0VTFvH8z5+nzdnv077najj1KbdZKAGWPUmSJElps+HDDYwbMI4jj97BxYNHETpPhYo1k451SPAyTkmSJElpsXP7Tsb0GUPcmUvfb/6LCl/7P6jXOelYhwzLniRJkqS0mHzLZFa+sZJe3xhL7dYnwok/SjrSIcXLOCVJkiQddG/89w1m3z+bM698j+PaL4ZOcyHD+lGS/NuWJEmSdFCtmLWCyTdP5ujTCzi7xwg4bRRUa5J0rEOOl3FKkiRJOmi2rt3K6MtHc1i9LHoPvpOMltdBkyuSjnVIsuxJkiRJOigK8gt49KpHyVm1hSu+/QhVGzSCr/016ViHLC/jlCRJknRQvPCLF3j/6fe5+HtLaNjkbej8MmRVSzrWIcuyJ0mSJOkre3fSu7z4qxdp26cy7dr+D07+M9Rul3SsQ5qXcUqSJEn6Sta/v55Hr36U+m1q0qPnrwkNzodW30061iHPsidJkiTpgOVtzWP05aMJGYG+3xlJhWpVoeNQCFaNpHkZpyRJkqQDEmNk0v+bxOq5qxnwl83UqvwKnPYEVKmfdDSR5pW9EMJHIYR5IYQ5IYRZqXO1QwjPhBDeS32vlTofQgh/CyEsCiHMDSF4ga
8kSZJUis361yzmPjiXLkOOomW9u+C470DDHknHUkpJrK2eE2NsG2Nsnzr+IfBcjLEl8FzqGOBCoGXq60bg3hLIJkmSJOkALHtlGVO+M4WW5zemS8f/g5onQ9s/JB1Lu0jiQtpLgWGp18OAy3Y5PzwWehmoGUI4KoF8kiRJkvbikzWfMKbPGA5veDi9/t8YQsEn0HkkZFZKOpp2ke6yF4GnQwivhxBuTJ07Msa4MvV6FXBk6nVDYOku712WOidJkiSplCjYWcC4/uPYunYrfe/cTpWtU+Brd0ON45OOpi9I9wNazogxLg8hHAE8E0J4Z9fBGGMMIcT9+cBUabwRoEmTJgcvqSRJkqR9mvqTqXw49UMuvacNR+X2hca94eivJx1Lu5HWlb0Y4/LU94+B8cCpwOpPL89Mff84NX050HiXtzdKnfviZ/4nxtg+xti+Xr166YwvSZIkaRfvPPYOL/3+Jdrd0Ia2TX9Q+NTNU++DEJKOpt1IW9kLIVQLIVT/9DXQHXgLeBwYnJo2GJiQev04MCj1VM6OwKZdLveUJEmSlKB1767jscGP0aBDAy4cOBG2LILTH4JKtZOOpj1I52WcRwLjQ2HLzwJGxBinhBBeA0aHEK4HFgN9U/MnAz2ARcBW4No0ZpMkSZJUTLmf5DKq9ygyKmTQ9+4qZH30AJz4YziyS9LRtBdpK3sxxg+Ak3dzfh3QbTfnI3BzuvJIkiRJ2n8xRiZ+fSJr3l7D1Y+dR43lF0KdjnDST5OOpn1IYusFSZIkSWXEq39/lbdGvkXXX53N0ZVvLzzZeQRkVEg2mPbJsidJkiRpt5a8tISnb32aYy85ljMumgprs6HDv+Cw5klHUzGke+sFSZIkSWVQzqocxvYdS42mNej15yMJs66G5oOg2ZVJR1MxWfYkSZIkFZGfl8/YfmPZtmEbNzzel8rzu0G1FtD+70lH036w7EmSJEkq4rk7nmPxi4vpNfwyjtz6I9i2ErpnQ4XqSUfTfvCePUmSJEmfeXvs28z880w63NyBNp1mwdKxcPKvoU6HpKNpP7myJ0mSJAmANQvWMOHaCTTq2Ijzf9YUpl4BR3aF429POpoOgGVPkiRJEju27GB079FkVcniikd6kjnrfMiqAqc/CMELAssiy54kSZJ0iIsx8vj1j7Pu3XUMfHYgh6/7PWyYDWc9DlUbJB1PB8iKLkmSJB3iXv7Ly7w95m26/a4bzY9bCO/cBS1vhkaXJB1NX4Ere5IkSdIhbPGLi3nm+8/QqlcrOt1yNEw5GWq0hlP+lHQ0fUWWPUmSJOkQtWXFFsb0HUPto2tz2QM9Ca9cDrmboOtzhffrqUyz7EmSJEmHoPy8fMb0HUPullwGPTeISiv/DSufLNw4vWbrpOPpIPCePUmSJOkQ9Mztz7D0paX0vL8nRzRYAXN+AA17QstvJh1NB4kre5IkSdIhZt7Iebzy11c47bun0bpPc5jSHirVgdPuhxCSjqeDxLInSZIkHUI+futjJt4wkSZnNOG8P54Hb3wTNi+Ers9A5bpJx9NB5GWckiRJ0iFi+6btjL58NJUOr0Sf0X3IXDUBFv0HTvg+1O+WdDwdZJY9SZIk6RAQY2TCtRNY//56+ozuQ/XDN8IrN0DtDtDmV0nHUxp4GackSZJ0CMj+UzbvjH+H7nd1p2nnRjC1KxTkQecRkFEh6XhKA8ueJEmSVM59OPVDnrvjOU7seyIdv9sR5v8GPn4ROg6D6sckHU9p4mWckiRJUjm2edlmxvYfS53j6tDz/p6EtS/DvJ9D0yuh+cCk4ymNLHuSJElSObVzx05G9xnNzm076fdoPypW3AbZA6BqY+hwr9sslHNexilJkiSVAzmrchjTbwxLpi+BCAQ4rP5h5KzM4YqxV1D3uDqQfRVsXQrnToeKNZKOrDSz7EmSJEllXM6qHO5udjf5O/I/PxkhZ2UOITPQpHMT+PBBWDyy8Mmb9U5PLqxKjJdxSpIkSWXcmH5jiha9Xc
T8yGNX/g9m3QxHdIET7ijhdEqKZU+SJEkq45ZMX7LX8eWvrSjcXuH0ByEjs4RSKWmWPUmSJKmsi3sf3rG1Epz2X6jWuGTyqFSw7EmSJEll3T4eqlmhUh407l0yWVRqWPYkSZKkMq7hqQ3Z0/JeZlYeR53avGQDqVSw7EmSJEll2Kalm9ixYS21jlxPVoW8ImNZFfKoWXcTfYZ2TSidkuTWC5IkSVIZtXHxRoadM4yvnfYk7bu9zMzJnZj1bAe25VShymHbaH/ua3S6KJtKi+ZC8xeSjqsSZtmTJEmSyqANH2xgWNdh7Ni0g9N7vERmZj7n9Hmec/o8/+XJa6aXeD4lz8s4JUmSpDJm3XvrGNplKLlbchn03CAyM3e/x97n9vG4TpVLlj1JkiSpDFn7zlqGdhnKzu07GTxtMEe1O4p9Po5zn+Mqjyx7kiRJUhnx8fyPGdplKLEgMvj5wRzZ5sjCgVrt9v7GememP5xKHe/ZkyRJksqAVW+u4sFzHySjQgaDpw6mbqu6hQObFsAnH+35jRmV4IxRJZJRpYsre5IkSVIpt/KNlQzvOpysyllc88I1nxe9ze/Cc10hIwvOnQ71zuLzSzZD4fGlH0GV+gklV5Jc2ZMkSZJKseWvLueh8x+iUo1KDJ42mFrNaxUObHm/sOjFfDj3eahxApzn9gr6nCt7kiRJUim1NHspw88dTpXaVbjmhWs+L3o5HxUWvfxt0PXZwqInfYEre5IkSVIptHj6Ykb0GMFhRx3G4KmDObzR4YUDnywtLHp5m6HbVKjVJtmgKrUse5IkSVIp8+G0Dxl58UhqNKnBoOcGUb1B9cKBrcsLi17uusIVvdqnJBtUpZqXcUqSJEmlyPtPv8+IHiOo2bwmg58f/HnR27YKpnaD7avgnKegTodkg6rUc2VPkiRJKiXem/weo3qPom6rugx8ZiDV6lUrHNi+prDofbK0sOjV7ZhsUJUJlj1JkiSpFFj4+EJG9xnNkW2OZODTA6lSu0rhwI51MPVcyPkQzp4MR5yRbFCVGZY9SZIkKWFvj3ubcf3HcVS7o7j6qaupXLNy4UDuBph6HmxeCGdPgiPPTjSnyhbv2ZMkSZIS9NaotxjbbywNT23I1U/vWvQ2wdTzYdN8OOsxqH9uskFV5riyJ0mSJCVk7kNzeWzwYzQ5owlXTrqSStUrFQ7kbYFpF8DGOXDmo9DggmSDqkyy7EmSJEkJmP3AbB6//nGan9Oc/o/3p2K1ioUDOz+B53vA+tfgjDHQ8OJkg6rMsuxJkiRJJez1/7zOpJsmcXT3o+n3WD8qVKlQOLBzKzx/MazNhk4joXGvZIOqTLPsSZIkSSXo1X+8ypO3PEnLHi3pO64vWZVTv5Lnb4cXL4WPX4BOD0HTvskGVZnnA1okSZKkEjLzLzN58pYnOe7S4+j76K5Fbwe82AtWPQcdH4BmA5INqnLBlT1JkiSpBLz0x5d49gfPckKfE+g9ojeZFTILB/JzYcYVsHIKnHoftBicbFCVG5Y9SZIkKc1e/PWLTPvJNFpf2Zpew3uRkZW6wK4gD17qD8snQod/wjE3JBtU5YplT5IkSUqTGCPP//x5Xvzli7QZ2IZLH7iUjMxPi95OyL4alo2Hr/0VWn4j2bAqd9J+z14IITOEMDuEMCl13DyE8EoIYVEIYVQIoWLqfKXU8aLUeLN0Z5MkSZLSJcbI1P+byou/fJG217X9QtHLh5evgSWj4ZQ74bhvJ5pV5VNJPKDlO8CCXY7/APwlxngMsAG4PnX+emBD6vxfUvMkSZKkMifGyDO3P8OM383gazd9jZ739fy86MUCePUG+OhhOPm3cPxtyYZVuZXWshdCaARcBPw3dRyArsDY1JRhwGWp15emjkmNd0vNlyRJksqMGCNTvjOFmX+eyanfOpWL7r2IkJH6tTYWwKv/Dz4YCif9Ak68I9GsKt/SvbJ3N/B9oCB1XAfYGGPcmTpeBjRMvW4ILAVIjW9KzZckSZLKhF
gQeeKbT/DqPa/S8daOXPDXC/hs/SJGmPUteP8+OPH/oPVPkg2rci9tZS+EcDHwcYzx9YP8uTeGEGaFEGatWbPmYH60JEmSdMBiQWTijRN5/V+v0/mHnel+Z/eiRe+NIfDeP+H426HNr8CL2JRm6VzZ6wz0DCF8BDxC4eWbfwVqhhA+fQpoI2B56vVyoDFAarwGsO6LHxpj/E+MsX2MsX29evXSGF+SJEkqnoL8AiZcO4HZ98/mrJ+cRbffdita9OZ8Hxb+FY77LrT9g0VPJSJtZS/GeEeMsVGMsRnQH5gaY7wKmAb0SU0bDExIvX48dUxqfGqMMaYrnyRJknQwFOwsYPzA8bw5/E3O/uXZnPPLc4oWvbk/hgV3Qsubod1dFj2VmJJ4GucX/QC4NYSwiMJ78u5Pnb8fqJM6fyvwwwSySZIkScWWn5fPuCvH8dbIt+j2+250+UmXohPe+iXM/y0ccyO0/5tFTyWqRDZVjzE+Dzyfev0BcOpu5mwHriiJPJIkSdJXlZ+bz9h+Y3nnsXfofld3Th9yetEJ838L834OLa6BDvdCSGKdRYeyEil7kiRJUnmyc/tORvcZzXtPvMeF91zIqbd8YS1jwZ3w5v9Bs6vh1P9a9JQIy54kSZK0H/K25TGq1yjef+p9LvrXRbS/qX3RCe/8FWbfDk36QccHICMzmaA65Fn2JEmSpGLK/SSXR3o+wofTPqTn/T055bpTik5495/wxneh8eXQ6UHI8NdtJcf/+iRJkqRiyM3JZcRFI1gyYwm9hveizdVtik5YdB/Muhka9oROIyCjQjJBpRTLniRJkrQPOzbv4OEeD7Ps5WX0frg3rfu3Ljrhg6Hw6k1w1IVwxmjIrJhITmlXlj1JkiRpL7Zv3M5DFzzEytdX0mdUH064/ISiEz58GF6+DuqfC2c9CpmVkgkqfYFlT5IkSdqDbeu38WD3B1k9dzVXjL2CVpe2Kjph8Wh4eRAceTac9RhkVk4kp7Q7xSp7IYRaQANgG/BRjLEgrakkSZKkhG1du5Xh5w5n7Ttr6f9Yf1r2aFl0wtLxkD0A6naGLhMhq2oyQaU92GPZCyHUAG4GrgQqAmuAysCRIYSXgX/GGKeVSEpJkiSpBH3y8ScM7zac9YvWc+XjV0wxemEAACAASURBVHJ096OLTlg2EV7qB3VOhbOfgKxqyQSV9mJvK3tjgeHAmTHGjbsOhBC+BgwMIbSIMd6fzoCSJElSSdqycgvDuw1n0+JNDHhiAM27Ni86YcWTMKMP1GwLZz8JFaonE1Tahz2WvRjjeXsZex14PS2JJEmSpIRsXraZYV2HsWXFFq568iqantW06IRVz8KLvaBGa+j6FFSskUxQqRgy9jUhhNA5hFAt9frqEMJdIYSm+3qfJEmSVJZsXLyRoV2GkrMqh4FPD/xy0Vv9PLzQEw4/Dro+DRVrJZJTKq59lj3gXmBrCOFk4DbgfQov75QkSZLKhQ0fbmBol6FsXbeVQc8OonGnxkUnfDwdnr8IDmsBXZ+FSnWSCSrth+KUvZ0xxghcCvw9xvgPwAuTJUmSVC6sX7SeoV2Gkrsll8FTB9Pw1IZFJ6yZCc/3gGpNoOtzULleMkGl/VScrRe2hBDuAK4GzgohZAAV0htLkiRJSr+1C9cyvOtw8nPzGTR1EPVPrl90wrrX4PkLoHL9wqJX5chkgkoHoDgre/2AHcD1McZVQCPgT2lNJUmSJKXZmrfXMLTLUAp2FjB42uAvF731s2Fqd6hYB7pNhaoNkgkqHaC97bMXYqFVwF2fno8xLiF1z96nc9IfU5IkSTp4Vs9dzfBzh5ORlcHgqYOp26pu0Qkb5sLUc6HC4XDuNKjWePcfJJVie1vZmxZC+FYIocmuJ0MIFUMIXUMIw4DB6Y0nSZIkHVwrZ69k2DnDyKqUxTUvXPPlordxPkztBllVU0XPB9GrbNrbPXsXANcBI0MIzYGNQGUgE3gauDvGODv9ESVJkqSDY/lry3mo+0
NUOrwSg6cNplaLL2yfsOmdwqKXUQG6Ti18+qZURu1tU/XtwD+Bf4YQKgB1gW0xxo0lFU6SJEk6WJbOXMrDFzxMlTpVGDxtMDWb1iw6YfN7MLUrEAuL3uEtE8kpHSzFeRonMcY8YGWas0iSJElpsWTGEh6+8GEOq38Yg6YOokbjGkUn5HxYWPQK8qDb81CjVSI5pYOpWGVPkiRJKqs+ev4jRlw0gsMbH87gqYOp3uALW0Z/shieOwd2bi186mbNE5MJKh1klj1JkiSVWx88+wEje46kVotaDHpuEIcdeVjRCVuXwXNdIXcTdHsOap2cTFApDYqzzx4hhKYhhHNTr6uEEKrv6z2SJElSkhZNWcSIi0dQp2UdBk8bvJuit6Kw6O1YC12fhtrtkgkqpck+y14I4evAWODfqVONgMfSGUqSJEn6KhZOXMgjlz5CvRPqMWjqIKrVq1Z0wrbVhU/d3LYSzn4S6nRIJqiURsVZ2bsZ6AxsBogxvgcckc5QkiRJ0oFa8OgCRvcezZEnH8mg5wZRtU7VohO2ryksep8sgbMnQ71OyQSV0qw4ZW9HjDH304MQQhYQ0xdJkiRJOjDzR89nTN8xNOjQgIHPDKRKrSpFJ+xYD1PPg5z34exJcMSZyQSVSkBxyt4LIYQfAVVCCOcBY4CJ6Y0lSZIk7Z+5D89l3JXjaNypMVc/dTWVa1QuOiF3Y2HR2/wOnPU4HHlOMkGlElKcsvdDYA0wD7gJmAz8OJ2hJEmSpP0xZ9gcxg8cT9MuTbnqyauoVL1S0Ql5m2Ha+bDpLThrPBx1XjJBpRK0z60XYowFwH2pL0mSJKlUef2+15l00yRanNuC/o/1p0LVCkUn5G2BaRfC+jfgzHHQ4MJkgkolrDhP47w4hDA7hLA+hLA5hLAlhLC5JMJJkiRJe/PaP19j0o2TaHlhS658/MovF72dn8ALF8O6V6DzI9CoZzJBpQQUZ1P1u4HewLwYow9mkSRJUqnw8l9f5qnvPsVxPY+jz+g+ZFX6wq+2O7fCCz1hzQzoNAKaXJ5MUCkhxSl7S4G3LHqSJEkqLbLvzOaZ25/h+MuP5/IRl5NZMbPohPzt8GIvWD0NTh8OTfslE1RKUHHK3veBySGEF4Adn56MMd6VtlSSJEnSHkz/7XSm/t9UWvdvTa8He5GR9YU7k/J3wPTLYdXT0PEBaH51MkGlhBWn7P0GyAEqAxXTG0eSJEnavRgjL/ziBV74xQu0uboNlz5w6ZeLXkEevNQPVkyGU/8NLa5JJKtUGhSn7DWIMbZOexJJkiRpD2KMTP3xVGb8dgZtr23LJfddQkbmF4veTnjpSlg2Adr/HY65MZmwUilRnH32JocQuqc9iSRJkrQbMUae/cGzzPjtDNrd2I6e/+25+6I3cyAsHQft/gLH3pxMWKkUKU7Z+wYwJYSwza0XJEmSVJJijDw15Cmy/5RNh1s6cPG/LiZkhKKTCvLh5etg8SPQ9o/Q6rvJhJVKmeJsql69JIJIkiRJu4oFkcm3TGbWvbPoOKQj3f/cnRDCFyfBqzfCRw/Cyb+BE25PJqxUCu2x7IUQWsUY3wkhtNvdeIzxjfTFkiRJ0qEsFkQm3jSR2f+dTafvd+Lc35+7m6IX4bVvwgf/g9Y/gxN/lExYqZTa28rercCNwJ93MxaBrmlJJEmSpENaQX4Bj1//OG8Oe5Mzf3wm5/zynN0Xvde/DYv+DSfcASf9LJmwUim2x7IXY/z08UUXxhi37zoWQqic1lSSJEk6JBXsLOCxwY8xb8Q8zv7l2XT5SZcvT4oR3rgN3v07HP+9wss3v1gGJRVr64Vs4IuXcu7unCRJklRsOatyGNNvDEumLym8bixA1TpV2bp2K91+140zfnjGl98UI8z5ISz8Cxz3ncIHslj0pN3a2z179YGGQJUQwinAp/+KDgeqlkA2SZIklVM5q3K4u9nd5O/I//xkhK1rtxIyA22vabv7N877GSz4I7T8RuEWCxY9aY/2tr
J3PnAN0IjC+/Y+/Ze0GfDuV0mSJB2wMf3GFC16u4j5kTH9xnDtC9cWHZj3K3jrV3D0DYWbplv0pL3a2z17w4BhIYTLY4zjSjCTJEmSyrkl05fs3/j838O8n0LzwXDqvyEUZ7to6dC2z38lFj1JkiQddHE/xhfcBW/eAU0HwGn3W/SkYvJfiiRJkkrevq7A/HR84T0w+zZo0hdOHwYZmelOJpUblj1JkiSVqIWPL9znnCZnNoH3/lW4l16jXtDpIcgozoPkJX1qn/9iQgi9d3N6EzAvxvjxwY8kSZKk8uqVv73ClO9O4YiTjmDdO+vIz/3yQ1oyK2XS787t8No3ocHF0PkRyKiQQFqpbCvO/x65HjgdmJY6Pht4HWgeQvhljPHBNGWTJElSOVGQX8BTQ57i1XtepdVlrej1UC9yt+R+aZ+9Jmc2od+f86n67o1w1AVw5ljIrJh0fKlMKk7ZywKOjzGuBgghHAkMB04DXgQse5IkSdqj3Jxcxg0Yx7sT36XjrR0574/nkZGZQcWM9Vz746GwZjqftb3qx8G7C6F+NzjzUcislHB6qewqTtlr/GnRS/k4dW59CCEvTbkkSZJUDmxZsYURF49g9Zur6fGPHnT4ZofCgW2rYEIzKNixy+wIW94BAnT4D2RVSSCxVH4Up+w9H0KYBIxJHV+eOlcN2Ji2ZJIkSSrTVs9dzYiLRrBtwzb6P96fYy869vPBGf2+UPR2FeHla+C8F0oiplRuFedpnDcDQ4G2qa/hwM0xxk9ijOfs6U0hhMohhFdDCG+GEOaHEH6ROt88hPBKCGFRCGFUCKFi6nyl1PGi1Hizr/izSZIkKSGLpizif2f8j1gQuW7GdUWLHqQu3dyLfY1L2qd9ruzFGCMwNvW1P3YAXWOMOSGECsCMEMKTwK3AX2KMj4QQ/kXhA2DuTX3fEGM8JoTQH/gD0G8//0xJkiQlbNa/ZzH55skc0foIBkwawOGNDt/NrP3ZVV3Sgdjnyl4IoXcI4b0QwqYQwuYQwpYQwuZ9vS8WykkdVkh9RaArnxfHYcBlqdeXpo5JjXcLIexru01JkiSVErEg8vTtT/PE/3uCY84/hmunX7uHogfF31Vd0oEqzj17fwQuiTEu2N8PDyFkUrhNwzHAP4D3gY0xxp2pKcuAhqnXDYGlADHGnSGETUAdYO3+/rmSJEkqWXnb8hg/cDwLxi2g/Tfbc+FfLyQjaw/rChvmQkYlKNi+5w+sd2Z6gkqHkOKUvdUHUvQAYoz5QNsQQk1gPNDqQD5nVyGEG4EbAZo0afJVP06SJElfUc7qHB659BGWv7qc7nd1p+N3O7LHC7Q+fAhevREqVIfcfIi7ebh7RiU4Y1R6Q0uHgOKUvVkhhFHAYxTehwdAjPHR4v4hMcaNIYRpFG7OXjOEkJVa3WsELE9NWw40BpaFELKAGsC63XzWf4D/ALRv396LuSVJkhK05u01jLhoBDmrc+j3aD9aXbaH/7efnwtvDIH3/glHdIHOjxSen9Gv6D579c4sLHpV6pfUjyCVW8Upe4cDW4Huu5yLwF7LXgihHpCXKnpVgPMofOjKNKAP8AgwGJiQesvjqeOZqfGpqYfDSJIkqRT6cOqHjOo9iqzKWVzzwjU07NBw9xO3LoPpV8C6l+H478HJv4OM1K+hbq8gpU1xnsZ57QF+9lHAsNR9exnA6BjjpBDC28AjIYRfA7OB+1Pz7wceDCEsAtYD/Q/wz5UkSVKazX5gNpNunESd4+ow4IkB1Gxac/cTV02Fl/pD/jY4Yww06VOyQaVD2B7LXgjh+zHGP4YQ7mE3z76NMX57bx8cY5wLnLKb8x8Ap+7m/HbgiuKEliRJUjJijEz7yTSm/2Y6Lc5twRVjr6Byjcq7mwgL/ghv/giqHwdnPgo1vvLjGyTth72t7H36UJZZJRFEkiRJpdvO7TuZcN0E3hr5FqdcfwoX3XsRmRUyvzwxdxO8fA0sewya9IXT7ocKh5V4XulQt8eyF2
OcmPo+bE9zJEmSdGjYunYro3qNYsmMJXT7XTc6/6Dz7p+4uXEeTL8ccj6Adn+B474Dbp0sJWKf9+yFEI4Fvgc023V+jLFr+mJJkiSptFj33jpG9BjBpqWb6DOqDyf2PXH3Ez8aAa98HSocDt2mwRHulSclqThP4xwD/Av4L5Cf3jiSJEkqTRZPX8yoy0YRMgKDpw6mcafGX56Unwuzvwfv3rPL1glHlXxYSUUUp+ztjDHem/YkkiRJKlXmjZjHhGsnULN5TQY8MYDaR9f+8qSty2HGFbB2Jhw3BE75A2RUKPmwkr6kOGVvYgjhm8B4im6qvj5tqSRJkpSYGCPTfzOdaT+ZRtMuTen3aD+q1K7y5YmrpxVuq7DzE+g8Cpr2LfmwkvaoOGVvcOr77buci0CLgx9HkiRJScrPzWfijRN5c9ibtBnYhkvuu4SsSl/4lTFGWHAnvPlDqH4sdHseahyfSF5Je1acTdWbl0QQSZIkJWvbhm2Mvnw0H037iC4/70KXn3b58hM38zbDy9fC0kehcR/o+D+oUD2ZwJL2qjgre4QQOvHlp3EOT1MmSZIklbANH25gRI8RrH9/Pb0e7EWbq9t8edLG+TC9N+S8D6f8GVoNcVsFqRQrztYLDwJHA3P4/GmcEbDsSZIklQPLXl7GyJ4jKdhZwMBnBtKsS7MvT/roEXjl+sJVvG5T4YizSjynpP1TnJW99sAJMcaY7jCSJEkqWW+PfZvxA8dTvUF1BkweQN3j6hadkJ8Ls2+Hd/8G9TpD59FQtUEyYSXtl+KUvbeA+sDKNGeRJElSCYkxkn1nNs9+/1kand6I/hP6U61etaKTtq6Al/rCmpfguO/AKX9yWwWpDClO2asLvB1CeJWiWy/0TFsqSZIkpU1+Xj6Tb5nMG/95gxP7nshlwy4jq/IXfi1c/QK81A925kCnkdCsfzJhJR2w4pS9n6c7hCRJkkrGjs07GHPFGN5/+n3OuOMMuv66KyFjl4esxAjv3AVzfgDVj4Guz0HNE5MLLOmAFWfrhRdKIogkSZLSa9PSTYy4aARrF6zlkv9eQrvr2xWdkLcFXr4Olo6Fxr2h4wNQ4fBkwkr6yorzNM7ewB+AI4CQ+ooxRv/lS5IklRErXl/ByEtGkvdJHlc9eRUtzm1RdMKmBYXbKmx5t/DevFa3ua2CVMYV5zLOPwKXxBgXpDuMJEmSDr6FExcyrv84qtatysDsgRxx4hFFJyweDa9cB1nVCi/bPPLsRHJKOriKU/ZWW/QkSZLKplf+9gpTvjuFBl9rwJUTr+Sw+od9PliQB7O/Dwvvhrqd4IzRULVhcmElHVTFKXuzQgijgMco+jTOR9OWSpIkSV9JQX4BTw15ilfveZVWl7Wi98O9qVB1l20Ttq2EGX1hzQw49tuFl25mVkwusKSDrjhl73BgK9B9l3MRsOxJkiSVQrk5uYwbMI53J75Lx1s7ct4fzyMjM+PzCR9PLyx6eZuh0whodmVyYSWlTXGexnltSQSRJEnSV7dlxRZGXDyC1W+upsc/etDhmx0+H4yx8JLN2bfDYS2g6zNQs3VyYSWlVXGexvkAhSt5RcQYr0tLIkmSJB2Q1XNXM+KiEWzfuJ0rJ15Jyx4tPx/M2wKv3ABLRkOjXoXbKlSskVxYSWlXnMs4J+3yujLQC1iRnjiSJEk6EIueWsSYK8ZQ6fBKXDv9Wuq3rf/54KYFMP1y2LIQ2v4Bjr/dbRWkQ0BxLuMct+txCGEkMCNtiSRJkrRfZv17FpNvnswRrY9gwKQBHN5ol+2Ql4yFl6+FzCpwzjNQv2tyQSWVqOKs7H1RSwo3WJckSVKCYkHkmR88w8w7Z9KyR0suf+RyKlWvVDhYsBPm/BDe+TPU6QhnjoGqjZINLKlEFeeevS0UvWdvFfCDtCWSJEnSPuVty2P8wPEsGLeADjd34IK7LyAjK/XEzW2r4KV+8PGL0PJmaHeX2ypIh6C9lr0QQgBOjDEuKaE8kiRJ2oec1T
k8cukjLH91Od3v6k7H73YkfHoP3scz4KW+kLsRTn8Iml+VbFhJidlr2YsxxhDCE8BJJZRHkiRJe7Hm7TWMuGgEOatz6PdoP1pd1qpwIEZY+DeY/T2o1gzOfwpq+iucdCgrzj17b4QQOsQYX0t7GkmSJO3Rh1M/ZFTvUWRVzuLaF6+lQfsGhQN5OaltFUZBo0uh4zC3VZBUrLJ3GnBVCGEx8AkQKFz0a5PWZJIkSfrMnKFzmPj1idQ5rg4DnhhAzaY1Cwc2L4TpvWHzO3Dyb+GEH0DISDaspFKhOGXv/LSnkCRJ0m7FGJn202lM//V0WpzbgivGXkHlGpULB5eMS22rUAnOeRrqd0s2rKRSpTj77C0uiSCSJEkqauf2nUy4bgJvjXyLU64/hYvuvYjMCpmF2yq8eQcsuBPqnAZnjIFqjZOOK6mUOZB99iRJkpRmW9duZVSvUSyZsYRuv+tG5x90Lnzi5rbV8FJ/+Ph5aPnN1LYKlZKOK6kUsuxJkiSVMuveW8eIHiPYtHQTfUb14cS+JxYOrMmGGVdA7gY4fTg0H5hsUEmlmmVPkiSpFFk8fTGjLhtFyAgMnjqYxp0aF26r8O7f4Y1boVpT6D4Zap2cdFRJpZxlT5IkqZSYN2IeE66dQM3mNRnwxABqH10bdn4Cr3wdFo+EhpcUruhVrJl0VEllgGVPkiQpYTFGpv9mOtN+Mo2mXZrS79F+VKldBTa/m9pWYQGc/Bs44YduqyCp2Cx7kiRJCcrPzWfSTZOYM3QObQa24ZL7LiGrUhYsHQ8zB0NmRTh7Chx1XtJRJZUxlj1JkqSEbNuwjdGXj+ajaR/R5edd6PLTLoSYD3N+CG//AWp3gDPHQrUmSUeVVAZZ9iRJkhKw4cMNjOgxgvXvr6fXg71oc3Ub2P5x4bYKq6fBMTfB1/7qtgqSDphlT5IkqYQte3kZI3uOpGBnAQOfGUizLs1gzczUtgrroONQaDE46ZiSyjjLniRJUgl6e+zbjB84nuoNqjNg8gDqHlsHFv4dZt8KVRpB95lQq23SMSWVA5Y9SZKkEhBjJPvObJ79/rM07tSYfo/1o1otYOZA+OhhaHARdHoQKtZKOqqkcsKyJ0mSlGYFOwuYfMtkXv/365zY90QuG3YZWbkfwtOXw8a3oM2v4MQfua2CpIPKsidJkpRGOzbvYEzfMbz/1PuccccZdP11V8KKx2HmIAhZcPaT0OD8pGNKKocse5IkSWmyaekmRlw0grUL1nLJfy+h3bVtYO6P4O3fQ+32qW0VmiYdU1I5ZdmTJElKgxWvr2DkJSPJ+ySPq568ihZnVIdpF8Dq5+CYG1PbKlROOqakcsyyJ0mSdJAtnLiQcf3HUbVuVQZmD+SIIz+EKV1g+xo47X9w9LVJR5R0CPAuYEmSpIPolb+9wiOXPkK9E+pxw8vXc0TFcfDsmYX353XPtuhJKjGu7EmSJB0EBfkFPDXkKV6951VaXdaK3sMuoMJbN8NHD0KDHnD6g1CpdtIxJR1CLHuSJElfUW5OLuMGjOPdie/S8daOnPeT5mTMOAs2zoOTfgGtf+y2CpJKnGVPkiTpK9iyYgsjLxnJqjmr6PGPHnTouQqeObWw3J09GRpckHRESYcoy54kSVIx5KzKYUy/MSyZvgQiEKB+u/rkrMghd0suV07oR8vGD8KLv4Fa7eDMcXBYs6RjSzqEWfYkSZL2IWdVDnc3u5v8Hfmfn4yw6vVVAAyc3JMWlb4F85+Bo6+H9n93WwVJiUvbxeMhhMYhhGkhhLdDCPNDCN9Jna8dQngmhPBe6nut1PkQQvhbCGFRCGFuCKFdurJJkiTtjzH9xpC/Y+dux0Io4P1//xg+fhFOvQ9O+69FT1KpkM47hXcCt8UYTwA6AjeHEP5/e3ceH1V973/89c1sScg6YZUQCBAFBEQIihFURKgWlVq3q1drsa23i61rW/3VWltvb5W23mp396Wt27WttLUisokJW5BVdgw7SYDsJJkl8/
39MQOEJSEomZkk7+fjkcecOec753xOPCa88/2e7xkGPADMsdbmAXMi7wGuAPIiX3cAv2/H2kRERETabOfCHYA54TZrE1gxZzhMKYTBX41uYSIirWi3sGet3Wut/SiyXAusB/oC04CXIs1eAr4QWZ4GvGzDFgMZxpg+7VWfiIiISFtZa1vd3lCXBN4xUapGRKRtojIHsDFmAHAusAToZa3dG9lUCvSKLPcFdjb72K7IOhEREZGYsCHLypdWkpTS0Gq7pJT6KFUkItJ27R72jDEpwFvA3dbamubbbPjPZK3/qez4/d1hjCk2xhTv27fvNFYqIiIicsTOop08O+5Z3v7y24ydvBSnK3DCdk5XgLGTl0W5OhGRk2vXsGeMcREOen+21v41srrs0PDMyGt5ZP1uoF+zj2dH1h3FWvu0tTbfWpvfo0eP9iteREREuqTqndW8dfNbPH/h89TuruULL15FwdQiMntVHBf4nK4Amb0qKJi6KEbVioi0rN0evWCMMcBzwHpr7RPNNs0EbgMei7y+3Wz9ncaY14Dzgepmwz1FRERE2lWgPkDhjEIKZxSChQkPTWD8bT7cG26BGj9feeQ5iv5VQPH7Y2moSyIppYH8y5ZRMLUIT864WJcvInIcc7Ibjj/1jo0ZDywE1gChyOr/R/i+vTeAHGA7cIO1tiISDn8DXA7UA9OttcWtHSM/P98WF7faRERERKRV1lrWvraW97/3PjW7ajj7hrO57IeDyTjwMOz+B6QMguEPwdKvQ8h3/A4SPDBtGyT1jnrtIiLGmOXW2vwTbWu3nj1r7Ye0NEcxTDpBewt8q73qERERETnW7mW7mXX3LHYW7aTP6D588eXP0T/jRVh7czjEjXoczroLHB7oczl8eCPsW0h4ygEDPSbA+NcV9EQkLrVb2BMRERGJV7V7apnz4BxWvbyKbr26cfUzV3LORStIWDsR9pbDwOlwzv8cHeKSesPkBbErWkTkFCnsiYiISJcRaAiw6IlFfPizDwkFQlz4/QuZ8DUnnk13QPFH0P0CuPifkDU21qWKiHxmCnsiIiLS6VlrWf/Wet67/z2qt1cz5JohTP7x2XhrfgpLXoPkbCj4M/S/CUxLd6GIiHQsCnsiIiLSqe1dsZdZd89i+wfb6TWyF9NmX05ur9dh3XTAwvCHYdj3wNkt1qWKiJxWCnsiIiLSKdWV1TH3obmseG4FyVnJTP39VEZP3kzCmilQvhNyboBzZ0C3/rEuVUSkXSjsiYiISKcS9AVZ8tQSPnj0A4INQcbdM46L70wnccvdsKQQMkdBwZ+g50WxLlVEpF0p7ImIiEinYK1l48yNvHffe1RureTMK89kyk9Hk9Xwc1j0PHi6w3lPw8DbIcER63JFRNqdwp6IiIh0eGVryph1zyxK5pTQY1gPbnnnRgYN+Ces+S9oaoAh98LwH4I7PdaliohEjcKeiIiIdFj1++uZ9/A8lv9xOZ50D1c8dTljrirHseZqWLEZzpgKo38JaWfFulQRkahT2BMREZEOpynQxLLfLmPBjxfgq/Ux9ltjueTePiR98n0oei8c7i55B864ItaliojEjMKeiIiIdCib39nMrHtncWDjAQZNGcTnZoyjR+DXsOi34EyB0f8LZ34LElyxLlVEJKYU9kRERKRD2Ld+H+/d+x5b3t2CN8/LTW/fQN6QeZg148BfCYPugJE/gcQesS5VRCQuKOyJiIhIXGuoaGD+j+ez7LfLcKe4mfLLKZx33UEca2+E4jXQ8xIY8yvIPCfWpYqIxBWFPREREYlLoWCI4j8WM//h+TRWNTL6a6OZ+P2BdNv5EHz4N+g2AMb/H/T7IhgT63JFROKOwp6IiIjEna2ztzLrnlns+3gfAyYO4PKfT6CX41lYci0YJ4z87/DjFJxJsS5VRCRuKeyJiIhI3Diw+QDv3fcem/6xicyBmdzw1vUMOWcZZvV4aNgLA26FUT+D5L6xLlVEJO4p7ImIiEjMNVY38sGjH7
DkqSU4PU4mPTaJcbck4Fw7HZYshazzYMJfofu4WJcqItJhKOyJiIhIzISaQqx4bgVzH5pL/f56Rk0fxaQfUWa20AAAIABJREFUDCOl9FFY8Aok9YFxL0HuLWASYl2uiEiHorAnIiIiMbFt/jbevftdylaVkTM+h8v/eQl9kv8Cy24GG4RhD8LZD4IrNdalioh0SAp7IiIiElWVn1Qy+7uzWf/X9aTnpHPda9cybNwGzMpL4eC28Oya5/4cUgbGulQRkQ5NYU9ERESiwlfrY+H/LGTxE4tJcCYw8dGJXHB7Cq5134bC+ZAxAi6dA70vjXWpIiKdgsKeiIiItCsbsqx6eRVzHpxDXWkdI28dyaQfjSLtwAxY8DS4M2Hs72DQ1yBB/zQRETld9BNVRERE2s2Owh3MunsWe4r3kD0umxv/ei3ZmX+Hj6ZDsA7y7oQRPwKPN9alioh0Ogp7IiIictpV76jm/e+/z9rX1pLaN5Vr/nQNIy7ejVl5BZRsgN5TYMz/QvqwWJcqItJpKeyJiIjIaeM/6Kfw8UKKfl4EwEUPX8SFX++Fe+P34IN/QcpguPgfcMZUMCbG1YqIdG4KeyIiIvKZWWtZ85c1vP/996ndXcvw/xjOZY+eR3rNkzD/SXAkhWfYPPPb4PDEulwRkS5BYU9EREQ+k91Ld/PuXe+ya/Eu+ozpw3WvfpGcPrNh1Rjw7YdBt8PIn0JSr1iXKiLSpSjsiYiIyKdSs7uGOQ/OYfUrq0npncK0F6ZxzuXVmBXXwNKV0GM8jHkXvKNjXaqISJeksCciIiKnJNAQYNEvF/Hhzz4k1BRi/IPjGf+d/ni2/ADmvgHJ/eDC1yDnBt2XJyISQwp7IiIi0ibWWta9uY7Z35tN9fZqhl47lMk/u5DMhqdhwQzAwIhHYOh3wZkc63JFRLo8hT0RERE5qb0f7eXdu99lx8Id9DqnF194YRoDchfBynFQvwv6/weMehy65cS6VBERiVDYExERkRbVldYx5wdzWPnCSpK7J3PlH6/k3C+ESFh5CxQVQeZoKHgVeo6PdakiInIMhT0RERE5TtAXZPGvFrPwpwsJNga54L4LuOi+s0jc9mN4/wVI7AnnPwe5t0GCI9bliojICSjsiYiIyGHWWjb8fQOz759N5SeVnHnVmUyZcQlZoVfgg2sh5Avfkzf8IXClxbpcERFphcKeiIiIAFC2uox3736XbfO20ePsHtwy6z8ZNOxj+GgC1G2FvlfBub+EtLxYlyoiIm2gsCciItLFHdx3kHk/nMdHz3xEYkYiV/zmCvJvTCRh9Tfgg/chbShMnAV9psS6VBEROQUKeyIiIl1Uk7+Jpb9ZyoKfLMBf52fsnWO55IFzSNr9M5j9e3CmwpinIO/rkOCKdbkiInKKFPZEREQ6qbrSOt688U12LNwBFjCQMyGH6167jr3L9/Lefe9xYNMBBl8+mCm/mEQP11tQeAsEqmDw12HEjyGxe6xPQ0REPiWFPRERkU6orrSOXw34FU2+piMrLez4YAdP9H0CLGSdlcXN/7qZvNHbYfllUP0x9LoUxvwKMkbErngRETktFPZEREQ6oTdvfPPooNechYyBGXyj6HM41n4X5r4N3XJhwt8gexoYE91iRUSkXSjsiYiIdEI7Fu5odXv9nnIc7w0P34t3zs9gyN3gSIxSdSIiEg0KeyIiIp2Rjdyk14KAzwn9b4Jz/geSz4heXSIiEjUKeyIiIp1IxdYKFj2xCJc7QMDvbrGdyxOAC16MXmEiIhJ1CnsiIiKdwJ7iPRTOKGT9W+tJcCZw/pSlLJ11PsHA8Y9McLoCjJ2yNAZViohINCnsiYiIdFDWWrbO2krhjEK2zduGJ81DwXcLOP+bI3DPfpTNK/OoLPMeFficrgCZvSqYcPXCGFYuIiLRoLAnIiLSwTQFmvj49Y8pnFFI+ZpyUvumMvnnkxnzH+l4yp6FJddBkp+v/vg5Cv9ZQPH7Y2moSyIppYH8y5Zx4ZVFuB
MDsT4NERFpZwp7IiIiHYS/zs9Hz37EoicWUbOzhh7DejDt+asYcfF2HNsegQ9mg3FCznVQvR531SomXjefidfNP35nPS6KdvkiIhJlCnsiIiJxrq6sjiVPLaH4d8U0VjXS/6L+TH1yPHmDZ2O2XgOLd0BSXxjxExj8NUjqDQ2l8PYACPmO32GCB8a/HvXzEBGR6FLYExERiVMHNh2g6JdFrHppFU3+JoZeM4SCr6WTnfZn2PFfsMYPvSbC6CfCD0NPaPZrPak3TNsGH94I+xYCkUcx9JgQDnpJvWN0ViIiEi0KeyIiInFm15JdFM0oYv3f1uNwOxj1peFccEMpWYHHoWIF1KTC4Dsg75uQPrTlHSX1hskLole4iIjEFYU9ERGROGBDls3vbKZwRiE7Fu4gMSORCfedzXlTFpNSfTuUV0L62TD2dzDgFnClxrpkERGJcwp7IiIiMdTkb2LNX9ZQ9PMi9q3bR3pOGp97JJvRY/+Ku+oB2O+EftdA3reg50VgTKxLFhGRDkJhT0REJAYaqxtZ/vRylvxqCbV7auk1IotrHndx9qBf4/CVgK8PjHgEBn0Nks+IdbkiItIBKeyJiIhEUe2eWhY/uZjlf1iOr8ZH7oQsrr6/lEG9f4axPki/GM58HLK/AAmuk+9QRESkBe0W9owxzwNXAuXW2uGRdV7gdWAAsA24wVpbaYwxwJPA54F64MvW2o/aqzYREZFo27d+H0W/KGL1K6uxTZZhlydR8LmFnNF9DjhTIPf28IQrGcNjXaqIiHQS7dmz9yLwG+DlZuseAOZYax8zxjwQef994AogL/J1PvD7yKuIiEiHtqNwB0Uzitg4cyPOJAdjrvFxwUV/JjNzG6QNhbxfw8AvgSst1qWKiEgn025hz1r7gTFmwDGrpwGXRJZfAuYTDnvTgJettRZYbIzJMMb0sdbuba/6RERE2osNWTbO3EjhjEJ2LdpFUqaTi2/bzdhxf6FbemN4iGbec+Fn5GnCFRERaSfRvmevV7MAVwr0iiz3BXY2a7crsk5hT0REOoxgY5DVf1pN0S+KOLDxABnZhivuWMKo89/HneGFwfeGn4+XnB3rUkVEpAuI2QQt1lprjLGn+jljzB3AHQA5OTmnvS4REZFT1VjVSPEfilny5BLqSuvoc5aPa7/zb4blryahdwHkvQz9vggOd6xLFRGRLiTaYa/s0PBMY0wfoDyyfjfQr1m77Mi641hrnwaeBsjPzz/lsCgiInK6VO+sZvGvFvPR0x/hr/MzaPQ+rpn+DrkjyzADb4W8lyBzZKzLFBGRLiraYW8mcBvwWOT17Wbr7zTGvEZ4YpZq3a8nIiLxqmxNGYt+sYg1f1mNtSGGF2yi4PJ59B6ZDnl3Q+5t4E6PdZkiItLFteejF14lPBlLd2PMLuBHhEPeG8aYrwDbgRsizd8h/NiFLYQfvTC9veoSERH5NKy1bF+wncIZhWz59xZciU2MvWwZ465YQsaoi+HMV6DXJE24IiIicaM9Z+O8qYVNk07Q1gLfaq9aREREPq1QU4gNf9tA4eMfsKe4jOT0RiZeV0T+lSUkj/oSDP4jdNM95CIiEn9iNkGLiIhIPAs0BFj10iqKZsyjsqQeb+8Kpk4v4pxrEnGN+Ab0uxYcnliXKSIi0iKFPRERkWYaKhpY9ptFLHmqiPoDTZwxcDeX3bOUIdeNJWHoM5A5KtYlioiItInCnoiICFC1rYpFM95jxQvrCDQa8kZtouDebfS/5lrMoMfBnRnrEkVERE6Jwp6IiHRppSv2UvToX1k7cx+GECMuXEPBl930/Nzt0GcymIRYlygiIvKpKOyJiEiXY62lZNZaiv57JlsLg7gTfYz7/DrO/+ZQ0sf/L6QMiHWJIiIin5nCnoiIdBmhYIh1L75L0S8K2bvRTUpGLZOm7yH/rikkDn8YHImxLlFEROS0UdgTEZFOz19Tx8r/fYVFv99OVVkSWWfUctWDMPLbt+Dsc16syxMREWkXCn
siItJpHdy+hWWPvcLSP/toqE0i+6xaPvdQJmd95UeYpKxYlyciItKuFPZERKRzsZaKpf9m0WPvsvJfaQQDLs4qaKTge6PJufqHmnBFRES6DIU9ERHpHAI17Hn7eYqeXMO6wmxMQgYjrwxR8NDV9Mg/N9bViYiIRJ3CnoiIdGi2ci1bX3mWomdqKFnbH09yHwruyOD8//efpOb0jHV5IiIiMaOwJyIiHU8oQFPJ3/j4mbcoetVL2Y7epPZIZfIjQxhzzxfwpHliXaGIiEjMKeyJiEh8aCiFD2+EfQsBCxjoMQHGvw5JvcNt6vfgX/MMH/2xiEUzz6bmwDB6DDJMe+ZSRnzpAhxuRyzPQEREJK4o7ImISOw1lNL4ah5FM8eyfO791Ncmk5xaz5hLiynYmUfiZS9Tt/INlr6wn2Wzx9B4sID+56cw9YWp5E09C5NgYn0GIiIiccdYa2Ndw6eWn59vi4uLY12GiIh8RvVvXcoLdw2lqjyTYMB1eL3TFSDNW0O/s7azdtFImoIOhl6VQ8GDU8gelx3DikVEROKDMWa5tTb/RNvUsyciIjFX9LI5LugBBAMuKsq8VJZnMPqro7ng/glknann44mIiLSFwp6IiMROKAAHilkxb8xxQe8IQ2I3H1c+/YWoliYiItLRKeyJiEj0hJqgaiWUzoWyedRu+oiSVb2or72m1Y81HEyKUoEiIiKdh8KeiIi0HxuC6o+hbB6UzaXxk8VsW53JJx/nUrJ+CPt3ng+Ay+0n4He3uBuXOxCtikVERDoNhT0RETl9rIXazVA2F8rmEtixkJ1rk/lkbS4lG4ayd+sobMjgSnaSM6E/o76dy8BJA3nv7r+zc9EemoLH/1pyOIP0PU+TsYiIiJwqhT0REfls6rZFwt08QnvmsedjE+m5G8rOTXfQ5E8gwWnIHtePi27OJXdSLtnnZx/1TLxr37iVJwc8QUIoSCh0ZH1CQhPG4eLaN26NwYmJiIh0bAp7IiJyaur3HB6WaUvnUb7hICUfD6Rk/RC2rb8df304rPUe1Zvzvh0Od/0n9Med0vIwzZTeKdy17V7evPFNdizccfiZ6tnjc7n+9etJ6Z0SpZMTERHpPBT2RESkdY37oHx+eFKV8nlUbimLhLuzKFl3Mwcrw7Noegd7GXFrONzlTswluXvyKR0mpXcK0xdMb4cTEBER6ZoU9kRE5Gj+KihfcLj37uD2TyhZl8sn686kZP01VO1NBMLhbODnw+Fu4KSBpOekx7hwERERaU5hT0SkqwvUwb4PD99359u9lu3rc/hkXR4lGy6nvKQbAJ50D7kTc7lgUi65l+bSfWh3jDExLl5ERERaorAnItLVBBtg/6LD4S5YupydG/tQsn4wJRsnsHvjVGyTwZnoJGd8DiPuCPfe9RndhwRHQqyrFxERkTZS2BMR6eya/HBg6eFhmaHyxezd6qVk3SBKNo5mx7rJBH0G4zD0HduX8Q+Ew12/C/rhTNSvCRERkY5Kv8VFRDqbUBAqV4R77krnYss/ZP/O5PCkKpvOZdvai2msDQ+/7Dm8J2O+Hg53Ay4egCfNE+PiRURE5HRR2BMR6ehsCKrWHB6WSfkCqkuhZO1ASjafS8na+6jdF34cQsaADIbeEJkx89JcUnrpkQYiIiKdlcKeiEhHYy3UbDg8LJPy+dTvr2fbulw+2TSKko+/Q8XO8I/35B7J5F56ZMbMzIGZMS5eREREokVhT0Qk3lkLdZ8cCXdl8/BXVbB9Qw4lm86hZN0dlG7xgAV3qpsBFw9g7L3hgNfz7J6YBM2YKSIi0hUp7ImIxKP6XYcfYk7pXJpqdrNrS19KNo6kZMOX2LU+mVAAHG4H/Qr6MfFL4XB3Rv4ZOFyOWFcvIiIicUBhT0QkHjSUQfn8I5Oq1GyldEcvSjacTcmm69m+JpVAPWDgjDFncEGk5y7nwhxcya5YVy8iIiJxSGFPRCQWfBVQvuDwsExb9TEVZV5K1g+jZPNUSlZ5aaiyAH
Qf0p1R0yMzZl4ygKTMpBgXLyIiIh2Bwp6IyKfVUAof3gj7FgIWMNBjAox/HZJ6H902UAPlC4/cd1e5ktrKFErWn0XJlov5ZNX11JSGm6b1S+OsLwxkwKUDyL00l7S+adE+MxEREekEFPZERD6NhlIaX82jaOZYls+9n/raZJJT6xlzaTEFO/NIvH41HNwavu+ubC5UFNNQ52LbhjxKtoyjZM009peEd5XkTTo8Y2bupFy8g70Yo0lVRERE5LNR2BMR+RTq37mZFx7+ElXlmQQD4Xvm6mu7seidAjYUD+ErDCHBWHZsHkDJ1gJK1n6OvRsSsCFwJbvof1F/zv1mONz1Pqe3ZswUERGR005hT0TkVISa4OA2il42RwW9Q4IBFwf2duf33/sGdbVemvyWBGcC2eOyueiH4XCXfX42DrdmzBQREZH2pbAnInIsa8F3AGo3Qs1GqN0Ufq3ZCHVbIeRnxbzvHhf0DgmFHNRUpjHunnHkTsql/4T+uFPcUT4JERER6eoU9kSk62pqhNotkUC3EWo2HVn2VxIMOKjal8mBsp5UVOZxYN+lVJZdx4Fdbuprg63u2toEpvxiSpROREREROR4Cnsi0rnZUPgB5c175w4tH9xOUzCByvIMKkqzOFCRS8X+EVSUTaRidyLVey02dGRXiZmJZOVlkXOJlw1vLifgb7m3zuUOROHkRERERFqmsCcinYO/+uggd6inrnYTTT4/VfsyOFDqpaK8DxUVuVSUj+TA7mSq95qjA11GIt48L/0uzmLk4Eyy8rLw5nnxDvaSnJV8uN1LO8rYuWgPTcHjf4w6nEH6npcdjbMWERERaZHCnoh0HKEA1H1y/H10tZtoqttH1f5ID11pFhUVA6gon0jFnqupKnVgm47sxpPuISsvi+wJXkbmefHmecOhbrCXpKykNj324No3buXJAU+QEAoSCh2ZbCUhoQnjcHHtG7e2x3dAREREpM0U9kQkvlgLjaWRXrlImIssh6pLqCpPC/fQlXmp2JdNxb5zOLD3IqpKXUcHujQP3jwvZ4z3MrxZmPPmeUnunvyZn2OX0juFu7bdy5s3vsmOhTsOP1M9e3wu179+PSm9Uz7b90FERETkM1LYE5HYCB48PMyyeQ9dqGozVXsTqCjNoqLMG54cZX82FXtHUFXmJhQ8EtLcKe5woLswi+GRHjrv4HCwS+7x2QPdyaT0TmH6guntegwRERGRT0thT0TaT6gJ6rcf1TtHzUZCVZup3lkX7qEr9VJRlkXFvr5UlE2isnTq8YFusJfeBV6GHdND161nt3YPdCIiIiIdlcKeiHx2vgPHzXQZqtpE9bZyKvamRu6j81JR3ouK8tFUlk44KtC5urnwDvbSa1wWQyO9c4fuo+vWS4FORERE5NNQ2BPpIupKtjL7Kz9h87LeNNQlkZTSQN7YUiY/9zApuYNOvoOmRqjdetSDxkOVG6kp2UPFLkekly6LirLuVJQPoLJsBE2BhMMfdyU78Q7Oouf5XoY0G27pzfOS0jtFgU5ERETkNFPYE+kC6kq28sIFT1JT0Y9gwAVAQ10y6z7sx64LnmT6orvCgc/a455JZ6s3UVOyiwMlDVTszeRAqZfKMi8HyntRWTb5qEDnTHLgHZxFj/OyOOuYHrqUPgp0IiIiItGksCfSBcz+yk+OCnqHBAMuairSKLzz24y7sYqKzZUc2NMtPNPloV66srE0BcYd/owzMQHvoEy6j+3Bmc3CnDfPS2qfVEyCAp2IiIhIPFDYE+moQgHwVx7/5asAfyWhhkr81dX4qmvYvGzgcUHvkGDAxeJ3zmPxO0dCmsNj8A7MwDumJ4ObhTnvYC9pfdMU6EREREQ6AIU9kVgKBcFfBf4KbGMF/sr9+Ksq8VVW4q+uwVdVg7/mIL6aBvw1jfjq/Phrg/gONuGvT8Df4MbX6MHf6MbfGFmOrAv604H0yIHsSUuZ+oeph2e6TMtWoBMRERHp6BT2JG595glF2pm1lkB9AH
91A77K/fgrDuCrrMBfWYWvugZ/dR3+2khQq/Xhqw3grwviPxjCdxD89Qn4GpzhkNbgIeBzn+Ao3SJfR3MnW9zdDJ5uCbhTXHi8btLSEnGnJuFOT8GTloI7PQlPqgd3qps5975BQ11yi+eSlFJP/n/ln75vjoiIiIjEnMKexKU2TyhyCqy1BBuD+Gv9+Ov8+Gp9+Gsjr9WN+Kqq8FdV46uuxV9dh6+mHn9tYySoBfHXN+GvC+GrN+FetUYHNpRw8gPjweVJwJ3kxpMcwt0N3MmGlDOceFNduFM9eFITcacl40lPwZ2RhicjHXdGJp6MFNwpbtyp7sPBzd3Nfcq9bjtefYp1Hx5/zx6A0xUgb2zZKe1PREREROKfwt5p4q+soPDBJyn+i4/62kSSUxvJv9nDhT+7C3emN9bldTitTShStT+d/7vql5xz39fw1/mPBLYaH/6aOnzVB/HXNOCrbcRfG8BXF8B/MIS/3h71bLfWOF0B3Il+3Il+PEk+3EkBkpMtGZngTknAk+LEneLGk5aIOy0Jd2q3I0EtMxN3phdPZhburJ6401NJcLQlFLafyc89zK4LnqSmIu2o76nTFSDNW8Pk5x6OYXUiIiIi0h6MtSe/lydajDGXA08CDuBZa+1jrbXPz8+3xcXFUamtNf7KCp4d9d9U7k0+7h/SmX3q+erKh+I68FlrCQVCBH1BmnxNx702+Y9f1+qrv+nIcmOQJl+AYGMg/OoL0uQLEmyMtPc3HWnvDx1+dbsOtjrs8FgJjqZwKEv040n04U6KvB4Ka4l+3MlB3N0ceFJckZ6yQ71p3XCnp+LJyMCdmYE7IwtHNy+4MyNfXnAmteN/geg4Miy2Fw0Hk0jq1kDe2LK4GRYrIiIiIqfOGLPcWnvC+3HiJuwZYxzAJmAysAtYBtxkrV3X0mfiJezN+/qPKHq+qcUhcgW3O5j4hx8DJwlWjUGCjX6aGn00+Xzh5QYfQV+ApkZ/JDAdeg1G1gePClhN/iBBXyQ0+UI0+UPhEOUPEfRZmvyWoB+aApamAOFl/2mciMNYnM4gDldTi68O54nWBXE2ey38x4VAK3UZy10vLMCdnoI7PR1nSkazcBb58niPfu9IBj3nTUREREQ6kdbCXjwN4zwP2GKt/QTAGPMaMA1oMezFi+K/+AgGTtzzEwy4+ODpEEteeYhgIIGmgOP0HfgEwepEQSrRGcSR1IQz9Zhg5bI43RaH0+Jwg9MFDrfF6Tbh926Dw21wuhNweCKvbgdOTwIOTwIOlwNnogOHx4nT48ThcZLgcmEcLkhwgfGEXxNcYFxHlpu/N86j10e2fTT4ndYnFOlWT8Zt807f91JEREREpJOJp7DXF9jZ7P0u4PxjGxlj7gDuAMjJyYlOZSdRX5vYegNrGPX5qkhwMjg8CZHgZHC6HeH3HgcOj+NwaHK4nTgTnUeCVKILp8cdfk1040j0RIKV+8Sh6dhw1byNcUHCaQyd7SBv7POaUERERERE5DOIp7DXJtbap4GnITyMM8blAJCc2kh9bcv3dCWnNXD5m7+JYkUdnyYUERERERH5bGI7ReDRdgP9mr3PjqyLe/k3e3C6Aifc5nQFyL/pJD1/cpyU3EFMX3QXw8bvJCnlIJgQSSkHGTZ+56d67IKIiIiISFcTTxO0OAlP0DKJcMhbBtxsrf24pc/EywQtHX02ThERERER6Zham6Albnr2rLVB4E5gFrAeeKO1oBdP3JlevrryIQpud5CcVo8xIZLT6im43aGgJyIiIiIiMRE3PXufRrz07ImIiIiIiMRCh+jZExERERERkdNHYU9ERERERKQTUtgTERERERHphBT2REREREREOiGFPRERERERkU5IYU9ERERERKQTUtgTERERERHphBT2REREREREOiGFPRERERERkU5IYU9ERERERKQTUtgTERERERHphBT2REREREREOiGFPRERERERkU5IYU9ERERERKQTMtbaWNfwqRlj9gHbY12HRE
V3YH+sixBpha5RiXe6RiXe6RqVeBev12h/a22PE23o0GFPug5jTLG1Nj/WdYi0RNeoxDtdoxLvdI1KvOuI16iGcYqIiIiIiHRCCnsiIiIiIiKdkMKedBRPx7oAkZPQNSrxTteoxDtdoxLvOtw1qnv2REREREREOiH17ImIiIiIiHRCCnsSN4wxlxtjNhpjthhjHjjB9nuNMeuMMauNMXOMMf1jUad0bSe7Tpu1u9YYY40xHWrWLun42nKNGmNuiPw8/dgY85do1yhdWxt+3+cYY+YZY1ZEfud/PhZ1StdkjHneGFNujFnbwnZjjHkqcv2uNsaMjnaNp0JhT+KCMcYB/Ba4AhgG3GSMGXZMsxVAvrV2JPB/wIzoVildXRuvU4wxqcBdwJLoVihdXVuuUWNMHvAgcKG19mzg7qgXKl1WG3+OPgS8Ya09F/gP4HfRrVK6uBeBy1vZfgWQF/m6A/h9FGr61BT2JF6cB2yx1n5irfUDrwHTmjew1s6z1tZH3i4GsqNco8hJr9OIR4HHgcZoFidC267RrwG/tdZWAlhry6Nco3RtbblGLZAWWU4H9kSxPunirLUfABWtNJkGvGzDFgMZxpg+0anu1CnsSbzoC+xs9n5XZF1LvgL8u10rEjneSa/TyHCOftbaf0WzMJGItvwsPRM40xhTaIxZbIxp7S/YIqdbW67RR4BbjDG7gHeAb0enNJE2OdV/s8aUM9YFiJwqY8wtQD5wcaxrEWnOGJMAPAF8OcaliLTGSXj40SWER0h8YIwZYa2timlVIkfcBLxorf2lMeYC4BVjzHBrbSjWhYl0NOrZk3ixG+jX7H12ZN1RjDGXAT8ArrbW+qJUm8ghJ7tOU4HhwHxjzDZgHDBTk7RIFLXlZ+kuYKa1NmCtLQE2EQ5/ItHQlmv0K8AbANbaRUAi0D0q1YmcXJv+zRovFPYkXiwD8owxucYYN+Ebsmc2b2CMORf4I+Ggp3tMJBZavU6ttdXW2u7W2gHW2gGE7y292lpbHJtypQuxQmdWAAAFbUlEQVQ66c9S4O+Ee/UwxnQnPKzzk2gWKV1aW67RHcAkAGPMUMJhb19UqxRp2UzgS5FZOccB1dbavbEuqiUaxilxwVobNMbcCcwCHMDz1tqPjTE/AYqttTOBnwMpwJvGGIAd1tqrY1a0dDltvE5FYqaN1+gsYIoxZh3QBHzXWnsgdlVLV9LGa/Q+4BljzD2EJ2v5srXWxq5q6UqMMa8S/oNY98h9oz8CXADW2j8Qvo/088AWoB6YHptK28bo/x0REREREZHOR8M4RUREREREOiGFPRERERERkU5IYU9ERERERKQTUtgTERERERHphBT2REREREREOiGFPRERaXfGmO8YY9YbY/58Gvb1ZWPMGc3eP2uMGfZZ99vK8TzGmPeNMSuNMTeepJZtkWfXnY7jvmiMue4kbY46/mk67ihjzOdP5z5FRCQ29Jw9ERGJhm8Cl1lrdzVfaYxxWmuDp7ivLwNrgT0A1tqvnpYKW3Zu5DijTlZLDLTH8UcB+YSfJSUiIh2YevZERKRdGWP+AAwE/m2MuccY84gx5hVjTCHwijFmgDFmoTHmo8hXQbPPft8Ys8YYs8oY81ikpysf+HOkpy3JGDPfGJMfaX9TpP1aY8zjzfZTZ4z5aWQ/i40xvU5Qp9cY83djzOpIm5HGmJ7An4CxkeMNatb+uFoim74dOY81xpghkbbdjDHPG2OWGmNWGGOmneD4xhjzG2PMRmPM+0DPZtseNsYsi5zX05G2J/peHNcu8vnvGGPWRc7ttZZqMsa4gZ8AN56oJ1NERDoWPVRdRETanTFmG5Bvrd1vjHkEuAoYb61tMMYkAyFrbaMxJg941Vqbb4y5Avgh4R7BemOM11pbYYyZD9xvrS2O7Hs+cD/h3q3FwBigEngPeMpa+3djjAWuttb+wxgzA6ix1v73MTX+Gthvrf2xMeZS4Alr7ShjzCWR4115gvM6tpZtwC+ttb82xn
wTGG2t/aox5n+AddbaPxljMoClwLnW2oPN9vVF4BvA5UAvYB3wVWvt/x0690i7V4A3Iudy7PFbarcHyLXW+owxGdbaqpZqAq6P/Le6s+3/hUVEJB6pZ09ERGJhprW2IbLsAp4xxqwB3gQO3X93GfCCtbYe4FCIacVYYL61dl9kaOifgYsi2/zAPyPLy4EBJ/j8eOCVyLHmAlnGmLRTPTHgryc4zhTgAWPMSmA+kAjkHPO5iwgH3SZr7R5gbrNtE40xSyLfo0uBs1s4dkvtVhPuAbwFODRsti01iYhIB6Z79kREJBYONlu+BygDziH8R8jGdjhewB4ZytJE+/7+853gOAa41lq78VR3ZoxJBH5HuLdtZ6RnNPEU200lHCavAn5gjBnRUk3GmPNPtUYREYlP6tkTEZFYSwf2WmtDwK2AI7J+NjA9MswTY4w3sr4WSD3BfpYCFxtjuhtjHMBNwIJTqGMh8J+RY11CeEhnzUk+01Itx5pF+F6+Q/fQnXuCNh8QvlfOYYzpA0yMrD8U2PYbY1KA5jN0Nj/+CdsZYxKAftbaecD3CX+/U1qpqa3nJCIicU5hT0REYu13wG3GmFXAECK9ftbad4GZQHFkqOH9kfYvAn84ZlIUrLV7gQeAecAqYLm19u1TqOMRYIwxZjXwGHBbGz5zwlpO4FHCw1VXG2M+jrw/1t+AzYTv1XsZWARgra0CniE86+YsYNmJjk+4R/FE7RzAnyJDO1cQvo+xqpWa5gHDNEGLiEjHpwlaREREREREOiH17ImIiIiIiHRCCnsiIiIiIiKdkMKeiIiIiIhIJ6SwJyIiIiIi0gkp7ImIiIiIiHRCCnsiIiIiIiKdkMKeiIiIiIhIJ6SwJyIiIiIi0gn9f4h8kfduiiwdAAAAAElFTkSuQmCC\n",
150 |       "text/plain": [
151 |        "<Figure size 1080x576 with 1 Axes>"
152 |       ]
153 |      },
154 |      "metadata": {
155 |       "needs_background": "light"
156 |      },
157 |      "output_type": "display_data"
158 |     }
159 |    ],
160 |    "source": [
161 |     "import matplotlib.pyplot as plt\n",
162 |     "\n",
163 |     "class Result:\n",
164 |     "    def __init__(self, values = None, color = 'red', name = ''):\n",
165 |     "        if values is not None:\n",
166 |     "            self.values = values\n",
167 |     "        self.color = color\n",
168 |     "        self.name = name\n",
169 |     "\n",
170 |     "x = quantiles\n",
171 |     "y = Result(results_wlrdf, 'orange', 'WL RDF')\n",
172 |     "y1 = Result(results_wl, 'purple', 'WL')\n",
173 |     "n = len(x)\n",
174 |     "\n",
175 |     "fig, ax = plt.subplots(figsize=(15, 8))\n",
176 |     "for i in range(n - 1):\n",
177 |     "    plt.plot(x[i: i+2], y.values[i: i+2],\n",
178 |     "             'o-', color=y.color, markersize=8)\n",
179 |     "    plt.plot(x[i: i+2], y1.values[i: i+2],\n",
180 |     "             'o-', color= y1.color, markersize=8)\n",
181 |     "\n",
182 |     "ax.xaxis.label.set_text('fraction of the dataset')\n",
183 |     "ax.yaxis.label.set_text('running time (s)')\n",
184 |     "\n",
185 |     "custom_lines = [plt.Line2D([0], [0], color=y.color, lw=4),\n",
186 |     "                plt.Line2D([0], [0], color=y1.color, lw=4)]\n",
187 |     "ax.legend(custom_lines, [y.name, y1.name])\n",
188 |     "plt.savefig('../results/lithogenesis_timing.png', format='png')"
189 |    ]
190 |   }
191 |  ],
192 |  "metadata": {
193 |   "kernelspec": {
194 |    "display_name": "Python 3",
195 |    "language": "python",
196 |    "name": "python3"
197 |   },
198 |   "language_info": {
199 |    "codemirror_mode": {
200 |     "name": "ipython",
201 |     "version": 3
202 |    },
203 |    "file_extension": ".py",
204 |    "mimetype": "text/x-python",
205 |    "name": "python",
206 |    "nbconvert_exporter": "python",
207 |    "pygments_lexer": "ipython3",
208 |    "version": "3.7.3"
209 |   }
210 |  },
211 |  "nbformat": 4,
212 |  "nbformat_minor": 2
213 | }
214 | 


--------------------------------------------------------------------------------
/notebooks/no_labels_scores.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import sys\n",
 10 |     "sys.path.insert(0, '../')"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "from typing import Union\n",
 20 |     "from collections import Counter, OrderedDict\n",
 21 |     "import warnings\n",
 22 |     "\n",
 23 |     "import rdflib\n",
 24 |     "import numpy as np\n",
 25 |     "import pandas as pd\n",
 26 |     "from pprint import pprint\n",
 27 |     "from sklearn import svm\n",
 28 |     "from sklearn.model_selection import cross_validate\n",
 29 |     "\n",
 30 |     "import wlkernel"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 3,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "warnings.simplefilter('ignore')"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 4,
 45 |    "metadata": {},
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "rdf_graph = rdflib.Graph().parse('../data/aifbfixed_complete.n3', format='n3')"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 5,
 54 |    "metadata": {},
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "triples = [\n",
 58 |     "    (str(subj), str(pred), str(obj))\n",
 59 |     "     for subj, pred, obj in rdf_graph\n",
 60 |     "]"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 6,
 66 |    "metadata": {},
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "instances_class_map = {\n",
 70 |     "    subj: obj\n",
 71 |     "    for subj, pred, obj in triples\n",
 72 |     "    if 'affiliation' in pred\n",
 73 |     "    and 'id5instance' not in obj\n",
 74 |     "}\n",
 75 |     "instances = list(instances_class_map.keys())\n",
 76 |     "y = list(instances_class_map.values())"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 7,
 82 |    "metadata": {},
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "triples = [\n",
 86 |     "    (subj, pred, obj)\n",
 87 |     "    for subj, pred, obj in triples\n",
 88 |     "    if 'affiliation' not in pred\n",
 89 |     "    and 'employs' not in pred\n",
 90 |     "    and 'member' not in pred\n",
 91 |     "    and 'head' not in pred\n",
 92 |     "]"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "metadata": {},
 98 |    "source": [
 99 |     "### Weisfeiler-Lehman RDF"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 1,
105 |    "metadata": {},
106 |    "outputs": [
107 |     {
108 |      "ename": "NameError",
109 |      "evalue": "name 'Union' is not defined",
110 |      "output_type": "error",
111 |      "traceback": [
112 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
113 |       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
114 |       "\u001b[0;32m<ipython-input-1-7243a2aadfda>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0mbananize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mg\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mwlkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mWLRDFGraph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwlkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mWLGraph\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mwlkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mWLRDFGraph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwlkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mWLGraph\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m     \u001b[0;34m'All the label in the WLRDFGraph are replaced with the same label'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m             
\u001b[0mg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'banana'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
115 |       "\u001b[0;31mNameError\u001b[0m: name 'Union' is not defined"
116 |      ]
117 |     }
118 |    ],
119 |    "source": [
120 |     "def bananize(g: Union[wlkernel.WLRDFGraph, wlkernel.WLGraph]) -> Union[wlkernel.WLRDFGraph, wlkernel.WLGraph]:\n",
121 |     "    'All the label in the WLRDFGraph are replaced with the same label'\n",
122 |     "    for i in range(len(g.labels)):\n",
123 |     "        for k in g.labels[i].keys():\n",
124 |     "            g.labels[i][k] = 'banana'\n",
125 |     "    return g"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 9,
131 |    "metadata": {},
132 |    "outputs": [],
133 |    "source": [
134 |     "RANDOM_STATE = 42\n",
135 |     "\n",
136 |     "depth_values = [1, 2, 3]\n",
137 |     "iteration_values =  [0, 2, 4, 6]\n",
138 |     "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
139 |     "\n",
140 |     "results = OrderedDict()\n",
141 |     "\n",
142 |     "for d in depth_values:\n",
143 |     "    for it in iteration_values:\n",
144 |     "        wlrdf_graph = wlkernel.WLRDFGraph(triples, instances, max_depth=d)\n",
145 |     "        bananize(wlrdf_graph)\n",
146 |     "        kernel_matrix = wlkernel.wlrdf_kernel_matrix(wlrdf_graph, instances, iterations=it)\n",
147 |     "        kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
148 |     "        \n",
149 |     "        results[(d, it)] = [0, 0, 0]\n",
150 |     "        for c in C_values:\n",
151 |     "            classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
152 |     "            scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
153 |     "            \n",
154 |     "            acc_mean = scores['test_accuracy'].mean()\n",
155 |     "            f1_mean = scores['test_f1_macro'].mean()\n",
156 |     "            \n",
157 |     "            if acc_mean > results[(d, it)][0]:\n",
158 |     "                results[(d, it)] = [acc_mean, f1_mean, c]"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": 10,
164 |    "metadata": {},
165 |    "outputs": [
166 |     {
167 |      "data": {
168 |       "text/html": [
169 |        "<div>\n",
170 |        "<style scoped>\n",
171 |        "    .dataframe tbody tr th:only-of-type {\n",
172 |        "        vertical-align: middle;\n",
173 |        "    }\n",
174 |        "\n",
175 |        "    .dataframe tbody tr th {\n",
176 |        "        vertical-align: top;\n",
177 |        "    }\n",
178 |        "\n",
179 |        "    .dataframe thead th {\n",
180 |        "        text-align: right;\n",
181 |        "    }\n",
182 |        "</style>\n",
183 |        "<table border=\"1\" class=\"dataframe\">\n",
184 |        "  <thead>\n",
185 |        "    <tr style=\"text-align: right;\">\n",
186 |        "      <th></th>\n",
187 |        "      <th></th>\n",
188 |        "      <th>accuracy</th>\n",
189 |        "      <th>f1</th>\n",
190 |        "      <th>C</th>\n",
191 |        "    </tr>\n",
192 |        "    <tr>\n",
193 |        "      <th>depth</th>\n",
194 |        "      <th>iterations</th>\n",
195 |        "      <th></th>\n",
196 |        "      <th></th>\n",
197 |        "      <th></th>\n",
198 |        "    </tr>\n",
199 |        "  </thead>\n",
200 |        "  <tbody>\n",
201 |        "    <tr>\n",
202 |        "      <th rowspan=\"4\" valign=\"top\">1</th>\n",
203 |        "      <th>0</th>\n",
204 |        "      <td>0.524847</td>\n",
205 |        "      <td>0.305547</td>\n",
206 |        "      <td>100.0</td>\n",
207 |        "    </tr>\n",
208 |        "    <tr>\n",
209 |        "      <th>2</th>\n",
210 |        "      <td>0.647536</td>\n",
211 |        "      <td>0.566394</td>\n",
212 |        "      <td>100.0</td>\n",
213 |        "    </tr>\n",
214 |        "    <tr>\n",
215 |        "      <th>4</th>\n",
216 |        "      <td>0.670780</td>\n",
217 |        "      <td>0.591060</td>\n",
218 |        "      <td>100.0</td>\n",
219 |        "    </tr>\n",
220 |        "    <tr>\n",
221 |        "      <th>6</th>\n",
222 |        "      <td>0.677030</td>\n",
223 |        "      <td>0.594329</td>\n",
224 |        "      <td>100.0</td>\n",
225 |        "    </tr>\n",
226 |        "    <tr>\n",
227 |        "      <th rowspan=\"4\" valign=\"top\">2</th>\n",
228 |        "      <th>0</th>\n",
229 |        "      <td>0.565936</td>\n",
230 |        "      <td>0.340732</td>\n",
231 |        "      <td>10.0</td>\n",
232 |        "    </tr>\n",
233 |        "    <tr>\n",
234 |        "      <th>2</th>\n",
235 |        "      <td>0.681422</td>\n",
236 |        "      <td>0.622212</td>\n",
237 |        "      <td>100.0</td>\n",
238 |        "    </tr>\n",
239 |        "    <tr>\n",
240 |        "      <th>4</th>\n",
241 |        "      <td>0.740048</td>\n",
242 |        "      <td>0.663960</td>\n",
243 |        "      <td>100.0</td>\n",
244 |        "    </tr>\n",
245 |        "    <tr>\n",
246 |        "      <th>6</th>\n",
247 |        "      <td>0.762597</td>\n",
248 |        "      <td>0.688069</td>\n",
249 |        "      <td>100.0</td>\n",
250 |        "    </tr>\n",
251 |        "    <tr>\n",
252 |        "      <th rowspan=\"4\" valign=\"top\">3</th>\n",
253 |        "      <th>0</th>\n",
254 |        "      <td>0.407394</td>\n",
255 |        "      <td>0.293320</td>\n",
256 |        "      <td>1.0</td>\n",
257 |        "    </tr>\n",
258 |        "    <tr>\n",
259 |        "      <th>2</th>\n",
260 |        "      <td>0.898914</td>\n",
261 |        "      <td>0.861681</td>\n",
262 |        "      <td>100.0</td>\n",
263 |        "    </tr>\n",
264 |        "    <tr>\n",
265 |        "      <th>4</th>\n",
266 |        "      <td>0.892079</td>\n",
267 |        "      <td>0.854304</td>\n",
268 |        "      <td>100.0</td>\n",
269 |        "    </tr>\n",
270 |        "    <tr>\n",
271 |        "      <th>6</th>\n",
272 |        "      <td>0.893066</td>\n",
273 |        "      <td>0.851358</td>\n",
274 |        "      <td>10.0</td>\n",
275 |        "    </tr>\n",
276 |        "  </tbody>\n",
277 |        "</table>\n",
278 |        "</div>"
279 |       ],
280 |       "text/plain": [
281 |        "                  accuracy        f1      C\n",
282 |        "depth iterations                           \n",
283 |        "1     0           0.524847  0.305547  100.0\n",
284 |        "      2           0.647536  0.566394  100.0\n",
285 |        "      4           0.670780  0.591060  100.0\n",
286 |        "      6           0.677030  0.594329  100.0\n",
287 |        "2     0           0.565936  0.340732   10.0\n",
288 |        "      2           0.681422  0.622212  100.0\n",
289 |        "      4           0.740048  0.663960  100.0\n",
290 |        "      6           0.762597  0.688069  100.0\n",
291 |        "3     0           0.407394  0.293320    1.0\n",
292 |        "      2           0.898914  0.861681  100.0\n",
293 |        "      4           0.892079  0.854304  100.0\n",
294 |        "      6           0.893066  0.851358   10.0"
295 |       ]
296 |      },
297 |      "execution_count": 10,
298 |      "metadata": {},
299 |      "output_type": "execute_result"
300 |     }
301 |    ],
302 |    "source": [
303 |     "fn = 'wlrdf_no_labels'\n",
304 |     "\n",
305 |     "df_res = pd.DataFrame(index=list(results.keys()))\n",
306 |     "df_res['accuracy'] = [t[0] for t in results.values()]\n",
307 |     "df_res['f1'] = [t[1] for t in results.values()]\n",
308 |     "df_res['C'] = [t[2] for t in results.values()]\n",
309 |     "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
310 |     "df_res.to_csv(f'../results/{fn}.csv')\n",
311 |     "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
312 |     "df_res_test.to_html(f'../results/{fn}.html')\n",
313 |     "df_res_test"
314 |    ]
315 |   },
316 |   {
317 |    "cell_type": "markdown",
318 |    "metadata": {},
319 |    "source": [
320 |     "### Weisfeiler-Lehman"
321 |    ]
322 |   },
323 |   {
324 |    "cell_type": "code",
325 |    "execution_count": 11,
326 |    "metadata": {},
327 |    "outputs": [],
328 |    "source": [
329 |     "RANDOM_STATE = 42\n",
330 |     "\n",
331 |     "depth_values = [1, 2, 3]\n",
332 |     "iteration_values =  [0, 2, 4, 6]\n",
333 |     "C_values = [0.001, 0.01, 0.1, 1., 10., 100.]\n",
334 |     "\n",
335 |     "results = OrderedDict()\n",
336 |     "\n",
337 |     "for d in depth_values:\n",
338 |     "    for it in iteration_values:\n",
339 |     "        wl_graphs = [bananize(\n",
340 |     "            wlkernel.WLGraph(triples, instance, max_depth=d)\n",
341 |     "        ) for instance in instances]\n",
342 |     "        kernel_matrix = wlkernel.wl_kernel_matrix(wl_graphs, iterations=it)\n",
343 |     "        kernel_matrix = wlkernel.kernel_matrix_normalization(kernel_matrix)\n",
344 |     "        \n",
345 |     "        results[(d, it)] = [0, 0, 0]\n",
346 |     "        for c in C_values:\n",
347 |     "            classifier = svm.SVC(C=c, kernel='precomputed', class_weight='balanced', random_state=RANDOM_STATE)\n",
348 |     "            scores = cross_validate(classifier, kernel_matrix, y, cv=10, scoring=('accuracy', 'f1_macro'))\n",
349 |     "            \n",
350 |     "            acc_mean = scores['test_accuracy'].mean()\n",
351 |     "            f1_mean = scores['test_f1_macro'].mean()\n",
352 |     "            \n",
353 |     "            if acc_mean > results[(d, it)][0]:\n",
354 |     "                results[(d, it)] = [acc_mean, f1_mean, c]"
355 |    ]
356 |   },
357 |   {
358 |    "cell_type": "code",
359 |    "execution_count": 12,
360 |    "metadata": {},
361 |    "outputs": [
362 |     {
363 |      "data": {
364 |       "text/html": [
365 |        "<div>\n",
366 |        "<style scoped>\n",
367 |        "    .dataframe tbody tr th:only-of-type {\n",
368 |        "        vertical-align: middle;\n",
369 |        "    }\n",
370 |        "\n",
371 |        "    .dataframe tbody tr th {\n",
372 |        "        vertical-align: top;\n",
373 |        "    }\n",
374 |        "\n",
375 |        "    .dataframe thead th {\n",
376 |        "        text-align: right;\n",
377 |        "    }\n",
378 |        "</style>\n",
379 |        "<table border=\"1\" class=\"dataframe\">\n",
380 |        "  <thead>\n",
381 |        "    <tr style=\"text-align: right;\">\n",
382 |        "      <th></th>\n",
383 |        "      <th></th>\n",
384 |        "      <th>accuracy</th>\n",
385 |        "      <th>f1</th>\n",
386 |        "      <th>C</th>\n",
387 |        "    </tr>\n",
388 |        "    <tr>\n",
389 |        "      <th>depth</th>\n",
390 |        "      <th>iterations</th>\n",
391 |        "      <th></th>\n",
392 |        "      <th></th>\n",
393 |        "      <th></th>\n",
394 |        "    </tr>\n",
395 |        "  </thead>\n",
396 |        "  <tbody>\n",
397 |        "    <tr>\n",
398 |        "      <th rowspan=\"4\" valign=\"top\">1</th>\n",
399 |        "      <th>0</th>\n",
400 |        "      <td>0.322153</td>\n",
401 |        "      <td>0.194477</td>\n",
402 |        "      <td>100.0</td>\n",
403 |        "    </tr>\n",
404 |        "    <tr>\n",
405 |        "      <th>2</th>\n",
406 |        "      <td>0.530111</td>\n",
407 |        "      <td>0.348672</td>\n",
408 |        "      <td>10.0</td>\n",
409 |        "    </tr>\n",
410 |        "    <tr>\n",
411 |        "      <th>4</th>\n",
412 |        "      <td>0.530111</td>\n",
413 |        "      <td>0.347049</td>\n",
414 |        "      <td>10.0</td>\n",
415 |        "    </tr>\n",
416 |        "    <tr>\n",
417 |        "      <th>6</th>\n",
418 |        "      <td>0.530111</td>\n",
419 |        "      <td>0.347049</td>\n",
420 |        "      <td>10.0</td>\n",
421 |        "    </tr>\n",
422 |        "    <tr>\n",
423 |        "      <th rowspan=\"4\" valign=\"top\">2</th>\n",
424 |        "      <th>0</th>\n",
425 |        "      <td>0.564547</td>\n",
426 |        "      <td>0.355253</td>\n",
427 |        "      <td>10.0</td>\n",
428 |        "    </tr>\n",
429 |        "    <tr>\n",
430 |        "      <th>2</th>\n",
431 |        "      <td>0.503724</td>\n",
432 |        "      <td>0.343148</td>\n",
433 |        "      <td>1.0</td>\n",
434 |        "    </tr>\n",
435 |        "    <tr>\n",
436 |        "      <th>4</th>\n",
437 |        "      <td>0.481437</td>\n",
438 |        "      <td>0.392366</td>\n",
439 |        "      <td>100.0</td>\n",
440 |        "    </tr>\n",
441 |        "    <tr>\n",
442 |        "      <th>6</th>\n",
443 |        "      <td>0.502999</td>\n",
444 |        "      <td>0.383461</td>\n",
445 |        "      <td>1.0</td>\n",
446 |        "    </tr>\n",
447 |        "    <tr>\n",
448 |        "      <th rowspan=\"4\" valign=\"top\">3</th>\n",
449 |        "      <th>0</th>\n",
450 |        "      <td>0.491697</td>\n",
451 |        "      <td>0.343404</td>\n",
452 |        "      <td>100.0</td>\n",
453 |        "    </tr>\n",
454 |        "    <tr>\n",
455 |        "      <th>2</th>\n",
456 |        "      <td>0.641333</td>\n",
457 |        "      <td>0.527556</td>\n",
458 |        "      <td>100.0</td>\n",
459 |        "    </tr>\n",
460 |        "    <tr>\n",
461 |        "      <th>4</th>\n",
462 |        "      <td>0.724551</td>\n",
463 |        "      <td>0.602677</td>\n",
464 |        "      <td>10.0</td>\n",
465 |        "    </tr>\n",
466 |        "    <tr>\n",
467 |        "      <th>6</th>\n",
468 |        "      <td>0.713474</td>\n",
469 |        "      <td>0.557335</td>\n",
470 |        "      <td>100.0</td>\n",
471 |        "    </tr>\n",
472 |        "  </tbody>\n",
473 |        "</table>\n",
474 |        "</div>"
475 |       ],
476 |       "text/plain": [
477 |        "                  accuracy        f1      C\n",
478 |        "depth iterations                           \n",
479 |        "1     0           0.322153  0.194477  100.0\n",
480 |        "      2           0.530111  0.348672   10.0\n",
481 |        "      4           0.530111  0.347049   10.0\n",
482 |        "      6           0.530111  0.347049   10.0\n",
483 |        "2     0           0.564547  0.355253   10.0\n",
484 |        "      2           0.503724  0.343148    1.0\n",
485 |        "      4           0.481437  0.392366  100.0\n",
486 |        "      6           0.502999  0.383461    1.0\n",
487 |        "3     0           0.491697  0.343404  100.0\n",
488 |        "      2           0.641333  0.527556  100.0\n",
489 |        "      4           0.724551  0.602677   10.0\n",
490 |        "      6           0.713474  0.557335  100.0"
491 |       ]
492 |      },
493 |      "execution_count": 12,
494 |      "metadata": {},
495 |      "output_type": "execute_result"
496 |     }
497 |    ],
498 |    "source": [
499 |     "fn = 'wl_no_labels'\n",
500 |     "\n",
501 |     "df_res = pd.DataFrame(index=list(results.keys()))\n",
502 |     "df_res['accuracy'] = [t[0] for t in results.values()]\n",
503 |     "df_res['f1'] = [t[1] for t in results.values()]\n",
504 |     "df_res['C'] = [t[2] for t in results.values()]\n",
505 |     "df_res = df_res.set_index(pd.MultiIndex.from_tuples(df_res.index, names=['depth', 'iterations']))\n",
506 |     "df_res.to_csv(f'../results/{fn}.csv')\n",
507 |     "df_res_test = pd.read_csv(f'../results/{fn}.csv', index_col=['depth', 'iterations'])\n",
508 |     "df_res_test.to_html(f'../results/{fn}.html')\n",
509 |     "df_res_test"
510 |    ]
511 |   }
512 |  ],
513 |  "metadata": {
514 |   "kernelspec": {
515 |    "display_name": "Python 3",
516 |    "language": "python",
517 |    "name": "python3"
518 |   },
519 |   "language_info": {
520 |    "codemirror_mode": {
521 |     "name": "ipython",
522 |     "version": 3
523 |    },
524 |    "file_extension": ".py",
525 |    "mimetype": "text/x-python",
526 |    "name": "python",
527 |    "nbconvert_exporter": "python",
528 |    "pygments_lexer": "ipython3",
529 |    "version": "3.7.3"
530 |   }
531 |  },
532 |  "nbformat": 4,
533 |  "nbformat_minor": 2
534 | }
535 | 


--------------------------------------------------------------------------------
/presentation/img/07-Graph.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-Graph.pdf


--------------------------------------------------------------------------------
/presentation/img/07-almost_relabeled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-almost_relabeled.pdf


--------------------------------------------------------------------------------
/presentation/img/07-relabeled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-relabeled.pdf


--------------------------------------------------------------------------------
/presentation/img/07-relabeled_vertical.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-relabeled_vertical.pdf


--------------------------------------------------------------------------------
/presentation/img/07-subGraph_A1_B1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-subGraph_A1_B1.pdf


--------------------------------------------------------------------------------
/presentation/img/07-subGraph_A1_B1_vertical.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/07-subGraph_A1_B1_vertical.pdf


--------------------------------------------------------------------------------
/presentation/img/wl_iteration_total.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/wl_iteration_total.png


--------------------------------------------------------------------------------
/presentation/img/wl_iteration_upper.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/img/wl_iteration_upper.png


--------------------------------------------------------------------------------
/presentation/presentation.nav:
--------------------------------------------------------------------------------
 1 | \headcommand {\slideentry {0}{0}{1}{1/1}{}{0}}
 2 | \headcommand {\beamer@framepages {1}{1}}
 3 | \headcommand {\slideentry {0}{0}{2}{2/2}{}{0}}
 4 | \headcommand {\beamer@framepages {2}{2}}
 5 | \headcommand {\slideentry {0}{0}{3}{3/3}{}{0}}
 6 | \headcommand {\beamer@framepages {3}{3}}
 7 | \headcommand {\slideentry {0}{0}{4}{4/4}{}{0}}
 8 | \headcommand {\beamer@framepages {4}{4}}
 9 | \headcommand {\slideentry {0}{0}{5}{5/5}{}{0}}
10 | \headcommand {\beamer@framepages {5}{5}}
11 | \headcommand {\slideentry {0}{0}{6}{6/6}{}{0}}
12 | \headcommand {\beamer@framepages {6}{6}}
13 | \headcommand {\slideentry {0}{0}{7}{7/7}{}{0}}
14 | \headcommand {\beamer@framepages {7}{7}}
15 | \headcommand {\slideentry {0}{0}{8}{8/8}{}{0}}
16 | \headcommand {\beamer@framepages {8}{8}}
17 | \headcommand {\slideentry {0}{0}{9}{9/9}{}{0}}
18 | \headcommand {\beamer@framepages {9}{9}}
19 | \headcommand {\slideentry {0}{0}{10}{10/10}{}{0}}
20 | \headcommand {\beamer@framepages {10}{10}}
21 | \headcommand {\slideentry {0}{0}{11}{11/11}{}{0}}
22 | \headcommand {\beamer@framepages {11}{11}}
23 | \headcommand {\slideentry {0}{0}{12}{12/12}{}{0}}
24 | \headcommand {\beamer@framepages {12}{12}}
25 | \headcommand {\slideentry {0}{0}{13}{13/13}{}{0}}
26 | \headcommand {\beamer@framepages {13}{13}}
27 | \headcommand {\slideentry {0}{0}{14}{14/14}{}{0}}
28 | \headcommand {\beamer@framepages {14}{14}}
29 | \headcommand {\slideentry {0}{0}{15}{15/15}{}{0}}
30 | \headcommand {\beamer@framepages {15}{15}}
31 | \headcommand {\slideentry {0}{0}{16}{16/16}{}{0}}
32 | \headcommand {\beamer@framepages {16}{16}}
33 | \headcommand {\slideentry {0}{0}{17}{17/17}{}{0}}
34 | \headcommand {\beamer@framepages {17}{17}}
35 | \headcommand {\slideentry {0}{0}{18}{18/18}{}{0}}
36 | \headcommand {\beamer@framepages {18}{18}}
37 | \headcommand {\slideentry {0}{0}{19}{19/19}{}{0}}
38 | \headcommand {\beamer@framepages {19}{19}}
39 | \headcommand {\slideentry {0}{0}{20}{20/20}{}{0}}
40 | \headcommand {\beamer@framepages {20}{20}}
41 | \headcommand {\slideentry {0}{0}{21}{21/21}{}{0}}
42 | \headcommand {\beamer@framepages {21}{21}}
43 | \headcommand {\beamer@partpages {1}{21}}
44 | \headcommand {\beamer@subsectionpages {1}{21}}
45 | \headcommand {\beamer@sectionpages {1}{21}}
46 | \headcommand {\beamer@documentpages {21}}
47 | \headcommand {\gdef \inserttotalframenumber {21}}
48 | 


--------------------------------------------------------------------------------
/presentation/presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/presentation/presentation.pdf


--------------------------------------------------------------------------------
/presentation/presentation.tex:
--------------------------------------------------------------------------------
  1 | \documentclass{beamer}
  2 | 
  3 | \mode<presentation> {
  4 | 
  5 | % The Beamer class comes with a number of default slide themes
  6 | % which change the colors and layouts of slides. Below this is a list
  7 | % of all the themes, uncomment each in turn to see what they look like.
  8 | 
  9 | %\usetheme{default}
 10 | %\usetheme{AnnArbor}
 11 | %\usetheme{Antibes}
 12 | %\usetheme{Bergen}
 13 | %\usetheme{Berkeley}
 14 | %\usetheme{Berlin}
 15 | %\usetheme{Boadilla}
 16 | %\usetheme{CambridgeUS}
 17 | %\usetheme{Copenhagen}
 18 | %\usetheme{Darmstadt}
 19 | %\usetheme{Dresden}
 20 | %\usetheme{Frankfurt}
 21 | %\usetheme{Goettingen}
 22 | %\usetheme{Hannover}
 23 | %\usetheme{Ilmenau}
 24 | %\usetheme{JuanLesPins}
 25 | %\usetheme{Luebeck}
 26 | \usetheme{Madrid}
 27 | %\usetheme{Malmoe}
 28 | %\usetheme{Marburg}
 29 | %\usetheme{Montpellier}
 30 | %\usetheme{PaloAlto}
 31 | %\usetheme{Pittsburgh}
 32 | %\usetheme{Rochester}
 33 | %\usetheme{Singapore}
 34 | %\usetheme{Szeged}
 35 | %\usetheme{Warsaw}
 36 | 
 37 | % As well as themes, the Beamer class has a number of color themes
 38 | % for any slide theme. Uncomment each of these in turn to see how it
 39 | % changes the colors of your current slide theme.
 40 | 
 41 | %\usecolortheme{albatross}
 42 | %\usecolortheme{beaver}
 43 | %\usecolortheme{beetle}
 44 | %\usecolortheme{crane}
 45 | %\usecolortheme{dolphin}
 46 | %\usecolortheme{dove}
 47 | %\usecolortheme{fly}
 48 | %\usecolortheme{lily}
 49 | %\usecolortheme{orchid}
 50 | %\usecolortheme{rose}
 51 | %\usecolortheme{seagull}
 52 | %\usecolortheme{seahorse}
 53 | %\usecolortheme{whale}
 54 | %\usecolortheme{wolverine}
 55 | 
 56 | %\setbeamertemplate{footline} % To remove the footer line in all slides uncomment this line
 57 | %\setbeamertemplate{footline}[page number] % To replace the footer line in all slides with a simple slide count uncomment this line
 58 | 
 59 | %\setbeamertemplate{navigation symbols}{} % To remove the navigation symbols from the bottom of all slides uncomment this line
 60 | }
 61 | 
 62 | \usepackage{graphicx} % Allows including images
 63 | \usepackage{booktabs} % Allows the use of \toprule, \midrule and \bottomrule in tables
 64 | \usepackage[utf8]{inputenc}
 65 | \usepackage{float}
 66 | \usepackage{subcaption}
 67 | 
 68 | %----------------------------------------------------------------------------------------
 69 | %	TITLE PAGE
 70 | %----------------------------------------------------------------------------------------
 71 | 
 72 | \title[A Fast Approximation of WL RDF kernel]{A Fast Approximation of the Weisfeiler-Lehman Graph Kernel for RDF Data}
 73 | \subtitle{Advanced Algorithms and Graph Mining}
 74 | \author{Lorenzo Palloni \and Emilio Cecchini}
 75 | \institute[]{
 76 |     Università Degli Studi di Firenze \\
 77 |     \medskip
 78 |     \textit{lorenzo.palloni@stud.unifi.it \and emilio.cecchini@stud.unifi.it}
 79 | }
 80 | \date{\today}
 81 | 
 82 | \begin{document}
 83 | 
 84 | \begin{frame}
 85 | \titlepage % Print the title page as the first slide
 86 | \end{frame}
 87 | 
 88 | %----------------------------------------------------------------------------------------
 89 | %	PRESENTATION SLIDES
 90 | %----------------------------------------------------------------------------------------
 91 | 
 92 | \begin{frame}
 93 | \frametitle{Introduction}
 94 | 
 95 | \begin{itemize}
 96 | \item
 97 | In order to apply machine learning algorithms on graphs it is necessary to develop algorithms to compute how similar two graphs are.
 98 | 
 99 | \item
100 | Starting from the well-known Weisfeiler-Lehman isomorphism test, kernel methods to measure the similarity between graphs have been developed.
101 | 
102 | \item
103 | This paper proposes a fast approximation of a Weisfeiler-Lehman kernel applied to RDF data.
104 | 
105 | 
106 | \end{itemize}
107 | 
108 | \end{frame}
109 | 
110 | \begin{frame}
111 | \frametitle{Graph Kernels}
112 | 
113 | Kernel-based machine learning algorithms abandon the explicit vector representations of data items by means of the \textit{kernel function}.
114 | 
115 | \begin{definition}[Graph Kernel]
116 | Let $\mathbb{G}$ be a non-empty set of graphs. Any function $k: \mathbb{G} \times \mathbb{G} \rightarrow \mathbb{R}$ that takes two graphs $G$ and $G^\prime$ as input and returns a real number equal to the scalar product of the images of $G$ and $G^\prime$ in some (possibly unknown) feature space is a valid kernel function.
117 | \end{definition}
118 | 
119 | \end{frame}
120 | 
121 | %------------------------------------------------
122 | 
123 | \begin{frame}
124 | \frametitle{Graph isomorphism}
125 | 
126 | \begin{itemize}
127 | \item
128 | Two graphs $G$ and $G^\prime$ are isomorphic if there exists a bijective mapping between the vertices of $G$ and the vertices of $G^\prime$ that preserves edges and labels.
129 | 
130 | \item
131 | The graph isomorphism problem is in NP, but it is not known to be solvable in polynomial time or to be NP-complete.
132 | 
133 | \item
134 | The graph kernel introduced in this paper uses concepts from the \textit{Weisfeiler-Lehman test} of isomorphism.
135 | \end{itemize}
136 | 
137 | \end{frame}
138 | 
139 | %------------------------------------------------
140 | 
141 | \begin{frame}
142 | \frametitle{Weisfeiler-Lehman test}
143 | 
144 | \begin{itemize}
145 | \item
146 | Assume we are given two graphs $G$ and $G^\prime$ and we would like to test whether they are isomorphic.
147 | \item
148 | The Weisfeiler-Lehman test performs $h$ iterations.
149 | \item
150 | The key idea of the algorithm is to augment the node labels by the sorted set of node labels of neighbouring nodes, and compress these augmented labels into new, short labels.
151 | \item
152 | These steps are then repeated until the node label sets of $G$ and $G^\prime$ differ, or the number of iterations reaches $h$.
153 | \item
154 | The runtime complexity of the Weisfeiler-Lehman algorithm with $h$ iterations is $O(hk)$, where $k$ is the number of labels in $G$ and $G^\prime$.
155 | \end{itemize}
156 | 
157 | \end{frame}
158 | 
159 | %------------------------------------------------
160 | 
161 | \begin{frame}
162 | \frametitle{Weisfeiler-Lehman test}
163 | 
164 | \begin{center}
165 | \begin{figure}
166 | \includegraphics[width=\textwidth]{img/wl_iteration_upper.png}
167 | \end{figure}
168 | \end{center}
169 | 
170 | \end{frame}
171 | 
172 | %------------------------------------------------
173 | 
174 | \begingroup
175 | \small
176 | \begin{frame}
177 | \frametitle{Weisfeiler-Lehman kernel}
178 | 
179 | \begin{definition}[Weisfeiler-Lehman kernel]
180 | Let $G_i = (V, E, \ell_i)$ and $G_i^\prime = (V^\prime, E^\prime, \ell_i^\prime)$ be the $i$-th iteration rewriting of the graphs $G$ and $G^\prime$ with the Weisfeiler-Lehman algorithm, and let $h$ be the number of iterations. Then the Weisfeiler-Lehman kernel is defined as:
181 | 
182 | \begin{align}
183 | k_{\mathrm{WL}}^{h}\left(G, G^\prime\right)=\sum_{i=0}^h k_\delta\left(G_i, G_i^\prime\right)
184 | \end{align}
185 | 
186 | where
187 | 
188 | \begin{align}
189 | k_\delta\left((V, E, \ell),\left(V^\prime, E^\prime, \ell^\prime\right)\right)=\sum_{v \in V} \sum_{v^{\prime} \in V^{\prime}} \delta\left(\ell(v), \ell^{\prime}\left(v^{\prime}\right)\right)
190 | \end{align}
191 | 
192 | Here $\delta$ is the Dirac kernel, which tests for equality: it is 1 if its arguments are equal and 0 otherwise.
193 | \end{definition}
194 | 
195 | \end{frame}
196 | \endgroup
197 | 
198 | %------------------------------------------------
199 | 
200 | \begin{frame}
201 | \frametitle{Weisfeiler-Lehman Subtree Kernel}
202 | 
203 | \begin{center}
204 | \begin{figure}
205 | \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{img/wl_iteration_total.png}
206 | \end{figure}
207 | \end{center}
208 | 
209 | \end{frame}
210 | 
211 | %------------------------------------------------
212 | 
213 | \begin{frame}
214 | \frametitle{The Resource Description Framework}
215 | 
216 | \begin{itemize}
217 | \item
218 | The Resource Description Framework (RDF) is the foundation for knowledge representation on the semantic web.
219 | 
220 | \item
221 | It is based on the idea of making statements about resources in \textit{subject-predicate-object} form; these statements are called \textit{triples}.
222 | 
223 | \item
224 | A set of triples represents a graph that has subjects and objects as nodes and predicates as edges (note that it is a \textit{directed multigraph with labeled edges}).
225 | \end{itemize}
226 | 
227 | \end{frame}
228 | 
229 | %------------------------------------------------
230 | 
231 | \begin{frame}
232 | \frametitle{The Resource Description Framework}
233 | 
234 | \begin{center}
235 | \begin{figure}
236 | \includegraphics[scale=0.55,keepaspectratio]{img/07-Graph}
237 | \end{figure}
238 | \end{center}
239 | 
240 | \end{frame}
241 | 
242 | %------------------------------------------------
243 | 
244 | \begin{frame}
245 | \frametitle{Fast Weisfeiler-Lehman for RDF}
246 | 
247 | \begin{itemize}
248 | \item
249 | The most immediate approach to apply graph kernels to RDF is to extract subgraphs for the instances that we are interested in and to compute the kernel on these subgraphs.
250 | 
251 | \item
252 | Potentially it can be more efficient to do the kernel computation directly on the larger underlying RDF graph, instead of extracting many subgraphs.
253 | 
254 | \item
255 | This paper proposes an approximation of the Weisfeiler-Lehman kernel designed for RDF data.
256 | \end{itemize}
257 | 
258 | \end{frame}
259 | 
260 | %------------------------------------------------
261 | 
262 | \begin{frame}
263 | \frametitle{Weisfeiler-Lehman RDF graph}
264 | 
265 | \begin{definition}[Weisfeiler-Lehman RDF graph]
266 | A Weisfeiler-Lehman RDF graph is a graph $G = (V, E, \ell)$, where $V$ is a set of vertices, $E$ a set of directed edges, and $\ell:(V \cup E) \times \mathbb{N} \rightarrow \Sigma$ a labeling function from vertices $V$ or edges $E$ and a depth index $j \in \mathbb{N}$ to a set of labels $\Sigma$.
267 | \end{definition}
268 | 
269 | \begin{definition}[Neighborhood]
270 | The neighborhood $N(v) = \{(v^\prime, v) \in E\}$ of a vertex is the set of edges going to the vertex $v$, and the neighborhood $N((v, v^\prime)) = \{v\}$ of an edge is the vertex that the edge comes from.
271 | \end{definition}
272 | 
273 | \end{frame}
274 | 
275 | 
276 | 
277 | \begin{frame}
278 | \frametitle{Graph extraction from RDF}
279 | 
280 | \begin{itemize}
281 | \item
282 | Given a set of RDF triples and a set of instances $I$, there is an algorithm to build a Weisfeiler-Lehman RDF graph.
283 | 
284 | \item
285 | For each instance $i$ a subgraph up to depth $d$ is extracted from the RDF dataset and this subgraph is added to the total graph $G$ that the algorithm is building. Thus, vertices and edges are only added if they have not been added to the graph already.
286 | 
287 | \item
288 | Alongside the graph $G$ we also construct mappings $\mathcal{V}_i$ and $\mathcal{E}_i$ for each instance $i$, which record which vertices and edges belong to the subgraph of instance $i$ and at which depth.
289 | \end{itemize}
290 | 
291 | \end{frame}
292 | 
293 | 
294 | \begingroup
295 | \small
296 | \begin{frame}
297 | \frametitle{Graph extraction from RDF}
298 | 
299 | Extraction of the instances A1 and B1.
300 | 
301 | \begin{center}
302 | \begin{figure}
303 | \includegraphics[width=\textwidth,keepaspectratio]{img/07-subGraph_A1_B1}
304 | \end{figure}
305 | \end{center}
306 | 
307 | \end{frame}
308 | \endgroup
309 | 
310 | 
311 | \begin{frame}
312 | \frametitle{Relabeling of the Weisfeiler-Lehman RDF graph}
313 | 
314 | \begin{itemize}
315 | \item
316 | The relabeling process is quite similar to the standard one.
317 | 
318 | \item
319 | It is extended to directed and labeled edges.
320 | 
321 | \item
322 | The augmented labels are constructed taking into account the new definition of neighborhood and the depths.
323 | \end{itemize}
324 | 
325 | \end{frame}
326 | 
327 | %----------------------------------------------------------------------------
328 | 
329 | \begingroup
330 | \small
331 | \begin{frame}
332 | \frametitle{Relabeling of the Weisfeiler-Lehman RDF graph}
333 | 
334 | Label propagation.
335 | 
336 | \begin{center}
337 | \begin{figure}
338 | \includegraphics[width=\textwidth,keepaspectratio]{img/07-almost_relabeled}
339 | \end{figure}
340 | \end{center}
341 | 
342 | \end{frame}
343 | \endgroup
344 | 
345 | %----------------------------------------------------------------------------
346 | 
347 | \begingroup
348 | \small
349 | \begin{frame}
350 | \frametitle{Relabeling of the Weisfeiler-Lehman RDF graph}
351 | 
352 | Relabeling.
353 | 
354 | \begin{center}
355 | \begin{figure}
356 | \includegraphics[width=\textwidth,keepaspectratio]{img/07-relabeled}
357 | \end{figure}
358 | \end{center}
359 | 
360 | \end{frame}
361 | \endgroup
362 | 
363 | %----------------------------------------------------------------------------
364 | 
365 | \begingroup
366 | \small
367 | \begin{frame}
368 | \frametitle{Weisfeiler-Lehman kernel for RDF}
369 | 
370 | \begin{definition}[Weisfeiler-Lehman kernel for RDF]
371 | Let $G$ be a Weisfeiler-Lehman RDF graph that has been rewritten for $h$ iterations, and let $\ell_0$ to $\ell_h$ be the resulting label functions. Then we compute a kernel between two instances $i, i^\prime \in I$ as:
372 | 
373 | \begin{align}
374 | k_{\mathrm{WLRDF}}^{h}\left(i, i^{\prime}\right)=\sum_{n=0}^{h} \frac{n+1}{h+1} k_{\delta, \mathrm{RDF}}^{n}\left(\left(\mathcal{V}_{i}, \mathcal{E}_{i}\right),\left(\mathcal{V}_{i^{\prime}}, \mathcal{E}_{i^{\prime}}\right)\right)
375 | \end{align}
376 | 
377 | where
378 | 
379 | \begin{align}
380 | k_{\delta, \mathrm{RDF}}^{n}\left(\left(\mathcal{V}_{i}, \mathcal{E}_{i}\right),\left(\mathcal{V}_{i^{\prime}}, \mathcal{E}_{i^{\prime}}\right)\right) &=\sum_{(v, d) \in \mathcal{V}_{i}} \sum_{\left(v^{\prime}, d^{\prime}\right) \in \mathcal{V}_{i^{\prime}}} \delta\left(\ell_{n}(v, d), \ell_{n}\left(v^{\prime}, d^{\prime}\right)\right) \\
381 | &+\sum_{(e, d) \in \mathcal{E}_{i}} \sum_{\left(e^{\prime}, d^{\prime}\right) \in \mathcal{E}_{i^{\prime}}} \delta\left(\ell_{n}(e, d), \ell_{n}\left(e^{\prime}, d^{\prime}\right)\right)
382 | \end{align}
383 | 
384 | \end{definition}
385 | 
386 | \end{frame}
387 | \endgroup
388 | 
389 | %------------------------------------------------
390 | 
391 | \begingroup
392 | \footnotesize
393 | \begin{frame}
394 | \frametitle{Weisfeiler-Lehman kernel for RDF}
395 | 
396 | \begin{exampleblock}{Example}
397 | \begin{align*}
398 | k_{\mathrm{WLRDF}}^h\left(A1, B1\right) &= \sum_{n=0}^h \frac{n+1}{h+1} k_{\delta, \mathrm{RDF}}^{n}\left(\left(\mathcal{V}_{A1}, \mathcal{E}_{A1}\right),\left(\mathcal{V}_{B1}, \mathcal{E}_{B1}\right)\right) = \dfrac{1}{2} \cdot 10 + \dfrac{2}{2} \cdot 3 = 8
399 | \end{align*}
400 | \end{exampleblock}
401 | 
402 | \begin{figure}
403 | \centering
404 | \begin{subfigure}{.5\textwidth}
405 |     \centering
406 |     \includegraphics[width=0.9\linewidth]{img/07-subGraph_A1_B1_vertical}
407 | \end{subfigure}%
408 | \begin{subfigure}{.5\textwidth}
409 |     \centering
410 |     \includegraphics[width=0.9\linewidth]{img/07-relabeled_vertical}
411 | \end{subfigure}
412 | \end{figure}
413 | 
414 | \end{frame}
415 | \endgroup
416 | %------------------------------------------------
417 | 
418 | \begin{frame}
419 | \frametitle{Complexity}
420 | 
421 | \begin{itemize}
422 | \item
423 | The complexity of the standard relabeling algorithm on a set of graphs is $O(Nh(n + m))$, where $N$ is the number of graphs, $h$ is the number of iterations and $n$ and $m$ are the number of vertices and edges per graph.
424 | \item
425 | This new relabeling method does not have $N$ graphs, but it introduces $d$ labels per vertex/edge, where $d$ is the extraction depth.
426 | \item
427 | If the WL RDF graph has $k$ nodes and edges, the complexity of this new algorithm is $O(dhk)$.
428 | \item
429 | Comparing $O(dhk)$ with $O(Nh(n+m))$, the newly proposed method is faster than the regular one if $dk < N(n+m)$.
430 | \end{itemize}
431 | 
432 | \end{frame}
433 | 
434 | %------------------------------------------------
435 | 
436 | \begingroup
437 | \footnotesize
438 | \begin{frame}
439 | \frametitle{References}
440 | 
441 | \begin{thebibliography}{99} % Beamer does not support BibTeX so references must be inserted manually as below
442 | 
443 | \bibitem{lamport94}
444 |     de Vries, G.K.D.,
445 |     A Fast Approximation of the Weisfeiler-Lehman Graph Kernel for RDF Data,
446 |     2013
447 | 
448 | \bibitem{wl-kernels}
449 |     Shervashidze, N., Schweitzer, P., van Leeuwen, E.J., Mehlhorn, K., Borgwardt, K.M.
450 |     Weisfeiler-Lehman graph kernels,
451 |     2011
452 | \end{thebibliography}
453 | 
454 | \end{frame}
455 | \endgroup
456 | 
457 | %------------------------------------------------
458 | 
459 | \end{document}
460 | 


--------------------------------------------------------------------------------
/report/RefereeReport.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/report/RefereeReport.pdf


--------------------------------------------------------------------------------
/report/RefereeReport.tex:
--------------------------------------------------------------------------------
  1 | \documentclass[12pt]{scrartcl}
  2 | \usepackage[utf8]{inputenc}
  3 | \usepackage{hyperref}
  4 | \usepackage{booktabs}
  5 | \usepackage{caption}
  6 | \usepackage{graphicx}
  7 | 
  8 | \begin{document}
  9 | 
 10 | 
 11 | \title{A Fast Approximation of the Weisfeiler-Lehman Graph Kernel for RDF Data}
 12 | \subtitle{Referee report}
 13 | \author{
 14 | Emilio Cecchini \\ \href{mailto:emilio.cecchini@stud.unifi.it}{emilio.cecchini@stud.unifi.it}
 15 | \and
 16 | Lorenzo Palloni \\ \href{mailto:lorenzo.palloni@stud.unifi.it}{lorenzo.palloni@stud.unifi.it}
 17 | }
 18 | 
 19 | \maketitle
 20 | 
 21 | \section{Summary}
 22 | 
 23 | The goal of this paper is to introduce a faster version of the Weisfeiler-Lehman graph kernel algorithm when applied to Resource Description Framework (RDF) data.
 24 | 
 25 | The \textit{Resource Description Framework} (RDF) is the foundation for knowledge representation on the semantic web. A resource is described by a set of \textit{triples}, which are of the form \textit{subject-predicate-object}. The entire collection of triples forms a graph where the subjects and the objects are the nodes and the predicates are the edges.
 26 | 
 27 | The \textit{Weisfeiler-Lehman test} is an algorithm that is used to test graph isomorphism. The test proceeds in iterations where the key idea is to augment the node labels by the sorted set of node labels of neighbouring nodes, and compress these augmented labels into new, short labels. These steps are then repeated until the node label sets of the two graphs differ, or the number of iterations reaches a predefined maximum.
 28 | 
 29 | The \textit{Weisfeiler-Lehman kernel} is the state-of-the-art for graph kernels. It computes the number of subtrees shared between two graphs by using the Weisfeiler-Lehman test of graph isomorphism.
 30 | 
 31 | This paper introduces an approximation of the Weisfeiler-Lehman kernel, which first extracts a set of subgraphs from the entire RDF graph and then computes the kernels. For each instance a subgraph up to a certain depth is extracted from the RDF dataset and this subgraph is added to a total graph that the extraction algorithm is building. Thus, vertices and edges are only added if they have not been added to the graph already. For each node and edge, together with their labels, their extraction depth is stored. The relabeling process is the same as that of the standard Weisfeiler-Lehman test, extended to the labels on the edges. Finally the kernel is computed by counting the number of common labels at each depth.
 32 | 
 33 | \section{Evaluation}
 34 | 
 35 | In this paper there is no formal theorem or proof. The author states that this kernel yields an approximation of the standard Weisfeiler-Lehman graph kernel, but he never actually gives any formal proof of the accuracy of that approximation. The comparison of the results with the standard Weisfeiler-Lehman graph kernel can be found only in the experiments section. However there is a good explanation on how the complexity of the algorithm is improved with this approximation.
 36 | 
 37 | In the first experiment, where a classification on the SWRC ontology \cite{swrc} is performed, the author specifies that the \textit{affiliation} relation and its inverse (the \textit{employs} relation) were removed from the dataset for training purposes. We instead discovered that there are two other relationships that must be removed because they link the instances to their corresponding class: \textit{member} and \textit{head}. The fact that these two predicates were not removed from the training dataset led to a higher accuracy than the real one.
 38 | 
 39 | The plots of the runtime experiments are inverted: the lithogenesis dataset is about ten times larger than the affiliation dataset, but the reported runtimes of the lithogenesis classification are ten times smaller than the runtimes of the affiliation prediction.
 40 | 
 41 | This paper proposes a new method for computing graph kernels, but it is limited only to RDF data. This method exploits the fact that usually, in the RDF graphs, the extracted subgraphs share many nodes and edges. This fact limits the number of scenarios in which the method is applicable with good results.
 42 | 
 43 | The algorithm described in this paper is an approximation of the Weisfeiler-Lehman graph kernel proposed in \cite{wl-kernels}. The approximation algorithm is very similar to the standard one described in \cite{wl-kernels}; the only difference is that the label expansion process is also extended to the edges and the concept of \textit{depth} is introduced in order to have bigger graphs without storing duplicated nodes or edges. These two simple modifications seem to lead to a faster version, but there is not much innovation in this new proposed method.
 44 | 
 45 | The proposed kernel method is a tool to perform machine learning algorithms on RDF data. There is a small section in the paper where the author introduces the \textit{Resource Description Framework}, but it is never clearly explained what it means to perform a classification on that kind of data.
 46 | 
 47 | During the extraction process of the subgraphs of the instances, the algorithm keeps track of the depth at which each node and edge was extracted. In the paper there is confusion about the order of the depth index. In the pseudocode of the algorithm the depth is counted backward, that is, the root has index equal to the maximum extraction depth while the leaves of the tree have depth equal to zero. In the explanation of the algorithm, however, the author describes the process with the indexes inverted.
 48 | 
 49 | The datasets used in the experiments are still available online. There is a GitHub repository that contains the source code of the experiments but it is quite old and we were not able to compile and to run it.
 50 | 
 51 | 
 52 | \section{Replication of the experiments}
 53 | 
 54 | Since we were not able to compile and to run the experiments done by the author, we have implemented a small part of the experiments in order to assess the validity of the results in the paper. We have implemented the standard Weisfeiler-Lehman graph kernel and its approximation proposed in the paper. The source code for the kernels and the experiments is available online\footnote{\url{https://github.com/deeplego/wl-graph-kernels}}. We have performed a classification on the AIFB dataset \cite{swrc} and the `Named Rock Units' dataset of the British Geological Survey. We have used the C-Support Vector Machine algorithm found in the scikit-learn Python package. We have tried to compute the accuracy of the classification with the same method described in the paper, that is a 10-fold cross-validation; however, we slightly simplified the process of computing the accuracy of the model in relation to the C parameter of the SVM. We executed a 10-fold cross-validation for each value of C in $\{10^{-3}, 10^{-2}, 10^{-1}, 1, 10^1, 10^2, 10^3\}$ and then we took the best accuracy value. The results of the classifications are reported in the tables below.
 55 | 
 56 | \newpage
 57 | 
 58 | \begin{center}
 59 | \captionof{table}{Affiliation prediction with the standard Weisfeiler-Lehman kernel}
 60 | \begin{tabular}{ccccc}
 61 | \toprule
 62 |  depth & iterations &  accuracy &        f1 &      C \\
 63 | \midrule
 64 | 1 & 0 &  0.842337 &  0.772552 &  100.0 \\
 65 |   & 2 &  0.836782 &  0.755789 &  100.0 \\
 66 |   & 4 &  0.836782 &  0.755789 &  100.0 \\
 67 |   & 6 &  0.836782 &  0.755789 &  100.0 \\
 68 | \hline
 69 | 2 & 0 &  0.892516 &  0.836455 &  100.0 \\
 70 |   & 2 &  0.826180 &  0.742251 &  100.0 \\
 71 |   & 4 &  0.774069 &  0.618519 &  100.0 \\
 72 |   & 6 &  0.740048 &  0.568392 &  100.0 \\
 73 | \hline
 74 | 3 & 0 &  0.892591 &  0.850147 &  100.0 \\
 75 |   & 2 &  0.897779 &  0.848919 &  100.0 \\
 76 |   & 4 &  0.909258 &  0.860964 &  100.0 \\
 77 |   & 6 &  0.881044 &  0.796105 &  100.0 \\
 78 | \bottomrule
 79 | \end{tabular}
 80 | \end{center}
 81 | 
 82 | \begin{center}
 83 | \captionof{table}{Affiliation prediction with the Weisfeiler-Lehman kernel for RDF}
 84 | \begin{tabular}{ccccc}
 85 | \toprule
 86 | depth & iterations & accuracy & f1 & C \\
 87 | \midrule
 88 | 1 & 0 &  0.881955 &  0.795756 &  100.0 \\
 89 |   & 2 &  0.881955 &  0.795756 &  100.0 \\
 90 |   & 4 &  0.881955 &  0.795756 &  100.0 \\
 91 |   & 6 &  0.881955 &  0.795756 &  100.0 \\
 92 | \hline
 93 | 2 & 0 &  0.892114 &  0.826007 &  100.0 \\
 94 |   & 2 &  0.880057 &  0.812488 &  100.0 \\
 95 |   & 4 &  0.874501 &  0.803701 &  100.0 \\
 96 |   & 6 &  0.874501 &  0.800821 &  100.0 \\
 97 | \hline
 98 | 3 & 0 &  0.879579 &  0.812187 &  100.0 \\
 99 |   & 2 &  0.913751 &  0.867388 &  100.0 \\
100 |   & 4 &  0.908196 &  0.863829 &  100.0 \\
101 |   & 6 &  0.908196 &  0.863829 &  100.0 \\
102 | \bottomrule
103 | \end{tabular}
104 | \end{center}
105 | 
106 | \newpage
107 | 
108 | \begin{center}
109 | \captionof{table}{Lithogenesis prediction with the standard Weisfeiler-Lehman kernel}
110 | \begin{tabular}{ccccc}
111 | \toprule
112 | depth & iterations &  accuracy &        f1 &      C \\
113 | \midrule
114 | 1 & 0 &  0.802679 &  0.774383 &   10.0 \\
115 |   & 2 &  0.796429 &  0.768842 &   10.0 \\
116 |   & 4 &  0.796429 &  0.768842 &   10.0 \\
117 |   & 6 &  0.796429 &  0.768842 &   10.0 \\
118 | \hline
119 | 2 & 0 &  0.891964 &  0.877311 &  100.0 \\
120 |   & 2 &  0.892857 &  0.874092 &    1.0 \\
121 |   & 4 &  0.873214 &  0.854485 &    1.0 \\
122 |   & 6 &  0.865179 &  0.841353 &    1.0 \\
123 | \hline
124 | 3 & 0 &  0.883929 &  0.871406 &  100.0 \\
125 |   & 2 &  0.913393 &  0.898291 &    1.0 \\
126 |   & 4 &  0.906250 &  0.890922 &    1.0 \\
127 |   & 6 &  0.906250 &  0.890922 &    1.0 \\
128 | \bottomrule
129 | \end{tabular}
130 | \end{center}
131 | 
132 | \begin{center}
133 | \captionof{table}{Lithogenesis prediction with the Weisfeiler-Lehman kernel for RDF}
134 | \begin{tabular}{ccccc}
135 | \toprule
136 | depth & iterations & accuracy & f1 & C \\
137 | \midrule
138 | 1 & 0 &  0.795536 &  0.763739 &   10.0 \\
139 |   & 2 &  0.795536 &  0.763739 &   10.0 \\
140 |   & 4 &  0.795536 &  0.763739 &   10.0 \\
141 |   & 6 &  0.795536 &  0.763739 &   10.0 \\
142 | \hline
143 | 2 & 0 &  0.906250 &  0.891229 &  100.0 \\
144 |   & 2 &  0.892857 &  0.874092 &    1.0 \\
145 |   & 4 &  0.892857 &  0.874092 &    1.0 \\
146 |   & 6 &  0.885714 &  0.866606 &    1.0 \\
147 | \hline
148 | 3 & 0 &  0.891071 &  0.875862 &  100.0 \\
149 |   & 2 &  0.891964 &  0.873422 &    1.0 \\
150 |   & 4 &  0.906250 &  0.890104 &    1.0 \\
151 |   & 6 &  0.907143 &  0.888829 &    1.0 \\
152 | \bottomrule
153 | \end{tabular}
154 | \end{center}
155 | 
156 | The accuracy values are almost the same as those reported in the paper. There is not much difference between the standard Weisfeiler-Lehman algorithm and its approximation in terms of accuracy.
157 | 
158 | We have also replicated the experiment of the affiliation prediction where all the labels were removed from the graph. The results are given in the two tables below. This is the best scenario for the Weisfeiler-Lehman kernel for RDF data. As reported in the paper, these results are very similar to the performance on labeled graphs.
159 | 
160 | \newpage
161 | 
162 | \begin{center}
163 | \captionof{table}{Affiliation prediction with the standard Weisfeiler-Lehman kernel with all labels removed}
164 | \begin{tabular}{ccccc}
165 | \toprule
166 | depth & iterations & accuracy & f1 & C \\
167 | \midrule
168 | 1 & 0 &  0.322153 &  0.194477 &  100.0 \\
169 |   & 2 &  0.530111 &  0.348672 &   10.0 \\
170 |   & 4 &  0.530111 &  0.347049 &   10.0 \\
171 |   & 6 &  0.530111 &  0.347049 &   10.0 \\
172 | \hline
173 | 2 & 0 &  0.564547 &  0.355253 &   10.0 \\
174 |   & 2 &  0.503724 &  0.343148 &    1.0 \\
175 |   & 4 &  0.481437 &  0.392366 &  100.0 \\
176 |   & 6 &  0.502999 &  0.383461 &    1.0 \\
177 | \hline
178 | 3 & 0 &  0.491697 &  0.343404 &  100.0 \\
179 |   & 2 &  0.641333 &  0.527556 &  100.0 \\
180 |   & 4 &  0.724551 &  0.602677 &   10.0 \\
181 |   & 6 &  0.713474 &  0.557335 &  100.0 \\
182 | \bottomrule
183 | \end{tabular}
184 | \end{center}
185 | 
186 | \begin{center}
187 | \captionof{table}{Affiliation prediction with the Weisfeiler-Lehman kernel for RDF with all labels removed}
188 | \begin{tabular}{ccccc}
189 | \toprule
190 | depth & iterations & accuracy & f1 & C \\
191 | \midrule
192 | 1 & 0 &  0.524847 &  0.305547 &  100.0 \\
193 |   & 2 &  0.647536 &  0.566394 &  100.0 \\
194 |   & 4 &  0.670780 &  0.591060 &  100.0 \\
195 |   & 6 &  0.677030 &  0.594329 &  100.0 \\
196 | \hline
197 | 2 & 0 &  0.565936 &  0.340732 &   10.0 \\
198 |   & 2 &  0.681422 &  0.622212 &  100.0 \\
199 |   & 4 &  0.740048 &  0.663960 &  100.0 \\
200 |   & 6 &  0.762597 &  0.688069 &  100.0 \\
201 | \hline
202 | 3 & 0 &  0.407394 &  0.293320 &    1.0 \\
203 |   & 2 &  0.898914 &  0.861681 &  100.0 \\
204 |   & 4 &  0.892079 &  0.854304 &  100.0 \\
205 |   & 6 &  0.893066 &  0.851358 &   10.0 \\
206 | \bottomrule
207 | \end{tabular}
208 | \end{center}
209 | 
210 | \newpage
211 | 
212 | Since this new method is supposed to be faster, we also replicated the experiments on the runtimes. The Weisfeiler-Lehman for RDF method is slightly faster than the regular one, but we were not able to see improvements in the runtime as large as those reported in the paper.
213 | 
214 | \begin{center}
215 | \begin{figure}[h]
216 | \caption{Runtimes of the two kernels on the affiliation dataset}
217 | \includegraphics[width=\textwidth]{img/affiliation_timing.png}
218 | \end{figure}
219 | \end{center}
220 | 
221 | \begin{center}
222 | \begin{figure}[h]
223 | \caption{Runtimes of the two kernels on the lithogenesis dataset}
224 | \includegraphics[width=\textwidth]{img/lithogenesis_timing.png}
225 | \end{figure}
226 | \end{center}
227 | 
228 | \newpage
229 | 
230 | \begin{thebibliography}{9}
231 | 
232 | \bibitem{lamport94}
233 |     de Vries, G.K.D.,
234 |     A Fast Approximation of the Weisfeiler-Lehman Graph Kernel for RDF Data,
235 |     2013
236 | 
237 | \bibitem{swrc}
238 |     Sure, Y., Bloehdorn, S., Haase, P., Hartmann, J., Oberle, D.,
239 |     The SWRC ontology -- semantic web for research communities.
240 |     Volume 3803 of LNCS., Covilha,
241 |     Portugal, Springer (December 2005) 218--231
242 | 
243 | \bibitem{wl-kernels}
244 |     Shervashidze, N., Schweitzer, P., van Leeuwen, E.J., Mehlhorn, K., Borgwardt, K.M.
245 |     Weisfeiler-Lehman graph kernels,
246 |     2011
247 | 
248 | \end{thebibliography}
249 | 
250 | \end{document}
251 | 


--------------------------------------------------------------------------------
/report/img/affiliation_timing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/report/img/affiliation_timing.png


--------------------------------------------------------------------------------
/report/img/lithogenesis_timing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/report/img/lithogenesis_timing.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-learn
2 | numpy
3 | nptyping
4 | rdflib
5 | path.py
6 | pytest
7 | pytest-cov
8 | 


--------------------------------------------------------------------------------
/results/affiliation_timing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/results/affiliation_timing.png


--------------------------------------------------------------------------------
/results/csv_to_latex.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | 
 3 | from path import Path
 4 | import pandas as pd
 5 | 
 6 | 
 7 | def convert(fn):
 8 |     df = pd.read_csv(fn, index_col=['depth', 'iterations'])
 9 |     df.to_latex(f'{fn.stripext()}.tex')
10 | 
11 | 
12 | if __name__ == '__main__':
13 |     parser = ArgumentParser()
14 |     parser.add_argument('--file', '-f', type=str)
15 |     flags = parser.parse_args()
16 |     convert(Path(flags.file))
17 | 


--------------------------------------------------------------------------------
/results/lithogenesis_timing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/results/lithogenesis_timing.png


--------------------------------------------------------------------------------
/results/wl_affiliation_results.csv:
--------------------------------------------------------------------------------
 1 | depth,iterations,accuracy,f1,C
 2 | 1,0,0.7564907980736154,0.6433234000261864,1.0
 3 | 1,2,0.7735251117991055,0.6534813180885163,1.0
 4 | 1,4,0.7735251117991055,0.6534813180885163,1.0
 5 | 1,6,0.7735251117991055,0.6534813180885163,1.0
 6 | 2,0,0.8082408840729274,0.7098104342338087,1.0
 7 | 2,2,0.7577700378396972,0.6579990189549013,1.0
 8 | 2,4,0.7183350533195736,0.5632661696748384,1.0
 9 | 2,6,0.6957451840385277,0.5284068915727786,0.1
10 | 3,0,0.8545042139662883,0.7886428236795884,10.0
11 | 3,2,0.8531217750257998,0.8065749405822935,0.001
12 | 3,4,0.832546439628483,0.7681329409754333,1.0
13 | 3,6,0.79578173374613,0.7226295853269538,0.001
14 | 


--------------------------------------------------------------------------------
/results/wl_affiliation_results_with_normalization.csv:
--------------------------------------------------------------------------------
 1 | depth,iterations,accuracy,f1,C
 2 | 1,0,0.881955194358445,0.795756261282577,100.0
 3 | 1,2,0.8687607499140008,0.7886729279492436,100.0
 4 | 1,4,0.8687607499140008,0.7886729279492436,100.0
 5 | 1,6,0.8687607499140008,0.7886729279492436,100.0
 6 | 2,0,0.8868507051943585,0.8197871572871573,100.0
 7 | 2,2,0.8581269349845201,0.7815634647000745,100.0
 8 | 2,4,0.7704463364293086,0.6042463799100796,100.0
 9 | 2,6,0.7527584279325765,0.5791451324733059,100.0
10 | 3,0,0.8848426212590299,0.8184077034077035,100.0
11 | 3,2,0.8908002235982112,0.8246224200635967,100.0
12 | 3,4,0.8973426212590299,0.840693848635025,100.0
13 | 3,6,0.8963557791537667,0.8213432686594452,100.0
14 | 


--------------------------------------------------------------------------------
/results/wl_affiliation_results_with_normalization.tex:
--------------------------------------------------------------------------------
 1 | \begin{tabular}{llrrr}
 2 | \toprule
 3 |   &   &  accuracy &        f1 &      C \\
 4 | depth & iterations &           &           &        \\
 5 | \midrule
 6 | 1 & 0 &  0.842337 &  0.772552 &  100.0 \\
 7 |   & 2 &  0.836782 &  0.755789 &  100.0 \\
 8 |   & 4 &  0.836782 &  0.755789 &  100.0 \\
 9 |   & 6 &  0.836782 &  0.755789 &  100.0 \\
10 | 2 & 0 &  0.892516 &  0.836455 &  100.0 \\
11 |   & 2 &  0.826180 &  0.742251 &  100.0 \\
12 |   & 4 &  0.774069 &  0.618519 &  100.0 \\
13 |   & 6 &  0.740048 &  0.568392 &  100.0 \\
14 | 3 & 0 &  0.892591 &  0.850147 &  100.0 \\
15 |   & 2 &  0.897779 &  0.848919 &  100.0 \\
16 |   & 4 &  0.909258 &  0.860964 &  100.0 \\
17 |   & 6 &  0.881044 &  0.796105 &  100.0 \\
18 | \bottomrule
19 | \end{tabular}
20 | 


--------------------------------------------------------------------------------
/results/wl_lithogenesis_results_with_normalization.csv:
--------------------------------------------------------------------------------
 1 | depth,iterations,accuracy,f1,C
 2 | 1,0,0.8026785714285714,0.7743826999976886,10.0
 3 | 1,2,0.7964285714285714,0.7688415744565631,10.0
 4 | 1,4,0.7964285714285714,0.7688415744565631,10.0
 5 | 1,6,0.7964285714285714,0.7688415744565631,10.0
 6 | 2,0,0.8919642857142858,0.8773113283868433,100.0
 7 | 2,2,0.8928571428571429,0.8740923537433837,1.0
 8 | 2,4,0.8732142857142857,0.8544845106061286,1.0
 9 | 2,6,0.8651785714285714,0.841352739556401,1.0
10 | 3,0,0.8839285714285715,0.8714062451136755,100.0
11 | 3,2,0.9133928571428571,0.8982908605505859,1.0
12 | 3,4,0.90625,0.8909224394979542,1.0
13 | 3,6,0.90625,0.8909224394979542,1.0
14 | 


--------------------------------------------------------------------------------
/results/wl_lithogenesis_results_with_normalization.tex:
--------------------------------------------------------------------------------
 1 | \begin{tabular}{llrrr}
 2 | \toprule
 3 |   &   &  accuracy &        f1 &      C \\
 4 | depth & iterations &           &           &        \\
 5 | \midrule
 6 | 1 & 0 &  0.802679 &  0.774383 &   10.0 \\
 7 |   & 2 &  0.796429 &  0.768842 &   10.0 \\
 8 |   & 4 &  0.796429 &  0.768842 &   10.0 \\
 9 |   & 6 &  0.796429 &  0.768842 &   10.0 \\
10 | 2 & 0 &  0.891964 &  0.877311 &  100.0 \\
11 |   & 2 &  0.892857 &  0.874092 &    1.0 \\
12 |   & 4 &  0.873214 &  0.854485 &    1.0 \\
13 |   & 6 &  0.865179 &  0.841353 &    1.0 \\
14 | 3 & 0 &  0.883929 &  0.871406 &  100.0 \\
15 |   & 2 &  0.913393 &  0.898291 &    1.0 \\
16 |   & 4 &  0.906250 &  0.890922 &    1.0 \\
17 |   & 6 &  0.906250 &  0.890922 &    1.0 \\
18 | \bottomrule
19 | \end{tabular}
20 | 


--------------------------------------------------------------------------------
/results/wl_no_labels.csv:
--------------------------------------------------------------------------------
 1 | depth,iterations,accuracy,f1,C
 2 | 1,0,0.3221534227726178,0.19447672210830105,100.0
 3 | 1,2,0.530110509115927,0.34867194939563356,10.0
 4 | 1,4,0.530110509115927,0.3470491423780897,10.0
 5 | 1,6,0.530110509115927,0.3470491423780897,10.0
 6 | 2,0,0.564546783625731,0.35525302548328863,10.0
 7 | 2,2,0.5037237702098383,0.3431478203169379,1.0
 8 | 2,4,0.481437048503612,0.3923659673659673,100.0
 9 | 2,6,0.502999226006192,0.38346129360835246,1.0
10 | 3,0,0.49169676642586857,0.34340422713681223,100.0
11 | 3,2,0.6413334193326453,0.527556055056055,100.0
12 | 3,4,0.7245506535947712,0.6026768084856319,10.0
13 | 3,6,0.7134739422084623,0.5573347090645852,100.0
14 | 


--------------------------------------------------------------------------------
/results/wl_no_labels.tex:
--------------------------------------------------------------------------------
 1 | \begin{tabular}{llrrr}
 2 | \toprule
 3 |   &   &  accuracy &        f1 &      C \\
 4 | depth & iterations &           &           &        \\
 5 | \midrule
 6 | 1 & 0 &  0.322153 &  0.194477 &  100.0 \\
 7 |   & 2 &  0.530111 &  0.348672 &   10.0 \\
 8 |   & 4 &  0.530111 &  0.347049 &   10.0 \\
 9 |   & 6 &  0.530111 &  0.347049 &   10.0 \\
10 | 2 & 0 &  0.564547 &  0.355253 &   10.0 \\
11 |   & 2 &  0.503724 &  0.343148 &    1.0 \\
12 |   & 4 &  0.481437 &  0.392366 &  100.0 \\
13 |   & 6 &  0.502999 &  0.383461 &    1.0 \\
14 | 3 & 0 &  0.491697 &  0.343404 &  100.0 \\
15 |   & 2 &  0.641333 &  0.527556 &  100.0 \\
16 |   & 4 &  0.724551 &  0.602677 &   10.0 \\
17 |   & 6 &  0.713474 &  0.557335 &  100.0 \\
18 | \bottomrule
19 | \end{tabular}
20 | 


--------------------------------------------------------------------------------
/results/wlrdf_affiliation_results.csv:
--------------------------------------------------------------------------------
 1 | depth,iterations,accuracy,f1,C
 2 | 1,0,0.7848877708978328,0.657970231522863,10.0
 3 | 1,2,0.7848877708978328,0.657970231522863,10.0
 4 | 1,4,0.7848877708978328,0.657970231522863,1.0
 5 | 1,6,0.7848877708978328,0.657970231522863,1.0
 6 | 2,0,0.8262555899552803,0.7310782851049878,1.0
 7 | 2,2,0.7898929308565531,0.6821006728050072,1.0
 8 | 2,4,0.7960676814585483,0.684156578500619,1.0
 9 | 2,6,0.8023176814585483,0.6868838512278915,0.1
10 | 3,0,0.8416430168558652,0.7626665813546618,0.01
11 | 3,2,0.897703818369453,0.8631294273322137,0.001
12 | 3,4,0.8924406604747162,0.8590949650624573,0.001
13 | 3,6,0.8806759545923633,0.839124102591595,0.001
14 | 


--------------------------------------------------------------------------------
/results/wlrdf_affiliation_results_with_normalization.csv:
--------------------------------------------------------------------------------
 1 | depth,iterations,accuracy,f1,C
 2 | 1,0,0.881955194358445,0.795756261282577,100.0
 3 | 1,2,0.881955194358445,0.795756261282577,100.0
 4 | 1,4,0.881955194358445,0.795756261282577,100.0
 5 | 1,6,0.881955194358445,0.795756261282577,100.0
 6 | 2,0,0.8921138630890952,0.8260073953823953,100.0
 7 | 2,2,0.8800567595459237,0.8124879573041339,100.0
 8 | 2,4,0.8745012039903681,0.8037011925982516,100.0
 9 | 2,6,0.8745012039903681,0.8008212906374672,100.0
10 | 3,0,0.879579463364293,0.8121874653124653,100.0
11 | 3,2,0.9137512899896801,0.8673881673881674,100.0
12 | 3,4,0.9081957344341245,0.8638286754095578,100.0
13 | 3,6,0.9081957344341245,0.8638286754095578,100.0
14 | 


--------------------------------------------------------------------------------
/results/wlrdf_affiliation_results_with_normalization.tex:
--------------------------------------------------------------------------------
 1 | \begin{tabular}{llrrr}
 2 | \toprule
 3 |   &   &  accuracy &        f1 &      C \\
 4 | depth & iterations &           &           &        \\
 5 | \midrule
 6 | 1 & 0 &  0.881955 &  0.795756 &  100.0 \\
 7 |   & 2 &  0.881955 &  0.795756 &  100.0 \\
 8 |   & 4 &  0.881955 &  0.795756 &  100.0 \\
 9 |   & 6 &  0.881955 &  0.795756 &  100.0 \\
10 | 2 & 0 &  0.892114 &  0.826007 &  100.0 \\
11 |   & 2 &  0.880057 &  0.812488 &  100.0 \\
12 |   & 4 &  0.874501 &  0.803701 &  100.0 \\
13 |   & 6 &  0.874501 &  0.800821 &  100.0 \\
14 | 3 & 0 &  0.879579 &  0.812187 &  100.0 \\
15 |   & 2 &  0.913751 &  0.867388 &  100.0 \\
16 |   & 4 &  0.908196 &  0.863829 &  100.0 \\
17 |   & 6 &  0.908196 &  0.863829 &  100.0 \\
18 | \bottomrule
19 | \end{tabular}
20 | 


--------------------------------------------------------------------------------
/results/wlrdf_lithogenesis_results.csv:
--------------------------------------------------------------------------------
 1 | depth,iterations,accuracy,f1,C
 2 | 1,0,0.7946428571428571,0.7668365041391357,0.001
 3 | 1,2,0.7946428571428571,0.7668365041391357,0.001
 4 | 1,4,0.7946428571428571,0.7668365041391357,0.001
 5 | 1,6,0.7946428571428571,0.7668365041391357,0.001
 6 | 2,0,0.8991071428571429,0.8823099993065668,0.001
 7 | 2,2,0.8571428571428571,0.8347124068405533,0.001
 8 | 2,4,0.8571428571428571,0.8347124068405533,0.001
 9 | 2,6,0.8571428571428571,0.8347124068405533,0.001
10 | 3,0,0.8866071428571429,0.8704565801079465,0.001
11 | 3,2,0.8928571428571427,0.8762965244773024,0.001
12 | 3,4,0.8857142857142856,0.8681093899743786,0.001
13 | 3,6,0.8857142857142856,0.8681093899743786,0.001
14 | 


--------------------------------------------------------------------------------
/results/wlrdf_lithogenesis_results_with_normalization.csv:
--------------------------------------------------------------------------------
 1 | depth,iterations,accuracy,f1,C
 2 | 1,0,0.7955357142857142,0.7637394251438875,10.0
 3 | 1,2,0.7955357142857142,0.7637394251438875,10.0
 4 | 1,4,0.7955357142857142,0.7637394251438875,10.0
 5 | 1,6,0.7955357142857142,0.7637394251438875,10.0
 6 | 2,0,0.90625,0.891229457041814,100.0
 7 | 2,2,0.8928571428571429,0.8740923537433837,1.0
 8 | 2,4,0.8928571428571429,0.8740923537433837,1.0
 9 | 2,6,0.8857142857142858,0.8666057227273407,1.0
10 | 3,0,0.8910714285714286,0.8758615567439098,100.0
11 | 3,2,0.8919642857142858,0.8734224394979544,1.0
12 | 3,4,0.90625,0.8901037260476619,1.0
13 | 3,6,0.9071428571428571,0.8888291958486466,1.0
14 | 


--------------------------------------------------------------------------------
/results/wlrdf_lithogenesis_results_with_normalization.tex:
--------------------------------------------------------------------------------
 1 | \begin{tabular}{llrrr}
 2 | \toprule
 3 |   &   &  accuracy &        f1 &      C \\
 4 | depth & iterations &           &           &        \\
 5 | \midrule
 6 | 1 & 0 &  0.795536 &  0.763739 &   10.0 \\
 7 |   & 2 &  0.795536 &  0.763739 &   10.0 \\
 8 |   & 4 &  0.795536 &  0.763739 &   10.0 \\
 9 |   & 6 &  0.795536 &  0.763739 &   10.0 \\
10 | 2 & 0 &  0.906250 &  0.891229 &  100.0 \\
11 |   & 2 &  0.892857 &  0.874092 &    1.0 \\
12 |   & 4 &  0.892857 &  0.874092 &    1.0 \\
13 |   & 6 &  0.885714 &  0.866606 &    1.0 \\
14 | 3 & 0 &  0.891071 &  0.875862 &  100.0 \\
15 |   & 2 &  0.891964 &  0.873422 &    1.0 \\
16 |   & 4 &  0.906250 &  0.890104 &    1.0 \\
17 |   & 6 &  0.907143 &  0.888829 &    1.0 \\
18 | \bottomrule
19 | \end{tabular}
20 | 


--------------------------------------------------------------------------------
/results/wlrdf_no_labels.csv:
--------------------------------------------------------------------------------
 1 | depth,iterations,accuracy,f1,C
 2 | 1,0,0.5248473512211902,0.30554684499217744,100.0
 3 | 1,2,0.6475361197110423,0.5663935370185369,100.0
 4 | 1,4,0.6707795837633299,0.5910602591852591,100.0
 5 | 1,6,0.6770295837633299,0.59432948995449,100.0
 6 | 2,0,0.5659356725146198,0.34073240549440237,10.0
 7 | 2,2,0.6814219986240111,0.622212370962371,100.0
 8 | 2,4,0.7400477296181631,0.6639600024158847,100.0
 9 | 2,6,0.7625967492260062,0.6880690877749702,100.0
10 | 3,0,0.4073937908496732,0.2933196589272441,1.0
11 | 3,2,0.8989142586859306,0.8616813859944665,100.0
12 | 3,4,0.8920794633642931,0.8543043879924686,100.0
13 | 3,6,0.8930663054695562,0.85135795942104,10.0
14 | 


--------------------------------------------------------------------------------
/results/wlrdf_no_labels.tex:
--------------------------------------------------------------------------------
 1 | \begin{tabular}{llrrr}
 2 | \toprule
 3 |   &   &  accuracy &        f1 &      C \\
 4 | depth & iterations &           &           &        \\
 5 | \midrule
 6 | 1 & 0 &  0.524847 &  0.305547 &  100.0 \\
 7 |   & 2 &  0.647536 &  0.566394 &  100.0 \\
 8 |   & 4 &  0.670780 &  0.591060 &  100.0 \\
 9 |   & 6 &  0.677030 &  0.594329 &  100.0 \\
10 | 2 & 0 &  0.565936 &  0.340732 &   10.0 \\
11 |   & 2 &  0.681422 &  0.622212 &  100.0 \\
12 |   & 4 &  0.740048 &  0.663960 &  100.0 \\
13 |   & 6 &  0.762597 &  0.688069 &  100.0 \\
14 | 3 & 0 &  0.407394 &  0.293320 &    1.0 \\
15 |   & 2 &  0.898914 &  0.861681 &  100.0 \\
16 |   & 4 &  0.892079 &  0.854304 &  100.0 \\
17 |   & 6 &  0.893066 &  0.851358 &   10.0 \\
18 | \bottomrule
19 | \end{tabular}
20 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | from wlkernel import __version__
 4 | 
 5 | 
 6 | setup(
 7 |     name='wlkernel',
 8 |     version=__version__,
 9 |     description='Weisfeiler-Lehman kernel for RDF graphs',
10 |     packages=find_packages(exclude=['tests']),
11 | )
12 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/tests/__init__.py


--------------------------------------------------------------------------------
/tests/resources/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lorenzopalloni/wl-graph-kernels/ea046737f91017380090cb8d061efc1a44fefb5e/tests/resources/__init__.py


--------------------------------------------------------------------------------
/tests/resources/example.ttl:
--------------------------------------------------------------------------------
 1 | 'A1' 'P2' 'C' .
 2 | 'A1' 'P3' 'D' .
 3 | 'A2' 'P2' 'D' .
 4 | 'A2' 'P3' 'E' .
 5 | 'B2' 'P3' 'E' .
 6 | 'B2' 'P2' 'F' .
 7 | 'B1' 'P3' 'F' .
 8 | 'B1' 'P2' 'G' .
 9 | 'C' 'P4' 'H' .
10 | 'D' 'P4' 'H' .
11 | 'F' 'P5' 'I' .
12 | 'G' 'P5' 'I' .
13 | 'H' 'P6' 'A2' .
14 | 'I' 'P6' 'B2' .
15 | 


--------------------------------------------------------------------------------
/tests/wlkernel_test.py:
--------------------------------------------------------------------------------
  1 | from os.path import abspath
  2 | from pkg_resources import resource_filename
  3 | 
  4 | import pytest
  5 | import rdflib
  6 | 
  7 | import wlkernel
  8 | 
  9 | 
# Absolute path of the 'example.ttl' fixture located in the 'tests.resources'
# package; every test below parses its graph from this file.
example_data = abspath(resource_filename('tests.resources', 'example.ttl'))
 11 | 
 12 | 
 13 | def test_node_hash():
 14 |     n1 = wlkernel.Node()
 15 |     n1_bis = n1
 16 |     n2 = wlkernel.Node()
 17 |     n2_bis = n2
 18 |     assert hash(n1) != hash(n2)
 19 |     assert hash(n1) == hash(n1_bis)
 20 |     assert hash(n2) == hash(n2_bis)
 21 | 
 22 | 
 23 | def test_edge_hash():
 24 |     e1 = wlkernel.Edge()
 25 |     e1_bis = e1
 26 |     e2 = wlkernel.Edge()
 27 |     e2_bis = e2
 28 |     assert hash(e1) != hash(e2)
 29 |     assert hash(e1) == hash(e1_bis)
 30 |     assert hash(e2) == hash(e2_bis)
 31 | 
 32 | 
 33 | def test_wlgraph_depth_0():
 34 |     '''
 35 |     ######
 36 |     # A1 #
 37 |     ######
 38 |     '''
 39 |     rdf_graph = rdflib.Graph().parse(example_data, format='turtle')
 40 |     triples = ((str(s), str(p), str(o)) for s, p, o in rdf_graph)
 41 |     wl_graph = wlkernel.WLGraph(triples, 'A1', 0)
 42 |     assert len(wl_graph.nodes) == 1
 43 |     assert len(wl_graph.edges) == 0
 44 |     assert len(wl_graph.labels) == 1
 45 |     assert len(wl_graph.labels[0]) == len(wl_graph.nodes) + len(wl_graph.edges)
 46 | 
 47 | 
 48 | def test_wlgraph_depth_1():
 49 |     r'''
 50 |           ######
 51 |           # A1 #
 52 |           ######
 53 |           /    \
 54 |       P2 /      \ P3
 55 |         /        \
 56 |     #####        #####
 57 |     # C #        # D #
 58 |     #####        #####
 59 |     '''
 60 |     rdf_graph = rdflib.Graph().parse(example_data, format='turtle')
 61 |     triples = ((str(s), str(p), str(o)) for s, p, o in rdf_graph)
 62 |     wl_graph = wlkernel.WLGraph(triples, 'A1', 1)
 63 |     assert len(wl_graph.nodes) == 3
 64 |     assert len(wl_graph.edges) == 2
 65 |     assert len(wl_graph.labels) == 1
 66 |     assert len(wl_graph.labels[0]) == len(wl_graph.nodes) + len(wl_graph.edges)
 67 | 
 68 | 
 69 | def test_wlgraph_depth_2():
 70 |     r'''
 71 |           ######
 72 |           # A1 #
 73 |           ######
 74 |           /    \
 75 |       P2 /      \ P3
 76 |         /        \
 77 |     #####        #####
 78 |     # C #        # D #
 79 |     #####        #####
 80 |         \       /
 81 |       P4 \     / P4
 82 |           \   /
 83 |           #####
 84 |           # H #
 85 |           #####
 86 |     '''
 87 |     rdf_graph = rdflib.Graph().parse(example_data, format='turtle')
 88 |     triples = ((str(s), str(p), str(o)) for s, p, o in rdf_graph)
 89 |     wl_graph = wlkernel.WLGraph(triples, 'A1', 2)
 90 |     assert len(wl_graph.nodes) == 4
 91 |     assert len(wl_graph.edges) == 4
 92 |     assert len(wl_graph.labels) == 1
 93 |     assert len(wl_graph.labels[0]) == len(wl_graph.nodes) + len(wl_graph.edges)
 94 | 
 95 | 
 96 | def test_wlgraph_depth_4():
 97 |     r'''
 98 |           ######
 99 |           # A1 #
100 |           ######
101 |           /    \
102 |       P2 /      \ P3
103 |         /        \
104 |     #####        #####
105 |     # C #        # D #<----
106 |     #####        #####    |
107 |         \       /         |
108 |       P4 \     / P4       |
109 |           \   /           |
110 |           #####           |
111 |           # H #           | P2
112 |           #####           |
113 |             |             |
114 |             | P6          |
115 |             |             |
116 |           ######          |
117 |           # A2 #-----------
118 |           ######
119 |             |
120 |             | P3
121 |             |
122 |           #####
123 |           # E #
124 |           #####
125 |     '''
126 |     rdf_graph = rdflib.Graph().parse(example_data, format='turtle')
127 |     triples = ((str(s), str(p), str(o)) for s, p, o in rdf_graph)
128 |     wl_graph = wlkernel.WLGraph(triples, 'A1', 4)
129 |     assert len(wl_graph.nodes) == 6
130 |     assert len(wl_graph.edges) == 7
131 |     assert len(wl_graph.labels) == 1
132 |     assert len(wl_graph.labels[0]) == len(wl_graph.nodes) + len(wl_graph.edges)
133 | 
134 | 
def test_wl_relabel():
    '''
    Check wl_relabel on the depth-4 graphs of A1 and B1.

    Every call must leave both graphs with one more label level, and the
    first round must yield more distinct labels than the initial labelling.
    '''
    rdf_graph = rdflib.Graph().parse(example_data, format='turtle')
    # a list (not a generator): the triples are consumed by two graphs
    triples = [(str(s), str(p), str(o)) for s, p, o in rdf_graph]
    wl_graph_a1 = wlkernel.WLGraph(triples, 'A1', 4)
    wl_graph_b1 = wlkernel.WLGraph(triples, 'B1', 4)

    uniq_labels_a1_0 = set(wl_graph_a1.labels[0].values())
    uniq_labels_b1_0 = set(wl_graph_b1.labels[0].values())

    wlkernel.wl_relabel([wl_graph_a1, wl_graph_b1])
    uniq_labels_a1_1 = set(wl_graph_a1.labels[1].values())
    uniq_labels_b1_1 = set(wl_graph_b1.labels[1].values())
    assert len(wl_graph_a1.labels) == len(wl_graph_b1.labels) == 2
    assert len(uniq_labels_a1_0) < len(uniq_labels_a1_1)
    assert len(uniq_labels_b1_0) < len(uniq_labels_b1_1)

    # Second and third round: only the number of label levels is checked.
    for expected_levels in (3, 4):
        wlkernel.wl_relabel([wl_graph_a1, wl_graph_b1])
        assert len(wl_graph_a1.labels) == expected_levels
        assert len(wl_graph_b1.labels) == expected_levels
160 | 
161 | 
def test_wl_kernel():
    'Check wl_kernel between the A1 and B1 graphs with 0 and 1 iterations'
    parsed = rdflib.Graph().parse(example_data, format='turtle')
    triples = [tuple(map(str, triple)) for triple in parsed]
    graph_a1, graph_b1 = (
        wlkernel.WLGraph(triples, instance, 4) for instance in ('A1', 'B1')
    )

    without_iterations = wlkernel.wl_kernel(graph_a1, graph_b1)
    assert without_iterations == 11*1

    one_iteration = wlkernel.wl_kernel(graph_a1, graph_b1, 1)
    assert one_iteration == 11*0.5 + 4*1
170 | 
171 | 
def test_wl_kernel_matrix():
    '''
    Check that wl_kernel_matrix over (A1, B1, A2) is 3x3 and that selected
    entries equal the pairwise wl_kernel value with one iteration.
    '''
    parsed = rdflib.Graph().parse(example_data, format='turtle')
    triples = [tuple(map(str, triple)) for triple in parsed]
    graph_a1, graph_b1, graph_a2 = (
        wlkernel.WLGraph(triples, instance, 4)
        for instance in ('A1', 'B1', 'A2')
    )

    matrix = wlkernel.wl_kernel_matrix(
        [graph_a1, graph_b1, graph_a2], iterations=1
    )

    assert len(matrix) == 3
    assert len(matrix[0]) == 3

    # (row, column) -> pair of graphs whose kernel value is expected there;
    # the (1, 0) entry is compared with the (A1, B1) value as well
    expectations = (
        ((0, 1), (graph_a1, graph_b1)),
        ((0, 2), (graph_a1, graph_a2)),
        ((1, 0), (graph_a1, graph_b1)),
    )
    for (row, col), (left, right) in expectations:
        assert matrix[row][col] == wlkernel.wl_kernel(
            left, right, iterations=1
        )
193 | 
194 | 
def test_wlrdfgraph_depth_0():
    '''
    WLRDFGraph for the single instance A1 with depth 0.

    ######
    # A1 #
    ######
    '''
    parsed = rdflib.Graph().parse(example_data, format='turtle')
    triples = (tuple(map(str, triple)) for triple in parsed)
    graph = wlkernel.WLRDFGraph(triples, ['A1'], 0)
    sizes = {
        'nodes': len(graph.nodes),
        'edges': len(graph.edges),
        'label levels': len(graph.labels),
        'initial labels': len(graph.labels[0]),
        'instance_nodes': len(graph.instance_nodes),
        'instance_nodes[A1]': len(graph.instance_nodes['A1']),
        'instance_edges': len(graph.instance_edges),
        'instance_edges[A1]': len(graph.instance_edges['A1']),
    }
    assert sizes == {
        'nodes': 1,
        'edges': 0,
        'label levels': 1,
        'initial labels': 1,
        'instance_nodes': 1,
        'instance_nodes[A1]': 0,
        'instance_edges': 1,
        'instance_edges[A1]': 0,
    }
212 | 
213 | 
def test_wlrdfgraph_depth_1():
    r'''
    WLRDFGraph for the single instance A1 with depth 1.

          ######
          # A1 #
          ######
          /    \
      P2 /      \ P3
        /        \
    #####        #####
    # C #        # D #
    #####        #####
    '''
    parsed = rdflib.Graph().parse(example_data, format='turtle')
    triples = (tuple(map(str, triple)) for triple in parsed)
    graph = wlkernel.WLRDFGraph(triples, ['A1'], 1)
    sizes = {
        'nodes': len(graph.nodes),
        'edges': len(graph.edges),
        'label levels': len(graph.labels),
        'initial labels': len(graph.labels[0]),
        'instance_nodes': len(graph.instance_nodes),
        'instance_nodes[A1]': len(graph.instance_nodes['A1']),
        'instance_edges': len(graph.instance_edges),
        'instance_edges[A1]': len(graph.instance_edges['A1']),
    }
    assert sizes == {
        'nodes': 3,
        'edges': 2,
        'label levels': 1,
        'initial labels': 5,
        'instance_nodes': 1,
        'instance_nodes[A1]': 2,
        'instance_edges': 1,
        'instance_edges[A1]': 2,
    }
237 | 
238 | 
def test_wlrdfgraph_depth_2():
    r'''
    WLRDFGraph for the single instance A1 with depth 2.

          ######
          # A1 #
          ######
          /    \
      P2 /      \ P3
        /        \
    #####        #####
    # C #        # D #
    #####        #####
        \       /
      P4 \     / P4
          \   /
          #####
          # H #
          #####
    '''
    parsed = rdflib.Graph().parse(example_data, format='turtle')
    triples = (tuple(map(str, triple)) for triple in parsed)
    graph = wlkernel.WLRDFGraph(triples, ['A1'], 2)
    sizes = {
        'nodes': len(graph.nodes),
        'edges': len(graph.edges),
        'label levels': len(graph.labels),
        'initial labels': len(graph.labels[0]),
        'instance_nodes': len(graph.instance_nodes),
        'instance_nodes[A1]': len(graph.instance_nodes['A1']),
        'instance_edges': len(graph.instance_edges),
        'instance_edges[A1]': len(graph.instance_edges['A1']),
    }
    assert sizes == {
        'nodes': 4,
        'edges': 4,
        'label levels': 1,
        'initial labels': 8,
        'instance_nodes': 1,
        'instance_nodes[A1]': 3,
        'instance_edges': 1,
        'instance_edges[A1]': 4,
    }
268 | 
269 | 
def test_wlrdfgraph_depth_4():
    r'''
    WLRDFGraph for the single instance A1 with depth 4.

          ######
          # A1 #
          ######
          /    \
      P2 /      \ P3
        /        \
    #####        #####
    # C #        # D #
    #####        #####
        \       /
      P4 \     / P4
          \   /
          #####
          # H #
          #####
            |
            | P6
            |
          ######
          # A2 #
          ######
          /    \
      P3 /      \ P2
        /        \
     #####     #####
     # E #     # D #
     #####     #####
    '''
    parsed = rdflib.Graph().parse(example_data, format='turtle')
    triples = (tuple(map(str, triple)) for triple in parsed)
    graph = wlkernel.WLRDFGraph(triples, ['A1'], 4)
    sizes = {
        'nodes': len(graph.nodes),
        'edges': len(graph.edges),
        'label levels': len(graph.labels),
        'initial labels': len(graph.labels[0]),
        'instance_nodes': len(graph.instance_nodes),
        'instance_nodes[A1]': len(graph.instance_nodes['A1']),
        'instance_edges': len(graph.instance_edges),
        'instance_edges[A1]': len(graph.instance_edges['A1']),
    }
    assert sizes == {
        'nodes': 6,
        'edges': 7,
        'label levels': 1,
        'initial labels': 14,
        'instance_nodes': 1,
        'instance_nodes[A1]': 5,
        'instance_edges': 1,
        'instance_edges[A1]': 7,
    }
311 | 
312 | 
def test_wlrdf_relabel():
    '''
    Check WLRDFGraph.relabel on the depth-4 graph of instances A1 and B1.

    Two rounds must leave three label levels, and the first round must
    yield more distinct labels than the initial labelling.
    '''
    rdf_graph = rdflib.Graph().parse(example_data, format='turtle')
    triples = ((str(s), str(p), str(o)) for s, p, o in rdf_graph)
    wlrdf_graph = wlkernel.WLRDFGraph(triples, ['A1', 'B1'], 4)

    uniq_labels_0 = set(wlrdf_graph.labels[0].values())

    wlrdf_graph.relabel()
    uniq_labels_1 = set(wlrdf_graph.labels[1].values())

    wlrdf_graph.relabel()
    # Read the level appended by the second round (index 2); reading index 1
    # again would compare the first round with itself.
    uniq_labels_2 = set(wlrdf_graph.labels[2].values())

    assert len(wlrdf_graph.labels) == 3
    assert len(uniq_labels_0) < len(uniq_labels_1)
    # NOTE(review): assumes a relabel round never merges previously distinct
    # labels; tighten to == if the labelling is expected to be stable after
    # the first round -- confirm against the relabel implementation.
    assert len(uniq_labels_1) <= len(uniq_labels_2)
329 | 
330 | 
def test_wlrdf_kernel():
    'Check wlrdf_kernel between instances A1 and B1 with 0 and 1 iterations'
    parsed = rdflib.Graph().parse(example_data, format='turtle')
    triples = (tuple(map(str, triple)) for triple in parsed)
    graph = wlkernel.WLRDFGraph(triples, ['A1', 'B1'], 4)

    without_iterations = wlkernel.wlrdf_kernel(graph, 'A1', 'B1')
    assert without_iterations == 10*1

    one_iteration = wlkernel.wlrdf_kernel(graph, 'A1', 'B1', 1)
    assert one_iteration == 10*0.5 + 3
337 | 
338 | 
def test_wlrdf_kernel_matrix():
    '''
    Check that wlrdf_kernel_matrix over (A1, B1) is 2x2 and that both
    off-diagonal entries equal wlrdf_kernel for the pair.
    '''
    parsed = rdflib.Graph().parse(example_data, format='turtle')
    triples = (tuple(map(str, triple)) for triple in parsed)
    graph = wlkernel.WLRDFGraph(triples, ['A1', 'B1'], 4)

    matrix = wlkernel.wlrdf_kernel_matrix(graph, ['A1', 'B1'])

    assert len(matrix) == 2
    assert len(matrix[0]) == 2
    for row, col in ((0, 1), (1, 0)):
        assert matrix[row][col] == wlkernel.wlrdf_kernel(graph, 'A1', 'B1')
353 | 


--------------------------------------------------------------------------------
/wlkernel/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="Encoding" addBOMForNewFiles="with NO BOM" />
4 | </project>


--------------------------------------------------------------------------------
/wlkernel/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="JavaScriptSettings">
4 |     <option name="languageLevel" value="ES6" />
5 |   </component>
6 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
7 | </project>


--------------------------------------------------------------------------------
/wlkernel/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/wlkernel.iml" filepath="$PROJECT_DIR$/.idea/wlkernel.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/wlkernel/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/wlkernel/.idea/wlkernel.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="inheritedJdk" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/wlkernel/.idea/workspace.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <project version="4">
  3 |   <component name="ChangeListManager">
  4 |     <list default="true" id="e4c4a894-a7fb-4bfa-b066-5334f7a13e20" name="Default Changelist" comment="" />
  5 |     <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
  6 |     <option name="SHOW_DIALOG" value="false" />
  7 |     <option name="HIGHLIGHT_CONFLICTS" value="true" />
  8 |     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
  9 |     <option name="LAST_RESOLUTION" value="IGNORE" />
 10 |   </component>
 11 |   <component name="FileEditorManager">
 12 |     <splitter split-orientation="horizontal" split-proportion="0.38802084">
 13 |       <split-first>
 14 |         <leaf>
 15 |           <file pinned="false" current-in-tab="true">
 16 |             <entry file="file://$PROJECT_DIR$/_wlkernel.py">
 17 |               <provider selected="true" editor-type-id="text-editor">
 18 |                 <state relative-caret-position="-3670">
 19 |                   <folding>
 20 |                     <element signature="e#0#44#0" expanded="true" />
 21 |                   </folding>
 22 |                 </state>
 23 |               </provider>
 24 |             </entry>
 25 |           </file>
 26 |         </leaf>
 27 |       </split-first>
 28 |       <split-second>
 29 |         <leaf>
 30 |           <file pinned="false" current-in-tab="true">
 31 |             <entry file="file://$PROJECT_DIR$/temp_wlkernel.py">
 32 |               <provider selected="true" editor-type-id="text-editor">
 33 |                 <state relative-caret-position="835">
 34 |                   <caret line="180" column="4" lean-forward="true" selection-start-line="180" selection-start-column="4" selection-end-line="180" selection-end-column="4" />
 35 |                   <folding>
 36 |                     <element signature="e#0#87#0" expanded="true" />
 37 |                   </folding>
 38 |                 </state>
 39 |               </provider>
 40 |             </entry>
 41 |           </file>
 42 |         </leaf>
 43 |       </split-second>
 44 |     </splitter>
 45 |   </component>
 46 |   <component name="Git.Settings">
 47 |     <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/.." />
 48 |   </component>
 49 |   <component name="IdeDocumentHistory">
 50 |     <option name="CHANGED_PATHS">
 51 |       <list>
 52 |         <option value="$PROJECT_DIR$/temp_wlkernel.py" />
 53 |       </list>
 54 |     </option>
 55 |   </component>
 56 |   <component name="ProjectFrameBounds" extendedState="6">
 57 |     <option name="y" value="6" />
 58 |     <option name="width" value="960" />
 59 |     <option name="height" value="1074" />
 60 |   </component>
 61 |   <component name="ProjectLevelVcsManager" settingsEditedManually="true" />
 62 |   <component name="ProjectView">
 63 |     <navigator proportions="" version="1">
 64 |       <foldersAlwaysOnTop value="true" />
 65 |     </navigator>
 66 |     <panes>
 67 |       <pane id="ProjectPane">
 68 |         <subPane>
 69 |           <expand>
 70 |             <path>
 71 |               <item name="wlkernel" type="b2602c69:ProjectViewProjectNode" />
 72 |               <item name="wlkernel" type="462c0819:PsiDirectoryNode" />
 73 |             </path>
 74 |           </expand>
 75 |           <select />
 76 |         </subPane>
 77 |       </pane>
 78 |       <pane id="Scope" />
 79 |     </panes>
 80 |   </component>
 81 |   <component name="PropertiesComponent">
 82 |     <property name="WebServerToolWindowFactoryState" value="false" />
 83 |     <property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
 84 |     <property name="nodejs_npm_path_reset_for_default_project" value="true" />
 85 |   </component>
 86 |   <component name="RunDashboard">
 87 |     <option name="ruleStates">
 88 |       <list>
 89 |         <RuleState>
 90 |           <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
 91 |         </RuleState>
 92 |         <RuleState>
 93 |           <option name="name" value="StatusDashboardGroupingRule" />
 94 |         </RuleState>
 95 |       </list>
 96 |     </option>
 97 |   </component>
 98 |   <component name="SvnConfiguration">
 99 |     <configuration />
100 |   </component>
101 |   <component name="TaskManager">
102 |     <task active="true" id="Default" summary="Default task">
103 |       <changelist id="e4c4a894-a7fb-4bfa-b066-5334f7a13e20" name="Default Changelist" comment="" />
104 |       <created>1559639650080</created>
105 |       <option name="number" value="Default" />
106 |       <option name="presentableId" value="Default" />
107 |       <updated>1559639650080</updated>
108 |       <workItem from="1559639651491" duration="244000" />
109 |     </task>
110 |     <servers />
111 |   </component>
112 |   <component name="TimeTrackingManager">
113 |     <option name="totallyTimeSpent" value="244000" />
114 |   </component>
115 |   <component name="ToolWindowManager">
116 |     <frame x="0" y="6" width="1920" height="1074" extended-state="6" />
117 |     <editor active="true" />
118 |     <layout>
119 |       <window_info id="Favorites" side_tool="true" />
120 |       <window_info content_ui="combo" id="Project" order="0" weight="0.1265625" />
121 |       <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
122 |       <window_info anchor="bottom" id="Docker" show_stripe_button="false" />
123 |       <window_info anchor="bottom" id="Database Changes" />
124 |       <window_info anchor="bottom" id="Version Control" />
125 |       <window_info anchor="bottom" id="Python Console" />
126 |       <window_info anchor="bottom" id="Terminal" />
127 |       <window_info anchor="bottom" id="Event Log" side_tool="true" />
128 |       <window_info anchor="bottom" id="Message" order="0" />
129 |       <window_info anchor="bottom" id="Find" order="1" />
130 |       <window_info anchor="bottom" id="Run" order="2" />
131 |       <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
132 |       <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
133 |       <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
134 |       <window_info anchor="bottom" id="TODO" order="6" />
135 |       <window_info anchor="right" id="SciView" />
136 |       <window_info anchor="right" id="Database" />
137 |       <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
138 |       <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
139 |       <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
140 |     </layout>
141 |   </component>
142 |   <component name="TypeScriptGeneratedFilesManager">
143 |     <option name="version" value="1" />
144 |   </component>
145 |   <component name="editorHistoryManager">
146 |     <entry file="file://$PROJECT_DIR$/_wlkernel.py">
147 |       <provider selected="true" editor-type-id="text-editor">
148 |         <state relative-caret-position="-3670">
149 |           <folding>
150 |             <element signature="e#0#44#0" expanded="true" />
151 |           </folding>
152 |         </state>
153 |       </provider>
154 |     </entry>
155 |     <entry file="file://$PROJECT_DIR$/temp_wlkernel.py">
156 |       <provider selected="true" editor-type-id="text-editor">
157 |         <state relative-caret-position="835">
158 |           <caret line="180" column="4" lean-forward="true" selection-start-line="180" selection-start-column="4" selection-end-line="180" selection-end-column="4" />
159 |           <folding>
160 |             <element signature="e#0#87#0" expanded="true" />
161 |           </folding>
162 |         </state>
163 |       </provider>
164 |     </entry>
165 |   </component>
166 | </project>


--------------------------------------------------------------------------------
/wlkernel/__init__.py:
--------------------------------------------------------------------------------
 1 | __version__ = '0.1'
 2 | 
 3 | 
 4 | from ._wlkernel import (
 5 |     Node,
 6 |     Edge,
 7 |     WLGraph,
 8 |     wl_relabel,
 9 |     wl_kernel,
10 |     wl_kernel_matrix,
11 |     WLRDFGraph,
12 |     wlrdf_kernel,
13 |     wlrdf_kernel_matrix,
14 | )
15 | 


--------------------------------------------------------------------------------
/wlkernel/_wlkernel.py:
--------------------------------------------------------------------------------
  1 | from typing import (
  2 |     List,
  3 |     Dict,
  4 |     Tuple,
  5 |     Iterable,
  6 |     Union,
  7 |     Set,
  8 | )
  9 | from collections import Counter
 10 | from itertools import chain
 11 | 
 12 | from nptyping import Array
 13 | import numpy as np
 14 | 
 15 | 
 16 | class Node:
 17 |     'A node of a Weisfeiler-Lehman RDF graph'
 18 | 
 19 |     def __init__(self):
 20 |         self.neighbors = set()
 21 | 
 22 |     def add_neighbor(self, edge):
 23 |         self.neighbors.add(edge)
 24 | 
 25 |     def __hash__(self):
 26 |         return hash(id(self))
 27 | 
 28 | 
 29 | class Edge:
 30 |     'An edge of a Weisfeiler-Lehman RDF graph'
 31 | 
 32 |     def __init__(self):
 33 |         self.neighbor = None
 34 | 
 35 |     def __hash__(self):
 36 |         return hash(id(self))
 37 | 
 38 | 
 39 | class WLGraph:
 40 |     'Standard Weisfeiler-Lehman graph with directed labeled edges'
 41 | 
 42 |     def __init__(self, triples: Iterable[Tuple[str, str, str]],
 43 |                  instance: str, max_depth: int):
 44 |         'Build a Weisfeiler-Lehman graph from a list of RDF triples'
 45 |         triples = list(triples)
 46 |         self.max_depth = max_depth
 47 |         self.nodes: Set[Node] = set()
 48 |         self.edges: Set[Edge] = set()
 49 |         self.labels: List[Dict[Union[Node, Edge], str]] = [dict()]
 50 | 
 51 |         v_map: Dict[str, Node] = dict()
 52 |         e_map: Dict[Tuple[str, str, str], Edge] = dict()
 53 | 
 54 |         root = Node()
 55 |         self.nodes.add(root)
 56 |         self.labels[0][root] = 'root'
 57 |         v_map[instance] = root
 58 | 
 59 |         search_front = {instance}
 60 |         for j in reversed(range(0, max_depth)):
 61 |             new_search_front = set()
 62 |             for r in search_front:
 63 |                 r_triples = [(s, p, o) for s, p, o in triples if s == r]
 64 |                 for sub, pred, obj in r_triples:
 65 |                     new_search_front.add(obj)
 66 | 
 67 |                     if obj not in v_map:
 68 |                         v = Node()
 69 |                         self.nodes.add(v)
 70 |                         v_map[obj] = v
 71 |                         self.labels[0][v_map[obj]] = obj
 72 | 
 73 |                     t = (sub, pred, obj)
 74 |                     if t not in e_map:
 75 |                         e = Edge()
 76 |                         self.edges.add(e)
 77 |                         e_map[t] = e
 78 |                         self.labels[0][e_map[t]] = pred
 79 | 
 80 |                     v_map[obj].add_neighbor(e_map[t])
 81 |                     e_map[t].neighbor = v_map[sub]
 82 | 
 83 |             search_front = new_search_front
 84 | 
 85 | 
 86 | def wl_relabel(wl_graphs: Iterable[WLGraph], iterations: int = 1):
 87 |     'Relabeling algorithm'
 88 | 
 89 |     wl_graphs = list(wl_graphs)
 90 | 
 91 |     assert len(set(len(wl_graph.labels) for wl_graph in wl_graphs))
 92 |     m = len(wl_graphs[0].labels)
 93 |     for i in range(m, m + iterations):
 94 | 
 95 |         # 1. Multiset-label determination
 96 |         multisets_list: List[Dict[Union[Node, Edge], List[str]]] = [
 97 |             dict() for _ in range(len(wl_graphs))
 98 |         ]
 99 |         for wl_graph, multisets in zip(wl_graphs, multisets_list):
100 |             for v in wl_graph.nodes:
101 |                 if v in wl_graph.labels[0]:
102 |                     multisets[v] = [
103 |                         wl_graph.labels[i - 1][u] for u in v.neighbors
104 |                         if u in wl_graph.labels[i - 1]
105 |                     ]
106 |             for e in wl_graph.edges:
107 |                 if e in wl_graph.labels[0]:
108 |                     multisets[e] = [ wl_graph.labels[i - 1][e.neighbor] ]
109 | 
110 |         # 2. Sorting each multiset
111 |         expanded_labels_list: List[Dict[Union[Node, Edge], str]] = [
112 |             dict() for _ in range(len(wl_graphs))
113 |         ]
114 |         for wl_graph, multisets, expanded_labels in zip(wl_graphs,
115 |                                                         multisets_list,
116 |                                                         expanded_labels_list):
117 |             for k, multiset in multisets.items():
118 |                 expanded_labels[k] = (
119 |                     wl_graph.labels[i - 1][k] + ''.join(sorted(multiset))
120 |                 )
121 | 
122 |         # 3. Label compression
123 |         total_label_set = (
124 |             set(chain.from_iterable(e.values() for e in expanded_labels_list))
125 |         )
126 |         f = {
127 |             old_label: str(compressed_label)
128 |             for compressed_label, old_label in enumerate(total_label_set)
129 |         }
130 | 
131 | 
132 |         # 4. Relabeling
133 |         for wl_graph, expanded_labels in zip(wl_graphs, expanded_labels_list):
134 |             wl_graph.labels.append({
135 |                 k: f[expanded_labels[k]] for k in expanded_labels
136 |             })
137 | 
138 | 
def wl_kernel(wl_graph_1: WLGraph, wl_graph_2: WLGraph,
              iterations: int = 0) -> float:
    """Compute the Weisfeiler-Lehman kernel for two WLGraphs.

    Both graphs must hold the same number of label dictionaries. When fewer
    than ``iterations + 1`` are available, the two graphs are relabeled
    together for the missing rounds. For every round ``0..iterations`` the
    node-label matches and edge-label matches between the graphs are counted
    and added with weight ``(round + 1) / (iterations + 1)``.
    """
    assert len(wl_graph_1.labels) == len(wl_graph_2.labels)

    # Rounds still to be computed before labels[iterations] exists.
    missing_rounds = iterations - (len(wl_graph_1.labels) - 1)
    if missing_rounds > 0:
        wl_relabel([wl_graph_1, wl_graph_2], missing_rounds)

    total = 0.0
    for step in range(iterations + 1):
        nodes_1 = [wl_graph_1.labels[step][n] for n in wl_graph_1.nodes]
        nodes_2 = [wl_graph_2.labels[step][n] for n in wl_graph_2.nodes]
        edges_1 = [wl_graph_1.labels[step][e] for e in wl_graph_1.edges]
        edges_2 = [wl_graph_2.labels[step][e] for e in wl_graph_2.edges]

        node_matches = count_commons(nodes_1, nodes_2)
        edge_matches = count_commons(edges_1, edges_2)

        # Later rounds weigh more; the last one has weight 1.
        weight = (step + 1) / (iterations + 1)
        total += weight * (node_matches + edge_matches)
    return total
167 | 
168 | 
def wl_kernel_matrix(wl_graphs: Iterable['WLGraph'],
                     iterations: int = 0) -> List[List[float]]:
    """Compute the matrix of the kernel values between each couple of WLGraphs.

    All graphs are relabeled together up front (when they do not yet hold
    ``iterations + 1`` label dictionaries), then ``wl_kernel`` is evaluated
    for every pair ``i <= j`` and mirrored into the lower triangle.

    Args:
        wl_graphs: the graphs to compare; may be any iterable.
        iterations: number of relabeling rounds used by the kernel.

    Returns:
        A symmetric ``n x n`` nested list, or ``[]`` when no graph is given.
    """
    wl_graphs = list(wl_graphs)
    if not wl_graphs:
        # Nothing to compare; avoids indexing wl_graphs[0] below.
        return []

    m = len(wl_graphs[0].labels)
    if iterations > m - 1:
        wl_relabel(wl_graphs, iterations - m + 1)

    n = len(wl_graphs)
    kernel_matrix = [[0.0] * n for _ in range(n)]
    for i in range(n):
        for j in range(i, n):
            value = wl_kernel(wl_graphs[i], wl_graphs[j], iterations)
            kernel_matrix[i][j] = value
            kernel_matrix[j][i] = value
    return kernel_matrix
189 | 
190 | 
class WLRDFGraph:
    """Weisfeiler-Lehman RDF graph shared by several instances.

    Attributes:
        max_depth: the extraction depth given to the constructor.
        nodes: set of all ``Node`` objects.
        edges: set of all ``Edge`` objects.
        labels: list of dictionaries keyed by ``(node_or_edge, depth)``;
            the constructor fills ``labels[0]``, ``relabel`` appends more.
        instance_nodes: for every instance, the nodes reached from it mapped
            to the depth index at which each was first reached.
        instance_edges: same as ``instance_nodes`` for edges.

    Depth indices count down: an instance root is labeled at ``max_depth``,
    objects and edges one hop away at ``max_depth - 1``, and so on to 0.
    """

    def __init__(self, triples: Iterable[Tuple[str, str, str]],
                 instances: Iterable[str], max_depth: int):
        """Build a Weisfeiler-Lehman RDF graph from a list of RDF triples.

        Args:
            triples: ``(subject, predicate, object)`` tuples; consumed once.
            instances: the subjects whose neighborhoods are extracted; may be
                any iterable, including a one-shot iterator.
            max_depth: number of levels to expand from each instance.
        """
        # ``instances`` is traversed several times below, so it has to be
        # materialized or an iterator would be exhausted by the first pass.
        instances = list(instances)
        self.max_depth = max_depth
        self.nodes: Set[Node] = set()
        self.edges: Set[Edge] = set()
        self.labels: List[Dict[Tuple[Union[Node, Edge], int], str]] = [dict()]
        self.instance_nodes: Dict[str, Dict[Node, int]] = {
            instance: dict() for instance in instances
        }
        self.instance_edges: Dict[str, Dict[Edge, int]] = {
            instance: dict() for instance in instances
        }

        # Group the triples by subject once, so that expanding a frontier
        # entry is a dictionary lookup instead of a scan over every triple.
        triples_by_subject: Dict[str, List[Tuple[str, str, str]]] = dict()
        for sub, pred, obj in triples:
            triples_by_subject.setdefault(sub, []).append((sub, pred, obj))

        v_map: Dict[str, Node] = dict()
        e_map: Dict[Tuple[str, str, str], Edge] = dict()

        # 1. Initialization
        for instance in instances:
            root = Node()
            self.nodes.add(root)
            self.labels[0][(root, max_depth)] = 'root'
            v_map[instance] = root

        # 2. Subgraph Extraction
        for instance in instances:
            reached_nodes = self.instance_nodes[instance]
            reached_edges = self.instance_edges[instance]
            search_front = {instance}
            for j in reversed(range(0, max_depth)):
                new_search_front = set()
                for r in search_front:
                    for t in triples_by_subject.get(r, ()):
                        sub, pred, obj = t
                        new_search_front.add(obj)

                        if obj not in v_map:
                            v = Node()
                            self.nodes.add(v)
                            v_map[obj] = v
                        self.labels[0][(v_map[obj], j)] = obj
                        # Keep the first (largest) depth index only.
                        if v_map[obj] not in reached_nodes:
                            reached_nodes[v_map[obj]] = j

                        if t not in e_map:
                            e = Edge()
                            self.edges.add(e)
                            e_map[t] = e
                        self.labels[0][(e_map[t], j)] = pred
                        if e_map[t] not in reached_edges:
                            reached_edges[e_map[t]] = j

                        # The edge is registered on the object node and
                        # points back to the subject node.
                        v_map[obj].add_neighbor(e_map[t])
                        e_map[t].neighbor = v_map[sub]

                search_front = new_search_front

    def relabel(self, iterations: int = 1):
        """Append ``iterations`` new label dictionaries to ``self.labels``.

        The new label of ``(node, j)`` is determined by its previous label
        and the sorted previous labels of its neighbor edges at depth ``j``;
        the new label of ``(edge, j)`` by its previous label and the previous
        label of ``(edge.neighbor, j + 1)``.
        """
        first_new = len(self.labels)
        for i in range(first_new, first_new + iterations):
            previous = self.labels[i - 1]

            # 1-2. Multiset-label determination and sorting.
            # The expanded label is kept as a (label, sorted multiset) tuple:
            # joining the strings without a separator is ambiguous, e.g.
            # '1' + '2' + '3' and '12' + '3' would both give '123'.
            expanded_labels: Dict[Tuple[Union[Node, Edge], int],
                                  Tuple[str, Tuple[str, ...]]] = dict()
            for v in self.nodes:
                for j in range(self.max_depth + 1):
                    if (v, j) in self.labels[0]:
                        multiset = sorted(
                            previous[(u, j)] for u in v.neighbors
                            if (u, j) in previous
                        )
                        expanded_labels[(v, j)] = (
                            previous[(v, j)], tuple(multiset)
                        )
            for e in self.edges:
                for j in range(self.max_depth):
                    if (e, j) in self.labels[0]:
                        expanded_labels[(e, j)] = (
                            previous[(e, j)],
                            (previous[(e.neighbor, j + 1)],),
                        )

            # 3. Label compression (sorted, so the ids do not depend on the
            # iteration order of a set).
            f = {
                expanded: str(compressed)
                for compressed, expanded
                in enumerate(sorted(set(expanded_labels.values())))
            }

            # 4. Relabeling
            self.labels.append({
                key: f[expanded] for key, expanded in expanded_labels.items()
            })
292 | 
293 | 
def count_commons(a: Iterable, b: Iterable) -> int:
    """Return the number of pairs ``(x, y)`` with ``x`` in a, ``y`` in b, x == y.

    For every value present in both iterables, its number of occurrences in
    ``a`` is multiplied by its number of occurrences in ``b``; the products
    are summed. Elements must be hashable.

    Each iterable is consumed exactly once, so one-shot iterators and
    generators are counted correctly.
    """
    counter_a = Counter(a)
    counter_b = Counter(b)
    return sum(
        counter_a[value] * counter_b[value]
        for value in counter_a.keys() & counter_b.keys()
    )
303 | 
304 | 
def wlrdf_kernel(graph: WLRDFGraph, instance_1: str, instance_2: str,
                 iterations: int = 0) -> float:
    """Compute the Weisfeiler-Lehman kernel for two instances.

    The graph is relabeled first when it holds fewer than ``iterations + 1``
    label dictionaries. For every round ``0..iterations`` the labels of the
    nodes and of the edges recorded for each instance (at their recorded
    depth) are compared, and the number of matches is added with weight
    ``(round + 1) / (iterations + 1)``.
    """
    # Rounds still to be computed before labels[iterations] exists.
    missing_rounds = iterations - (len(graph.labels) - 1)
    if missing_rounds > 0:
        graph.relabel(missing_rounds)

    total = 0.0
    for step in range(iterations + 1):
        nodes_1 = [
            graph.labels[step][(node, depth)]
            for node, depth in graph.instance_nodes[instance_1].items()
        ]
        nodes_2 = [
            graph.labels[step][(node, depth)]
            for node, depth in graph.instance_nodes[instance_2].items()
        ]
        edges_1 = [
            graph.labels[step][(edge, depth)]
            for edge, depth in graph.instance_edges[instance_1].items()
        ]
        edges_2 = [
            graph.labels[step][(edge, depth)]
            for edge, depth in graph.instance_edges[instance_2].items()
        ]

        node_matches = count_commons(nodes_1, nodes_2)
        edge_matches = count_commons(edges_1, edges_2)

        # Later rounds weigh more; the last one has weight 1.
        weight = (step + 1) / (iterations + 1)
        total += weight * (node_matches + edge_matches)
    return total
335 | 
336 | 
def wlrdf_kernel_matrix(graph: WLRDFGraph, instances: List[str],
                        iterations: int = 0) -> Array[float]:
    """Compute the matrix of the kernel values between each couple of instances.

    ``wlrdf_kernel`` is evaluated once for every pair ``i <= j`` and the
    value is written to both ``[i][j]`` and ``[j][i]``, so the returned
    ``n x n`` NumPy array is symmetric.
    """
    size = len(instances)
    matrix = np.zeros((size, size))
    for row in range(size):
        for col in range(row, size):
            value = wlrdf_kernel(
                graph, instances[row], instances[col], iterations
            )
            matrix[row][col] = value
            matrix[col][row] = value
    return matrix
351 | 
352 | 
def kernel_normalization(kernel_matrix: 'Array[float]') -> 'Array[float]':
    """Return the normalized kernel matrix.

    Every entry is divided by the geometric mean of the two corresponding
    diagonal entries: ``res[i][j] = K[i][j] / sqrt(K[i][i] * K[j][j])``.

    Args:
        kernel_matrix: square kernel matrix; a NumPy array or any array-like
            (e.g. a nested list) accepted by ``numpy.asarray``.

    Returns:
        A new float array of the same shape; the input is not modified.
        Entries whose row or column has a zero diagonal value are ``nan`` or
        ``inf``, as produced by the floating-point division.

    Raises:
        AssertionError: if ``kernel_matrix`` is not a square 2-D matrix.
    """
    kernel_matrix = np.asarray(kernel_matrix, dtype=float)
    assert (
        kernel_matrix.ndim == 2
        and kernel_matrix.shape[0] == kernel_matrix.shape[1]
    ), 'kernel_matrix must be a square matrix'

    diagonal = np.diag(kernel_matrix)
    # outer(d, d)[i][j] == K[i][i] * K[j][j]
    return kernel_matrix / np.sqrt(np.outer(diagonal, diagonal))
363 | 


--------------------------------------------------------------------------------