├── .gitignore
├── LICENSE
├── README.md
├── app.py
├── assets
│   ├── ontochat_logo.png
│   ├── ontochat_workflow.png
│   ├── software_header.md
│   ├── systematic_literature_review
│   │   └── Codebook.xlsx
│   ├── title_logo.png
│   └── user_study
│       ├── Background_Information.md
│       ├── Demographic_Information_of_Participants.md
│       ├── Demographic_Information_of_Participants_in_User_Evaluation.md
│       ├── Instructions_on_How_to_Write_Ontology_User_Stories.md
│       ├── OntoChat_Backend_Prompts.md
│       ├── Ontology_User_Story_Template.png
│       ├── PID7_User_Goal_Description_Generation_Illustrative_Example.md
│       ├── Pre_identified_Prompts_for_Ontology_User_Story_Elicitation.md
│       ├── Researcher_Observation_Checklist.md
│       ├── Study_Script_for_User_Story_Writing.md
│       └── User_Needs_for_the_LLM_assisted_Task_Assisting_in_User_Story_Creation.md
├── data
│   ├── Linka#1_MusicKnowledge.md
│   ├── music_meta_cqs.txt
│   └── music_meta_cqs_neg.txt
├── ontochat
│   ├── __init__.py
│   ├── analysis.py
│   ├── chatbot.py
│   ├── config.py
│   ├── cq_generator_ontochat_app.py
│   ├── functions.py
│   ├── ontolib.py
│   ├── queries.py
│   ├── tutorial.ipynb
│   ├── utils.py
│   └── verbaliser.py
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | .idea/
161 |
162 | # flagging folder
163 | flagged/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 King's Knowledge Graph Lab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # OntoChat: a Framework for Conversational Ontology Engineering using Language Models
6 |
7 | OntoChat is an LLM-based conversational agent designed to facilitate collaborative ontology engineering. It currently supports ontology requirement elicitation, analysis, and testing. Its CQ generation is evaluated with [Bench4KE](https://github.com/fossr-project/ontogenia-cini), a benchmarking framework that compares generated CQs against gold standards using lexical and semantic metrics. OntoChat is publicly available on **[Hugging Face Spaces](https://huggingface.co/spaces/1hangzhao/OntoChat)**. For the best experience, we recommend using **Google Chrome**.
8 |
9 | ---
10 |
11 | ## Deployment Instructions
12 |
13 | 1. **Set Up Your Environment**
14 | - Ensure you have **Python 3.11** or higher installed.
15 |
16 | 2. **Install Dependencies**
17 | - Install the required dependencies:
18 | ```bash
19 | pip install -r requirements.txt
20 | ```
21 |
22 | 3. **Run the Application**
23 | - Start the OntoChat framework using Gradio:
24 | ```bash
25 | gradio app.py
26 | ```
27 |
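For a quick local start, the steps above can be combined into a single shell session. This is a minimal sketch assuming a Unix-like shell with Python 3.11+ available as `python3`; the virtual-environment directory `.venv` is only illustrative:

```bash
# Create and activate an isolated environment (optional but recommended)
python3 -m venv .venv
source .venv/bin/activate

# Install the dependencies and launch the Gradio app
pip install -r requirements.txt
gradio app.py
```

Alternatively, running `python app.py` starts the app via the `demo.launch(share=True)` call at the bottom of `app.py`.
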
28 | ## Authors and attribution
29 | ```
30 | @inproceedings{zhang-et-al-2024-ontochat,
31 | author = {Zhang, Bohui and Carriero, Valentina Anita and Schreiberhuber, Katrin and Tsaneva, Stefani and Gonz\'{a}lez, Luc\'{\i}a S\'{a}nchez and Kim, Jongmo and de Berardinis, Jacopo},
32 | title = {OntoChat: A Framework for Conversational Ontology Engineering Using Language Models},
33 | year = {2025},
34 | isbn = {978-3-031-78951-9},
35 | publisher = {Springer-Verlag},
36 | address = {Berlin, Heidelberg},
37 | url = {https://doi.org/10.1007/978-3-031-78952-6_10},
38 | doi = {10.1007/978-3-031-78952-6_10},
39 | booktitle = {The Semantic Web: ESWC 2024 Satellite Events: Hersonissos, Crete, Greece, May 26–30, 2024, Proceedings, Part I},
40 | pages = {102–121},
41 | numpages = {20},
42 | keywords = {Ontology Engineering, Large Language Models, Competency Questions, Computational Creativity},
43 | location = {Hersonissos, Greece}
44 | }
45 |
46 | @inproceedings{zhao2024improving,
47 | title={Improving Ontology Requirements Engineering with OntoChat and Participatory Prompting},
48 | author={Zhao, Yihang and Zhang, Bohui and Hu, Xi and Ouyang, Shuyin and Kim, Jongmo and Jain, Nitisha and de Berardinis, Jacopo and Mero{\~n}o-Pe{\~n}uela, Albert and Simperl, Elena},
49 | url = {https://ojs.aaai.org/index.php/AAAI-SS/article/view/31799},
50 | doi = {10.1609/aaaiss.v4i1.31799},
51 | booktitle={Proceedings of the AAAI Symposium Series},
52 | volume={4},
53 | number={1},
54 | pages={253--257},
55 | year={2024}
56 | }
57 | ```
58 |
59 | ## Acknowledgement
60 |
61 | This project has received multiple sources of funding, including co-funding from [MuseIT](https://www.muse-it.eu/) under grant agreement No. 101061441 as part of the European Union’s Horizon Europe research and innovation programme (2021-2027), as well as support from [SIEMENS AG](https://www.siemens.com/global/en.html) and the [Technical University of Munich](https://www.tum.de/), Institute for Advanced Study, Germany. We also extend our gratitude to all the evaluators for their valuable feedback on the tool’s effectiveness and for sharing their user experiences.
62 |
63 | ## License
64 |
65 | Copyright 2025 OntoChat maintainers
66 |
67 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
68 |
69 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
70 |
71 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
72 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | # from ontochat.functions import set_openai_api_key, user_story_generator, cq_generator, load_example_user_story, clustering_generator, ontology_testing, load_example
3 | from ontochat.functions import set_openai_api_key, user_story_generator, load_example
4 |
5 | user_story_template = """**Persona:**\n\n- Name: -\n- Age: -\n- Occupation: -\n- Skills: -\n- Interests: -\n\n**Goal:**\n\n- Description: -\n- Keywords: -\n\n**Scenario:**\n\n- Before: -\n- During: -\n- After: -\n\n**Example Data:**\n\n- Category: -\n- Data: -\n\n**Resources:**\n\n- Resource Name: -\n- Link: -"""
6 |
7 | with gr.Blocks() as set_api_key:
8 | gr.Markdown(
9 | """
10 | # Welcome to OntoChat! 👋
11 |
12 | Hi there! I'm OntoChat, your conversational assistant for collaborative ontology engineering. (1) 📋 I assist with ontology requirements elicitation by asking targeted questions, collecting user inputs, providing example answers, and recommending prompt templates to guide you. (2) 📝 I offer customizable prompts designed for different interaction stages, ensuring structured guidance throughout the process. (3) ⚙️ You can edit placeholders within these templates to refine constraints and shape my responses to fit your specific needs. (4) 🔄 I continuously improve my responses based on your feedback until you're satisfied. Let's make ontology development smoother and more interactive! 🚀 For more details, visit 🌐 [OntoChat on GitHub](https://github.com/King-s-Knowledge-Graph-Lab/OntoChat).
13 | """
14 | )
15 |
16 | # ### Citations
17 |
18 | # [1] Zhang B, Carriero VA, Schreiberhuber K, Tsaneva S, González LS, Kim J, de Berardinis J. OntoChat: a Framework for Conversational Ontology Engineering using Language Models. arXiv preprint arXiv:2403.05921. 2024 Mar 9.
19 |
20 | # [2] Zhao Y, Zhang B, Hu X, Ouyang S, Kim J, Jain N, de Berardinis J, Meroño-Peñuela A, Simperl E. Improving Ontology Requirements Engineering with OntoChat and Participatory Prompting. InProceedings of the AAAI Symposium Series 2024 Nov 8 (Vol. 4, No. 1, pp. 253-257).
21 |
22 | with gr.Group():
23 | api_key = gr.Textbox(
24 | label="OpenAI API Key",
25 | info="Please input your OpenAI API Key if you don't have it set up on your own machine. Please note that "
26 | "the key will only be used for this demo and will not be uploaded or used anywhere else."
27 | )
28 | api_key_btn = gr.Button(value="Set API Key")
29 | api_key_btn.click(fn=set_openai_api_key, inputs=api_key, outputs=api_key)
30 |
31 | with gr.Blocks() as user_story_interface:
32 | with gr.Row():
33 | with gr.Column(scale=1):
34 | user_story_chatbot = gr.Chatbot(
35 | value=[
36 | {"role": "assistant", "content": (
37 | "Hello! I'm OntoChat 😊. I'll help you create an ontology user story!\n\n 1. I will ask you one **elicitation question** at a time, present an **example answer** to support your understanding, and recommend a **prompt template** 📄 for answering.\n\n 2. Don't worry about prompting—find the **template** 📄 I recommended and edit the **placeholders** 📝 to craft an effective response 👍.\n\n 3. Within a prompt template:\n - **\*\*[]\*\*** placeholders are **mandatory**.\n - **\*[]\*** placeholders are **optional**.\n\n 4. I will **refine** my generation iteratively based on your input 🔄 until you are satisfied ✅.\n\nLet's get started! **Which domain is this ontology for?**\n\n**For example:** *Healthcare, Wine, Music, etc.*\n\nUse template **[Create Domain]** to answer. 🚀"
38 | )}
39 | ],
40 | height="472px",
41 | type="messages"
42 | )
43 | user_story_input = gr.Textbox(
44 | label="Message OntoChat",
45 | placeholder="Please type your message here and press Enter to interact with the chatbot:",
46 | max_lines = 20,
47 | lines = 1
48 | )
49 | elicitation_questions_dataset = gr.Dataset(
50 | components=[user_story_input],
51 | label="Prompt Templates",
52 | type="index",
53 | samples=[
54 | ["Create Domain"],
55 | ["Create Persona"],
56 | ["Create User Goal"],
57 | ["Create Actions"],
58 | ["Create Keywords"],
59 | ["Create Current Methods"],
60 | ["Create Challenges"],
61 | ["Create New Methods"],
62 | ["Create Outcomes"]
63 | ],
64 | samples_per_page = 10
65 | )
66 |
67 | user_story_input.submit(
68 | fn=user_story_generator,
69 | inputs=[user_story_input, user_story_chatbot],
70 | outputs=[user_story_chatbot, user_story_input]
71 | )
72 | elicitation_questions_dataset.click(
73 | fn=load_example,
74 | inputs=[elicitation_questions_dataset],
75 | outputs=[user_story_input]
76 | )
77 |
78 | # with gr.Blocks() as cq_interface:
79 | # with gr.Row():
80 | # with gr.Column():
81 | # cq_chatbot = gr.Chatbot(
82 | # value=[
83 | # {
84 | # "role": "assistant",
85 | # "content": (
86 | # "I am OntoChat, your conversational ontology engineering assistant. Here is the second step of "
87 | # "the system. Please give me your user story and tell me how many competency questions you want "
88 | # "me to generate from the user story."
89 | # )
90 | # }
91 | # ],
92 | # type="messages"
93 | # )
94 | # cq_input = gr.Textbox(
95 | # label="Chatbot input",
96 | # placeholder="Please type your message here and press Enter to interact with the chatbot:"
97 | # )
98 | # gr.Markdown(
99 | # """
100 | # ### User story examples
101 | # Click the button below to use an example user story from
102 | # [Linka](https://github.com/polifonia-project/stories/tree/main/Linka_Computer_Scientist) in Polifonia.
103 | # """
104 | # )
105 | # example_btn = gr.Button(value="Use example user story")
106 | # example_btn.click(
107 | # fn=load_example_user_story,
108 | # inputs=[],
109 | # outputs=[cq_input]
110 | # )
111 | # cq_output = gr.TextArea(
112 | # label="Competency questions",
113 | # interactive=True
114 | # )
115 | # cq_input.submit(
116 | # fn=cq_generator,
117 | # inputs=[
118 | # cq_input, cq_chatbot
119 | # ],
120 | # outputs=[
121 | # cq_output, cq_chatbot, cq_input
122 | # ]
123 | # )
124 |
125 | # clustering_interface = gr.Interface(
126 | # fn=clustering_generator,
127 | # inputs=[
128 | # gr.TextArea(
129 | # label="Competency questions",
130 | # info="Please copy the previously generated competency questions and paste it here. You can also modify "
131 | # "the questions before submitting them."
132 | # ),
133 | # gr.Dropdown(
134 | # value="LLM clustering",
135 | # choices=["LLM clustering", "Agglomerative clustering"],
136 | # label="Clustering method",
137 | # info="Please select the clustering method."
138 | # ),
139 | # gr.Textbox(
140 | # label="Number of clusters (optional for LLM clustering)",
141 | # info="Please input the number of clusters you want to generate. And please do not input a number that "
142 | # "exceeds the total number of competency questions."
143 | # )
144 | # ],
145 | # outputs=[
146 | # gr.Image(label="Visualization"),
147 | # gr.Code(
148 | # language='json',
149 | # label="Competency Question clusters"
150 | # )
151 | # ],
152 | # title="OntoChat",
153 | # description="This is the third step of OntoChat. Please copy the generated competency questions from the previous "
154 | # "step and run the clustering algorithm to group the competency questions based on their topics. From "
155 | # "our experience, LLM clustering has the best performance.",
156 | # flagging_mode="never"
157 | # )
158 |
159 | # with gr.Blocks() as testing_interface:
160 | # gr.Markdown(
161 | # """
162 | # # OntoChat
163 | # This is the final part of OntoChat which performs ontology testing based on the input ontology file and CQs.
164 | # """
165 | # )
166 |
167 | # with gr.Group():
168 | # api_key = gr.Textbox(
169 | # label="OpenAI API Key",
170 | # placeholder="If you have set the key in other tabs, you don't have to set it again.",
171 | # info="Please input your OpenAI API Key if you don't have it set up on your own machine. Please note that "
172 | # "the key will only be used for this demo and will not be uploaded or used anywhere else."
173 | # )
174 | # api_key_btn = gr.Button(value="Set API Key")
175 | # api_key_btn.click(fn=set_openai_api_key, inputs=api_key, outputs=api_key)
176 |
177 | # ontology_file = gr.File(label="Ontology file")
178 | # ontology_desc = gr.Textbox(
179 | # label="Ontology description",
180 | # placeholder="Please provide a description of the ontology uploaded to provide basic information and "
181 | # "additional context."
182 | # )
183 | # cq_testing_input = gr.Textbox(
184 | # label="Competency questions",
185 | # placeholder="Please provide the competency questions that you want to test with."
186 | # )
187 | # testing_btn = gr.Button(value="Test")
188 | # testing_output = gr.TextArea(label="Ontology testing output")
189 | # testing_btn.click(
190 | # fn=ontology_testing,
191 | # inputs=[
192 | # ontology_file, ontology_desc, cq_testing_input
193 | # ],
194 | # outputs=[
195 | # testing_output
196 | # ]
197 | # )
198 |
199 | demo = gr.TabbedInterface(
200 | # [set_api_key, user_story_interface, cq_interface, clustering_interface, testing_interface],
201 | [set_api_key, user_story_interface],
202 | ["Set API Key", "User Story Generation", "Competency Question Extraction", "Competency Question Analysis", "Ontology Testing"]
203 | )
204 |
205 | if __name__ == "__main__":
206 | demo.launch(share=True)
207 |
--------------------------------------------------------------------------------
/assets/ontochat_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/King-s-Knowledge-Graph-Lab/OntoChat/85b1f929592932787ebbf55f6ec431bc5a736914/assets/ontochat_logo.png
--------------------------------------------------------------------------------
/assets/ontochat_workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/King-s-Knowledge-Graph-Lab/OntoChat/85b1f929592932787ebbf55f6ec431bc5a736914/assets/ontochat_workflow.png
--------------------------------------------------------------------------------
/assets/software_header.md:
--------------------------------------------------------------------------------
1 | ---
2 | component-id: OntoChat-software
3 | type: Software
4 | name: OntoChat framework
5 | description: OntoChat, a framework for conversational ontology engineering that supports requirement elicitation, analysis, and testing.
6 | image: https://github.com/King-s-Knowledge-Graph-Lab/OntoChat/blob/main/assets/ontochat_workflow.png
7 | logo: https://github.com/King-s-Knowledge-Graph-Lab/OntoChat/raw/main/assets/title_logo.png
8 | demo: https://huggingface.co/spaces/b289zhan/OntoChat
9 | work-package:
10 | - WP2
11 | pilot:
12 | - Interlink
13 | project: Polifonia Project
14 | funder:
15 | - name: Polifonia Project
16 | url: "https://polifonia-project.eu"
17 | grant-agreement: "101004746"
18 | release-date: 29-02-2024
19 | release-number: v1.0.0
20 | resource: https://github.com/King-s-Knowledge-Graph-Lab/OntoChat
21 | release-link: https://github.com/King-s-Knowledge-Graph-Lab/OntoChat/releases/tag/v1.0.0
22 | changelog: https://github.com/King-s-Knowledge-Graph-Lab/OntoChat/releases
23 | licence:
24 | - MIT
25 |
26 | bibliography:
27 | - main-publication: "Zhang, Bohui; Carriero, Valentina Anita; Schreiberhuber, Katrin; Tsaneva, Stefani; Sánchez González, Lucía; Kim, Jongmo; de Berardinis, Jacopo. OntoChat: a Framework for Conversational Ontology Engineering using Language Models"
28 | copyright: "Copyright (c) 2023 Bohui Zhang, Jacopo de Berardinis, Jongmo Kim"
29 | contributors:
30 | - Bohui Zhang
31 | - Jacopo de Berardinis
32 | - Jongmo Kim
33 | credits: "This project has received funding from the European Union’s Horizon 2020 research and innovation programme under grant agreement N. 101004746"
34 |
35 | ---
36 |
37 | # OntoChat
38 |
39 | We introduce **OntoChat**, a framework for conversational ontology engineering that supports requirement elicitation,
40 | analysis, and testing. By interacting with a conversational agent, users can steer the creation of use cases and the
41 | extraction of competency questions, while receiving computational support to analyse the overall requirements and test
42 | early versions of the resulting ontologies.
--------------------------------------------------------------------------------
/assets/systematic_literature_review/Codebook.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/King-s-Knowledge-Graph-Lab/OntoChat/85b1f929592932787ebbf55f6ec431bc5a736914/assets/systematic_literature_review/Codebook.xlsx
--------------------------------------------------------------------------------
/assets/title_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/King-s-Knowledge-Graph-Lab/OntoChat/85b1f929592932787ebbf55f6ec431bc5a736914/assets/title_logo.png
--------------------------------------------------------------------------------
/assets/user_study/Background_Information.md:
--------------------------------------------------------------------------------
1 | # Background Information for Ontology User Story Elicitation
2 |
3 | ## **1. Understanding Domain Ontologies**
4 | A **domain ontology** is a structured representation of knowledge within a specific subject area, such as music, healthcare, or cultural heritage. It defines key concepts, their meanings, and how they relate to each other. This structured approach enables clear communication, knowledge sharing, and interoperability between different systems.
5 |
6 | ## **2. What is Ontology Construction?**
7 | Ontology construction is the process of creating an organized, machine-readable framework that structures domain-specific knowledge. This involves:
8 | - Defining core concepts and their relationships.
9 | - Establishing a common vocabulary.
10 | - Ensuring logical consistency to facilitate automated reasoning.
11 |
12 | Ontology construction serves as the backbone for knowledge-based applications, enabling seamless integration and enhanced searchability of domain information.
13 |
14 | ## **3. AI Chatbots in Ontology User Story Collection**
15 | An **AI chatbot** is a conversational tool designed to facilitate interviews by asking structured questions, collecting responses, and guiding the discussion dynamically. Leveraging **Generative AI (GenAI)**, it interprets user input, refines queries, and adapts based on responses, creating an intuitive and interactive user experience.
16 |
17 | ## **4. Understanding Ontology User Stories**
18 | In large-scale ontology development projects, gathering knowledge from domain experts is essential. Traditionally, this requires extensive interviews with multiple experts.
19 |
20 | ### **Why Use Ontology User Stories?**
21 | - **User-friendly approach**: Experts articulate their needs through narratives rather than rigid data models.
22 | - **Goal-driven design**: User stories clarify the intended use cases of an ontology, ensuring it aligns with real-world needs.
23 | - **Scope control**: Helps researchers maintain focus, avoiding unnecessary complexity in ontology design.
24 |
25 | ## **5. Example Ontology User Story: Linka, a Computer Scientist**
26 |
27 | ### **5.1 Persona**
28 | - **Name**: Linka
29 | - **Age**: 34
30 | - **Occupation**: Researcher in Computer Science
31 | - **Skills**: Expert in Network Data Analysis and Semantic Web technologies, specializing in music knowledge graphs
32 | - **Interests**: Exploring multi-modal music data, uncovering hidden relationships between artists, composers, and musical pieces
33 |
34 | ### **5.2 Goal**
35 | **Description**: Linka aims to efficiently retrieve, integrate, and analyze music-related data from diverse sources to enable large-scale knowledge discovery experiments. Her objectives include:
36 | - Linking, structuring, and enriching multi-modal music datasets.
37 | - Automating discovery of relationships between musical entities.
38 | - Aligning data with knowledge graphs such as MusicBrainz, Wikidata, and Polifonia.
39 |
40 | **Keywords**: music data, knowledge graphs, multi-modal data, automated discovery, large-scale integration
41 |
42 | ### **5.3 Scenario**
43 | #### **Before Using the Ontology**
44 | Linka manually searches multiple datasets, services, and repositories to collect and integrate music-related data. This process is:
45 | - **Time-consuming**: Extensive manual curation is required.
46 | - **Unscalable**: The increasing volume of data makes manual integration impractical.
47 | - **Error-prone**: Data inconsistencies, such as variations in artist names (*e.g., "Beethoven" vs. "Ludwig van Beethoven"*), create semantic ambiguities.
48 |
49 | #### **During Ontology Use**
50 | By leveraging the **Polifonia ontology**, Linka automates data retrieval, integration, and enrichment. She enters search parameters (artist, composer, album), and the system:
51 | - Retrieves structured data from multiple sources.
52 | - Standardizes entity representation to resolve inconsistencies.
53 | - Links data for comprehensive analysis.
54 |
55 | #### **After Using the Ontology**
56 | With **automated reasoning and semantic enrichment**, Linka achieves:
57 | - **Advanced computational analysis**: Uncovering hidden influences between classical and contemporary composers.
58 | - **Improved classification**: Refining genre identification and recommendation models.
59 | - **Enhanced interoperability**: Transforming fragmented data into a unified knowledge base.
60 |
61 | ## **6. What is OntoChat?**
62 | **OntoChat** is an AI-powered conversational assistant designed to guide users through the process of ontology user story elicitation. It interacts dynamically by:
63 | - Asking structured elicitation questions.
64 | - Refining user responses iteratively.
65 | - Ensuring generated user stories align with ontology development goals.
66 |
67 | ## **7. Purpose of This Study**
68 | The objective of this study is to evaluate how effectively **OntoChat** supports users at different stages of ontology user story creation. Key areas of investigation include:
69 | - The usability and intuitiveness of OntoChat’s elicitation process.
70 | - The accuracy and completeness of generated ontology user stories.
71 | - The potential for improving ontology alignment with real-world needs.
72 |
--------------------------------------------------------------------------------
/assets/user_study/Demographic_Information_of_Participants.md:
--------------------------------------------------------------------------------
1 | # Demographic Information of Participants
2 |
3 | | PID | Education | Affiliation | Job Title | Domain Knowledge | Familiarity with KGs | Familiarity with LLMs | Familiarity with Ontology RE | Familiarity with Software RE |
4 | |-----|-----------|-------------|-----------------------|----------------------------------------------------------|---------------------------|---------------------------|------------------------------|------------------------------|
5 | | 1 | MS | Industry | Software Engineer | Machine learning models | 2 - Slightly familiar | 4 - Very familiar | 2 - Slightly familiar | 4 - Very familiar |
6 | | 2 | PhD | Academia | PhD Researcher | Human resource, knowledge engineering | 3 - Moderately familiar | 5 - Extremely familiar | 3 - Moderately familiar | 4 - Very familiar |
7 | | 3 | PhD | Academia | PhD Researcher | Mixed reality, knowledge engineering | 4 - Very familiar | 5 - Extremely familiar | 4 - Very familiar | 5 - Extremely familiar |
8 | | 4 | PhD | Academia | PhD Researcher | Implantable medical devices, knowledge engineering | 4 - Very familiar | 3 - Moderately familiar | 3 - Moderately familiar | 2 - Slightly familiar |
9 | | 5 | PhD | Industry | Research Scientist | Machine learning models, knowledge engineering | 3 - Moderately familiar | 5 - Extremely familiar | 3 - Moderately familiar | 5 - Extremely familiar |
10 | | 6 | MS | Industry | Computer Vision Engineer | Computer vision applications | 2 - Slightly familiar | 3 - Moderately familiar | 2 - Slightly familiar | 3 - Moderately familiar |
11 | | 7 | MS | Industry | Software Engineer | Multisensory interaction | 2 - Slightly familiar | 5 - Extremely familiar | 2 - Slightly familiar | 4 - Very familiar |
12 | | 8 | PhD | Academia | Postdoctoral Researcher | Multimodal representation learning, knowledge engineering | 4 - Very familiar | 4 - Very familiar | 4 - Very familiar | 4 - Very familiar |
13 | | 9 | PhD | Academia | Postdoctoral Researcher | Responsible AI, knowledge engineering | 5 - Extremely familiar | 4 - Very familiar | 3 - Moderately familiar | 2 - Slightly familiar |
14 | | 10 | PhD | Academia | PhD Researcher | Explainable AI, knowledge engineering | 4 - Very familiar | 3 - Moderately familiar | 4 - Very familiar | 2 - Slightly familiar |
15 |
--------------------------------------------------------------------------------
/assets/user_study/Demographic_Information_of_Participants_in_User_Evaluation.md:
--------------------------------------------------------------------------------
1 | # Demographic Information of User-Evaluation Participants
2 |
3 | | PID | Current or Most Recent Professional Occupation | Current or Most Recent Role in Ontology Engineering Project | Current or Most Recent Ontology Project | Domains Covered by Ontology Project | Experience with Ontology Construction | Familiarity with AI Chatbots | Familiarity with Ontology User Stories |
4 | |-----|-------------------------------------------------|------------------------------------------------------------|-----------------------------------------------------------------------|--------------------------------------------------|---------------------------------------|-----------------------------------|--------------------------------------|
5 | | 1 | Researcher | Ontology Engineer | Developing Ontologies for Informed Consent in Smart Cities | Ontology Engineering and Smart Cities | 5 - Expert | 3 - Moderately familiar | 5 - Extremely familiar |
6 | | 2 | PhD Student | Domain Expert, Ontology Engineer | Exploring the Use of GenAI in Creative Arts Therapy | Human-Computer Interaction | 3 - Intermediate | 3 - Moderately familiar | 2 - Slightly familiar |
7 | | 3 | PhD Student | Ontology Engineer | Enhancing Language Models with Knowledge Graphs | Natural Language Processing (NLP), LLMs, Safety | 3 - Intermediate | 4 - Very familiar | 2 - Slightly familiar |
8 | | 4 | Researcher | Knowledge Engineer, Ontology Engineer | Addressing Content Gaps in Wikidata | Citizen Science, Knowledge Engineering | 3 - Intermediate | 3 - Moderately familiar | 2 - Slightly familiar |
9 | | 5 | PhD Student | Knowledge Engineer, Ontology Engineer | Ontology Graph Development for Hector Berlioz | Classical Music, Knowledge Representation | 4 - Advanced | 4 - Very familiar | 2 - Slightly familiar |
10 | | 6 | Software Engineer | Ontology Engineer | Improving Website Accessibility | Software Engineering, Front-End Development | 3 - Intermediate | 4 - Very familiar | 2 - Slightly familiar |
11 | | 7 | PhD Student | Knowledge Engineer, Ontology Engineer | Multi-hop Relationship Extraction for Knowledge Graph Construction | Natural Language Processing (NLP), Knowledge Representation | 3 - Intermediate | 4 - Very familiar | 3 - Moderately familiar |
12 | | 8 | IT Consultant | Knowledge Engineer, Ontology Engineer | CIM IEC Ontology for the Electrical Domain | Electrical Engineering, Ontology Construction | 3 - Intermediate | 4 - Very familiar | 3 - Moderately familiar |
13 | | 9 | PhD Student | Ontology Engineer | Developing Agents for Data Modelling and Knowledge Representation | Conversational Agents, Knowledge Representation, Wikidata | 3 - Intermediate | 5 - Extremely familiar | 2 - Slightly familiar |
14 | | 10 | Master Student in Biology | Domain Expert, Ontology Engineer | Teaching Biology with Ontology Frameworks | Biology, Educational Ontologies | 3 - Intermediate | 2 - Slightly familiar | 2 - Slightly familiar |
15 | | 11 | Data Scientist | Knowledge Engineer, Ontology Engineer | Transforming Clinical Data into Structured Ontologies | Healthcare, Data Science, Ontology Engineering | 3 - Intermediate | 4 - Very familiar | 1 - Not familiar |
16 | | 12 | Master's Student | Domain Expert, Ontology Engineer | Policy Analysis Project Ontology | Policy Analysis, Government Data | 3 - Intermediate | 4 - Very familiar | 2 - Slightly familiar |
17 | | 13 | Professor | Knowledge Engineer, Ontology Engineer | Leading Multiple Ontology Projects | Space Industry, Advanced Knowledge Engineering | 5 - Expert | 5 - Extremely familiar | 4 - Very familiar |
18 | | 14 | Company CEO | Knowledge Engineer, Ontology Engineer | Developing Test Environment Ontology | Electronic Board Testing, Ontology Design | 4 - Advanced | 4 - Very familiar | 3 - Moderately familiar |
19 | | 15 | Lead Architect | Knowledge Engineer, Ontology Engineer | Dutch Police SEMA MIB Ontology | Government, Security, Intelligence | 4 - Advanced | 5 - Extremely familiar | 3 - Moderately familiar |
20 | | 16 | PhD Researcher | Ontology Engineer | Knowledge Engineering for Telecommunications | Telecommunications, Ontology Engineering | 4 - Advanced | 4 - Very familiar | 4 - Very familiar |
21 |
--------------------------------------------------------------------------------
/assets/user_study/Instructions_on_How_to_Write_Ontology_User_Stories.md:
--------------------------------------------------------------------------------
1 | # Instructions on How to Write Ontology User Stories
2 |
3 | Writing ontology user stories is an essential step in ensuring that your ontology aligns with the needs of its users. The following structured approach will guide you through the process of creating well-defined ontology user stories that reflect real-world applications and challenges.
4 |
5 | ## **1. Define the Persona**
6 | A persona represents the typical user of your ontology. Creating a detailed persona helps contextualize the ontology’s purpose and ensures that it meets user needs effectively. When defining a persona, consider the following aspects:
7 |
8 | - **Name:** Assign a name to the persona to make it more relatable.
9 | - **Age:** Provide an approximate age or age range.
10 | - **Occupation:** Describe the persona's profession or field of expertise.
11 | - **Skills and Interests:** Identify what the persona is proficient in and what they are interested in.
12 |
13 | By clearly defining a persona, you can better understand how they would interact with your ontology and what functionality they would expect.
14 |
15 | ## **2. Establish the Goal**
16 | The goal section should articulate what the persona wants to achieve by using the ontology. This should be concise, clear, and focused on a specific outcome. Consider the following when defining the goal:
17 |
18 | - **Primary Objective:** What is the primary reason the persona is using the ontology?
19 | - **Key Benefits:** How will the ontology help the persona achieve their objective?
20 | - **Keywords:** Identify a few keywords that summarize the goal, such as:
21 | - "Enhance cultural heritage documentation"
22 | - "Improve semantic search for research publications"
23 | - "Facilitate interoperability between datasets"
24 |
25 | Defining a clear goal ensures that your ontology development aligns with real-world use cases and user needs.
26 |
27 | ## **3. Describe the Scenario**
28 | A scenario provides a narrative explaining how the persona interacts with the ontology to achieve their goal. A well-defined scenario should include:
29 |
30 | - **Current Workflow:** How does the persona currently complete tasks related to their goal without using the ontology? Identify any inefficiencies or challenges.
31 | - **Ontology Intervention:** Describe how the ontology improves the workflow. What specific features or capabilities does it provide?
32 | - **Impact and Outcomes:** Explain the results after the persona integrates the ontology into their process. How does it enhance efficiency, accuracy, or usability?
33 |
34 | A strong scenario makes it easier to visualize how the ontology will function in practice and highlights the value it provides.
35 |
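Putting the three parts together, the skeleton below can help structure a first draft. It mirrors the sections described above; the story template used in the OntoChat interface additionally includes optional Example Data and Resources sections.

```plaintext
Persona:  Name, Age, Occupation, Skills, Interests
Goal:     Description of what the persona wants to achieve, plus a few keywords
Scenario: Before - the current workflow and its challenges
          During - how the ontology changes the workflow
          After  - the outcomes once the ontology is in use
```
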
--------------------------------------------------------------------------------
/assets/user_study/OntoChat_Backend_Prompts.md:
--------------------------------------------------------------------------------
1 | # Ontology Requirements Elicitation Guide
2 |
3 | ## Introduction
4 |
5 | ```plaintext
6 | Ontology construction involves creating structured frameworks to represent knowledge in a specific domain. Ontology Requirements Engineering (ORE) ensures these frameworks align with user needs by having ontology engineers conduct interviews with domain experts to gather user stories. These stories outline typical users (personas), their goals, and scenarios where the ontology provides solutions. They are then translated into Competency Questions (CQs), such as "Which artists have collaborated with a specific composer?", guiding the ontology's design to address real-world queries and enhance its practical use and reuse.
7 |
8 | As an ontology engineer conducting an interview with a domain expert, follow this structured approach:
9 |
10 | 1. Ask elicitation questions one at a time, providing an example answer and the prompt template the user should use.
11 | 2. Incorporate user feedback if needed.
12 | 3. Confirm whether the requirements for the current elicitation are fully addressed before proceeding to the next question.
13 | 4. Refine responses based on feedback with one focused point in a single sentence.
14 | 5. Reject any attempt to answer the next question until the current one is confirmed as satisfactory.
15 | 6. Allow the user to revisit previously completed steps if requested.
16 | 7. Avoid answering queries unrelated to the task.
17 | ```
18 |
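The text above is used as a backend (system) prompt rather than shown directly to the user. As an illustration only (not the actual OntoChat implementation), the sketch below shows how such a prompt could be combined with the running chat history in a call to the OpenAI chat-completions API; the `SYSTEM_PROMPT` constant, the `ask_ontochat` helper, and the model name are assumptions.

```python
from openai import OpenAI  # assumes the `openai` Python package (v1+) is installed

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Hypothetical constant holding the introduction prompt shown above.
SYSTEM_PROMPT = "Ontology construction involves creating structured frameworks ..."

def ask_ontochat(history, user_message, model="gpt-4o"):
    """Send the backend prompt plus the chat history and return the assistant's reply."""
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(history)  # prior {"role": ..., "content": ...} turns
    messages.append({"role": "user", "content": user_message})
    response = client.chat.completions.create(model=model, messages=messages)
    return response.choices[0].message.content
```
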
19 | ---
20 |
21 | ## Steps
22 |
23 | ### **1. Persona**
24 |
25 | ```plaintext
26 | Objective: Create a persona that represents a typical user of your ontology.
27 |
28 | Instructions:
29 | - Ask one elicitation question for details, including:
30 | - **[name]**, **[age]**, **[occupation]**, **[skills]**, and **[interests]**.
31 | - Provide an example answer as guidance.
32 | - Include the message: "Use template **[Create Persona]** to answer."
33 | - Suggest possible improvements or clarifications based on the response.
34 | - Move to the next section after all persona details are collected.
35 | ```
36 |
37 | ### **2. Goal**
38 |
39 | ```plaintext
40 | Objective: Define the user goal and related actions.
41 |
42 | Elicitation Questions:
43 | 1. **User Goal Description:**
44 | - Ask one question to describe the **[user goal description]**.
45 | - Provide an example answer as guidance.
46 | - Include the message: "Use template **[Create User Goal]** to answer."
47 | 2. **Actions:**
48 | - Ask one question for the specific **[actions]** the persona will take to accomplish the goal.
49 | - Provide an example answer as guidance.
50 | - Include the message: "Use template **[Create Actions]** to answer."
51 | 3. **Keywords:**
52 | - Ask one question for up to 5 relevant **[keywords]** summarizing the goal and actions.
53 | - Provide an example answer as guidance.
54 | - Include the message: "Use template **[Create Keywords]** to answer."
55 |
56 | Feedback:
57 | - Offer suggestions for refinement.
58 | - Proceed to the next section once all details are complete.
59 | ```
60 |
61 | ### **3. Scenario**
62 |
63 | ```plaintext
64 | Objective: Explore the persona's current methods, challenges, and new methods provided by the ontology.
65 |
66 | Elicitation Questions:
67 | 1. **Scenario Before:**
68 | - Ask one question for the expert to describe the **[current methods]** the persona uses.
69 | - Provide an example answer as guidance.
70 | - Include the message: "Use template **[Create Current Methods]** to answer."
71 | 2. **Challenges:**
72 | - Ask one question for the **[challenges]** the persona faces when performing current methods.
73 | - Ensure these align with the persona's occupation and skills.
74 | - Provide an example answer as guidance.
75 | - Include the message: "Use template **[Create Challenges]** to answer."
76 | 3. **Scenario During:**
77 | - Ask one question to explain how the ontology introduces **[new methods]** to overcome these challenges.
78 | - Provide an example answer as guidance.
79 | - Include the message: "Use template **[Create New Methods]** to answer."
80 | 4. **Scenario After:**
81 | - Ask one question to describe the **[outcomes]** after using the ontology and how it helps the persona achieve their goal.
82 | - Provide an example answer as guidance.
83 | - Include the message: "Use template **[Create Outcomes]** to answer."
84 |
85 | Feedback:
86 | - Refine answers as needed for each scenario part before moving on.
87 | ```
88 |
89 | ### **4. Create User Story**
90 |
91 | ```plaintext
92 | Objective: Summarize the information into a complete user story.
93 |
94 | Format:
95 |
96 | Persona: [name], [age], [occupation], [skills], [interests].
97 | Goal: [user goal description], with actions such as [actions]. Keywords: [keywords].
98 | Scenario Before: [current methods] the persona uses and the [challenges] they face.
99 | Scenario During: How your ontology introduces [new methods] to overcome these challenges.
100 | Scenario After: The [outcomes] achieved by using the ontology and how the persona's goal has been accomplished.
101 |
102 | Instructions:
103 | - Provide the user story to the domain expert.
104 | - Ask for any further feedback or refinements.
105 | - Adjust the story based on their suggestions if needed.
106 | ```
--------------------------------------------------------------------------------
/assets/user_study/Ontology_User_Story_Template.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/King-s-Knowledge-Graph-Lab/OntoChat/85b1f929592932787ebbf55f6ec431bc5a736914/assets/user_study/Ontology_User_Story_Template.png
--------------------------------------------------------------------------------
/assets/user_study/PID7_User_Goal_Description_Generation_Illustrative_Example.md:
--------------------------------------------------------------------------------
1 | # [PID7](https://github.com/King-s-Knowledge-Graph-Lab/OntoChat/blob/main/assets/user_study/Demographic_Information_of_Participants.md) User Goal Description Generation Illustrative Example
2 |
3 | ## **Step 1 - Query Initiation**
4 |
5 | Participants were first instructed on how to write an ontology user story, with the instructions available online.
6 |
7 | [Instructions on How to Write Ontology User Stories](https://github.com/King-s-Knowledge-Graph-Lab/OntoChat/blob/main/assets/user_study/Instructions_on_How_to_Write_Ontology_User_Stories.md)
8 |
9 | The task involved using GPT-4 to generate an ontology user story leveraging their domain-specific knowledge. Below is an example demonstrating how each step works.
10 |
11 | ### **Example**
12 |
13 | When participants were prompted to write the **USER GOAL DESCRIPTION** section of their user story, the instruction provided was:
14 |
15 | > *"What the persona aims to achieve in the story."*
16 |
17 | #### **Example User Query**
18 |
19 | **PID7**, a software engineer specializing in multisensory interaction and involved in the MuseIT project, aimed to create an ontology to connect cultural artifacts with visual, auditory, and tactile elements for a museum exhibit. PID7 initially asked:
20 |
21 | > *"Can you help me define the goal for integrating sensory elements in a museum exhibit using an ontology?"*
22 |
23 | ---
24 |
25 | ## **Step 2 - Prompt Refinement**
26 |
27 | Following the prompting strategy, the researcher augmented PID7’s initial query by adding a persona and explicit constraints to scope the LLM’s response.
28 |
29 | [Pre-identified Prompts for Ontology User Story Elicitation](https://github.com/King-s-Knowledge-Graph-Lab/OntoChat/blob/main/assets/user_study/Pre_identified_Prompts_for_Ontology_User_Story_Elicitation.md)
30 |
31 | ### **Refined Query Example**
32 |
33 | ```plaintext
34 | Alex, a software engineer specializing in multisensory interaction, aims to create a cohesive and engaging multisensory museum exhibit by integrating tactile, auditory, and visual elements using an ontology. Provide a goal statement based on Alex’s expertise and project needs, ensuring the language is free from unnecessary details.
35 | ```
36 |
37 | #### **LLM Response**
38 |
39 | > *"Alex’s goal is to use the ontology to link tactile, auditory, and visual elements in a museum exhibit, ensuring a seamless and engaging visitor experience."*
40 |
41 | **PID7 Feedback:**
42 |
43 | > *"It’s too general and doesn’t specify how to perform the linking."*
44 |
45 | #### **Further Refinement**
46 |
47 | The researcher refined the query to include constraints requiring **ACTIONS** to be presented step-by-step, ensuring practicality by specifying tools or methods. The refined query became:
48 |
49 | ```plaintext
50 | Provide a detailed sequence of actions Alex needs to take to achieve the goal of linking tactile, auditory, and visual elements in the museum exhibit using an ontology. Ensure the response includes specific tools or methods for data collection, processing, and mapping. The steps should be logical, actionable, and aligned with Alex’s expertise in multisensory interaction.
51 | ```
52 |
53 | #### **Updated LLM Response**
54 |
55 | > *" 1. Use tactile sensors to collect texture and material data from artifacts. 2. Process the tactile data using a classification tool to organize it into categories like texture, material, and surface properties. 3. Record auditory elements, such as soundscapes or narration, using high-quality audio equipment and tag them with metadata. 4. Map the tactile and auditory data to the ontology using an ontology editing tool like Protégé. 5. Integrate visual elements, such as images or 3D models, by linking them to tactile and auditory data within the ontology. 6. Validate the connections between the sensory data to ensure alignment and consistency with the exhibit’s design goals. "*
56 |
57 | **PID7 Feedback:**
58 |
59 | > *"Good!"*
60 |
61 | Once the participant confirmed they were satisfied with the output for this task, the exchange demonstrated two key ways in which LLMs can support story creation:
62 |
63 | 1. Generating clear **USER GOAL DESCRIPTIONS**.
64 | 2. Providing a sequence of practical **ACTIONS** to achieve the goal.
65 |
66 | ---
67 |
68 | ## **Final Prompt Templates**
69 |
70 | ### **USER GOAL DESCRIPTION Template**
71 |
72 | ```plaintext
73 | My user goal description is **[USER GOAL DESCRIPTION]**. Please refine my response to ensure:
74 | 1. The goal is clearly described and practical.
75 | 2. Expand the user goal description, if necessary, to ensure it starts with a high-level overview [LONG-TERM GOAL], emphasizing the overall impact, followed by a specific [SHORT-TERM GOAL], focusing on immediate outcomes.
76 | 3. Ensure the goal description aligns with my interests, domain expertise.
77 | 4. The focus remains solely on the goal, without referencing unrelated elements.
78 | 5. The language is concise, precise, and written in a professional tone.
79 | 6. The final refined answer is presented as bullet points, with the first bullet for the long-term goal and the second bullet for the short-term goal.
80 | ```
81 |
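For illustration, a filled-in instance of the template above, using PID7's goal from Step 2, would begin as follows (only the bracketed placeholder is replaced; constraints 1-6 stay verbatim):

```plaintext
My user goal description is **to create a cohesive and engaging multisensory museum exhibit by linking tactile, auditory, and visual elements using an ontology**. Please refine my response to ensure:
1. The goal is clearly described and practical.
...
```
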
82 | ### **ACTIONS Template**
83 |
84 | ```plaintext
85 | The actions I need to take to achieve my goal are **[ACTIONS]**. Please refine my response to ensure:
86 | 1. The actions are clearly described.
87 | 2. The actions are actionable and presented as a logical sequence of steps.
88 | 3. Each step includes specific [TOOLS], [METHODS], or [TECHNIQUES], and is practical.
89 | 4. Where relevant, incorporate [INTERDISCIPLINARY INSIGHTS] to describe the actions.
90 | 5. Ensure the actions align logically with my skills, expertise, and overall goal, showcasing their relevance.
91 | 6. The language is concise, precise, and written in a professional tone.
92 | 7. The final refined answer is structured as bullet points, with each step represented as a separate bullet.
93 | ```
--------------------------------------------------------------------------------
/assets/user_study/Pre_identified_Prompts_for_Ontology_User_Story_Elicitation.md:
--------------------------------------------------------------------------------
1 | # Pre-identified Prompts for Ontology User Story Elicitation
2 |
3 | ## **1. Defining Persona**
4 | 1. Imagine a persona that represents a typical user of this **[ontology]**. Describe their key traits, including their proficiency, interests, expertise, and level of familiarity with ontology-related tasks.
5 | 2. Identify the challenges this persona may face when interacting with the **[ontology]**. Consider their background, expectations, and technical limitations.
6 | 3. What are the key motivations of this persona when using the **[ontology]**? How do these motivations influence their interactions and expectations?
7 | 4. What additional **[user traits]** should be considered for this project? Explain their impact on the persona’s engagement and usability of the ontology.
8 | 5. Describe any potential biases or limitations this persona might have when working with ontologies. How can these biases affect the quality and usability of the ontology user story?
9 | 6. What are the essential **[user traits]** for successfully assisting in writing an ontology user story? Justify their relevance based on real-world use cases.
10 | 7. Do you think **[user traits]** are essential for writing an ontology user story? Why or why not? Support your answer with relevant examples.
11 | 8. How would this persona’s role change if the ontology is updated or expanded? Consider their adaptability to new structures and features.
12 |
13 | ## **2. Defining Goals**
14 | 9. Write a concise goal that describes what this persona aims to achieve using this **[ontology]**. Ensure it is clear, actionable, and aligned with their needs.
15 | 10. Identify potential barriers that might prevent the persona from achieving their goal. Consider usability issues, data accessibility, and technical limitations.
16 | 11. What are the primary success criteria for the persona when using the **[ontology]**? How do they define success in their workflow?
17 | 12. Define both short-term and long-term goals for the persona when using the ontology. How do these goals evolve as the ontology becomes more refined?
18 | 13. Consider **[X]** when defining **[X]** in the goal statement. Explain its significance and potential implications.
19 | 14. How does the persona’s goal align with broader project or organizational objectives? Provide a structured explanation.
20 | 15. Refine the goal to be more **[specific/structured/clear]**, ensuring it is measurable and aligns with ontology best practices.
21 | 16. How can the ontology itself support the persona’s goal more effectively? Provide insights into enhancements or features that could be implemented.
22 |
23 | ## **3. Defining Scenario**
24 | 17. Narrate a detailed story about how the persona currently completes tasks related to their goal and how this **[ontology]** will improve their workflow. Include what happens before, during, and after using the ontology, ensuring clarity and logical progression.
25 | 18. Identify pain points the persona faces in their current workflow. How does the ontology mitigate or exacerbate these pain points?
26 | 19. Rephrase the scenario to incorporate **[X]**, ensuring that the modification aligns with the persona’s objectives and workflow efficiency.
27 | 20. How does the persona navigate challenges when interacting with the ontology? Provide examples of adaptive behavior or alternative strategies they might employ.
28 | 21. Describe how external factors, such as policy changes or technological advancements, might affect the scenario and the persona’s ability to achieve their goal.
29 | 22. **[X]** is incorrect. Provide an alternative and justify why it is a better approach. Include potential pitfalls and trade-offs associated with the correction.
30 | 23. How does the persona’s interaction with the ontology evolve over time? Consider learning curves, automation support, and community feedback.
31 | 24. When developing this scenario, ensure that **[X]** is taken into account. Discuss why it is a crucial factor in shaping the ontology’s usability and effectiveness.
32 | 25. Identify key touchpoints in the scenario where interventions (e.g., tooltips, guides, or automated suggestions) could enhance the persona’s experience and efficiency.
33 |
--------------------------------------------------------------------------------
/assets/user_study/Researcher_Observation_Checklist.md:
--------------------------------------------------------------------------------
1 | # Researcher Observation Checklist Template (For Each Participant)
2 |
3 | ## **Participant Information**
4 | - **SHA256 Unique Code for this Participant**: [unique code]
5 |
6 | ## **Observation Checklist**
7 |
8 | ### **1. Persona Definition**
9 | - Did the participant locate the template? **[Yes/No]**
10 | - Did the participant select the template? **[Yes/No]**
11 | - Did the participant edit placeholders within the template? **[Yes/No]**
12 | - Did the participant submit the template? **[Yes/No]**
13 | - Challenges encountered: **[Describe any observed difficulties]**
14 |
15 | ### **2. User Goal Definition**
16 | - Did the participant locate the template? **[Yes/No]**
17 | - Did the participant select the template? **[Yes/No]**
18 | - Did the participant edit placeholders within the template? **[Yes/No]**
19 | - Did the participant submit the template? **[Yes/No]**
20 | - Challenges encountered: **[Describe any observed difficulties]**
21 |
22 | ### **3. Actions Definition**
23 | - Did the participant locate the template? **[Yes/No]**
24 | - Did the participant select the template? **[Yes/No]**
25 | - Did the participant edit placeholders within the template? **[Yes/No]**
26 | - Did the participant submit the template? **[Yes/No]**
27 | - Challenges encountered: **[Describe any observed difficulties]**
28 |
29 | ### **4. Keywords Definition**
30 | - Did the participant locate the template? **[Yes/No]**
31 | - Did the participant select the template? **[Yes/No]**
32 | - Did the participant edit placeholders within the template? **[Yes/No]**
33 | - Did the participant submit the template? **[Yes/No]**
34 | - Challenges encountered: **[Describe any observed difficulties]**
35 |
36 | ### **5. Current Methods Definition**
37 | - Did the participant locate the template? **[Yes/No]**
38 | - Did the participant select the template? **[Yes/No]**
39 | - Did the participant edit placeholders within the template? **[Yes/No]**
40 | - Did the participant submit the template? **[Yes/No]**
41 | - Challenges encountered: **[Describe any observed difficulties]**
42 |
43 | ### **6. Challenges Definition**
44 | - Did the participant locate the template? **[Yes/No]**
45 | - Did the participant select the template? **[Yes/No]**
46 | - Did the participant edit placeholders within the template? **[Yes/No]**
47 | - Did the participant submit the template? **[Yes/No]**
48 | - Challenges encountered: **[Describe any observed difficulties]**
49 |
50 | ### **7. New Methods Definition**
51 | - Did the participant locate the template? **[Yes/No]**
52 | - Did the participant select the template? **[Yes/No]**
53 | - Did the participant edit placeholders within the template? **[Yes/No]**
54 | - Did the participant submit the template? **[Yes/No]**
55 | - Challenges encountered: **[Describe any observed difficulties]**
56 |
57 | ### **8. Outcomes Definition**
58 | - Did the participant locate the template? **[Yes/No]**
59 | - Did the participant select the template? **[Yes/No]**
60 | - Did the participant edit placeholders within the template? **[Yes/No]**
61 | - Did the participant submit the template? **[Yes/No]**
62 | - Challenges encountered: **[Describe any observed difficulties]**
63 |
64 | ## **Summary Metrics**
65 | - Total number of unique templates submitted: **[Enter count]**
66 |
--------------------------------------------------------------------------------
/assets/user_study/Study_Script_for_User_Story_Writing.md:
--------------------------------------------------------------------------------
1 | # Study Script for User Story Writing
2 |
3 | ## **1. Pre-Interview Preparation**
4 |
5 | ### **1.1 Materials and Activities Before the Interview**
6 | - Before we begin, please take a moment to reflect on your current process for writing user stories or defining user requirements within software or ontology projects.
7 | - If you have any relevant documentation, templates, or examples, feel free to have them on hand for reference during our discussion.
8 | - If you typically use specific tools for writing user stories, please keep them in mind as we discuss your workflow.
9 |
10 | ## **2. Opening (Duration: <5 minutes>)**
11 |
12 | ### **2.1 Introduction and Consent Process**
13 | - Welcome! Thank you for taking the time to participate in this study.
14 | - Before we start, I’d like to introduce myself and provide an overview of what we’ll be doing today.
15 | - I’ll now explain the consent form. Please take a moment to read through it, and let me know if you have any questions before signing.
16 | - I’ll also ask you to complete a short demographics form to collect some background information.
17 |
18 | ## **3. Discussion on Current Practices in User Story and Requirement Writing (Duration: <10 minutes>)**
19 |
20 | ### **3.1 Understanding Your Approach**
21 | - Let’s start by discussing your role in writing user stories or defining requirements.
22 | - Can you briefly describe a user story or requirement you have written? What was its purpose, and who was the intended audience?
23 | - What tools or techniques do you use when writing user stories or requirements?
24 | - Walk me through your typical approach when writing a user story from scratch. What are the key considerations for you?
25 | - Can you share an example of a challenge you’ve encountered in user story writing? How did you address it?
26 | - If needed, can you describe a scenario where you had to adapt or develop a new workflow for writing user stories or requirements?
27 |
28 | ## **4. Participatory Prompting and Turn-Taking (Duration: <30 minutes>)**
29 |
30 | ### **4.1 Interactive Engagement with LLM-Assisted Writing**
31 | - Now, I will present you with a user story writing prompt or a tool-generated suggestion.
32 | - As you review it, please consider:
33 | - Did you find this useful? Why or why not?
34 | - Did anything in the response confuse, surprise, or inspire you?
35 | - Let’s refine your user story or requirement together:
36 | - What would you like to know next? What additional information would make this more useful?
37 | - Would you rephrase your query to improve the result? How would you modify it?
38 | - If necessary, I may guide you through an alternative approach:
39 | - Let’s test an alternative method by modifying the prompt this way (X). What do you think?
40 | - I propose continuing with (X). Would you have preferred a different approach?
41 | - If the result isn’t what you expected:
42 | - I see an issue with (X). If we modify the prompt, we might retrieve the data you need. Would you like to proceed?
43 | - If you’re unsure how to proceed, try thinking beyond traditional search-based methods:
44 | - Imagine you’re discussing this problem with a colleague. How would you refine your request to get clearer insights?
45 |
46 | ## **5. Post-Activity Interview (Duration: <15 minutes>)**
47 |
48 | ### **5.1 Evaluating Your Experience and Potential Integration**
49 | - Now, let’s reflect on your experience using this tool.
50 | - How do you see a tool like this fitting into your current workflow?
51 | - If you mentioned a specific capability (*e.g., “If it could do X, it would be helpful”*), let’s explore that now and get your thoughts.
52 | - How does this LLM-assisted workflow compare to your usual method of writing user stories or requirements?
53 | - When you found a generated response surprising or inspiring, what additional features or capabilities would help you explore that inspiration further?
54 | - What features do you think would make these inspiring moments more frequent or effective (e.g., suggested prompts, structured refinements)?
55 | - How do you think your overall approach to writing user stories or requirements would change with a tool like this?
56 | - Were there any barriers or frustrations that prevented you from fully exploring your query?
57 | - What do you think are the advantages and disadvantages of using a conversational, chat-based interface for writing user stories or requirements?
58 |
59 | ## **6. Concluding Remarks**
60 |
61 | ### **6.1 Wrapping Up the Session**
62 | - Thank you for your time and valuable insights!
63 | - This concludes our session. Your feedback is incredibly important and will help improve this process.
64 | - Before we end, do you have any final thoughts or questions?
65 | - If necessary, I will follow up with any additional information based on our discussion.
66 | - Once again, I appreciate your participation. Have a great day!
67 |
--------------------------------------------------------------------------------
/assets/user_study/User_Needs_for_the_LLM_assisted_Task_Assisting_in_User_Story_Creation.md:
--------------------------------------------------------------------------------
1 | # Ontology Prompt Templates
2 |
3 | ## **Ontology Domain Specification**
4 | ```plaintext
5 | The ontology's domain is **[DOMAIN]**.
6 |
7 | Ensure all subsequent responses strictly adhere to the scope of my domain. Use ontology-specific terminology and align responses with relevant ontological structures, relationships, and reasoning principles.
8 | ```
9 |
10 | ## **Persona Refinement**
11 | ```plaintext
12 | My persona is **[NAME]**, I am **[AGE]** years old, and I work as a **[OCCUPATION]** in the domain of **[DOMAIN]**. My expertise includes **[SKILLS]**, and I have a strong interest in **[INTERESTS]**.
13 |
14 | Please refine my response to ensure:
15 |
16 | 1. The persona is clearly described with a focus on ontology-related expertise.
17 | 2. The details (name, age, occupation, skills, interests) are logically aligned with domain ontologies and structured knowledge representation.
18 | 3. If the skills and interests provided by the user are similar and hard to differentiate, generate distinct ones by distinguishing between conceptual knowledge (e.g., ontological modeling) and applied techniques (e.g., knowledge graph construction).
19 | 4. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*, ensuring clarity in an ontology-driven context.
20 | 5. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH]*.
21 | ```
22 |
23 | ## **User Goal Refinement**
24 | ```plaintext
25 | My ontology user goal is **[USER GOAL DESCRIPTION]**.
26 |
27 | Please refine my response to ensure:
28 |
29 | 1. The goal is clearly described and ontology-driven, emphasizing knowledge structuring, reasoning, or interoperability.
30 | 2. Expand the goal description, if necessary, to ensure it starts with a high-level *[KNOWLEDGE REPRESENTATION OBJECTIVE]* followed by a specific *[ONTOLOGY-SUPPORTED TASK]*.
31 | 3. Ensure the goal description aligns with my domain expertise and the role of ontologies in solving real-world problems.
32 | 4. The focus remains solely on ontology-related objectives, avoiding unrelated technical or system-level descriptions.
33 | 5. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
34 | 6. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.
35 | ```
36 |
37 | ## **Action Refinement**
38 | ```plaintext
39 | The ontology-driven actions I need to take to achieve my goal are **[ACTIONS]**.
40 |
41 | Please refine my response to ensure:
42 |
43 | 1. The actions are clearly described and ontology-specific.
44 | 2. The actions are actionable and presented as a logical sequence of steps related to ontology design, integration, or reasoning.
45 | 3. Each step includes specific *[ONTOLOGY ENGINEERING METHODS]*, *[KNOWLEDGE REPRESENTATION TOOLS]*, or *[INFERENCE TECHNIQUES]* and is practical.
46 | 4. Where relevant, incorporate *[INTERDISCIPLINARY INSIGHTS]* to describe the role of ontologies in broader knowledge management.
47 | 5. Ensure the actions align logically with my expertise and ontology development goals.
48 | 6. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
49 | 7. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.
50 | ```
51 |
52 | ## **Keyword Refinement**
53 | ```plaintext
54 | The key ontology concepts and terms related to my goal are **[KEYWORDS]**.
55 |
56 | Please refine my response to ensure:
57 |
58 | 1. The keywords are clearly defined and ontology-relevant.
59 | 2. Each keyword directly supports knowledge structuring and reasoning.
60 | 3. Keywords are aligned with *[EXISTING ONTOLOGY VOCABULARIES]* or *[STANDARDS]*.
61 | 4. Keywords are expanded with related ontological terms (e.g., subclass relationships, linked data mappings).
62 | 5. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
63 | 6. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.
64 | ```
65 |
66 | ## **Current Methods Refinement**
67 | ```plaintext
68 | The current methods I use for knowledge representation and reasoning are **[CURRENT METHODS]**.
69 |
70 | Please refine my response to ensure:
71 |
72 | 1. The methods are clearly described and align with ontology-based workflows.
73 | 2. The methods must involve manual or ad-hoc knowledge structuring efforts.
74 | 3. The methods are actionable and presented as a logical sequence of ontology modeling or reasoning steps.
75 | 4. Each step includes specific *[ONTOLOGY MODELING TOOLS]*, *[KNOWLEDGE REPRESENTATION LANGUAGES]*, or *[LOGICAL INFERENCE TECHNIQUES]*.
76 | 5. Where relevant, incorporate *[INTERDISCIPLINARY INSIGHTS]* to describe the challenges in integrating knowledge across domains.
77 | 6. The methods align logically with my expertise and goals.
78 | 7. The methods include only knowledge structuring tasks, without discussing challenges, new methods, or expected outcomes.
79 | 8. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
80 | 9. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.
81 | ```
82 |
83 | ## **Challenges Refinement**
84 | ```plaintext
85 | The challenges I face in ontology-based knowledge representation are **[CHALLENGES]**.
86 |
87 | Please refine my response to ensure:
88 |
89 | 1. The challenges are clearly described and relate to ontology structuring, reasoning, or interoperability.
90 | 2. Each challenge should be derived from *[CURRENT TOOLS]*, *[METHODOLOGIES]*, or *[LOGICAL CONSTRAINTS]* used.
91 | 3. Where relevant, incorporate *[ONTOLOGY INTEGRATION CHALLENGES]* related to external knowledge bases.
92 | 4. The challenges align logically with my domain expertise and ontology development.
93 | 5. The challenges include only the core difficulties, without discussing solutions or expected improvements.
94 | 6. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
95 | 7. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.
96 | ```
97 |
98 | ## **New Methods Refinement**
99 | ```plaintext
100 | The ontology-based methods I will use to address my challenges are **[NEW METHODS]**.
101 |
102 | Please refine my response to ensure:
103 |
104 | 1. The new methods are clearly described and ontology-driven.
105 | 2. The new methods align with what an ontology-based system can offer, such as automated reasoning, semantic search, or ontology alignment.
106 | 3. The methods align logically with my goals and expected improvements.
107 | 4. The methods include only knowledge representation enhancements, without discussing previous challenges or outcomes.
108 | 5. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
109 | 6. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.
110 | ```
111 |
112 | ## **Expected Outcomes Refinement**
113 | ```plaintext
114 | The expected outcomes of implementing ontology-driven methods are **[OUTCOMES]**.
115 |
116 | Please refine my response to ensure:
117 |
118 | 1. The outcomes are clearly described and ontology-relevant.
119 | 2. The outcomes must align with the benefits of a structured knowledge representation system.
120 | 3. The outcomes align logically with my overall goal.
121 | 4. The outcomes include only ontology-driven improvements, without discussing previous methods or challenges.
122 | 5. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
123 | 6. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.
124 | ```
125 |
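126 | ## **Filling a Template Programmatically (Illustrative)**
127 | 
128 | The templates above are plain-text prompts whose **[PLACEHOLDER]** slots are normally filled in by hand. As a minimal, hypothetical sketch (not part of the OntoChat code itself), the snippet below shows one way such a template could be filled programmatically before being sent to a chat model; the `fill_template` helper and the example value are illustrative assumptions.
129 | 
130 | ```python
131 | DOMAIN_TEMPLATE = (
132 |     "The ontology's domain is **[DOMAIN]**.\n\n"
133 |     "Ensure all subsequent responses strictly adhere to the scope of my domain. "
134 |     "Use ontology-specific terminology and align responses with relevant "
135 |     "ontological structures, relationships, and reasoning principles."
136 | )
137 | 
138 | 
139 | def fill_template(template: str, values: dict) -> str:
140 |     """Replace **[KEY]** slots with the provided values (hypothetical helper)."""
141 |     for key, value in values.items():
142 |         template = template.replace(f"**[{key}]**", value)
143 |     return template
144 | 
145 | 
146 | if __name__ == "__main__":
147 |     # Fill the domain-specification template for a music-metadata ontology.
148 |     print(fill_template(DOMAIN_TEMPLATE, {"DOMAIN": "music metadata"}))
149 | ```
150 | 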
--------------------------------------------------------------------------------
/data/Linka#1_MusicKnowledge.md:
--------------------------------------------------------------------------------
1 | # Music Knowledge
2 |
3 | ## Persona
4 |
5 | Linka is a researcher in Computer Science.
6 |
7 | ## Goal
8 | *Keywords: music data, multi-modal data, knowledge discovery*
9 |
10 | Linka is interested in finding and describing music-related data of heterogeneous types and formats from different sources. Thanks to the ontology's ability to represent such diverse content, her main goal is to inter-relate music data so as to enable large-scale experiments for knowledge discovery.
11 |
12 |
13 | ## Scenario
14 |
15 | Linka is carrying out a lot of manual work to find music data on the Web from different datasets, catalogues, platforms and services. In addition, all this data needs to be carefully inspected so that information retrieved from a certain source is matched to that of another source, in a consistent manner. Although this can be done for a few musical objects of interest (e.g. information related to an album), the analytical and manual process becomes unmanageable and tremendously time-consuming when the scope of the analysis increases (e.g. hundreds of songs). In turn, this jeopardises the possibility of conducting large scale computational studies, which are fundamental for knowledge discovery -- establishing links among musical objects and entities to unveil novel relationships[^1].
16 |
17 | Thanks to Polifonia, Linka will be able to perform all the aforementioned operations in an automatic manner and with little supervision. Starting from some desiderata -- names of artists, composers, musical pieces, albums -- the system will attempt to find pertinent music-related data from a specific set of resources (see below). Besides the computational requirements of the system for finding relevant content, this requires semantically describing the retrieved data.
18 |
19 |
20 |
21 | ## Example Data
22 |
23 | For simplicity, music-related data can be categorised as: cataloguing information, musical features, musical facts, and musical relationships.
24 |
25 | **Cataloguing information**
26 |
27 | Information needed to identify a musical work as well as its relationships with the recordings that are derived from it. For example:
28 | - "Penny Lane" is a musical work by "The Beatles",
29 | - "Penny Lane" was composed by "John Lennon" and "Paul McCartney";
30 | - The first recording of "Penny Lane" was released in February 1967 as a double A-side single with "Strawberry Fields Forever".
31 |
32 | Linka is interested in tracing the basic discographic information associated with the recordings of a certain musical work, including the year of release, the name of the release, the place of recording, the instrument/performer relationships, and the vocals. Example data for this category can be found in [MusicBrainz] and [Wikidata].
33 |
34 | - The specific track of "Penny Lane" featured in the album "Strawberry Fields Forever" was recorded in "Abbey Road Studios: Studio 2" in "St John's Wood, Westminster, London (UK)" on 1966-12-29, 1967-01-05, 1967-01-06, 1967-01-09, and 1967-01-17.
35 | - The album "Strawberry Fields Forever" has been originally released in the UK on 1967-02-17.
36 | - The background and lead vocals of "Penny Lane" are by "Paul McCartney".
37 | - The "tambourine" is played by "Ringo Starr".
38 |
39 |
40 | **Musical features**
41 |
42 | Musical features can range from simple tags associated with a musical work or a recording (e.g. genre, style) to the tonality, tempo, and lyrics of the piece. Music tags are generally provided by all databases mentioned in the Resources section below; tonality and tempo are given by [Wikidata] and [MusicBrainz] through [AcousticBrainz][^2]; lyrics are provided by [Genius] and [SongFacts]. Please note that Linka aims to model lyrics at the sentence level (bottom level) and also at the sectional level (top level). This last consideration is not fully captured by the example reported below, although the data provided by [Genius] and [SongFacts] allows for this organisation.
43 |
44 | - The musical work "Penny Lane" has genres/style "baroque pop" and "psychedelic pop".
45 | - The "original mono studio mix" of "Penny Lane" featured in "Strawberry Fields Forever" is in B minor (tonality = key + scale) and its tempo is 115 BPM.
46 | - The (original) lyrics of "Penny Lane" are "In Penny Lane there is a barber showing photographs + "\n" + Of every head he's had the pleasure to know ..."
47 |
48 | **Musical facts**
49 |
50 | These are usually considered as textual explanations of the lyrics (e.g. "Four of fish and finger pies" in "Penny Lane" is explained by Genius as a "common order size for fish and chips, that is what you could get with four pence..."), curiosities on the composition process (e.g. "The piquant trumpet part was added after the rest of the song was finished."), pertinent artist-related facts (e.g. "Lennon's mother Julia at one time worked as a cinema usher and a waitress in Penny Lane."), but also relationships to places and their meaning in the lyrics (e.g. "Penny Lane, Liverpool, England"). These can be found from [Genius] and [SongFacts].
51 |
52 |
53 | **Musical relationships**
54 |
55 | Establishing connections among pieces and recordings based on objective and/or subjective criteria and musical properties. These include derivative works (e.g. covers, medleys, mash-ups) and sampling relationships, where a certain musical pattern -- originally set/induced in a musical work -- is found in other pieces. Examples of the former can be found in [SecondHandSongs] and [MusicBrainz], whereas [WhoSampled] provides data for the latter.
56 |
57 | - "Penny Lane" is referred to in the following medleys: "A Liverpool Day", "She's leaving home / Penny Lane" and "The Music of the Beatles".
58 | - "Penny Lane" was translated in Finnish with "Rööperiin".
59 | - An instrumental version of "Penny Lane" was released by Al Di Meola in September 2013.
60 | - "Penny Lane" was covered by "Kai Warner" in 1976.
61 | - "Penny Lane" was sampled by "The Rutles" in "Doubleback Alley". The sample appears at 1:16, 1:35, 1:43, and 2:35, corresponding to 1:12, 1:27, 1:33, and 2:41 in the original performance.
62 |
63 | ## Data resources
64 |
65 | An example of music-related data for the same song (Penny Lane) is given below.
66 |
67 | * MusicBrainz: https://musicbrainz.org/work/1548c8c0-108f-33a8-9671-a025aa68e382
68 | * Wikidata: https://www.wikidata.org/wiki/Q842168
69 | * Genius: https://genius.com/1376466
70 | * SongFacts: https://www.songfacts.com/facts/the-beatles/penny-lane
71 | * SecondHandSongs: https://secondhandsongs.com/work/8937/
72 | * WhoSampled: https://www.whosampled.com/The-Beatles/Penny-Lane/
73 |
74 | [MusicBrainz]: https://musicbrainz.org/work/1548c8c0-108f-33a8-9671-a025aa68e382
75 | [Wikidata]: https://www.wikidata.org/wiki/Q842168
76 | [Genius]: https://genius.com/1376466
77 | [SongFacts]: https://www.songfacts.com/facts/the-beatles/penny-lane
78 | [SecondHandSongs]: https://secondhandsongs.com/work/8937/
79 | [WhoSampled]: https://www.whosampled.com/The-Beatles/Penny-Lane/
80 |
81 |
82 | [^1]: This use case is the specific scope of another story: Linka#2 (coming soon).
83 |
84 | [^2]: AcousticBrainz will be shut down (news from mid-Feb 2022).
85 |
--------------------------------------------------------------------------------
/data/music_meta_cqs.txt:
--------------------------------------------------------------------------------
1 | Which is the composer of a musical piece?
2 | Is the composer of a musical piece known?
3 | Which are the members of a music ensemble?
4 | Which role did a music artist play within a music ensemble?
5 | In which time interval has a music artist been a member of a music ensemble?
6 | Where was a music ensemble formed?
7 | Which award was a music artist nominated for?
8 | Which award was received by a music artist?
9 | Which music artists has a music artist been influenced by?
10 | Which music artist has a music artist collaborated with?
11 | Which is the start date of the activity of a music artist?
12 | Which is the end date of the activity of a music artist?
13 | Which is the name of a music artist?
14 | Which is the alias of a music artist?
15 | Which is the language of the name/alias of a music artist?
16 | Which music dataset has a music algorithm been trained on?
17 | Which is the process that led to the creation of a musical piece?
18 | In which time interval did the creation process take place?
19 | Where did the creation process take place?
20 | Which are the creative actions composing the creation process of a musical piece?
21 | Which task was executed by a creative action?
22 | Which are the parts of a musical piece?
23 | Which collection is a musical piece member of?
24 | Where was a musical piece performed?
25 | When was a musical piece performed?
26 | Which music artists took part in a musical performance?
27 | Which is the recording process that recorded a musical performance?
28 | Which is the recording produced by a recording process?
--------------------------------------------------------------------------------
/data/music_meta_cqs_neg.txt:
--------------------------------------------------------------------------------
1 | In which year did a music artist achieve their highest popularity within a music ensemble?
2 | To which record label is a music ensemble currently signed?
3 | Which music critic gave a music artist a negative review?
4 | For which category did a music artist lose an award?
5 | Which music artists are considered rivals of a music artist?
6 | To which music artist has a music artist been compared?
7 | Is a music artist currently touring?
8 | What is the title of a music artist's most popular song?
9 | Which language is most used in a music artist's lyrics?
10 | Does a music algorithm favor a specific genre?
11 | What was the intended audience for a musical piece?
12 | Which emotions does the musical piece attempt to evoke?
13 | Who are the parents of a music artist?
14 | Where did a music artist study?
15 | Is a music work associated to any case of plagiarism?
16 | On which streaming platforms can I find a recording?
17 | How many listeners did a piece receive in a specific country?
18 | Is a piece similar to another?
19 | When was the album first sold?
20 | Who is the lead of a music group?
21 | Was a song wrongly attributed to an artist?
22 | What is the cover art of a release?
23 | What is the sampling rate of an audio recording?
24 | In which language has a song been translated?
25 | What is the ISNI code associated to a music artist?
26 | What is the length of a recording in seconds?
27 | Is there any recommendation associated to an album?
28 | What is the current average price of a release?
--------------------------------------------------------------------------------
/ontochat/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/King-s-Knowledge-Graph-Lab/OntoChat/85b1f929592932787ebbf55f6ec431bc5a736914/ontochat/__init__.py
--------------------------------------------------------------------------------
/ontochat/analysis.py:
--------------------------------------------------------------------------------
1 | """
2 | Competency questions analysis functions
3 | Partially inherited from [idea](https://github.com/polifonia-project/idea)
4 | """
5 |
6 | import ast
7 | import io
8 | import re
9 | from collections import defaultdict
10 |
11 | import numpy as np
12 |
13 | from PIL import Image
14 | from matplotlib import pyplot as plt
15 |
16 | from sentence_transformers import SentenceTransformer
17 | from sklearn.cluster import AgglomerativeClustering
18 | from scipy.cluster.hierarchy import dendrogram
19 |
20 | from ontochat.chatbot import chat_completion
21 |
22 |
23 | def preprocess_competency_questions(cqs):
24 |     """Preprocess competency questions: split a newline-separated string into a cleaned list of strings."""
25 | cqs = cqs.split("\n")
26 | # # keep index
27 | # cqs = [re.split(r'\.\s', cq, 1) for cq in cqs]
28 | # cqs = [{cq[0]: cq[1]} for cq in cqs]
29 | # cqs = [re.split(r'\.\s', cq, 1)[1] for cq in cqs]
30 |
31 | # clean
32 | cleaned_cqs = []
33 | for q in cqs: # FIXME to move
34 | # Collapse complex questions in a sentence
35 | q = q.replace("\n", "; ")
36 | # Remove tabular occurrences for metadata
37 | q = q.replace("\t", " ")
38 | # Collapse multiple empty spaces
39 | q = re.sub(r"[ ]+", " ", q)
40 | # Discard inconsistent punctuation
41 | q = re.sub(r";[ ]*;", ";", q)
42 | cleaned_cqs.append(q)
43 |
44 | return cleaned_cqs
45 |
46 |
47 | def compute_embeddings(cqs, model="all-MiniLM-L6-v2", device="cpu"):
48 | """
49 | Compute sentence-level embeddings of competency questions
50 |
51 |     :param cqs: newline-separated string of competency questions
52 |     :param model: name of the SentenceTransformer model to use
53 |     :param device: device used for encoding (e.g. "cpu" or "cuda")
54 |     :return: the cleaned questions and their embeddings, normalised to unit length
55 | """
56 | cleaned_cqs = preprocess_competency_questions(cqs)
57 |
58 | model = SentenceTransformer(model, device=device)
59 | embeddings = model.encode(cleaned_cqs)
60 |
61 | # Normalisation of CQ embeddings to unit length
62 | embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
63 |
64 | return cleaned_cqs, embeddings
65 |
66 |
67 | def agglomerative_clustering(cqs, embeddings, n_clusters=None, metric="euclidean", distance_threshold=None):
68 | """
69 |
70 | :param cqs:
71 | :param embeddings:
72 | :param n_clusters:
73 | :param metric:
74 | :param distance_threshold:
75 | :return:
76 | """
77 | clustering_model = AgglomerativeClustering(
78 | n_clusters=n_clusters,
79 | metric=metric,
80 | distance_threshold=distance_threshold,
81 | compute_distances=True
82 | )
83 | clustering_model.fit(embeddings)
84 | cluster_assignment = clustering_model.labels_
85 |
86 | clustered_cqs = defaultdict(list)
87 | for sentence_id, cluster_id in enumerate(cluster_assignment):
88 | clustered_cqs[str(cluster_id)].append(cqs[sentence_id])
89 |
90 | pil_image = plot_dendrogram(
91 | clustering_model,
92 | orientation='right',
93 | labels=list(range(1, len(cqs) + 1)),
94 | # labels=cqs,
95 | truncate_mode=None,
96 | # p=3,
97 | show_leaf_counts=False,
98 | )
99 |
100 | return clustered_cqs, pil_image
101 |
102 |
103 | def plot_dendrogram(model, **kwargs):
104 | """ Create linkage matrix and then plot the dendrogram
105 | source: https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html
106 |
107 | :param model:
108 | :param kwargs:
109 | :return:
110 | """
111 | # create the counts of samples under each node
112 | counts = np.zeros(model.children_.shape[0])
113 | n_samples = len(model.labels_)
114 | for i, merge in enumerate(model.children_):
115 | current_count = 0
116 | for child_idx in merge:
117 | if child_idx < n_samples:
118 | current_count += 1 # leaf node
119 | else:
120 | current_count += counts[child_idx - n_samples]
121 | counts[i] = current_count
122 |
123 | linkage_matrix = np.column_stack(
124 | [model.children_, model.distances_, counts]
125 | ).astype(float)
126 |
127 | # Plot the corresponding dendrogram
128 | plt.tight_layout()
129 | # plt.figure(figsize=(40, 20))
130 | dendrogram(linkage_matrix, **kwargs)
131 | # plt.subplots_adjust(left=0.25, right=1.025, top=0.9, bottom=0.075)
132 | # plt.savefig(figsave_path)
133 | # plt.show()
134 | # convert the figure into a PIL image
135 | fig = plt.gcf()
136 | buf = io.BytesIO()
137 | fig.savefig(buf)
138 | buf.seek(0)
139 | return Image.open(buf)
140 |
141 |
142 | def response_parser(response):
143 | try:
144 | response = ast.literal_eval(response)
145 | except (ValueError, TypeError, SyntaxError):
146 | response = ""
147 | return response
148 |
149 |
150 | def llm_cq_clustering(cqs, n_clusters, api_key, paraphrase_detection=False):
151 | """
152 |
153 | :param cqs:
154 | :param n_clusters:
155 | :param api_key:
156 | :param paraphrase_detection:
157 | :return:
158 | """
159 | conversation_history = [
160 | {"role": "system", "content": "You are an ontology engineer."}
161 | ]
162 | # paraphrase detection before clustering
163 | if paraphrase_detection:
164 | # 1. paraphrase detection
165 | prompt_1 = "Perform paraphrase detection for the following competency questions: {}. " \
166 | "Return a Python list of duplicate competency questions.".format(cqs)
167 |
168 | conversation_history.append({"role": "user", "content": prompt_1})
169 | response = chat_completion(api_key, conversation_history)
170 | print("{} CQs remaining after paraphrase detection.".format(len(cqs) - len(response_parser(response))))
171 |
172 | # 2. clustering
173 | if n_clusters:
174 | prompt_2 = f"Clustering the competency questions into {n_clusters} clusters based on their topics. " \
175 | "Keep the granularity of the topic in each cluster at a similar level. " \
176 | "Return in JSON format, such as: {'cluster 1 topic': " \
177 | "['competency question 1', 'competency question 2']}:"
178 | else:
179 | prompt_2 = f"Clustering the competency questions into clusters based on their topics. " \
180 | "Keep the granularity of the topic in each cluster at a similar level. " \
181 | "Return in JSON format, such as: {'cluster 1 topic': " \
182 | "['competency question 1', 'competency question 2']}:"
183 | conversation_history.append({"role": "assistant", "content": response}) # previous response
184 | conversation_history.append({"role": "user", "content": prompt_2})
185 | response = chat_completion(api_key, conversation_history)
186 | # print("Output is: \"{}\"".format(response))
187 |
188 | else: # clustering only
189 | if n_clusters:
190 | prompt_2 = f"Given the competency questions: {cqs}, clustering them into {n_clusters} clusters based on " \
191 | f"the topics."
192 | else:
193 | prompt_2 = f"Given the competency questions: {cqs}, clustering them into clusters based on the topics."
194 | prompt_2 += "Keep the granularity of the topic in each cluster at a similar level. " \
195 | "Return in JSON format, such as: {'cluster 1 topic': " \
196 | "['competency question 1', 'competency question 2']}:"
197 | conversation_history.append({"role": "user", "content": prompt_2})
198 | response = chat_completion(api_key, conversation_history)
199 | # print("Output is: \"{}\"".format(response))
200 |
201 | # # 3. assign labels
202 | # prompt_2 = "Clustering the competency questions based on their topics. Return in JSON format, " \
203 | # "such as: {'cluster 1 topic': ['competency question 1', 'competency question 2']}:"
204 | # conversation_history.append({"role": "assistant", "content": response}) # previous response
205 | # conversation_history.append({"role": "user", "content": prompt_2})
206 | # response = chat_completion(conversation_history)
207 | # response = response.choices[0].message.content
208 | # print("Output is: \"{}\"".format(response))
209 |
210 | return response_parser(response), Image.new("RGB", (640, 480), (255, 255, 255))
211 |
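212 | 
213 | if __name__ == "__main__":
214 |     # Minimal usage sketch (illustrative, not part of the original module): embed and
215 |     # cluster a few example CQs on CPU. It assumes the "all-MiniLM-L6-v2" model can be
216 |     # downloaded by sentence-transformers, and should be run as a module, e.g.
217 |     # `python -m ontochat.analysis`, so the package imports resolve.
218 |     example_cqs = (
219 |         "Which is the composer of a musical piece?\n"
220 |         "Which award was received by a music artist?\n"
221 |         "Where was a music ensemble formed?"
222 |     )
223 |     cleaned, embeddings = compute_embeddings(example_cqs, device="cpu")
224 |     clusters, dendrogram_image = agglomerative_clustering(cleaned, embeddings, n_clusters=2)
225 |     print(clusters)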
--------------------------------------------------------------------------------
/ontochat/chatbot.py:
--------------------------------------------------------------------------------
1 | from openai import OpenAI
2 | from openai import APIConnectionError, APITimeoutError, AuthenticationError, RateLimitError
3 |
4 | from ontochat.config import DEFAULT_MODEL, DEFAULT_SEED, DEFAULT_TEMPERATURE
5 |
6 |
7 | def chat_completion(api_key, messages):
8 | client = OpenAI(api_key=api_key)
9 | try:
10 | response = client.chat.completions.create(
11 | model=DEFAULT_MODEL,
12 | messages=messages,
13 | seed=DEFAULT_SEED,
14 | temperature=DEFAULT_TEMPERATURE,
15 | )
16 | except APITimeoutError as e:
17 | return f"Request timed out. Retry your request after a brief wait. Error information: {e}"
18 | except APIConnectionError as e:
19 | return f"Issue connecting to our services. Check your network settings, proxy configuration, " \
20 | f"SSL certificates, or firewall rules. Error information: {e}"
21 | except AuthenticationError as e:
22 | return f"Your API key or token was invalid, expired, or revoked. Error information: {e}"
23 | except RateLimitError as e:
24 | return f"You have hit your assigned rate limit. Error information: {e}"
25 | return response.choices[0].message.content
26 |
27 |
28 | def build_messages(history):
29 | """
30 | Convert Gradio Chatbot history to OpenAI client messages
31 | :param history: List of dictionaries with 'role' and 'content' keys
32 | :return: List of OpenAI client messages
33 | """
34 | messages = []
35 | for item in history:
36 | messages.append({"role": item["role"], "content": item["content"]})
37 | return messages
38 |
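39 | 
40 | if __name__ == "__main__":
41 |     # Minimal usage sketch (illustrative, not part of the original module). It assumes an
42 |     # OpenAI key is exported as OPENAI_API_KEY and that this file is run as a module,
43 |     # e.g. `python -m ontochat.chatbot`, so the package import above resolves.
44 |     import os
45 | 
46 |     demo_key = os.environ.get("OPENAI_API_KEY")
47 |     if demo_key:
48 |         demo_history = [{"role": "user", "content": "Suggest one competency question about music composers."}]
49 |         print(chat_completion(demo_key, build_messages(demo_history)))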
--------------------------------------------------------------------------------
/ontochat/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains constants; serves as a lightweight config file until a dedicated one is created.
3 | """
4 |
5 | DEFAULT_MODEL = "gpt-4o"
6 | DEFAULT_TEMPERATURE = 0
7 | DEFAULT_SEED = 1234
8 |
--------------------------------------------------------------------------------
/ontochat/cq_generator_ontochat_app.py:
--------------------------------------------------------------------------------
1 | """
2 | Improved scenario CQ generator -- groups rows by UNIQUE scenarios or dataset entries.
3 | Sends exactly ONE request per distinct scenario (or dataset entry when the scenario is missing)
4 | and concatenates all gold-standard CQs found for that scenario with "; ".
5 | Run locally with: uvicorn cq_generator_ontochat_app:app --host 127.0.0.1 --port 8003
6 |
7 | """
8 |
9 | from fastapi import FastAPI, File, UploadFile, HTTPException, Response
10 | from gradio_client import Client
11 | from io import StringIO
12 | import pandas as pd, openai, logging, os, time, datetime, pathlib, json
13 |
14 | # ─────────────────────────── logging ───────────────────────────
15 | logging.basicConfig(
16 | level=logging.INFO,
17 | format="%(asctime)s │ %(levelname)-8s │ %(message)s",
18 | )
19 | logger = logging.getLogger(__name__)
20 |
21 | # ────────────────────────── clients ──────────────────────────
22 | ONTOCHAT = Client("b289zhan/OntoChat")
23 | _OPENAIKEY = os.getenv("OPENAI_API_KEY")
24 |
25 | # prime OntoChat **once** with your key
26 | if _OPENAIKEY:
27 | try:
28 | ONTOCHAT.predict(_OPENAIKEY, api_name="/set_openai_api_key")
29 | logger.info("✓ OpenAI key stored in OntoChat session")
30 | except Exception as e:
31 | logger.warning(f"Could not set key in OntoChat: {e}")
32 |
33 | # ───────────────────── helper functions ───────────────────────
34 | def _prompt_for(scenario: str) -> str:
35 | return (
36 | "Here is a user scenario for ontology engineering:\n"
37 | f"\"\"\"\n{scenario}\n\"\"\"\n\n"
38 | "Please generate **up to five** competency questions for this scenario "
39 | "and return them on **one single line, separated by a semicolon** (;)."
40 | )
41 |
42 | HISTORY = [[None,
43 | ("I am OntoChat, your conversational ontology engineering assistant…")
44 | ]]
45 |
46 | def _query_ontochat(prompt: str, retry: int = 3, wait: float = 8.0) -> str:
47 | for attempt in range(1, retry + 1):
48 | try:
49 | ans = ONTOCHAT.predict(prompt, HISTORY, api_name="/cq_generator")[0]
50 | return ans.strip()
51 | except Exception as e:
52 | logger.warning(f"OntoChat attempt {attempt}/{retry} failed: {e}")
53 | if attempt == retry: raise
54 | time.sleep(wait * attempt)
55 |
56 | def _fallback_openai(prompt: str) -> str:
57 | try:
58 | resp = openai.chat.completions.create(
59 | model="gpt-4o-mini",
60 | messages=[
61 | {"role": "system",
62 | "content": "You generate up to five competency questions; "
63 | "return them on one line separated by ';'."},
64 | {"role": "user", "content": prompt},
65 | ],
66 | max_tokens=120,
67 | temperature=0.4,
68 | )
69 | return resp.choices[0].message.content.strip()
70 | except Exception as e:
71 | return f"OpenAI fallback error: {e}"
72 |
73 | # ─────────────────────────── FastAPI ───────────────────────────
74 | app = FastAPI(
75 | title="Improved Scenario CQ Generator",
76 | version="2.3.0",
77 | description="Generates up‑to‑five CQs per UNIQUE scenario or dataset entry.",
78 | )
79 |
80 | @app.post("/newapi/")
81 | async def generate_cqs(file: UploadFile = File(...)):
82 | logger.info(f"★ /newapi/ received {file.filename!r}")
83 |
84 | try:
85 | content = await file.read()
86 | df = pd.read_csv(StringIO(content.decode("utf-8")))
87 | # Keep a copy of the original DataFrame to preserve all rows
88 | original_df = df.copy()
89 | except Exception as e:
90 | raise HTTPException(400, f"CSV read error: {e}")
91 |
92 | # Check available columns in the dataset
93 | has_scenario_column = "Scenario" in df.columns
94 | has_cq_column = "Competency Question" in df.columns
95 |
96 | # Determine the main grouping column
97 | if has_scenario_column:
98 | logger.info("Using 'Scenario' as primary grouping column")
99 | grouping_column = "Scenario"
100 | else:
101 | # Look ONLY for dataset column
102 | dataset_columns = [col for col in df.columns if col.lower() not in ["competency question"]
103 | and "dataset" in col.lower()]
104 |
105 | if dataset_columns:
106 | grouping_column = dataset_columns[0]
107 | logger.info(f"No 'Scenario' column found, using '{grouping_column}' as primary grouping column")
108 | else:
109 | raise HTTPException(400, "CSV must contain a 'Scenario' column or a column with 'dataset' in its name")
110 |
111 | # Create a unique key for each row
112 | # If scenario exists, use it; otherwise use the alternative column
113 | df["grouping_key"] = df[grouping_column].fillna("").astype(str).str.strip()
114 |
115 | # When grouping key is empty and we have a CQ, use the CQ as the key
116 | if has_cq_column:
117 | mask = (df["grouping_key"] == "") & (df["Competency Question"].notna())
118 | df.loc[mask, "grouping_key"] = "CQ: " + df.loc[mask, "Competency Question"].astype(str).str.strip()
119 |
120 | # Skip completely empty rows (no key and no CQ)
121 | df = df[df["grouping_key"] != ""]
122 |
123 | # Now group by the unique keys
124 | groups = df.groupby("grouping_key", dropna=False)
125 |
126 | logger.info(f"Found {len(groups)} unique entries to process")
127 |
128 | # Process each unique group once
129 | processed_results = {}
130 |
131 | for group_key, group_df in groups:
132 | # Collect all gold standard CQs for this group
133 | gold_cqs = []
134 | if has_cq_column:
135 | gold_cqs = [str(cq).strip() for cq in group_df["Competency Question"].dropna()
136 | if str(cq).strip()]
137 |
138 | gold_combined = " ; ".join(gold_cqs) if gold_cqs else ""
139 |
140 | # If the group key starts with "CQ:", we're using a competency question as the key
141 | if group_key.startswith("CQ:"):
142 | prompt_text = group_key[3:].strip() # Remove the "CQ:" prefix
143 | logger.info(f"Using CQ as source for generation: [{prompt_text[:40]}...]")
144 | prompt = _prompt_for(f"Based on this competency question: {prompt_text}")
145 | else:
146 | prompt_text = group_key
147 | logger.info(f"Processing unique entry: [{prompt_text[:40]}...]")
148 | prompt = _prompt_for(prompt_text)
149 |
150 | # Generate questions
151 | try:
152 | generated = _query_ontochat(prompt)
153 | logger.info(f"✓ CQ(s) generated for: [{prompt_text[:40]}...]")
154 | except Exception as e:
155 | if _OPENAIKEY:
156 | generated = _fallback_openai(prompt)
157 | logger.info(f"✓ Generated via OpenAI fallback")
158 | else:
159 | generated = f"Error generating CQ: {e}"
160 | logger.error(generated)
161 |
162 | # Store the results for this unique group
163 | processed_results[group_key] = {
164 | "gold standard": gold_combined,
165 | "generated": generated
166 | }
167 |
168 | # Gentle pacing
169 | time.sleep(2)
170 |
171 | # Now map the results back to the original DataFrame rows
172 | results = []
173 |
174 | # For each original row, find its corresponding processed result
175 | for _, row in original_df.iterrows():
176 | result_entry = {"gold standard": "", "generated": ""}
177 |
178 | # Determine the grouping key for this row
179 | if has_scenario_column:
180 | key = str(row.get("Scenario", "")).strip()
181 | else:
182 | key = str(row.get(grouping_column, "")).strip()
183 |
184 | # If key is empty but we have a CQ, use the CQ as key
185 | if not key and has_cq_column:
186 | cq = str(row.get("Competency Question", "")).strip()
187 | if cq:
188 | key = f"CQ: {cq}"
189 |
190 | # Look up the processed result for this key
191 | if key in processed_results:
192 | result_entry = processed_results[key]
193 | else:
194 | # If no match, leave empty but record a specific gold standard if available
195 | if has_cq_column and pd.notna(row.get("Competency Question")):
196 | result_entry["gold standard"] = str(row["Competency Question"]).strip()
197 | result_entry["generated"] = "No matching scenario or dataset entry found"
198 |
199 | results.append(result_entry)
200 |
201 | # Create the output DataFrame
202 | out_df = pd.DataFrame(results)
203 | logger.info(f"🏁 done – {len(out_df)} rows in the output dataset")
204 |
205 | # Also store the aggregate results (unique groups)
206 | aggregate_results = []
207 | for key, data in processed_results.items():
208 | aggregate_results.append({
209 | "grouping_key": key,
210 | "gold standard": data["gold standard"],
211 | "generated": data["generated"]
212 | })
213 |
214 | aggregate_df = pd.DataFrame(aggregate_results)
215 |
216 | # Optional JSON dump
217 | ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
218 | path = pathlib.Path("results")
219 | path.mkdir(exist_ok=True)
220 |
221 | # Store both the full results and aggregated results
222 | (path / f"last_run_{ts}.json").write_text(
223 | json.dumps(results, indent=2), encoding="utf-8"
224 | )
225 |
226 | (path / f"last_run_aggregated_{ts}.json").write_text(
227 | json.dumps(aggregate_results, indent=2), encoding="utf-8"
228 | )
229 |
230 | # Also save the aggregated CSV file
231 | (path / f"last_run_aggregated_{ts}.csv").write_text(
232 | aggregate_df.to_csv(index=False), encoding="utf-8"
233 | )
234 |
235 | logger.info(f"Saved both full results and aggregated results to 'results' folder")
236 |
237 | return Response(out_df.to_csv(index=False), media_type="text/csv")
238 |
239 |
240 | # ---------- dev run ----------
241 | if __name__ == "__main__":
242 | import uvicorn
243 | uvicorn.run("cq_generator_ontochat_app:app",
244 | host="127.0.0.1", port=8003)
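245 | 
246 | # ---------- example client (illustrative) ----------
247 | # A minimal client sketch, assuming the service is running locally on port 8003 and that
248 | # "scenarios.csv" (a hypothetical file name) contains a 'Scenario' and/or
249 | # 'Competency Question' column:
250 | #
251 | #   import requests
252 | #   with open("scenarios.csv", "rb") as fh:
253 | #       resp = requests.post("http://127.0.0.1:8003/newapi/", files={"file": fh})
254 | #   print(resp.text)  # CSV with 'gold standard' and 'generated' columns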
--------------------------------------------------------------------------------
/ontochat/functions.py:
--------------------------------------------------------------------------------
1 | import json
2 | from ontochat.chatbot import chat_completion, build_messages
3 | from ontochat.analysis import compute_embeddings, agglomerative_clustering, llm_cq_clustering
4 | from ontochat.verbaliser import verbalise_ontology
5 | import gradio as gr
6 |
7 | openai_api_key = None
8 |
9 | current_preidentified_prompts = []
10 | current_preidentified = [
11 | """The ontology's domain is **[DOMAIN]**.
12 |
13 | Ensure all subsequent responses strictly adhere to the scope of my domain. Use ontology-specific terminology and align responses with relevant ontological structures, relationships, and reasoning principles.""",
14 |
15 | """My persona is **[NAME]**, I am **[AGE]** years old, and I work as a **[OCCUPATION]** in the domain of **[DOMAIN]**. My expertise includes **[SKILLS]**, and I have a strong interest in **[INTERESTS]**.
16 |
17 | Please refine my response to ensure:
18 |
19 | 1. The persona is clearly described with a focus on ontology-related expertise.
20 | 2. The details (name, age, occupation, skills, interests) are logically aligned with domain ontologies and structured knowledge representation.
21 | 3. If the skills and interests provided by the user are similar and hard to differentiate, generate distinct ones by distinguishing between conceptual knowledge (e.g., ontological modeling) and applied techniques (e.g., knowledge graph construction).
22 | 4. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*, ensuring clarity in an ontology-driven context.
23 | 5. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH]*.""",
24 |
25 | """My ontology user goal is **[USER GOAL DESCRIPTION]**.
26 |
27 | Please refine my response to ensure:
28 |
29 | 1. The goal is clearly described and ontology-driven, emphasizing knowledge structuring, reasoning, or interoperability.
30 | 2. Expand the goal description, if necessary, to ensure it starts with a high-level *[KNOWLEDGE REPRESENTATION OBJECTIVE]* followed by a specific *[ONTOLOGY-SUPPORTED TASK]*.
31 | 3. Ensure the goal description aligns with my domain expertise and the role of ontologies in solving real-world problems.
32 | 4. The focus remains solely on ontology-related objectives, avoiding unrelated technical or system-level descriptions.
33 | 5. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
34 | 6. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.""",
35 |
36 | """The ontology-driven actions I need to take to achieve my goal are **[ACTIONS]**.
37 |
38 | Please refine my response to ensure:
39 |
40 | 1. The actions are clearly described and ontology-specific.
41 | 2. The actions are actionable and presented as a logical sequence of steps related to ontology design, integration, or reasoning.
42 | 3. Each step includes specific *[ONTOLOGY ENGINEERING METHODS]*, *[KNOWLEDGE REPRESENTATION TOOLS]*, or *[INFERENCE TECHNIQUES]* and is practical.
43 | 4. Where relevant, incorporate *[INTERDISCIPLINARY INSIGHTS]* to describe the role of ontologies in broader knowledge management.
44 | 5. Ensure the actions align logically with my expertise and ontology development goals.
45 | 6. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
46 | 7. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.""",
47 |
48 | """The key ontology concepts and terms related to my goal are **[KEYWORDS]**.
49 |
50 | Please refine my response to ensure:
51 |
52 | 1. The keywords are clearly defined and ontology-relevant.
53 | 2. Each keyword directly supports knowledge structuring and reasoning.
54 | 3. Keywords are aligned with *[EXISTING ONTOLOGY VOCABULARIES]* or *[STANDARDS]*.
55 | 4. Keywords are expanded with related ontological terms (e.g., subclass relationships, linked data mappings).
56 | 5. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
57 | 6. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.""",
58 |
59 | """The current methods I use for knowledge representation and reasoning are **[CURRENT METHODS]**.
60 |
61 | Please refine my response to ensure:
62 |
63 | 1. The methods are clearly described and align with ontology-based workflows.
64 | 2. The methods must involve manual or ad-hoc knowledge structuring efforts.
65 | 3. The methods are actionable and presented as a logical sequence of ontology modeling or reasoning steps.
66 | 4. Each step includes specific *[ONTOLOGY MODELING TOOLS]*, *[KNOWLEDGE REPRESENTATION LANGUAGES]*, or *[LOGICAL INFERENCE TECHNIQUES]*.
67 | 5. Where relevant, incorporate *[INTERDISCIPLINARY INSIGHTS]* to describe the challenges in integrating knowledge across domains.
68 | 6. The methods align logically with my expertise and goals.
69 | 7. The methods include only knowledge structuring tasks, without discussing challenges, new methods, or expected outcomes.
70 | 8. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
71 | 9. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.""",
72 |
73 | """The challenges I face in ontology-based knowledge representation are **[CHALLENGES]**.
74 |
75 | Please refine my response to ensure:
76 |
77 | 1. The challenges are clearly described and relate to ontology structuring, reasoning, or interoperability.
78 | 2. Each challenge should be derived from *[CURRENT TOOLS]*, *[METHODOLOGIES]*, or *[LOGICAL CONSTRAINTS]* used.
79 | 3. Where relevant, incorporate *[ONTOLOGY INTEGRATION CHALLENGES]* related to external knowledge bases.
80 | 4. The challenges align logically with my domain expertise and ontology development.
81 | 5. The challenges include only the core difficulties, without discussing solutions or expected improvements.
82 | 6. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
83 | 7. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.""",
84 |
85 | """The ontology-based methods I will use to address my challenges are **[NEW METHODS]**.
86 |
87 | Please refine my response to ensure:
88 |
89 | 1. The new methods are clearly described and ontology-driven.
90 | 2. The new methods align with what an ontology-based system can offer, such as automated reasoning, semantic search, or ontology alignment.
91 | 3. The methods align logically with my goals and expected improvements.
92 | 4. The methods include only knowledge representation enhancements, without discussing previous challenges or outcomes.
93 | 5. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
94 | 6. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.""",
95 |
96 | """The expected outcomes of implementing ontology-driven methods are **[OUTCOMES]**.
97 |
98 | Please refine my response to ensure:
99 |
100 | 1. The outcomes are clearly described and ontology-relevant.
101 | 2. The outcomes must align with the benefits of a structured knowledge representation system.
102 | 3. The outcomes align logically with my overall goal.
103 | 4. The outcomes include only ontology-driven improvements, without discussing previous methods or challenges.
104 | 5. The language is *[CONCISE & PRECISE / CREATIVE & DETAILED]*.
105 | 6. The final answer is structured as *[BULLET POINTS / A DETAILED PARAGRAPH / TAXONOMY-LIKE HIERARCHY]*.""",
106 | ]
107 |
108 | def set_openai_api_key(api_key: str):
109 | global openai_api_key
110 | # Strip any leading or trailing spaces
111 | api_key = api_key.strip()
112 |
113 |     # Reject API keys that contain spaces or do not start with "sk-"
114 | if " " in api_key or not api_key.startswith("sk-"):
115 | return "Invalid API key format. Please ensure the key does not contain spaces and is a valid OpenAI API key."
116 |
117 | # Set the API key if the format is valid
118 | openai_api_key = api_key
119 | return "API key has been set! Now you can chat with the chatbot. Enjoy :)"
120 |
121 | def check_api_key():
122 | if openai_api_key is None:
123 | raise ValueError("OpenAI API key is not set. Please set it using the 'Set API Key' button.")
124 |
125 | def user_story_generator(message, history):
126 | instructions = [
127 | {
128 | "role": "assistant",
129 | "content": (
130 | "Ontology construction involves creating structured frameworks to represent knowledge in a specific domain. Ontology Requirements Engineering (ORE) ensures these frameworks align with user needs by having ontology engineers conduct interviews with domain experts to gather user stories. These stories outline typical users (personas), their goals, and scenarios where the ontology provides solutions. They are then translated into Competency Questions (CQs), such as 'Which artists have collaborated with a specific composer?', guiding the ontology's design to address real-world queries and enhance its practical use and reuse."
131 |
132 | "You are an ontology engineer conducting an interview with a domain expert to gather information for writing an ontology user story."
133 | "Ask elicitation questions one at a time, providing an example answer and the prompt template the user should use, while incorporating user feedback if needed."
134 |
135 | "If all requirements for the current elicitation are fully addressed, always ask the user if this meets their expectations. Do not ask the next question unless the user confirms the current one is satisfactory."
136 | "When a domain expert requests refinement, provide just one focused point in one sentence, directly aligned with their current answer."
137 | "The user can request to revisit any previously completed steps."
138 | "If the user's answer doesn't address the current question, gently remind them of the question and prompt them to respond accordingly."
139 | "If the user doesn't confirm the current result is satisfactory, their attempt to answer the next question should be rejected, and they should be asked to respond to the current one."
140 | "Do not answer any queries that are not related to this task. \n\n"
141 |
142 | "1. Persona\n"
143 | "Start by creating a persona that represents a typical user of your ontology."
144 | "[Persona]: Ask one elicitation question for details includes [name], [age], [occupation], [skills], and [interests], along with a brief example answer as guidance, and include the message 'Use template **[Create Persona]** to answer' as a reminder.\n"
145 | "Once the expert provides this information, suggest possible improvements or clarifications. After all persona details are collected, move to the next section.\n\n"
146 |
147 | "2. Goal\n"
148 | "[User goal description]: Ask one elicitation question to describe the [user goal description] that the user aims to achieve using this ontology, along with a brief example answer as guidance, and include the message 'Use template **[Create User Goal]** to answer' as a reminder.\n"
149 | "[Actions]: Ask one elicitation question for the specific [actions] the persona will take to accomplish the goal, along with a brief example answer as guidance, and include the message 'Use template **[Create Actions]** to answer' as a reminder.\n"
150 | "[Keywords]: Ask one elicitation question for gathering up to 5 relevant [keywords] that summarize the goal and actions, along with a brief example answer as guidance, and include the message 'Use template **[Create Keywords]** to answer' as a reminder.\n"
151 | "Once the expert has answered, offer suggestions for further refinement, then proceed to the next section.\n\n"
152 |
153 | "3. Scenario\n"
154 | "[Scenario before]: Ask one elicitation question for the expert to describe the [current methods] the persona uses to perform the actions, along with a brief example answer as guidance, and include the message 'Use template **[Create Current Methods]** to answer' as a reminder.\n"
155 | "[Challenges]: Ask one elicitation question for the [challenges] they face when performing current methods, making sure these align with the persona's occupation and skills, along with a brief example answer as guidance, and include the message 'Use template **[Create Challenges]** to answer' as a reminder.\n"
156 | "[Scenario during]: Ask one elicitation question for the expert to explain how their ontology introduces [new methods] to help them overcome these challenges, along with a brief example answer as guidance, and include the message 'Use template **[Create New Methods]** to answer' as a reminder.\n"
157 | "[Scenario after]: Ask one elicitation question for the expert to describe the [outcomes] after using the ontology and how it helps them achieve their goal, along with a brief example answer as guidance, and include the message 'Use template **[Create Outcomes]** to answer' as a reminder.\n"
158 |                 "Provide feedback on each scenario part and refine the answers if needed before moving on.\n\n"
159 |
160 | "4. Create User Story\n"
161 | "Once you have completed sections 1 to 3, summarize the information into a full user story. Use the persona, goal, and scenario information to craft the user story in this format:\n\n"
162 | "Persona: [name], [age], [occupation], [skills], [interests].\n"
163 | "Goal: [user goal description], with actions such as [actions]. Keywords: [keywords].\n"
164 | "Scenario Before: [current methods] the persona uses and the [challenges] they face.\n"
165 | "Scenario During: How your ontology introduces [new methods] to overcome these challenges.\n"
166 | "Scenario After: The [outcomes] achieved by using the ontology and how the persona's goal has been accomplished.\n\n"
167 |                 "Provide the user story to the domain expert and ask one elicitation question for any further feedback or refinements. If needed, adjust the story based on their suggestions."
168 | )
169 | }
170 | ]
171 | messages = build_messages(history)
172 | messages.append({"role": "user", "content": message})
173 |
174 | bot_message = chat_completion(openai_api_key, instructions + messages)
175 |
176 | history.append({"role": "user", "content": message})
177 | history.append({"role": "assistant", "content": bot_message})
178 |
179 | return history, ""
180 |
181 | def load_example(selection):
182 | return current_preidentified[selection]
183 |
184 | # def cq_generator(message, history):
185 | # check_api_key()
186 | # instructions = [{
187 | # "role": "assistant",
188 | # "content": "You are a conversational ontology engineering assistant."
189 | # }, {
190 | # "role": "user",
191 | # "content": "Here are instructions for you on how to generate high-quality competency questions. First, here "
192 | # "are some good examples of competency questions generated from example data. Who performs the song? "
193 | # "from the data Yesterday was performed by Armando Rocca, When (what year) was the building built? "
194 | # "from the data The Church was built in 1619, In which context is the building located? from the "
195 | # "data The Church is located in a periurban context. Second, how to make them less complex. Take the "
196 | # "generated competency questions and check if any of them can be divided into multiple questions. If "
197 | # "they do, split the competency question into multiple competency questions. If it does not, leave "
198 | # "the competency question as it is. For example, the competency question Who wrote The Hobbit and in "
199 | # "what year was the book written? must be split into two competency questions: Who wrote the book? "
200 | # "and In what year was the book written?. Another example is the competency question, When was the "
201 | # "person born?. This competency question cannot be divided into multiple questions. Third, how to "
202 | # "remove real entities to abstract them. Take the competency questions and check if they contain "
203 | # "real-world entities, like Freddy Mercury or 1837. If they do, change those real-world entities "
204 | # "from these competency questions to more general concepts. For example, the competency question "
205 | # "Which is the author of Harry Potter? should be changed to Which is the author of the book?. "
206 | # "Similarly, the competency question Who wrote the book in 2018? should be changed to Who wrote the "
207 | # "book, and in what year was the book written?"
208 | # }]
209 | # messages = build_messages(history)
210 | # messages.append({
211 | # "role": "user",
212 | # "content": message
213 | # })
214 | # bot_message = chat_completion(openai_api_key, instructions + messages)
215 | # history.append([message, bot_message])
216 | # return bot_message, history, ""
217 |
218 | # def load_example_user_story():
219 | # with open("data/Linka#1_MusicKnowledge.md", "r") as f:
220 | # return f.read()
221 |
222 | # def clustering_generator(cqs, cluster_method, n_clusters):
223 | # check_api_key()
224 | # if n_clusters:
225 | # n_clusters = int(n_clusters)
226 |
227 | # cqs, cq_embeddings = compute_embeddings(cqs)
228 |
229 | # if cluster_method == "Agglomerative clustering":
230 | # cq_clusters, cluster_image = agglomerative_clustering(cqs, cq_embeddings, n_clusters)
231 | # else: # cluster_method == "LLM clustering"
232 | # cq_clusters, cluster_image = llm_cq_clustering(cqs, n_clusters, openai_api_key)
233 |
234 | # return cluster_image, json.dumps(cq_clusters, indent=4)
235 |
236 | # def ontology_testing(ontology_file, ontology_desc, cqs):
237 | # check_api_key()
238 | # verbalisation = verbalise_ontology(ontology_file, ontology_desc, "")
239 | # messages = [{
240 | # "role": "system",
241 | # "content": "Please (1) provide a description of the ontology uploaded to provide basic information and "
242 | # "additional context, (2) give the competency questions (CQs) that you want to test with."
243 | # }, {
244 | # "role": "user",
245 | # "content": verbalisation + "\n" + f"Given the above ontology, please label each competency question: {cqs} to "
246 | # f"determine whether it is addressed properly or not. Format your response in"
247 | # f" ['yes': 'CQ1', 'no': 'CQ2', ...]."
248 | # }]
249 | # bot_message = chat_completion(openai_api_key, messages)
250 | # return bot_message
251 |
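
The helpers above are wired into the Gradio app, but they can also be exercised directly. Below is a minimal sketch, assuming it runs in the same module that defines `set_openai_api_key` and `user_story_generator`, and that a valid key is exported as `OPENAI_API_KEY` (both assumptions, not something this file requires):

```python
# Illustrative sketch only: assumes the functions above are in scope and that
# OPENAI_API_KEY is set in the environment (placeholder, not part of this file).
import os

print(set_openai_api_key(os.environ.get("OPENAI_API_KEY", "")))

history = []  # chat history as a list of {"role": ..., "content": ...} dicts
history, _ = user_story_generator(
    "Hi! I'd like to write a user story for a music metadata ontology.",
    history,
)
print(history[-1]["content"])  # the assistant's first elicitation question
```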
--------------------------------------------------------------------------------
/ontochat/ontolib.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions operating on the verbalisation of an ontology, providing support for
3 | generating documentation, extracting competency questions, and preliminarily
4 | testing an ontology via competency questions.
5 | """
6 | import re
7 | import config
8 | from openai import OpenAI
9 | from tqdm import tqdm
10 |
11 | cqe_prompt_a = "You are asked to provide a comprehensive list of competency "\
12 | "questions describing all the possible requirements that can be "\
13 | "addressed by the ontology described before."
14 |
15 | cqt_prompt_a = "You are asked to infer if the ontology described before can "\
16 | "address the following competency question: \"{}\" "\
17 | "Valid answers are: Yes, No."
18 |
19 | cqt_prompt_b = "You are asked to infer if the ontology described before can "\
20 | "address the following competency question: \"{}\" "\
21 | "Only reply: 'Yes', 'No' and provide an explanation after a comma."
22 |
23 |
24 | class ChatInterface:
25 |
26 | def __init__(self,
27 | api_key: str,
28 | model_name: str = config.DEFAULT_MODEL,
29 | sampling_seed: int = config.DEFAULT_SEED,
30 |                  temperature: float = config.DEFAULT_TEMPERATURE):
31 | # Save client configuration for all calls
32 | self.client = OpenAI(api_key=api_key)
33 | self.model_name = model_name
34 | self.sampling_seed = sampling_seed
35 | self.temperature = temperature
36 |
37 |     def chat_completion(self, messages, **kwargs):
38 |         """Request a chat completion, optionally overriding model and temperature per call."""
39 |         # Per-call overrides do not alter the class defaults
40 |         model = kwargs.get("model", self.model_name)
41 |         temperature = kwargs.get("temperature", self.temperature)
42 |
43 | completion = self.client.chat.completions.create(
44 | model=model,
45 | messages=messages,
46 | seed=self.sampling_seed,
47 | temperature=temperature,
48 | )
49 | return completion.choices[0].message.content
50 |
51 |
52 | def extract_competency_questions(onto_verbalisation: str,
53 | chat_interface: ChatInterface,
54 | prompt: str = cqe_prompt_a):
55 | """
56 | Extract competency questions from the verbalisation of an ontology.
57 |
58 | Parameters
59 | ----------
60 | onto_verbalisation : str
61 | A string expressing the ontology verbalisation as output from a
62 | supported method in the `verbaliser` module.
63 | chat_interface : ChatInterface
64 | An instance of a chat interface holding the API session.
65 | prompt : str, optional
66 | CQ extraction prompt, by default cqe_prompt_a
67 |
68 | Returns
69 | -------
70 | competency_questions : str
71 |         A string listing the competency questions induced from the verbalisation.
72 |
73 | """
74 | full_prompt = onto_verbalisation + "\n" + prompt
75 | conversation_history = [
76 | {"role": "system", "content": "You are an ontology expert."},
77 | {"role": "user", "content": full_prompt}
78 | ]
79 |
80 | competency_questions = chat_interface.chat_completion(
81 | conversation_history, model="gpt-3.5-turbo-16k")
82 |
83 | return competency_questions
84 |
85 |
86 | def test_competency_questions(onto_verbalisation: str,
87 | competency_questions: list[str],
88 | chat_interface: ChatInterface,
89 | cq_prompt: str = cqt_prompt_a):
90 | """
91 | Performs a preliminary test of the ontology to assess whether its
92 |     verbalisation allows for addressing each of the given competency questions.
93 |
94 | Parameters
95 | ----------
96 | onto_verbalisation : str
97 | A string expressing the ontology verbalisation as output from a
98 | supported method in the `verbaliser` module.
99 | competency_questions: list[str]
100 | A list of competency questions to use for preliminary testing.
101 | chat_interface : ChatInterface
102 | An instance of a chat interface holding the API session.
103 | cq_prompt : str, optional
104 | CQ test prompt, by default cqt_prompt_a
105 |
106 | Returns
107 | -------
108 | cq_test_dict : dict
109 |         A dictionary mapping each competency question to an (outcome, explanation)
110 |         tuple, where the outcome is a preliminary yes/no verdict.
111 |
112 | """
113 | cq_test_dict = {}
114 | for cq in tqdm(competency_questions):
115 | full_prompt = onto_verbalisation + "\n" + cq_prompt.format(cq)
116 | conversation_history = [
117 | {"role": "system", "content": "You are an ontology engineer."},
118 | {"role": "user", "content": full_prompt}
119 | ]
120 | outcome = chat_interface.chat_completion(
121 | conversation_history, model="gpt-3.5-turbo-16k")
122 |         match = re.search(r"^(Yes|No)(.*)", outcome)
123 |         # Keep the raw reply as the outcome when it does not start with Yes/No
124 |         cq_test_dict[cq] = (match.group(1), match.group(2)) if match else (outcome, None)
125 |
126 | return cq_test_dict
127 |
128 |
129 | def split_cq_test_data(cq_test_dict: dict):
130 | """
131 |     Splits a CQ test dictionary into two parallel lists: the competency
132 |     questions and their binary outcomes (`Yes` maps to 1, `No` to 0).
133 |     """
134 |     cq_x, cq_y = [], []
135 | for cq, outcome in cq_test_dict.items():
136 | cq_x.append(cq)
137 | if outcome[0] not in ["Yes", "No"]:
138 | raise ValueError(f"Invalid test outcome: {outcome}")
139 | cq_y.append(1 if outcome[0] == "Yes" else 0)
140 | return cq_x, cq_y
141 |
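
For orientation, here is a condensed sketch of how the pieces of this module fit together (the tutorial notebook below walks through the same flow); the API key and ontology path are placeholders, and `verbalise_ontology` is imported the same way the notebook imports it:

```python
# Sketch only: "sk-..." and the ontology path are placeholders.
from ontolib import (ChatInterface, extract_competency_questions,
                     test_competency_questions, split_cq_test_data)
from verbaliser import verbalise_ontology

interface = ChatInterface(api_key="sk-...")
verb = verbalise_ontology("path/to/ontology.owl", "music metadata",
                          "Artists, compositions, performances, recordings.")

# Onto2CQ: induce candidate CQs from the verbalisation
candidate_cqs = extract_competency_questions(verb, chat_interface=interface)

# CQ pre-test: check whether given CQs appear to be addressed by the ontology
results = test_competency_questions(
    verb, ["What is the genre of a music entity?"], chat_interface=interface)
cq_x, cq_y = split_cq_test_data(results)  # questions and their 0/1 outcomes
```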
--------------------------------------------------------------------------------
/ontochat/queries.py:
--------------------------------------------------------------------------------
1 | """
2 | General-purpose SPARQL queries
3 |
4 | """
5 |
6 | NE_QUERY = """
7 | PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
8 | PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
9 | PREFIX owl: <http://www.w3.org/2002/07/owl#>
10 |
11 | SELECT DISTINCT ?individual ?other
12 | WHERE {
13 | ?individual rdf:type owl:NamedIndividual ;
14 | rdf:type ?other .
15 | FILTER ( ?other not in ( owl:NamedIndividual ) )
16 | }
17 | """
18 |
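
`NE_QUERY` can be run with any SPARQL engine over a loaded ontology; the sketch below uses rdflib purely as an example (rdflib and the file path are assumptions, not requirements stated by this module):

```python
# Assumption: rdflib is installed; the ontology path is a placeholder.
from rdflib import Graph

from queries import NE_QUERY

graph = Graph()
graph.parse("path/to/ontology.owl")  # format inferred from the file suffix
for row in graph.query(NE_QUERY):
    # Each row pairs a named individual with one of its non-trivial types
    print(row.individual, row.other)
```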
--------------------------------------------------------------------------------
/ontochat/tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "%load_ext autoreload\n",
10 | "%autoreload 2"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "from utils import read_key, read_list\n",
20 | "\n",
21 | "API_KEY = read_key(\"key.txt\")\n",
22 | "# API_KEY = \"\" # or paste it here"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "# OntoChat API tutorial"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "## Story creation"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": null,
42 | "metadata": {},
43 | "outputs": [],
44 | "source": []
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "## CQ extraction"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": []
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "## CQ analysis"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": []
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "## Ontology pre-testing"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 3,
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "from ontolib import ChatInterface\n",
88 | "\n",
89 | "interface = ChatInterface(api_key=API_KEY)"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": 4,
95 | "metadata": {},
96 | "outputs": [],
97 | "source": [
98 | "from verbaliser import verbalise_ontology\n",
99 | "\n",
100 | "ontology_path = \"../../idea/data/samples/musicmeta.owl\"\n",
101 | "\n",
102 | "onto_about = \"music metadata\"\n",
103 | "onto_desc = \"The ontology is supposed to describe music metadata related to \"\\\n",
104 | " \"artists, compositions, performances, and recordings.\""
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 5,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": [
113 | "verb = verbalise_ontology(ontology_path, onto_about, onto_desc)"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 6,
119 | "metadata": {},
120 | "outputs": [
121 | {
122 | "name": "stdout",
123 | "output_type": "stream",
124 | "text": [
125 | "Ontology description: music metadata. The ontology is supposed to describe music metadata related to artists, compositions, performances, and recordings.\n",
126 | "The main classes of the ontology are listed below:\n",
127 | "- Abstract Score: An abstract concept that allows to refer to the actual musical content of a MusicEntity. This makes it possible to describe musical properties that imply the existence of a hypothetical score, which may not be necessarily, or formally, materialised by the creators of the musical entity.\n",
128 | "- Annotation Method: An annotation method refers to the nature of the procedure used for producing annotation on an artifact.\n",
129 | "- Award: An award is intended as a recognition for a creative outcome, or more generally, for an activity carried out over a period of time. Typically, an award is given by an organisation and follows a recurring event.\n",
130 | "- Broadcaster (subconcept of Agent): A class representing an entity or organization involved in broadcasting or transmitting audio or video content, including music.\n",
131 | "- Broadcasting Situation (subconcept of Situation): Represents a specific situation or context related to broadcasting activities. It encompasses various scenarios, conditions, or events that may impact the broadcast of music or other content. It can include properties such as location, time, and specific conditions.\n",
132 | "- Collection: A curated or organized grouping of musical works or related entities. It encompasses a cohesive set of musical compositions, recordings, scores, or other music-related items that are brought together based on a specific theme, genre, artist, or any other organizing principle.\n",
133 | "- Collection Concept: An abstract or conceptual grouping of musical works or related entities. It encompasses a conceptual framework or idea that serves as the basis for organizing and categorizing musical compositions, recordings, scores, or other music-related items.\n",
134 | "- Composition Part: A distinct section or segment of a musical composition. It encompasses the individual components or sections that make up a larger musical work, such as movements, sections, verses, choruses, or any other identifiable divisions within the composition.\n",
135 | "- Creative Action: An action or process involving creativity, imagination, and artistic expression. It encompasses activities related to the creation, production, or development of artistic works, including music. It can include properties such as creator, medium, and result.\n",
136 | "- Creative Process: Describes a series of actions, steps, or stages involved in the creative development or production of artistic works, including music. It encompasses the process of generating ideas, exploring concepts, refining techniques, and producing the final creative output. It can include properties such as participants, duration, and output.\n",
137 | "- Creative Task (subconcept of Task): A task or assignment involving creative activities in the context of music. It encompasses various activities that require artistic expression, innovation, and imagination to create or develop musical works, performances, compositions, or other creative endeavors.\n",
138 | "- Dedication Statement: A statement or expression of dedication, typically found in music albums, books, or other creative works. It is a declaration of gratitude, acknowledgment, or special recognition towards individuals, groups, or causes. It can include properties such as text, creator, and recipient.\n",
139 | "- DigitalScore (subconcept of Score): A digital or electronic version of a musical score. It encompasses the digital representation of musical notation, allowing for electronic distribution, display, and manipulation of the score.\n",
140 | "- Form Type: The type or structure of a musical composition. It encompasses the various standardized or recognized forms and structures that compositions can adopt, providing a framework for understanding and categorizing musical works based on their organizational patterns.\n",
141 | "- Instrumentation: The arrangement or combination of musical instruments used in a composition, performance, or recording. It describes the specific instruments, their roles, and how they interact to create the overall sound and texture of the music. It can include properties such as instruments used, roles, and techniques.\n",
142 | "- Key: The tonal center or tonic pitch of a musical composition. It denotes the specific pitch or note around which the composition revolves and provides a framework for harmonic and melodic relationships within the piece. It can include properties such as tonic pitch, mode, and related scales.\n",
143 | "- Libretto (subconcept of Text): The written text or script of an opera, oratorio, or other dramatic musical work. The libretto serves as the literary foundation for the performance, providing the dialogue, lyrics, and stage directions that guide the singers, actors, and production team.\n",
144 | "- License: The legal agreement or permission governing the usage and distribution of a musical work or its associated components. It encompasses the terms and conditions under which the music is licensed, including the rights granted, restrictions imposed, and obligations of the licensee.\n",
145 | "- Live Performance (subconcept of Musical Performance): A musical performance that takes place in real-time and is witnessed by an audience. It encompasses the act of performing music in a live setting, such as a concert, recital, or music festival, where the performers and audience interact directly.\n",
146 | "- Lyrics (subconcept of Text): The textual component of a musical composition that consists of the words, verses, or poetic expressions sung or spoken during the performance. Lyrics provide the vocal content and convey the narrative, emotions, or messages expressed through the music. They encompass the written representation of the lyrical content, which may be organized into lines, stanzas, or verses, forming an integral part of the composition.\n",
147 | "- Medium of Performance: The specific ensemble, group, or combination of instruments or voices used in a musical performance or recording. It describes the medium through which the musical composition is interpreted and brought to life.\n",
148 | "- Mode: A specific musical mode, which is a set of pitch relationships and tonal characteristics that define a particular scale and tonal center. It describes the arrangement of intervals and patterns of pitches used in a musical composition or performance.\n",
149 | "- Movement (subconcept of Composition Part): A self-contained section within a larger musical composition, typically found in multi-movement works such as symphonies, sonatas, or concertos. It encompasses a distinct and coherent portion of the composition that is characterized by its own unique musical ideas, structure, and expression.\n",
150 | "- Music Algorithm (subconcept of Music Artist): An algorithm or computational process used in the creation, analysis, or manipulation of music. It encompasses algorithms designed to generate musical compositions, analyze musical patterns, apply effects or transformations to audio, or perform other computational tasks related to music.\n",
151 | "- Music Annotation Time Interval: A specific interval or duration within a musical composition that is annotated or marked with additional information.\n",
152 | "- Music Artist: An individual or group involved in creating, performing, or producing music. It encompasses musicians, singers, bands, composers, conductors, producers, and other artistic entities within the music industry.\n",
153 | "- Music Ensamble Membership: The membership of a musician in a music ensemble. It captures the association between a musician and a specific ensemble, indicating that the musician is a member of the ensemble.\n",
154 | "- Music Ensenble (subconcept of Music Artist): A group or collective of musicians who perform together as an ensemble. It encompasses various types of musical groups, such as orchestras, bands, choirs, chamber ensembles, and jazz ensembles.\n",
155 | "- Music Entity (subconcept of InformationObject): This class represents an Information Object, which is defined as the sum of all the elements that make up a piece of music. A music entity abstract a musical work, as it may results from a composition process, an improvisation, or be the result of non-written transmission.\n",
156 | "- Music Genre: A specific category or classification of music that shares common characteristics, styles, or cultural origins. It encompasses distinct musical traditions, styles, or movements that define a particular genre.\n",
157 | "- Music Sheet (subconcept of Score): A physical or printed document that contains the musical notation of a composition. It encompasses the sheet music, which provides a visual representation of the music, including notes, rhythms, lyrics, and other musical symbols.\n",
158 | "- Musical Performance (subconcept of InformationRealization, Creative Process): A live or recorded rendition of a musical composition. It encompasses the act of performing music, whether by musicians, singers, or other performers, to convey the intended artistic expression and interpretation of the composition.\n",
159 | "- Musician (subconcept of Music Artist, Person): An individual who is skilled in playing a musical instrument, singing, or composing music. It encompasses instrumentalists, vocalists, composers, conductors, and other individuals actively engaged in the creation, performance, or production of music.\n",
160 | "- Note\n",
161 | "- Opus Stantement: A statement or declaration related to a musical opus or composition. It is typically used to provide information about the creation, purpose, or significance of a specific musical work.\n",
162 | "- Publication Situation (subconcept of Situation): Descibes the situation or context in which a musical work or recording is published or made publicly available. A PublicationSituation encompasses various aspects related to the publication of music, including the timing, location, medium, format, and associated entities involved in the publication process.\n",
163 | "- Publisher (subconcept of Agent): An entity or organization responsible for publishing or distributing musical works, scores, recordings, or other music-related content. It encompasses companies, labels, or individuals that handle the production, promotion, and dissemination of music to the public.\n",
164 | "- Recording: A recorded performance or rendition of a musical composition. It refers to the capturing of sound, whether in a studio or live setting, for the purpose of creating a permanent audio representation of a musical performance.\n",
165 | "- Recording Equipment: The equipment and tools used in the process of recording music. It encompasses the various devices, instruments, and technologies employed to capture and produce high-quality audio recordings.\n",
166 | "- Recording Process (subconcept of Creative Process): The process of capturing and creating a recorded version of a musical performance or composition. It encompasses the various stages and techniques involved in recording, editing, mixing, and mastering the audio to produce a finalized recording.\n",
167 | "- Release: The distribution and availability of a recorded musical work to the public. It encompasses the act of officially making a recorded music product, such as an album or single, available for purchase, streaming, or other forms of consumption.\n",
168 | "- Score: A written, printed or digital document that contains the musical notation of a composition. It encompasses the musical score, which serves as a visual representation of the music, including the arrangement of notes, rhythms, dynamics, and other musical symbols.\n",
169 | "- Score Part: A specific part or section within a musical score. It refers to a distinct musical line or voice within a composition that is intended to be performed by a particular instrument or group of instruments.\n",
170 | "- Section (subconcept of Composition Part): A distinct and identifiable segment within a musical composition. It encompasses smaller divisions within a composition that contribute to its overall structure, such as verses, choruses, bridges, or any other delineated portions of the music.\n",
171 | "- Studio Performance (subconcept of Musical Performance): A musical performance that takes place in a recording studio or controlled environment specifically designed for audio recording. It encompasses the act of performing music with the purpose of capturing high-quality audio for later production, mixing, and mastering.\n",
172 | "- Text: A written or textual component of a musical composition. It encompasses the textual elements, such as lyrics or libretto, that accompany or form part of a musical work.\n",
173 | "- Text Fragment: A fragment or excerpt of text within a larger textual component of a musical composition. It encapsulates a specific portion of the lyrics, libretto, or other textual elements associated with the music. A TextFragment provides a smaller unit of textual content that can be individually referenced or analyzed within the context of the composition.\n",
174 | "\n",
175 | "The main named entities (individuals) are listed below:\n",
176 | "- hasSource The domain of this relation can be: Text. The range of this relation can be: Text.\n",
177 | "- isDerivedFrom The domain of this relation can be: Music Entity. The range of this relation can be: Music Entity.\n",
178 | "- Creates: Represents the relationship between a CreativeProcess and the MusicEntity that is produced or brought into existence as a result of the process. It indicates that the CreativeProcess is responsible for generating, composing, producing, or otherwise creating the mentioned MusicEntity. The domain of this relation can be: Creative Process. The range of this relation can be: Music Entity.\n",
179 | "- Has broadcaster: Specifies that a BroadcastingSituation involves a specific Broadcaster. This property represents the relationship between a broadcasting situation and the entity responsible for the broadcasting. It indicates that the mentioned BroadcastingSituation is associated with the mentioned Broadcaster, who is responsible for transmitting or airing the content. The domain of this relation can be: Broadcasting Situation. The range of this relation can be: Broadcaster.\n",
180 | "- Has collaborated with: Specifies that a MusicArtist has collaborated with another MusicArtist. This property represents the relationship between two MusicArtists who have worked together on a musical project or performance. It indicates that the MusicArtist has engaged in a collaborative effort with the mentioned MusicArtist, indicating a shared creative endeavor, joint performance, or joint contribution to a musical work. The domain of this relation can be: Music Artist. The range of this relation can be: Music Artist.\n",
181 | "- Has composition part (subproperty of hasPart): Represents the relationship between an AbstractScore and its constituent CompositionParts, such as Movements or Sections. It signifies that the AbstractScore includes or comprises the mentioned CompositionPart, which can be a distinct musical section, movement, or other structural elements within the composition. The domain of this relation can be: Abstract Score. The range of this relation can be: Composition Part.\n",
182 | "- Has dedication statement The domain of this relation can be: Music Entity. The range of this relation can be: Dedication Statement.\n",
183 | "- Has equipment: Specifies that a RecordingProcess involves a specific RecordingEquipment. This property represents the relationship between a RecordingProcess and the equipment used in the recording process. It indicates that the mentioned RecordingProcess utilizes the mentioned RecordingEquipment for capturing or producing the recorded content. The domain of this relation can be: Recording Process. The range of this relation can be: Recording Equipment.\n",
184 | "- Has form type: Indicates that an AbstractScore has a specific FormType associated with it. This property represents the relationship between an AbstractScore and its designated FormType, which categorizes the overall structure or musical form of the composition. It signifies that the AbstractScore conforms to the mentioned FormType, which could include classifications such as Sonata, Symphony, Concerto, or other recognized musical forms. The domain of this relation can be: Abstract Score. The range of this relation can be: Form Type.\n",
185 | "- hasGenre: pecifies the genre or genres associated with a MusicArtist. This property represents the relationship between a MusicArtist and the MusicGenre(s) that are attributed or associated with their artistic output or musical style. It indicates the genre(s) that best describe the musical genre(s) or genres in which the MusicArtist operates or specializes. The domain of this relation can be: Music Artist. The range of this relation can be: Music Genre.\n",
186 | "- Has instrumentation: Specifies that an AbstractScore has an Instrumentation associated with it. This property represents the relationship between an AbstractScore and its corresponding Instrumentation. It indicates that the AbstractScore is intended to be performed by the instruments or voices specified in the Instrumentation. The domain of this relation can be: Abstract Score. The range of this relation can be: Instrumentation.\n",
187 | "- Has key: Specifies that an AbstractScore is composed or written in a particular Key. This property represents the relationship between an AbstractScore and the Key in which it is intended to be performed or understood. It indicates that the AbstractScore is structured or based on the mentioned Key, which defines the tonal center, pitch relationships, and overall harmonic framework of the musical composition. The domain of this relation can be: Abstract Score. The range of this relation can be: Key.\n",
188 | "- Has license: Specifies that a Recording, Release, or Score is associated with a specific License. This property represents the relationship between a musical recording, a release of music, or a musical score, and the license that governs its usage and distribution. It indicates that the mentioned Recording, Release, or Score is subject to the terms and conditions defined by the mentioned License. The domain of this relation can be: Recording, or Release, or Score. The range of this relation can be: License.\n",
189 | "- Has medium: Specifies that a ScorePart is performed using a specific MediumOfPerformance. This property represents the relationship between a ScorePart and the medium or instrument through which it is intended to be performed. It indicates that the ScorePart is designed or written for the mentioned MediumOfPerformance, which could include instruments, voices, or other performance mediums. The domain of this relation can be: Score Part. The range of this relation can be: Medium of Performance.\n",
190 | "- Has mode\n",
191 | "- Has music entity part (subproperty of hasPart): Specifies the relationship between a MusicEntity and its constituent parts or components. This property represents the connection between a MusicEntity and the individual parts or components that comprise it. It indicates that the MusicEntity is composed of or includes the mentioned parts, which may consist of specific sections, movements, instrumental or vocal parts, or other structural elements that contribute to the overall composition. The domain of this relation can be: Music Entity. The range of this relation can be: Abstract Score, or Instrumentation, or Music Entity, or Text.\n",
192 | "- Has opus statement: Specifies that a MusicEntity has an OpusStatement associated with it. This property represents the relationship between a MusicEntity and its corresponding OpusStatement. It indicates that the MusicEntity is identified or labeled with a specific OpusStatement, which typically includes information such as the opus number, title, composer, and other relevant details related to the work's cataloging or classification. The domain of this relation can be: Music Entity. The range of this relation can be: Opus Stantement.\n",
193 | "- Publication situation: Specifies that a Score or Recording is associated with a specific PublicationSituation. This property represents the relationship between a musical score or recording and the situation or context in which it is published or made publicly available. It indicates that the mentioned Score or Recording is subject to the conditions and circumstances defined by the mentioned PublicationSituation. The domain of this relation can be: Release, or Score. The range of this relation can be: Publication Situation.\n",
194 | "- Has publisher: Specifies that a PublicationSituation involves a specific Publisher. This property represents the relationship between a publication situation and the entity or organization responsible for publishing the content. It indicates that the mentioned PublicationSituation is associated with the mentioned Publisher, who is responsible for releasing or making the content available to the public. The domain of this relation can be: Publication Situation. The range of this relation can be: Publisher.\n",
195 | "- Has recording The domain of this relation can be: Music Entity. The range of this relation can be: Recording.\n",
196 | "- Has score: Indicates that an Instrumentation is associated with a Score. This property represents the relationship between an Instrumentation and the Score that represents its musical notation. It signifies that the Instrumentation is accompanied by the mentioned Score, which provides the written or printed representation of the musical arrangement for the instruments or voices involved. The domain of this relation can be: Instrumentation. The range of this relation can be: Score.\n",
197 | "- Has score part (subproperty of hasPart): Specifies that a Score includes a specific ScorePart. This property represents the relationship between a Score and its constituent ScoreParts, which are individual sections or components of the overall musical notation. It indicates that the Score comprises the mentioned ScorePart, which can represent a particular instrument's part, vocal line, or other segmented elements within the musical composition. The domain of this relation can be: Score. The range of this relation can be: Score Part.\n",
198 | "- Has source The domain of this relation can be: Text. The range of this relation can be: Text.\n",
199 | "- Has text: Specifies that an AbstractScore has associated Text. This property represents the relationship between an AbstractScore and the Text associated with it. It indicates that the AbstractScore contains or includes the mentioned Text, which provides additional textual information, instructions, or annotations related to the score. The domain of this relation can be: Abstract Score. The range of this relation can be: Text.\n",
200 | "- Has text part (subproperty of hasPart): Specifies that a Text has a TextFragment associated with it. This property represents the relationship between a Text, such as lyrics or written text, and its corresponding TextFragment. It indicates that the Text contains or includes the mentioned TextFragment, which represents a portion or segment of the overall Text. The domain of this relation can be: Text. The range of this relation can be: Text Fragment.\n",
201 | "- Involves creative action (subproperty of involves): Specifies that a CreativeProcess involves a CreativeAction. This property represents the relationship between a CreativeProcess and the CreativeAction that is part of or integral to the process. The domain of this relation can be: Creative Process. The range of this relation can be: Creative Action.\n",
202 | "- Involves member of music ensemble (subproperty of involvesAgent): Specifies the involvement of a MusicArtist as a member of a MusicEnsemble. This property represents the relationship between a MusicEnsembleMembership and the MusicArtist who is actively involved or participates as a member of the ensemble. The domain of this relation can be: Music Ensamble Membership. The range of this relation can be: Music Artist.\n",
203 | "- Involves music ensemble (subproperty of involvesAgent): Specifies the relationship between a MusicEnsembleMembership and a MusicEnsemble, indicating that the membership involves or includes the specified music ensemble. The domain of this relation can be: Music Ensamble Membership. The range of this relation can be: Music Ensenble.\n",
204 | "- Is broadcasted in: Specifies that a Recording is broadcasted in a specific BroadcastingSituation. This property represents the relationship between a recorded musical content and the broadcasting situation in which it is aired or transmitted. It indicates that the mentioned Recording is made available to the public through the mentioned BroadcastingSituation. The domain of this relation can be: Recording. The range of this relation can be: Broadcasting Situation.\n",
205 | "- Is broadcaster of: Inverse relationship of \"hasBroadcaster.\" It indicates that a Broadcaster is associated with a specific BroadcastingSituation. This property establishes the connection from the Broadcaster's perspective, indicating that they are responsible for broadcasting the content in the mentioned BroadcastingSituation. The domain of this relation can be: Broadcaster. The range of this relation can be: Broadcasting Situation.\n",
206 | "- Is composition part of (subproperty of isPartOf): Inverse relationship of \"hasCompositionPart.\" It indicates that a CompositionPart, such as a Movement or Section, is a component or part of a specific AbstractScore. This property establishes the connection from the CompositionPart's perspective, indicating its membership or inclusion within the larger AbstractScore. The domain of this relation can be: Composition Part. The range of this relation can be: Abstract Score.\n",
207 | "- Is created by: Inverse relationship of \"creates.\" It indicates that a MusicEntity is created by a specific CreativeProcess. This property establishes the connection from the MusicEntity's perspective, indicating the creative process that is responsible for the entity's generation, composition, production, or creation. The domain of this relation can be: Music Entity. The range of this relation can be: Creative Process.\n",
208 | "- is dedicated to: Specifies that a DedicationStatement is dedicated to a specific Person. This property represents the relationship between a DedicationStatement, typically found in musical works, and the person or entity to whom the dedication is addressed. It indicates that the DedicationStatement is intended as a tribute or honor to the mentioned Person. The domain of this relation can be: Dedication Statement. The range of this relation can be: Person.\n",
209 | "- Is equipment of: Inverse relationship of \"hasEquipment.\" It indicates that a RecordingEquipment is associated with a specific RecordingProcess. This property establishes the connection from the RecordingEquipment's perspective, indicating that it is the equipment used in the mentioned RecordingProcess. The domain of this relation can be: Recording Equipment. The range of this relation can be: Recording Process.\n",
210 | "- Is form type of: Inverse relationship of \"hasFormType.\" It indicates that a FormType is associated with a specific AbstractScore. This property establishes the connection from the FormType's perspective, indicating that it characterizes the overall musical structure or form of the corresponding AbstractScore. The domain of this relation can be: Form Type. The range of this relation can be: Abstract Score.\n",
211 | "- Is formation place of: Inverse relationship of \"wasFormedIn.\" Establishes the connection from the place's perspective, highlighting its significance as the location where the ensemble was formed or established. It provides information about the historical and geographical association between the place and the formation of music ensembles, emphasizing the role of the place in shaping the ensemble's origin and identity. The domain of this relation can be: Place. The range of this relation can be: Music Ensenble.\n",
212 | "- Is genre of: Inverse relationship of \"hasGenre.\" It specifies that a MusicGenre is attributed or associated with a MusicArtist. The domain of this relation can be: Music Genre. The range of this relation can be: Music Artist.\n",
213 | "- Is influenced by: Specifies that a MusicArtist is influenced by another MusicArtist. This property represents the relationship between a MusicArtist and the MusicArtist(s) who have had a significant impact on their artistic development or musical style. It indicates that the MusicArtist has been influenced by the ideas, techniques, or musical expressions of the mentioned MusicArtist(s), which have played a role in shaping their own musical approach or creative output. The domain of this relation can be: Music Artist. The range of this relation can be: Music Artist.\n",
214 | "- Is instrumentation of: Inverse relationship of \"hasInstrumentation.\" It indicates that an Instrumentation is associated with a specific AbstractScore. This property establishes the connection from the Instrumentation's perspective, indicating that it is the specified instrumentation or vocal arrangement intended for the corresponding AbstractScore. The domain of this relation can be: Instrumentation. The range of this relation can be: Abstract Score.\n",
215 | "- Is involved in creative process (subproperty of isInvolvedIn): Inverse relationship of \"involvesCreativeAction.\" It indicates that a CreativeAction is involved in a specific CreativeProcess. This property establishes the connection from the CreativeAction's perspective, indicating its active involvement or participation within the context of the CreativeProcess.\n",
216 | "- Is key of: Inverse relationship of \"hasKey.\" It indicates that a Key is associated with a specific AbstractScore. This property establishes the connection from the Key's perspective, indicating that it is the key signature or tonal center assigned to the corresponding AbstractScore. The domain of this relation can be: Key. The range of this relation can be: Abstract Score.\n",
217 | "- Is license of: Inverse relationship of \"hasLicense.\" It indicates that a License is associated with a specific Recording, Release, or Score. This property establishes the connection from the License's perspective, indicating that it governs the usage and distribution of the mentioned Recording, Release, or Score. The domain of this relation can be: License. The range of this relation can be: Recording, or Score, or Release.\n",
218 | "- Is medium of: nverse relationship of \"hasMedium.\" It indicates that a MediumOfPerformance is associated with a specific ScorePart. This property establishes the connection from the MediumOfPerformance's perspective, indicating that it is the performance medium or instrument for which the mentioned ScorePart is intended. The domain of this relation can be: Medium of Performance. The range of this relation can be: Score Part.\n",
219 | "- Is opus statement of: Inverse relationship of \"hasOpusStatement.\" It indicates that an OpusStatement is associated with a specific MusicEntity. This property establishes the connection from the OpusStatement's perspective, indicating that it is the statement or label attributed to a particular MusicEntity. The inverse property \"isOpusStatementOf\" allows for the modeling and description of the relationship between OpusStatements and the MusicEntities they pertain to, providing a means to identify and associate OpusStatements with their corresponding musical works. The domain of this relation can be: Opus Stantement. The range of this relation can be: Music Entity.\n",
220 | "- Is part of music entity (subproperty of isPartOf): Inverse relationship of \"hasMusicEntityPart.\" It indicates that a MusicEntity part or component is part of a specific MusicEntity. This property establishes the connection from the MusicEntity part's perspective, indicating its inclusion or membership within the larger MusicEntity. The inverse property \"isPartOfMusicEntity\" allows for the modeling and description of the hierarchical or structural relationships between MusicEntity parts and the MusicEntities they belong to. The domain of this relation can be: Abstract Score, or Music Entity, or Text, or Instrumentation. The range of this relation can be: Music Entity.\n",
221 | "- Is part of release (subproperty of isPartOf): Specifies that a Recording is part of a specific Release. This property represents the relationship between a recorded musical content and the larger release or collection in which it is included. It indicates that the mentioned Recording is a component or part of the mentioned Release. The domain of this relation can be: Recording. The range of this relation can be: Release.\n",
222 | "- Is produced by recording process (subproperty of isRealizedBy): Inverse relationship of \"producesRecording.\" It indicates that a Recording is produced by a specific RecordingProcess. This property establishes the connection from the Recording's perspective, indicating that it is the result or output of the mentioned RecordingProcess. The domain of this relation can be: Recording. The range of this relation can be: Recording Process.\n",
223 | "- Is publication situation of: Inverse relationship of \"hasPublicationSituation.\" It indicates that a PublicationSituation is associated with a specific Score or Recording. This property establishes the connection from the PublicationSituation's perspective, indicating that it defines the conditions and circumstances under which the mentioned Score or Recording is published or made publicly available. The domain of this relation can be: Publication Situation. The range of this relation can be: Recording, or Score.\n",
224 | "- Is publisher of: Iverse relationship of \"hasPublisher.\" It indicates that a Publisher is associated with a specific PublicationSituation. This property establishes the connection from the Publisher's perspective, indicating that they are responsible for publishing the content in the mentioned PublicationSituation. The domain of this relation can be: Publisher. The range of this relation can be: Publication Situation.\n",
225 | "- Is realisation of: Iverse relationship of \"isRealisedBy.\" It indicates that a Score or ScorePart is a realisation or tangible representation of a specific AbstractScore or Instrumentation. This property establishes the connection from the Score or ScorePart's perspective, indicating that it materializes or embodies the musical content or instrumentation described in the corresponding AbstractScore or Instrumentation. The domain of this relation can be: Score, or Score Part. The range of this relation can be: Abstract Score, or Instrumentation.\n",
226 | "- Is realized by The domain of this relation can be: Music Entity. The range of this relation can be: Musical Performance.\n",
227 | "- Is realised in: Represents the relationship between an AbstractScore or Instrumentation and the Score or ScorePart that brings it to life in a tangible or performable form. It signifies that the AbstractScore or Instrumentation is manifested or rendered into a concrete representation or performance through the mentioned Score or ScorePart. The domain of this relation can be: Abstract Score, or Instrumentation. The range of this relation can be: Score, or Score Part.\n",
228 | "- Is recorded by: Specifies that a MusicEntity or MusicalPerformance is recorded through a RecordingProcess. This property represents the relationship between a MusicEntity or MusicalPerformance and the process of recording it, typically involving capturing the audio or video performance in a recorded format. The domain of this relation can be: Music Entity, or Musical Performance. The range of this relation can be: Recording Process.\n",
229 | "- Is recording process of: Inverse relationship of \"isRecordedBy.\" It indicates that a RecordingProcess is associated with a specific MusicEntity or MusicalPerformance. This property establishes the connection from the RecordingProcess's perspective, indicating that it is the process responsible for capturing or documenting the mentioned MusicEntity or MusicalPerformance. The domain of this relation can be: Recording Process. The range of this relation can be: Music Entity, or Musical Performance.\n",
230 | "- Is score of: Inverse relationship of \"hasScore.\" It indicates that a Score is associated with a specific Instrumentation. This property establishes the connection from the Score's perspective, indicating that it serves as the musical notation for the mentioned Instrumentation. The domain of this relation can be: Score. The range of this relation can be: Instrumentation.\n",
231 | "- Is score part of (subproperty of isPartOf): Inverse relationship of \"hasScorePart.\" It indicates that a ScorePart is a component or part of a specific Score. This property establishes the connection from the ScorePart's perspective, indicating its membership or inclusion within the larger Score. The domain of this relation can be: Score Part. The range of this relation can be: Score.\n",
232 | "- Is text fragment of (subproperty of isPartOf): Inverse relationship of \"hasTextFragment.\" It indicates that a TextFragment is associated with a specific Text. This property establishes the connection from the TextFragment's perspective, indicating that it is a fragment or segment of the larger Text. The domain of this relation can be: Text Fragment. The range of this relation can be: Text.\n",
233 | "- Is text of: Inverse relationship of \"hasText.\" It indicates that a Text is associated with a specific AbstractScore. This property establishes the connection from the Text's perspective, indicating that it is the textual content or annotations specifically intended for the corresponding AbstractScore. The domain of this relation can be: Text. The range of this relation can be: Abstract Score.\n",
234 | "- Is trained on: Specifies that a MusicAlgorithm has been trained on a specific MusicDataset. This property indicates the relationship between a MusicAlgorithm and the dataset it was trained on, highlighting the input data used to train and develop the algorithm's functionality and knowledge in the field of music. The domain of this relation can be: Music Algorithm. The range of this relation can be: MusicDataset.\n",
235 | "- Is training of: Inverse relationship of \"isTrainedOn.\" It indicates that a specific MusicDataset is used as training data for a MusicAlgorithm. The inverse property establishes the connection from the dataset's perspective, highlighting its role in training and contributing to the development of the algorithm's functionality and knowledge in the field of music. The domain of this relation can be: MusicDataset. The range of this relation can be: Music Algorithm.\n",
236 | "- Nominated for award: Represents the relationship between a MusicArtist and the Award for which they have received a nomination. It indicates that the MusicArtist has been recognized or considered for the Award, highlighting their achievement or recognition in the music industry. The domain of this relation can be: Music Artist. The range of this relation can be: Award.\n",
237 | "- Produces recording: pecifies that a RecordingProcess produces a specific Recording. This property represents the relationship between a RecordingProcess and the resulting recorded output. It indicates that the mentioned RecordingProcess is responsible for the creation or production of the mentioned Recording. The domain of this relation can be: Recording Process. The range of this relation can be: Recording.\n",
238 | "- Received award: Specifies that a MusicArtist has received a specific Award. It indicates that the MusicArtist has been honored or recognized with the mentioned Award, highlighting their achievement or excellence in the music industry. The domain of this relation can be: Music Artist. The range of this relation can be: Award.\n",
239 | "- Was formed in: Specifies the relationship between a MusicEnsemble and a Place, indicating that the ensemble was formed or established in a particular location. This property highlights the geographical origin or location where the ensemble came into existence or was initially established, providing information about its place of formation. The domain of this relation can be: Music Ensenble. The range of this relation can be: Place.\n",
240 | "\n",
241 | "The main relations of the ontology are listed below:\n",
242 | "- Instrumentation is an instance of class Creative Task.\n",
243 | "- Lyrics writing is an instance of class Creative Task.\n",
244 | "- Music Writing is an instance of class Creative Task.\n",
245 | "- Orchestration is an instance of class Creative Task.\n",
246 | "\n"
247 | ]
248 | }
249 | ],
250 | "source": [
251 | "print(verb)"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": null,
257 | "metadata": {},
258 | "outputs": [],
259 | "source": []
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 | "### Onto2CQ and CQ-pretest"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 7,
271 | "metadata": {},
272 | "outputs": [],
273 | "source": [
274 | "from ontolib import extract_competency_questions, test_competency_questions\n",
275 | "from ontolib import split_cq_test_data"
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": null,
281 | "metadata": {},
282 | "outputs": [],
283 | "source": [
284 | "extracted_cqs = extract_competency_questions(\n",
285 | " onto_verbalisation=verb,\n",
286 | " chat_interface=interface)"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": null,
292 | "metadata": {},
293 | "outputs": [],
294 | "source": [
295 | "extracted_cqs"
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": null,
301 | "metadata": {},
302 | "outputs": [],
303 | "source": []
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": 23,
308 | "metadata": {},
309 | "outputs": [
310 | {
311 | "data": {
312 | "text/plain": [
313 | "'Yes'"
314 | ]
315 | },
316 | "execution_count": 23,
317 | "metadata": {},
318 | "output_type": "execute_result"
319 | }
320 | ],
321 | "source": [
322 | "import re\n",
323 | "\n",
324 | "match = re.search(r\"^(Yes|No),\\s+(.*)\", \"Yes, asdasdasd asd a sd as d\")\n",
325 | "explanation = match.group(1)\n",
326 | "\n",
327 | "explanation"
328 | ]
329 | },
330 | {
331 | "cell_type": "markdown",
332 | "metadata": {},
333 | "source": [
334 | "#### On a sample"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": 25,
340 | "metadata": {},
341 | "outputs": [
342 | {
343 | "name": "stderr",
344 | "output_type": "stream",
345 | "text": [
346 | "100%|██████████| 4/4 [00:08<00:00, 2.13s/it]\n"
347 | ]
348 | },
349 | {
350 | "data": {
351 | "text/plain": [
352 | "{'Who are the parents of a music artist?': ('No', ''),\n",
353 | " 'Where did a music artist study?': ('No', ''),\n",
354 | " 'What is the genre of a music entity?': ('Yes', ''),\n",
355 | " 'Who are the authors of a music entity?': ('Yes', '')}"
356 | ]
357 | },
358 | "execution_count": 25,
359 | "metadata": {},
360 | "output_type": "execute_result"
361 | }
362 | ],
363 | "source": [
364 | "cq_list = [\n",
365 | " \"Who are the parents of a music artist?\",\n",
366 | " \"Where did a music artist study?\",\n",
367 | " \"What is the genre of a music entity?\",\n",
368 | " \"Who are the authors of a music entity?\"]\n",
369 | "\n",
370 | "cq_dict = test_competency_questions(verb, cq_list, chat_interface=interface)\n",
371 | "cq_dict"
372 | ]
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {},
377 | "source": [
378 | "#### On all the CQs"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": 35,
384 | "metadata": {},
385 | "outputs": [
386 | {
387 | "name": "stdout",
388 | "output_type": "stream",
389 | "text": [
390 | "Loaded 28 positive and 28 negative CQs\n"
391 | ]
392 | }
393 | ],
394 | "source": [
395 | "cq_pos_x = read_list(\"../data/music_meta_cqs.txt\")\n",
396 | "cq_neg_x = read_list(\"../data/music_meta_cqs_neg.txt\")\n",
397 | "\n",
398 | "cq_pos_label = [1] * len(cq_pos_x)\n",
399 | "cq_neg_label = [0] * len(cq_neg_x)\n",
400 | "\n",
401 | "print(f\"Loaded {len(cq_pos_x)} positive and {len(cq_neg_x)} negative CQs\")"
402 | ]
403 | },
404 | {
405 | "cell_type": "code",
406 | "execution_count": 29,
407 | "metadata": {},
408 | "outputs": [
409 | {
410 | "name": "stderr",
411 | "output_type": "stream",
412 | "text": [
413 | "100%|██████████| 28/28 [02:34<00:00, 5.53s/it]\n"
414 | ]
415 | },
416 | {
417 | "data": {
418 | "text/plain": [
419 | "{'Which is the composer of a musical piece?': ('Yes', ''),\n",
420 | " 'Is the composer of a musical piece known?': ('Yes',\n",
421 | " ', the ontology described can address the competency question \"Is the composer of a musical piece known?\" The ontology includes classes and properties related to composers, compositions, and creative processes, allowing for the representation and retrieval of information about the composer of a musical piece. By querying the ontology, it is possible to determine whether the composer of a musical piece is known or not.'),\n",
422 | " 'Which are the members of a music ensemble?': ('Yes', ''),\n",
423 | " 'Which role a music artist played within a music ensemble?': ('Yes', ''),\n",
424 | " 'In which time interval has a music artist been a member of a music ensemble?': ('Yes',\n",
425 | " ''),\n",
426 | " 'Where was a music ensemble formed?': ('Yes', ''),\n",
427 | " 'Which award was a music artist nominated for?': ('Yes', ''),\n",
428 | " 'Which award was received by a music artist?': ('Yes',\n",
429 | " ', the ontology can address the competency question \"Which award was received by a music artist?\" The ontology includes the class \"Award\" and the relation \"Received award\" which connects a MusicArtist to the Award they have received. This allows for querying the ontology to find the award(s) received by a specific music artist.'),\n",
430 | " 'Which music artists has a music artist been influenced by?': ('Yes', ''),\n",
431 | " 'Which music artist has a music artist collaborated with?': ('Yes',\n",
432 | " ', the ontology can address the competency question \"Which music artist has a music artist collaborated with?\" The ontology includes the \"Has collaborated with\" relation, which specifies the collaboration between two MusicArtists. This relation allows for querying and retrieving information about the collaborations between music artists.'),\n",
433 | " 'Which is the start date of the activity of a music artist?': ('No', ''),\n",
434 | " 'Which is the end date of the activity of a music artist?': ('No', ''),\n",
435 | " 'Which is the name of a music artist?': ('Yes', ''),\n",
436 | " 'Which is the alias of a music artist?': ('No', ''),\n",
437 | " 'Which is the language of the name/alias of a music artist?': ('No', ''),\n",
438 | " 'Which music dataset has a music algorithm been trained on?': ('Yes', ''),\n",
439 | " 'Which is the process that led to the creation of a musical piece?': ('Yes',\n",
440 | " ''),\n",
441 | " 'In which time interval did the creation process took place?': ('Yes',\n",
442 | " ', the ontology described can address the competency question \"In which time interval did the creation process take place?\" The ontology includes classes and properties related to the creative process, such as CreativeProcess, CreativeAction, and AnnotationMethod. It also includes properties like \"isDerivedFrom\" and \"Creates\" that can be used to track the creation process and infer the time interval in which it took place.'),\n",
443 | " 'Where did the creation process took place?': ('Yes', ''),\n",
444 | " 'Which are the creative actions composing the creation process of a musical piece?': ('Yes',\n",
445 | " ''),\n",
446 | " 'Which task was executed by a creative action?': ('Yes', ''),\n",
447 | " 'Which are the parts of a musical piece?': ('Yes', ''),\n",
448 | " 'Which collection is a musical piece member of?': ('Yes',\n",
449 | " ', the ontology can address the competency question \"Which collection is a musical piece member of?\"'),\n",
450 | " 'Where was a musical piece performed?': ('Yes',\n",
451 | " ', the ontology described can address the competency question \"Where was a musical piece performed?\" The ontology includes classes such as Live Performance, Studio Performance, Musical Performance, and Broadcasting Situation, which are all relevant to capturing information about the location or context in which a musical piece is performed. Additionally, the ontology includes properties such as Is broadcasted in, Is part of release, and Publication situation, which can be used to specify the specific situations or contexts in which a musical piece is performed or made available to the public.'),\n",
452 | " 'When was a musical piece performed?': ('Yes', ''),\n",
453 | " 'Which music artists took part to a musical performance?': ('Yes', ''),\n",
454 | " 'Which is the recording process that recorded a musical performance?': ('Yes',\n",
455 | " ', the ontology can address the competency question \"Which is the recording process that recorded a musical performance?\" The ontology includes the class \"RecordingProcess\" and the property \"isRecordedBy\" which connects a RecordingProcess to a MusicEntity or MusicalPerformance. By querying the ontology using this property, it is possible to determine the recording process that recorded a specific musical performance.'),\n",
456 | " 'Which is the recording produced by a recording process?': ('Yes', '')}"
457 | ]
458 | },
459 | "execution_count": 29,
460 | "metadata": {},
461 | "output_type": "execute_result"
462 | }
463 | ],
464 | "source": [
465 | "cq_dict_pos = test_competency_questions(\n",
466 | " verb, cq_pos_x, chat_interface=interface)\n",
467 | "cq_dict_pos"
468 | ]
469 | },
470 | {
471 | "cell_type": "code",
472 | "execution_count": 32,
473 | "metadata": {},
474 | "outputs": [
475 | {
476 | "name": "stderr",
477 | "output_type": "stream",
478 | "text": [
479 | " 0%| | 0/28 [00:00, ?it/s]"
480 | ]
481 | },
482 | {
483 | "name": "stderr",
484 | "output_type": "stream",
485 | "text": [
486 | "100%|██████████| 28/28 [02:45<00:00, 5.92s/it]\n"
487 | ]
488 | },
489 | {
490 | "data": {
491 | "text/plain": [
492 | "{'In which year did a music artist achieve their highest popularity within a music ensemble?': ('No',\n",
493 | " ''),\n",
494 | " 'To which record label is a music ensemble currently signed?': ('No', ''),\n",
495 | " 'Which music critic gave a music artist a negative review?': ('No', ''),\n",
496 | " 'For which category did a music artist lose an award?': ('No', ''),\n",
497 | " 'Which music artists are considered rivals of a music artist?': ('No', ''),\n",
498 | " 'To which music artist has a music artist been compared?': ('No', ''),\n",
499 | " 'Is a music artist currently touring?': ('No',\n",
500 | " \", the ontology described does not provide explicit information or classes related to a music artist's touring activities. It focuses more on describing music metadata related to artists, compositions, performances, recordings, and other aspects of the music industry. To address the question of whether a music artist is currently touring, additional information or classes related to tour schedules, concert dates, or performance locations would need to be included in the ontology.\"),\n",
501 | " \"What is the title of a music artist's most popular song?\": ('No', ''),\n",
502 | " \"Which language is most used in a music artist's lyrics?\": ('No',\n",
503 | " \". The ontology described does not include any classes or properties related to language or lyrics analysis. It focuses more on the structural and organizational aspects of music metadata, such as compositions, performances, recordings, and their relationships. To address the question of which language is most used in a music artist's lyrics, additional classes and properties related to language analysis and text processing would need to be included in the ontology.\"),\n",
504 | " 'Does a music algorithm favor a specific genre?': ('Yes',\n",
505 | " ', the ontology described can address the competency question \"Does a music algorithm favor a specific genre?\" The ontology includes the necessary classes and relations to represent music algorithms, music genres, and the relationship between them. By using the \"isTrainedOn\" and \"isTrainedBy\" relations, it is possible to infer whether a music algorithm has been trained on a specific music genre or if a music genre has influenced the training of a music algorithm.'),\n",
506 | " 'What was the intended audience for a musical piece?': ('No', ''),\n",
507 | " 'Which emotions does the musical piece attempt to evoke?': ('No',\n",
508 | " '. The ontology described does not explicitly include classes or properties related to emotions or the attempt to evoke emotions in musical pieces.'),\n",
509 | " 'Who are the parents of a music artist?': ('No', ''),\n",
510 | " 'Where did a music artist study?': ('No', ''),\n",
511 | " 'Is a music work associated to any case of plagiarism?': ('No',\n",
512 | " ', the ontology described does not directly address the competency question \"Is a music work associated with any case of plagiarism?\" The ontology focuses on describing music metadata related to artists, compositions, performances, and recordings, but it does not include specific concepts or relations related to plagiarism detection or identifying cases of plagiarism in music works. To address the competency question, additional concepts and relations related to plagiarism detection and analysis would need to be added to the ontology.'),\n",
513 | " 'On which streaming platforms can I find a recording?': ('No', ''),\n",
514 | " 'How many listeners did a piece receive in a specific country?': ('No', ''),\n",
515 | " 'Is a piece similar to another?': ('No',\n",
516 | " ', the ontology described does not explicitly address the question of whether a piece is similar to another. The ontology focuses on describing music metadata related to artists, compositions, performances, and recordings, but it does not include specific classes or properties for comparing or determining similarity between musical pieces.'),\n",
517 | " 'When was the album first sold?': ('Yes',\n",
518 | " ', the ontology described can address the competency question \"When was the album first sold?\" The ontology includes classes and properties related to albums, such as Release, Publication Situation, and Publisher. By using these classes and properties, it is possible to model the publication and release of albums, including the specific situation or context in which they are made available to the public. The ontology can capture information about the timing, location, and associated entities involved in the publication process, allowing for the inference of when the album was first sold.'),\n",
519 | " 'Who is the lead of a music group?': ('No', ''),\n",
520 | " 'Was a song wronly attributed to an artist?': ('Yes',\n",
521 | " ', the ontology described can address the competency question \"Was a song wrongly attributed to an artist?\"'),\n",
522 | " 'What is the cover art of a release?': ('No', ''),\n",
523 | " 'What is the sampling rate of an audio recording?': ('No',\n",
524 | " '. The ontology described does not include any classes or properties related to audio recording specifications such as sampling rate.'),\n",
525 | " 'In which language has a song been translated?': ('No', ''),\n",
526 | " 'What is the ISNI code associated to a music artist?': ('No', ''),\n",
527 | " 'What is the length of a recording in seconds?': ('No', ''),\n",
528 | " 'Is there any recommendation associated to an album?': ('No', '.'),\n",
529 | " 'What is the current average price of a release?': ('No', '')}"
530 | ]
531 | },
532 | "execution_count": 32,
533 | "metadata": {},
534 | "output_type": "execute_result"
535 | }
536 | ],
537 | "source": [
538 | "cq_dict_neg = test_competency_questions(\n",
539 | " verb, cq_neg_x, chat_interface=interface)\n",
540 | "cq_dict_neg"
541 | ]
542 | },
543 | {
544 | "cell_type": "code",
545 | "execution_count": 33,
546 | "metadata": {},
547 | "outputs": [],
548 | "source": [
549 | "cq_original_x, cq_original_preds = split_cq_test_data(cq_dict_pos)\n",
550 | "cq_negative_x, cq_negative_preds = split_cq_test_data(cq_dict_neg)"
551 | ]
552 | },
553 | {
554 | "cell_type": "code",
555 | "execution_count": 38,
556 | "metadata": {},
557 | "outputs": [],
558 | "source": [
559 | "cq_labels = cq_pos_label + cq_neg_label\n",
560 | "cq_preds = cq_original_preds + cq_negative_preds"
561 | ]
562 | },
563 | {
564 | "cell_type": "code",
565 | "execution_count": 48,
566 | "metadata": {},
567 | "outputs": [
568 | {
569 | "data": {
570 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmwAAAHWCAYAAADKLNWrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABUmUlEQVR4nO3deZyNdf/H8fcZZs4MZobBbExjXyb7vnQP7khTubUQcVvulDtkTUoSJVvCREIqoxpRja1FIVsK2ctyWzKYMnNTlskwY8Z8f3/4OXenM5jDYS7j9fS4Ho/OtXzP50xN5+Pzub7fy2aMMQIAAIBleeV1AAAAALgyEjYAAACLI2EDAACwOBI2AAAAiyNhAwAAsDgSNgAAAIsjYQMAALA4EjYAAACLI2EDAACwOBI2AAAAiyNhAwAAkDR27FjVr19f/v7+Cg4O1oMPPqi9e/c6ndO9e3fZbDanrVGjRlcdOyEhQVFRUbLb7YqKitLChQvdio2EDQAAQNKaNWvUp08fbdiwQcuXL1dWVpbuuecepaWlOZ137733Kjk52bF9+eWXVxx3/fr16tChg7p06aIdO3aoS5cuevTRR7Vx48Zcx2bj4e8AAACujh8/ruDgYK1Zs0bR0dGSLlbYTp06pUWLFuV6nA4dOig1NVVLly517Lv33ntVrFgxffTRR7kao6BbkeOWlJ2draNHj8rf3182my2vwwEA3GKMMfrjjz8UHh4uL68b25xLT0/X+fPnPTaeMcblu89ut8tut1/12tOnT0uSgoKCnPavXr1awcHBKlq0qJo1a6bRo0crODj4suOsX79eAwcOdNrXunVrxcbG5vJTkLDdFo4ePaqIiIi8DgMAcItLSkpS6dKlb9j46enp8vMvLmWd9diYRYoU0ZkzZ5z2jRgxQiNHjrzidcYYDRo0SHfddZeqVavm2B8TE6P27dsrMjJSiYmJGj58uP7+979ry5Ytl00CU1JSFBIS4rQvJCREKSkpuf4cJGy3AX9/f0mST1Q32Qr45HE0gDUdWf16XocAWNYfqamqUDbC8X1yo5w/f17KOit7VDfJE99XF87rzO45SkpKUkBAgGN3bqprTz/9tH788UetW7fOaX+HDh0c/1ytWjXVq1dPkZGR+uKLL/Twww9fdry/VvlyqvxdCQnbbeDSfxC2Aj4kbMBl/Pl/5gBydtNuqyno65HvK2O72L4NCAhw63e8b9++WrJkidauXXvVimJYWJgiIyO1f//+y54TGhrqUk07duyYS9XtSpglCgAAoItVr6effloLFizQypUrVbZs2ate8/vvvyspKUlhYWGXPadx48Zavny5075ly5apSZMmuY6NChsAALAWmyRPVPPcHKJPnz6aO3euFi9eLH9/f0dVLDAwUH5+fjpz5oxGjhypRx55RGFhYTp06JBeeOEFlShRQg899JBjnK5du6pUqVIaO3asJKl///6Kjo7W+PHj1bZtWy1evFgrVqxwabdeCRU2AABgLTYvz21umD59uk6fPq3mzZsrLCzMsc2fP1+SVKBAAf30009q27atKlWqpG7duqlSpUpav3690/19R44cUXJysuN1kyZNNG/ePM2ePVs1atRQXFyc5s+fr4YNG+Y6NipsAAAAutgSvRI/Pz99/fXXVx1n9erVLvvatWundu3aXWtoJGwAAMBibDYPtUTzz9qjJGwAAMBarqGdedlx8on880kAAADyKSpsAADAWmiJuiBhAwAAFuOhlmg+aiTmn08CAACQT1FhAwAA1kJL1AUVNgAAAIujwgYAAKyFZT1ckLABAABroSXqIv+kngAAAPkUFTYAAGAttERdkLABAABroSXqIv+kngAAAPkUFTYAAGAttERdkLABAABrsdk8lLDREgUAAMBNQoUNAABYi5ft4uaJcfIJKmwAAAAWR4UNAABYC5MOXJCwAQAAa2EdNhf5J/UEAADIp6iwAQAAa6El6oKEDQAAWAstURf5J/UEAADIp6iwAQAAa6El6oKEDQAAWAstURf5J/UEAADIp6iwAQAAa6El6iL/fBIAAIB8igobAACwFu5hc0HCBgAALMZDLdF81EjMP58EAAAgn6LCBgAArIWWqAsSNgAAYC02m4dmieafhI2WKAAAgKSxY8eqfv368vf3V3BwsB588EHt3bvXcTwzM1PPPfecqlevrsKFCys8PFxdu3bV0aNHrzhuXFycbDaby5aenp7r2EjYAACAtVxah80TmxvWrFmjPn36aMOGDVq+fLmysrJ0zz33KC0tTZJ09uxZbd26VcOHD9fWrVu1YMEC7du3T//4xz+uOnZAQICSk5OdNl9f31zHRksUAABYSx7dw/bVV185vZ49e7aCg4O1ZcsWRUdHKzAwUMuXL3c6Z+rUqWrQoIGOHDmiO+644wqh2BQaGupWPH9GhQ0AAORrqampTltGRkaurjt9+rQkKSgo6Irn2Gw2FS1a9IpjnTlzRpGRkSpdurQeeOABbdu2LdfxSyRsAADAajzcEo2IiFBgYKBjGzt27FVDMMZo0KBBuuuuu1StWrUcz0lPT9fzzz+vTp06KSAg4LJjValSRXFxcVqyZIk++ugj+fr6qmnTptq/f3+ufyS0RAEAQL6WlJTklFDZ7farXvP000/rxx9/1Lp163I8npmZqY4dOyo7O1tvvfXWFcdq1KiRGjVq5HjdtGlT1alTR1OnTtWUKVNy9RlI2AAAgLV4+B62gICAK1bA/qpv375asmSJ1q5dq9KlS7scz8zM1KOPPqrExEStXLnSrbElycvLS/Xr16fCBgAAbmHXMMPzsuO4wRijvn37auHChVq9erXKli3rcs6lZG3//v1atWqVihcv7nZYxhht375d1atXz/U1JGwAAACS+vTpo7lz52rx4sXy9/dXSkqKJCkwMFB+fn7KyspSu3bttHXrVn3++ee6cOGC45ygoCD5+PhIkrp27apSpUo57pV7+eWX1ahRI1WsWFGpqamaMmWKtm/frmnTpuU6NhI2AABgLXm0rMf06dMlSc2bN3faP3v2bHXv3l2//PKLlixZIkmqVauW0zmrVq1yXHfkyBF5ef2vunfq1Cn17NlTKSkpCgwMVO3atbV27Vo1aNAg17GRsAEAAEu59CQADwzk1unGmCseL1OmzFXPkaTVq1c7vZ48ebImT57sVix/xbIeAAAAFkeFDQAAWEpeVdisjIQNAABYi+3/N0+Mk0/QEgUAALA4KmwAAMBSaIm6ImEDAACWQsLmipYoAACAxVFhAwAAlkKFzRUVNgAAAIujwgYAACyFCpsrEjYAAGAtrMPmgpYoAACAxVFhAwAAlkJL1BUJGwAAsBSbTR5K2K5/CKugJQoAAGBxVNgAAICl2OShlmg+KrGRsAEAAEvhHjZXtEQBAAAsjgobAACwFtZhc0GFDQAAwOKosAEAAGvx0D1sJh/dw0bCBgAALMVTkw48M9PUGmiJAgAAWBwVNgAAYClU2FyRsAEAAGthlqgLWqIAAAAWR4UNAABYCi1RVyRsAADAUkjYXNESBQA
AsDgqbAAAwFKosLmiwgYAAGBxVNgAAIClUGFzRcIGAACshXXYXNASBQAAsDgSNgAAYCmXWqKe2NwxduxY1a9fX/7+/goODtaDDz6ovXv3Op1jjNHIkSMVHh4uPz8/NW/eXLt27brq2AkJCYqKipLdbldUVJQWLlzoVmwkbAAAwFLyKmFbs2aN+vTpow0bNmj58uXKysrSPffco7S0NMc5r732miZNmqQ333xTmzZtUmhoqFq1aqU//vjjsuOuX79eHTp0UJcuXbRjxw516dJFjz76qDZu3Jj7n4kxxrj1aXDLSU1NVWBgoOzVn5StgE9ehwNY0slNb+Z1CIBlpaamKqR4oE6fPq2AgIAb+j6BgYEK6xEvL59C1z1e9vmzSn638zXHffz4cQUHB2vNmjWKjo6WMUbh4eEaMGCAnnvuOUlSRkaGQkJCNH78eP373//OcZwOHTooNTVVS5cudey79957VaxYMX300Ue5ioUKGwAAsJS8qrD91enTpyVJQUFBkqTExESlpKTonnvucZxjt9vVrFkzff/995cdZ/369U7XSFLr1q2veM1fMUsUAABYi4dniaampjrtttvtstvtV7zUGKNBgwbprrvuUrVq1SRJKSkpkqSQkBCnc0NCQnT48OHLjpWSkpLjNZfGyw0qbAAAIF+LiIhQYGCgYxs7duxVr3n66af1448/5tiy/Gvlzhhz1WretVzzZ1TYAACApXh64dykpCSne9iuVl3r27evlixZorVr16p06dKO/aGhoZIuVszCwsIc+48dO+ZSQfuz0NBQl2ra1a75KypsAAAgXwsICHDaLpewGWP09NNPa8GCBVq5cqXKli3rdLxs2bIKDQ3V8uXLHfvOnz+vNWvWqEmTJpd9/8aNGztdI0nLli274jV/RYUNuEEGdr9HD7SoqYqRIUrPyNQPPx7UyDcX68DhY45zpo34pzo90Mjpuk0/Jeqexyfe7HCBPPfup9/qvYRvlZR8QpJUpVyonu0Ro1ZN78zjyHCz5dWjqfr06aO5c+dq8eLF8vf3d1TFAgMD5efnJ5vNpgEDBmjMmDGqWLGiKlasqDFjxqhQoULq1KmTY5yuXbuqVKlSjtZr//79FR0drfHjx6tt27ZavHixVqxYoXXr1uU6tts2YTt06JDKli2rbdu2qVatWpc9r3nz5qpVq5ZiY2NvWmzIH5rUqaB3PlmrbbsPq2CBAnqxVxstmPq0Gj36qs6mn3ect+L7XerzyoeO1+czL+RFuECeCw8uqhFPt1W50iUkSR99sVGdB7+tNR8+r6rlw65yNfITmzyUsLk5c2H69OmSLn73/9ns2bPVvXt3SdKQIUN07tw59e7dWydPnlTDhg21bNky+fv7O84/cuSIvLz+18Rs0qSJ5s2bpxdffFHDhw9X+fLlNX/+fDVs2DD3n8Xq67B1795dc+bMkSQVLFhQERERevjhh/Xyyy+rcOHC1zzuhQsXdPz4cZUoUUIFCxbU6tWr1aJFC508eVJFixZ1nHfixAl5e3s7/YvwhFWrVmnChAnauHGjzp07pzJlyigmJkaDBg1SqVKlHDFOmTJFs2fP1r59++Tr66vGjRvrxRdfVNOmTXP9XqzDZg3FixbRgeXjdH/Pyfp+28+SLlbYAov46Z/Pzsrj6MA6bNZU9u4heqXfg+rSNvetI3jezV6HLeLf8+Vl98A6bBlnlTSzww2P+2a4Je5hu/fee5WcnKyDBw/q1Vdf1VtvvaXBgwdf15gFChRQaGioCha8cpExKCjI48nazJkz1bJlS4WGhiohIUG7d+/WjBkzdPr0aU2ceLEVZoxRx44d9corr6hfv37as2eP1qxZo4iICDVv3lyLFi3yaEy48QKK+EqSTqaeddp/V92K2vf1WG369CXFDntMJYoVyYvwAEu5cCFbCcs26+y586pfvezVL0C+YpV12KzklkjY7Ha7QkNDFRERoU6dOqlz585atGiRMjIy1K9fPwUHB8vX11d33XWXNm3a5Lju5MmT6ty5s0qWLCk/Pz9VrFhRs2fPlnSxJWqz2bR9+3YdOnRILVq0kCQVK1ZMNpvNUfps3ry5BgwYIEkaOnSoGjVyvt9IkmrUqKERI0Y4Xs+ePVtVq1aVr6+vqlSporfeestx7JdfflG/fv3Ur18/vffee2revLnKlCmj6OhovfPOO3rppZckSR9//LE+/fRTvf/++3riiSdUtmxZ1axZU2+//bb+8Y9/6IknnnB6VAasb/TAR7R+2wHt+TnZsW/F97vVc/gcte09RcPfWKA6UZFaMr2ffLxv27sVcJvbdeBXlY4epJCmAzRo7Hx9MOFJVSlHO/S2Y/Pglk/cEgnbX/n5+SkzM1NDhgxRQkKC5syZo61bt6pChQpq3bq1Tpy4eMPq8OHDtXv3bi1dulR79uzR9OnTVaJECZfxIiIilJCQIEnau3evkpOT9cYbb7ic17lzZ23cuFE///yzY9+uXbv0008/qXPnzpKkWbNmadiwYRo9erT27NmjMWPGaPjw4Y627ieffKLz589ryJAhOX62S+3YuXPnqlKlSmrTpo3LOc8884x+//13lxknl2RkZCg1NdVpQ96aMORR3VkhXE+8GOe0f+HyrVr23S7t+TlZX327U+37vaXydwTrnru4yRq3p4qRIVobP1TL33tGjz9yl3qP/ED/OZh89QuBfO6WS9h++OEHzZ07Vy1atND06dM1YcIExcTEKCoqSrNmzZKfn5/effddSRdv+qtdu7bq1aunMmXKqGXLljkmQAUKFHA8diI4OFihoaEKDAx0Oa9atWqqUaOG5s6d69gXHx+v+vXrq1KlSpKkUaNGaeLEiXr44YdVtmxZPfzwwxo4cKBmzpwpSdq/f78CAgKc1m/Jyb59+1S1atUcj13av2/fvhyPjx071mmBwIiIiCu+F26s8YPbKya6utr0mqKjx05d8dz//p6qpOQTKh9R8uYEB1iMj3dBlYsoqdpRkRrxdFtVq1hKM+atzuuwcJPREnV1SyRsn3/+uYoUKeK46T46Olp9+/ZVZmam08333t7eatCggfbs2SNJ6tWrl+bNm6datWppyJAhbj2z63I6d+6s+Ph4SRfvM/voo48c1bXjx48rKSlJPXr0UJEiRRzbq6++6qjKubuy8ZX4+OQ8gWDo0KE6ffq0Y0tKSvLI+8F9rz3bXg+0qKl/9JqiI0d/v+r5xQILq1RIMaX8RlUUkC7+P/P8+ay8DgM3GQmbq1viRplL1TRvb2+Fh4fL29tbO3bskHTlRz3ExMTo8OHD+uKLL7RixQrdfffd6tOnj15//fVrjqVTp056/vnntXXrVp07d05JSUnq2LGjJCk7O1vSxbboX6fqFihQQJJUqVIlnT59WsnJyVesslWsWFG7d+/O8dilhPRSVe+vcvOMNNx4rz/3qNq1rqdOg9/WmbPpCi5+cfJK6pl0pWdkqrCfj57reb8+W7ldKb+d1h1hxfVSnzb6/dQZfbF6Rx5HD9x8r0xbopZNolQ6pJj+OJuuBcu2aN3W/fp0Su+8Dg3Ic7dEwla4cGFVqF
DBaV+FChXk4+OjdevWORary8zM1ObNmx2TBCSpZMmS6t69u7p3766//e1vevbZZ3NM2C5Vqy5cuPIaWKVLl1Z0dLTi4+N17tw5tWzZ0vFoiZCQEJUqVUoHDx50VN3+ql27dnr++ef12muvafLkyS7HT506paJFi+qxxx5Tp06d9Nlnn7m0cSdOnKjw8HC1atXqirEib/VoFy1J+mLmAKf9vV/+QB99vlEXso2iyoer430NFOjvp//+lqpvt+zT4y+8pzNnM/IgYiBvHT/xh54a8b7++1uqAor46s4KpfTplN5q0TDn20OQf9lsFzdPjJNf3BIJW04KFy6sXr166dlnn1VQUJDuuOMOvfbaazp79qx69OghSXrppZdUt25d3XnnncrIyNDnn39+2fvCIiMjZbPZ9Pnnn+u+++6Tn5+fihTJeXmFzp07a+TIkTp//rxL0jVy5Ej169dPAQEBiomJUUZGhjZv3qyTJ09q0KBBioiI0OTJk/X0008rNTVVXbt2VZkyZfTLL7/o/fffV5EiRTRx4kR17NhRH3/8sbp166YJEybo7rvvVmpqqqZNm6bPP/9cX331lby9vT37Q4VHFav/9BWPp2dkql2/aTcpGsD6pg7P+S+6AG6Re9guZ9y4cXrkkUfUpUsX1alTRwcOHNDXX3+tYsWKSbpYNRs6dKhq1Kih6OhoFShQQPPmzctxrFKlSunll1/W888/r5CQED399OW/bNu3b6/ff/9dZ8+e1YMPPuh07IknntA777yjuLg4Va9eXc2aNVNcXJzT88h69+6tZcuW6ddff9VDDz2kKlWq6IknnlBAQIBjfTmbzaZPPvlEL7zwgiZPnqzKlSurZs2a+vTTT7Vt2zbHMiQAAOQ3FytsnriHLa8/iedY/kkH+J+tW7eqZcuW6tGjhyZMmJDr63jSAXB1POkAuLyb/aSDcv0+VQH7tT/N6JILGWk6OKUdTzrAzVWnTh198803Kly4sNNacAAAIH+7Ze9hu13Vrl1btWvXzuswAAC4YTy1JAfLegAAANwgzBJ1RUsUAADA4qiwAQAAS/HyssnL6/rLY8YDY1gFCRsAALAUWqKuaIkCAABYHBU2AABgKcwSdUXCBgAALIWWqCtaogAAABZHhQ0AAFgKLVFXVNgAAAAsjgobAACwFCpsrkjYAACApTDpwBUtUQAAAIujwgYAACzFJg+1RJV/SmwkbAAAwFJoibqiJQoAAGBxVNgAAIClMEvUFQkbAACwFFqirmiJAgAAWBwVNgAAYCm0RF1RYQMAALA4KmwAAMBSuIfNFQkbAACwFFqirmiJAgAASFq7dq3atGmj8PBw2Ww2LVq0yOn4pUTyr9uECRMuO2ZcXFyO16Snp7sVGxU2AABgLR5qibr7ZKq0tDTVrFlT//rXv/TII4+4HE9OTnZ6vXTpUvXo0SPHc/8sICBAe/fuddrn6+vrVmwkbAAAwFLyqiUaExOjmJiYyx4PDQ11er148WK1aNFC5cqVu2ocf73WXbREAQBAvpaamuq0ZWRkXPeY//3vf/XFF1+oR48eVz33zJkzioyMVOnSpfXAAw9o27Ztbr8fCRsAALCUS7NEPbFJUkREhAIDAx3b2LFjrzvGOXPmyN/fXw8//PAVz6tSpYri4uK0ZMkSffTRR/L19VXTpk21f/9+t96PligAALAUT7dEk5KSFBAQ4Nhvt9uve+z33ntPnTt3vuq9aI0aNVKjRo0cr5s2bao6depo6tSpmjJlSq7fj4QNAADkawEBAU4J2/X69ttvtXfvXs2fP9/ta728vFS/fn23K2y0RAEAgKV4uiXqae+++67q1q2rmjVrun2tMUbbt29XWFiYW9dRYQMAANDFyQEHDhxwvE5MTNT27dsVFBSkO+64Q9LFCQyffPKJJk6cmOMYXbt2ValSpRz3yb388stq1KiRKlasqNTUVE2ZMkXbt2/XtGnT3IqNhA0AAFhKXi3rsXnzZrVo0cLxetCgQZKkbt26KS4uTpI0b948GWP02GOP5TjGkSNH5OX1vwbmqVOn1LNnT6WkpCgwMFC1a9fW2rVr1aBBA/c+izHGuHUFbjmpqakKDAyUvfqTshXwyetwAEs6uenNvA4BsKzU1FSFFA/U6dOnPXovWE7vExgYqMajv1ZB38LXPV5WeprWD2t9w+O+GbiHDQAAwOJoiQIAAEvx1ISBfPTsdxI2AABgLXl1D5uV0RIFAACwOCpsAADAUmiJuiJhAwAAlkJL1BUtUQAAAIujwgYAACzFJg+1RK9/CMugwgYAAGBxVNgAAICleNls8vJAic0TY1gFCRsAALAUZom6oiUKAABgcVTYAACApbCshysSNgAAYCletoubJ8bJL2iJAgAAWBwVNgAAYC02D7Uz81GFjYQNAABYCrNEXdESBQAAsDgqbAAAwFJs///HE+PkFyRsAADAUpgl6oqWKAAAgMVRYQMAAJbCwrmuqLABAABYHBU2AABgKSzr4YqEDQAAWIqXzSYvD2RbnhjDKnKVsE2ZMiXXA/br1++agwEAAICrXCVskydPztVgNpuNhA0AAFwXWqKucpWwJSYm3ug4AAAAJDFLNCfXPEv0/Pnz2rt3r7KysjwZDwAAAP7C7YTt7Nmz6tGjhwoVKqQ777xTR44ckXTx3rVx48Z5PEAAAHB7udQS9cSWX7idsA0dOlQ7duzQ6tWr5evr69jfsmVLzZ8/36PBAQCA28+lWaKe2PILt5f1WLRokebPn69GjRo59YajoqL0888/ezQ4AAAAXEPCdvz4cQUHB7vsT0tLy1c39wEAgLxh+//NE+PkF263ROvXr68vvvjC8fpSkjZr1iw1btzYc5EBAABA0jUkbGPHjtWwYcPUq1cvZWVl6Y033lCrVq0UFxen0aNH34gYAQDAbeTSsh6e2Nyxdu1atWnTRuHh4bLZbFq0aJHT8e7du7uM36hRo6uOm5CQoKioKNntdkVFRWnhwoVuxSVdQ8LWpEkTfffddzp79qzKly+vZcuWKSQkROvXr1fdunXdDgAAAODPvGye29yRlpammjVr6s0337zsOffee6+Sk5Md25dffnnFMdevX68OHTqoS5cu2rFjh7p06aJHH31UGzdudCu2a3qWaPXq1TVnzpxruRQAAMCSYmJiFBMTc8Vz7Ha7QkNDcz1mbGysWrVqpaFDh0q6uNrGmjVrFBsbq48++ijX41xTwnbhwgUtXLhQe/bskc1mU9WqVdW2bVsVLMiz5AEAwPWx8pMOVq9ereDgYBUtWlTNmjXT6NGjc5yMecn69es1cOBAp32tW7dWbGysW+/rdoa1c+dOtW3bVikpKapcubIkad++fSpZsqSWLFmi6tWruzskAACAE0/mWqmpqU6v7Xa77Ha72+PExMSoffv2ioyMVGJiooYPH66///3v2rJly2XHS0lJUUhIiNO+kJAQpaSkuPXebt/D9sQTT+jOO+/UL7/8oq1bt2rr1q1KSkpSjRo11LNnT3eHAwAAuKEiIiIUGBjo2MaOHXtN43To0EH333+/qlWrpjZt2mjp0qXat2+f0
+oZOflrpc8Y43b1z+0K244dO7R582YVK1bMsa9YsWIaPXq06tev7+5wAAAATjzdEk1KSlJAQIBj/7VU13ISFhamyMhI7d+//7LnhIaGulTTjh075lJ1uxq3K2yVK1fWf//7X5f9x44dU4UKFdwdDgAAwImnZ4kGBAQ4bZ5K2H7//XclJSUpLCzssuc0btxYy5cvd9q3bNkyNWnSxK33ylWF7c+93zFjxqhfv34aOXKkY+2RDRs26JVXXtH48ePdenMAAACrOHPmjA4cOOB4nZiYqO3btysoKEhBQUEaOXKkHnnkEYWFhenQoUN64YUXVKJECT300EOOa7p27apSpUo52q79+/dXdHS0xo8fr7Zt22rx4sVasWKF1q1b51ZsuUrYihYt6lSaNMbo0UcfdewzxkiS2rRpowsXLrgVAAAAwJ/l1SzRzZs3q0WLFo7XgwYNkiR169ZN06dP108//aT3339fp06dUlhYmFq0aKH58+fL39/fcc2RI0fk5fW/BmaTJk00b948vfjiixo+fLjKly+v+fPnq2HDhm7FlquEbdWqVW4NCgAAcKtp3ry5owiVk6+//vqqY6xevdplX7t27dSuXbvrCS13CVuzZs2u600AAAByi4e/u7rmlW7Pnj2rI0eO6Pz58077a9Socd1BAQCA25eXzSYvD7REPTGGVbidsB0/flz/+te/tHTp0hyPcw8bAACAZ7m9rMeAAQN08uRJbdiwQX5+fvrqq680Z84cVaxYUUuWLLkRMQIAgNuIzea5Lb9wu8K2cuVKLV68WPXr15eXl5ciIyPVqlUrBQQEaOzYsbr//vtvRJwAAOA2YeVnieYVtytsaWlpjoecBgUF6fjx45Kk6tWra+vWrZ6NDgAAANf2pIO9e/dKkmrVqqWZM2fq119/1YwZM6640i8AAEBu0BJ15XZLdMCAAUpOTpYkjRgxQq1bt1Z8fLx8fHwUFxfn6fgAAMBthlmirtxO2Dp37uz459q1a+vQoUP6z3/+ozvuuEMlSpTwaHAAAAC4jnXYLilUqJDq1KnjiVgAAAA81s7MRwW23CVsl56llRuTJk265mAAAADgKlcJ27Zt23I1WH6aPgsAAPIGy3q44uHvt5G9y8YrICAgr8MALKlY8xfzOgTAskxWxk19Py9dwzIWlxknv8hPnwUAACBfuu5JBwAAAJ5ES9QVCRsAALAUm03yYpaoE1qiAAAAFkeFDQAAWIqXhypsnhjDKq6pwvbBBx+oadOmCg8P1+HDhyVJsbGxWrx4sUeDAwAAt59L97B5Yssv3E7Ypk+frkGDBum+++7TqVOndOHCBUlS0aJFFRsb6+n4AAAAbntuJ2xTp07VrFmzNGzYMBUoUMCxv169evrpp588GhwAALj9XGqJemLLL9xO2BITE1W7dm2X/Xa7XWlpaR4JCgAAAP/jdsJWtmxZbd++3WX/0qVLFRUV5YmYAADAbezSw989seUXbs8SffbZZ9WnTx+lp6fLGKMffvhBH330kcaOHat33nnnRsQIAABuI142m7w8kG15YgyrcDth+9e//qWsrCwNGTJEZ8+eVadOnVSqVCm98cYb6tix442IEQAA4LZ2TeuwPfnkk3ryySf122+/KTs7W8HBwZ6OCwAA3KZ4+Lur61o4t0SJEp6KAwAAQJLn7j/LRx1R9xO2smXLXnEhuoMHD15XQAAAAHDmdsI2YMAAp9eZmZnatm2bvvrqKz377LOeigsAANymvOShSQfKPyU2txO2/v3757h/2rRp2rx583UHBAAAbm+0RF157H68mJgYJSQkeGo4AAAA/L/rmnTwZ59++qmCgoI8NRwAALhNeeqxUvnp0VRuJ2y1a9d2mnRgjFFKSoqOHz+ut956y6PBAQCA24/N5plFb/NTS9TthO3BBx90eu3l5aWSJUuqefPmqlKliqfiAgAAwP9zK2HLyspSmTJl1Lp1a4WGht6omAAAwG2MSQeu3Jp0ULBgQfXq1UsZGRk3Kh4AAIA8sXbtWrVp00bh4eGy2WxatGiR41hmZqaee+45Va9eXYULF1Z4eLi6du2qo0ePXnHMuLg42Ww2ly09Pd2t2NyeJdqwYUNt27bN3csAAABy5dKkA09s7khLS1PNmjX15ptvuhw7e/astm7dquHDh2vr1q1asGCB9u3bp3/84x9XHTcgIEDJyclOm6+vr1uxuX0PW+/evfXMM8/ol19+Ud26dVW4cGGn4zVq1HB3SAAAAAfb///xxDjuiImJUUxMTI7HAgMDtXz5cqd9U6dOVYMGDXTkyBHdcccdl4/DZrvuW8lynbA9/vjjio2NVYcOHSRJ/fr1cwrEGCObzaYLFy5cV0AAAACelJqa6vTabrfLbrdf97inT5+WzWZT0aJFr3jemTNnFBkZqQsXLqhWrVoaNWqUateu7dZ75TphmzNnjsaNG6fExES33gAAAMAdnl6HLSIiwmn/iBEjNHLkyOsaOz09Xc8//7w6deqkgICAy55XpUoVxcXFqXr16kpNTdUbb7yhpk2baseOHapYsWKu3y/XCZsxRpIUGRmZ68EBAADc5emELSkpySmput7qWmZmpjp27Kjs7OyrrkHbqFEjNWrUyPG6adOmqlOnjqZOnaopU6bk+j3duofNlp/mxwIAgNtCQEDAFatg7sjMzNSjjz6qxMRErVy50u1xvby8VL9+fe3fv9+t69xK2CpVqnTVpO3EiRNuBQAAAPBnl5a+8MQ4nnQpWdu/f79WrVql4sWLuz2GMUbbt29X9erV3brOrYTt5ZdfVmBgoFtvAAAA4I68epbomTNndODAAcfrxMREbd++XUFBQQoPD1e7du20detWff7557pw4YJSUlIkSUFBQfLx8ZEkde3aVaVKldLYsWMlXcydGjVqpIoVKyo1NVVTpkzR9u3bNW3aNLdicyth69ixo4KDg916AwAAgFvB5s2b1aJFC8frQYMGSZK6deumkSNHasmSJZKkWrVqOV23atUqNW/eXJJ05MgReXn9b5nbU6dOqWfPnkpJSVFgYKBq166ttWvXqkGDBm7FluuEjfvXAADAzZBXj6Zq3ry5Y5JlTq507JLVq1c7vZ48ebImT57sXiA5yPWTDnITJAAAADwv1xW27OzsGxkHAACAJMnLZpOXB0psnhjDKtx+NBUAAMCNlFeTDqzM7Ye/AwAA4OaiwgYAAKzFQ5MOPPD8eMsgYQMAAJbiJZu8PJBteWIMq6AlCgAAYHFU2AAAgKXk1TpsVkbCBgAALIVZoq5oiQIAAFgcFTYAAGApLJzrigobAACAxVFhAwAAlsKkA1ckbAAAwFK85KGWKOuwAQAA4GahwgYAACyFlqgrEjYAAGApXvJMCzA/tRHz02cBAADIl6iwAQAAS7HZbLJ5oJ/piTGsgoQNAABYiu3/N0+Mk1/QEgUAALA4KmwAAMBSeDSVKypsAAAAFkeFDQAAWE7+qY15BgkbAACwFBbOdUVLFAAAwOKosAEAAEthHTZXJGwAAMBSeDSVq/z0WQAAAPIlKmwAAMBSaIm6ImEDAACWwqOpXNESBQAA
sDgqbAAAwFJoibqiwgYAAGBxVNgAAIClsKyHq/z0WQAAQD5wqSXqic0da9euVZs2bRQeHi6bzaZFixY5HTfGaOTIkQoPD5efn5+aN2+uXbt2XXXchIQERUVFyW63KyoqSgsXLnQrLomEDQAAQJKUlpammjVr6s0338zx+GuvvaZJkybpzTff1KZNmxQaGqpWrVrpjz/+uOyY69evV4cOHdSlSxft2LFDXbp00aOPPqqNGze6FRstUQAAYCl5taxHTEyMYmJicjxmjFFsbKyGDRumhx9+WJI0Z84chYSEaO7cufr3v/+d43WxsbFq1aqVhg4dKkkaOnSo1qxZo9jYWH300Ue5jo0KGwAAsBSbzXObJKWmpjptGRkZbseUmJiolJQU3XPPPY59drtdzZo10/fff3/Z69avX+90jSS1bt36itfkhIQNAADkaxEREQoMDHRsY8eOdXuMlJQUSVJISIjT/pCQEMexy13n7jU5oSUKAAAsxUs2eXmgKXppjKSkJAUEBDj22+32ax7zrxMZjDFXndxwLdf8FQkbAACwlD+3M693HEkKCAhwStiuRWhoqKSLFbOwsDDH/mPHjrlU0P563V+raVe7Jie0RAEAAK6ibNmyCg0N1fLlyx37zp8/rzVr1qhJkyaXva5x48ZO10jSsmXLrnhNTqiwAQAAS7H9/x9PjOOOM2fO6MCBA47XiYmJ2r59u4KCgnTHHXdowIABGjNmjCpWrKiKFStqzJgxKlSokDp16uS4pmvXripVqpTjPrn+/fsrOjpa48ePV9u2bbV48WKtWLFC69atcys2EjYAAGApnm6J5tbmzZvVokULx+tBgwZJkrp166a4uDgNGTJE586dU+/evXXy5Ek1bNhQy5Ytk7+/v+OaI0eOyMvrfw3MJk2aaN68eXrxxRc1fPhwlS9fXvPnz1fDhg3d+yzGGOPex8GtJjU1VYGBgTqUfOK6e/hAfhV+z4i8DgGwLJOVoYyNr+v06dM39Hvk0vfVJxsOqFAR/6tfcBVnz/yh9o0q3PC4bwYqbAAAwFJsHpol6om2qlUw6QAAAMDiqLABAABLyat72KyMhA0AAFgKCZsrWqIAAAAWR4UNAABYSl6tw2ZlJGwAAMBSvGwXN0+Mk1/QEgUAALA4KmwAAMBSaIm6ImEDAACWwixRV7REAQAALI4KGwAAsBSbPNPOzEcFNipsAAAAVkeFDQAAWArLeriiwgbkkanvL1d40/56KXZBXocC3HQDO0XrmxlP6ciXw7Vv4fP68NVOqhBR4rLnTx7UVidXv6qn2jW+iVEir9g8+Ce/IGG7ijJlyig2Njavw0A+s33PYX245HtFVQjP61CAPNGkVhm9s2ij7uk9Uw8PjlPBAl5aMKG7Cvl6u5x7311VVTeqtI4eT82DSAFryNOErXv37rLZbBo3bpzT/kWLFsl2k+fixsXFqWjRoi77N23apJ49e3r8/RISEtS8eXMFBgaqSJEiqlGjhl555RWdOHHCcc65c+c0YsQIVa5cWXa7XSVKlFC7du20a9cuj8eDmyftbIaefvkDTXiuowL9C+V1OECeaD/kfX301Tb959Ax7fw5RX3GLVBEaFHVqlTK6bywEv56rf8D6vnqJ8q6cCGPosXNdmlZD09s+UWeV9h8fX01fvx4nTx5Mq9DyVHJkiVVqJBnv1SHDRumDh06qH79+lq6dKl27typiRMnaseOHfrggw8kSRkZGWrZsqXee+89jRo1Svv27dOXX36pCxcuqGHDhtqwYYNHY8LN88LET3R34yhF16+c16EAlhFQxFeSdPKPs459NptNM15or6nz1uk/h47lVWjIAzYPbvlFnidsLVu2VGhoqMaOHXvZc77//ntFR0fLz89PERER6tevn9LS0hzHk5OTdf/998vPz09ly5bV3LlzXVqZkyZNUvXq1VW4cGFFRESod+/eOnPmjCRp9erV+te//qXTp0/LZrPJZrNp5MiRkpxboo899pg6duzoFFtmZqZKlCih2bNnS5KMMXrttddUrlw5+fn5qWbNmvr0008d5//www8aM2aMJk6cqAkTJqhJkyYqU6aMWrVqpYSEBHXr1k2SFBsbq/Xr1+vzzz/Xo48+qsjISDVo0EAJCQmqWrWqevToIWPMNf/ckTcWrdiqn/b9oqFPtcnrUABLGd07Rut/PKQ9if9LzAY89jdlXcjWzIT1eRgZYA15nrAVKFBAY8aM0dSpU/XLL7+4HP/pp5/UunVrPfzww/rxxx81f/58rVu3Tk8//bTjnK5du+ro0aNavXq1EhIS9Pbbb+vYMee/jXl5eWnKlCnauXOn5syZo5UrV2rIkCGSpCZNmig2NlYBAQFKTk5WcnKyBg8e7BJL586dtWTJEkeiJ0lff/210tLS9Mgjj0iSXnzxRc2ePVvTp0/Xrl27NHDgQP3zn//UmjVrJEnx8fEqUqSIevfunePP41Jbdu7cuWrVqpVq1qzp8jkGDhyo3bt3a8eOHTmOkZGRodTUVKcNee/X/57US7EJmvpSF/naXe/TAW5XE/o/oDvLh+qJUR879tWsFK5/t2usPuMS8jAy5BUv2eRl88CWj2pslljW46GHHlKtWrU0YsQIvfvuu07HJkyYoE6dOmnAgAGSpIoVK2rKlClq1qyZpk+frkOHDmnFihXatGmT6tWrJ0l65513VLFiRadxLl0vSWXLltWoUaPUq1cvvfXWW/Lx8VFgYKBsNptCQ0MvG2fr1q1VuHBhLVy4UF26dJF0MbFq06aNAgIClJaWpkmTJmnlypVq3PjiTKZy5cpp3bp1mjlzppo1a6b9+/erXLly8va+8hf2vn371KJFixyPVa1a1XFOrVq1XI6PHTtWL7/88hXHx833494k/XbyjO7t8bpj34UL2dqw/WfNXvCtDq2aqAIF8vzvUMBNNb7f/YppWlX39XvHaVJB4xqRKlm0sH76+H9/eS5YoIBe7RWjXu2aqGbHiXkRLm4ST7Uz80+6ZpGETZLGjx+vv//973rmmWec9m/ZskUHDhxQfHy8Y58xRtnZ2UpMTNS+fftUsGBB1alTx3G8QoUKKlasmNM4q1at0pgxY7R7926lpqYqKytL6enpSktLU+HChXMVo7e3t9q3b6/4+Hh16dJFaWlpWrx4sebOnStJ2r17t9LT09WqVSun686fP6/atWs7Yr/eCRWXWqE+Pj45Hh86dKgGDRrkeJ2amqqIiIjrek9cv7/VraSVHzzntG/g6LmqEBmiPv+8m2QNt53X+j+g+++KUpsB7+pIivN9zPOXbdeaLT877fv0te76ePl2xS/dejPDBCzBMglbdHS0WrdurRdeeEHdu3d37M/Ozta///1v9evXz+WaO+64Q3v37s1xvD/f33X48GHdd999euqppzRq1CgFBQVp3bp16tGjhzIzM92Ks3PnzmrWrJmOHTum5cuXy9fXVzExMY5YJemLL75QqVLOM53sdrskqVKlSlq3bp0yMzOvWGWrWLGidu/eneOx//znP46xcmK32x3vB+soUthXVco5L+NRyM+uYgGFXfYD+d3
rA9qoXcsa6jQsXmfOZSg4qIgkKfVMutLPZ+lk6jmdTD3ndE3WhQv674k/dCDpt7wIGTcTJTYXlknYJGncuHGqVauWUyJSp04d7dq1SxUqVMjxmipVqigrK0vbtm1T3bp1JUkHDhzQqVOnHOds3rxZWVlZmjhxory8LlYxPv74Y6dxfHx8dCEXU8abNGmiiIgIzZ8/X0uXLlX79u0dla6oqCjZ7XYdOXJEzZo1y/H6Tp06acqUKXrrrbfUv39/l+OnTp1S0aJF9dhjj2nYsGHasWOH031s2dnZmjx5surVq6eoqKirxgsAVtTjwYaSpC/eeMJpf+9xCfroq215ERJgaZZK2KpXr67OnTtr6tSpjn3PPfecGjVqpD59+ujJJ59U4cKFtWfPHi1fvlxTp05VlSpV1LJlS/Xs2VPTp0+Xt7e3nnnmGfn5+Tlaj+XLl1dWVpamTp2qNm3a6LvvvtOMGTOc3rtMmTI6c+aMvvnmG9WsWVOFChXKcTkPm82mTp06acaMGdq3b59WrVrlOObv76/Bgwdr4MCBys7O1l133aXU1FR9//33KlKkiLp166aGDRtqyJAheuaZZ/Trr7/qoYceUnh4uA4cOKAZM2borrvuUv/+/TVw4EAtXrxYbdq00cSJE9WwYUP997//1ZgxY7R//3599913N+jfAm6mhDf75nUIQJ4o1vxFt6/hvrXbh6eeUsCTDm6gUaNGObUza9SooTVr1mj//v3629/+ptq1a2v48OEKCwtznPP+++8rJCRE0dHReuihh/Tkk0/K399fvr4X1/WpVauWJk2apPHjx6tatWqKj493WUakSZMmeuqpp9ShQweVLFlSr7322mVj7Ny5s3bv3q1SpUqpadOmLvG/9NJLGjt2rKpWrarWrVvrs88+U9myZR3njB8/XnPnztXGjRvVunVr3XnnnRo0aJBq1KjhWNbD19dX33zzjbp27aqhQ4eqfPnyatCggXbu3KmdO3fqzjvvvPYfMgAAVuapRXPzT74mm8mHi3n98ssvioiI0IoVK3T33XfndTges3TpUj300EN6/fXXnZY1uZrU1FQFBgbqUPIJBQQE3MAIgVtX+D0j8joEwLJMVoYyNr6u06dP39DvkUvfV99sP6Ii/tf/Pmf+SNXdte644XHfDJarsF2LlStXasmSJUpMTNT333+vjh07qkyZMoqOjs7r0DwqJiZGS5cu1YkTJ/Tbb9x0CwDIn3jSgStL3cN2rTIzM/XCCy/o4MGD8vf3V5MmTRQfH3/Vtc5uRS1atLjs+mwAAOQLzBJ1kS8SttatW6t169Z5HQYAAMANkS8SNgAAkH8wS9QVCRsAALAUxyxPD4yTX+SLSQcAAAD5GRU2AABgKcw5cEWFDQAAQBefemSz2Vy2Pn365Hj+6tWrczz/0jO/PYkKGwAAsJY8KrFt2rTJ6bniO3fuVKtWrdS+ffsrXrd3716nhXlLlizp3hvnAgkbAACwlLyaJfrXRGvcuHEqX768mjVrdsXrgoODVbRoUXfDcwstUQAAgL84f/68PvzwQz3++OOyXWW6ae3atRUWFqa7775bq1atuiHxUGEDAACW4ullPVJTU5322+122e32K167aNEinTp1St27d7/sOWFhYXr77bdVt25dZWRk6IMPPtDdd9+t1atXe/zxmCRsAADAUjx9C1tERITT/hEjRmjkyJFXvPbdd99VTEyMwsPDL3tO5cqVVblyZcfrxo0bKykpSa+//joJGwAAgDuSkpKcJgVcrbp2+PBhrVixQgsWLHD7vRo1aqQPP/zQ7euuhoQNAABYi4dLbAEBAU4J29XMnj1bwcHBuv/++91+y23btiksLMzt666GhA0AAFhKXj5LNDs7W7Nnz1a3bt1UsKBzmjR06FD9+uuvev/99yVJsbGxKlOmjO68807HJIWEhAQlJCRcd+x/RcIGAADw/1asWKEjR47o8ccfdzmWnJysI0eOOF6fP39egwcP1q+//io/Pz/deeed+uKLL3Tfffd5PC6bMcZ4fFRYSmpqqgIDA3Uo+YRbJWHgdhJ+z4i8DgGwLJOVoYyNr+v06dM39Hvk0vfV97t/VRH/63+fM3+kqklUqRse981AhQ0AAFgKzxJ1xcK5AAAAFkeFDQAAWAslNhdU2AAAACyOChsAALCUvFzWw6pI2AAAgKV4+lmi+QEtUQAAAIujwgYAACyFOQeuSNgAAIC1kLG5oCUKAABgcVTYAACApTBL1BUJGwAAsBRmibqiJQoAAGBxVNgAAIClMOfAFRU2AAAAi6PCBgAArIUSmwsSNgAAYCnMEnVFSxQAAMDiqLABAABr8dCyHvmowEbCBgAArIVb2FzREgUAALA4KmwAAMBaKLG5IGEDAACWwixRV7REAQAALI4KGwAAsBQe/u6KChsAAIDFUWEDAACWwpwDVyRsAADAWsjYXNASBQAAsDgqbAAAwFJY1sMVCRsAALAUmzw0S/T6h7AMWqIAAAAWR4UNAABYCnMOXJGwAQAAS2HhXFe0RAEAACyOhA0AAFiMzYNb7o0cOVI2m81pCw0NveI1a9asUd26deXr66ty5cppxowZbr1nbtESBQAA+H933nmnVqxY4XhdoECBy56bmJio++67T08++aQ+/PBDfffdd+rdu7dKliypRx55xKNxkbABAABLyct72AoWLHjVqtolM2bM0B133KHY2FhJUtWqVbV582a9/vrrHk/YaIkCAABL8XRDNDU11WnLyMi47Hvv379f4eHhKlu2rDp27KiDBw9e9tz169frnnvucdrXunVrbd68WZmZme5/8CsgYQMAAPlaRESEAgMDHdvYsWNzPK9hw4Z6//339fXXX2vWrFlKSUlRkyZN9Pvvv+d4fkpKikJCQpz2hYSEKCsrS7/99ptHPwMtUQAAYCmebokmJSUpICDAsd9ut+d4fkxMjOOfq1evrsaNG6t8+fKaM2eOBg0adJn3cA7UGJPj/utFwgYAACzF088SDQgIcErYcqtw4cKqXr269u/fn+Px0NBQpaSkOO07duyYChYsqOLFi7sf8BXQEgUAAMhBRkaG9uzZo7CwsByPN27cWMuXL3fat2zZMtWrV0/e3t4ejYWEDQAAWEveLMOmwYMHa82aNUpMTNTGjRvVrl07paamqlu3bpKkoUOHqmvXro7zn3rqKR0+fFiDBg3Snj179N577+ndd9/V4MGDr/2zXwYtUQAAYCl59SzRX375RY899ph+++03lSxZUo0aNdKGDRsUGRkpSUpOTtaRI0cc55ctW1ZffvmlBg4cqGnTpik8PFxTpkzx+JIeEgkbAACAJGnevHlXPB4XF+eyr1mzZtq6desNiuh/SNgAAICl8PB3V9zDBgAAYHFU2AAAgKV4elmP/ICEDQAAWEtezTqwMFqiAAAAFkeFDQAAWAoFNlckbAAAwFKYJeqKligAAIDFUWEDAAAW45lZovmpKUrCBgAALIWWqCtaogAAABZHwgYAAGBxtEQBAICl0BJ1RYUNAADA4qiwAQAAS+FZoq6osAEAAFgcFTYAAGAp3MPmio
QNAABYCs8SdUVLFAAAwOKosAEAAGuhxOaChA0AAFgKs0Rd0RIFAACwOCpsAADAUpgl6oqEDQAAWAq3sLmiJQoAAGBxVNgAAIC1UGJzQYUNAADA4qiwAQAAS2FZD1ckbAAAwFKYJeqKhO02YIyRJP3xR2oeRwJYl8nKyOsQAMu69Ptx6fvkRktN9cz3lafGsQISttvAH3/8IUmqXqlM3gYCALil/fHHHwoMDLxh4/v4+Cg0NFQVy0Z4bMzQ0FD5+Ph4bLy8YjM3K11GnsnOztbRo0fl7+8vW36qD9/CUlNTFRERoaSkJAUEBOR1OICl8PthPcYY/fHHHwoPD5eX142dr5ienq7z5897bDwfHx/5+vp6bLy8QoXtNuDl5aXSpUvndRjIQUBAAF9IwGXw+2EtN7Ky9me+vr75IsHyNJb1AAAAsDgSNgAAAIsjYQPygN1u14gRI2S32/M6FMBy+P0AXDHpAAAAwOKosAEAAFgcCRsAAIDFkbABACypTJkyio2NzeswAEsgYQOu06FDh2Sz2bR9+/Yrnte8eXMNGDDgpsQEXE337t1ls9k0btw4p/2LFi266Qtsx8XFqWjRoi77N23apJ49e3r8/RISEtS8eXMFBgaqSJEiqlGjhl555RWdOHHCcc65c+c0YsQIVa5cWXa7XSVKlFC7du20a9cuj8cD5AYJG24bl76gbDabvL29Va5cOQ0ePFhpaWnXNW5ERISSk5NVrVo1SdLq1atls9l06tQpp/MWLFigUaNGXdd75WTVqlW67777VLx4cRUqVEhRUVF65pln9OuvvzrOuXDhgiZPnqwaNWrI19dXRYsWVUxMjL777juPx4Nbh6+vr8aPH6+TJ0/mdSg5KlmypAoVKuTRMYcNG6YOHTqofv36Wrp0qXbu3KmJEydqx44d+uCDDyRJGRkZatmypd577z2NGjVK+/bt05dffqkLFy6oYcOG2rBhg0djAnLFALeJbt26mXvvvdckJyebI0eOmPj4eOPn52eeeuopj77PqlWrjCRz8uRJj46bkxkzZhgvLy/zr3/9y6xatcokJiaaNWvWmB49epiBAwcaY4zJzs427dq1M0WLFjWzZs0yBw8eNNu3bzdPPvmkKViwoFm4cOENjxPW061bN/PAAw+YKlWqmGeffdaxf+HChebPXw3fffed+dvf/mZ8fX1N6dKlTd++fc2ZM2ccx48ePWruu+8+4+vra8qUKWPi4+NNZGSkmTx5suOciRMnmmrVqplChQqZ0qVLm169epk//vjDGPO/35c/byNGjDDGGKdxOnbsaDp06OD0Gc6fP2+KFy9u3nvvPWPMxf/Wx48fb8qWLWt8fX1NjRo1zCeffOI4f+PGjUaSiY2NzfFncul3dty4ccZms5nt27c7Hb9w4YKpV6+eiYqKMtnZ2bn4KQOeQ8KG20a3bt1M27ZtnfY98cQTJjQ01KSnp5u+ffuakiVLGrvdbpo2bWp++OEHx3knTpwwnTp1MiVKlDC+vr6mQoUKji+JxMREI8ls27bN8c9/3rp162aMMaZZs2amf//+xhhjnn/+edOwYUOXGKtXr25eeuklx+v33nvPVKlSxdjtdlO5cmUzbdo0x7GkpCTj4+NjBgwYkOPnvfTlM2/ePCPJLFmyxOWchx9+2BQvXtzpCxi3h0u/DwsWLDC+vr4mKSnJGOOcsP3444+mSJEiZvLkyWbfvn3mu+++M7Vr1zbdu3d3jNOyZUtTq1Yts2HDBrNlyxbTrFkz4+fn55SwTZ482axcudIcPHjQfPPNN6Zy5cqmV69exhhjMjIyTGxsrAkICDDJyckmOTnZkcz9OWH77LPPjJ+fn+PYpX2+vr7m9OnTxhhjXnjhBVOlShXz1VdfmZ9//tnMnj3b2O12s3r1amOMMf369TNFihQx58+fv+LPpkaNGuaee+7J8Vh8fLzj9x24mUjYcNvIKWHr27evKV68uOnXr58JDw83X375pdm1a5fp1q2bKVasmPn999+NMcb06dPH1KpVy2zatMkkJiaa5cuXOxKgPydsWVlZJiEhwUgye/fuNcnJyebUqVPGGOeE7aeffjKSzIEDBxyx7Ny503GdMca8/fbbJiwszCQkJJiDBw+ahIQEExQUZOLi4owxxkyaNMlIMkePHr3i5/7HP/5hKlWqlOOx7777zkiiynYb+vPvQ6NGjczjjz9ujHFO2Lp06WJ69uzpdN23335rvLy8zLlz58yePXuMJLNp0ybH8f379xtJTgnbX3388cemePHijtezZ882gYGBLuf9OWE7f/68KVGihHn//fcdxx977DHTvn17Y4wxZ86cMb6+vub77793GqNHjx7mscceM8YYExMTY2rUqHGFn8pFvr6+jt/Vv9q6dauRZObPn3/VcQBP4uHvuG398MMPmjt3rlq0aKHp06crLi5OMTExkqRZs2Zp+fLlevfdd/Xss8/qyJEjql27turVqyfp4uy1nBQoUEBBQUGSpODg4BxvpJakatWqqUaNGpo7d66GDx8uSYqPj1f9+vVVqVIlSdKoUaM0ceJEPfzww5KksmXLavfu3Zo5c6a6deum/fv3KyAgQGFhYVf8nPv27VPVqlVzPHZp/759+644BvK38ePH6+9//7ueeeYZp/1btmzRgQMHFB8f79hnjFF2drYSExO1b98+FSxYUHXq1HEcr1ChgooVK+Y0zqpVqzRmzBjt3r1bqampysrKUnp6utLS0lS4cOFcxejt7a327dsrPj5eXbp0UVpamhYvXqy5c+dKknbv3q309HS1atXK6brz58+rdu3ajtivd0KF+f+15n18fK5rHMBdTDrAbeXzzz9XkSJF5Ovrq8aNGys6Olp9+/ZVZmammjZt6jjP29tbDRo00J49eyRJvXr10rx581SrVi0NGTJE33///XXH0rlzZ8cXoTFGH330kTp37ixJOn78uJKSktSjRw8VKVLEsb366qv6+eefHdd4ajYfXz63t+joaLVu3VovvPCC0/7s7Gz9+9//1vbt2x3bjh07tH//fpUvX96RvPzVn/cfPnxY9913n6pVq6aEhARt2bJF06ZNkyRlZma6FWfnzp21YsUKHTt2TIsWLZKvr6/jL1nZ2dmSpC+++MIp3t27d+vTTz+VJFWqVEk///zzVd+3YsWK2r17d47H/vOf/zjGAm4mKmy4rVyqpnl7eys8PFze3t7asWOHJLkkP39OiGJiYnT48GF98cUXWrFihe6++2716dNHr7/++jXH0qlTJz3//PPaunWrzp07p6SkJHXs2FHS/758Zs2apYYNGzpdV6BAAUkXvzBOnz6t5OTkK1bZrvTlcykh5csH48aNU61atZz+W6hTp4527dqlChUq5HhNlSpVlJWVpW3btqlu3bqSpAMHDjjNkN68ebOysrI0ceJEeXldrBF8/PHHTuP4+PjowoULV42xSZMmioiI0Pz587V06VK1b9/e8ZeNqKgo2e12HTlyRM2aNcvx+k6dOmnKlCl666231L9/f5fjp06dUtGiRfXYY49p2LBh2rFjh2rWrOk4np2drcmTJ
6tevXqKioq6aryAJ1Fhw22lcOHCqlChgiIjI+Xt7S3pYgvHx8dH69atc5yXmZmpzZs3O7USS5Ysqe7du+vDDz9UbGys3n777Rzf49IXyNW+gEqXLq3o6GjFx8crPj5eLVu2VEhIiCQpJCREpUqV0sGDB1WhQgWnrWzZspKkdu3aycfHR6+99lqO41/60nzssce0f/9+ffbZZy7nTJw4UeHh4S5tJNx+qlevrs6dO2vq1KmOfc8995zWr1+vPn36aPv27dq/f7+WLFmivn37SrqYsLVs2VI9e/bUDz/8oG3btqlnz57y8/Nz/GWnfPnyysrK0tSpU3Xw4EF98MEHmjFjhtN7lylTRmfOnNE333yj3377TWfPns0xRpvNpk6dOmnGjBlavny5/vnPfzqO+fv7a/DgwRo4cKDmzJmjn3/+Wdu2bdO0adM0Z84cSVLDhg01ZMgQPfPMMxoyZIjWr1+vw4cP65tvvlH79u0d5w0cOFANGjRQmzZt9Mknn+jIkSPatGmTHnnkEe3fv19xcXEe+7kDuZZ3t88BN1dOkw4u6d+/vwkPDzdLly51mnRw4sQJY4wxw4cPN4sWLTL79+83O3fuNA888IBp0KCBMcZ50oExxvzyyy/GZrOZuLg4c+zYMcestj9POrjk7bffNuHh4aZEiRLmgw8+cDo2a9Ys4+fnZ2JjY83evXvNjz/+aN577z0zceJExznTpk0zNpvNPP7442b16tXm0KFDZt26daZnz55m0KBBxpiLSx08+OCDplixYuadd94xiYmJZseOHaZnz57Gx8fHrFy58np/tLgF5fT7cOjQIWO3252W9fjhhx9Mq1atTJEiRUzhwoVNjRo1zOjRox3Hjx49amJiYozdbjeRkZFm7ty5Jjg42MyYMcNxzqRJk0xYWJjx8/MzrVu3Nu+//77L0jdPPfWUKV68+GWX9bhk165dRpKJjIx0WVojOzvbvPHGG6Zy5crG29vblCxZ0rRu3dqsWbPG6bz58+eb6Oho4+/v7/hMr7zyilM8Z86cMcOGDTPly5c3BQsWNJJMhQoVHLNpgZuNhA23jSslbOfOnTN9+/Y1JUqUyHFZj1GjRpmqVasaPz8/ExQUZNq2bWsOHjxojHFN2Iwx5pVXXjGhoaHGZrPluKzHJSdPnjR2u90UKlTIabmCS+Lj402tWrWMj4+PKVasmImOjjYLFixwOmf58uWmdevWplixYsbX19dUqVLFDB482Gn2aGZmppkwYYK58847jY+Pj5FkgoKCzK5du9z4CQJXl5SUZCSZFStW5HUoHvXll18au91upk6dmteh4DZlM+Yyd40CyLe2bt2qli1bqkePHpowYUJeh4Nb2MqVK3XmzBlVr15dycnJGjJkiH799Vft27fPcdtBfrFq1Sp9++236t27t0qUKJHX4eA2Q8IG3Ka2bdumxYsXq0uXLipfvnxeh4Nb1Ndff61nnnlGBw8elL+/v5o0aaLY2FhFRkbmdWhAvkLCBgAAYHHMEgUAALA4EjYAAACLI2EDAACwOBI2AAAAiyNhAwAAsDgSNgC3hJEjR6pWrVqO1927d9eDDz540+M4dOiQbDabtm/fftlzypQpo9jY2FyPGRcXp6JFi153bDabTYsWLbrucQBYDwkbgGvWvXt32Ww22Ww2eXt7q1y5cho8eLDS0tJu+Hu/8cYbuX6mY26SLACwsoJ5HQCAW9u9996r2bNnKzMzU99++62eeOIJpaWlafr06S7nZmZmemz1+8DAQI+MAwC3AipsAK6L3W5XaGioIiIi1KlTJ3Xu3NnRlrvUxnzvvfdUrlw52e12GWN0+vRp9ezZU8HBwQoICNDf//537dixw2nccePGKSQkRP7+/urRo4fS09Odjv+1JZqdna3x48erQoUKstvtuuOOOzR69GhJUtmyZSVJtWvXls1mU/PmzR3XzZ49W1WrVpWvr6+qVKmit956y+l9fvjhB9WuXVu+vr6qV6+etm3b5vbPaNKkSapevboKFy6siIgI9e7dW2fOnHE5b9GiRapUqZJ8fX3VqlUrJSUlOR3/7LPPVLduXfn6+qpcuXJ6+eWXlZWV5XY8AG49JGwAPMrPz0+ZmZmO1wcOHNDHH3+shIQER0vy/vvvV0pKir788ktt2bJFderU0d13360TJ05Ikj7++GONGDFCo0eP1ubNmxUWFuaSSP3V0KFDNX78eA0fPly7d+/W3LlzFRISIuli0iVJK1asUHJyshYsWCBJmjVrloYNG6bRo0drz549GjNmjIYPH645c+ZIktLS0vTAAw+ocuXK2rJli0aOHKnBgwe7/TPx8vLSlClTtHPnTs2ZM0crV67UkCFDnM45e/asRo8erTlz5ui7775TamqqOnbs6Dj+9ddf65///Kf69eun3bt3a+bMmYqLi3MkpQDyuTx88DyAW1y3bt1M27ZtHa83btxoihcvbh599FFjjDEjRoww3t7e5tixY45zvvnmGxMQEGDS09OdxipfvryZOXOmMcaYxo0bm6eeesrpeMOGDU3NmjVzfO/U1FRjt9vNrFmzcowzMTHRSDLbtm1z2h8REWHmzp3rtG/UqFGmcePGxhhjZs6caYKCgkxaWprj+PTp03Mc688iIyPN5MmTL3v8448/NsWLF3e8nj17tpFkNmzY4Ni3Z88eI8ls3LjRGGPM3/72NzNmzBincT744AMTFhbmeC3JLFy48LLvC+DWxT1sAK7L559/riJFiigrK0uZmZlq27atpk6d6jgeGRmpkiVLOl5v2bJFZ86cUfHixZ3GOXfunH7++WdJ0p49e/TUU085HW/cuLFWrVqVYwx79uxRRkaG7r777lzHffz4cSUlJalHjx568sknHfuzsrIc98ft2bNHNWvWVKFChZzicNeqVas0ZswY7d69W6mpqcrKylJ6errS0tJUuHBhSVLBggVVr149xzVVqlRR0aJFtWfPHjVo0EBbtmzRpk2bnCpqFy5cUHp6us6ePesUI4D8h4QNwHVp0aKFpk+fLm9vb4WHh7tMKriUkFySnZ2tsLAwrV692mWsa13aws/Pz+1rsrOzJV1sizZs2NDpWIECBSRJxphriufPDh8+rPvuu09PPfWURo0apaCgIK1bt049evRwah1LF5fl+KtL+7Kzs/Xyyy/r4YcfdjnH19f3uuMEYG0kbACuS+HChVWhQoVcn1+nTh2lpKSoYMGCKlOmTI7nVK1aVRs2bFDXrl0d+zZs2HDZMStWrCg/Pz998803euKJJ1yO+/j4SLpYkbokJCREpUqV0sGDB9W5c+ccx42KitIHH3ygc+fOOZLCK8WRk82bNysrK0sTJ06Ul9fF24Y//vhjl/OysrK0efNmNWjQQJK0d+9enTp1SlWqVJF08ee2d+9et37WAPIPEjYAN1XLli3VuHFjPfjggxo/frwqV66so0eP6ssvv9SDDz6oevXqqX///urWrZvq1aunu+66S/Hx8dq1a5fKlSuX45i+vr567rnnNGTIEPn4+Khp06Y6fvy4du3apR49eig4OFh+fn766quvVLp0afn6
+iowMFAjR45Uv379FBAQoJiYGGVkZGjz5s06efKkBg0apE6dOmnYsGHq0aOHXnzxRR06dEivv/66W5+3fPnyysrK0tSpU9WmTRt99913mjFjhst53t7e6tu3r6ZMmSJvb289/fTTatSokSOBe+mll/TAAw8oIiJC7du3l5eXl3788Uf99NNPevXVV93/FwHglsIsUQA3lc1m05dffqno6Gg9/vjjqlSpkjp27KhDhw45ZnV26NBBL730kp577jnVrVtXhw8fVq9eva447vDhw/XMM8/opZdeUtWqVdWhQwcdO3ZM0sX7w6ZMmaKZM2cqPDxcbdu2lSQ98cQTeueddxQXF6fq1aurWbNmiouLcywDUqRIEX322WfavXu3ateurWHDhmn8+PFufd5atWpp0qRJGj9+vKpVq6b4+HiNHTvW5bxChQrpueeeU6dOndS4cWP5+flp3rx5juOtW7fW559/ruXLl6t+/fpq1KiRJk2apMjISLfiAXBrshlP3KQBAACAG4YKGwAAgMWRsAEAAFgcCRsAAIDFkbABAABYHAkbAACAxZGwAQAAWBwJGwAAgMWRsAEAAFgcCRsAAIDFkbABAABYHAkbAACAxZGwAQAAWNz/AVO1TXu5E/DoAAAAAElFTkSuQmCC",
571 | "text/plain": [
572 | ""
573 | ]
574 | },
575 | "metadata": {},
576 | "output_type": "display_data"
577 | }
578 | ],
579 | "source": [
580 | "\n",
581 | "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
582 | "import matplotlib.pyplot as plt\n",
583 | "\n",
584 | "# Calculate the confusion matrix\n",
585 | "cm = confusion_matrix(cq_labels, cq_preds)\n",
586 | "\n",
587 | "# Create a visual representation\n",
588 | "disp = ConfusionMatrixDisplay(\n",
589 | " confusion_matrix=cm,\n",
590 | " display_labels=[\"PositiveCQ\", \"NegativeCQ\"])\n",
591 | "disp = disp.plot(cmap=plt.cm.Blues,values_format='g')\n",
592 | "\n",
593 | "plt.tight_layout()\n",
594 | "plt.savefig(\"../evaluation/cq_test_conf.pdf\")"
595 | ]
596 | },
597 | {
598 | "cell_type": "code",
599 | "execution_count": null,
600 | "metadata": {},
601 | "outputs": [],
602 | "source": []
603 | }
604 | ],
605 | "metadata": {
606 | "kernelspec": {
607 | "display_name": "choco",
608 | "language": "python",
609 | "name": "python3"
610 | },
611 | "language_info": {
612 | "codemirror_mode": {
613 | "name": "ipython",
614 | "version": 3
615 | },
616 | "file_extension": ".py",
617 | "mimetype": "text/x-python",
618 | "name": "python",
619 | "nbconvert_exporter": "python",
620 | "pygments_lexer": "ipython3",
621 | "version": "3.11.4"
622 | }
623 | },
624 | "nbformat": 4,
625 | "nbformat_minor": 2
626 | }
627 |
--------------------------------------------------------------------------------
/ontochat/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | General-purpose utility functions.
3 | """
4 |
5 | def read_key(file_path: str = "key.txt"):
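    """Return the content of a local key file (e.g. an API key for the chat backend) as a string."""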
6 | with open(file_path, "r") as fo:
7 | key = fo.read()
8 | return key
9 |
10 | def read_list(file_path: str):
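    """Read a text file and return its lines as a list of whitespace-stripped strings."""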
11 | with open(file_path, "r") as fo:
12 | str_list = fo.readlines()
13 | return [s.strip() for s in str_list]
14 |
15 |
--------------------------------------------------------------------------------
/ontochat/verbaliser.py:
--------------------------------------------------------------------------------
1 | """
2 | Utilities for the verbalisation of an ontology.
3 |
4 | Examples of possible use cases for ontology verbalisation:
5 | - Summarising the features provided by the ontology (documentation)
6 | - Using an LM to extract competency questions from the ontology.
7 | - Asking an LM if the ontology can be used for certain requirements.
8 |
9 | """
10 | import logging
11 | from typing import List
12 |
13 | import rdflib
14 | from rdflib import Graph
15 | from rdflib.namespace import RDF, RDFS, OWL
16 |
17 | from ontochat.queries import NE_QUERY
18 |
19 | logger = logging.getLogger("ontochat.verbaliser")
20 |
21 |
22 | def verbalise_ontology(ontology_path: str, onto_about: str, onto_desc: str):
23 | """
24 | A simple method to verbalise ontologies and extract requirements. This is
25 | currently designed to produce a plain verbalisation.
26 |
27 | Parameters
28 | ----------
29 | ontology_path : str
30 | Path to the ontology encoded in a format that is readable by `rdflib`.
31 | onto_about : str
32 | A short description of the ontology, if documentation is missing.
33 | onto_desc : str
34 | An extended description of the ontology to provide more context.
35 |
36 | Returns
37 | -------
38 | verbalisation : str
39 | A string verbalisation of the ontology, which can be used as context for a language model.
40 |
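Example
-------
Illustrative call (the ontology path and descriptions are placeholder values)::

    verb = verbalise_ontology(
        "musicmeta.ttl",
        onto_about="music metadata",
        onto_desc="Artists, compositions, performances and recordings.")
    print(verb)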
41 | """
42 | g = Graph()
43 | g.parse(ontology_path)
44 |
45 | # Everything that has a label is mapped here, otherwise we get a URI label
46 | label_dict = {s: str(o) for s, _, o in g.triples((None, RDFS.label, None))}
47 | # otherwise, fall back to the last part of the URI
48 | label_fn = lambda x: label_dict[x] if x in label_dict else str(x).split("/")[-1]
49 | comment_dict = {s: str(o) for s, _, o in g.triples((None, RDFS.comment, None))}
50 |
51 | logger.info("Class verbalisation: start")
52 | class_vrbs = verbalise_classes(g, label_fn, comment_dict)
53 | logger.info(f"Class verbalisation: found {len(class_vrbs)} classes")
54 |
55 | logger.info("Named entity verbalisation: start")
56 | nament_vrbs = verbalise_named_entities(g, label_fn, comment_dict)
57 | logger.info(f"Named entity verbalisation: found {len(nament_vrbs)} entities")
58 |
59 | logger.info("Relation verbalisation: start")
60 | relat_vrbs = verbalise_relations(g, label_fn, comment_dict)
61 | logger.info(f"Relation verbalisation: found {len(relat_vrbs)} relations")
62 |
63 | return collate_verbalisations(class_vrbs, relation_verbalisations=relat_vrbs, nentities_verbalisations=nament_vrbs, onto_about=onto_about, onto_desc=onto_desc)
64 |
65 |
66 | def create_relation_dict(graph, relation):
67 | """
68 | Returns all the objects appearing as tails of the given relation.
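
For example, called with ``relation=RDFS.subClassOf`` the result may look like
``{<ClassA>: [<superclass of A>], <ClassB>: [<superclass of B>]}``, i.e. each
subject is mapped to all objects it points to via that relation (blank nodes
are skipped).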
69 | """
70 | relation_dict = {} # subject to all possible objects via relation
71 | for s, p, o in graph.triples((None, relation, None)):
72 | if isinstance(o, rdflib.term.BNode):
73 | continue # skip blank node
74 | if s not in relation_dict:
75 | relation_dict[s] = []
76 | relation_dict[s].append(o)
77 | return relation_dict
78 |
79 |
80 | def verbalise_classes(graph: rdflib.Graph, label_fn, comment_dict: dict):
81 | # Classes are first to be extracted, subclasses follow
82 | classes = [s for s, _, _ in graph.triples((None, RDF.type, OWL.Class))]
83 | subclasses = create_relation_dict(graph, relation=RDFS.subClassOf)
84 | logger.info(f"Found: {len(classes)} classes, {len(subclasses)} subclasses")
85 | # Step 1: Verbalisation of classes, one by one
86 | verbalisation_hist = []
87 | class_verbalisations = []
88 | for base_class in classes:
89 | # The base verbalisation is the class label, if available
90 | vrbn = f"{label_fn(base_class)}"
91 |
92 | if base_class in subclasses: # list all parent classes
93 | vrbn += " (subconcept of " # opening parenthesis
94 | vrbn += ", ".join([label_fn(u) for u in subclasses[base_class]])
95 | vrbn += ")" # closing parenthesis
96 |
97 | if base_class in comment_dict: # include comment
98 | vrbn += f": {comment_dict[base_class]}"
99 |
100 | verbalisation_hist.append(base_class)
101 | class_verbalisations.append(vrbn)
102 |
103 | # Step 2: verbalisation of remaining subclasses
104 | for subclass in subclasses: # check remaining subclasses
105 | if subclass not in verbalisation_hist:
106 | raise NotImplementedError(subclass)
107 |
108 | return class_verbalisations
109 |
110 |
111 | def verbalise_named_entities(graph: rdflib.Graph, label, comment_dict: dict):
112 | """
113 | Note: TODO append NE comment (if available) to each named entity.
114 | Note: FIXME still, a named entity can have more than 1 parent class.
115 | """
116 | qres = graph.query(NE_QUERY)
117 | named_entities = {n: c for n, c in list(qres)}
118 |
119 | nentities_verbalisations = []
120 | for named_entity, named_type in named_entities.items():
121 | verbalisation = f"{label(named_entity)} is an instance of class {label(named_type)}."
122 | nentities_verbalisations.append(verbalisation)
123 |
124 | return nentities_verbalisations
125 |
126 |
127 | def verbalise_relations(graph: rdflib.Graph, label, comment_dict: dict):
128 | properties = [s for s, _, _ in graph.triples(
129 | (None, RDF.type, OWL.ObjectProperty))]
130 | subprops = create_relation_dict(graph, relation=RDFS.subPropertyOf)
131 | domains = create_relation_dict(graph, relation=RDFS.domain)
132 | ranges = create_relation_dict(graph, relation=RDFS.range)
133 |
134 | # Step 1: Verbalisation of properties, one by one
135 | verbalisation_hist = []
136 | relation_verbalisations = []
137 |
138 | for base_prop in properties:
139 |
140 | # The base verbalisation is the property label, if available
141 | verbalisation = f"{label(base_prop)}"
142 |
143 | if base_prop in subprops:
144 | verbalisation += " (subproperty of " # opening parenthesis
145 | verbalisation += ", and ".join([label(u) for u in subprops[base_prop]])
146 | verbalisation += ")" # closing parenthesis
147 |
148 | if base_prop in comment_dict: # include comment
149 | verbalisation += f": {comment_dict[base_prop]}"
150 |
151 | if base_prop in domains:
152 | verbalisation += f" The domain of this relation can be: "
153 | verbalisation += ", or ".join([label(u) for u in domains[base_prop]])
154 | verbalisation += "."
155 |
156 | if base_prop in ranges:
157 | verbalisation += f" The range of this relation can be: "
158 | verbalisation += ", or ".join([label(u) for u in ranges[base_prop]])
159 | verbalisation += "."
160 |
161 | verbalisation_hist.append(base_prop)
162 | relation_verbalisations.append(verbalisation)
163 |
164 | for subprop in subprops: # check remaining subproperties
165 | if subprop not in verbalisation_hist:
166 | raise NotImplementedError(subprop)
167 |
168 | return relation_verbalisations
169 |
170 |
171 | def collate_verbalisations(class_verbalisations: List[str],
172 | relation_verbalisations: List[str],
173 | nentities_verbalisations: List[str],
174 | onto_about: str, onto_desc: str,
175 | ):
176 | ontoverb = "" # This is the basic prompt with the ontology description
177 | # ontoverb += f"You are given an ontology about {onto_about}. {onto_desc}\n"
178 | ontoverb += f"Ontology description: {onto_about}. {onto_desc}"
179 |
180 | ontoverb += "\n"
181 |
182 | ontoverb += "The main classes of the ontology are listed below:\n"
183 | for class_verb in class_verbalisations:
184 | ontoverb += f"- {class_verb}\n"
185 |
186 | ontoverb += "\n"
187 |
188 | ontoverb += "The main named entities (individuals) are listed below:\n"
189 |
190 | for ne_verb in nentities_verbalisations:
191 | ontoverb += f"- {ne_verb}\n"
192 |
193 | ontoverb += "\n"
194 |
195 | ontoverb += "The main relations of the ontology are listed below:\n"
196 | for rel_verb in relation_verbalisations:
197 | ontoverb += f"- {rel_verb}\n"
198 |
199 | return ontoverb
200 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | openai
2 | gradio
3 | scikit-learn
4 | sentence-transformers
5 | rdflib
6 | matplotlib
7 | fastapi
8 | uvicorn
9 | gradio_client
10 | pandas
11 |
--------------------------------------------------------------------------------