├── __pycache__
    ├── chain.cpython-311.pyc
    └── retrievers.cpython-311.pyc
├── zeroshot.cfg
├── __init__.py
├── README.md
├── utils.py
├── .devcontainer
    └── devcontainer.json
├── requirements.txt
├── chain.py
├── retrievers.py
├── functions.py
├── ingest.py
├── App.py
└── openaiKG.ipynb


/__pycache__/chain.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leannchen86/openai-knowledge-graph-streamlit-app/HEAD/__pycache__/chain.cpython-311.pyc


--------------------------------------------------------------------------------
/__pycache__/retrievers.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leannchen86/openai-knowledge-graph-streamlit-app/HEAD/__pycache__/retrievers.cpython-311.pyc


--------------------------------------------------------------------------------
/zeroshot.cfg:
--------------------------------------------------------------------------------
 1 | [paths]
 2 | examples = null
 3 | 
 4 | [nlp]
 5 | lang = "en"
 6 | pipeline = ["ner", "llm_rel"]
 7 | 
 8 | [components]
 9 | 
10 | [components.ner]
11 | source = "en_core_web_md"
12 | 
13 | [components.llm_rel]
14 | factory = "llm"
15 | 
16 | [components.llm_rel.task]
17 | @llm_tasks = "spacy.REL.v1"
18 | 
19 | [components.llm_rel.model]
20 | @llm_models = "spacy.GPT-4.v3"
21 | name = "gpt-4-0125-preview"
22 | config = {"temperature": 0.0}
23 | 
24 | [initialize]
25 | vectors = "en_core_web_md"


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Advanced RAG Streamlit Chatbot - README 
 2 | 
 3 | This AI chatbot combines a knowledge graph built with spacy-llm and an advanced RAG (Retrieval-Augmented Generation) pipeline to offer context-aware interactions. Built with Streamlit, it features an intuitive UI and a Neo4j-style graph view for exploring entity relationships. The chatbot is fed with data related to the OpenAI saga (the removal of Sam Altman from OpenAI and its aftermath).
 4 | 
 5 | Highlights
 6 | 
 7 | 1. Advanced RAG System: 
 8 | Utilizes OpenAI's language models for relevant, informed responses.
 9 | 
10 | 2. Knowledge Graph Integration:
11 | Leverages Neo4j's comprehensive network of entity relationships for deeper insights.
12 | 
13 | 3. Streamlit Interface:
14 | Offers a user-friendly, easy-to-navigate experience.
15 | 
16 | Note: retrievers.py and chain.py are from langchain's neo4j_advanced_rag template: 
17 | 
18 | https://github.com/langchain-ai/langchain/tree/master/templates/neo4j-advanced-rag?ref=blog.langchain.dev
19 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import inspect
16 | import textwrap
17 | 
18 | import streamlit as st
19 | 
20 | 
def show_code(demo):
    """Optionally display the source of ``demo`` below the app.

    A "Show code" checkbox (ticked by default) is placed in the sidebar.
    While it is ticked, a "Code" heading is written, followed by the source
    lines of ``demo`` with the first line dropped and the rest dedented.
    """
    if not st.sidebar.checkbox("Show code", True):
        return

    st.markdown("## Code")
    # getsourcelines returns (lines, first_line_number); only the lines matter.
    source_lines = inspect.getsourcelines(demo)[0]
    body_text = "".join(source_lines[1:])
    st.code(textwrap.dedent(body_text))
29 | 


--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "Python 3",
 3 |   // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
 4 |   "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
 5 |   "customizations": {
 6 |     "codespaces": {
 7 |       "openFiles": [
 8 |         "README.md",
 9 |         "Hello.py"
10 |       ]
11 |     },
12 |     "vscode": {
13 |       "settings": {},
14 |       "extensions": [
15 |         "ms-python.python",
16 |         "ms-python.vscode-pylance"
17 |       ]
18 |     }
19 |   },
20 |   "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
21 |   "postAttachCommand": {
22 |     "server": "streamlit run App.py --server.enableCORS false --server.enableXsrfProtection false"
23 |   },
24 |   "portsAttributes": {
25 |     "8501": {
26 |       "label": "Application",
27 |       "onAutoForward": "openPreview"
28 |     }
29 |   },
30 |   "forwardPorts": [
31 |     8501
32 |   ]
33 | }
34 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiohttp==3.9.1
 2 | aiosignal==1.3.1
 3 | altair==5.2.0
 4 | annotated-types==0.6.0
 5 | anyio==3.7.1
 6 | attrs==23.1.0
 7 | blinker==1.7.0
 8 | cachetools==5.3.2
 9 | certifi==2023.11.17
10 | charset-normalizer==3.3.2
11 | click==8.1.7
12 | dataclasses-json==0.6.3
13 | distro==1.8.0
14 | frozenlist==1.4.0
15 | gitdb==4.0.11
16 | GitPython==3.1.40
17 | greenlet==3.0.1
18 | h11==0.14.0
19 | httpcore==1.0.2
20 | httpx==0.25.2
21 | idna==3.6
22 | importlib-metadata==6.11.0
23 | isodate==0.6.1
24 | Jinja2==3.1.2
25 | jsonpatch==1.33
26 | jsonpointer==2.4
27 | jsonschema==4.20.0
28 | jsonschema-specifications==2023.11.2
29 | langchain==0.0.346
30 | langchain-core==0.0.10
31 | langsmith==0.0.69
32 | markdown-it-py==3.0.0
33 | MarkupSafe==2.1.3
34 | marshmallow==3.20.1
35 | mdurl==0.1.2
36 | multidict==6.0.4
37 | mypy-extensions==1.0.0
38 | neo4j==5.15.0
39 | networkx==3.2.1
40 | numpy==1.26.2
41 | openai==1.3.7
42 | packaging==23.2
43 | pandas==2.1.3
44 | Pillow==10.1.0
45 | protobuf==4.25.1
46 | pyarrow==14.0.1
47 | pydantic==2.5.2
48 | pydantic_core==2.14.5
49 | pydeck==0.8.0
50 | Pygments==2.17.2
51 | pyparsing==3.1.1
52 | python-dateutil==2.8.2
53 | pytz==2023.3.post1
54 | PyYAML==6.0.1
55 | rdflib==7.0.0
56 | referencing==0.31.1
57 | regex==2023.10.3
58 | requests==2.31.0
59 | rich==13.7.0
60 | rpds-py==0.13.2
61 | six==1.16.0
62 | smmap==5.0.1
63 | sniffio==1.3.0
64 | SQLAlchemy==2.0.23
65 | streamlit==1.29.0
66 | streamlit-agraph==0.0.45
67 | tenacity==8.2.3
68 | tiktoken==0.5.2
69 | toml==0.10.2
70 | toolz==0.12.0
71 | tornado==6.4
72 | tqdm==4.66.1
73 | typing-inspect==0.9.0
74 | typing_extensions==4.8.0
75 | tzdata==2023.3
76 | tzlocal==5.2
77 | urllib3==2.1.0
78 | validators==0.22.0
79 | watchdog==3.0.0
80 | yarl==1.9.3
81 | zipp==3.17.0


--------------------------------------------------------------------------------
/chain.py:
--------------------------------------------------------------------------------
 1 | from operator import itemgetter
 2 | 
 3 | from langchain.chat_models import ChatOpenAI
 4 | from langchain.prompts import ChatPromptTemplate
 5 | from langchain.pydantic_v1 import BaseModel
 6 | from langchain.schema.output_parser import StrOutputParser
 7 | from langchain.schema.runnable import ConfigurableField, RunnableParallel
 8 | import os
 9 | 
10 | # from retrievers import (
11 | #     hypothetic_question_vectorstore,
12 | #     parent_vectorstore,
13 | #     summary_vectorstore,
14 | #     typical_rag,
15 | # )
16 | 
# Input schema for the chain: a single free-text question.
# ``initialize_chain`` attaches it with ``with_types(input_type=Question)``.
class Question(BaseModel):
    question: str  # the question text looked up by the retriever and sent to the model
20 | 
21 | 
def initialize_chain(openai_api_key, typical_rag, parent_vectorstore, hypothetic_question_vectorstore, summary_vectorstore):
    """Assemble the retrieval-augmented question-answering chain.

    Args:
        openai_api_key: Key exported as ``OPENAI_API_KEY`` before the chat
            model is created.
        typical_rag: Vector store used by the default retrieval strategy.
        parent_vectorstore: Store behind the ``parent_strategy`` alternative.
        hypothetic_question_vectorstore: Store behind the
            ``hypothetical_questions`` alternative.
        summary_vectorstore: Store behind the ``summary_strategy`` alternative.

    Returns:
        A runnable typed with ``Question`` as input that retrieves context for
        the question, fills the prompt, calls the chat model and parses the
        reply to a string.
    """
    os.environ["OPENAI_API_KEY"] = openai_api_key

    prompt_text = """Answer the question based only on the following context:
    {context}

    Question: {question}
    """
    qa_prompt = ChatPromptTemplate.from_template(prompt_text)
    llm = ChatOpenAI()

    # The retrieval strategy is selectable at run time through the
    # configurable field "strategy"; "typical_rag" is the default key.
    default_retriever = typical_rag.as_retriever()
    alternatives = {
        "parent_strategy": parent_vectorstore.as_retriever(),
        "hypothetical_questions": hypothetic_question_vectorstore.as_retriever(),
        "summary_strategy": summary_vectorstore.as_retriever(),
    }
    retriever = default_retriever.configurable_alternatives(
        ConfigurableField(id="strategy"),
        default_key="typical_rag",
        **alternatives,
    )

    # Fan the incoming question out to the retriever and pass it through as-is.
    gather_inputs = RunnableParallel(
        {
            "context": itemgetter("question") | retriever,
            "question": itemgetter("question"),
        }
    )
    pipeline = gather_inputs | qa_prompt | llm | StrOutputParser()

    return pipeline.with_types(input_type=Question)
56 | 
57 | 


--------------------------------------------------------------------------------
/retrievers.py:
--------------------------------------------------------------------------------
 1 | from langchain.embeddings import OpenAIEmbeddings
 2 | from langchain.vectorstores import Neo4jVector
 3 | from langchain.graphs import Neo4jGraph
 4 | import streamlit as st
 5 | import os
 6 | 
 7 | 
 8 | # NEO4J_URI= st.secrets["NEO4J_URI"]
 9 | # NEO4J_USERNAME= st.secrets["NEO4J_USERNAME"]
10 | # NEO4J_PASSWORD= st.secrets["NEO4J_PASSWORD"]
11 | 
12 | from langchain.embeddings import OpenAIEmbeddings
13 | from langchain.vectorstores import Neo4jVector
14 | import os
15 | 
16 | # Declare global variables for the retrievers
17 | # typical_rag = None
18 | # parent_vectorstore = None
19 | # hypothetic_question_vectorstore = None
20 | # summary_vectorstore = None
21 | 
def initialize_retrievers(openai_api_key):
    """Open the four existing Neo4j vector indexes used by the chatbot.

    ``openai_api_key`` is exported as ``OPENAI_API_KEY`` so that each
    ``OpenAIEmbeddings()`` instance can authenticate. No Neo4j connection
    arguments are passed here, so ``Neo4jVector`` uses its own defaults
    (presumably the ``NEO4J_*`` environment variables — confirm).

    Returns:
        ``(typical_rag, parent_vectorstore, hypothetic_question_vectorstore,
        summary_vectorstore)`` in that order.
    """
    os.environ["OPENAI_API_KEY"] = openai_api_key

    # Each retrieval query walks from the matched node to its parent and
    # returns the parent's text.
    parent_query = """
    MATCH (node)<-[:HAS_CHILD]-(parent)
    WITH parent, max(score) AS score // deduplicate parents
    RETURN parent.text AS text, score, {} AS metadata LIMIT 1
    """
    hypothetic_question_query = """
    MATCH (node)<-[:HAS_QUESTION]-(parent)
    WITH parent, max(score) AS score // deduplicate parents
    RETURN parent.text AS text, score, {} AS metadata
    """
    summary_query = """
    MATCH (node)<-[:HAS_SUMMARY]-(parent)
    WITH parent, max(score) AS score // deduplicate parents
    RETURN parent.text AS text, score, {} AS metadata
    """

    def open_index(index_name, **options):
        # A fresh embeddings object per store, as in the original code.
        return Neo4jVector.from_existing_index(
            OpenAIEmbeddings(), index_name=index_name, **options
        )

    typical_rag = open_index("typical_rag")
    parent_vectorstore = open_index("parent_document", retrieval_query=parent_query)
    hypothetic_question_vectorstore = open_index(
        "hypothetical_questions", retrieval_query=hypothetic_question_query
    )
    summary_vectorstore = open_index("summary", retrieval_query=summary_query)

    return typical_rag, parent_vectorstore, hypothetic_question_vectorstore, summary_vectorstore
77 | 
78 | 


--------------------------------------------------------------------------------
/functions.py:
--------------------------------------------------------------------------------
# Function to process the query and return a response
def process_query(query):
    """Run ``query`` through ``cypher_chain`` and gather the answer and graph data.

    Returns a 4-tuple ``(nl_response, visual, nodes, edges)``.

    NOTE(review): neither ``cypher_chain`` nor ``visual`` is defined or
    imported in this module as shown, so the call raises ``NameError`` unless
    both are provided as globals elsewhere — confirm where they should come
    from (or whether ``visual`` is a leftover).
    """
    # Use GraphCypherQAChain to get a Cypher query and a natural language response
    result = cypher_chain(query)
    intermediate_steps = result['intermediate_steps']
    final_answer = result['result']
    # The generated Cypher text is read from the first intermediate step.
    generated_cypher = intermediate_steps[0]['query']
    nl_response = final_answer
    
    # Fetch graph data using the Cypher query
    nodes, edges = fetch_graph_data(direct_cypher_query=generated_cypher, intermediate_steps=intermediate_steps)
    
    return nl_response, visual, nodes, edges
14 | 
# Function to construct the Cypher query based on selected filters
def construct_cypher_query(node_types, rel_types):
    """Build a Cypher query returning ``p, r, n`` for the selected filters.

    The previous implementation joined several ``(p:Label)-[r]->(n)``
    patterns with ``OR`` directly after ``MATCH``. Cypher does not allow
    that, so the query was invalid as soon as two node types were selected,
    and also when none were. The filters are now expressed as predicates in
    a single ``WHERE`` clause over one pattern.

    Args:
        node_types: Labels allowed for the start node ``p``. An empty or
            ``None`` value applies no label restriction.
        rel_types: Relationship types allowed for ``r``. An empty or
            ``None`` value applies no type restriction.

    Returns:
        The query text. Values are interpolated verbatim, so they must come
        from a trusted, fixed list (not free-form user input).
    """
    filters = []
    if node_types:
        label_filter = " OR ".join(f"p:{node_type}" for node_type in node_types)
        filters.append(f"({label_filter})")
    if rel_types:
        type_filter = " OR ".join(f"type(r)='{rel_type}'" for rel_type in rel_types)
        filters.append(f"({type_filter})")

    query = "MATCH (p)-[r]->(n)"
    if filters:
        # Label and type restrictions must both hold for a row to be returned.
        query += " WHERE " + " AND ".join(filters)
    return query + " RETURN p, r, n"
35 | 
def process_graph_result(context):
    """Convert chain context rows into agraph nodes and edges.

    Each row is indexed by ``'p.name'``, ``'o.name'`` and ``'type(r)'``.
    One ``Node`` is emitted per distinct name, in order of first appearance,
    and one ``Edge`` per row, labelled with the relationship type.

    Returns:
        ``(nodes, edges)`` as two lists.
    """
    seen_names = set()  # names that already have a Node
    graph_nodes = []
    graph_edges = []

    for row in context:
        source_name = row['p.name']
        target_name = row['o.name']

        # Register each endpoint once, source first.
        for name in (source_name, target_name):
            if name in seen_names:
                continue
            graph_nodes.append(Node(id=name, label=name, size=5, shape="circle"))
            seen_names.add(name)

        graph_edges.append(
            Edge(source=source_name, target=target_name, label=row['type(r)'])
        )

    return graph_nodes, graph_edges
59 | 
# Function to fetch data from Neo4j
def fetch_graph_data(nodesType=None, relType=None, direct_cypher_query=None, intermediate_steps=None):
    """Build graph nodes and edges from a chain's intermediate steps.

    The previous version read ``intermediate_steps[0]['context']``, but
    ``process_query`` in this module reads the generated query from step 0
    (``intermediate_steps[0]['query']``), so the context is not expected
    there. The step that actually carries a ``'context'`` entry is now
    looked up, and a missing or empty context yields empty lists instead of
    an exception.

    Args:
        nodesType: Accepted for interface compatibility; not used, because
            the graph is built only from the chain's context rows.
        relType: Accepted for interface compatibility; not used.
        direct_cypher_query: Accepted for interface compatibility; not used.
        intermediate_steps: Sequence of step dicts from the Cypher QA chain.

    Returns:
        ``(nodes, edges)`` as produced by ``process_graph_result``, or
        ``([], [])`` when there is no context to process.
    """
    context = next(
        (step['context'] for step in intermediate_steps or () if 'context' in step),
        None,
    )
    if not context:
        return [], []

    nodes, edges = process_graph_result(context)
    return nodes, edges
71 | 


--------------------------------------------------------------------------------
/ingest.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | from typing import List
  3 | from langchain.chains.openai_functions import create_structured_output_chain
  4 | from langchain.chat_models import ChatOpenAI
  5 | from langchain.document_loaders import WikipediaLoader, PyPDFLoader, TextLoader
  6 | from langchain.docstore.document import Document
  7 | from langchain.embeddings.openai import OpenAIEmbeddings
  8 | from langchain.graphs import Neo4jGraph
  9 | from langchain.prompts import ChatPromptTemplate
 10 | from langchain.pydantic_v1 import BaseModel, Field
 11 | from langchain.text_splitter import TokenTextSplitter, CharacterTextSplitter
 12 | from neo4j.exceptions import ClientError
 13 | import os
 14 | 
 15 | graph = Neo4jGraph()
 16 | 
 17 | # Load Wikipedia Data
 18 | all_data = WikipediaLoader(query="Removal_of_Sam_Altman_from_OpenAI").load()
 19 | 
 20 | # Embeddings & LLM models
 21 | embeddings = OpenAIEmbeddings()
 22 | embedding_dimension = 1536
 23 | llm = ChatOpenAI(temperature=0)
 24 | 
 25 | # Process All Data
 26 | parent_splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=24)
 27 | child_splitter = TokenTextSplitter(chunk_size=100, chunk_overlap=24)
 28 | 
 29 | # Ingest Parent-Child node pairs
 30 | for document in all_data:
 31 |     parent_documents = parent_splitter.split_documents([document])
 32 |     for i, parent in enumerate(parent_documents):
 33 |         child_documents = child_splitter.split_documents([parent])
 34 |         params = {
 35 |             "parent_text": parent.page_content,
 36 |             "parent_id": i,
 37 |             "parent_embedding": embeddings.embed_query(parent.page_content),
 38 |             "children": [
 39 |                 {
 40 |                     "text": c.page_content,
 41 |                     "id": f"{i}-{ic}",
 42 |                     "embedding": embeddings.embed_query(c.page_content),
 43 |                 }
 44 |                 for ic, c in enumerate(child_documents)
 45 |             ],
 46 |         }
 47 |         # Ingest data
 48 |         graph.query(
 49 |             """
 50 |         MERGE (p:Parent {id: $parent_id})
 51 |         SET p.text = $parent_text
 52 |         WITH p
 53 |         CALL db.create.setVectorProperty(p, 'embedding', $parent_embedding)
 54 |         YIELD node
 55 |         WITH p 
 56 |         UNWIND $children AS child
 57 |         MERGE (c:Child {id: child.id})
 58 |         SET c.text = child.text
 59 |         MERGE (c)<-[:HAS_CHILD]-(p)
 60 |         WITH c, child
 61 |         CALL db.create.setVectorProperty(c, 'embedding', child.embedding)
 62 |         YIELD node
 63 |         RETURN count(*)
 64 |         """,
 65 |             params,
 66 |         )
 67 |         # Create vector index for child
 68 |         try:
 69 |             graph.query(
 70 |                 "CALL db.index.vector.createNodeIndex('parent_document', "
 71 |                 "'Child', 'embedding', $dimension, 'cosine')",
 72 |                 {"dimension": embedding_dimension},
 73 |             )
 74 |         except ClientError:  # already exists
 75 |             pass
 76 |         # Create vector index for parents
 77 |         try:
 78 |             graph.query(
 79 |                 "CALL db.index.vector.createNodeIndex('typical_rag', "
 80 |                 "'Parent', 'embedding', $dimension, 'cosine')",
 81 |                 {"dimension": embedding_dimension},
 82 |             )
 83 |         except ClientError:  # already exists
 84 |             pass
 85 |     # Ingest hypothethical questions
 86 | 
 87 | 
 88 | class Questions(BaseModel):
 89 |     """Generating hypothetical questions about text."""
 90 | 
 91 |     questions: List[str] = Field(
 92 |         ...,
 93 |         description=(
 94 |             "Generated hypothetical questions based on " "the information from the text"
 95 |         ),
 96 |     )
 97 | 
 98 | 
 99 | questions_prompt = ChatPromptTemplate.from_messages(
100 |     [
101 |         (
102 |             "system",
103 |             (
104 |                 "You are generating hypothetical questions based on the information "
105 |                 "found in the text. Make sure to provide full context in the generated "
106 |                 "questions."
107 |             ),
108 |         ),
109 |         (
110 |             "human",
111 |             (
112 |                 "Use the given format to generate hypothetical questions from the "
113 |                 "following input: {input}"
114 |             ),
115 |         ),
116 |     ]
117 | )
118 | 
119 | question_chain = create_structured_output_chain(Questions, llm, questions_prompt)
120 | 
121 | for i, parent in enumerate(parent_documents):
122 |     questions = question_chain.run(parent.page_content).questions
123 |     params = {
124 |         "parent_id": i,
125 |         "questions": [
126 |             {"text": q, "id": f"{i}-{iq}", "embedding": embeddings.embed_query(q)}
127 |             for iq, q in enumerate(questions)
128 |             if q
129 |         ],
130 |     }
131 |     graph.query(
132 |         """
133 |     MERGE (p:Parent {id: $parent_id})
134 |     WITH p
135 |     UNWIND $questions AS question
136 |     CREATE (q:Question {id: question.id})
137 |     SET q.text = question.text
138 |     MERGE (q)<-[:HAS_QUESTION]-(p)
139 |     WITH q, question
140 |     CALL db.create.setVectorProperty(q, 'embedding', question.embedding)
141 |     YIELD node
142 |     RETURN count(*)
143 |     """,
144 |         params,
145 |     )
146 |     # Create vector index
147 |     try:
148 |         graph.query(
149 |             "CALL db.index.vector.createNodeIndex('hypothetical_questions', "
150 |             "'Question', 'embedding', $dimension, 'cosine')",
151 |             {"dimension": embedding_dimension},
152 |         )
153 |     except ClientError:  # already exists
154 |         pass
155 | 
# Ingest summaries

summary_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            (
                "You are generating concise and accurate summaries based on the "
                "information found in the text."
            ),
        ),
        (
            "human",
            ("Generate a summary of the following input: {question}\n" "Summary:"),
        ),
    ]
)

summary_chain = summary_prompt | llm

for i, parent in enumerate(parent_documents):
    # The prompt variable is named "question" but carries the parent text.
    summary = summary_chain.invoke({"question": parent.page_content}).content
    params = {
        "parent_id": i,
        "summary": summary,
        "embedding": embeddings.embed_query(summary),
    }
    graph.query(
        """
    MERGE (p:Parent {id: $parent_id})
    MERGE (p)-[:HAS_SUMMARY]->(s:Summary)
    SET s.text = $summary
    WITH s
    CALL db.create.setVectorProperty(s, 'embedding', $embedding)
    YIELD node
    RETURN count(*)
    """,
        params,
    )

# Create vector index (once; previously re-issued for every parent chunk,
# which cost a round-trip and a swallowed ClientError per iteration)
try:
    graph.query(
        "CALL db.index.vector.createNodeIndex('summary', "
        "'Summary', 'embedding', $dimension, 'cosine')",
        {"dimension": embedding_dimension},
    )
except ClientError:  # already exists
    pass
204 | 


--------------------------------------------------------------------------------
/App.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | from langchain.chat_models import ChatOpenAI
  3 | from langchain.graphs import Neo4jGraph
  4 | from streamlit_agraph import agraph, Node, Edge, Config
  5 | from neo4j import GraphDatabase
  6 | import os
  7 | from openai import OpenAI
  8 | 
  9 | # Function to process the query and return a response
 10 | def process_query(query):
 11 |     # Use GraphCypherQAChain to get a Cypher query and a natural language response
 12 |     result = cypher_chain(query)
 13 |     intermediate_steps = result['intermediate_steps']
 14 |     final_answer = result['result']
 15 |     generated_cypher = intermediate_steps[0]['query']
 16 |     response_structured = final_answer
 17 |     
 18 |     # Fetch graph data using the Cypher query
 19 |     nodes, edges = fetch_graph_data(nodesType=None, relType=None, direct_cypher_query=generated_cypher, intermediate_steps=intermediate_steps)
 20 |     
 21 |     return response_structured, nodes, edges
 22 | 
 23 | # Function to fetch data from Neo4j
 24 | def fetch_graph_data(nodesType=None, relType=None, direct_cypher_query=None, intermediate_steps=None):
 25 |     # Use the direct Cypher query if provided
 26 |     if direct_cypher_query:
 27 |         context = intermediate_steps[1]['context']
 28 |         nodes, edges = process_graph_result(context)
 29 |     else:
 30 |         if nodesType or relType:
 31 |             # Construct the Cypher query based on selected filters
 32 |             cypher_query = construct_cypher_query(nodesType, relType)
 33 |             with GraphDatabase.driver(os.environ["NEO4J_URI"], 
 34 |                                     auth=(os.environ["NEO4J_USERNAME"], 
 35 |                                             os.environ["NEO4J_PASSWORD"])).session() as session:
 36 |                 result = session.run(cypher_query)
 37 |                 nodes, edges = process_graph_result_select(result)
 38 |     
 39 |     return nodes, edges
 40 | 
 41 | 
 42 | # Function to construct the Cypher query based on selected filters
 43 | def construct_cypher_query(node_types, rel_types):
 44 |     # Create a list of MATCH clauses for node types
 45 |     node_clauses = []
 46 |     for node_type in node_types:
 47 |         node_clauses.append(f"(p:{node_type})-[r]->(n) ")
 48 | 
 49 |     # Create a list of WHERE clauses for relationship types
 50 |     rel_clauses = []
 51 |     for rel_type in rel_types:
 52 |         rel_clauses.append(f"type(r)='{rel_type}' ")
 53 | 
 54 |     # Combine the clauses into one Cypher query
 55 |     if rel_clauses:
 56 |         rel_match = " OR ".join(rel_clauses)
 57 |         query = f"MATCH {' OR '.join(node_clauses)} WHERE {rel_match} RETURN p, r, n"
 58 |     else:
 59 |         query = f"MATCH {' OR '.join(node_clauses)} RETURN p, r, n"
 60 |     
 61 |     return query
 62 | 
 63 | def process_graph_result(result):
 64 |     nodes = []
 65 |     edges = []
 66 |     node_names = set()  # This defines node_names to track unique nodes
 67 | 
 68 |     for record in result: 
 69 |         # Process nodes
 70 |         p_name = record['p.name']
 71 |         o_name = record['o.name']
 72 | 
 73 |         # Add nodes if they don't already exist
 74 |         if p_name not in node_names:
 75 |             nodes.append(Node(id=p_name, label=p_name, size=5, shape="circle"))
 76 |             node_names.add(p_name)
 77 |         if o_name not in node_names:
 78 |             nodes.append(Node(id=o_name, label=o_name, size=5, shape="circle"))
 79 |             node_names.add(o_name)
 80 | 
 81 |         # Process edges
 82 |         relationship_label = record['type(r)']
 83 |         edges.append(Edge(source=p_name, target=o_name, label=relationship_label))
 84 | 
 85 |     return nodes, edges
 86 | 
 87 | def process_graph_result_select(result):
 88 |     nodes = []
 89 |     edges = []
 90 |     node_names = set()  # This defines node_names to track unique nodes
 91 | 
 92 |     for record in result: 
 93 |         # Process nodes
 94 |         p = record['p']
 95 |         n = record['n']
 96 |         p_name = p['name']
 97 |         n_name = n['name']
 98 | 
 99 |        # Add nodes if they don't already exist
100 |         if p_name not in node_names:
101 |             nodes.append(Node(id=p_name, label=p_name, size=5, shape="circle"))
102 |             node_names.add(p_name)
103 |         if n_name not in node_names:
104 |             nodes.append(Node(id=n_name, label=n_name, size=5, shape="circle"))
105 |             node_names.add(n_name)
106 | 
107 |         # Process edges, include the date in the label if it exists
108 |         r = record['r']
109 |         relationship_label = r.type
110 |         if 'date' in r:
111 |             relationship_label = f"{r.type} ({r['date']})"
112 |         edges.append(Edge(source=p_name, target=n_name, label=relationship_label))
113 |     
114 |     return nodes, edges
115 | 
st.title("The OpenAI Saga")

# Neo4j connection settings come from Streamlit secrets.
NEO4J_URI = st.secrets["NEO4J_URI"]
NEO4J_USERNAME = st.secrets["NEO4J_USERNAME"]
NEO4J_PASSWORD = st.secrets["NEO4J_PASSWORD"]

# Use the values just read instead of looking them up again in os.environ;
# the three names above were previously assigned but never used.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD)

# Node types and relationship types offered in the sidebar filters
node_types = ['Person', 'Organization', 'Group', 'Topic']
# The literal list repeats a few entries (CONSIDERS_BOARD_SEAT,
# QUESTIONED_FIRING_SAM, FOUNDED_BY, INVESTED_IN). dict.fromkeys drops the
# repeats while keeping first-seen order, so each option appears once in the
# multiselect and once in the generated query.
relationship_types = list(dict.fromkeys([
    'BELONGS_TO', 'FORMER_CEO_OF', 'CEO_OF', 'FORMER_MEMBER_OF', 'CURRENT_MEMBER_OF','REMAIN_MEMBER_OF', 'SCHEDULES_CALL_WITH',
    'QUESTIONED_FIRING_SAM', 'FOUNDED_BY', 'INVESTED_IN', 'CONSIDERS_BOARD_SEAT', 'FORMER_CTO_OF', 'INFORMED_OF_FIRING', 'FIRED_AS_CEO',
    'ALL_HANDS_MEETING', 'RESIGNS_FROM', 'APPOINTED_INTERIM_CEO', 'JOINS_MICROSOFT', 'THREATEN_TO_RESIGN', 'CONSIDERS_MERGER_WITH',
    'IN_TALKS_WITH_BOARD', 'RETURNS_AS_CEO', 'RETURNS_TO', 'CONSIDERS_BOARD_SEAT', 'AIMS_TO_DEVELOP_AGI_WITH', 'QUESTIONED_FIRING_SAM',
    'FOUNDED_BY', 'INVESTED_IN', 'DEMOTED_FROM', 'RELEASES_HIRING_STATEMENT', 'HIRED_BY', 'REGRETS_FIRING','MENTIONS', 'EXPLAINS_DECISIONS', 'DESCRIBES', 'FORMER_PRESIDENT']))

st.sidebar.header('Filters')
selected_node_types = st.sidebar.multiselect('Node Types', node_types, default=node_types)
selected_relationship_types = st.sidebar.multiselect('Relationship Types', relationship_types, default=relationship_types)

# Initialize state variables and check for changes in selections
if 'prev_node_types' not in st.session_state:
    st.session_state.prev_node_types = selected_node_types
if 'prev_relationship_types' not in st.session_state:
    st.session_state.prev_relationship_types = selected_relationship_types

# Update graph if selections change
if (selected_node_types != st.session_state.prev_node_types or 
    selected_relationship_types != st.session_state.prev_relationship_types):
    st.session_state.prev_node_types = selected_node_types
    st.session_state.prev_relationship_types = selected_relationship_types
    # Fetch new graph data; fetch_graph_data builds the filter query itself,
    # so the separately constructed (and unused) query string was removed.
    nodes, edges = fetch_graph_data(nodesType=selected_node_types, relType=selected_relationship_types)
    # Define the configuration for the graph visualization
    config = Config(height=600, width=800, directed=True, nodeHighlightBehavior=True, highlightColor="#F7A7A6")
    # Render the graph using agraph with the specified configuration
    agraph(nodes=nodes, edges=edges, config=config)
160 | 
161 | with st.sidebar:
162 |     openai_api_key = st.text_input("OpenAI API Key", key="langchain_search_api_key_openai", type="password")
163 |     "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
164 | 
def combine_contexts(structured, unstructured, client):
    """Ask the chat model to merge two pieces of context into a single answer.

    Args:
        structured: First context text (the caller passes the result of the
            structured query path). Empty or ``None`` values are skipped.
        unstructured: Second context text (the caller passes the result of the
            text retrieval chain). Empty or ``None`` values are skipped.
        client: Object exposing ``chat.completions.create(model=..., messages=...,
            temperature=...)`` whose result has ``choices[0].message.content``.

    Returns:
        The ``content`` of the first choice's message in the completion.
    """
    # Adjacent literals instead of backslash continuations inside one literal:
    # the latter embeds each continuation line's indentation in the prompt.
    system_prompt = (
        "You are an assistant of an advanced retrieval augmented system, "
        "who prioritizes accuracy and is very context-aware. "
        "Please summarize text from the following and generate "
        "a comprehensive, logical and context-aware answer."
    )

    # Keep the two contexts visibly separate and tolerate a missing or
    # non-string one rather than failing on string concatenation.
    context_parts = [str(part) for part in (structured, unstructured) if part]

    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': "\n\n".join(context_parts)},
    ]
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=messages,
        temperature=0,
    )
    return completion.choices[0].message.content
178 | 
179 | # Initialize OpenAI API key and Chat model
# Once a key has been entered, build the OpenAI client and the text retrieval chain.
if openai_api_key:
    client = OpenAI(api_key=openai_api_key)
    os.environ["OPENAI_API_KEY"] = openai_api_key
    from retrievers import initialize_retrievers
    from chain import initialize_chain, Question
    (
        typical_rag,
        parent_vectorstore,
        hypothetic_question_vectorstore,
        summary_vectorstore,
    ) = initialize_retrievers(openai_api_key)
    chain_txt = initialize_chain(
        openai_api_key,
        typical_rag,
        parent_vectorstore,
        hypothetic_question_vectorstore,
        summary_vectorstore,
    )

# Chat interface: seed the transcript with a greeting on the first run of the session.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "Hi there, ask me a question."}]

# Replay the stored transcript.
for msg in st.session_state["messages"]:
    with st.chat_message(msg["role"]):
        st.write(msg["content"])

prompt = st.chat_input(placeholder="Ask a question")
if prompt and not openai_api_key:
    st.error("Please add your OpenAI API key to continue.")
elif prompt:
    # Build the graph question-answering chain.
    # NOTE(review): cypher_chain is not referenced again in this part of the
    # script; presumably process_query reads it as a global - confirm.
    from langchain.chains import GraphCypherQAChain
    _cypher_model = ChatOpenAI(temperature=0, model_name='gpt-4', api_key=openai_api_key)
    _answer_model = ChatOpenAI(temperature=0, api_key=openai_api_key)
    cypher_chain = GraphCypherQAChain.from_llm(
        cypher_llm=_cypher_model,
        qa_llm=_answer_model,
        graph=graph,
        verbose=True,
        return_intermediate_steps=True,
    )

    # Record and echo the user's question.
    st.session_state["messages"].append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    # Gather context from both paths: process_query and the text chain.
    response_structured, nodes, edges = process_query(prompt)
    _chain_input = {"question": prompt}
    _run_options = {"configurable": {"strategy": "parent_strategy"}}
    response_nonstructured = chain_txt.invoke(_chain_input, _run_options)

    config = Config(
        height=600,
        width=800,
        directed=True,
        nodeHighlightBehavior=True,
        highlightColor="#F7A7A6",
    )

    # Merge both contexts into the final reply, store it, and show it with the graph.
    final_ans = combine_contexts(response_structured, response_nonstructured, client)
    st.session_state["messages"].append({"role": "assistant", "content": final_ans})
    with st.chat_message("assistant"):
        st.write(final_ans)
    agraph(nodes=nodes, edges=edges, config=config)
222 | 
223 | 


--------------------------------------------------------------------------------
/openaiKG.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "57ea0bce-75b1-4e35-a17c-53ae7b1ccff6",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# Get Wiki info of OpenAI key stakeholders"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": null,
 14 |    "id": "2cc4fb2b-08e4-43a0-a37e-6cdd62f5dc83",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "# !pip install langchain\n",
 19 |     "# !pip install wikipedia"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 8,
 25 |    "id": "33a04ef2-16aa-41dc-9ee1-a53703275d41",
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "# Load the OpenAI Wikipedia page\n",
 30 |     "from langchain.document_loaders import WikipediaLoader\n",
 31 |     "from langchain.text_splitter import CharacterTextSplitter\n",
 32 |     "raw_documents = WikipediaLoader(query=\"OpenAI\").load()\n",
 33 |     "\n",
 34 |     "# Define chunking strategy\n",
 35 |     "text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n",
 36 |     "    chunk_size=1000, chunk_overlap=20\n",
 37 |     ")\n",
 38 |     "# Chunk the document\n",
 39 |     "documents = text_splitter.split_documents(raw_documents)\n",
 40 |     "for d in documents:\n",
 41 |     "    del d.metadata[\"summary\"]"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 11,
 47 |    "id": "9da3f01d-ae49-4d18-8b27-86932b67a5f3",
 48 |    "metadata": {},
 49 |    "outputs": [
 50 |     {
 51 |      "name": "stdout",
 52 |      "output_type": "stream",
 53 |      "text": [
 54 |       "https://en.wikipedia.org/wiki/OpenAI\n",
 55 |       "https://en.wikipedia.org/wiki/Removal_of_Sam_Altman_from_OpenAI\n",
 56 |       "https://en.wikipedia.org/wiki/OpenAI_Five\n",
 57 |       "https://en.wikipedia.org/wiki/Greg_Brockman\n",
 58 |       "https://en.wikipedia.org/wiki/Ilya_Sutskever\n",
 59 |       "https://en.wikipedia.org/wiki/Emmett_Shear\n",
 60 |       "https://en.wikipedia.org/wiki/Artificial_general_intelligence\n",
 61 |       "https://en.wikipedia.org/wiki/Mira_Murati\n"
 62 |      ]
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     "for doc in documents:\n",
 67 |     "    print(doc.metadata['source'])"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 10,
 73 |    "id": "5c5d5a34-d079-4444-88f9-511da29fe523",
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "documents.remove(documents[2])\n",
 78 |     "documents.remove(documents[3])"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "id": "97cd6970-d1c4-444b-81bf-f2594c2b2045",
 84 |    "metadata": {},
 85 |    "source": [
 86 |     "# Enable Neo4j database"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": null,
 92 |    "id": "64657236-720e-4953-b598-8931eec0244d",
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "# !pip install pypdf"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "id": "1c9550f5-cc18-46c2-9469-1f43991f3f1c",
102 |    "metadata": {},
103 |    "source": [
104 |     "# News Articles"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 8,
110 |    "id": "aba1b836-437b-40ca-be38-3f1d11a42cc7",
111 |    "metadata": {},
112 |    "outputs": [],
113 |    "source": [
114 |     "from langchain.document_loaders import PyPDFLoader\n",
115 |     "from langchain.docstore.document import Document\n",
116 |     "import os\n",
117 |     "\n",
118 |     "# Directory containing your PDF files\n",
119 |     "directory_path = '/Users/leannchen/Documents/tcnews'\n",
120 |     "\n",
121 |     "# Initialize PyPDFLoader for each PDF in the directory\n",
122 |     "loaders = [PyPDFLoader(os.path.join(directory_path, f)) for f in os.listdir(directory_path) if f.endswith('.pdf')]\n",
123 |     "\n",
124 |     "# Load documents from PDFs\n",
125 |     "news_docs = []\n",
126 |     "for loader in loaders:\n",
127 |     "    news_docs.extend(loader.load())\n",
128 |     "\n",
129 |     "# Prepare the content and metadata for each news article as Document objects\n",
130 |     "news_articles_data = [\n",
131 |     "    Document(\n",
132 |     "        page_content=doc.page_content,  # Assuming this is how you access the page content of the document\n",
133 |     "        metadata={\n",
134 |     "            \"source\": doc.metadata['source'].removeprefix('/Users/leannchen/Documents/tcnews/'),  # Assuming this is the metadata format\n",
135 |     "            # Include any other metadata items here\n",
136 |     "        }\n",
137 |     "    )\n",
138 |     "    for doc in news_docs  # Assuming news_docs is a list of objects with page_content and metadata\n",
139 |     "]\n",
140 |     "\n",
141 |     "# Later, when you are ready to add them to the database:\n",
142 |     "# Call add_documents and construct Document objects inline\n",
143 |     "# Assuming news_articles_data is already a list of Document objects\n",
144 |     "# neo4j_db.add_documents(\n",
145 |     "#     news_articles_data,\n",
146 |     "#     ids=[f\"news_article_{i}\" for i in range(len(news_articles_data))]\n",
147 |     "\n",
148 |     "# )"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": null,
154 |    "id": "2ce9e011-7bdf-438e-a591-9ad480ef78e1",
155 |    "metadata": {},
156 |    "outputs": [],
157 |    "source": [
158 |     "# !pip install spacy-llm\n",
159 |     "# !pip install --upgrade jupyter ipywidgets"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 9,
165 |    "id": "710b9c7a-0cfa-4435-8cc7-aade9cfa2bc3",
166 |    "metadata": {},
167 |    "outputs": [],
168 |    "source": [
169 |     "all_data = documents + news_articles_data"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": null,
175 |    "id": "ea91be3d-cc34-4093-921c-f55dd31ba652",
176 |    "metadata": {},
177 |    "outputs": [],
178 |    "source": [
179 |     "all_data"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "id": "12a944c3-8050-4c25-9ab1-37aa14ac8311",
185 |    "metadata": {},
186 |    "source": [
187 |     "# Perform Article Summaries as Relationship Extraction Database"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": 289,
193 |    "id": "2528c928-2f38-42ed-a509-1ed4109885bf",
194 |    "metadata": {},
195 |    "outputs": [],
196 |    "source": [
197 |     "from langchain.chains.combine_documents.stuff import StuffDocumentsChain\n",
198 |     "from langchain.chains.llm import LLMChain\n",
199 |     "from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain\n",
200 |     "from langchain.prompts import PromptTemplate\n",
201 |     "from langchain.llms import OpenAI\n",
202 |     "from langchain.chat_models import ChatOpenAI\n",
203 |     "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
204 |     "# Initialize the text splitter\n",
205 |     "rtext_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)\n",
206 |     "\n",
207 |     "# Initialize LLM\n",
208 |     "llm = ChatOpenAI(temperature=0, model_name=\"gpt-4\")\n",
209 |     "\n",
210 |     "# Define the map prompt template\n",
211 |     "map_template = \"\"\"The following is a set of documents\n",
212 |     "{all_data}\n",
213 |     "Based on this list of docs, please perform concise summaries while extracting essential relationships for relationships analysis later, please do include dates of actions or events, which are very important for timeline analysis later. Example: \"Sam gets fired by the OpenAI board on 11/17/2023 or (Nov. 17th, Friday)\", which showcases not only the relationship between Sam and OpenAI, but also when it happens.\n",
214 |     "Helpful Answer:\"\"\"\n",
215 |     "map_prompt = PromptTemplate.from_template(map_template)\n",
216 |     "\n",
217 |     "# Define the map_chain\n",
218 |     "map_chain = LLMChain(llm=llm, prompt=map_prompt)\n",
219 |     "\n",
220 |     "all_data = news_articles_data + documents\n",
221 |     "# Extract text from each document\n",
222 |     "# all_text_data = [doc.page_content for doc in all_data]\n",
223 |     "\n",
224 |     "# Reduce\n",
225 |     "reduce_template = \"\"\"The following is set of summaries:\n",
226 |     "{all_data}\n",
227 |     "Take these and distill it into concise summaries of the articles while containing important relationships and events (including the timeline). Example: \"Sam gets fired by the OpenAI board on 11/17/2023 or (Nov. 17th, Friday)\", which showcases not only the relationship between Sam and OpenAI, but also when it happens.\n",
228 |     "Helpful Answer:\"\"\"\n",
229 |     "reduce_prompt = PromptTemplate.from_template(reduce_template)\n",
230 |     "\n",
231 |     "# ChatPromptTemplate(input_variables=['all_data'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['all_data'], template='The following is a set of documents:\\n{all_data}\\nBased on this list of docs, please identify the main themes \\nHelpful Answer:'))])\n",
232 |     "\n",
233 |     "# Run chain\n",
234 |     "reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)\n",
235 |     "\n",
236 |     "combine_documents_chain = StuffDocumentsChain(\n",
237 |     "    llm_chain=reduce_chain,\n",
238 |     "    document_variable_name=\"all_data\"  # This should match the variable name in reduce_prompt\n",
239 |     ")\n",
240 |     "\n",
 241 |     "# Combines and iteratively reduces the mapped documents\n",
242 |     "reduce_documents_chain = ReduceDocumentsChain(\n",
243 |     "    # This is final chain that is called.\n",
244 |     "    combine_documents_chain=combine_documents_chain,\n",
245 |     "    # If documents exceed context for `StuffDocumentsChain`\n",
246 |     "    collapse_documents_chain=combine_documents_chain,\n",
247 |     "    # The maximum number of tokens to group documents into.\n",
248 |     "    token_max=4000,\n",
249 |     ")\n",
250 |     "\n",
251 |     "# Combining documents by mapping a chain over them, then combining results\n",
252 |     "map_reduce_chain = MapReduceDocumentsChain(\n",
253 |     "    # Map chain\n",
254 |     "    llm_chain=map_chain,\n",
255 |     "    # Reduce chain\n",
256 |     "    reduce_documents_chain=reduce_documents_chain,\n",
257 |     "    # The variable name in the llm_chain to put the documents in\n",
258 |     "    document_variable_name=\"all_data\",\n",
259 |     "    # Return the results of the map steps in the output\n",
260 |     "    return_intermediate_steps=False,\n",
261 |     ")\n",
262 |     "\n",
263 |     "text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n",
264 |     "    chunk_size=1000, chunk_overlap=0\n",
265 |     ")\n",
266 |     "split_docs = text_splitter.split_documents(all_data)\n",
267 |     "\n",
268 |     "# Run the MapReduce Chain\n",
269 |     "summarization_results = map_reduce_chain.run(split_docs)\n"
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "code",
274 |    "execution_count": 290,
275 |    "id": "5816cb1b-0a49-47fa-be60-102252980584",
276 |    "metadata": {},
277 |    "outputs": [
278 |     {
279 |      "data": {
280 |       "text/plain": [
281 |        "\"1. Sam Altman was fired as CEO of OpenAI on November 17, 2023, leading to a power struggle within the company. Over 730 employees threatened to quit and join Altman at Microsoft unless the board resigned and reappointed Altman and co-founder Greg Brockman. Despite initial talks of reinstatement, the board later confirmed that Altman would not be returning. Altman and Brockman subsequently joined Microsoft to head a new advanced AI research unit.\\n\\n2. Following Altman's departure, the OpenAI board underwent a reshuffle, with Mira Murati appointed as interim CEO, only to be later replaced by Emmett Shear, the former CEO of Twitch. The board's actions were criticized for lack of transparency and communication, with the exact reasons for Altman's removal remaining unclear.\\n\\n3. The OpenAI board, composed of Ilya Sutskever, Adam D’Angelo, Helen Toner, and Tasha McCauley, faced calls for resignation. Potential replacements included Bret Taylor and Will Hurd. The board's composition was criticized for lack of diversity and deep knowledge about responsible use of AI.\\n\\n4. Altman announced on November 20, 2023, that he would not be returning as CEO of OpenAI and would instead join Microsoft to lead a new AI research team. Nearly 500 of OpenAI’s roughly 770 employees threatened to resign unless the startup’s board resigned and reappointed Altman.\\n\\n5. Microsoft has been acquiring top executives and AI engineering talent from OpenAI, a generative AI company in which Microsoft holds a minority stake worth several billion dollars. Microsoft's leadership, particularly CEO Satya Nadella, has been projecting a 'business as usual' message during these upheavals at OpenAI.\\n\\n6. Microsoft successfully navigated through U.K. and EU competition authorities to merge with Activision by restructuring the deal and agreeing to certain conditions. 
However, its market power in cloud computing and potential influence over OpenAI is raising concerns among competition regulators.\\n\\n7. Emmett Shear, the ex-CEO of Twitch, was appointed as the interim CEO of OpenAI after Sam Altman was fired and replaced by CTO Mira Murati. Shear plans to hire an independent investigator to look into the events leading up to his appointment and reform the management and leadership team at OpenAI.\\n\\n8. OpenAI, an American AI research organization, was founded in December 2015. Microsoft invested $1 billion in OpenAI Global LLC in 2019 and $10 billion in 2023. On November 17, 2023, Sam Altman was removed as CEO and Greg Brockman was removed as chairman of OpenAI. Both returned four days later after negotiations with the board.\\n\\n9. Mira Murati, born in Albania in 1988, is a technology executive who has worked at Tesla, Leap Motion, and OpenAI. She served as the CTO of OpenAI from 2018, leading projects like ChatGPT, Dall-E, and Codex. She briefly served as interim CEO of OpenAI in November 2023 after Sam Altman was removed, but was replaced by Emmett Shear after three days.\\n\\n10. Sam Altman, born in 1985, is a technology entrepreneur who co-founded Loopt and Hydrazine Capital, and served as a partner and president at Y Combinator. He was the CEO of OpenAI from 2019 until his removal in November 2023, but was reinstated five days later. He also co-founded Tools For Humanity and raised $1 billion for OpenAI from Microsoft.\""
282 |       ]
283 |      },
284 |      "execution_count": 290,
285 |      "metadata": {},
286 |      "output_type": "execute_result"
287 |     }
288 |    ],
289 |    "source": [
290 |     "summarization_results"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "code",
295 |    "execution_count": 291,
296 |    "id": "a686f1a8-a3a7-4e4d-a297-3493927ff3f5",
297 |    "metadata": {},
298 |    "outputs": [],
299 |    "source": [
300 |     "# Store summarization_results to a text file for future use\n",
301 |     "# Timeline will further be added into the summaries\n",
302 |     "with open('summary.txt', 'w') as file:\n",
303 |     "    file.write(str(summarization_results))"
304 |    ]
305 |   },
306 |   {
307 |    "cell_type": "code",
308 |    "execution_count": null,
309 |    "id": "dd4a39aa-5c78-49ed-a789-6514f33af1b4",
310 |    "metadata": {},
311 |    "outputs": [],
312 |    "source": [
313 |     "!pip install spacy-llm"
314 |    ]
315 |   },
316 |   {
317 |    "cell_type": "markdown",
318 |    "id": "9f8b2d01-5d61-41f5-a126-99194c2be22a",
319 |    "metadata": {},
320 |    "source": [
321 |     "# Entity and Relationship"
322 |    ]
323 |   },
324 |   {
325 |    "cell_type": "code",
326 |    "execution_count": 293,
327 |    "id": "56098473-d023-4452-a439-4cf62556b9e8",
328 |    "metadata": {},
329 |    "outputs": [
330 |     {
331 |      "name": "stdout",
332 |      "output_type": "stream",
333 |      "text": [
334 |       "Text: 1.\n",
335 |       "Entities: [('1', 'CARDINAL')]\n",
336 |       "Relations:\n",
337 |       "Text: Sam Altman was fired as CEO of OpenAI on November 17, 2023, leading to a\n",
338 |       "power struggle within the company.\n",
339 |       "Entities: [('Sam Altman', 'PERSON'), ('November 17, 2023', 'DATE')]\n",
340 |       "Relations:\n",
341 |       "  - Sam Altman [fired_on] November 17, 2023\n",
342 |       "Text: Over 730 employees threatened to quit and join Altman at Microsoft unless\n",
343 |       "the board resigned and reappointed Altman and co-founder Greg Brockman.\n",
344 |       "Entities: [('Over 730', 'CARDINAL'), ('Altman', 'ORG'), ('Microsoft', 'ORG'),\n",
345 |       "('Altman', 'PERSON'), ('Greg Brockman', 'PERSON')]\n",
346 |       "Relations:\n",
347 |       "  - Greg Brockman [co-founder_of] Altman\n",
348 |       "Text: Despite initial talks of reinstatement, the board later confirmed that\n",
349 |       "Altman would not be returning.\n",
350 |       "Entities: [('Altman', 'ORG')]\n",
351 |       "Relations:\n",
352 |       "Text: Altman and Brockman subsequently joined Microsoft to head a new advanced\n",
353 |       "AI research unit.\n",
354 |       "Entities: [('Altman', 'ORG'), ('Brockman', 'ORG'), ('Microsoft', 'ORG'), ('AI',\n",
355 |       "'ORG')]\n",
356 |       "Relations:\n",
357 |       "  - Altman [joined] Microsoft\n",
358 |       "  - Brockman [joined] Microsoft\n",
359 |       "  - Altman [head] AI\n",
360 |       "  - Brockman [head] AI\n",
361 |       "Text: 2.\n",
362 |       "Entities: [('2', 'CARDINAL')]\n",
363 |       "Relations:\n",
364 |       "Text: Following Altman's departure, the OpenAI board underwent a reshuffle, with\n",
365 |       "Mira Murati appointed as interim CEO, only to be later replaced by Emmett Shear,\n",
366 |       "the former CEO of Twitch.\n",
367 |       "Entities: [('Altman', 'PERSON'), ('OpenAI', 'ORG'), ('Mira Murati', 'PERSON'),\n",
368 |       "('Emmett Shear', 'PERSON'), ('Twitch', 'ORG')]\n",
369 |       "Relations:\n",
370 |       "  - Altman [departure_from] OpenAI\n",
371 |       "  - Mira Murati [interim_CEO_of] OpenAI\n",
372 |       "  - Emmett Shear [CEO_of] OpenAI\n",
373 |       "  - Emmett Shear [former_CEO_of] Twitch\n",
374 |       "Text: The board's actions were criticized for lack of transparency and\n",
375 |       "communication, with the exact reasons for Altman's removal remaining unclear.\n",
376 |       "Entities: [('Altman', 'ORG')]\n",
377 |       "Relations:\n",
378 |       "Text: 3.\n",
379 |       "Entities: [('3', 'CARDINAL')]\n",
380 |       "Relations:\n",
381 |       "Text: The OpenAI board, composed of Ilya Sutskever, Adam D’Angelo, Helen Toner,\n",
382 |       "and Tasha McCauley, faced calls for resignation.\n",
383 |       "Entities: [('Ilya Sutskever', 'PERSON'), ('Adam D’Angelo', 'PERSON'), ('Helen\n",
384 |       "Toner', 'PERSON'), ('Tasha McCauley', 'PERSON')]\n",
385 |       "Relations:\n",
386 |       "Text: Potential replacements included Bret Taylor and Will Hurd.\n",
387 |       "Entities: [('Bret Taylor', 'PERSON'), ('Will Hurd', 'PERSON')]\n",
388 |       "Relations:\n",
389 |       "  - Bret Taylor [potential_replacements] Will Hurd\n",
390 |       "Text: The board's composition was criticized for lack of diversity and deep\n",
391 |       "knowledge about responsible use of AI.\n",
392 |       "Entities: [('AI', 'ORG')]\n",
393 |       "Relations:\n",
394 |       "Text: 4.\n",
395 |       "Entities: [('4', 'CARDINAL')]\n",
396 |       "Relations:\n",
397 |       "Text: Altman announced on November 20, 2023, that he would not be returning as\n",
398 |       "CEO of OpenAI and would instead join Microsoft to lead a new AI research team.\n",
399 |       "Entities: [('Altman', 'ORG'), ('November 20, 2023', 'DATE'), ('OpenAI', 'ORG'),\n",
400 |       "('Microsoft', 'ORG'), ('AI', 'ORG')]\n",
401 |       "Relations:\n",
402 |       "  - Altman [lead] AI\n",
403 |       "Text: Nearly 500 of OpenAI’s roughly 770 employees threatened to resign unless\n",
404 |       "the startup’s board resigned and reappointed Altman.\n",
405 |       "Entities: [('Nearly 500', 'CARDINAL'), ('roughly 770', 'CARDINAL'), ('Altman',\n",
406 |       "'PERSON')]\n",
407 |       "Relations:\n",
408 |       "  - Nearly 500 [part_of] roughly 770\n",
409 |       "  - Nearly 500 [threatened_by] Altman\n",
410 |       "  - roughly 770 [threatened_by] Altman\n",
411 |       "Text: 5.\n",
412 |       "Entities: [('5', 'CARDINAL')]\n",
413 |       "Relations:\n",
414 |       "Text: Microsoft has been acquiring top executives and AI engineering talent from\n",
415 |       "OpenAI, a generative AI company in which Microsoft holds a minority stake worth\n",
416 |       "several billion dollars.\n",
417 |       "Entities: [('Microsoft', 'ORG'), ('AI', 'ORG'), ('OpenAI', 'ORG'), ('Microsoft',\n",
418 |       "'ORG'), ('several billion dollars', 'MONEY')]\n",
419 |       "Relations:\n",
420 |       "  - Microsoft [has_stake] several billion dollars\n",
421 |       "Text: Microsoft's leadership, particularly CEO Satya Nadella, has been\n",
422 |       "projecting a 'business as usual' message during these upheavals at OpenAI.  6.\n",
423 |       "Entities: [('Microsoft', 'ORG'), ('Satya Nadella', 'PERSON'), ('6', 'CARDINAL')]\n",
424 |       "Relations:\n",
425 |       "  - Satya Nadella [works for] Microsoft\n",
426 |       "Text: Microsoft successfully navigated through U.K. and EU competition\n",
427 |       "authorities to merge with Activision by restructuring the deal and agreeing to\n",
428 |       "certain conditions.\n",
429 |       "Entities: [('Microsoft', 'ORG'), ('U.K.', 'GPE'), ('EU', 'ORG'), ('Activision',\n",
430 |       "'ORG')]\n",
431 |       "Relations:\n",
432 |       "  - Microsoft [merge_with] Activision\n",
433 |       "Text: However, its market power in cloud computing and potential influence over\n",
434 |       "OpenAI is raising concerns among competition regulators.\n",
435 |       "Entities: [('OpenAI', 'ORG')]\n",
436 |       "Relations:\n",
437 |       "Text: 7. Emmett Shear, the ex-CEO of Twitch, was appointed as the interim CEO of\n",
438 |       "OpenAI after Sam Altman was fired and replaced by CTO Mira Murati.\n",
439 |       "Entities: [('7', 'CARDINAL'), ('Emmett Shear', 'PERSON'), ('Twitch', 'ORG'),\n",
440 |       "('Sam Altman', 'PERSON'), ('Mira Murati', 'PERSON')]\n",
441 |       "Relations:\n",
442 |       "  - Emmett Shear [ex-CEO_of] Twitch\n",
443 |       "  - Emmett Shear [ordinal] 7\n",
444 |       "Text: Shear plans to hire an independent investigator to look into the events\n",
445 |       "leading up to his appointment and reform the management and leadership team at\n",
446 |       "OpenAI.\n",
447 |       "Entities: [('Shear', 'ORG')]\n",
448 |       "Relations:\n",
449 |       "Text: 8.\n",
450 |       "Entities: [('8', 'CARDINAL')]\n",
451 |       "Relations:\n",
452 |       "Text: OpenAI, an American AI research organization, was founded in December\n",
453 |       "2015.\n",
454 |       "Entities: [('American', 'NORP'), ('December 2015', 'DATE')]\n",
455 |       "Relations:\n",
456 |       "  - American [founded_on] December 2015\n",
457 |       "Text: Microsoft invested $1 billion in OpenAI Global LLC in 2019 and $10 billion\n",
458 |       "in 2023.\n",
459 |       "Entities: [('Microsoft', 'ORG'), ('$1 billion', 'MONEY'), ('OpenAI Global LLC',\n",
460 |       "'ORG'), ('2019', 'DATE'), ('$10 billion', 'MONEY'), ('2023', 'DATE')]\n",
461 |       "Relations:\n",
462 |       "  - Microsoft [invested_in] $1 billion\n",
463 |       "  - Microsoft [invested_in] OpenAI Global LLC\n",
464 |       "  - Microsoft [invested_on] 2019\n",
465 |       "  - Microsoft [invested_in] $10 billion\n",
466 |       "  - Microsoft [invested_on] 2023\n",
467 |       "Text: On November 17, 2023, Sam Altman was removed as CEO and Greg Brockman was\n",
468 |       "removed as chairman of OpenAI.\n",
469 |       "Entities: [('November 17, 2023', 'DATE'), ('Sam Altman', 'PERSON'), ('Greg\n",
470 |       "Brockman', 'PERSON'), ('OpenAI', 'ORG')]\n",
471 |       "Relations:\n",
472 |       "  - Greg Brockman [role_in] OpenAI\n",
473 |       "Text: Both returned four days later after negotiations with the board.\n",
474 |       "Entities: [('four days later', 'DATE')]\n",
475 |       "Relations:\n",
476 |       "Text: 9.\n",
477 |       "Entities: [('9', 'CARDINAL')]\n",
478 |       "Relations:\n",
479 |       "Text: Mira Murati, born in Albania in 1988, is a technology executive who has\n",
480 |       "worked at Tesla, Leap Motion, and OpenAI.\n",
481 |       "Entities: [('Mira Murati', 'PERSON'), ('Albania', 'GPE'), ('1988', 'DATE'),\n",
482 |       "('Tesla', 'ORG'), ('Leap Motion', 'ORG')]\n",
483 |       "Relations:\n",
484 |       "  - Mira Murati [born_in] Albania\n",
485 |       "  - Mira Murati [born_on] 1988\n",
486 |       "  - Mira Murati [worked_at] Tesla\n",
487 |       "  - Mira Murati [worked_at] Leap Motion\n",
488 |       "Text: She served as the CTO of OpenAI from 2018, leading projects like ChatGPT,\n",
489 |       "Dall-E, and Codex.\n",
490 |       "Entities: [('OpenAI', 'ORG'), ('2018', 'DATE'), ('Dall', 'PERSON'), ('Codex',\n",
491 |       "'ORG')]\n",
492 |       "Relations:\n",
493 |       "  - OpenAI [time] 2018\n",
494 |       "  - Dall [affiliation] OpenAI\n",
495 |       "  - Dall [time] 2018\n",
496 |       "Text: She briefly served as interim CEO of OpenAI in November 2023 after Sam\n",
497 |       "Altman was removed, but was replaced by Emmett Shear after three days.\n",
498 |       "Entities: [('OpenAI', 'ORG'), ('November 2023', 'DATE'), ('Sam Altman',\n",
499 |       "'PERSON'), ('Emmett Shear', 'PERSON'), ('three days', 'DATE')]\n",
500 |       "Relations:\n",
501 |       "  - OpenAI [time] November 2023\n",
502 |       "  - Sam Altman [role] OpenAI\n",
503 |       "  - Emmett Shear [role] OpenAI\n",
504 |       "  - Emmett Shear [time] three days\n",
505 |       "Text: 10.\n",
506 |       "Entities: [('10', 'CARDINAL')]\n",
507 |       "Relations:\n",
508 |       "Text: Sam Altman, born in 1985, is a technology entrepreneur who co-founded\n",
509 |       "Loopt and Hydrazine Capital, and served as a partner and president at Y\n",
510 |       "Combinator.\n",
511 |       "Entities: [('Sam Altman', 'PERSON'), ('1985', 'DATE'), ('Loopt and Hydrazine\n",
512 |       "Capital', 'ORG'), ('Y Combinator', 'ORG')]\n",
513 |       "Relations:\n",
514 |       "  - Sam Altman [born_on] 1985\n",
515 |       "  - Sam Altman [co-founded] Loopt and Hydrazine Capital\n",
516 |       "  - Sam Altman [worked_at] Y Combinator\n",
517 |       "Text: He was the CEO of OpenAI from 2019 until his removal in November 2023, but\n",
518 |       "was reinstated five days later.\n",
519 |       "Entities: [('OpenAI', 'ORG'), ('2019', 'DATE'), ('November 2023', 'DATE'),\n",
520 |       "('five days later', 'DATE')]\n",
521 |       "Relations:\n",
522 |       "  - OpenAI [start_date] 2019\n",
523 |       "  - OpenAI [end_date] November 2023\n",
524 |       "  - OpenAI [reinstatement_date] five days later\n",
525 |       "Text: He also co-founded Tools For Humanity and raised $1 billion for OpenAI\n",
526 |       "from Microsoft.\n",
527 |       "Entities: [('Tools For Humanity', 'ORG'), ('$1 billion', 'MONEY'), ('Microsoft',\n",
528 |       "'ORG')]\n",
529 |       "Relations:\n",
530 |       "  - Tools For Humanity [raised_money_for] $1 billion\n",
531 |       "  - $1 billion [invested_by] Microsoft\n",
532 |       "Entity counts: Counter({'ORG': 39, 'PERSON': 24, 'DATE': 15, 'CARDINAL': 13,\n",
533 |       "'MONEY': 4, 'GPE': 2, 'NORP': 1})\n",
534 |       "Relation counts: Counter({'time': 4, 'invested_in': 3, 'worked_at': 3, 'joined':\n",
535 |       "2, 'head': 2, 'threatened_by': 2, 'invested_on': 2, 'born_on': 2, 'role': 2,\n",
536 |       "'fired_on': 1, 'co-founder_of': 1, 'departure_from': 1, 'interim_CEO_of': 1,\n",
537 |       "'CEO_of': 1, 'former_CEO_of': 1, 'potential_replacements': 1, 'lead': 1,\n",
538 |       "'part_of': 1, 'has_stake': 1, 'works for': 1, 'merge_with': 1, 'ex-CEO_of': 1,\n",
539 |       "'ordinal': 1, 'founded_on': 1, 'role_in': 1, 'born_in': 1, 'affiliation': 1,\n",
540 |       "'co-founded': 1, 'start_date': 1, 'end_date': 1, 'reinstatement_date': 1,\n",
541 |       "'raised_money_for': 1, 'invested_by': 1})\n"
542 |      ]
543 |     }
544 |    ],
545 |    "source": [
546 |     "import os\n",
547 |     "import json\n",
548 |     "import spacy\n",
549 |     "from collections import Counter\n",
550 |     "from pathlib import Path\n",
551 |     "from wasabi import msg\n",
552 |     "from spacy_llm.util import assemble\n",
553 |     "\n",
554 |     "# traditional spaCy pipeline (not spacy-llm), used here to split the text into sentences\n",
555 |     "def split_document_sent(text):\n",
556 |     "    nlp = spacy.load(\"en_core_web_sm\")\n",
557 |     "    doc = nlp(text)\n",
558 |     "    return [sent.text.strip() for sent in doc.sents] # one whitespace-stripped string per sentence\n",
559 |     "\n",
560 |     "# spacy-llm relationship extraction\n",
561 |     "def process_text(nlp, text, verbose=False):\n",
562 |     "    doc = nlp(text)\n",
563 |     "    if verbose:\n",
564 |     "        msg.text(f\"Text: {doc.text}\")\n",
565 |     "        msg.text(f\"Entities: {[(ent.text, ent.label_) for ent in doc.ents]}\")\n",
566 |     "        msg.text(\"Relations:\")\n",
567 |     "        for r in doc._.rel:\n",
568 |     "            msg.text(f\"  - {doc.ents[r.dep]} [{r.relation}] {doc.ents[r.dest]}\")\n",
569 |     "    return doc\n",
570 |     "\n",
571 |     "def run_pipeline(config_path, examples_path=None, verbose=False):\n",
572 |     "    if not os.getenv(\"OPENAI_API_KEY\"):\n",
573 |     "        msg.fail(\"OPENAI_API_KEY env variable was not found. Set it and try again.\", exits=1)\n",
574 |     "\n",
575 |     "    nlp = assemble(config_path, overrides={} if examples_path is None else {\"paths.examples\": str(examples_path)})\n",
576 |     "\n",
577 |     "    # Initialize counters and storage\n",
578 |     "    processed_data = []\n",
579 |     "    entity_counts = Counter()\n",
580 |     "    relation_counts = Counter()\n",
581 |     "\n",
582 |     "    # Load your articles and news data here\n",
583 |     "    # all_data = news_articles_data + documents\n",
584 |     "\n",
585 |     "    sents = split_document_sent(summarization_results)\n",
586 |     "    for sent in sents:\n",
587 |     "        doc = process_text(nlp, sent, verbose)\n",
588 |     "        entities = [(ent.text, ent.label_) for ent in doc.ents]\n",
589 |     "        relations = [(doc.ents[r.dep].text, r.relation, doc.ents[r.dest].text) for r in doc._.rel]\n",
590 |     "        \n",
591 |     "        # Store processed data\n",
592 |     "        processed_data.append({'text': doc.text, 'entities': entities, 'relations': relations})\n",
593 |     "\n",
594 |     "        # Update counters\n",
595 |     "        entity_counts.update([ent[1] for ent in entities])\n",
596 |     "        relation_counts.update([rel[1] for rel in relations])\n",
597 |     "\n",
598 |     "    # Export to JSON\n",
599 |     "    with open('processed_data.json', 'w') as f:\n",
600 |     "        json.dump(processed_data, f)\n",
601 |     "\n",
602 |     "    # Display summary\n",
603 |     "    msg.text(f\"Entity counts: {entity_counts}\")\n",
604 |     "    msg.text(f\"Relation counts: {relation_counts}\")\n",
605 |     "\n",
606 |     "# Set your configuration paths and flags\n",
607 |     "config_path = Path(\"zeroshot.cfg\")\n",
608 |     "examples_path = None  # path to a few-shot examples file, or None for zero-shot\n",
609 |     "verbose = True\n",
610 |     "\n",
611 |     "# Run the pipeline\n",
612 |     "file = run_pipeline(config_path, None, verbose)\n"
613 |    ]
614 |   }
615 |  ],
616 |  "metadata": {
617 |   "kernelspec": {
618 |    "display_name": "Python 3 (ipykernel)",
619 |    "language": "python",
620 |    "name": "python3"
621 |   },
622 |   "language_info": {
623 |    "codemirror_mode": {
624 |     "name": "ipython",
625 |     "version": 3
626 |    },
627 |    "file_extension": ".py",
628 |    "mimetype": "text/x-python",
629 |    "name": "python",
630 |    "nbconvert_exporter": "python",
631 |    "pygments_lexer": "ipython3",
632 |    "version": "3.10.12"
633 |   }
634 |  },
635 |  "nbformat": 4,
636 |  "nbformat_minor": 5
637 | }
638 | 


--------------------------------------------------------------------------------