├── .gitignore ├── AgreementSchema.py ├── ContractPlugin.py ├── ContractService.py ├── LICENSE ├── README.md ├── Utils.py ├── app.py ├── convert-pdf-to-json.py ├── create_graph_from_json.py ├── data ├── debug │ ├── complete_response_AtnInternational.pdf.json │ ├── complete_response_CybergyHoldingsInc.pdf.json │ └── complete_response_SimplicityEsportsGamingCompany.pdf.json ├── input │ ├── AtnInternational.pdf │ ├── CybergyHoldingsInc.pdf │ └── SimplicityEsportsGamingCompany.pdf └── output │ ├── AtnInternational.pdf.json │ ├── CybergyHoldingsInc.pdf.json │ └── SimplicityEsportsGamingCompany.pdf.json ├── formatters.py ├── images ├── 4-stage-approach .png ├── contract_graph.png ├── schema.png └── streamlit_view.png ├── prompts ├── contract_extraction_prompt.txt └── system_prompt.txt ├── requirements.txt └── test_agent.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 111 | .pdm.toml 112 | .pdm-python 113 | .pdm-build/ 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/

# --------------------------------------------------------------------------------
# /AgreementSchema.py:
# --------------------------------------------------------------------------------

from typing import TypedDict
from typing import List
from enum import Enum


# TypedDict models (plain dicts at runtime, not Pydantic models) describing the
# Agreement data handled by ContractService.py.
class Party(TypedDict):
    # One organization that is party to an agreement.
    name: str
    role: str
    incorporation_country: str
    incorporation_state: str


class GoverningLaw(TypedDict):
    # Currently unused by Agreement (see the commented-out field below).
    country: str
    state: str
    most_favored_country: str


class ContractClause(TypedDict):
    clause_type: str
    excerpts: List[str]


class Agreement(TypedDict):
    # NOTE(review): ContractSearchService._get_agreement builds dicts keyed
    # "contract_id", "name" and "agreement_date", none of which are declared
    # here ("agreement_name" / "effective_date" are). Confirm which spelling
    # downstream consumers expect before aligning the two.
    agreement_name: str
    agreement_type: str
    effective_date: str
    expiration_date: str
    renewal_term: str
    notice_period_to_terminate_Renewal: str
    parties: List[Party]
    #governing_law: GoverningLaw
    clauses: List[ContractClause]


class ClauseType(Enum):
    """Clause categories; each value is sent as the ``$clause_type`` Cypher parameter."""

    ANTI_ASSIGNMENT = "Anti-Assignment"
    COMPETITIVE_RESTRICTION = "Competitive Restriction Exception"
    NON_COMPETE = "Non-Compete"
    EXCLUSIVITY = "Exclusivity"
    NO_SOLICIT_CUSTOMERS = "No-Solicit of Customers"
    NO_SOLICIT_EMPLOYEES = "No-Solicit Of Employees"
    NON_DISPARAGEMENT = "Non-Disparagement"
    TERMINATION_FOR_CONVENIENCE = "Termination For Convenience"
    ROFR_ROFO_ROFN = "Rofr/Rofo/Rofn"
    CHANGE_OF_CONTROL = "Change of Control"
    REVENUE_PROFIT_SHARING = "Revenue/Profit Sharing"
    PRICE_RESTRICTION = "Price Restrictions"
    MINIMUM_COMMITMENT = "Minimum Commitment"
    VOLUME_RESTRICTION = "Volume Restriction"
    IP_OWNERSHIP_ASSIGNMENT = "IP Ownership Assignment"
    JOINT_IP_OWNERSHIP = "Joint IP Ownership"
    LICENSE_GRANT = "License grant"
    # NOTE(review): the leading space looks accidental and would stop an exact
    # match on ContractClause.type unless the stored data has it too - verify
    # against the graph before removing it.
    NON_TRANSFERABLE_LICENSE = " Non-Transferable License"
    AFFILIATE_LICENSE_LICENSOR = "Affiliate License-Licensor"
    AFFILIATE_LICENSE_LICENSEE = "Affiliate License-Licensee"
    UNLIMITED_LICENSE = "Unlimited/All-You-Can-Eat-License"
    PERPETUAL_LICENSE = "Irrevocable Or Perpetual License"
    SOURCE_CODE_SCROW = "Source Code Escrow"
    # Correctly spelled alias; same value, so it resolves to the member above
    # and existing references to SOURCE_CODE_SCROW keep working.
    SOURCE_CODE_ESCROW = "Source Code Escrow"
    POST_TERMINATION_SERVICES = "Post-Termination Services"
    AUDIT_RIGHTS = "Audit Rights"
    UNCAPPED_LIABILITY = "Uncapped Liability"
    CAP_ON_LIABILITY = "Cap On Liability"
    LIQUIDATED_DAMAGES = "Liquidated Damages"
    WARRANTY_DURATION = "Warranty Duration"
    INSURANCE = "Insurance"
    COVENANT_NOT_TO_SUE = "Covenant Not To Sue"
    THIRD_PARTY_BENEFICIARY = "Third Party Beneficiary"


# --------------------------------------------------------------------------------
# /ContractPlugin.py:
# --------------------------------------------------------------------------------


from typing import List, Optional, Annotated
from AgreementSchema import Agreement, ClauseType
from semantic_kernel.functions import kernel_function
from ContractService import ContractSearchService


# Semantic Kernel plugin: every @kernel_function below simply delegates to the
# ContractSearchService method of the same name. The docstrings are left
# untouched on purpose - they are part of what the decorated functions expose.
class ContractPlugin:

    def __init__(self, contract_search_service: ContractSearchService ):
        self.contract_search_service = contract_search_service

    @kernel_function
    async def get_contract(self, contract_id: int) -> Annotated[Agreement, "A contract"]:
        """Gets details about a contract with the given id."""
        return await self.contract_search_service.get_contract(contract_id)

    @kernel_function
    async def get_contracts(self, organization_name: str) -> Annotated[List[Agreement], "A list of contracts"]:
        """Gets basic details about all contracts where one of the parties has a name similar to the given organization name."""
        return await self.contract_search_service.get_contracts(organization_name)

    @kernel_function
    async def get_contracts_without_clause(self, clause_type: ClauseType) -> Annotated[List[Agreement], "A list of contracts"]:
        """Gets basic details from contracts without a clause of the given type."""
        return await self.contract_search_service.get_contracts_without_clause(clause_type=clause_type)

    @kernel_function
    async def get_contracts_with_clause_type(self, clause_type: ClauseType) -> Annotated[List[Agreement], "A list of contracts"]:
        """Gets basic details from contracts with a clause of the given type."""
        return await self.contract_search_service.get_contracts_with_clause_type(clause_type=clause_type)

    @kernel_function
    async def get_contracts_similar_text(self, clause_text: str) -> Annotated[List[Agreement], "A list of contracts with similar text in one of their clauses"]:
        """Gets basic details from contracts having semantically similar text in one of their clauses to the to the 'clause_text' provided."""
        return await self.contract_search_service.get_contracts_similar_text(clause_text=clause_text)

    @kernel_function
    async def answer_aggregation_question(self, user_question: str) -> Annotated[str, "An answer to user_question"]:
        """Answer obtained by turning user_question into a CYPHER query"""
        return await self.contract_search_service.answer_aggregation_question(user_question=user_question)

    @kernel_function
    async def get_contract_excerpts(self, contract_id: int) -> Annotated[Agreement, "A contract"]:
        """Gets basic contract details and its excerpts."""
        return await self.contract_search_service.get_contract_excerpts(contract_id=contract_id)


# --------------------------------------------------------------------------------
# /ContractService.py:
# --------------------------------------------------------------------------------

from neo4j import GraphDatabase
from typing import List
from AgreementSchema import Agreement, ClauseType,Party, ContractClause
from neo4j_graphrag.retrievers import VectorCypherRetriever,Text2CypherRetriever
from neo4j_graphrag.embeddings import OpenAIEmbeddings
from formatters import my_vector_search_excerpt_record_formatter
from neo4j_graphrag.llm import OpenAILLM


class ContractSearchService:
    """Retrieval functions over the contract graph stored in Neo4j.

    NOTE(review): the public methods are declared ``async`` but none of the
    driver or retriever calls inside them is awaited, so each call blocks
    until its query returns.
    """

    def __init__(self, uri, user ,pwd ):
        """Open a Neo4j driver for ``uri`` and set up the embedder and LLM."""
        driver = GraphDatabase.driver(uri, auth=(user, pwd))
        self._driver = driver
        self._openai_embedder = OpenAIEmbeddings(model = "text-embedding-3-small")
        # Create LLM object. Used to generate the CYPHER queries
        self._llm = OpenAILLM(model_name="gpt-4o", model_params={"temperature": 0})

    async def get_contract(self, contract_id: int) -> Agreement:
        """Return the long-format agreement with the given ``contract_id``.

        Returns an empty dict when the query does not yield exactly one
        record (previously this path raised ``UnboundLocalError`` because the
        party/role/country/state/clause locals were never assigned).
        """
        GET_CONTRACT_BY_ID_QUERY = """
            MATCH (a:Agreement {contract_id: $contract_id})-[:HAS_CLAUSE]->(clause:ContractClause)
            WITH a, collect(clause) as clauses
            MATCH (country:Country)-[i:INCORPORATED_IN]-(p:Organization)-[r:IS_PARTY_TO]-(a)
            WITH a, clauses, collect(p) as parties, collect(country) as countries, collect(r) as roles, collect(i) as states
            RETURN a as agreement, clauses, parties, countries, roles, states
        """

        records, _, _ = self._driver.execute_query(GET_CONTRACT_BY_ID_QUERY, {'contract_id': contract_id})

        if len(records) != 1:
            # Same value _get_agreement produces for a missing agreement node.
            return {}

        row = records[0]
        return await self._get_agreement(
            row.get('agreement'), format="long",
            party_list=row.get('parties'), role_list=row.get('roles'),
            country_list=row.get('countries'), state_list=row.get('states'),
            clause_list=row.get('clauses')
        )

    async def get_contracts(self, organization_name: str) -> List[Agreement]:
        """Return short-format agreements of the organization best matching ``organization_name``.

        The name is resolved through the ``organizationNameTextIndex``
        full-text index; only the top-scoring organization is used.
        """
        GET_CONTRACTS_BY_PARTY_NAME = """
            CALL db.index.fulltext.queryNodes('organizationNameTextIndex', $organization_name)
            YIELD node AS o, score
            WITH o, score
            ORDER BY score DESC
            LIMIT 1
            WITH o
            MATCH (o)-[:IS_PARTY_TO]->(a:Agreement)
            WITH a
            MATCH (country:Country)-[i:INCORPORATED_IN]-(p:Organization)-[r:IS_PARTY_TO]-(a:Agreement)
            RETURN a as agreement, collect(p) as parties, collect(r) as roles, collect(country) as countries, collect(i) as states
        """

        records, _, _ = self._driver.execute_query(GET_CONTRACTS_BY_PARTY_NAME, {'organization_name': organization_name})
        return await self._short_agreements_from_records(records)

    async def get_contracts_with_clause_type(self, clause_type: ClauseType) -> List[Agreement]:
        """Return short-format agreements that have a clause of ``clause_type``."""
        GET_CONTRACT_WITH_CLAUSE_TYPE_QUERY = """
            MATCH (a:Agreement)-[:HAS_CLAUSE]->(cc:ContractClause {type: $clause_type})
            WITH a
            MATCH (country:Country)-[i:INCORPORATED_IN]-(p:Organization)-[r:IS_PARTY_TO]-(a:Agreement)
            RETURN a as agreement, collect(p) as parties, collect(r) as roles, collect(country) as countries, collect(i) as states

        """
        records, _, _ = self._driver.execute_query(GET_CONTRACT_WITH_CLAUSE_TYPE_QUERY, {'clause_type': clause_type.value})
        return await self._short_agreements_from_records(records)

    async def get_contracts_without_clause(self, clause_type: ClauseType) -> List[Agreement]:
        """Return short-format agreements that have no clause of ``clause_type``."""
        GET_CONTRACT_WITHOUT_CLAUSE_TYPE_QUERY = """
            MATCH (a:Agreement)
            OPTIONAL MATCH (a)-[:HAS_CLAUSE]->(cc:ContractClause {type: $clause_type})
            WITH a,cc
            WHERE cc is NULL
            WITH a
            MATCH (country:Country)-[i:INCORPORATED_IN]-(p:Organization)-[r:IS_PARTY_TO]-(a)
            RETURN a as agreement, collect(p) as parties, collect(r) as roles, collect(country) as countries, collect(i) as states
        """

        records, _, _ = self._driver.execute_query(GET_CONTRACT_WITHOUT_CLAUSE_TYPE_QUERY, {'clause_type': clause_type.value})
        return await self._short_agreements_from_records(records)

    async def get_contracts_similar_text(self, clause_text: str) -> List[Agreement]:
        """Return partial agreements whose excerpts are the top 3 vector matches for ``clause_text``.

        Each result carries only ``agreement_name``, ``contract_id`` and a
        single clause holding the matched excerpt.
        """
        # Cypher to traverse from the semantically similar excerpts back to the agreement
        EXCERPT_TO_AGREEMENT_TRAVERSAL_QUERY = """
            MATCH (a:Agreement)-[:HAS_CLAUSE]->(cc:ContractClause)-[:HAS_EXCERPT]-(node)
            RETURN a.name as agreement_name, a.contract_id as contract_id, cc.type as clause_type, node.text as excerpt
        """

        # Set up vector Cypher retriever
        retriever = VectorCypherRetriever(
            driver=self._driver,
            index_name="excerpt_embedding",
            embedder=self._openai_embedder,
            retrieval_query=EXCERPT_TO_AGREEMENT_TRAVERSAL_QUERY,
            result_formatter=my_vector_search_excerpt_record_formatter
        )

        # Run vector search on excerpts; each item carries the owning agreement and clause
        retriever_result = retriever.search(query_text=clause_text, top_k=3)

        agreements = []
        for item in retriever_result.items:
            content = item.content
            matched_clause: ContractClause = {
                "clause_type": content['clause_type'],
                "excerpts": [content['excerpt']]
            }
            partial_agreement: Agreement = {
                'agreement_name': content['agreement_name'],
                'contract_id': content['contract_id'],
                'clauses': [matched_clause]
            }
            agreements.append(partial_agreement)

        return agreements

    async def answer_aggregation_question(self, user_question: str) -> str:
        """Answer ``user_question`` by having the LLM generate and run a Cypher query.

        Returns the stringified result items, each followed by a blank line;
        an empty string when the retriever returns no non-empty items.
        """
        NEO4J_SCHEMA = """
            Node properties:
            Agreement {agreement_type: STRING, contract_id: INTEGER,effective_date: STRING,renewal_term: STRING, name: STRING}
            ContractClause {type: STRING}
            ClauseType {name: STRING}
            Country {name: STRING}
            Excerpt {text: STRING}
            Organization {name: STRING}

            Relationship properties:
            IS_PARTY_TO {role: STRING}
            GOVERNED_BY_LAW {state: STRING}
            HAS_CLAUSE {type: STRING}
            INCORPORATED_IN {state: STRING}

            The relationships:
            (:Agreement)-[:HAS_CLAUSE]->(:ContractClause)
            (:ContractClause)-[:HAS_EXCERPT]->(:Excerpt)
            (:ContractClause)-[:HAS_TYPE]->(:ClauseType)
            (:Agreement)-[:GOVERNED_BY_LAW]->(:Country)
            (:Organization)-[:IS_PARTY_TO]->(:Agreement)
            (:Organization)-[:INCORPORATED_IN]->(:Country)

        """

        # Initialize the retriever
        retriever = Text2CypherRetriever(
            driver=self._driver,
            llm=self._llm,
            neo4j_schema=NEO4J_SCHEMA
        )

        # Generate a Cypher query using the LLM, send it to the Neo4j database, and return the results
        retriever_result = retriever.search(query_text=user_question)

        contents = (str(item.content) for item in retriever_result.items)
        return "".join(content + '\n\n' for content in contents if content)

    async def _short_agreements_from_records(self, records) -> List[Agreement]:
        """Turn rows of (agreement, parties, roles, countries, states) into short-format agreements."""
        agreements = []
        for row in records:
            agreement: Agreement = await self._get_agreement(
                row['agreement'],
                format="short",
                party_list=row['parties'],
                role_list=row['roles'],
                country_list=row['countries'],
                state_list=row['states']
            )
            agreements.append(agreement)
        return agreements

    async def _get_agreement (self,agreement_node, format="short", party_list=None, role_list=None,country_list=None,
                              state_list=None,clause_list=None,clause_dict=None):
        """Build an agreement dict from a graph node.

        Args:
            agreement_node: mapping-like agreement node; a falsy value yields ``{}``.
            format: ``"short"`` (id, name, type, parties) or ``"long"`` (adds
                dates, renewal term and clauses). Any other value yields ``{}``.
            party_list, role_list, country_list, state_list: parallel lists
                passed to ``_get_parties``.
            clause_list: clause nodes; only their ``type`` is copied.
            clause_dict: ``{clause_type: [excerpt, ...]}``; used only when
                ``clause_list`` is empty or ``None``.
        """
        if not agreement_node or format not in ("short", "long"):
            return {}

        is_long = format == "long"

        agreement: Agreement = {
            "contract_id": agreement_node.get('contract_id'),
            "name": agreement_node.get('name'),
            "agreement_type": agreement_node.get('agreement_type')
        }
        if is_long:
            # NOTE(review): the schema text in answer_aggregation_question lists
            # "effective_date" and neither "agreement_date" nor
            # "expiration_date" on Agreement - confirm these properties exist.
            agreement["agreement_date"] = agreement_node.get('agreement_date')
            agreement["expiration_date"] = agreement_node.get('expiration_date')
            agreement["renewal_term"] = agreement_node.get('renewal_term')

        agreement['parties'] = await self._get_parties(
            party_list=party_list,
            role_list=role_list,
            country_list=country_list,
            state_list=state_list)

        if is_long:
            if clause_list:
                clauses = [{"clause_type": clause_node.get('type')} for clause_node in clause_list]
            elif clause_dict:
                clauses = [
                    {"clause_type": clause_type_key, "excerpts": excerpts}
                    for clause_type_key, excerpts in clause_dict.items()
                ]
            else:
                clauses = []
            agreement['clauses'] = clauses

        return agreement

    async def _get_parties (self, party_list=None, role_list=None,country_list=None,state_list=None):
        """Zip the four parallel lists into ``Party`` dicts; ``[]`` when ``party_list`` is empty or ``None``."""
        if not party_list:
            return []

        parties = []
        for party, role, country, state in zip(party_list, role_list, country_list, state_list):
            p: Party = {
                "name": party.get('name'),
                "role": role.get('role'),
                "incorporation_country": country.get('name'),
                "incorporation_state": state.get('state')
            }
            parties.append(p)
        return parties

    async def get_contract_excerpts (self, contract_id:int) -> Agreement:
        """Return the long-format agreement for ``contract_id`` with excerpts grouped by clause type.

        Parties are not queried here, so ``parties`` is always empty. Returns
        an empty dict when no rows match (previously this raised
        ``UnboundLocalError`` on ``agreement_node``).
        """
        GET_CONTRACT_CLAUSES_QUERY = """
            MATCH (a:Agreement {contract_id: $contract_id})-[:HAS_CLAUSE]->(cc:ContractClause)-[:HAS_EXCERPT]->(e:Excerpt)
            RETURN a as agreement, cc.type as contract_clause_type, collect(e.text) as excerpts
        """
        clause_records, _, _ = self._driver.execute_query(GET_CONTRACT_CLAUSES_QUERY, {'contract_id': contract_id})

        # clause_dict[clause_type] = list of excerpt texts
        agreement_node = None
        clause_dict = {}
        for row in clause_records:
            agreement_node = row['agreement']
            clause_dict[row['contract_clause_type']] = row['excerpts']

        return await self._get_agreement(
            format="long", agreement_node=agreement_node,
            clause_dict=clause_dict)


# --------------------------------------------------------------------------------
# /LICENSE:
# --------------------------------------------------------------------------------
# Apache License
# Version 2.0, January 2004
# http://www.apache.org/licenses/
#
# TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
#
# 1. Definitions.
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GraphRAG in Commercial Contract Review 2 | 3 | This repository contains all of the code mentioned in [GraphRAG in Commercial Contract Review](https://medium.com/@edward.sandoval.2000/graphrag-in-commercial-contract-review-7d4a6caa6eb5). 
4 | 5 | 6 | ## Contract Review - GraphRAG-based approach 7 | The GraphRAG-based approach described in the blog post goes beyond the traditional chunk-based RAG, focusing instead on targeted information extraction from the contracts (LLM + Prompt) to create a knowledge graph representation (LLM + Neo4J), a simple set of data retrieval functions (in Python using Cypher, Text to Cypher, Vector Search retrievers) and ultimately a Q&A agent (Semantic Kernel) capable of handling complex questions 8 | 9 | The diagram below illustrates the approach 10 | 11 | ![4-stage-approach](./images/4-stage-approach%20.png) 12 | The 4-stage GraphRAG approach: From question-based extraction -> knowledge graph model -> GraphRAG retrieval -> Q&A Agent 13 | 14 | 15 | The four steps are: 16 | 1. Extracting Relevant Information from Contracts (LLM + Contract) 17 | 2. Storing information extracted into a Knowledge Graph (Neo4j) 18 | 3. Developing simple KG Data Retrieval Functions (Python) 19 | 4. Building a Q&A Agent handling complex questions (Semantic Kernel, LLM, Neo4j) 20 | 21 | # What Do You Need? 22 | - Obtain an [OpenAI token](https://platform.openai.com/api-keys). 
It will be used to: 23 | - Build the Q&A agent with Semantic Kernel 24 | - Extract specific information from contracts (parties,key dates,jurisdiction) 25 | - Generate embeddings for a small number of contract excerpts 26 | - Power a Text2Cypher data retrieval function 27 | - Python 3.9+ and a Python virtual environment 28 | - Access to a Neo4j database 29 | - Docker, Aura or Self-hosted 30 | - GenAI plugin running on the database (This is automatically available in Aura) 31 | 32 | # Set Up 33 | - Clone the repo 34 | ``` 35 | git clone https://github.com/neo4j-product-examples/graphrag-contract-review.git 36 | cd graphrag-contract-review 37 | ``` 38 | - Create a Python Virtual environment 39 | ``` 40 | python3 -m venv .venv 41 | source .venv/bin/activate 42 | pip install -r requirements.txt 43 | ``` 44 | - Run a local Neo4j instance (Optional) 45 | We will use Docker 46 | 47 | But you can use an Aura or self-hosted Neo4j instance. 48 | If you do so, you can skip this step. Just make sure you have URL, user name and password to access your Neo4j database 49 | 50 | ``` 51 | docker run \ 52 | --restart always --env NEO4J_AUTH=neo4j/yourpassword \ 53 | --publish=7474:7474 --publish=7687:7687 \ 54 | --env NEO4J_PLUGINS='["genai","apoc"]' neo4j:latest 55 | ``` 56 | 57 | Make sure you replace **yourpassword** with a password to access this database 58 | 59 | ## Set up some environment vars 60 | If you are using Neo4j Aura or self-hosted instance 61 | ``` 62 | export NEO4J_URI= 63 | export NEO4J_USERNAME= 64 | ``` 65 | If you are using Docker, you need to specify URI or Username ONLY if different from the default 66 | 67 | You need to specify the password to access the database 68 | ``` 69 | export NEO4J_PASSWORD= 70 | ``` 71 | 72 | Set your OpenAI API Key 73 | ``` 74 | export OPENAI_API_KEY=sk-.... 
75 | ``` 76 | 77 | 78 | # STEP 1: Extracting Relevant Information from Contracts (LLM + Contract) 79 | In the [data](./data/input) folder, you will find 3 real commercial contracts in PDF format 80 | 81 | These contracts were taken from the publicly available [Contract Understanding Atticus Dataset](https://www.atticusprojectai.org/cuad) 82 | 83 | Our first step is to run a program that will prompt a ```OpenAI gpt-4o``` model to answer 40+ questions for each contract. 84 | 85 | The prompt will include instructions to store the extracted information in JSON format, under [data/output](./data/output) 86 | 87 | The full prompt can be found [here](./prompts/contract_extraction_prompt.txt) 88 | 89 | ## From PDF Contract to JSON 90 | Run the following command 91 | ``` 92 | python convert-pdf-to-json.py 93 | ``` 94 | Each PDF will take around 60s to process 95 | 96 | You can check out any of the [json files generated under the data/output folder](./data/output/) 97 | 98 | In case the LLM generates invalid JSON, you can find the information returned by the LLM under the [data/debug](./data/debug/) folder 99 | 100 | # STEP 2: Storing information extracted into a Knowledge Graph (Neo4j) 101 | With each contract as a JSON file, the next step is to create a Knowledge Graph in Neo4j 102 | 103 | Before we can do that, we need to design a Knowledge Graph Data model suitable to represent the information extracted from the contracts 104 | 105 | ## A Suitable KG Data Model for our contracts 106 | 107 | In our case, a suitable KG data model includes our main entities: Agreements (contracts), their clauses, the Parties (organizations) to the contracts and the relationships amongst all of them 108 | 109 | ![A Suitable Knowledge Graph Model](./images/schema.png) 110 | 111 | ## Some useful properties for each of the main nodes and relationships 112 | ``` 113 | Agreement {agreement_type: STRING, contract_id: INTEGER, 114 | effective_date: STRING, 115 | renewal_term: STRING, name: STRING} 116
| ContractClause {name: STRING, type: STRING} 117 | ClauseType {name: STRING} 118 | Country {name: STRING} 119 | Excerpt {text: STRING} 120 | Organization {name: STRING} 121 | 122 | Relationship properties: 123 | IS_PARTY_TO {role: STRING} 124 | GOVERNED_BY_LAW {state: STRING} 125 | HAS_CLAUSE {type: STRING} 126 | INCORPORATED_IN {state: STRING} 127 | ``` 128 | 129 | Now, let's create a Knowledge Graph from the JSON files in ```./data/output/*.json``` 130 | 131 | ``` 132 | python create_graph_from_json.py 133 | ``` 134 | The ```create_graph_from_json``` Python script is relatively straightforward to understand. 135 | 136 | The main area of complexity is the ```CREATE_GRAPH_STATEMENT```. This [CYPHER statement](./create_graph_from_json.py#L7) that takes a Contract JSON and creates the relevant nodes and relationships for that contract in Neo4j. 137 | 138 | You can check out the original [blog post](https://medium.com/@edward.sandoval.2000/graphrag-in-commercial-contract-review-7d4a6caa6eb5) for a full breakdown of this CYPHER statement 139 | 140 | 141 | You will see output similar to 142 | ``` 143 | Index excerptTextIndex created. 144 | Index agreementTypeTextIndex created. 145 | Index clauseTypeNameTextIndex created. 146 | Index clauseNameTextIndex created. 147 | Index organizationNameTextIndex created. 148 | Creating index: contractIdIndex 149 | Generating Embeddings for Contract Excerpts... 
150 | ``` 151 | 152 | The generation of embeddings takes about 1 minute(s) to complete 153 | 154 | After the Python script finishes: 155 | - Each Contract JSON has been uploaded to Neo4J Knowledge Graph 156 | - Key properties on the Agreement, ClauseTypes, Organization (Party) have fulltext indexes 157 | - A new property Excerpt.embedding was generated by using ```genai.vector.encode(excerpt.text)``` 158 | - This calls out OpenAI Text Embedding model ```text-embedding-3-small``` 159 | - A new vector index for Excerpt.embedding is created 160 | 161 | 162 | The total number of Excerpt embeddings for the 3 contracts is between 30-40 (depending on how many relevant excerpts were detected by the LLM on Step 1) 163 | 164 | A visual representation of one of the contracts in the Knowledge Graph is 165 | 166 | ![A visual representation of a single Contract as a Knowledge Graph](./images/contract_graph.png) 167 | 168 | If you are using Docker to run your Neo4j instance, you can use the [browser tool](http://localhost:7474/browser/) to confirm your data was loaded 169 | 170 | If you are using Aura or a self-hosted Neo4j instance, you can use the Query tool from the [new Neo4j console](https://console-preview.neo4j.io/tools/query). You may need to log into your Aura instance or manually add a connection to your self-hosted database 171 | 172 | 173 | # STEP 3: Developing simple KG Data Retrieval Functions (Python) 174 | With the contracts now represented in a Knowledge Graph, the next step is to build some basic data retrieval functions. 
175 | 176 | These functions are fundamental building blocks that enable us to build a Q&A agent in the next section 177 | 178 | Let's define a few basic data retrieval functions: 179 | 180 | - Retrieve basic details about a contract (given a contract ID) 181 | - Find contracts involving a specific organization (given a partial organization name) 182 | - Find contracts that DO NOT contain a particular clause type 183 | - Find contracts that contain a specific type of clause 184 | - Find contracts based on the semantic similarity with the text (Excerpt) in a clause (e.g., contracts mentioning the use of "prohibited items") 185 | - Run a natural language query against all contracts in the database. For example, an aggregation query that counts "how many contracts in the database ". 186 | 187 | You are encouraged to explore [ContractPlugin.py](./ContractPlugin.py) for a definition and [ContractService.py](./ContractService.py) for the implementation of each of the data retrieval functions 188 | 189 | The original [blog post](https://medium.com/@edward.sandoval.2000/graphrag-in-commercial-contract-review-7d4a6caa6eb5) provides a walk through of three different styles of data retrieval functions 190 | - Cypher-based data retrieval functions - 191 | - ```get_contract(self, contract_id: int) -> Annotated[Agreement, "A contract"]:```   192 | - ```get_contracts_without_clause(self, clause_type: ClauseType) -> List[Agreement]:``` 193 | - Both of these data retrieval functions are built around simple CYPHER statements 194 | - Vector-Search + Graph traversal data retrieval function 195 | - ```get_contracts_similar_text(self, clause_text: str) -> Annotated[List[Agreement], "A list of contracts with similar text in one of their clauses"]:``` 196 | - This function leverages [Neo4j GraphRAG package](https://github.com/neo4j/neo4j-graphrag-python) 197 | - It also relies on a vector index defined on "Excerpt" nodes 198 | - Text-to-Cypher (T2C) data retrieval function 199 | -
```answer_aggregation_question(self, user_question: str) -> Annotated[str, "An answer to user_question"]:``` 200 | - This function leverages [Neo4j GraphRAG package](https://github.com/neo4j/neo4j-graphrag-python) 201 | - It uses OpenAI ```gpt-4o``` to generate CYPHER statement that will be executed against the database 202 | 203 | 204 | 205 | # STEP 4: Building a Q&A Agent handling complex questions (Semantic Kernel, LLM, Neo4j) 206 | 207 | Armed with our Knowledge Graph data retrieval functions, we are ready to build an agent grounded by GraphRAG! 208 | 209 | We will use Microsoft Semantic Kernel, a framework that allows developers to integrate LLM function calling with existing APIs and data retrieval functions The framework uses a concept called ```Plugins``` to represent specific functionality that the kernel can perform. In our case, all of our data retrieval functions defined in the "ContractPlugin" can be used by the LLM to answer questions about contracts in the Neo4J database 210 | In addition, Semantic Kernel uses the concept of ```Memory``` to keep all interactions between user and agent. This includes details of any functions called/executed (with all input and output data) 211 | 212 | An extremely simple Terminal-based agent can be implemented with a few lines of code. 213 | 214 | Run 215 | ``` 216 | python test_agent.py 217 | ``` 218 | You can try variations of the following questions to exercise the different data retrieval functions 219 | 220 | - Get me contracts with Price Restrictions but without Insurance 221 | - See the logging INFO and notice how this requires calling 2 of our data retrieval functions 222 | - Get more details about this contract 223 | - Get me contracts for AT&T 224 | - Get me contracts for Mount Knowledge 225 | - Get me contract 3 226 | - Get me contracts that mention 100 units of product 227 | - What's the average number of excerpts per contract? 
228 | 229 | You can type ```**exit``` to finish your session with the agent 230 | 231 | You can see the full code of the [test_agent.py](./test_agent.py) 232 | You will find functions that exercise each of the retrieval functions (commented out) 233 | 234 | For a nicer-looking UI, you can try on streamlit 235 | ``` 236 | streamlit run app.py 237 | ``` 238 | The browser shows 239 | 240 | ![Agent in Streamlit](./images/streamlit_view.png) 241 | 242 | 243 | # Acknowledgements - Contract Understanding Atticus Dataset 244 | 245 | This demo was made possible thanks to the invaluable resource provided by the Contract Understanding Atticus Dataset (CUAD) v1. 246 | 247 | 248 | A dataset curated and maintained by The Atticus Project. CUAD's extensive corpus of over 13,000 labels in 510 commercial legal contracts, manually annotated under the supervision of experienced lawyers, has been instrumental in identifying critical legal clauses for contract review, particularly in corporate transactions such as mergers and acquisitions. 249 | 250 | We recognize and appreciate CUAD's contribution to advancing NLP research and development in the field of legal contract analysis. 251 | 252 | 253 | # Future Improvements 254 | In this demo, we didn't fine-tune the LLM to enhance its basic capabilities to identify relevant excerpts.
import base64
import re
import json


def open_as_bytes(pdf_filename: str) -> bytes:
    """Read a file in binary mode and return its content Base64-encoded.

    Note that, despite the name, the return value is not the raw file
    content: it is the ``bytes`` object produced by ``base64.b64encode``.
    """
    with open(pdf_filename, 'rb') as pdf_file:
        return base64.b64encode(pdf_file.read())


def read_text_file(file_path):
    """Return the whole content of the text file at *file_path* as a string.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default encoding.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()


def extract_json_from_string(input_string):
    """Parse *input_string* as JSON, tolerating a Markdown code fence.

    Surrounding whitespace is ignored, and a leading ```` ```json ```` (or
    bare ```` ``` ````) fence and a trailing ```` ``` ```` fence are removed
    before parsing.

    Returns:
        The decoded Python object, or ``None`` when the text is not valid
        JSON (the parse error is printed, never raised).
    """
    candidate = input_string.strip()
    # Strip the fences independently so that a response with only an opening
    # fence (e.g. a truncated answer) is still handed to the JSON parser.
    candidate = re.sub(r'^```(?:json)?\s*', '', candidate, flags=re.IGNORECASE)
    candidate = re.sub(r'\s*```$', '', candidate)

    try:
        return json.loads(candidate)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return None


def save_json_string_to_file(json_string, file_path):
    """Write *json_string* to *file_path* as UTF-8, replacing any existing file."""
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(json_string)
semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase 10 | from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( 11 | OpenAIChatPromptExecutionSettings) 12 | from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior 13 | from semantic_kernel.functions.kernel_arguments import KernelArguments 14 | import logging 15 | import time 16 | 17 | # Configure logging 18 | logging.basicConfig(level=logging.INFO) 19 | 20 | # Get info from environment 21 | OPENAI_KEY = os.getenv('OPENAI_API_KEY') 22 | NEO4J_URI = os.getenv('NEO4J_URI', 'bolt://localhost:7687') 23 | NEO4J_USER = os.getenv('NEO4J_USERNAME', 'neo4j') 24 | NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD') 25 | service_id = "contract_search" 26 | 27 | # Streamlit app configuration 28 | st.set_page_config(layout="wide") 29 | st.title("📄 Q&A Chatbot for Contract Review") 30 | 31 | # Initialize Kernel, Chat History, and Settings in Session State 32 | if 'semantic_kernel' not in st.session_state: 33 | # Initialize the kernel 34 | kernel = Kernel() 35 | 36 | # Add the Contract Search plugin to the kernel 37 | contract_search_neo4j = ContractSearchService(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD) 38 | kernel.add_plugin(ContractPlugin(contract_search_service=contract_search_neo4j), plugin_name="contract_search") 39 | 40 | # Add the OpenAI chat completion service to the Kernel 41 | kernel.add_service(OpenAIChatCompletion(ai_model_id="gpt-4o", api_key=OPENAI_KEY, service_id=service_id)) 42 | 43 | # Enable automatic function calling 44 | settings: OpenAIChatPromptExecutionSettings = kernel.get_prompt_execution_settings_from_service_id( 45 | service_id=service_id) 46 | settings.function_choice_behavior = FunctionChoiceBehavior.Auto(filters={"included_plugins": ["contract_search"]}) 47 | 48 | # Create a history of the conversation 49 | st.session_state.semantic_kernel = kernel 50 | 
# Function to get a response from the agent
async def get_agent_response(user_input):
    """Send *user_input* to the chat-completion service and record the reply.

    The user message is appended to both the Semantic Kernel chat history and
    the UI chat history kept in ``st.session_state``. The completion call is
    attempted up to three times; on success the reply is appended to both
    histories, and after the final failed attempt an ``Error: ...`` entry is
    appended to the UI history instead.

    Args:
        user_input: The question typed by the user.

    Returns:
        None. All results are written to ``st.session_state``.
    """
    kernel = st.session_state.semantic_kernel
    history = st.session_state.chat_history
    settings = st.session_state.kernel_settings

    # Add user input to the chat history
    history.add_user_message(user_input)
    st.session_state.ui_chat_history.append({"role": "user", "content": user_input})

    retry_attempts = 3
    for attempt in range(retry_attempts):
        # Get the response from the agent
        try:
            chat_completion: OpenAIChatCompletion = kernel.get_service(type=ChatCompletionClientBase)

            result = (await chat_completion.get_chat_message_contents(
                chat_history=history,
                settings=settings,
                kernel=kernel,
            ))[0]

            # Add the agent's reply to the chat history
            history.add_message(result)
            st.session_state.ui_chat_history.append({"role": "agent", "content": str(result)})
            return  # Exit after successful response

        except Exception as e:
            if attempt < retry_attempts - 1:
                # Yield to the event loop while waiting; time.sleep() would
                # block the loop inside a coroutine.
                await asyncio.sleep(0.2)
            else:
                print("get_agent_response-error" + str(e))
                st.session_state.ui_chat_history.append({"role": "agent", "content": f"Error: {str(e)}"})


# UI for Q&A interaction
st.subheader("Chat with Your Agent")

# Container for chat history
chat_placeholder = st.container()


# Function to display the chat history
def display_chat():
    """Render every entry of the UI chat history inside ``chat_placeholder``.

    Entries whose role is ``'user'`` are labelled "User"; every other role is
    labelled "Agent".
    """
    with chat_placeholder:
        for chat in st.session_state.ui_chat_history:
            speaker = "User" if chat['role'] == 'user' else "Agent"
            st.markdown(f"**{speaker}:** {chat['content']}")
def process_pdf(pdf_filename):
    """Upload one PDF to the assistant and return the text of its answer.

    A new thread is created, the PDF is uploaded and attached to a user
    message carrying ``extraction_prompt``, and the thread is run with
    ``pdf_assistant`` until it finishes or times out.

    Args:
        pdf_filename: Path of the PDF file to process.

    Returns:
        The text value of the first content item of the first message
        returned when listing the thread's messages.

    Raises:
        RuntimeError: If the run ends in any status other than "completed".
    """
    # Create thread
    thread = client.beta.threads.create()
    # Upload PDF file; the context manager closes the handle once uploaded
    with open(pdf_filename, "rb") as pdf_file:
        file = client.files.create(file=pdf_file, purpose="assistants")
    # Create assistant message with attachment and extraction_prompt
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        attachments=[
            Attachment(
                file_id=file.id, tools=[AttachmentToolFileSearch(type="file_search")]
            )
        ],
        content=extraction_prompt,
    )

    # Run thread
    run = client.beta.threads.runs.create_and_poll(
        thread_id=thread.id, assistant_id=pdf_assistant.id, timeout=1000)

    if run.status != "completed":
        raise RuntimeError(f"Run failed: {run.status}")

    # Retrieve messages
    messages = list(client.beta.threads.messages.list(thread_id=thread.id))

    # Output extracted content
    return messages[0].content[0].text.value


def main():
    """Convert every PDF under ./data/input/ into a JSON file.

    For each PDF the raw assistant response is always saved under
    ./data/debug/. The parsed, re-indented JSON is saved under
    ./data/output/ only when the response could be parsed.
    """
    pdf_files = [filename for filename in os.listdir('./data/input/') if filename.endswith('.pdf')]

    for pdf_filename in pdf_files:
        print('Processing ' + pdf_filename + '...')
        # Extract content from PDF using the assistant
        complete_response = process_pdf('./data/input/' + pdf_filename)
        # Log the complete response to debug
        save_json_string_to_file(complete_response, './data/debug/complete_response_' + pdf_filename + '.json')

        # extract_json_from_string reports a parse failure by returning None
        # rather than raising, so test the result instead of catching
        # json.JSONDecodeError; otherwise "null" would be written as output.
        contract_json = extract_json_from_string(complete_response)
        if contract_json is None:
            print(f"Failed to decode JSON for {pdf_filename}; raw response kept under ./data/debug/")
            continue

        # Store as valid JSON so it can be imported into a KG later
        json_string = json.dumps(contract_json, indent=4)
        save_json_string_to_file(json_string, './data/output/' + pdf_filename + '.json')


if __name__ == '__main__':
    main()
# (index name, statement that creates it). The first element must be the same
# name used inside the CREATE statement, because index_exists() looks the
# index up by that name.
CREATE_FULL_TEXT_INDICES = [
    ("excerptTextIndex", "CREATE FULLTEXT INDEX excerptTextIndex IF NOT EXISTS FOR (e:Excerpt) ON EACH [e.text]"),
    ("agreementTypeTextIndex", "CREATE FULLTEXT INDEX agreementTypeTextIndex IF NOT EXISTS FOR (a:Agreement) ON EACH [a.agreement_type]"),
    ("clauseTypeNameTextIndex", "CREATE FULLTEXT INDEX clauseTypeNameTextIndex IF NOT EXISTS FOR (ct:ClauseType) ON EACH [ct.name]"),
    ("contractClauseTypeTextIndex", "CREATE FULLTEXT INDEX contractClauseTypeTextIndex IF NOT EXISTS FOR (c:ContractClause) ON EACH [c.type]"),
    ("organizationNameTextIndex", "CREATE FULLTEXT INDEX organizationNameTextIndex IF NOT EXISTS FOR (o:Organization) ON EACH [o.name]"),
    ("agreementContractId", "CREATE INDEX agreementContractId IF NOT EXISTS FOR (a:Agreement) ON (a.contract_id) "),
]


# Sets Excerpt.embedding for every Excerpt that has text but no embedding yet.
EMBEDDINGS_STATEMENT = """
MATCH (e:Excerpt)
WHERE e.text is not null and e.embedding is null
SET e.embedding = genai.vector.encode(e.text, "OpenAI", {
    token: $token, model: "text-embedding-3-small", dimensions: 1536
})
"""


def index_exists(driver, index_name):
    """Return True when ``SHOW INDEXES`` reports an index named *index_name*.

    Args:
        driver: Object exposing ``execute_query(query, parameters)`` whose
            result has a ``records`` sequence.
        index_name: Exact name of the index to look for.
    """
    check_index_query = "SHOW INDEXES WHERE name = $index_name"
    result = driver.execute_query(check_index_query, {"index_name": index_name})
    return len(result.records) > 0


def create_full_text_indices(driver):
    """Create each index listed in ``CREATE_FULL_TEXT_INDICES`` that is missing.

    Prints one status line per index, saying whether it is being created or
    already exists. All queries go through ``driver.execute_query``, so no
    explicit session is opened.
    """
    for index_name, create_query in CREATE_FULL_TEXT_INDICES:
        if index_exists(driver, index_name):
            print(f"Index {index_name} already exists.")
            continue
        print(f"Creating index: {index_name}")
        driver.execute_query(create_query)
json_contract in json_contracts: 105 | with open(JSON_CONTRACT_FOLDER + json_contract,'r') as file: 106 | json_string = file.read() 107 | json_data = json.loads(json_string) 108 | agreement = json_data['agreement'] 109 | agreement['contract_id'] = contract_id 110 | driver.execute_query(CREATE_GRAPH_STATEMENT, data=json_data) 111 | contract_id+=1 112 | 113 | 114 | create_full_text_indices(driver) 115 | driver.execute_query(CREATE_VECTOR_INDEX_STATEMENT) 116 | print ("Generating Embeddings for Contract Excerpts...") 117 | driver.execute_query(EMBEDDINGS_STATEMENT, token = OPENAI_API_KEY) 118 | -------------------------------------------------------------------------------- /data/debug/complete_response_AtnInternational.pdf.json: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "agreement": { 4 | "agreement_name": "Mobility Network General Agreement", 5 | "agreement_type": "Network Build and Maintenance Agreement", 6 | "effective_date": "2019-07-31", 7 | "expiration_date": "", 8 | "renewal_term": "", 9 | "Notice_period_to_Terminate_Renewal": "", 10 | "parties": [ 11 | { 12 | "role": "Vendor", 13 | "name": "Commnet Wireless, LLC", 14 | "incorporation_country": "United States of America", 15 | "incorporation_state": "Delaware" 16 | }, 17 | { 18 | "role": "AT&T Mobility LLC", 19 | "name": "Contracting Party", 20 | "incorporation_country": "United States of America", 21 | "incorporation_state": "Delaware" 22 | } 23 | ], 24 | "governing_law": { 25 | "country": "United States of America", 26 | "state": "New York", 27 | "most_favored_country": "United States of America" 28 | }, 29 | "clauses": [ 30 | { 31 | "clause_type": "Competitive Restriction Exception", 32 | "exists": false, 33 | "excerpts": [] 34 | }, 35 | { 36 | "clause_type": "Non-Compete", 37 | "exists": false, 38 | "excerpts": [] 39 | }, 40 | { 41 | "clause_type": "Exclusivity", 42 | "exists": false, 43 | "excerpts": [] 44 | }, 45 | { 46 | "clause_type": 
"No-Solicit Of Customers", 47 | "exists": false, 48 | "excerpts": [] 49 | }, 50 | { 51 | "clause_type": "No-Solicit Of Employees", 52 | "exists": false, 53 | "excerpts": [] 54 | }, 55 | { 56 | "clause_type": "Non-Disparagement", 57 | "exists": false, 58 | "excerpts": [] 59 | }, 60 | { 61 | "clause_type": "Termination For Convenience", 62 | "exists": false, 63 | "excerpts": [] 64 | }, 65 | { 66 | "clause_type": "Rofr/Rofo/Rofn", 67 | "exists": false, 68 | "excerpts": [] 69 | }, 70 | { 71 | "clause_type": "Change Of Control", 72 | "exists": true, 73 | "excerpts": [ 74 | "“Change of Control” means any contract or transaction or series of related contracts or transactions (regardless of form or structure) that would directly result in the Control of a Person or its business or assets changing from one Person to another Person (alone or in combination with any other third Person)." 75 | ] 76 | }, 77 | { 78 | "clause_type": "Anti-Assignment", 79 | "exists": false, 80 | "excerpts": [] 81 | }, 82 | { 83 | "clause_type": "Revenue/Profit Sharing", 84 | "exists": false, 85 | "excerpts": [] 86 | }, 87 | { 88 | "clause_type": "Price Restrictions", 89 | "exists": false, 90 | "excerpts": [] 91 | }, 92 | { 93 | "clause_type": "Minimum Commitment", 94 | "exists": false, 95 | "excerpts": [] 96 | }, 97 | { 98 | "clause_type": "Volume Restriction", 99 | "exists": false, 100 | "excerpts": [] 101 | }, 102 | { 103 | "clause_type": "IP Ownership Assignment", 104 | "exists": true, 105 | "excerpts": [ 106 | "AT&T shall be the exclusive owner of all right, title, and interest in and to all Paid-For Development, including, without limitation, all Intellectual Property Rights therein and thereto." 
107 | ] 108 | }, 109 | { 110 | "clause_type": "Joint IP Ownership", 111 | "exists": false, 112 | "excerpts": [] 113 | }, 114 | { 115 | "clause_type": "License grant", 116 | "exists": true, 117 | "excerpts": [ 118 | "Vendor hereby grants and promises to grant and have granted to AT&T and its Affiliates a royalty-free, nonexclusive, sublicensable, assignable, transferable, irrevocable, perpetual, world-wide license." 119 | ] 120 | }, 121 | { 122 | "clause_type": "Non-Transferable License", 123 | "exists": false, 124 | "excerpts": [] 125 | }, 126 | { 127 | "clause_type": "Affiliate License-Licensor", 128 | "exists": false, 129 | "excerpts": [] 130 | }, 131 | { 132 | "clause_type": "Affiliate License-Licensee", 133 | "exists": false, 134 | "excerpts": [] 135 | }, 136 | { 137 | "clause_type": "Unlimited/All-You-Can-Eat-License", 138 | "exists": false, 139 | "excerpts": [] 140 | }, 141 | { 142 | "clause_type": "Irrevocable Or Perpetual License", 143 | "exists": true, 144 | "excerpts": [ 145 | "Vendor hereby grants and promises to grant and have granted to AT&T and its Affiliates a royalty-free, nonexclusive, sublicensable, assignable, transferable, irrevocable, perpetual, world-wide license." 
146 | ] 147 | }, 148 | { 149 | "clause_type": "Source Code Escrow", 150 | "exists": false, 151 | "excerpts": [] 152 | }, 153 | { 154 | "clause_type": "Post-Termination Services", 155 | "exists": false, 156 | "excerpts": [] 157 | }, 158 | { 159 | "clause_type": "Audit Rights", 160 | "exists": false, 161 | "excerpts": [] 162 | }, 163 | { 164 | "clause_type": "Uncapped Liability", 165 | "exists": false, 166 | "excerpts": [] 167 | }, 168 | { 169 | "clause_type": "Cap On Liability", 170 | "exists": false, 171 | "excerpts": [] 172 | }, 173 | { 174 | "clause_type": "Liquidated Damages", 175 | "exists": false, 176 | "excerpts": [] 177 | }, 178 | { 179 | "clause_type": "Warranty Duration", 180 | "exists": false, 181 | "excerpts": [] 182 | }, 183 | { 184 | "clause_type": "Insurance", 185 | "exists": false, 186 | "excerpts": [] 187 | }, 188 | { 189 | "clause_type": "Covenant Not To Sue", 190 | "exists": false, 191 | "excerpts": [] 192 | }, 193 | { 194 | "clause_type": "Third Party Beneficiary", 195 | "exists": false, 196 | "excerpts": [] 197 | } 198 | ] 199 | } 200 | } 201 | ``` -------------------------------------------------------------------------------- /data/debug/complete_response_CybergyHoldingsInc.pdf.json: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "agreement": { 4 | "agreement_name": "Marketing Affiliate Agreement", 5 | "agreement_type": "Affiliate Agreement", 6 | "effective_date": "2014-05-08", 7 | "expiration_date": "2014-12-31", 8 | "renewal_term": "1 year", 9 | "Notice_period_to_Terminate_Renewal": "30 days", 10 | "parties": [ 11 | { 12 | "role": "Company", 13 | "name": "Birch First Global Investments Inc.", 14 | "incorporation_country": "United States", 15 | "incorporation_state": "U.S. 
Virgin Islands" 16 | }, 17 | { 18 | "role": "Marketing Affiliate", 19 | "name": "Mount Knowledge Holdings Inc.", 20 | "incorporation_country": "United States", 21 | "incorporation_state": "Nevada" 22 | } 23 | ], 24 | "governing_law": { 25 | "country": "United States", 26 | "state": "Nevada", 27 | "most_favored_country": "United States" 28 | }, 29 | "clauses": [ 30 | { 31 | "clause_type": "Competitive Restriction Exception", 32 | "exists": false, 33 | "excerpts": [] 34 | }, 35 | { 36 | "clause_type": "Non-Compete", 37 | "exists": false, 38 | "excerpts": [] 39 | }, 40 | { 41 | "clause_type": "Exclusivity", 42 | "exists": false, 43 | "excerpts": [] 44 | }, 45 | { 46 | "clause_type": "No-Solicit Of Customers", 47 | "exists": false, 48 | "excerpts": [] 49 | }, 50 | { 51 | "clause_type": "No-Solicit Of Employees", 52 | "exists": false, 53 | "excerpts": [] 54 | }, 55 | { 56 | "clause_type": "Non-Disparagement", 57 | "exists": true, 58 | "excerpts": [ 59 | "avoid deceptive, misleading or unethical practices that are or might be detrimental to Company and/or its Affiliates, the Technology or the public, including but not limited to disparagement of Company or the Technology" 60 | ] 61 | }, 62 | { 63 | "clause_type": "Termination For Convenience", 64 | "exists": true, 65 | "excerpts": [ 66 | "This Agreement may be terminated by either party at the expiration of its term or any renewal term upon thirty (30) days written notice to the other party." 67 | ] 68 | }, 69 | { 70 | "clause_type": "Rofr/Rofo/Rofn", 71 | "exists": false, 72 | "excerpts": [] 73 | }, 74 | { 75 | "clause_type": "Change Of Control", 76 | "exists": false, 77 | "excerpts": [] 78 | }, 79 | { 80 | "clause_type": "Anti-Assignment", 81 | "exists": true, 82 | "excerpts": [ 83 | "MA may not assign, sell, lease or otherwise transfer in whole or in party any of the rights granted pursuant to this Agreement without prior written approval of Company." 
84 | ] 85 | }, 86 | { 87 | "clause_type": "Revenue/Profit Sharing", 88 | "exists": false, 89 | "excerpts": [] 90 | }, 91 | { 92 | "clause_type": "Price Restrictions", 93 | "exists": true, 94 | "excerpts": [ 95 | "Company reserves the right to change its process and/or fees, from time to time, in its sole and absolute discretion." 96 | ] 97 | }, 98 | { 99 | "clause_type": "Minimum Commitment", 100 | "exists": true, 101 | "excerpts": [ 102 | "MA commits to purchase a minimum of 100 Units in aggregate within the Territory within the first six months of term of this Agreement." 103 | ] 104 | }, 105 | { 106 | "clause_type": "Volume Restriction", 107 | "exists": false, 108 | "excerpts": [] 109 | }, 110 | { 111 | "clause_type": "IP Ownership Assignment", 112 | "exists": false, 113 | "excerpts": [] 114 | }, 115 | { 116 | "clause_type": "Joint IP Ownership", 117 | "exists": false, 118 | "excerpts": [] 119 | }, 120 | { 121 | "clause_type": "License grant", 122 | "exists": true, 123 | "excerpts": [ 124 | "Company hereby grants to MA the right to advertise, market and sell to corporate users, government agencies and educational facilities" 125 | ] 126 | }, 127 | { 128 | "clause_type": "Non-Transferable License", 129 | "exists": true, 130 | "excerpts": [ 131 | "MA and its Clients receive no title to the Technology contained on the Technology." 
132 | ] 133 | }, 134 | { 135 | "clause_type": "Affiliate License-Licensor", 136 | "exists": false, 137 | "excerpts": [] 138 | }, 139 | { 140 | "clause_type": "Affiliate License-Licensee", 141 | "exists": false, 142 | "excerpts": [] 143 | }, 144 | { 145 | "clause_type": "Unlimited/All-You-Can-Eat-License", 146 | "exists": false, 147 | "excerpts": [] 148 | }, 149 | { 150 | "clause_type": "Irrevocable Or Perpetual License", 151 | "exists": false, 152 | "excerpts": [] 153 | }, 154 | { 155 | "clause_type": "Source Code Escrow", 156 | "exists": true, 157 | "excerpts": [ 158 | "MA agrees that it shall not, by itself or in association with any other party, reproduce, duplicate, copy, decompile, disassemble or reverse engineer the Technology in source form" 159 | ] 160 | }, 161 | { 162 | "clause_type": "Post-Termination Services", 163 | "exists": false, 164 | "excerpts": [] 165 | }, 166 | { 167 | "clause_type": "Audit Rights", 168 | "exists": false, 169 | "excerpts": [] 170 | }, 171 | { 172 | "clause_type": "Uncapped Liability", 173 | "exists": false, 174 | "excerpts": [] 175 | }, 176 | { 177 | "clause_type": "Cap On Liability", 178 | "exists": true, 179 | "excerpts": [ 180 | "Company's liability shall not exceed the fees that MA has paid under this Agreement." 181 | ] 182 | }, 183 | { 184 | "clause_type": "Liquidated Damages", 185 | "exists": false, 186 | "excerpts": [] 187 | }, 188 | { 189 | "clause_type": "Warranty Duration", 190 | "exists": true, 191 | "excerpts": [ 192 | "any claim for breach of warranty under subparagraph (A) hereof must be made in writing within (90) days from date of shipment." 
193 | ] 194 | }, 195 | { 196 | "clause_type": "Insurance", 197 | "exists": false, 198 | "excerpts": [] 199 | }, 200 | { 201 | "clause_type": "Covenant Not To Sue", 202 | "exists": false, 203 | "excerpts": [] 204 | }, 205 | { 206 | "clause_type": "Third Party Beneficiary", 207 | "exists": false, 208 | "excerpts": [] 209 | } 210 | ] 211 | } 212 | } 213 | ``` -------------------------------------------------------------------------------- /data/debug/complete_response_SimplicityEsportsGamingCompany.pdf.json: -------------------------------------------------------------------------------- 1 | ```json 2 | { 3 | "agreement": { 4 | "agreement_name": "Master Franchise Agreement", 5 | "agreement_type": "Franchise", 6 | "effective_date": "2018-11-20", 7 | "expiration_date": "", 8 | "renewal_term": "", 9 | "Notice_period_to_Terminate_Renewal": "", 10 | "parties": [ 11 | { 12 | "role": "Franchisor", 13 | "name": "Smaaash Entertainment Private Limited", 14 | "incorporation_country": "India", 15 | "incorporation_state": "Maharashtra" 16 | }, 17 | { 18 | "role": "Franchisee", 19 | "name": "I-AM Capital Acquisition Company", 20 | "incorporation_country": "United States of America", 21 | "incorporation_state": "New York" 22 | } 23 | ], 24 | "governing_law": { 25 | "country": "United States of America", 26 | "state": "New York", 27 | "most_favored_country": "United States of America" 28 | }, 29 | "clauses": [ 30 | { 31 | "clause_type": "Competitive Restriction Exception", 32 | "exists": false, 33 | "excerpts": [] 34 | }, 35 | { 36 | "clause_type": "Non-Compete", 37 | "exists": false, 38 | "excerpts": [] 39 | }, 40 | { 41 | "clause_type": "Exclusivity", 42 | "exists": true, 43 | "excerpts": [ 44 | "the rights granted herein include the limited license to use the Trademarks of the Franchisor...for the purposes of establishing and operating the Smaaash Centres in the Territory", 45 | "Franchisor hereby grants to Franchisee the exclusive right...to establish and operate Smaaash Centres 
in the Territory" 46 | ] 47 | }, 48 | { 49 | "clause_type": "No-Solicit Of Customers", 50 | "exists": false, 51 | "excerpts": [] 52 | }, 53 | { 54 | "clause_type": "No-Solicit Of Employees", 55 | "exists": false, 56 | "excerpts": [] 57 | }, 58 | { 59 | "clause_type": "Non-Disparagement", 60 | "exists": false, 61 | "excerpts": [] 62 | }, 63 | { 64 | "clause_type": "Termination For Convenience", 65 | "exists": false, 66 | "excerpts": [] 67 | }, 68 | { 69 | "clause_type": "Rofr/Rofo/Rofn", 70 | "exists": false, 71 | "excerpts": [] 72 | }, 73 | { 74 | "clause_type": "Change Of Control", 75 | "exists": false, 76 | "excerpts": [] 77 | }, 78 | { 79 | "clause_type": "Anti-Assignment", 80 | "exists": true, 81 | "excerpts": [ 82 | "The Franchisee shall not be entitled to assign, transfer, encumber or dispose of any of its rights and or obligations under this Agreement...without the prior written consent of the Franchisor" 83 | ] 84 | }, 85 | { 86 | "clause_type": "Revenue/Profit Sharing", 87 | "exists": true, 88 | "excerpts": [ 89 | "Franchisee shall be entitled to receive...5% (five percent) fee or commission of the revenue generated by such third party franchisees" 90 | ] 91 | }, 92 | { 93 | "clause_type": "Price Restrictions", 94 | "exists": true, 95 | "excerpts": [ 96 | "The prices of the products and services offered...shall be decided mutually among the parties" 97 | ] 98 | }, 99 | { 100 | "clause_type": "Minimum Commitment", 101 | "exists": true, 102 | "excerpts": [ 103 | "Franchisee...shall be under an obligation to set up at least 6 (six) Smaaash Centres during the first Contract Year" 104 | ] 105 | }, 106 | { 107 | "clause_type": "Volume Restriction", 108 | "exists": false, 109 | "excerpts": [] 110 | }, 111 | { 112 | "clause_type": "IP Ownership Assignment", 113 | "exists": true, 114 | "excerpts": [ 115 | "Franchisee, or any third party franchisee, by reason of this Agreement, has not and shall not acquire any right, title, interest or claim of ownership in any of 
the Franchisor Property" 116 | ] 117 | }, 118 | { 119 | "clause_type": "Joint IP Ownership", 120 | "exists": false, 121 | "excerpts": [] 122 | }, 123 | { 124 | "clause_type": "License grant", 125 | "exists": true, 126 | "excerpts": [ 127 | "Franchisor hereby grants to Franchisee the right to use the Trademarks...for the purpose of operating and promoting the Smaaash Centres in the Territory" 128 | ] 129 | }, 130 | { 131 | "clause_type": "Non-Transferable License", 132 | "exists": false, 133 | "excerpts": [] 134 | }, 135 | { 136 | "clause_type": "Affiliate License-Licensor", 137 | "exists": false, 138 | "excerpts": [] 139 | }, 140 | { 141 | "clause_type": "Affiliate License-Licensee", 142 | "exists": false, 143 | "excerpts": [] 144 | }, 145 | { 146 | "clause_type": "Unlimited/All-You-Can-Eat-License", 147 | "exists": false, 148 | "excerpts": [] 149 | }, 150 | { 151 | "clause_type": "Irrevocable Or Perpetual License", 152 | "exists": false, 153 | "excerpts": [] 154 | }, 155 | { 156 | "clause_type": "Source Code Escrow", 157 | "exists": false, 158 | "excerpts": [] 159 | }, 160 | { 161 | "clause_type": "Post-Termination Services", 162 | "exists": true, 163 | "excerpts": [ 164 | "All licenses and other rights granted to Franchisee hereunder shall immediately cease" 165 | ] 166 | }, 167 | { 168 | "clause_type": "Audit Rights", 169 | "exists": true, 170 | "excerpts": [ 171 | "During the Term, Franchisor shall have the right to conduct audits of Franchisee with respect to the Smaaash Centres" 172 | ] 173 | }, 174 | { 175 | "clause_type": "Uncapped Liability", 176 | "exists": false, 177 | "excerpts": [] 178 | }, 179 | { 180 | "clause_type": "Cap On Liability", 181 | "exists": false, 182 | "excerpts": [] 183 | }, 184 | { 185 | "clause_type": "Liquidated Damages", 186 | "exists": false, 187 | "excerpts": [] 188 | }, 189 | { 190 | "clause_type": "Warranty Duration", 191 | "exists": false, 192 | "excerpts": [] 193 | }, 194 | { 195 | "clause_type": "Insurance", 196 | "exists": 
true, 197 | "excerpts": [ 198 | "During the Term, Franchisee shall maintain policies of insurance as may be requested by Franchisor" 199 | ] 200 | }, 201 | { 202 | "clause_type": "Covenant Not To Sue", 203 | "exists": false, 204 | "excerpts": [] 205 | }, 206 | { 207 | "clause_type": "Third Party Beneficiary", 208 | "exists": false, 209 | "excerpts": [] 210 | } 211 | ] 212 | } 213 | } 214 | ``` -------------------------------------------------------------------------------- /data/input/AtnInternational.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-product-examples/graphrag-contract-review/04b5ac340968dbb77faffa9d5a62c65dbd5ab43f/data/input/AtnInternational.pdf -------------------------------------------------------------------------------- /data/input/CybergyHoldingsInc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-product-examples/graphrag-contract-review/04b5ac340968dbb77faffa9d5a62c65dbd5ab43f/data/input/CybergyHoldingsInc.pdf -------------------------------------------------------------------------------- /data/input/SimplicityEsportsGamingCompany.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-product-examples/graphrag-contract-review/04b5ac340968dbb77faffa9d5a62c65dbd5ab43f/data/input/SimplicityEsportsGamingCompany.pdf -------------------------------------------------------------------------------- /data/output/AtnInternational.pdf.json: -------------------------------------------------------------------------------- 1 | { 2 | "agreement": { 3 | "agreement_name": "Mobility Network General Agreement", 4 | "agreement_type": "Network Build and Maintenance Agreement", 5 | "effective_date": "2019-07-31", 6 | "expiration_date": "", 7 | "renewal_term": "", 8 | "Notice_period_to_Terminate_Renewal": "", 9 | "parties": [ 10 | { 11 | 
"role": "Vendor", 12 | "name": "Commnet Wireless, LLC", 13 | "incorporation_country": "United States of America", 14 | "incorporation_state": "Delaware" 15 | }, 16 | { 17 | "role": "AT&T Mobility LLC", 18 | "name": "Contracting Party", 19 | "incorporation_country": "United States of America", 20 | "incorporation_state": "Delaware" 21 | } 22 | ], 23 | "governing_law": { 24 | "country": "United States of America", 25 | "state": "New York", 26 | "most_favored_country": "United States of America" 27 | }, 28 | "clauses": [ 29 | { 30 | "clause_type": "Competitive Restriction Exception", 31 | "exists": false, 32 | "excerpts": [] 33 | }, 34 | { 35 | "clause_type": "Non-Compete", 36 | "exists": false, 37 | "excerpts": [] 38 | }, 39 | { 40 | "clause_type": "Exclusivity", 41 | "exists": false, 42 | "excerpts": [] 43 | }, 44 | { 45 | "clause_type": "No-Solicit Of Customers", 46 | "exists": false, 47 | "excerpts": [] 48 | }, 49 | { 50 | "clause_type": "No-Solicit Of Employees", 51 | "exists": false, 52 | "excerpts": [] 53 | }, 54 | { 55 | "clause_type": "Non-Disparagement", 56 | "exists": false, 57 | "excerpts": [] 58 | }, 59 | { 60 | "clause_type": "Termination For Convenience", 61 | "exists": false, 62 | "excerpts": [] 63 | }, 64 | { 65 | "clause_type": "Rofr/Rofo/Rofn", 66 | "exists": false, 67 | "excerpts": [] 68 | }, 69 | { 70 | "clause_type": "Change Of Control", 71 | "exists": true, 72 | "excerpts": [ 73 | "\u201cChange of Control\u201d means any contract or transaction or series of related contracts or transactions (regardless of form or structure) that would directly result in the Control of a Person or its business or assets changing from one Person to another Person (alone or in combination with any other third Person)." 
74 | ] 75 | }, 76 | { 77 | "clause_type": "Anti-Assignment", 78 | "exists": false, 79 | "excerpts": [] 80 | }, 81 | { 82 | "clause_type": "Revenue/Profit Sharing", 83 | "exists": false, 84 | "excerpts": [] 85 | }, 86 | { 87 | "clause_type": "Price Restrictions", 88 | "exists": false, 89 | "excerpts": [] 90 | }, 91 | { 92 | "clause_type": "Minimum Commitment", 93 | "exists": false, 94 | "excerpts": [] 95 | }, 96 | { 97 | "clause_type": "Volume Restriction", 98 | "exists": false, 99 | "excerpts": [] 100 | }, 101 | { 102 | "clause_type": "IP Ownership Assignment", 103 | "exists": true, 104 | "excerpts": [ 105 | "AT&T shall be the exclusive owner of all right, title, and interest in and to all Paid-For Development, including, without limitation, all Intellectual Property Rights therein and thereto." 106 | ] 107 | }, 108 | { 109 | "clause_type": "Joint IP Ownership", 110 | "exists": false, 111 | "excerpts": [] 112 | }, 113 | { 114 | "clause_type": "License grant", 115 | "exists": true, 116 | "excerpts": [ 117 | "Vendor hereby grants and promises to grant and have granted to AT&T and its Affiliates a royalty-free, nonexclusive, sublicensable, assignable, transferable, irrevocable, perpetual, world-wide license." 
118 | ] 119 | }, 120 | { 121 | "clause_type": "Non-Transferable License", 122 | "exists": false, 123 | "excerpts": [] 124 | }, 125 | { 126 | "clause_type": "Affiliate License-Licensor", 127 | "exists": false, 128 | "excerpts": [] 129 | }, 130 | { 131 | "clause_type": "Affiliate License-Licensee", 132 | "exists": false, 133 | "excerpts": [] 134 | }, 135 | { 136 | "clause_type": "Unlimited/All-You-Can-Eat-License", 137 | "exists": false, 138 | "excerpts": [] 139 | }, 140 | { 141 | "clause_type": "Irrevocable Or Perpetual License", 142 | "exists": true, 143 | "excerpts": [ 144 | "Vendor hereby grants and promises to grant and have granted to AT&T and its Affiliates a royalty-free, nonexclusive, sublicensable, assignable, transferable, irrevocable, perpetual, world-wide license." 145 | ] 146 | }, 147 | { 148 | "clause_type": "Source Code Escrow", 149 | "exists": false, 150 | "excerpts": [] 151 | }, 152 | { 153 | "clause_type": "Post-Termination Services", 154 | "exists": false, 155 | "excerpts": [] 156 | }, 157 | { 158 | "clause_type": "Audit Rights", 159 | "exists": false, 160 | "excerpts": [] 161 | }, 162 | { 163 | "clause_type": "Uncapped Liability", 164 | "exists": false, 165 | "excerpts": [] 166 | }, 167 | { 168 | "clause_type": "Cap On Liability", 169 | "exists": false, 170 | "excerpts": [] 171 | }, 172 | { 173 | "clause_type": "Liquidated Damages", 174 | "exists": false, 175 | "excerpts": [] 176 | }, 177 | { 178 | "clause_type": "Warranty Duration", 179 | "exists": false, 180 | "excerpts": [] 181 | }, 182 | { 183 | "clause_type": "Insurance", 184 | "exists": false, 185 | "excerpts": [] 186 | }, 187 | { 188 | "clause_type": "Covenant Not To Sue", 189 | "exists": false, 190 | "excerpts": [] 191 | }, 192 | { 193 | "clause_type": "Third Party Beneficiary", 194 | "exists": false, 195 | "excerpts": [] 196 | } 197 | ] 198 | } 199 | } -------------------------------------------------------------------------------- /data/output/CybergyHoldingsInc.pdf.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "agreement": { 3 | "agreement_name": "Marketing Affiliate Agreement", 4 | "agreement_type": "Affiliate Agreement", 5 | "effective_date": "2014-05-08", 6 | "expiration_date": "2014-12-31", 7 | "renewal_term": "1 year", 8 | "Notice_period_to_Terminate_Renewal": "30 days", 9 | "parties": [ 10 | { 11 | "role": "Company", 12 | "name": "Birch First Global Investments Inc.", 13 | "incorporation_country": "United States", 14 | "incorporation_state": "U.S. Virgin Islands" 15 | }, 16 | { 17 | "role": "Marketing Affiliate", 18 | "name": "Mount Knowledge Holdings Inc.", 19 | "incorporation_country": "United States", 20 | "incorporation_state": "Nevada" 21 | } 22 | ], 23 | "governing_law": { 24 | "country": "United States", 25 | "state": "Nevada", 26 | "most_favored_country": "United States" 27 | }, 28 | "clauses": [ 29 | { 30 | "clause_type": "Competitive Restriction Exception", 31 | "exists": false, 32 | "excerpts": [] 33 | }, 34 | { 35 | "clause_type": "Non-Compete", 36 | "exists": false, 37 | "excerpts": [] 38 | }, 39 | { 40 | "clause_type": "Exclusivity", 41 | "exists": false, 42 | "excerpts": [] 43 | }, 44 | { 45 | "clause_type": "No-Solicit Of Customers", 46 | "exists": false, 47 | "excerpts": [] 48 | }, 49 | { 50 | "clause_type": "No-Solicit Of Employees", 51 | "exists": false, 52 | "excerpts": [] 53 | }, 54 | { 55 | "clause_type": "Non-Disparagement", 56 | "exists": true, 57 | "excerpts": [ 58 | "avoid deceptive, misleading or unethical practices that are or might be detrimental to Company and/or its Affiliates, the Technology or the public, including but not limited to disparagement of Company or the Technology" 59 | ] 60 | }, 61 | { 62 | "clause_type": "Termination For Convenience", 63 | "exists": true, 64 | "excerpts": [ 65 | "This Agreement may be terminated by either party at the expiration of its term or any renewal term upon thirty (30) days written notice to the other 
party." 66 | ] 67 | }, 68 | { 69 | "clause_type": "Rofr/Rofo/Rofn", 70 | "exists": false, 71 | "excerpts": [] 72 | }, 73 | { 74 | "clause_type": "Change Of Control", 75 | "exists": false, 76 | "excerpts": [] 77 | }, 78 | { 79 | "clause_type": "Anti-Assignment", 80 | "exists": true, 81 | "excerpts": [ 82 | "MA may not assign, sell, lease or otherwise transfer in whole or in party any of the rights granted pursuant to this Agreement without prior written approval of Company." 83 | ] 84 | }, 85 | { 86 | "clause_type": "Revenue/Profit Sharing", 87 | "exists": false, 88 | "excerpts": [] 89 | }, 90 | { 91 | "clause_type": "Price Restrictions", 92 | "exists": true, 93 | "excerpts": [ 94 | "Company reserves the right to change its process and/or fees, from time to time, in its sole and absolute discretion." 95 | ] 96 | }, 97 | { 98 | "clause_type": "Minimum Commitment", 99 | "exists": true, 100 | "excerpts": [ 101 | "MA commits to purchase a minimum of 100 Units in aggregate within the Territory within the first six months of term of this Agreement." 102 | ] 103 | }, 104 | { 105 | "clause_type": "Volume Restriction", 106 | "exists": false, 107 | "excerpts": [] 108 | }, 109 | { 110 | "clause_type": "IP Ownership Assignment", 111 | "exists": false, 112 | "excerpts": [] 113 | }, 114 | { 115 | "clause_type": "Joint IP Ownership", 116 | "exists": false, 117 | "excerpts": [] 118 | }, 119 | { 120 | "clause_type": "License grant", 121 | "exists": true, 122 | "excerpts": [ 123 | "Company hereby grants to MA the right to advertise, market and sell to corporate users, government agencies and educational facilities" 124 | ] 125 | }, 126 | { 127 | "clause_type": "Non-Transferable License", 128 | "exists": true, 129 | "excerpts": [ 130 | "MA and its Clients receive no title to the Technology contained on the Technology." 
131 | ] 132 | }, 133 | { 134 | "clause_type": "Affiliate License-Licensor", 135 | "exists": false, 136 | "excerpts": [] 137 | }, 138 | { 139 | "clause_type": "Affiliate License-Licensee", 140 | "exists": false, 141 | "excerpts": [] 142 | }, 143 | { 144 | "clause_type": "Unlimited/All-You-Can-Eat-License", 145 | "exists": false, 146 | "excerpts": [] 147 | }, 148 | { 149 | "clause_type": "Irrevocable Or Perpetual License", 150 | "exists": false, 151 | "excerpts": [] 152 | }, 153 | { 154 | "clause_type": "Source Code Escrow", 155 | "exists": true, 156 | "excerpts": [ 157 | "MA agrees that it shall not, by itself or in association with any other party, reproduce, duplicate, copy, decompile, disassemble or reverse engineer the Technology in source form" 158 | ] 159 | }, 160 | { 161 | "clause_type": "Post-Termination Services", 162 | "exists": false, 163 | "excerpts": [] 164 | }, 165 | { 166 | "clause_type": "Audit Rights", 167 | "exists": false, 168 | "excerpts": [] 169 | }, 170 | { 171 | "clause_type": "Uncapped Liability", 172 | "exists": false, 173 | "excerpts": [] 174 | }, 175 | { 176 | "clause_type": "Cap On Liability", 177 | "exists": true, 178 | "excerpts": [ 179 | "Company's liability shall not exceed the fees that MA has paid under this Agreement." 180 | ] 181 | }, 182 | { 183 | "clause_type": "Liquidated Damages", 184 | "exists": false, 185 | "excerpts": [] 186 | }, 187 | { 188 | "clause_type": "Warranty Duration", 189 | "exists": true, 190 | "excerpts": [ 191 | "any claim for breach of warranty under subparagraph (A) hereof must be made in writing within (90) days from date of shipment." 
192 | ] 193 | }, 194 | { 195 | "clause_type": "Insurance", 196 | "exists": false, 197 | "excerpts": [] 198 | }, 199 | { 200 | "clause_type": "Covenant Not To Sue", 201 | "exists": false, 202 | "excerpts": [] 203 | }, 204 | { 205 | "clause_type": "Third Party Beneficiary", 206 | "exists": false, 207 | "excerpts": [] 208 | } 209 | ] 210 | } 211 | } -------------------------------------------------------------------------------- /data/output/SimplicityEsportsGamingCompany.pdf.json: -------------------------------------------------------------------------------- 1 | { 2 | "agreement": { 3 | "agreement_name": "Master Franchise Agreement", 4 | "agreement_type": "Franchise", 5 | "effective_date": "2018-11-20", 6 | "expiration_date": "", 7 | "renewal_term": "", 8 | "Notice_period_to_Terminate_Renewal": "", 9 | "parties": [ 10 | { 11 | "role": "Franchisor", 12 | "name": "Smaaash Entertainment Private Limited", 13 | "incorporation_country": "India", 14 | "incorporation_state": "Maharashtra" 15 | }, 16 | { 17 | "role": "Franchisee", 18 | "name": "I-AM Capital Acquisition Company", 19 | "incorporation_country": "United States of America", 20 | "incorporation_state": "New York" 21 | } 22 | ], 23 | "governing_law": { 24 | "country": "United States of America", 25 | "state": "New York", 26 | "most_favored_country": "United States of America" 27 | }, 28 | "clauses": [ 29 | { 30 | "clause_type": "Competitive Restriction Exception", 31 | "exists": false, 32 | "excerpts": [] 33 | }, 34 | { 35 | "clause_type": "Non-Compete", 36 | "exists": false, 37 | "excerpts": [] 38 | }, 39 | { 40 | "clause_type": "Exclusivity", 41 | "exists": true, 42 | "excerpts": [ 43 | "the rights granted herein include the limited license to use the Trademarks of the Franchisor...for the purposes of establishing and operating the Smaaash Centres in the Territory", 44 | "Franchisor hereby grants to Franchisee the exclusive right...to establish and operate Smaaash Centres in the Territory" 45 | ] 46 | }, 47 | { 
48 | "clause_type": "No-Solicit Of Customers", 49 | "exists": false, 50 | "excerpts": [] 51 | }, 52 | { 53 | "clause_type": "No-Solicit Of Employees", 54 | "exists": false, 55 | "excerpts": [] 56 | }, 57 | { 58 | "clause_type": "Non-Disparagement", 59 | "exists": false, 60 | "excerpts": [] 61 | }, 62 | { 63 | "clause_type": "Termination For Convenience", 64 | "exists": false, 65 | "excerpts": [] 66 | }, 67 | { 68 | "clause_type": "Rofr/Rofo/Rofn", 69 | "exists": false, 70 | "excerpts": [] 71 | }, 72 | { 73 | "clause_type": "Change Of Control", 74 | "exists": false, 75 | "excerpts": [] 76 | }, 77 | { 78 | "clause_type": "Anti-Assignment", 79 | "exists": true, 80 | "excerpts": [ 81 | "The Franchisee shall not be entitled to assign, transfer, encumber or dispose of any of its rights and or obligations under this Agreement...without the prior written consent of the Franchisor" 82 | ] 83 | }, 84 | { 85 | "clause_type": "Revenue/Profit Sharing", 86 | "exists": true, 87 | "excerpts": [ 88 | "Franchisee shall be entitled to receive...5% (five percent) fee or commission of the revenue generated by such third party franchisees" 89 | ] 90 | }, 91 | { 92 | "clause_type": "Price Restrictions", 93 | "exists": true, 94 | "excerpts": [ 95 | "The prices of the products and services offered...shall be decided mutually among the parties" 96 | ] 97 | }, 98 | { 99 | "clause_type": "Minimum Commitment", 100 | "exists": true, 101 | "excerpts": [ 102 | "Franchisee...shall be under an obligation to set up at least 6 (six) Smaaash Centres during the first Contract Year" 103 | ] 104 | }, 105 | { 106 | "clause_type": "Volume Restriction", 107 | "exists": false, 108 | "excerpts": [] 109 | }, 110 | { 111 | "clause_type": "IP Ownership Assignment", 112 | "exists": true, 113 | "excerpts": [ 114 | "Franchisee, or any third party franchisee, by reason of this Agreement, has not and shall not acquire any right, title, interest or claim of ownership in any of the Franchisor Property" 115 | ] 116 | }, 
117 | { 118 | "clause_type": "Joint IP Ownership", 119 | "exists": false, 120 | "excerpts": [] 121 | }, 122 | { 123 | "clause_type": "License grant", 124 | "exists": true, 125 | "excerpts": [ 126 | "Franchisor hereby grants to Franchisee the right to use the Trademarks...for the purpose of operating and promoting the Smaaash Centres in the Territory" 127 | ] 128 | }, 129 | { 130 | "clause_type": "Non-Transferable License", 131 | "exists": false, 132 | "excerpts": [] 133 | }, 134 | { 135 | "clause_type": "Affiliate License-Licensor", 136 | "exists": false, 137 | "excerpts": [] 138 | }, 139 | { 140 | "clause_type": "Affiliate License-Licensee", 141 | "exists": false, 142 | "excerpts": [] 143 | }, 144 | { 145 | "clause_type": "Unlimited/All-You-Can-Eat-License", 146 | "exists": false, 147 | "excerpts": [] 148 | }, 149 | { 150 | "clause_type": "Irrevocable Or Perpetual License", 151 | "exists": false, 152 | "excerpts": [] 153 | }, 154 | { 155 | "clause_type": "Source Code Escrow", 156 | "exists": false, 157 | "excerpts": [] 158 | }, 159 | { 160 | "clause_type": "Post-Termination Services", 161 | "exists": true, 162 | "excerpts": [ 163 | "All licenses and other rights granted to Franchisee hereunder shall immediately cease" 164 | ] 165 | }, 166 | { 167 | "clause_type": "Audit Rights", 168 | "exists": true, 169 | "excerpts": [ 170 | "During the Term, Franchisor shall have the right to conduct audits of Franchisee with respect to the Smaaash Centres" 171 | ] 172 | }, 173 | { 174 | "clause_type": "Uncapped Liability", 175 | "exists": false, 176 | "excerpts": [] 177 | }, 178 | { 179 | "clause_type": "Cap On Liability", 180 | "exists": false, 181 | "excerpts": [] 182 | }, 183 | { 184 | "clause_type": "Liquidated Damages", 185 | "exists": false, 186 | "excerpts": [] 187 | }, 188 | { 189 | "clause_type": "Warranty Duration", 190 | "exists": false, 191 | "excerpts": [] 192 | }, 193 | { 194 | "clause_type": "Insurance", 195 | "exists": true, 196 | "excerpts": [ 197 | "During the 
import ast
from collections.abc import Mapping

from neo4j import Record
from neo4j_graphrag.types import RetrieverResultItem


def my_excerpt_record_formatter(record: Record) -> RetrieverResultItem:
    """Format a retriever record as an ``"Excerpt: <text>"`` result item.

    Args:
        record: Row returned by the retriever query. The ``score``,
            ``nodeLabels``, ``id`` and ``node`` fields are read from it.

    Returns:
        A ``RetrieverResultItem`` whose content is the node's ``text``
        property prefixed with ``"Excerpt: "`` and whose metadata carries
        ``score``, ``nodeLabels`` and ``id``.

    Raises:
        KeyError: If the node has no ``text`` property.
        ValueError, SyntaxError: If the node is not mapping-like and its
            string form cannot be parsed as a Python literal.
    """
    metadata = {
        "score": record.get("score"),
        "nodeLabels": record.get("nodeLabels"),
        "id": record.get("id"),
    }

    node = record.get("node")
    if not isinstance(node, Mapping):
        # Fallback kept from the original implementation: parse the node's
        # string form as a Python literal.
        node = ast.literal_eval(str(node))
    # Mapping-like nodes are used directly. Round-tripping them through
    # str() + literal_eval() fails as soon as any property value has a repr
    # that is not a Python literal, and re-parses every property for nothing.
    # NOTE(review): presumably the query returns the node as a map (or a
    # driver Node, which is expected to be mapping-like) — confirm.
    node_text = "Excerpt: " + node["text"]

    return RetrieverResultItem(content=node_text, metadata=metadata)


def my_vector_search_excerpt_record_formatter(record: Record) -> RetrieverResultItem:
    """Format a vector-search record as a dict of agreement/clause fields.

    Args:
        record: Row returned by the retrieval query. The fields
            ``agreement_name``, ``contract_id``, ``clause_type`` and
            ``excerpt`` are read from it; a missing field yields ``None``.

    Returns:
        A ``RetrieverResultItem`` whose content is a dict with those four
        fields and whose metadata carries ``contract_id`` plus a fixed list
        of node labels.
    """
    metadata = {
        "contract_id": record.get("contract_id"),
        "nodeLabels": ["Excerpt", "Agreement", "ContractClause"],
    }

    # The keys below are expected to mirror the aliases of the retrieval
    # query's RETURN statement, along the lines of:
    #   RETURN a.name AS agreement_name, a.contract_id AS contract_id,
    #          cc.type AS clause_type, node.text AS excerpt
    # NOTE(review): the query is defined outside this module — confirm that
    # its last alias is spelled "excerpt", otherwise that field is always None.
    result_dict = {
        "agreement_name": record.get("agreement_name"),
        "contract_id": record.get("contract_id"),
        "clause_type": record.get("clause_type"),
        "excerpt": record.get("excerpt"),
    }

    return RetrieverResultItem(content=result_dict, metadata=metadata)
-------------------------------------------------------------------------------- 1 | Generate a valid JSON document. Do not include anything else other than the JSON document 2 | Using the Answers to the following questions and The schema of the resulting JSON file (which is specified further down) 3 | In your answers, Use information exclusively on this contract. 4 | 5 | 1) What type of contract is this? 6 | 2) Who are the parties and their roles? Where are they incorporated? Name state and country (use ISO 3166 Country name) 7 | 3) What is the Agreement Date? (if absolute date is mentioned use yyyy-mm-dd) 8 | 4) What is the Effective date? (if absolute date is mentioned use yyyy-mm-dd) 9 | 5) What is the expiration date? (if absolute date is mentioned use yyyy-mm-dd) 10 | 6) What is the Renewal Term? 11 | 7) What is the Notice Period To Terminate Renewal? 12 | 8) What is the governing law? 13 | Name the state and country (use ISO 3166 Country name) 14 | 9) If multiple countries are in the governing law, what is the most favoured country? if there is only one country just repeat the same information for governing law 15 | 16 | 10) For each of the contract clause types, extract the following: 17 | a) A Yes/No that indicates if you think the clause is found in this contract 18 | b) A list of full (long) excerpts, directly taken from the contract that give you reason to believe that this clause type exists.
19 | 20 | 21 | The only Contract Clause types are: Competitive Restriction Exception, Non-Compete, Exclusivity, No-Solicit Of Customers, 22 | No-Solicit Of Employees, Non-Disparagement, Termination For Convenience, Rofr/Rofo/Rofn, Change Of Control, 23 | Anti-Assignment, Revenue/Profit Sharing, Price Restrictions, Minimum Commitment,Volume Restriction, 24 | IP Ownership Assignment, Joint IP Ownership, License grant, Non-Transferable License, 25 | Affiliate License-Licensor, Affiliate License-Licensee,Unlimited/All-You-Can-Eat-License,Irrevocable Or Perpetual License, 26 | Source Code Escrow, Post-Termination Services, Audit Rights, Uncapped Liability, Cap On Liability, Liquidated Damages, 27 | Warranty Duration, Insurance, Covenant Not To Sue, Third Party Beneficiary. 28 | 29 | Finally, Using the answers to the questions above, provide your final answer in a JSON document. 30 | Make sure the JSON document is VALID and adheres to the correct format. 31 | 32 | The JSON document has the following structure: 33 | 34 | { 35 | "agreement": { 36 | "agreement_name": "string", 37 | "agreement_type": "string", 38 | "effective_date": "string", 39 | "expiration_date": "string", 40 | "renewal_term": "string", 41 | "Notice_period_to_Terminate_Renewal": "string", 42 | "parties": [ 43 | { 44 | "role": "string", 45 | "name": "string", 46 | "incorporation_country": "string", 47 | "incorporation_state": "string" 48 | } 49 | ], 50 | "governing_law": { 51 | "country": "string", 52 | "state": "string", 53 | "most_favored_country": "string" 54 | }, 55 | "clauses": [ 56 | { 57 | "clause_type": "string", 58 | "exists": "boolean", 59 | "excerpts": ["string"] 60 | } 61 | ] 62 | } 63 | } 64 | Ensure the JSON is valid and correctly formatted. 
65 | 66 | 67 | -------------------------------------------------------------------------------- /prompts/system_prompt.txt: -------------------------------------------------------------------------------- 1 | You are a seasoned legal expert specializing in the meticulous review and analysis of commercial contracts. Your expertise lies in identifying critical elements within legal documents, assessing compliance with legal standards, and ensuring that contracts serve the best interests of the parties involved. Your approach is thorough, detail-oriented, and guided by a deep understanding of legal principles and commercial practices. 2 | You will be presented with contracts and be asked questions by users who usually need their output in JSON format. 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | neo4j-rust-ext == 5.25.0.0 2 | neo4j-graphrag == 1.0.0 3 | semantic-kernel == 1.11.0 4 | streamlit == 1.39.0 5 | openai == 1.51.2 6 | -------------------------------------------------------------------------------- /test_agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import asyncio 3 | from semantic_kernel import Kernel 4 | from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion 5 | from semantic_kernel.contents.chat_history import ChatHistory 6 | from ContractPlugin import ContractPlugin 7 | from ContractService import ContractSearchService 8 | from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase 9 | from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( 10 | OpenAIChatPromptExecutionSettings) 11 | from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior 12 | from semantic_kernel.functions.kernel_arguments import KernelArguments 
import logging


logging.basicConfig(level=logging.INFO)

# Connection settings are read from the environment.
OPENAI_KEY = os.getenv('OPENAI_API_KEY')
NEO4J_URI = os.getenv('NEO4J_URI', 'bolt://localhost:7687')
NEO4J_USER = os.getenv('NEO4J_USERNAME', 'neo4j')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
service_id = "contract_search"

# Initialize the kernel
kernel = Kernel()

# Add the Contract Search plugin to the kernel
contract_search_neo4j = ContractSearchService(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
kernel.add_plugin(
    ContractPlugin(contract_search_service=contract_search_neo4j),
    plugin_name="contract_search",
)

# Add the OpenAI chat completion service to the kernel
kernel.add_service(
    OpenAIChatCompletion(ai_model_id="gpt-4o", api_key=OPENAI_KEY, service_id=service_id)
)

# Enable automatic function calling, restricted to the contract_search plugin
settings: OpenAIChatPromptExecutionSettings = kernel.get_prompt_execution_settings_from_service_id(
    service_id=service_id
)
settings.function_choice_behavior = FunctionChoiceBehavior.Auto(
    filters={"included_plugins": ["contract_search"]}
)

# Conversation history shared across all turns of basic_agent()
history = ChatHistory()


async def basic_agent():
    """Run an interactive console chat until the user types ``exit``.

    Each non-empty line read from stdin is appended to the module-level
    ``history``, sent to the chat completion service together with
    ``settings`` and ``kernel``, and the first returned message is printed
    and appended to ``history`` as well.

    The loop also ends cleanly on end-of-input (e.g. Ctrl-D or a closed
    pipe) instead of crashing with ``EOFError``; blank lines are ignored
    rather than being sent as empty user messages.
    """
    # The service lookup does not depend on the user's input: resolve it once
    # instead of on every turn.
    chat_completion: OpenAIChatCompletion = kernel.get_service(type=ChatCompletionClientBase)

    while True:
        # Collect user input
        try:
            user_input = input("User > ").strip()
        except EOFError:
            # stdin was closed: treat it like an explicit "exit"
            print()
            break

        # Terminate the loop if the user says "exit"
        if user_input == "exit":
            break

        # Nothing to ask: prompt again without calling the model
        if not user_input:
            continue

        # Add user input to the history
        history.add_user_message(user_input)

        # Get the response from the AI with automatic function calling
        result = (
            await chat_completion.get_chat_message_contents(
                chat_history=history,
                settings=settings,
                kernel=kernel,
                arguments=KernelArguments(),
            )
        )[0]

        # Print the results
        print("Assistant > " + str(result))

        # Add the message from the agent to the chat history
        history.add_message(result)


async def test_contract_search():
    """Print the kernel's answer to a prompt asking for contract 1 as JSON."""
    print(
        await kernel.invoke_prompt(
            function_name="get_contract",
            plugin_name="contract_search",
            prompt="Can you get me information for contract 1 and return in JSON format",
            settings=settings,
        )
    )


async def test_contracts_search():
    """Print the kernel's answer to a prompt asking for one party's contracts."""
    print(
        await kernel.invoke_prompt(
            function_name="get_contracts",
            plugin_name="contract_search",
            prompt="Can you get me contracts for Mount Knowledge",
            settings=settings,
        )
    )


async def test_contracts_without_clause_search():
    """Print the kernel's answer to a prompt asking for contracts lacking a clause."""
    print(
        await kernel.invoke_prompt(
            function_name="get_contracts_without_clause",
            plugin_name="contract_search",
            prompt="Can you get me contracts without non compete clauses",
            settings=settings,
        )
    )


async def test_contracts_with_clause_search():
    """Print the kernel's answer to a prompt asking for contracts having a clause."""
    print(
        await kernel.invoke_prompt(
            function_name="get_contracts_with_clause_type",
            plugin_name="contract_search",
            # The original prompt misspelled "clause" as "caluse".
            prompt="Can you get me contracts with non compete clause",
            settings=settings,
        )
    )


if __name__ == "__main__":

    asyncio.run(basic_agent())

    # OR test individual data retrieval functions
    #asyncio.run(test_contract_search())
    #asyncio.run(test_contracts_search())
    #asyncio.run(test_contracts_without_clause_search())
    #asyncio.run(test_contracts_with_clause_search())
123 | 124 | --------------------------------------------------------------------------------