├── docs
    ├── frontend
    │   └── frontend_docs.adoc
    ├── project architecture.png
    └── project_docs.adoc
├── POC_Experiments
    └── Data_Analysis
├── frontend
    ├── .dockerignore
    ├── .prettierignore
    ├── src
    │   ├── vite-env.d.ts
    │   ├── assets
    │   │   ├── images
    │   │   │   ├── gcs.webp
    │   │   │   ├── s3logo.png
    │   │   │   ├── chatbot-ai.png
    │   │   │   ├── chatbot-user.png
    │   │   │   ├── youtubeimg.jfif
    │   │   │   ├── internet_logo.png
    │   │   │   ├── Neo4jRetrievalLogo.png
    │   │   │   ├── bgImage.svg
    │   │   │   ├── dropzone.svg
    │   │   │   ├── db-search.svg
    │   │   │   ├── youtube.svg
    │   │   │   ├── youtube-lightmode.svg
    │   │   │   ├── youtube-darkmode.svg
    │   │   │   ├── web-search-svgrepo-com (2).svg
    │   │   │   ├── graph-search.svg
    │   │   │   ├── web-search-svgrepo-com.svg
    │   │   │   ├── web.svg
    │   │   │   ├── web-search-darkmode-final2.svg
    │   │   │   └── web-darkmode.svg
    │   │   ├── ChatbotMessages.json
    │   │   └── schemas.json
    │   ├── router.tsx
    │   ├── components
    │   │   ├── UI
    │   │   │   ├── CustomProgressBar.tsx
    │   │   │   ├── Legend.tsx
    │   │   │   ├── CustomButton.tsx
    │   │   │   ├── Alert.tsx
    │   │   │   ├── IconButtonToolTip.tsx
    │   │   │   ├── Menu.tsx
    │   │   │   ├── ButtonWithToolTip.tsx
    │   │   │   ├── ErrroBoundary.tsx
    │   │   │   └── HoverableLink.tsx
    │   │   ├── DataSources
    │   │   │   ├── GCS
    │   │   │   │   └── GCSButton.tsx
    │   │   │   └── AWS
    │   │   │   │   └── S3Bucket.tsx
    │   │   ├── Graph
    │   │   │   ├── LegendsChip.tsx
    │   │   │   ├── GraphViewButton.tsx
    │   │   │   └── CheckboxSelection.tsx
    │   │   ├── Layout
    │   │   │   ├── AlertIcon.tsx
    │   │   │   ├── DrawerChatbot.tsx
    │   │   │   └── Header.tsx
    │   │   ├── WebSources
    │   │   │   ├── Web
    │   │   │   │   └── WebInput.tsx
    │   │   │   ├── WikiPedia
    │   │   │   │   └── WikipediaInput.tsx
    │   │   │   ├── Youtube
    │   │   │   │   └── YoutubeInput.tsx
    │   │   │   ├── CustomSourceInput.tsx
    │   │   │   ├── GenericSourceButton.tsx
    │   │   │   └── GenericSourceModal.tsx
    │   │   ├── ChatBot
    │   │   │   ├── ChatModeToggle.tsx
    │   │   │   └── ExpandedChatButtonContainer.tsx
    │   │   ├── Popups
    │   │   │   ├── DeletePopUp
    │   │   │   │   └── DeletePopUp.tsx
    │   │   │   ├── LargeFilePopUp
    │   │   │   │   ├── ConfirmationDialog.tsx
    │   │   │   │   └── LargeFilesAlert.tsx
    │   │   │   └── GraphEnhancementDialog
    │   │   │   │   └── index.tsx
    │   │   ├── Dropdown.tsx
    │   │   └── QuickStarter.tsx
    │   ├── main.tsx
    │   ├── index.css
    │   ├── services
    │   │   ├── HealthStatus.ts
    │   │   ├── GetFiles.ts
    │   │   ├── SchemaFromTextAPI.ts
    │   │   ├── GetNodeLabelsRelTypes.ts
    │   │   ├── GetOrphanNodes.ts
    │   │   ├── ConnectAPI.ts
    │   │   ├── DeleteOrphanNodes.ts
    │   │   ├── ChunkEntitiesInfo.ts
    │   │   ├── CommonAPI.ts
    │   │   ├── PostProcessing.ts
    │   │   ├── ServerSideStatusUpdateAPI.ts
    │   │   ├── CancelAPI.ts
    │   │   ├── GraphQuery.ts
    │   │   ├── DeleteFiles.ts
    │   │   ├── PollingAPI.ts
    │   │   ├── QnaAPI.ts
    │   │   └── URLScan.ts
    │   ├── utils
    │   │   ├── Loader.tsx
    │   │   └── FileAPI.ts
    │   ├── HOC
    │   │   ├── SettingModalHOC.tsx
    │   │   └── CustomModal.tsx
    │   ├── styling
    │   │   └── info.css
    │   ├── App.tsx
    │   ├── context
    │   │   ├── UserCredentials.tsx
    │   │   ├── UserMessages.tsx
    │   │   ├── Alert.tsx
    │   │   ├── ThemeWrapper.tsx
    │   │   └── UsersFiles.tsx
    │   ├── hooks
    │   │   ├── useSpeech.tsx
    │   │   └── useSse.tsx
    │   ├── logo.svg
    │   └── logo-color.svg
    ├── postcss.config.js
    ├── public
    │   ├── favicons
    │   │   ├── favicon.ico
    │   │   ├── favicon-16x16.png
    │   │   ├── favicon-32x32.png
    │   │   └── favicon-194x194.png
    │   ├── paragraph-left-align.svg
    │   └── paginate-filter-text.svg
    ├── .lintstagedrc.json
    ├── tsconfig.node.json
    ├── .prettierrc.json
    ├── nginx
    │   └── nginx.conf
    ├── tailwind.config.js
    ├── example.env
    ├── vite.config.ts
    ├── index.html
    ├── tsconfig.json
    ├── README.md
    ├── Dockerfile
    ├── package.json
    └── .gitignore
├── data
    └── README.md
├── experiments
    ├── README.md
    ├── Combined chunk comparision.png
    ├── LLM Comparisons with one pdf.docx
    ├── Experimentations for Kg creation.docx
    └── LLM_Results_.csv
├── POC_Documents
    └── V1
    │   ├── figure.2,3.jpg
    │   ├── figure.4.jpg
    │   └── Local-to-global-genAI_GraphRAG_V1
├── backend
    ├── src
    │   ├── entities
    │   │   ├── user_credential.py
    │   │   └── source_node.py
    │   ├── document_sources
    │   │   ├── web_pages.py
    │   │   ├── wikipedia.py
    │   │   ├── youtube.py
    │   │   ├── s3_bucket.py
    │   │   └── local_file.py
    │   ├── logger.py
    │   ├── diffbot_transformer.py
    │   ├── openAI_llm.py
    │   ├── api_response.py
    │   ├── create_chunks.py
    │   ├── generate_graphDocuments_from_llm.py
    │   ├── shared
    │   │   └── schema_extraction.py
    │   ├── groq_llama3_llm.py
    │   ├── gemini_llm.py
    │   └── post_processing.py
    ├── Dockerfile
    ├── example.env
    ├── README.md
    └── requirements.txt
├── example.env
├── docker-compose.yml
└── .gitignore


/docs/frontend/frontend_docs.adoc:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/POC_Experiments/Data_Analysis:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/frontend/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules


--------------------------------------------------------------------------------
/frontend/.prettierignore:
--------------------------------------------------------------------------------
1 | dist
2 | node_modules
3 | docs


--------------------------------------------------------------------------------
/frontend/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 | 


--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | # llm-graph-builder
2 | Neo4j graph construction from unstructured data
3 | 


--------------------------------------------------------------------------------
/experiments/README.md:
--------------------------------------------------------------------------------
1 | # llm-graph-builder
2 | Neo4j graph construction from unstructured data
3 | 


--------------------------------------------------------------------------------
/POC_Documents/V1/figure.2,3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/POC_Documents/V1/figure.2,3.jpg


--------------------------------------------------------------------------------
/POC_Documents/V1/figure.4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/POC_Documents/V1/figure.4.jpg


--------------------------------------------------------------------------------
/docs/project architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/docs/project architecture.png


--------------------------------------------------------------------------------
/frontend/postcss.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 |   plugins: {
3 |     tailwindcss: {},
4 |     autoprefixer: {},
5 |   },
6 | }
7 | 


--------------------------------------------------------------------------------
/frontend/public/favicons/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/frontend/public/favicons/favicon.ico


--------------------------------------------------------------------------------
/frontend/src/assets/images/gcs.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/frontend/src/assets/images/gcs.webp


--------------------------------------------------------------------------------
/backend/src/entities/user_credential.py:
--------------------------------------------------------------------------------
1 | class user_credential:  # Bare holder that only declares four str-annotated fields; no defaults or __init__ are defined here.
2 |     uri:str  # connection URI -- presumably the Neo4j endpoint; confirm against callers
3 |     user_name:str  # account name paired with the URI
4 |     password:str  # NOTE(review): whether this is plain text or encoded is not visible here
5 |     database:str  # target database name


--------------------------------------------------------------------------------
/frontend/src/assets/images/s3logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/frontend/src/assets/images/s3logo.png


--------------------------------------------------------------------------------
/frontend/src/assets/images/chatbot-ai.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/frontend/src/assets/images/chatbot-ai.png


--------------------------------------------------------------------------------
/experiments/Combined chunk comparision.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/experiments/Combined chunk comparision.png


--------------------------------------------------------------------------------
/frontend/public/favicons/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/frontend/public/favicons/favicon-16x16.png


--------------------------------------------------------------------------------
/frontend/public/favicons/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/frontend/public/favicons/favicon-32x32.png


--------------------------------------------------------------------------------
/frontend/src/assets/images/chatbot-user.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/frontend/src/assets/images/chatbot-user.png


--------------------------------------------------------------------------------
/frontend/src/assets/images/youtubeimg.jfif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/frontend/src/assets/images/youtubeimg.jfif


--------------------------------------------------------------------------------
/experiments/LLM Comparisons with one pdf.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/experiments/LLM Comparisons with one pdf.docx


--------------------------------------------------------------------------------
/frontend/public/favicons/favicon-194x194.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/frontend/public/favicons/favicon-194x194.png


--------------------------------------------------------------------------------
/frontend/src/assets/images/internet_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/frontend/src/assets/images/internet_logo.png


--------------------------------------------------------------------------------
/experiments/Experimentations for Kg creation.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/experiments/Experimentations for Kg creation.docx


--------------------------------------------------------------------------------
/frontend/src/assets/images/Neo4jRetrievalLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidagarwal04/llm-graph-builder/main/frontend/src/assets/images/Neo4jRetrievalLogo.png


--------------------------------------------------------------------------------
/frontend/.lintstagedrc.json:
--------------------------------------------------------------------------------
1 | {
2 |     "*.ts": ["prettier --write", "eslint --fix"],
3 |     "*.tsx": ["prettier --write", "eslint --fix"],
4 |     "*.json": ["prettier --write"],
5 |     "*.js": ["prettier --write"]
6 |   }


--------------------------------------------------------------------------------
/frontend/src/router.tsx:
--------------------------------------------------------------------------------
 1 | import { createBrowserRouter } from 'react-router-dom';
 2 | import App from './App';
 3 | 
 4 | // Route table: the root path renders the top-level <App /> component.
 5 | const routes = [
 6 |   {
 7 |     path: '/',
 8 |     element: <App />,
 9 |   },
10 | ];
11 | const router = createBrowserRouter(routes);
12 | 
13 | export default router;
14 | 


--------------------------------------------------------------------------------
/frontend/src/assets/images/bgImage.svg:
--------------------------------------------------------------------------------
1 | <svg width='100%' height='100%' xmlns='http://www.w3.org/2000/svg'><rect width='100%' height='100%' fill='none' rx='8' ry='8' stroke='#C4C8CD' stroke-width='1.5' stroke-dasharray='6' stroke-dashoffset='0' stroke-linecap='square'/></svg>


--------------------------------------------------------------------------------
/frontend/tsconfig.node.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "composite": true,
 4 |     "skipLibCheck": true,
 5 |     "module": "ESNext",
 6 |     "moduleResolution": "bundler",
 7 |     "allowSyntheticDefaultImports": true
 8 |   },
 9 |   "include": ["vite.config.ts"]
10 | }
11 | 


--------------------------------------------------------------------------------
/frontend/public/paragraph-left-align.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path d="M.75 2.998h21m-21 4.5h18m-18 4.5h22.5m-22.5 4.5h18m-18 4.5h21" style="fill:none;stroke:#000;stroke-linecap:round;stroke-linejoin:round;stroke-width:1.5px"/></svg>


--------------------------------------------------------------------------------
/frontend/.prettierrc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "printWidth": 120,
 3 |   "semi": true,
 4 |   "singleQuote": true,
 5 |   "jsxSingleQuote": true,
 6 |   "useTabs": false,
 7 |   "tabWidth": 2,
 8 |   "arrowParens": "always",
 9 |   "trailingComma": "es5",
10 |   "bracketSpacing": true,
11 |   "endOfLine": "lf"
12 | }


--------------------------------------------------------------------------------
/frontend/src/components/UI/CustomProgressBar.tsx:
--------------------------------------------------------------------------------
1 | import { ProgressBar } from '@neo4j-ndl/react';
2 | 
3 | /** Small progress bar whose heading reads 'Uploading ' below 100 and 'Uploaded' otherwise. */
4 | export default function CustomProgressBar({ value }: { value: number }) {
5 |   const isComplete = !(value < 100);
6 |   const heading = isComplete ? 'Uploaded' : 'Uploading ';
7 |   return <ProgressBar heading={heading} size='small' value={value} />;
8 | }
9 | 


--------------------------------------------------------------------------------
/frontend/src/main.tsx:
--------------------------------------------------------------------------------
1 | import ReactDOM from 'react-dom/client';
2 | import './index.css';
3 | import { RouterProvider } from 'react-router-dom';
4 | import router from './router.tsx';
5 | 
6 | // Mount the router-driven application on the element with id "root".
7 | const rootElement = document.getElementById('root')!;
8 | ReactDOM.createRoot(rootElement).render(<RouterProvider router={router} />);
9 | 


--------------------------------------------------------------------------------
/frontend/src/index.css:
--------------------------------------------------------------------------------
 1 | @tailwind base;
 2 | @tailwind components;
 3 | @tailwind utilities;
 4 | 
 5 | body {
 6 |   margin: 0;
 7 | }
 8 | 
 9 | * {
10 |   margin: 0;
11 |   padding: 0;
12 |   box-sizing: border-box;
13 | }
14 | 
15 | 
16 | 
17 | .ndl-progress-bar-wrapper .ndl-header .ndl-heading::after{
18 |   animation: none !important;
19 | }
20 | 


--------------------------------------------------------------------------------
/frontend/nginx/nginx.conf:
--------------------------------------------------------------------------------
 1 | server {
 2 | 
 3 |   listen 8080;
 4 | 
 5 |   # Serve the built frontend; paths that match no file fall back to /index.html.
 6 |   location / {
 7 |     root /usr/share/nginx/html;
 8 |     index index.html index.htm;
 9 |     try_files $uri $uri/ /index.html;
10 |   }
11 | 
12 |   # The target must be an absolute URI ("/index.html") for nginx to perform an
13 |   # internal redirect; a bare "index.html" is answered with an external redirect instead.
14 |   error_page 401 403 404 /index.html;
15 | 
16 |   location /public {
17 |     root /usr/local/var/www;
18 |   }
19 | }


--------------------------------------------------------------------------------
/frontend/src/assets/images/dropzone.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg"  fill="#e2e3e5" viewBox="0 0 24 24" stroke-width="1.5" stroke="#ffffff" aria-hidden="true" ><path stroke-linecap="round" stroke-linejoin="round" d="M12 16.5V9.75m0 0l3 3m-3-3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>


--------------------------------------------------------------------------------
/frontend/src/components/UI/Legend.tsx:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Colored legend chip: shows `title` and, when a count is supplied, the count in parentheses.
 3 |  *
 4 |  * @param bgColor - CSS color used as the chip background.
 5 |  * @param title - Text shown in the chip.
 6 |  * @param chunkCount - Optional count rendered as `(n)` after the title.
 7 |  */
 8 | export default function Legend({
 9 |   bgColor,
10 |   title,
11 |   chunkCount,
12 | }: {
13 |   bgColor: string;
14 |   title: string;
15 |   chunkCount?: number;
16 | }) {
17 |   return (
18 |     <div className='legend' style={{ backgroundColor: bgColor }}>
19 |       {title}
20 |       {/* Explicit undefined check: `chunkCount && ...` would render a bare "0" for a zero count. */}
21 |       {chunkCount !== undefined && `(${chunkCount})`}
22 |     </div>
23 |   );
24 | }
25 | 


--------------------------------------------------------------------------------
/frontend/tailwind.config.js:
--------------------------------------------------------------------------------
 1 | /** @type {import('tailwindcss').Config} */
 2 | export default {
 3 |   content: [
 4 |     "./index.html",
 5 |     "./src/**/*.{js,ts,jsx,tsx}",
 6 |   ],
 7 |   theme: {
 8 |     extend: {},
 9 |   },
10 |   plugins: [],
11 |   presets:[require('@neo4j-ndl/base').tailwindConfig],
12 |   corePlugins: {
13 |     preflight: false,
14 |   },
15 |   prefix:""
16 | }
17 | 
18 | 


--------------------------------------------------------------------------------
/frontend/public/paginate-filter-text.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><defs><style>.a{fill:none;stroke:#000;stroke-linecap:round;stroke-linejoin:round;stroke-width:1.5px}</style></defs><rect width="19.5" height="19.5" x="3.75" y=".746" class="a" rx="1.5" ry="1.5"/><path d="M20.25 23.246h-18a1.5 1.5 0 0 1-1.5-1.5v-18m7.5 3h10.5m-10.5 3h10.5m-10.5 3h7.5" class="a"/></svg>


--------------------------------------------------------------------------------
/frontend/src/services/HealthStatus.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | /**
 4 |  * Issues a GET against `${url()}/health` and returns the axios response.
 5 |  * Any failure is logged to the console and rethrown to the caller.
 6 |  */
 7 | async function healthStatus() {
 8 |   try {
 9 |     const endpoint = `${url()}/health`;
10 |     return await axios.get(endpoint);
11 |   } catch (error) {
12 |     console.log('API status error', error);
13 |     throw error;
14 |   }
15 | }
16 | 
17 | export { healthStatus };
18 | 


--------------------------------------------------------------------------------
/frontend/src/utils/Loader.tsx:
--------------------------------------------------------------------------------
 1 | /** Large spinner (announced as 'Loading content') with the given title rendered underneath. */
 2 | const Loader = ({ title }: { title: string }) => (
 3 |   <div className='n-flex n-flex-col n-justify-center n-items-center n-gap-y-2'>
 4 |     <div className='ndl-spin-wrapper ndl-large' role='status' aria-label='Loading content' aria-live='polite'>
 5 |       <div className='ndl-spin' />
 6 |     </div>
 7 |     <div>{title}</div>
 8 |   </div>
 9 | );
10 | 
11 | export default Loader;
12 | 


--------------------------------------------------------------------------------
/frontend/src/components/UI/CustomButton.tsx:
--------------------------------------------------------------------------------
 1 | import { CommonButtonProps } from '../../types';
 2 | 
 3 | /**
 4 |  * Clickable tile made of a logo image and a caption.
 5 |  * Optional class names are appended to the base `custombutton` / `brandimg` classes.
 6 |  */
 7 | const CustomButton: React.FC<CommonButtonProps> = ({ openModal, wrapperclassName, logo, title, className }) => {
 8 |   return (
 9 |     <div onClick={openModal} className={`custombutton ${wrapperclassName ?? ''}`}>
10 |       {/* `?? ''` keeps a missing className from being interpolated as the literal "undefined". */}
11 |       {/* Empty alt: the caption next to the image already names the button. */}
12 |       <img src={logo} className={`brandimg ${className ?? ''}`} alt='' />
13 |       <h6>{title}</h6>
14 |     </div>
15 |   );
16 | };
17 | export default CustomButton;
18 | 


--------------------------------------------------------------------------------
/frontend/example.env:
--------------------------------------------------------------------------------
 1 | BACKEND_API_URL="http://localhost:8000"
 2 | BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true"
 3 | REACT_APP_SOURCES="local,youtube,wiki,s3,web"
 4 | LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o"
 5 | ENV="DEV"
 6 | TIME_PER_CHUNK=4
 7 | TIME_PER_PAGE=50
 8 | CHUNK_SIZE=5242880
 9 | LARGE_FILE_SIZE=5242880
10 | GOOGLE_CLIENT_ID=""
11 | CHAT_MODES=""


--------------------------------------------------------------------------------
/frontend/src/components/DataSources/GCS/GCSButton.tsx:
--------------------------------------------------------------------------------
 1 | import gcslogo from '../../../assets/images/gcs.webp';
 2 | import { DataComponentProps } from '../../../types';
 3 | import { buttonCaptions } from '../../../utils/Constants';
 4 | import CustomButton from '../../UI/CustomButton';
 5 | /** GCS source tile: a CustomButton showing the GCS logo and caption that forwards `openModal`. */
 6 | const GCSButton: React.FC<DataComponentProps> = (props) => {
 7 |   const { openModal } = props;
 8 |   return (
 9 |     <CustomButton logo={gcslogo} title={buttonCaptions.gcs} openModal={openModal} className='' wrapperclassName='' />
10 |   );
11 | };
12 | 
13 | export default GCSButton;
14 | 


--------------------------------------------------------------------------------
/frontend/src/components/DataSources/AWS/S3Bucket.tsx:
--------------------------------------------------------------------------------
 1 | import { DataComponentProps } from '../../../types';
 2 | import s3logo from '../../../assets/images/s3logo.png';
 3 | import CustomButton from '../../UI/CustomButton';
 4 | import { buttonCaptions } from '../../../utils/Constants';
 5 | 
 6 | /** S3 source tile: a CustomButton showing the S3 logo and the `amazon` caption that forwards `openModal`. */
 7 | const S3Component: React.FC<DataComponentProps> = (props) => {
 8 |   const { openModal } = props;
 9 |   return (
10 |     <CustomButton logo={s3logo} title={buttonCaptions.amazon} openModal={openModal} className='' wrapperclassName='' />
11 |   );
12 | };
13 | 
14 | export default S3Component;
15 | 


--------------------------------------------------------------------------------
/frontend/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig, loadEnv } from 'vite';
 2 | import react from '@vitejs/plugin-react';
 3 | 
 4 | // see https://stackoverflow.com/questions/73834404/react-uncaught-referenceerror-process-is-not-defined
 5 | // otherwise use import.meta.env.VITE_BACKEND_API_URL and expose it as such with the VITE_ prefix
 6 | export default defineConfig(({ mode }) => {
 7 |   const env = loadEnv(mode, process.cwd(), ''); // '' prefix: variables are loaded regardless of a VITE_ prefix
 8 |   return {
 9 |     define: {
10 |       'process.env': env, // NOTE(review): the whole loaded env is inlined into client code -- confirm no secrets are set at build time
11 |     },
12 |     plugins: [react()],
13 |   };
14 | });
15 | 


--------------------------------------------------------------------------------
/frontend/src/components/Graph/LegendsChip.tsx:
--------------------------------------------------------------------------------
 1 | import { useMemo } from 'react';
 2 | import { LegendChipProps } from '../../types';
 3 | import Legend from '../UI/Legend';
 4 | 
 5 | /**
 6 |  * Legend entry for one label: counts the distinct ids of `nodes` whose `labels`
 7 |  * include `title` and renders a Legend colored with `scheme[title]`.
 8 |  */
 9 | export const LegendsChip: React.FunctionComponent<LegendChipProps> = ({ scheme, title, nodes }) => {
10 |   // Recompute whenever the inputs change; an empty dependency list froze the count at its first value.
11 |   const chunkcount = useMemo(
12 |     // @ts-ignore
13 |     () => new Set(nodes?.filter((n) => n?.labels?.includes(title)).map((i) => i.id)).size,
14 |     [nodes, title]
15 |   );
16 |   return <Legend title={title} chunkCount={chunkcount} bgColor={scheme[title]}></Legend>;
17 | };
18 | 


--------------------------------------------------------------------------------
/frontend/src/components/Layout/AlertIcon.tsx:
--------------------------------------------------------------------------------
 1 | /** 20x20 alert glyph filled with #bb2d00; marked aria-hidden. */
 2 | const AlertIcon = () => (
 3 |   <svg
 4 |     xmlns='http://www.w3.org/2000/svg'
 5 |     viewBox='0 0 24 24'
 6 |     fill='#bb2d00'
 7 |     aria-hidden='true'
 8 |     width={20}
 9 |     height={20}
10 |   >
11 |     <path
12 |       fillRule='evenodd'
13 |       clipRule='evenodd'
14 |       d='M2.25 12c0-5.385 4.365-9.75 9.75-9.75s9.75 4.365 9.75 9.75-4.365 9.75-9.75 9.75S2.25 17.385 2.25 12zM12 8.25a.75.75 0 01.75.75v3.75a.75.75 0 01-1.5 0V9a.75.75 0 01.75-.75zm0 8.25a.75.75 0 100-1.5.75.75 0 000 1.5z'
15 |     />
16 |   </svg>
17 | );
18 | 
19 | export default AlertIcon;
20 | 


--------------------------------------------------------------------------------
/frontend/src/components/UI/Alert.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import Snackbar from '@mui/material/Snackbar';
 3 | import Alert from '@mui/material/Alert';
 4 | import { CustomAlertProps } from '../../types';
 5 | 
 6 | /**
 7 |  * Snackbar anchored bottom-left that wraps a filled Alert showing `alertMessage`.
 8 |  * `severity` falls back to 'error' when the caller omits it; both close paths call `handleClose`.
 9 |  */
10 | const CustomAlert: React.FC<CustomAlertProps> = (props) => {
11 |   const { open, handleClose, alertMessage, severity = 'error' } = props;
12 |   const alertBody = (
13 |     <Alert onClose={handleClose} severity={severity} variant='filled' sx={{ width: '100%' }}>
14 |       {alertMessage}
15 |     </Alert>
16 |   );
17 |   return (
18 |     <Snackbar anchorOrigin={{ vertical: 'bottom', horizontal: 'left' }} open={open} onClose={handleClose}>
19 |       {alertBody}
20 |     </Snackbar>
21 |   );
22 | };
23 | export default CustomAlert;
24 | 


--------------------------------------------------------------------------------
/frontend/src/services/GetFiles.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { SourceListServerData, UserCredentials } from '../types';
 4 | 
 5 | /**
 6 |  * Fetches the source list from `${url()}/sources_list` for the given connection.
 7 |  *
 8 |  * The password is base64-encoded with `btoa`, and every query value is percent-encoded
 9 |  * so characters such as `+`, `/`, `=`, `&` and `#` (present in `neo4j+s://` URIs and in
10 |  * base64 output) reach the server unchanged instead of being read as spaces or separators.
11 |  *
12 |  * NOTE(review): credentials still travel in the query string; moving them to a request
13 |  * body would need a matching backend change.
14 |  *
15 |  * @param userCredentials - uri, database, userName and password of the target database.
16 |  * @returns The axios response typed as SourceListServerData.
17 |  * @throws Rethrows any error after logging it to the console.
18 |  */
19 | export const getSourceNodes = async (userCredentials: UserCredentials) => {
20 |   // String() mirrors the template-literal stringification used before encoding was added.
21 |   const encode = (value: unknown) => encodeURIComponent(String(value));
22 |   try {
23 |     const encodedstr = btoa(userCredentials.password);
24 |     const query = [
25 |       `uri=${encode(userCredentials.uri)}`,
26 |       `database=${encode(userCredentials.database)}`,
27 |       `userName=${encode(userCredentials.userName)}`,
28 |       `password=${encode(encodedstr)}`,
29 |     ].join('&');
30 |     const response = await axios.get<SourceListServerData>(`${url()}/sources_list?${query}`);
31 |     return response;
32 |   } catch (error) {
33 |     console.log(error);
34 |     throw error;
35 |   }
36 | };
37 | 


--------------------------------------------------------------------------------
/frontend/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 6 |     <link rel="icon" type="image/png" sizes="32x32" href="/favicons/favicon-32x32.png">
 7 |     <link rel="icon" type="image/png" sizes="194x194" href="/favicons/favicon-194x194.png">
 8 |     <link rel="icon" type="image/png" sizes="16x16" href="/favicons/favicon-16x16.png">
 9 |     <link rel="shortcut icon" href="/favicons/favicon.ico">
10 |     <title>Neo4j graph builder</title>
11 |   </head>
12 |   <body>
13 |     <div id="root"></div>
14 |     <script type="module" src="/src/main.tsx"></script>
15 |   </body>
16 | </html>
17 | 


--------------------------------------------------------------------------------
/frontend/src/services/SchemaFromTextAPI.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { ScehmaFromText } from '../types';
 4 | 
 5 | /**
 6 |  * POSTs a multipart form (`model`, `input_text`, `is_schema_description_checked`)
 7 |  * to `${url()}/populate_graph_schema` and returns the axios response.
 8 |  * Errors are logged to the console and rethrown.
 9 |  */
10 | export const getNodeLabelsAndRelTypesFromText = async (model: string, inputText: string, isSchemaText: boolean) => {
11 |   const fields: [string, string][] = [
12 |     ['model', model],
13 |     ['input_text', inputText],
14 |     ['is_schema_description_checked', JSON.stringify(isSchemaText)],
15 |   ];
16 |   const payload = new FormData();
17 |   fields.forEach(([key, value]) => payload.append(key, value));
18 | 
19 |   try {
20 |     return await axios.post<ScehmaFromText>(`${url()}/populate_graph_schema`, payload);
21 |   } catch (error) {
22 |     console.log(error);
23 |     throw error;
24 |   }
25 | };
26 | 


--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2020",
 4 |     "useDefineForClassFields": true,
 5 |     "lib": ["ES2020", "DOM", "DOM.Iterable"],
 6 |     "module": "ESNext",
 7 |     "skipLibCheck": true,
 8 | 
 9 |     /* Bundler mode */
10 |     "moduleResolution": "bundler",
11 |     "allowImportingTsExtensions": true,
12 |     "resolveJsonModule": true,
13 |     "isolatedModules": true,
14 |     "noEmit": true,
15 |     "jsx": "react-jsx",
16 | 
17 |     /* Linting */
18 |     "strict": true,
19 |     "noUnusedLocals": true,
20 |     "noUnusedParameters": true,
21 |     "noFallthroughCasesInSwitch": true
22 |   },
23 |   "include": ["src"],
24 |   "references": [{ "path": "./tsconfig.node.json" }]
25 | }
26 | 


--------------------------------------------------------------------------------
/backend/src/document_sources/web_pages.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from langchain_community.document_loaders import WebBaseLoader
 3 | from src.api_response import create_api_response
 4 | 
 5 | def get_documents_from_web_page(source_url:str):
 6 |   try:
 7 |     pages = WebBaseLoader(source_url).load()
 8 |     file_name = pages[0].metadata['title']
 9 |     return file_name, pages
10 |   except Exception as e:
11 |     job_status = "Failed"
12 |     message="Failed To Process Web URL"
13 |     error_message = str(e)
14 |     logging.error(f"Failed To Process Web URL: {file_name}")
15 |     logging.exception(f'Exception Stack trace: {error_message}')
16 |     return create_api_response(job_status,message=message,error=error_message,file_name=file_name) 


--------------------------------------------------------------------------------
/backend/src/logger.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from google.cloud import logging as gclogger
 3 | 
 4 | class CustomLogger:
 5 |     def __init__(self):
 6 |         self.is_gcp_log_enabled = os.environ.get("GCP_LOG_METRICS_ENABLED", "False").lower() in ("true", "1", "yes")
 7 |         if self.is_gcp_log_enabled:
 8 |             self.logging_client = gclogger.Client()
 9 |             self.logger_name = "llm_experiments_metrics"
10 |             self.logger = self.logging_client.logger(self.logger_name)
11 |         else:
12 |             self.logger = None
13 | 
14 |     def log_struct(self, message):
15 |         if self.is_gcp_log_enabled and message is not None:
16 |             self.logger.log_struct(message)
17 |         else:
18 |             print(message)
19 | 


--------------------------------------------------------------------------------
/backend/src/diffbot_transformer.py:
--------------------------------------------------------------------------------
 1 | from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer
 2 | from langchain_community.graphs import Neo4jGraph
 3 | from langchain.docstore.document import Document
 4 | from typing import List
 5 | import os
 6 | import logging
 7 | import uuid
 8 | from src.llm import get_combined_chunks, get_llm
 9 | 
10 | logging.basicConfig(format='%(asctime)s - %(message)s',level='INFO')
11 | 
12 | def get_graph_from_diffbot(graph,chunkId_chunkDoc_list:List):
13 |     combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list)
14 |     llm,model_name = get_llm('diffbot')
15 |     graph_documents = llm.convert_to_graph_documents(combined_chunk_document_list)
16 |     return graph_documents
17 | 
18 |     


--------------------------------------------------------------------------------
/backend/src/entities/source_node.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | 
 3 | class sourceNode:
 4 |     """Plain attribute container; presumably describes one ingested source (verify against callers).
 5 | 
 6 |     Every field is a class-level attribute that defaults to None.
 7 |     """
 8 |     file_name:str=None
 9 |     file_size:int=None
10 |     file_type:str=None
11 |     file_source:str=None
12 |     status:str=None
13 |     url:str=None
14 |     gcsBucket:str=None
15 |     gcsBucketFolder:str=None
16 |     gcsProjectId:str=None
17 |     awsAccessKeyId:str=None
18 |     node_count:int=None
19 |     relationship_count:str=None  # NOTE(review): annotated str while node_count is int; confirm the intended type
20 |     model:str=None
21 |     created_at:datetime=None
22 |     updated_at:datetime=None
23 |     processing_time:float=None
24 |     error_message:str=None
25 |     total_pages:int=None
26 |     total_chunks:int=None
27 |     language:str=None
28 |     is_cancelled:bool=None
29 |     processed_chunk:int=None
30 |     access_token:str=None
31 | 


--------------------------------------------------------------------------------
/frontend/src/HOC/SettingModalHOC.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { SettingsModalProps } from '../types';
 3 | import SettingsModal from '../components/Popups/Settings/SettingModal';
 4 | 
 5 | /**
 6 |  * Thin wrapper that forwards its settings-modal props unchanged to `SettingsModal`.
 7 |  */
 8 | const SettingModalHOC: React.FC<SettingsModalProps> = (props) => {
 9 |   const { open, onClose, openTextSchema, onContinue, isSchema, setIsSchema, settingView, onClear } = props;
10 |   // Only these eight props are forwarded, exactly as received.
11 |   const forwarded = {
12 |     open,
13 |     onClose,
14 |     openTextSchema,
15 |     onContinue,
16 |     isSchema,
17 |     setIsSchema,
18 |     settingView,
19 |     onClear,
20 |   };
21 |   return <SettingsModal {...forwarded} />;
22 | };
23 | export default SettingModalHOC;
24 | 


--------------------------------------------------------------------------------
/frontend/src/services/GetNodeLabelsRelTypes.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { ServerData, UserCredentials } from '../types';
 4 | 
 5 | /**
 6 |  * POSTs the connection details as form data to `/schema` and resolves with
 7 |  * the axios response. Errors are logged to the console and rethrown.
 8 |  */
 9 | export const getNodeLabelsAndRelTypes = async (userCredentials: UserCredentials) => {
10 |   const payload = new FormData();
11 |   // Same field order as the backend form: uri, database, userName, password.
12 |   for (const field of ['uri', 'database', 'userName', 'password'] as const) {
13 |     payload.append(field, userCredentials?.[field] ?? '');
14 |   }
15 |   try {
16 |     return await axios.post<ServerData>(`${url()}/schema`, payload);
17 |   } catch (err) {
18 |     console.log(err);
19 |     throw err;
20 |   }
21 | };
22 | 


--------------------------------------------------------------------------------
/frontend/src/services/GetOrphanNodes.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { OrphanNodeResponse, UserCredentials } from '../types';
 4 | 
 5 | /**
 6 |  * POSTs the connection details as form data to `/get_unconnected_nodes_list`
 7 |  * and resolves with the axios response. Errors are logged and rethrown.
 8 |  */
 9 | export const getOrphanNodes = async (userCredentials: UserCredentials) => {
10 |   const body = new FormData();
11 |   const credentialFields = ['uri', 'database', 'userName', 'password'] as const;
12 |   credentialFields.forEach((name) => body.append(name, userCredentials?.[name] ?? ''));
13 |   const endpoint = `${url()}/get_unconnected_nodes_list`;
14 |   try {
15 |     const result = await axios.post<OrphanNodeResponse>(endpoint, body);
16 |     return result;
17 |   } catch (failure) {
18 |     console.log(failure);
19 |     throw failure;
20 |   }
21 | };
22 | 


--------------------------------------------------------------------------------
/frontend/src/services/ConnectAPI.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | 
 4 | /**
 5 |  * Sends the Neo4j connection details as multipart form data to `/connect`.
 6 |  * Resolves with the axios response; failures are logged and rethrown.
 7 |  */
 8 | const connectAPI = async (connectionURI: string, username: string, password: string, database: string) => {
 9 |   const fields: [string, string][] = [
10 |     ['uri', connectionURI ?? ''],
11 |     ['database', database ?? ''],
12 |     ['userName', username ?? ''],
13 |     ['password', password ?? ''],
14 |   ];
15 |   try {
16 |     const body = new FormData();
17 |     fields.forEach(([key, value]) => body.append(key, value));
18 |     const requestConfig = { headers: { 'Content-Type': 'multipart/form-data' } };
19 |     return await axios.post(`${url()}/connect`, body, requestConfig);
20 |   } catch (err) {
21 |     console.log('Error in connecting to the Neo4j instance :', err);
22 |     throw err;
23 |   }
24 | };
25 | export default connectAPI;
26 | 


--------------------------------------------------------------------------------
/backend/src/document_sources/wikipedia.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from langchain_community.document_loaders import WikipediaLoader
 3 | from src.api_response import create_api_response
 4 | 
 5 | def get_documents_from_Wikipedia(wiki_query:str, language:str):
 6 |   try:
 7 |     pages = WikipediaLoader(query=wiki_query.strip(), lang=language, load_max_docs=1, load_all_available_meta=False).load()
 8 |     file_name = wiki_query.strip()
 9 |     logging.info(f"Total Pages from Wikipedia = {len(pages)}") 
10 |     return file_name, pages
11 |   except Exception as e:
12 |     job_status = "Failed"
13 |     message="Failed To Process Wikipedia Query"
14 |     error_message = str(e)
15 |     logging.error(f"Failed To Process Wikipedia Query: {file_name}")
16 |     logging.exception(f'Exception Stack trace: {error_message}')
17 |     return create_api_response(job_status,message=message,error=error_message,file_name=file_name) 


--------------------------------------------------------------------------------
/backend/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.10-slim
 2 | WORKDIR /code
 3 | ENV PORT=8000
 4 | EXPOSE 8000
 5 | # Install dependencies and clean up in one layer
 6 | RUN apt-get update && \
 7 |    apt-get install -y --no-install-recommends \
 8 |        libgl1-mesa-glx \
 9 |        libreoffice \
10 |        cmake \
11 |        poppler-utils \
12 |        tesseract-ocr && \
13 |    apt-get clean && \
14 |    rm -rf /var/lib/apt/lists/*
15 | # Set LD_LIBRARY_PATH
16 | ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
17 | # Copy requirements file and install Python dependencies
18 | COPY requirements.txt /code/
19 | # --no-cache-dir keeps pip's download cache out of the image layer
20 | RUN pip install --no-cache-dir -r requirements.txt
21 | # Copy application code
22 | COPY . /code
23 | # Set command
24 | CMD ["gunicorn", "score:app", "--workers", "8","--threads", "8", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000", "--timeout", "300"]
25 | 


--------------------------------------------------------------------------------
/frontend/src/services/DeleteOrphanNodes.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { UserCredentials } from '../types';
 4 | 
 5 | /**
 6 |  * POSTs the connection details and the JSON-encoded list of selected nodes
 7 |  * to `/delete_unconnected_nodes`. Resolves with the axios response; failures
 8 |  * are logged and rethrown.
 9 |  */
10 | const deleteOrphanAPI = async (userCredentials: UserCredentials, selectedNodes: string[]) => {
11 |   try {
12 |     const formData = new FormData();
13 |     formData.append('uri', userCredentials?.uri ?? '');
14 |     formData.append('database', userCredentials?.database ?? '');
15 |     formData.append('userName', userCredentials?.userName ?? '');
16 |     formData.append('password', userCredentials?.password ?? '');
17 |     formData.append('unconnected_entities_list', JSON.stringify(selectedNodes));
18 |     const response = await axios.post(`${url()}/delete_unconnected_nodes`, formData);
19 |     return response;
20 |   } catch (error) {
21 |     // The message now names this operation (it was copied from the chat API).
22 |     console.log('Error deleting unconnected nodes:', error);
23 |     throw error;
24 |   }
25 | };
26 | export default deleteOrphanAPI;
27 | 


--------------------------------------------------------------------------------
/backend/src/openAI_llm.py:
--------------------------------------------------------------------------------
 1 | from langchain_community.graphs import Neo4jGraph
 2 | import os
 3 | from dotenv import load_dotenv 
 4 | import logging
 5 | import concurrent.futures
 6 | from concurrent.futures import ThreadPoolExecutor
 7 | from langchain_experimental.graph_transformers import LLMGraphTransformer
 8 | from src.llm import get_graph_document_list, get_combined_chunks, get_llm
 9 | 
10 | load_dotenv()
11 | logging.basicConfig(format='%(asctime)s - %(message)s',level='INFO')
12 | 
13 | def get_graph_from_OpenAI(model_version, graph, chunkId_chunkDoc_list, allowedNodes, allowedRelationship):
14 |     futures=[]
15 |     graph_document_list=[]
16 |         
17 |     combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list)
18 |     
19 |     llm,model_name = get_llm(model_version)  
20 |     return  get_graph_document_list(llm, combined_chunk_document_list, allowedNodes, allowedRelationship)
21 |            
22 |         
23 |     
24 |     
25 | 


--------------------------------------------------------------------------------
/frontend/src/services/ChunkEntitiesInfo.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { ChatInfo_APIResponse, UserCredentials } from '../types';
 4 | 
 5 | /**
 6 |  * POSTs the connection details and `chunk_ids` as multipart form data to
 7 |  * `/chunk_entities`. Resolves with the response; failures are logged and rethrown.
 8 |  */
 9 | const chunkEntitiesAPI = async (userCredentials: UserCredentials, chunk_ids: string) => {
10 |   try {
11 |     const formData = new FormData();
12 |     // NOTE(review): unlike the other service calls, no 'database' field is sent; confirm the endpoint does not need it.
13 |     formData.append('uri', userCredentials?.uri ?? '');
14 |     formData.append('userName', userCredentials?.userName ?? '');
15 |     formData.append('password', userCredentials?.password ?? '');
16 |     formData.append('chunk_ids', chunk_ids);
17 | 
18 |     const response: ChatInfo_APIResponse = await axios.post(`${url()}/chunk_entities`, formData, {
19 |       headers: {
20 |         'Content-Type': 'multipart/form-data',
21 |       },
22 |     });
23 |     return response;
24 |   } catch (error) {
25 |     // The message now names this operation (it previously said "uploading file").
26 |     console.log('Error fetching chunk entities:', error);
27 |     throw error;
28 |   }
29 | };
30 | 
31 | export { chunkEntitiesAPI };
32 | 


--------------------------------------------------------------------------------
/frontend/src/services/CommonAPI.ts:
--------------------------------------------------------------------------------
 1 | import axios, { AxiosResponse, Method } from 'axios';
 2 | import { UserCredentials, FormDataParams } from '../types';
 3 | 
 4 | // API Call
 5 | /**
 6 |  * Generic request helper: copies every enumerable key of `commonParams` and
 7 |  * then of `additionalParams` into a FormData body, sends it with the given
 8 |  * method to `url` as multipart form data, and resolves with `response.data`.
 9 |  * Failures are logged and rethrown.
10 |  */
11 | const apiCall = async (
12 |   url: string,
13 |   method: Method,
14 |   commonParams: UserCredentials,
15 |   additionalParams: FormDataParams
16 | ) => {
17 |   try {
18 |     const body = new FormData();
19 |     for (const field in commonParams) {
20 |       body.append(field, commonParams[field]);
21 |     }
22 |     for (const field in additionalParams) {
23 |       body.append(field, additionalParams[field]);
24 |     }
25 |     const headers = { 'Content-Type': 'multipart/form-data' };
26 |     const result: AxiosResponse = await axios({ method, url, data: body, headers });
27 |     return result.data;
28 |   } catch (err) {
29 |     console.log('API Error:', err);
30 |     throw err;
31 |   }
32 | };
33 | 
34 | export { apiCall };
35 | 


--------------------------------------------------------------------------------
/frontend/src/styling/info.css:
--------------------------------------------------------------------------------
 1 | /* Rules nested under .list-class: list resets plus layout helpers for entities, popups and iframe previews. */
 2 | .list-class {
 3 |  /* NOTE(review): `.li` is a class selector while the sibling `ol` / `ul` rules are element selectors; confirm `li` was not intended. */
 4 |  .li {
 5 |    all: revert;
 6 |  }
 7 |  ol {
 8 |    all: revert;
 9 |  }
10 |  ul {
11 |    all: revert;
12 |  }
13 |  .button-container {
14 |    display: flex;
15 |    justify-content: center;
16 |    width: 100%;
17 |  }
18 |  .entities-container {
19 |    display: flex;
20 |    flex-direction: column;
21 |    gap: 10px;
22 |  }
23 |  .entity-item {
24 |    display: flex;
25 |    align-items: center;
26 |  }
27 |  .entity-label {
28 |    margin-right: 10px;
29 |  }
30 |  .entity-text {
31 |    display: inline;
32 |  }
33 |  .hoverable-link-container {
34 |    position: relative;
35 |  }
36 |  .popup {
37 |    position: fixed;
38 |    z-index: 1000;
39 |    background-color: white;
40 |    border: 1px solid #ccc;
41 |    padding: 5px;
42 |    box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);
43 |    border-radius: 8px;
44 |  }
45 |  .iframe-preview {
46 |    width: 360px;
47 |    height: 215px;
48 |    border: none;
49 |    border-radius: 8px;
50 |  }
51 | }


--------------------------------------------------------------------------------
/frontend/src/services/PostProcessing.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { UserCredentials } from '../types';
 4 | 
 5 | /**
 6 |  * POSTs the connection details and the JSON-encoded task list as multipart
 7 |  * form data to `/post_processing`. Resolves with the axios response;
 8 |  * failures are logged and rethrown.
 9 |  */
10 | const postProcessing = async (userCredentials: UserCredentials, taskParam: string[]) => {
11 |   try {
12 |     const body = new FormData();
13 |     for (const field of ['uri', 'database', 'userName', 'password'] as const) {
14 |       body.append(field, userCredentials?.[field] ?? '');
15 |     }
16 |     body.append('tasks', JSON.stringify(taskParam));
17 |     const requestConfig = { headers: { 'Content-Type': 'multipart/form-data' } };
18 |     return await axios.post(`${url()}/post_processing`, body, requestConfig);
19 |   } catch (err) {
20 |     console.log('Error updating the graph:', err);
21 |     throw err;
22 |   }
23 | };
24 | 
25 | export { postProcessing };
26 | 


--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
 1 | # Neo4j Knowledge Graph Builder
 2 | 
 3 | Reactjs app for building a knowledge graph using [Neo4j Needle](https://www.neo4j.design/).
 4 |     
 5 | ## Features
 6 | - 🚀 Responsive: Adapts to different screen sizes for optimal user experience.
 7 | - ⚙️ Neo4j Integration: A simple example for connecting to a Neo4j database.
 8 | - 🔐 Neo4j Auto-connect: Automatically connects to the Neo4j database if the user has a session saved (using localStorage).
 9 | - 💻 Dropzone: To drop pdf or txt files.
10 | - 💻 Table : To show uploaded files and generate knowledge graph.
11 | - 🛠️️ Modular approach: Facilitates easy customization.
12 | 
13 | 
14 | ## Installation:
15 | ```shell
16 | npm install -g yarn
17 | yarn install
18 | yarn run dev
19 | ```
20 | 
21 | ##
22 | Run `yarn add <package-name>` to add any newly required dependency to the project.
23 | 
24 | 29/01/2024> Latest dependency
25 | yarn add uuid 
26 | ##
27 | The upload API URL should be taken from the Ports tab of the Codespace environment (for demo purposes).
28 | ## What it looks like
29 | 


--------------------------------------------------------------------------------
/frontend/src/services/ServerSideStatusUpdateAPI.ts:
--------------------------------------------------------------------------------
 1 | import { eventResponsetypes } from '../types';
 2 | import { url } from '../utils/Utils';
 3 | /**
 4 |  * Opens a server-sent-events stream for the extraction status of `name` and
 5 |  * forwards every parsed event to `datahandler`. The stream is closed once a
 6 |  * 'Completed', 'Failed' or 'Cancelled' status arrives.
 7 |  */
 8 | export function triggerStatusUpdateAPI(
 9 |   name: string,
10 |   uri: string,
11 |   username: string,
12 |   password: string,
13 |   database: string,
14 |   datahandler: (i: eventResponsetypes) => void
15 | ) {
16 |   // The password is sent base64-encoded; an empty password becomes an empty
17 |   // value instead of the literal text "undefined".
18 |   const encodedPassword = password ? btoa(password) : '';
19 |   // URLSearchParams percent-encodes each value, so '+', '/' and '=' (all possible
20 |   // in base64 output) as well as '&' or '#' can no longer corrupt the query string.
21 |   const query = new URLSearchParams({
22 |     url: uri,
23 |     userName: username,
24 |     password: encodedPassword,
25 |     database: database,
26 |   });
27 |   const eventSource = new EventSource(
28 |     `${url()}/update_extract_status/${encodeURIComponent(name)}?${query.toString()}`
29 |   );
30 |   const terminalStatuses = ['Completed', 'Failed', 'Cancelled'];
31 |   eventSource.onmessage = (event) => {
32 |     const eventResponse = JSON.parse(event.data);
33 |     datahandler(eventResponse);
34 |     // Stop listening once the job has reached a final state.
35 |     if (terminalStatuses.includes(eventResponse.status)) {
36 |       eventSource.close();
37 |     }
38 |   };
39 | }
40 | 


--------------------------------------------------------------------------------
/frontend/src/App.tsx:
--------------------------------------------------------------------------------
 1 | import './App.css';
 2 | import '@neo4j-ndl/base/lib/neo4j-ds-styles.css';
 3 | import ThemeWrapper from './context/ThemeWrapper';
 4 | import QuickStarter from './components/QuickStarter';
 5 | import { GoogleOAuthProvider } from '@react-oauth/google';
 6 | import { APP_SOURCES } from './utils/Constants';
 7 | import ErrorBoundary from './components/UI/ErrroBoundary';
 8 | 
 9 | // Root component: renders QuickStarter inside ThemeWrapper and ErrorBoundary,
10 | // adding GoogleOAuthProvider only when APP_SOURCES includes 'gcs'.
11 | const App: React.FC = () => {
12 |   const gcsEnabled = APP_SOURCES != undefined && APP_SOURCES.includes('gcs');
13 |   const themedApp = (
14 |     <ThemeWrapper>
15 |       <QuickStarter />
16 |     </ThemeWrapper>
17 |   );
18 |   return (
19 |     <>
20 |       <ErrorBoundary>
21 |         {gcsEnabled ? (
22 |           <GoogleOAuthProvider clientId={process.env.GOOGLE_CLIENT_ID as string}>{themedApp}</GoogleOAuthProvider>
23 |         ) : (
24 |           themedApp
25 |         )}
26 |       </ErrorBoundary>
27 |     </>
28 |   );
29 | };
30 | 
31 | export default App;
32 | 


--------------------------------------------------------------------------------
/frontend/src/context/UserCredentials.tsx:
--------------------------------------------------------------------------------
 1 | import { createContext, useState, useContext, FunctionComponent, ReactNode } from 'react';
 2 | import { UserCredentials } from '../types';
 3 | 
 4 | type Props = {
 5 |   children: ReactNode;
 6 | };
 7 | 
 8 | interface ContextProps {
 9 |   userCredentials: UserCredentials | null;
10 |   setUserCredentials: (UserCredentials: UserCredentials) => void;
11 | }
12 | export const UserConnection = createContext<ContextProps>({
13 |   userCredentials: null,
14 |   setUserCredentials: () => null,
15 | });
16 | // Hook returning the current value of the UserConnection context.
17 | export function useCredentials() {
18 |   return useContext(UserConnection);
19 | }
20 | // Provider that keeps the credentials in React state (initially null) and
21 | // exposes both the value and its setter through the UserConnection context.
22 | const UserCredentialsWrapper: FunctionComponent<Props> = ({ children }) => {
23 |   const [userCredentials, setUserCredentials] = useState<UserCredentials | null>(null);
24 |   return (
25 |     <UserConnection.Provider value={{ userCredentials, setUserCredentials }}>{children}</UserConnection.Provider>
26 |   );
27 | };
28 | export default UserCredentialsWrapper;
29 | 


--------------------------------------------------------------------------------
/frontend/src/services/CancelAPI.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { UserCredentials, commonserverresponse } from '../types';
 4 | 
 5 | /**
 6 |  * POSTs the JSON-encoded file names and source types to `/cancelled_job`.
 7 |  * Connection details are read from the 'neo4j.connection' localStorage entry
 8 |  * and are only attached when that entry exists. Resolves with the axios
 9 |  * response; failures are logged and rethrown.
10 |  */
11 | const cancelAPI = async (filenames: string[], source_types: string[]) => {
12 |   try {
13 |     const formData = new FormData();
14 |     const credentials: UserCredentials = JSON.parse(localStorage.getItem('neo4j.connection') || 'null');
15 |     if (credentials) {
16 |       formData.append('uri', credentials?.uri ?? '');
17 |       formData.append('database', credentials?.database ?? '');
18 |       // NOTE(review): reads `user`, whereas other services read `userName`; presumably the stored entry uses `user`. Verify.
19 |       formData.append('userName', credentials?.user ?? '');
20 |       formData.append('password', credentials?.password ?? '');
21 |     }
22 |     formData.append('filenames', JSON.stringify(filenames));
23 |     formData.append('source_types', JSON.stringify(source_types));
24 |     const response = await axios.post<commonserverresponse>(`${url()}/cancelled_job`, formData);
25 |     return response;
26 |   } catch (error) {
27 |     // The message now names this operation (it was copied from the chat API).
28 |     console.log('Error cancelling the job:', error);
29 |     throw error;
30 |   }
31 | };
32 | export default cancelAPI;
33 | 


--------------------------------------------------------------------------------
/frontend/src/components/Graph/GraphViewButton.tsx:
--------------------------------------------------------------------------------
 1 | import React, { useState } from 'react';
 2 | import { Button } from '@neo4j-ndl/react';
 3 | import GraphViewModal from './GraphViewModal';
 4 | import { Node, Relationship } from '@neo4j-nvl/base';
 5 | 
 6 | interface GraphViewButtonProps {
 7 |   nodeValues?: Node[];
 8 |   relationshipValues?: Relationship[];
 9 | }
10 | // Button that opens GraphViewModal in the 'chatInfoView' view point, passing
11 | // the received nodes and relationships through to the modal.
12 | const GraphViewButton: React.FC<GraphViewButtonProps> = (props) => {
13 |   const { nodeValues, relationshipValues } = props;
14 |   const [isModalOpen, setIsModalOpen] = useState(false);
15 |   const [activeViewPoint, setActiveViewPoint] = useState('');
16 | 
17 |   const showChatInfoGraph = () => {
18 |     setIsModalOpen(true);
19 |     setActiveViewPoint('chatInfoView');
20 |   };
21 | 
22 |   return (
23 |     <>
24 |       <Button onClick={showChatInfoGraph}>Graph Entities used for Answer Generation</Button>
25 |       <GraphViewModal
26 |         open={isModalOpen}
27 |         setGraphViewOpen={setIsModalOpen}
28 |         viewPoint={activeViewPoint}
29 |         nodeValues={nodeValues}
30 |         relationshipValues={relationshipValues}
31 |       />
32 |     </>
33 |   );
34 | };
35 | export default GraphViewButton;
36 | 


--------------------------------------------------------------------------------
/frontend/src/services/GraphQuery.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { UserCredentials } from '../types';
 4 | 
 5 | /**
 6 |  * POSTs the connection details, the query type (default 'entities') and the
 7 |  * JSON-encoded document names as multipart form data to `/graph_query`.
 8 |  * Resolves with the axios response; failures are logged and rethrown.
 9 |  */
10 | const graphQueryAPI = async (
11 |   userCredentials: UserCredentials,
12 |   query_type: string,
13 |   document_names: (string | undefined)[] | undefined
14 | ) => {
15 |   try {
16 |     const formData = new FormData();
17 |     formData.append('uri', userCredentials?.uri ?? '');
18 |     formData.append('database', userCredentials?.database ?? '');
19 |     formData.append('userName', userCredentials?.userName ?? '');
20 |     formData.append('password', userCredentials?.password ?? '');
21 |     formData.append('query_type', query_type ?? 'entities');
22 |     // JSON.stringify(undefined) yields undefined, which FormData would send as the
23 |     // text "undefined" (not valid JSON); send an empty list instead.
24 |     formData.append('document_names', JSON.stringify(document_names ?? []));
25 | 
26 |     const response = await axios.post(`${url()}/graph_query`, formData, {
27 |       headers: {
28 |         'Content-Type': 'multipart/form-data',
29 |       },
30 |     });
31 |     return response;
32 |   } catch (error) {
33 |     // The message now names this operation (it was copied from the chat API).
34 |     console.log('Error fetching the graph query result:', error);
35 |     throw error;
36 |   }
37 | };
38 | export default graphQueryAPI;
39 | 


--------------------------------------------------------------------------------
/frontend/src/services/DeleteFiles.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { CustomFile, UserCredentials } from '../types';
 4 | 
 5 | /**
 6 |  * POSTs the connection details, the `deleteEntities` flag and the names and
 7 |  * sources of the selected files (all JSON-encoded) to
 8 |  * `/delete_document_and_entities`. Resolves with the axios response;
 9 |  * failures are logged and rethrown.
10 |  */
11 | const deleteAPI = async (userCredentials: UserCredentials, selectedFiles: CustomFile[], deleteEntities: boolean) => {
12 |   try {
13 |     const filenames = selectedFiles.map((file) => file.name);
14 |     const source_types = selectedFiles.map((file) => file.fileSource);
15 |     const formData = new FormData();
16 |     formData.append('uri', userCredentials?.uri ?? '');
17 |     formData.append('database', userCredentials?.database ?? '');
18 |     formData.append('userName', userCredentials?.userName ?? '');
19 |     formData.append('password', userCredentials?.password ?? '');
20 |     formData.append('deleteEntities', JSON.stringify(deleteEntities));
21 |     formData.append('filenames', JSON.stringify(filenames));
22 |     formData.append('source_types', JSON.stringify(source_types));
23 |     const response = await axios.post(`${url()}/delete_document_and_entities`, formData);
24 |     return response;
25 |   } catch (error) {
26 |     // The message now names this operation (it was copied from the chat API).
27 |     console.log('Error deleting the documents:', error);
28 |     throw error;
29 |   }
30 | };
31 | export default deleteAPI;
32 | 


--------------------------------------------------------------------------------
/frontend/src/components/Graph/CheckboxSelection.tsx:
--------------------------------------------------------------------------------
 1 | import { Checkbox } from '@neo4j-ndl/react';
 2 | import React from 'react';
 3 | import { CheckboxSectionProps } from '../../types';
 4 | 
 5 | // Checkbox value / label pairs, in display order.
 6 | const GRAPH_TYPE_OPTIONS = [
 7 |   { value: 'Document', label: 'Document' },
 8 |   { value: 'Entities', label: 'Entities' },
 9 |   { value: 'Chunk', label: 'Chunks' },
10 | ] as const;
11 | 
12 | // Renders one checkbox per graph type. A checkbox is disabled while loading
13 | // or when it is the only type currently selected.
14 | const CheckboxSelection: React.FC<CheckboxSectionProps> = ({ graphType, loading, handleChange }) => (
15 |   <div className='flex gap-5 mt-2 justify-between'>
16 |     <div className='flex gap-5'>
17 |       {GRAPH_TYPE_OPTIONS.map(({ value, label }) => {
18 |         const isSelected = graphType.includes(value);
19 |         return (
20 |           <Checkbox
21 |             key={value}
22 |             checked={isSelected}
23 |             label={label}
24 |             disabled={(isSelected && graphType.length === 1) || loading}
25 |             onChange={() => handleChange(value)}
26 |           />
27 |         );
28 |       })}
29 |     </div>
30 |   </div>
31 | );
32 | export default CheckboxSelection;
33 | 


--------------------------------------------------------------------------------
/frontend/src/components/UI/IconButtonToolTip.tsx:
--------------------------------------------------------------------------------
 1 | import { IconButton, Tip } from '@neo4j-ndl/react';
 2 | 
 3 | type IconButtonWithToolTipProps = {
 4 |   label: string;
 5 |   text: string | React.ReactNode;
 6 |   children: React.ReactNode;
 7 |   onClick?: React.MouseEventHandler<HTMLButtonElement> | undefined;
 8 |   size?: 'small' | 'medium' | 'large';
 9 |   clean?: boolean;
10 |   grouped?: boolean;
11 |   placement?: 'bottom' | 'top' | 'right' | 'left';
12 |   disabled?: boolean;
13 | };
14 | 
15 | // IconButton wrapped in a Tip: `children` is the button content, `text` the
16 | // tooltip content and `label` the button's aria-label. Defaults: medium size,
17 | // bottom placement, enabled.
18 | const IconButtonWithToolTip = ({
19 |   text,
20 |   children,
21 |   onClick,
22 |   size = 'medium',
23 |   clean,
24 |   grouped,
25 |   placement = 'bottom',
26 |   disabled = false,
27 |   label,
28 | }: IconButtonWithToolTipProps) => (
29 |   <Tip allowedPlacements={[placement]}>
30 |     <Tip.Trigger>
31 |       <IconButton
32 |         aria-label={label}
33 |         size={size}
34 |         clean={clean}
35 |         grouped={grouped}
36 |         onClick={onClick}
37 |         disabled={disabled}
38 |       >
39 |         {children}
40 |       </IconButton>
41 |     </Tip.Trigger>
42 |     <Tip.Content isPortaled={false} style={{ whiteSpace: 'nowrap' }}>
43 |       {text}
44 |     </Tip.Content>
45 |   </Tip>
46 | );
47 | 
48 | export default IconButtonWithToolTip;
49 | 


--------------------------------------------------------------------------------
/frontend/src/context/UserMessages.tsx:
--------------------------------------------------------------------------------
 1 | import { createContext, useState, useContext, Dispatch, SetStateAction, FC } from 'react';
 2 | import { MessagesContextProviderProps, Messages } from '../types';
 3 | import chatbotmessages from '../assets/ChatbotMessages.json';
 4 | import { getDateTime } from '../utils/Utils';
 5 | 
 6 | interface MessageContextType {
 7 |   messages: Messages[] | [];
 8 |   setMessages: Dispatch<SetStateAction<Messages[]>>;
 9 | }
10 | 
11 | const MessageContext = createContext<MessageContextType | undefined>(undefined);
12 | 
13 | // Provider for the chat messages. State starts with a single entry built from
14 | // chatbotmessages.listMessages[1] plus the current getDateTime() value.
15 | const MessageContextWrapper: FC<MessagesContextProviderProps> = ({ children }) => {
16 |   // Lazy initializer: the seed message (and getDateTime()) is built once, not on every render.
17 |   const [messages, setMessages] = useState<Messages[] | []>(() => [
18 |     { ...chatbotmessages.listMessages[1], datetime: getDateTime() },
19 |   ]);
20 | 
21 |   const value: MessageContextType = { messages, setMessages };
22 |   return <MessageContext.Provider value={value}>{children}</MessageContext.Provider>;
23 | };
24 | // Returns the message context value; throws when the context has no value
25 | // (i.e. the caller is not rendered inside a MessageContextWrapper).
26 | function useMessageContext() {
27 |   const ctx = useContext(MessageContext);
28 |   if (ctx) {
29 |     return ctx;
30 |   }
31 |   throw new Error('useMessageContext must be used within a MessageContextWrapper');
32 | }
33 | export { MessageContextWrapper, useMessageContext };
34 | 


--------------------------------------------------------------------------------
/frontend/src/components/Layout/DrawerChatbot.tsx:
--------------------------------------------------------------------------------
 1 | import { Drawer } from '@neo4j-ndl/react';
 2 | import Chatbot from '../ChatBot/Chatbot';
 3 | import { Messages } from '../../types';
 4 | import { useMessageContext } from '../../context/UserMessages';
 5 | interface DrawerChatbotProps {
 6 |   isExpanded: boolean;
 7 |   clearHistoryData: boolean;
 8 |   messages: Messages[];
 9 | }
10 | // Right-hand push drawer hosting the Chatbot. The chatbot is flagged as loading
11 | // while any message has `isTyping` or `isLoading` set.
12 | const DrawerChatbot: React.FC<DrawerChatbotProps> = ({ isExpanded, clearHistoryData, messages }) => {
13 |   const { setMessages } = useMessageContext();
14 |   const chatBusy = messages.some((entry) => entry.isTyping || entry.isLoading);
15 | 
16 |   return (
17 |     <div className='flex min-h-[calc(-58px+100vh)] relative'>
18 |       <Drawer expanded={isExpanded} closeable={false} position='right' type='push' className='!pt-0'>
19 |         <Drawer.Body className='!overflow-hidden !pr-0'>
20 |           <Chatbot
21 |             isFullScreen={false}
22 |             messages={messages}
23 |             setMessages={setMessages}
24 |             clear={clearHistoryData}
25 |             isLoading={chatBusy}
26 |           />
27 |         </Drawer.Body>
28 |       </Drawer>
29 |     </div>
30 |   );
31 | };
32 | export default DrawerChatbot;
33 | 


--------------------------------------------------------------------------------
/frontend/src/components/WebSources/Web/WebInput.tsx:
--------------------------------------------------------------------------------
 1 | import { webLinkValidation } from '../../../utils/Utils';
 2 | import useSourceInput from '../../../hooks/useSourceInput';
 3 | import CustomSourceInput from '../CustomSourceInput';
 4 | 
 5 | /**
 6 |  * Website-link input: feeds the state and handlers returned by
 7 |  * `useSourceInput(webLinkValidation, setIsLoading, 'web-url', ...)` into `CustomSourceInput`.
 8 |  */
 9 | export default function WebInput({ setIsLoading }: { setIsLoading: React.Dispatch<React.SetStateAction<boolean>> }) {
10 |   const source = useSourceInput(webLinkValidation, setIsLoading, 'web-url', false, false, true);
11 |   return (
12 |     <CustomSourceInput
13 |       id='Website link'
14 |       label='Website Link'
15 |       placeHolder='https://neo4j.com/'
16 |       disabledCheck={false}
17 |       value={source.inputVal}
18 |       isFocused={source.isFocused}
19 |       isValid={source.isValid}
20 |       status={source.status}
21 |       statusMessage={source.statusMessage}
22 |       setStatus={source.setStatus}
23 |       onChangeHandler={source.onChangeHandler}
24 |       onBlurHandler={source.onBlurHandler}
25 |       onPasteHandler={source.onPasteHandler}
26 |       onCloseHandler={source.onClose}
27 |       submitHandler={source.submitHandler}
28 |     />
29 |   );
30 | }
39 | 


--------------------------------------------------------------------------------
/frontend/src/components/UI/Menu.tsx:
--------------------------------------------------------------------------------
 1 | import { Menu } from '@neo4j-ndl/react';
 2 | import { Menuitems, Origin } from '../../types';
 3 | 
 4 | /**
 5 |  * Thin wrapper around the NDL `Menu` that renders one `Menu.Item` per entry of `items`.
 6 |  * `anchorPortal` defaults to true and `disableBackdrop` to false; when `items` is
 7 |  * undefined the menu is rendered without entries.
 8 |  */
 9 | export default function CustomMenu(props: {
10 |   open: boolean;
11 |   closeHandler: () => void;
12 |   items: Menuitems[] | undefined;
13 |   MenuAnchor: HTMLElement | null;
14 |   anchorOrigin?: Origin;
15 |   transformOrigin?: Origin;
16 |   anchorPortal?: boolean;
17 |   disableBackdrop?: boolean;
18 | }) {
19 |   const { open, closeHandler, items, MenuAnchor, anchorOrigin, transformOrigin } = props;
20 |   const { anchorPortal = true, disableBackdrop = false } = props;
21 | 
22 |   // Index + title keeps keys distinct even when two entries share a title.
23 |   const entries = items?.map((item, position) => (
24 |     <Menu.Item
25 |       key={`${position}${item.title}`}
26 |       title={item.title}
27 |       description={item.description}
28 |       disabled={item.disabledCondition}
29 |       className={item.isSelected ? item.selectedClassName : ''}
30 |       onClick={item.onClick}
31 |     />
32 |   ));
33 | 
34 |   return (
35 |     <Menu
36 |       open={open}
37 |       onClose={closeHandler}
38 |       anchorEl={MenuAnchor}
39 |       anchorPortal={anchorPortal}
40 |       anchorOrigin={anchorOrigin}
41 |       transformOrigin={transformOrigin}
42 |       disableBackdrop={disableBackdrop}
43 |     >
44 |       {entries}
45 |     </Menu>
46 |   );
47 | }
48 | 


--------------------------------------------------------------------------------
/frontend/src/components/WebSources/WikiPedia/WikipediaInput.tsx:
--------------------------------------------------------------------------------
 1 | import { wikiValidation } from '../../../utils/Utils';
 2 | import useSourceInput from '../../../hooks/useSourceInput';
 3 | import CustomSourceInput from '../CustomSourceInput';
 4 | 
 5 | /**
 6 |  * Wikipedia-link input: feeds the state and handlers returned by
 7 |  * `useSourceInput(wikiValidation, setIsLoading, 'Wikipedia', ...)` into `CustomSourceInput`.
 8 |  */
 9 | export default function WikipediaInput(props: { setIsLoading: React.Dispatch<React.SetStateAction<boolean>> }) {
10 |   const { setIsLoading } = props;
11 |   const source = useSourceInput(wikiValidation, setIsLoading, 'Wikipedia', true, false, false);
12 |   return (
13 |     <CustomSourceInput
14 |       id='Wikipedia link'
15 |       label='Wikipedia Link'
16 |       placeHolder='https://en.wikipedia.org/wiki/Albert_Einstein'
17 |       disabledCheck={false}
18 |       value={source.inputVal}
19 |       isFocused={source.isFocused}
20 |       isValid={source.isValid}
21 |       status={source.status}
22 |       statusMessage={source.statusMessage}
23 |       setStatus={source.setStatus}
24 |       onChangeHandler={source.onChangeHandler}
25 |       onBlurHandler={source.onBlurHandler}
26 |       onPasteHandler={source.onPasteHandler}
27 |       onCloseHandler={source.onClose}
28 |       submitHandler={source.submitHandler}
29 |     />
30 |   );
31 | }
43 | 


--------------------------------------------------------------------------------
/frontend/src/components/WebSources/Youtube/YoutubeInput.tsx:
--------------------------------------------------------------------------------
 1 | import CustomSourceInput from '../CustomSourceInput';
 2 | import useSourceInput from '../../../hooks/useSourceInput';
 3 | import { youtubeLinkValidation } from '../../../utils/Utils';
 4 | 
 5 | /**
 6 |  * YouTube-link input: feeds the state and handlers returned by
 7 |  * `useSourceInput(youtubeLinkValidation, setIsLoading, 'youtube', ...)` into `CustomSourceInput`.
 8 |  */
 9 | export default function YoutubeInput(props: { setIsLoading: React.Dispatch<React.SetStateAction<boolean>> }) {
10 |   const { setIsLoading } = props;
11 |   const source = useSourceInput(youtubeLinkValidation, setIsLoading, 'youtube', false, true, false);
12 |   return (
13 |     <CustomSourceInput
14 |       id='youtube link'
15 |       label='Youtube Link'
16 |       placeHolder='https://www.youtube.com/watch?v=2W9HM1xBibo'
17 |       disabledCheck={false}
18 |       value={source.inputVal}
19 |       isFocused={source.isFocused}
20 |       isValid={source.isValid}
21 |       status={source.status}
22 |       statusMessage={source.statusMessage}
23 |       setStatus={source.setStatus}
24 |       onChangeHandler={source.onChangeHandler}
25 |       onBlurHandler={source.onBlurHandler}
26 |       onPasteHandler={source.onPasteHandler}
27 |       onCloseHandler={source.onClose}
28 |       submitHandler={source.submitHandler}
29 |     />
30 |   );
31 | }
43 | 


--------------------------------------------------------------------------------
/example.env:
--------------------------------------------------------------------------------
 1 | # Mandatory
 2 | OPENAI_API_KEY = ""
 3 | DIFFBOT_API_KEY = ""
 4 | 
 5 | # Optional Backend
 6 | EMBEDDING_MODEL = "all-MiniLM-L6-v2"
 7 | IS_EMBEDDING = "true"
 8 | KNN_MIN_SCORE = "0.94"
 9 | # Enable Gemini (default is False) | Can be False or True
10 | GEMINI_ENABLED = False
11 | # LLM_MODEL_CONFIG_ollama_llama3="llama3,http://host.docker.internal:11434"
12 | 
13 | # Enable Google Cloud logs (default is False) | Can be False or True
14 | GCP_LOG_METRICS_ENABLED = False
15 | NUMBER_OF_CHUNKS_TO_COMBINE = 6
16 | UPDATE_GRAPH_CHUNKS_PROCESSED = 20
17 | NEO4J_URI = "neo4j://database:7687"
18 | NEO4J_USERNAME = "neo4j"
19 | NEO4J_PASSWORD = "password"
20 | LANGCHAIN_API_KEY = ""
21 | LANGCHAIN_PROJECT = ""
22 | LANGCHAIN_TRACING_V2 = "true"
23 | LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
24 | GCS_FILE_CACHE = False
25 | ENTITY_EMBEDDING=True
26 | 
27 | # Optional Frontend
28 | BACKEND_API_URL="http://localhost:8000"
29 | BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true"
30 | REACT_APP_SOURCES="local,youtube,wiki,s3,web"
31 | LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" # ",ollama_llama3"
32 | ENV="DEV"
33 | TIME_PER_CHUNK=4
34 | TIME_PER_PAGE=50
35 | CHUNK_SIZE=5242880
36 | GOOGLE_CLIENT_ID=""
37 | CHAT_MODES=""
38 | 


--------------------------------------------------------------------------------
/frontend/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Step 1: Build the React application
 2 | FROM node:20 AS build
 3 | 
 4 | # Build-time settings. Every ARG declared here is forwarded to `yarn run build` below.
 5 | ARG BACKEND_API_URL="http://localhost:8000"
 6 | ARG REACT_APP_SOURCES=""
 7 | ARG LLM_MODELS=""
 8 | ARG GOOGLE_CLIENT_ID=""
 9 | ARG BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true"
10 | ARG TIME_PER_CHUNK=4
11 | ARG TIME_PER_PAGE=50
12 | ARG LARGE_FILE_SIZE=5242880
13 | ARG CHUNK_SIZE=5242880
14 | ARG CHAT_MODES=""
15 | ARG ENV="DEV"
16 | 
17 | WORKDIR /app
18 | # Copy only the manifests first so the dependency layers are cached across source changes.
19 | COPY package.json yarn.lock ./
20 | # NOTE(review): these packages are added at image-build time; presumably they are not
21 | # declared in package.json - confirm, and prefer pinning them there.
22 | RUN yarn add @neo4j-nvl/base @neo4j-nvl/react
23 | RUN yarn install
24 | COPY . ./
25 | # TIME_PER_PAGE was declared above but previously never reached the build command,
26 | # so overriding it with --build-arg had no effect.
27 | RUN BACKEND_API_URL=$BACKEND_API_URL \
28 |     REACT_APP_SOURCES=$REACT_APP_SOURCES \
29 |     LLM_MODELS=$LLM_MODELS \
30 |     GOOGLE_CLIENT_ID=$GOOGLE_CLIENT_ID \
31 |     BLOOM_URL=$BLOOM_URL \
32 |     TIME_PER_CHUNK=$TIME_PER_CHUNK \
33 |     TIME_PER_PAGE=$TIME_PER_PAGE \
34 |     CHUNK_SIZE=$CHUNK_SIZE \
35 |     ENV=$ENV \
36 |     LARGE_FILE_SIZE=${LARGE_FILE_SIZE} \
37 |     CHAT_MODES=$CHAT_MODES \
38 |     yarn run build
39 | 
40 | # Step 2: Serve the application using Nginx
41 | FROM nginx:alpine
42 | COPY --from=build /app/dist /usr/share/nginx/html
43 | COPY nginx/nginx.conf /etc/nginx/conf.d/default.conf
44 | 
45 | EXPOSE 8080
46 | CMD ["nginx", "-g", "daemon off;"]
40 | 


--------------------------------------------------------------------------------
/frontend/src/hooks/useSpeech.tsx:
--------------------------------------------------------------------------------
 1 | import { useEffect, useState } from 'react';
 2 | import { SpeechSynthesisProps, SpeechArgs } from '../types';
 3 | 
 4 | /**
 5 |  * Hook wrapping the browser speech-synthesis API.
 6 |  *
 7 |  * Returns `supported` (true once `window.speechSynthesis` has been detected), `speaking`
 8 |  * (an utterance started through `speak` has not finished yet), `speak(args)` and `cancel()`.
 9 |  * `props.onEnd` is invoked whenever an utterance ends normally.
10 |  */
11 | const useSpeechSynthesis = (props: SpeechSynthesisProps = {}) => {
12 |   const { onEnd = () => {} } = props;
13 |   const [speaking, setSpeaking] = useState(false);
14 |   const [supported, setSupported] = useState(false);
15 |   const handleEnd = () => {
16 |     setSpeaking(false);
17 |     onEnd();
18 |   };
19 |   // An utterance that errors never fires `end`, so without this handler `speaking`
20 |   // would stay true forever. `onEnd` is deliberately not called: nothing finished.
21 |   const handleError = () => {
22 |     setSpeaking(false);
23 |   };
24 |   // Feature detection runs after mount so the hook is safe where `window` is undefined.
25 |   useEffect(() => {
26 |     if (typeof window !== 'undefined' && window.speechSynthesis) {
27 |       setSupported(true);
28 |     }
29 |   }, []);
30 |   // Queue `text` for speaking; `rate`, `pitch` and `volume` default to 1. No-op when unsupported.
31 |   const speak = (args: SpeechArgs = {}) => {
32 |     const { text = '', rate = 1, pitch = 1, volume = 1 } = args;
33 |     if (!supported) {
34 |       return;
35 |     }
36 |     setSpeaking(true);
37 |     const utterance = new SpeechSynthesisUtterance();
38 |     utterance.text = text;
39 |     utterance.onend = handleEnd;
40 |     utterance.onerror = handleError;
41 |     utterance.rate = rate;
42 |     utterance.pitch = pitch;
43 |     utterance.volume = volume;
44 |     window.speechSynthesis.speak(utterance);
45 |   };
46 |   // Stop speaking and drop everything still queued. No-op when unsupported.
47 |   const cancel = () => {
48 |     if (!supported) {
49 |       return;
50 |     }
51 |     setSpeaking(false);
52 |     window.speechSynthesis.cancel();
53 |   };
54 |   return {
55 |     supported,
56 |     speak,
57 |     speaking,
58 |     cancel,
59 |   };
60 | };
45 | export default useSpeechSynthesis;
46 | 


--------------------------------------------------------------------------------
/frontend/src/components/UI/ButtonWithToolTip.tsx:
--------------------------------------------------------------------------------
 1 | import { Button, Tip } from '@neo4j-ndl/react';
 2 | import React, { MouseEventHandler } from 'react';
 3 | 
 4 | /**
 5 |  * NDL `Button` wrapped in a `Tip`: `children` is the button content, `text` the tooltip
 6 |  * content, and `label` becomes the button's aria-label.
 7 |  * Defaults: size 'medium', placement 'bottom', enabled, empty className, fill 'filled'.
 8 |  */
 9 | const ButtonWithToolTip = (props: {
10 |   text: string | React.ReactNode;
11 |   children: React.ReactNode;
12 |   onClick?: MouseEventHandler<HTMLButtonElement> | (() => void);
13 |   size?: 'small' | 'medium' | 'large';
14 |   clean?: boolean;
15 |   grouped?: boolean;
16 |   placement?: 'bottom' | 'top' | 'right' | 'left';
17 |   disabled?: boolean;
18 |   className?: string;
19 |   loading?: boolean;
20 |   label: string;
21 |   fill?: 'filled' | 'outlined' | 'text';
22 | }) => {
23 |   const { text, children, onClick, label, loading } = props;
24 |   const { size = 'medium', placement = 'bottom', disabled = false, className = '', fill = 'filled' } = props;
25 | 
26 |   const trigger = (
27 |     <Button
28 |       aria-label={label}
29 |       size={size}
30 |       fill={fill}
31 |       className={className}
32 |       disabled={disabled}
33 |       loading={loading}
34 |       onClick={onClick}
35 |     >
36 |       {children}
37 |     </Button>
38 |   );
39 | 
40 |   return (
41 |     <Tip allowedPlacements={[placement]}>
42 |       <Tip.Trigger>{trigger}</Tip.Trigger>
43 |       <Tip.Content isPortaled={false} style={{ whiteSpace: 'nowrap' }}>
44 |         {text}
45 |       </Tip.Content>
46 |     </Tip>
47 |   );
48 | };
50 | 
51 | export default ButtonWithToolTip;
52 | 


--------------------------------------------------------------------------------
/backend/src/api_response.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | def create_api_response(status,success_count=None,failed_count=None, data=None, error=None,message=None,file_source=None,file_name=None):
 4 |     """
 5 |     Create a response to be sent to the API. This is a helper function to create a JSON response that can be sent to the API.
 6 |     
 7 |     Args:
 8 |         status: The status of the API call. Should be one of the constants in this module.
 9 |         data: The data that was returned by the API call.
10 |         error: The error that was returned by the API call.
11 |         success_count: Number of files successfully processed.
12 |         failed_count: Number of files failed to process.
13 |     Returns: 
14 |       A dictionary containing the status data and error if any
15 |     """
16 |     response = {"status": status}
17 | 
18 |     # Set the data of the response
19 |     if data is not None:
20 |       response["data"] = data
21 | 
22 |     # Set the error message to the response.
23 |     if error is not None:
24 |       response["error"] = error
25 |     
26 |     if success_count is not None:
27 |       response['success_count']=success_count
28 |       response['failed_count']=failed_count
29 |     
30 |     if message is not None:
31 |       response['message']=message
32 | 
33 |     if file_source is not None:
34 |       response['file_source']=file_source
35 | 
36 |     if file_name is not None:
37 |       response['file_name']=file_name
38 |       
39 |     return response


--------------------------------------------------------------------------------
/frontend/src/assets/images/db-search.svg:
--------------------------------------------------------------------------------
1 | <svg width="48" height="48" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <path d="M19.2011 19.324C8.91514 18.496 1.16714 14.86 1.16714 10.502C1.16714 5.53001 11.2411 1.50201 23.6671 1.50201C36.0931 1.50201 46.1671 5.53001 46.1671 10.502C46.1671 12.864 43.8911 15.014 40.1671 16.62" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
3 | <path d="M46.1671 22.502V10.502" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
4 | <path d="M1.16714 10.502V22.502C1.16714 26.202 6.75114 29.38 14.7311 30.764" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
5 | <path d="M1.16714 22.502V34.502C1.16714 38.86 8.91514 42.502 19.2011 43.324" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
6 | <path d="M20.6671 31.502C20.6671 34.2868 21.7734 36.9575 23.7425 38.9266C25.7117 40.8958 28.3824 42.002 31.1671 42.002C33.9519 42.002 36.6226 40.8958 38.5918 38.9266C40.5609 36.9575 41.6671 34.2868 41.6671 31.502C41.6671 28.7172 40.5609 26.0465 38.5918 24.0774C36.6226 22.1083 33.9519 21.002 31.1671 21.002C28.3824 21.002 25.7117 22.1083 23.7425 24.0774C21.7734 26.0465 20.6671 28.7172 20.6671 31.502V31.502Z" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
7 | <path d="M46.1671 46.502L38.5911 38.926" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
8 | </svg>
9 | 


--------------------------------------------------------------------------------
/frontend/src/HOC/CustomModal.tsx:
--------------------------------------------------------------------------------
 1 | import { Banner, Button, Dialog } from '@neo4j-ndl/react';
 2 | import { CustomModalProps } from '../types';
 3 | import { buttonCaptions } from '../utils/Constants';
 4 | 
 5 | /**
 6 |  * Small dialog with an optional status banner, arbitrary content and one submit button.
 7 |  * The banner is shown for every status except 'unknown' and closing it resets the status
 8 |  * to 'unknown'. The submit button is disabled while the status is 'danger', 'info',
 9 |  * 'warning' or 'success'. `submitLabel` defaults to `buttonCaptions.submit`.
10 |  */
11 | const CustomModal: React.FC<CustomModalProps> = (props) => {
12 |   const { open, onClose, children, submitHandler, statusMessage, status, setStatus } = props;
13 |   const { submitLabel = buttonCaptions.submit } = props;
14 | 
15 |   const isDisabled = status === 'success' || status === 'warning' || status === 'info' || status === 'danger';
16 |   const resetStatus = () => setStatus('unknown');
17 | 
18 |   return (
19 |     <Dialog
20 |       size='small'
21 |       open={open}
22 |       onClose={onClose}
23 |       modalProps={{
24 |         id: 'default-menu',
25 |       }}
26 |     >
27 |       <Dialog.Content className='n-flex n-flex-col n-gap-token-4 mt-6'>
28 |         {status === 'unknown' ? null : (
29 |           <Banner closeable name='Custom Banner' type={status} description={statusMessage} onClose={resetStatus} />
30 |         )}
31 |         <div className='n-flex n-flex-row n-flex-wrap'>{children}</div>
32 |         <Dialog.Actions className='mt-4'>
33 |           <Button onClick={submitHandler} size='medium' disabled={isDisabled}>
34 |             {submitLabel}
35 |           </Button>
36 |         </Dialog.Actions>
37 |       </Dialog.Content>
38 |     </Dialog>
39 |   );
40 | };
45 | 
46 | export default CustomModal;
47 | 


--------------------------------------------------------------------------------
/frontend/src/services/PollingAPI.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { PollingAPI_Response, statusupdate } from '../types';
 4 | 
 5 | /**
 6 |  * Polls `/document_status/{fileName}` until the document leaves the 'Processing' state.
 7 |  *
 8 |  * While the status is 'Processing', `progressHandler` receives the payload and the next
 9 |  * request waits 2s, 4s, 8s, ... (doubling each time), for at most 10 polls. Any other
10 |  * payload from a 200 response is handed to `datahandler` and polling stops.
11 |  *
12 |  * @throws Error whose message is a JSON `{ fileName, message }` string on a non-200
13 |  *         response, or a timeout Error once the poll budget is used up. Errors raised by
14 |  *         axios itself propagate unchanged.
15 |  */
16 | export default async function subscribe(
17 |   fileName: string,
18 |   uri: string,
19 |   username: string,
20 |   database: string,
21 |   password: string,
22 |   datahandler: (i: statusupdate) => void,
23 |   progressHandler: (i: statusupdate) => void
24 | ) {
25 |   const MAX_POLLING_ATTEMPTS = 10;
26 |   const encodedPassword = password ? btoa(password) : '';
27 | 
28 |   // Percent-encode every dynamic part: base64 output may contain '+', '/' and '=', URIs such
29 |   // as `neo4j+s://...` contain '+', and file names may contain '#', '?', '&' or spaces.
30 |   // Left raw, these characters truncate the URL or are decoded into different values.
31 |   // NOTE(review): the password still travels in the query string; base64 is not encryption.
32 |   const query = [
33 |     `url=${encodeURIComponent(uri)}`,
34 |     `userName=${encodeURIComponent(username)}`,
35 |     `password=${encodeURIComponent(encodedPassword)}`,
36 |     `database=${encodeURIComponent(database)}`,
37 |   ].join('&');
38 |   const endpoint = `${url()}/document_status/${encodeURIComponent(fileName)}?${query}`;
39 | 
40 |   let delay = 2000;
41 |   for (let attempt = 0; attempt < MAX_POLLING_ATTEMPTS; attempt++) {
42 |     const response: PollingAPI_Response = await axios.get(endpoint);
43 | 
44 |     if (response.data?.file_name?.status === 'Processing') {
45 |       progressHandler(response.data);
46 |       const waitMs = delay;
47 |       await new Promise((resolve) => {
48 |         setTimeout(resolve, waitMs);
49 |       });
50 |       delay *= 2;
51 |       continue;
52 |     }
53 |     if (response.status !== 200) {
54 |       throw new Error(
55 |         JSON.stringify({ fileName, message: `Failed To Process ${fileName} or LLM Unable To Parse Content` })
56 |       );
57 |     }
58 |     datahandler(response.data);
59 |     return;
60 |   }
61 | 
62 |   throw new Error(`Polling for ${fileName} timed out after ${MAX_POLLING_ATTEMPTS} attempts.`);
63 | }
45 | 


--------------------------------------------------------------------------------
/backend/example.env:
--------------------------------------------------------------------------------
 1 | OPENAI_API_KEY = ""
 2 | DIFFBOT_API_KEY = ""
 3 | GROQ_API_KEY = ""
 4 | EMBEDDING_MODEL = "all-MiniLM-L6-v2"
 5 | IS_EMBEDDING = "true"
 6 | KNN_MIN_SCORE = "0.94"
 7 | # Enable Gemini (default is False) | Can be False or True
 8 | GEMINI_ENABLED = False
 9 | # Enable Google Cloud logs (default is False) | Can be False or True
10 | GCP_LOG_METRICS_ENABLED = False
11 | NUMBER_OF_CHUNKS_TO_COMBINE = 6
12 | UPDATE_GRAPH_CHUNKS_PROCESSED = 20
13 | NEO4J_URI = ""
14 | NEO4J_USERNAME = ""
15 | NEO4J_PASSWORD = ""
16 | NEO4J_DATABASE = ""
17 | AWS_ACCESS_KEY_ID =  ""
18 | AWS_SECRET_ACCESS_KEY = ""
19 | LANGCHAIN_API_KEY = ""
20 | LANGCHAIN_PROJECT = ""
21 | LANGCHAIN_TRACING_V2 = ""
22 | LANGCHAIN_ENDPOINT = ""
23 | GCS_FILE_CACHE = "" # Save the file into GCS or locally; should be True or False
24 | NEO4J_USER_AGENT=""
25 | ENABLE_USER_AGENT = ""
26 | LLM_MODEL_CONFIG_model_version=""
27 | ENTITY_EMBEDDING="" # True or False
28 | #examples
29 | LLM_MODEL_CONFIG_azure_ai_gpt_35="azure_deployment_name,azure_endpoint or base_url,azure_api_key,api_version"
30 | LLM_MODEL_CONFIG_azure_ai_gpt_4o="gpt-4o,https://YOUR-ENDPOINT.openai.azure.com/,azure_api_key,api_version"
31 | LLM_MODEL_CONFIG_groq_llama3_70b="model_name,base_url,groq_api_key"
32 | LLM_MODEL_CONFIG_anthropic_claude_3_5_sonnet="model_name,anthropic_api_key"
33 | LLM_MODEL_CONFIG_fireworks_llama_v3_70b="model_name,fireworks_api_key"
34 | LLM_MODEL_CONFIG_bedrock_claude_3_5_sonnet="model_name,aws_access_key_id,aws_secret__access_key,region_name"
35 | LLM_MODEL_CONFIG_ollama_llama3="model_name,model_local_url"
36 | 
37 | 


--------------------------------------------------------------------------------
/frontend/src/assets/ChatbotMessages.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "listMessages": [
 3 |     {
 4 |       "id": 1,
 5 |       "message": "Hi, I need help with creating a Cypher query for Neo4j.",
 6 |       "user": "user",
 7 |       "datetime": "01/01/2024 00:00:00"
 8 |     },
 9 |     {
10 |       "id": 2,
11 |       "message": " Welcome to the Neo4j Knowledge Graph Chat. You can ask questions related to documents which have been completely processed.",
12 |       "user": "chatbot",
13 |       "datetime": "01/01/2024 00:00:00"
14 |     },
15 |     {
16 |       "id": 3,
17 |       "message": "I need to find all employees who work in the IT department.",
18 |       "user": "user",
19 |       "datetime": "01/01/2024 00:00:00"
20 |     },
21 |     {
22 |       "id": 4,
23 |       "message": "Alright, you can use the following query: `MATCH (e:Employee)-[:WORKS_IN]->(d:Department {name: 'IT'}) RETURN e.name`. This query matches nodes labeled 'Employee' related to the 'IT' department and returns their names.",
24 |       "user": "chatbot",
25 |       "datetime": "01/01/2024 00:00:00"
26 |     },
27 |     {
28 |       "id": 5,
29 |       "message": "Thanks! And how do I get the total number of such employees?",
30 |       "user": "user",
31 |       "datetime": "01/01/2024 00:00:00"
32 |     },
33 |     {
34 |       "id": 6,
35 |       "message": "To get the count, use: `MATCH (e:Employee)-[:WORKS_IN]->(d:Department {name: 'IT'}) RETURN count(e)`. This counts all the distinct 'Employee' nodes related to the 'IT' department.",
36 |       "user": "chatbot",
37 |       "datetime": "01/01/2024 00:00:00"
38 |     }
39 |   ]
40 | }


--------------------------------------------------------------------------------
/frontend/src/components/UI/ErrroBoundary.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { Banner } from '@neo4j-ndl/react';
 3 | 
 4 | /**
 5 |  * Error boundary that swaps its children for an informational banner once a descendant throws.
 6 |  * A dedicated hint is shown when the error message is 'Missing required parameter client_id.';
 7 |  * any other error gets a generic description.
 8 |  */
 9 | export default class ErrorBoundary extends React.Component<any, any> {
10 |   state = { hasError: false, errorMessage: '' };
11 | 
12 |   // Switch to the error state as soon as a descendant throws.
13 |   static getDerivedStateFromError(_error: unknown) {
14 |     return { hasError: true };
15 |   }
16 | 
17 |   // Keep the message for the banner and log the error details to the console.
18 |   componentDidCatch(error: Error, errorInfo: any) {
19 |     const { message } = error;
20 |     this.setState({ ...this.state, errorMessage: message });
21 |     console.log({ error });
22 |     console.log({ errorInfo });
23 |   }
24 | 
25 |   render() {
26 |     const { hasError, errorMessage } = this.state;
27 |     if (!hasError) {
28 |       return this.props.children;
29 |     }
30 | 
31 |     const description =
32 |       errorMessage === 'Missing required parameter client_id.'
33 |         ? 'Please Provide The Google Client ID For GCS Source'
34 |         : 'Sorry there was a problem loading this page';
35 |     const documentationLink = {
36 |       label: 'Documentation',
37 |       href: 'https://github.com/neo4j-labs/llm-graph-builder',
38 |       target: '_blank',
39 |     };
40 | 
41 |     return (
42 |       <div className='n-size-full n-flex n-flex-col n-items-center n-justify-center n-rounded-md n-bg-palette-neutral-bg-weak n-box-border'>
43 |         <Banner
44 |           icon
45 |           floating
46 |           type='info'
47 |           title='Something went wrong'
48 |           description={description}
49 |           className='mt-8'
50 |           actions={[documentationLink]}
51 |         ></Banner>
52 |       </div>
53 |     );
54 |   }
55 | }
46 | 


--------------------------------------------------------------------------------
/frontend/src/assets/images/youtube.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 2 | <!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
 3 | <svg width="800px" height="800px" viewBox="0 -7 48 48" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 4 |     
 5 |     <title>Youtube-color</title>
 6 |     <desc>Created with Sketch.</desc>
 7 |     <defs>
 8 | 
 9 | </defs>
10 |     <g id="Icons" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
11 |         <g id="Color-" transform="translate(-200.000000, -368.000000)" fill="#CE1312">
12 |             <path d="M219.044,391.269916 L219.0425,377.687742 L232.0115,384.502244 L219.044,391.269916 Z M247.52,375.334163 C247.52,375.334163 247.0505,372.003199 245.612,370.536366 C243.7865,368.610299 241.7405,368.601235 240.803,368.489448 C234.086,368 224.0105,368 224.0105,368 L223.9895,368 C223.9895,368 213.914,368 207.197,368.489448 C206.258,368.601235 204.2135,368.610299 202.3865,370.536366 C200.948,372.003199 200.48,375.334163 200.48,375.334163 C200.48,375.334163 200,379.246723 200,383.157773 L200,386.82561 C200,390.73817 200.48,394.64922 200.48,394.64922 C200.48,394.64922 200.948,397.980184 202.3865,399.447016 C204.2135,401.373084 206.612,401.312658 207.68,401.513574 C211.52,401.885191 224,402 224,402 C224,402 234.086,401.984894 240.803,401.495446 C241.7405,401.382148 243.7865,401.373084 245.612,399.447016 C247.0505,397.980184 247.52,394.64922 247.52,394.64922 C247.52,394.64922 248,390.73817 248,386.82561 L248,383.157773 C248,379.246723 247.52,375.334163 247.52,375.334163 L247.52,375.334163 Z" id="Youtube">
13 | 
14 | </path>
15 |         </g>
16 |     </g>
17 | </svg>


--------------------------------------------------------------------------------
/frontend/src/assets/images/youtube-lightmode.svg:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?><!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
2 | <svg fill="#000000" width="800px" height="800px" viewBox="0 -4 32 32" xmlns="http://www.w3.org/2000/svg" preserveAspectRatio="xMidYMid">
3 |     <path d="M30.722,20.579 C30.137,21.894 28.628,23.085 27.211,23.348 C27.066,23.375 23.603,24.000 16.010,24.000 L15.990,24.000 C8.398,24.000 4.932,23.375 4.788,23.349 C3.371,23.085 1.861,21.894 1.275,20.578 C1.223,20.461 0.001,17.647 0.001,12.000 C0.001,6.353 1.223,3.538 1.275,3.421 C1.861,2.105 3.371,0.915 4.788,0.652 C4.932,0.625 8.398,-0.000 15.990,-0.000 C23.603,-0.000 27.066,0.625 27.210,0.651 C28.628,0.915 30.137,2.105 30.723,3.420 C30.775,3.538 32.000,6.353 32.000,12.000 C32.000,17.647 30.775,20.461 30.722,20.579 ZM28.893,4.230 C28.581,3.529 27.603,2.759 26.845,2.618 C26.813,2.612 23.386,2.000 16.010,2.000 C8.615,2.000 5.185,2.612 5.152,2.618 C4.394,2.759 3.417,3.529 3.104,4.234 C3.094,4.255 2.002,6.829 2.002,12.000 C2.002,17.170 3.094,19.744 3.106,19.770 C3.417,20.471 4.394,21.241 5.153,21.382 C5.185,21.388 8.615,22.000 15.990,22.000 L16.010,22.000 C23.386,22.000 26.813,21.388 26.846,21.382 C27.604,21.241 28.581,20.471 28.894,19.766 C28.904,19.744 29.998,17.170 29.998,12.000 C29.998,6.830 28.904,4.255 28.893,4.230 ZM13.541,17.846 C13.379,17.949 13.193,18.000 13.008,18.000 C12.842,18.000 12.676,17.959 12.525,17.875 C12.206,17.699 12.008,17.364 12.008,17.000 L12.008,7.000 C12.008,6.637 12.204,6.303 12.521,6.127 C12.838,5.950 13.227,5.958 13.534,6.149 L21.553,11.105 C21.846,11.286 22.026,11.606 22.027,11.951 C22.028,12.296 21.852,12.618 21.560,12.801 L13.541,17.846 ZM14.009,8.794 L14.009,15.189 L19.137,11.963 L14.009,8.794 Z"/>
4 | </svg>


--------------------------------------------------------------------------------
/frontend/src/components/ChatBot/ChatModeToggle.tsx:
--------------------------------------------------------------------------------
 1 | import { StatusIndicator } from '@neo4j-ndl/react';
 2 | import { useMemo } from 'react';
 3 | import { useFileContext } from '../../context/UsersFiles';
 4 | import CustomMenu from '../UI/Menu';
 5 | import { chatModes } from '../../utils/Constants';
 6 | import { capitalize } from '@mui/material';
 7 | 
 8 | /**
 9 |  * Menu listing the entries of `chatModes`; clicking an entry calls `setchatMode` from the
10 |  * file context, and the entry equal to the current `chatMode` shows a "Selected" indicator.
11 |  * Modes containing '+' are titled 'Graph + Vector', all others are capitalized.
12 |  * Defaults: no-op `closeHandler`, `anchorPortal` true, `disableBackdrop` false.
13 |  */
14 | export default function ChatModeToggle(props: {
15 |   menuAnchor: HTMLElement | null;
16 |   closeHandler?: () => void;
17 |   open: boolean;
18 |   anchorPortal?: boolean;
19 |   disableBackdrop?: boolean;
20 | }) {
21 |   const { menuAnchor, closeHandler = () => {}, open, anchorPortal = true, disableBackdrop = false } = props;
22 |   const { setchatMode, chatMode } = useFileContext();
23 | 
24 |   // Rebuilt when the selected mode changes so the "Selected" marker moves with it.
25 |   const menuItems = useMemo(
26 |     () =>
27 |       chatModes?.map((mode) => {
28 |         const isActive = chatMode === mode;
29 |         return {
30 |           title: mode.includes('+') ? 'Graph + Vector' : capitalize(mode),
31 |           onClick: () => {
32 |             setchatMode(mode);
33 |           },
34 |           disabledCondition: false,
35 |           description: (
36 |             <span>
37 |               {isActive && (
38 |                 <>
39 |                   <StatusIndicator type='success' /> Selected
40 |                 </>
41 |               )}
42 |             </span>
43 |           ),
44 |         };
45 |       }),
46 |     [chatMode, chatModes]
47 |   );
48 | 
49 |   return (
50 |     <CustomMenu
51 |       open={open}
52 |       closeHandler={closeHandler}
53 |       MenuAnchor={menuAnchor}
54 |       anchorPortal={anchorPortal}
55 |       disableBackdrop={disableBackdrop}
56 |       items={menuItems}
57 |     ></CustomMenu>
58 |   );
59 | }
55 | 


--------------------------------------------------------------------------------
/frontend/src/context/Alert.tsx:
--------------------------------------------------------------------------------
 1 | import { createContext, useState, useContext, FunctionComponent, ReactNode } from 'react';
 2 | import { alertStateType } from '../types';
 3 | import { OverridableStringUnion } from '@mui/types';
 4 | import { AlertColor, AlertPropsColorOverrides } from '@mui/material';
 5 | 
 6 | // Props of the provider component: just the subtree to wrap.
 7 | type Props = {
 8 |   children: ReactNode;
 9 | };
10 | 
11 | // Severity values accepted by `showAlert`.
12 | type AlertSeverity = OverridableStringUnion<AlertColor, AlertPropsColorOverrides> | undefined;
13 | 
14 | // Shape of the value published through `alertContext`.
15 | interface ContextProps {
16 |   alertState: alertStateType;
17 |   showAlert: (alertType: AlertSeverity, alertMessage: string) => void;
18 |   closeAlert: () => void;
19 | }
20 | // Default value used when no provider is mounted: a hidden 'info' alert and no-op callbacks.
21 | export const alertContext = createContext<ContextProps>({
22 |   showAlert: () => {},
23 |   closeAlert: () => {},
24 |   alertState: { showAlert: false, alertMessage: '', alertType: 'info' },
25 | });
23 | // Convenience hook returning the current value of `alertContext`.
24 | export const useAlertContext = () => useContext(alertContext);
27 | // Provider component owning the alert state and exposing it, together with
28 | // the showAlert/closeAlert actions, to every descendant through alertContext.
29 | const AlertContextWrapper: FunctionComponent<Props> = ({ children }) => {
30 |   // Starts hidden, with an empty message and the 'info' type.
31 |   const [alertState, setAlertState] = useState<alertStateType>({
32 |     showAlert: false,
33 |     alertMessage: '',
34 |     alertType: 'info',
35 |   });
36 | 
37 |   // Makes the alert visible with the given type and message.
38 |   function showAlert(
39 |     alertType: OverridableStringUnion<AlertColor, AlertPropsColorOverrides> | undefined,
40 |     alertMessage: string
41 |   ) {
42 |     setAlertState({ showAlert: true, alertType, alertMessage });
43 |   }
44 | 
45 |   // Hides the alert and resets type and message to their initial values.
46 |   function closeAlert() {
47 |     setAlertState({ showAlert: false, alertType: 'info', alertMessage: '' });
48 |   }
49 | 
50 |   // The value object is rebuilt on each render of this provider.
51 |   return (
52 |     <alertContext.Provider value={{ alertState, showAlert, closeAlert }}>
53 |       {children}
54 |     </alertContext.Provider>
55 |   );
56 | };
57 | export default AlertContextWrapper;
58 | 


--------------------------------------------------------------------------------
/backend/src/create_chunks.py:
--------------------------------------------------------------------------------
 1 | from langchain_text_splitters import TokenTextSplitter
 2 | from langchain.docstore.document import Document
 3 | from langchain_community.graphs import Neo4jGraph
 4 | import logging
 5 | import os
 6 | from src.document_sources.youtube import get_chunks_with_timestamps
 7 | 
 8 | logging.basicConfig(format="%(asctime)s - %(message)s", level="INFO")
 9 | 
10 | 
11 | class CreateChunksofDocument:
12 |     def __init__(self, pages: list[Document], graph: Neo4jGraph):
13 |         self.pages = pages
14 |         self.graph = graph
15 | 
16 |     def split_file_into_chunks(self):
17 |         """
18 |         Split a list of documents(file pages) into chunks of fixed size.
19 | 
20 |         Args:
21 |             pages: A list of pages to split. Each page is a list of text strings.
22 | 
23 |         Returns:
24 |             A list of chunks each of which is a langchain Document.
25 |         """
26 |         logging.info("Split file into smaller chunks")
27 |         # number_of_chunks_allowed = int(os.environ.get('NUMBER_OF_CHUNKS_ALLOWED'))
28 |         text_splitter = TokenTextSplitter(chunk_size=200, chunk_overlap=20)
29 |         if 'page' in self.pages[0].metadata:
30 |             chunks = []
31 |             for i, document in enumerate(self.pages):
32 |                 page_number = i + 1
33 |                 for chunk in text_splitter.split_documents([document]):
34 |                     chunks.append(Document(page_content=chunk.page_content, metadata={'page_number':page_number}))    
35 |         
36 |         elif 'length' in self.pages[0].metadata:
37 |             chunks_without_timestamps = text_splitter.split_documents(self.pages)
38 |             chunks = get_chunks_with_timestamps(chunks_without_timestamps, self.pages[0].metadata['source'])
39 |         else:
40 |             chunks = text_splitter.split_documents(self.pages)
41 |         return chunks


--------------------------------------------------------------------------------
/frontend/src/assets/images/youtube-darkmode.svg:
--------------------------------------------------------------------------------
1 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
2 | <!-- Uploaded to: SVG Repo, www.svgrepo.com, Transformed by: SVG Repo Mixer Tools -->
3 | <svg fill="#ffffff" width="800px" height="800px" viewBox="0 -4 32 32" xmlns="http://www.w3.org/2000/svg" preserveAspectRatio="xMidYMid">
4 | <g id="SVGRepo_bgCarrier" stroke-width="0"/>
5 | <g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"/>
6 | <g id="SVGRepo_iconCarrier"> <path d="M30.722,20.579 C30.137,21.894 28.628,23.085 27.211,23.348 C27.066,23.375 23.603,24.000 16.010,24.000 L15.990,24.000 C8.398,24.000 4.932,23.375 4.788,23.349 C3.371,23.085 1.861,21.894 1.275,20.578 C1.223,20.461 0.001,17.647 0.001,12.000 C0.001,6.353 1.223,3.538 1.275,3.421 C1.861,2.105 3.371,0.915 4.788,0.652 C4.932,0.625 8.398,-0.000 15.990,-0.000 C23.603,-0.000 27.066,0.625 27.210,0.651 C28.628,0.915 30.137,2.105 30.723,3.420 C30.775,3.538 32.000,6.353 32.000,12.000 C32.000,17.647 30.775,20.461 30.722,20.579 ZM28.893,4.230 C28.581,3.529 27.603,2.759 26.845,2.618 C26.813,2.612 23.386,2.000 16.010,2.000 C8.615,2.000 5.185,2.612 5.152,2.618 C4.394,2.759 3.417,3.529 3.104,4.234 C3.094,4.255 2.002,6.829 2.002,12.000 C2.002,17.170 3.094,19.744 3.106,19.770 C3.417,20.471 4.394,21.241 5.153,21.382 C5.185,21.388 8.615,22.000 15.990,22.000 L16.010,22.000 C23.386,22.000 26.813,21.388 26.846,21.382 C27.604,21.241 28.581,20.471 28.894,19.766 C28.904,19.744 29.998,17.170 29.998,12.000 C29.998,6.830 28.904,4.255 28.893,4.230 ZM13.541,17.846 C13.379,17.949 13.193,18.000 13.008,18.000 C12.842,18.000 12.676,17.959 12.525,17.875 C12.206,17.699 12.008,17.364 12.008,17.000 L12.008,7.000 C12.008,6.637 12.204,6.303 12.521,6.127 C12.838,5.950 13.227,5.958 13.534,6.149 L21.553,11.105 C21.846,11.286 22.026,11.606 22.027,11.951 C22.028,12.296 21.852,12.618 21.560,12.801 L13.541,17.846 ZM14.009,8.794 L14.009,15.189 L19.137,11.963 L14.009,8.794 Z"/> </g>
7 | </svg>


--------------------------------------------------------------------------------
/backend/src/generate_graphDocuments_from_llm.py:
--------------------------------------------------------------------------------
 1 | from langchain_community.graphs import Neo4jGraph
 2 | from src.diffbot_transformer import get_graph_from_diffbot
 3 | from src.openAI_llm import get_graph_from_OpenAI
 4 | from src.gemini_llm import get_graph_from_Gemini
 5 | from typing import List
 6 | import logging
 7 | from src.shared.constants import *
 8 | import os
 9 | from src.llm import get_graph_from_llm
10 | 
11 | logging.basicConfig(format="%(asctime)s - %(message)s", level="INFO")
12 | 
13 | 
14 | def generate_graphDocuments(model: str, graph: Neo4jGraph, chunkId_chunkDoc_list: List, allowedNodes=None, allowedRelationship=None):
15 |     
16 |     if  allowedNodes is None or allowedNodes=="":
17 |         allowedNodes =[]
18 |     else:
19 |         allowedNodes = allowedNodes.split(',')    
20 |     if  allowedRelationship is None or allowedRelationship=="":   
21 |         allowedRelationship=[]
22 |     else:
23 |         allowedRelationship = allowedRelationship.split(',')
24 |     
25 |     logging.info(f"allowedNodes: {allowedNodes}, allowedRelationship: {allowedRelationship}")
26 | 
27 |     graph_documents = []
28 |     if model == "diffbot":
29 |         graph_documents = get_graph_from_diffbot(graph, chunkId_chunkDoc_list)
30 | 
31 |     elif model in OPENAI_MODELS:
32 |         graph_documents = get_graph_from_OpenAI(model, graph, chunkId_chunkDoc_list, allowedNodes, allowedRelationship)
33 | 
34 |     elif model in GEMINI_MODELS:
35 |         graph_documents = get_graph_from_Gemini(model, graph, chunkId_chunkDoc_list, allowedNodes, allowedRelationship)
36 | 
37 |     # elif model in GROQ_MODELS :
38 |     #     graph_documents = get_graph_from_Groq_Llama3(MODEL_VERSIONS[model], graph, chunkId_chunkDoc_list, allowedNodes, allowedRelationship)
39 |     
40 |     else : 
41 |         graph_documents = get_graph_from_llm(model,chunkId_chunkDoc_list, allowedNodes, allowedRelationship) 
42 | 
43 |     logging.info(f"graph_documents = {len(graph_documents)}")
44 |     return graph_documents
45 | 


--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "neo4j-needle-starterkit",
 3 |   "private": true,
 4 |   "version": "1.0.0",
 5 |   "type": "module",
 6 |   "scripts": {
 7 |     "dev": "vite --host 0.0.0.0",
 8 |     "build": "tsc && vite build",
 9 |     "format": "prettier --write \"**/*.{ts,tsx}\"",
10 |     "lint": "eslint --ext .ts --ext .tsx . --report-unused-disable-directives --max-warnings 0",
11 |     "preview": "vite preview"
12 |   },
13 |   "dependencies": {
14 |     "@emotion/styled": "^11.11.0",
15 |     "@mui/material": "^5.15.10",
16 |     "@mui/styled-engine": "^5.15.9",
17 |     "@neo4j-devtools/word-color": "^0.0.8",
18 |     "@neo4j-ndl/base": "^2.11.6",
19 |     "@neo4j-ndl/react": "^2.15.10",
20 |     "@neo4j-nvl/base": "^0.3.1",
21 |     "@neo4j-nvl/react": "^0.3.1",
22 |     "@react-oauth/google": "^0.12.1",
23 |     "@types/uuid": "^9.0.7",
24 |     "axios": "^1.6.5",
25 |     "clsx": "^2.1.1",
26 |     "eslint-plugin-react": "^7.33.2",
27 |     "neo4j-driver": "^5.14.0",
28 |     "re-resizable": "^6.9.16",
29 |     "react": "^18.3.1",
30 |     "react-dom": "^18.3.1",
31 |     "react-icons": "^5.2.1",
32 |     "react-markdown": "^9.0.1",
33 |     "react-router": "^6.23.1",
34 |     "react-router-dom": "^6.23.1",
35 |     "remark-gfm": "^4.0.0",
36 |     "tailwind-merge": "^2.3.0",
37 |     "uuid": "^9.0.1"
38 |   },
39 |   "devDependencies": {
40 |     "@types/node": "^20.11.10",
41 |     "@types/react": "^18.2.15",
42 |     "@types/react-dom": "^18.2.7",
43 |     "@typescript-eslint/eslint-plugin": "^6.0.0",
44 |     "@typescript-eslint/parser": "^6.0.0",
45 |     "@vitejs/plugin-react": "^4.0.3",
46 |     "autoprefixer": "^10.4.17",
47 |     "eslint": "^8.45.0",
48 |     "eslint-config-prettier": "^8.5.0",
49 |     "eslint-plugin-react-hooks": "^4.6.0",
50 |     "eslint-plugin-react-refresh": "^0.4.3",
51 |     "postcss": "^8.4.33",
52 |     "prettier": "^2.7.1",
53 |     "react-dropzone": "^14.2.3",
54 |     "tailwindcss": "^3.4.1",
55 |     "typescript": "^5.0.2",
56 |     "vite": "^4.5.3"
57 |   }
58 | }
59 | 


--------------------------------------------------------------------------------
/frontend/src/context/ThemeWrapper.tsx:
--------------------------------------------------------------------------------
 1 | import { ReactNode, createContext, useMemo, useState } from 'react';
 2 | import { NeedleThemeProvider, useMediaQuery } from '@neo4j-ndl/react';
 3 | 
 4 | // Default theme context (no provider mounted): no-op toggle, colour mode read from localStorage 'mode'.
 5 | const storedColorMode = localStorage.getItem('mode') as 'light' | 'dark';
 6 | const noopToggle = () => {};
 7 | export const ThemeWrapperContext = createContext({ toggleColorMode: noopToggle, colorMode: storedColorMode });
 8 | 
 9 | // Props of ThemeWrapper.
10 | // `children` is the subtree rendered inside the theme providers.
11 | type ThemeWrapperProps = { children: ReactNode };
12 | /**
13 |  * Provides the colour mode and its toggle to the application and wraps the
14 |  * children in the Needle theme provider.
15 |  *
16 |  * Initial mode: a 'light'/'dark' value saved in localStorage under 'mode' wins;
17 |  * otherwise the OS colour-scheme preference is used, falling back to 'light'.
18 |  * The 'ndl-theme-dark' class on <body> always mirrors the active mode.
19 |  */
20 | const ThemeWrapper = ({ children }: ThemeWrapperProps) => {
21 |   const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
22 |   const [mode, setMode] = useState<'light' | 'dark'>(() => {
23 |     const storedMode = localStorage.getItem('mode');
24 |     if (storedMode === 'light' || storedMode === 'dark') {
25 |       return storedMode;
26 |     }
27 |     return prefersDarkMode ? 'dark' : 'light';
28 |   });
29 | 
30 |   // Keep the body class in step with the mode handed to NeedleThemeProvider.
31 |   // classList.toggle with a force flag is idempotent, so repeating it on every render is harmless.
32 |   document.body.classList.toggle('ndl-theme-dark', mode === 'dark');
33 | 
34 |   const themeWrapperUtils = useMemo(
35 |     () => ({
36 |       colorMode: mode,
37 |       toggleColorMode: () => {
38 |         const nextMode = mode === 'light' ? 'dark' : 'light';
39 |         // Persist the explicit choice so it is restored on the next load.
40 |         localStorage.setItem('mode', nextMode);
41 |         setMode(nextMode);
42 |       },
43 |     }),
44 |     [mode]
45 |   );
46 | 
47 |   return (
48 |     <ThemeWrapperContext.Provider value={themeWrapperUtils}>
49 |       <NeedleThemeProvider theme={mode} wrapperProps={{ isWrappingChildren: true }}>
50 |         {children}
51 |       </NeedleThemeProvider>
52 |     </ThemeWrapperContext.Provider>
53 |   );
54 | };
55 | export default ThemeWrapper;
56 | 


--------------------------------------------------------------------------------
/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx:
--------------------------------------------------------------------------------
 1 | import { Button, Checkbox, Dialog } from '@neo4j-ndl/react';
 2 | import { useState } from 'react';
 3 | interface DeletePopUpProps {
 4 |   open: boolean;
 5 |   no_of_files: number;
 6 |   deleteHandler: (delentities: boolean) => void;
 7 |   deleteCloseHandler: () => void;
 8 |   loading: boolean;
 9 |   view?: 'contentView' | 'settingsView';
10 | }
11 | 
12 | /**
13 |  * Confirmation dialog shown before deleting files or nodes from the graph database.
14 |  *
15 |  * In 'contentView' the user can additionally choose, through a checkbox that
16 |  * starts checked, whether associated entities are deleted as well; the choice is
17 |  * passed to `deleteHandler` when "Continue" is pressed. While `loading` is true the
18 |  * Cancel button is disabled and the Continue button receives `loading`.
19 |  */
20 | export default function DeletePopUp(props: DeletePopUpProps) {
21 |   const { open, no_of_files, deleteHandler, deleteCloseHandler, loading, view } = props;
22 |   const [deleteEntities, setDeleteEntities] = useState<boolean>(true);
23 |   const isContentView = view === 'contentView';
24 |   const isPlural = no_of_files > 1;
25 | 
26 |   let message: string;
27 |   if (isContentView) {
28 |     const noun = isPlural ? 'Files' : 'File';
29 |     const entitiesPart = deleteEntities ? 'and associated entities' : '';
30 |     message = `Are you sure you want to permanently delete ${no_of_files} ${noun} ${entitiesPart} from the graph database?`;
31 |   } else {
32 |     const noun = isPlural ? 'Nodes' : 'Node';
33 |     message = `Are you sure you want to permanently delete ${no_of_files} ${noun} from the graph database? `;
34 |   }
35 | 
36 |   return (
37 |     <Dialog open={open} onClose={deleteCloseHandler}>
38 |       <Dialog.Content>
39 |         <h5 className='max-w-[90%]'>{message}</h5>
40 |         {isContentView && (
41 |           <div className='mt-5'>
42 |             <Checkbox
43 |               label='Delete Entities'
44 |               checked={deleteEntities}
45 |               onChange={(e) => setDeleteEntities(e.target.checked)}
46 |             />
47 |           </div>
48 |         )}
49 |       </Dialog.Content>
50 |       <Dialog.Actions className='mt-3'>
51 |         <Button onClick={deleteCloseHandler} size='large' disabled={loading}>
52 |           Cancel
53 |         </Button>
54 |         <Button onClick={() => deleteHandler(deleteEntities)} size='large' loading={loading}>
55 |           Continue
56 |         </Button>
57 |       </Dialog.Actions>
58 |     </Dialog>
59 |   );
60 | }


--------------------------------------------------------------------------------
/frontend/src/components/WebSources/CustomSourceInput.tsx:
--------------------------------------------------------------------------------
 1 | import { Banner, Box, Button, Flex, TextInput } from '@neo4j-ndl/react';
 2 | import { CustomInput } from '../../types';
 3 | 
 4 | /**
 5 |  * URL input form: an optional status banner, a required text field and
 6 |  * Reset/Submit buttons. The component keeps no state of its own; every value
 7 |  * and handler arrives through the `CustomInput` props.
 8 |  */
 9 | export default function CustomSourceInput(props: CustomInput) {
10 |   const { value, label, placeHolder, id, status, statusMessage, isValid, isFocused, disabledCheck } = props;
11 |   const { onChangeHandler, onBlurHandler, onPasteHandler, onCloseHandler, submitHandler, setStatus } = props;
12 | 
13 |   // Reset is only offered once the field contains something other than whitespace.
14 |   const nothingToReset = value.trim() === '';
15 |   // Evaluates to false (no error text) unless `isValid` is false and `isFocused` is true.
16 |   const validationError = !isValid && isFocused && 'Please Fill The Valid URL';
17 |   const dismissBanner = () => setStatus('unknown');
18 |   const submitValue = () => submitHandler(value);
19 | 
20 |   return (
21 |     <Flex gap='6'>
22 |       {status !== 'unknown' && (
23 |         <Box>
24 |           <Banner
25 |             closeable
26 |             description={statusMessage}
27 |             onClose={dismissBanner}
28 |             type={status}
29 |             name='Custom Banner'
30 |             className='text-lg font-semibold'
31 |           />
32 |         </Box>
33 |       )}
34 |       <Box>
35 |         <div className='w-full inline-block'>
36 |           <TextInput
37 |             id={id}
38 |             value={value}
39 |             disabled={false}
40 |             label={label}
41 |             aria-label={label}
42 |             placeholder={placeHolder}
43 |             onBlur={onBlurHandler}
44 |             autoFocus
45 |             fluid
46 |             required
47 |             onChange={onChangeHandler}
48 |             errorText={validationError}
49 |             onPaste={onPasteHandler}
50 |           />
51 |         </div>
52 |       </Box>
53 |       <Flex flexDirection='row' justifyContent='flex-end'>
54 |         <div>
55 |           <Button
56 |             disabled={nothingToReset}
57 |             color='neutral'
58 |             fill='outlined'
59 |             onClick={onCloseHandler}
60 |             size='medium'
61 |             className='mr-4'
62 |           >
63 |             Reset
64 |           </Button>
65 |           <Button onClick={submitValue} size='medium' disabled={disabledCheck}>
66 |             Submit
67 |           </Button>
68 |         </div>
69 |       </Flex>
70 |     </Flex>
71 |   );
72 | }
73 | 


--------------------------------------------------------------------------------
/frontend/src/components/WebSources/GenericSourceButton.tsx:
--------------------------------------------------------------------------------
 1 | import CustomButton from '../UI/CustomButton';
 2 | import internet from '../../assets/images/web-search-svgrepo-com.svg';
 3 | import internetdarkmode from '../../assets/images/web-search-darkmode-final2.svg';
 4 | import { DataComponentProps } from '../../types';
 5 | import { Flex, Typography } from '@neo4j-ndl/react';
 6 | import IconButtonWithToolTip from '../UI/IconButtonToolTip';
 7 | import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons';
 8 | import { APP_SOURCES } from '../../utils/Constants';
 9 | import { useContext } from 'react';
10 | import { ThemeWrapperContext } from '../../context/ThemeWrapper';
11 | 
12 | /**
13 |  * Renders the "Web Sources" button (logo chosen by the current colour mode) and an
14 |  * info tooltip listing which of Youtube, Wikipedia and Website are present in APP_SOURCES.
15 |  */
16 | export default function GenericButton({ openModal }: DataComponentProps) {
17 |   const { colorMode } = useContext(ThemeWrapperContext);
18 |   const logo = colorMode === 'dark' ? internetdarkmode : internet;
19 |   // True when APP_SOURCES is defined and contains the given source key.
20 |   const hasSource = (source: 'youtube' | 'wiki' | 'web') => APP_SOURCES != undefined && APP_SOURCES.includes(source);
21 | 
22 |   const sourceList = (
23 |     <Typography variant='body-small'>
24 |       <Flex gap='3' alignItems='flex-start'>
25 |         {hasSource('youtube') && <span>Youtube</span>}
26 |         {hasSource('wiki') && <span>Wikipedia</span>}
27 |         {hasSource('web') && <span>Website</span>}
28 |       </Flex>
29 |     </Typography>
30 |   );
31 | 
32 |   return (
33 |     <Flex alignItems='center' gap='4'>
34 |       <CustomButton openModal={openModal} logo={logo} wrapperclassName='my-2' className='webImg' />
35 |       <Typography variant='body-small'>
36 |         <Flex gap='0'>
37 |           <span>Web Sources</span>
38 |           <div className='align-self-center flex justify-center'>
39 |             <IconButtonWithToolTip label='Source info' clean text={sourceList}>
40 |               <InformationCircleIconOutline className='w-[22px] h-[22px]' />
41 |             </IconButtonWithToolTip>
42 |           </div>
43 |         </Flex>
44 |       </Typography>
45 |     </Flex>
46 |   );
47 | }
48 | 


--------------------------------------------------------------------------------
/backend/README.md:
--------------------------------------------------------------------------------
 1 | # Project Overview
 2 | Welcome to our project! This project is built using the FastAPI framework to create a fast and modern API with Python.
 3 | 
 4 | ## Features
 5 | - API Endpoints: This project provides various API endpoints to perform specific tasks.
 6 | - Data Validation: Utilizes FastAPI's data validation and serialization features.
 7 | - Interactive Documentation: Access Swagger UI and ReDoc for interactive API documentation.
 8 | 
 9 | ## Getting Started 
10 | 
11 | Follow these steps to set up and run the project locally:
12 | 
13 | 1. Clone the Repository:
14 | 
15 | > git clone https://github.com/neo4j-labs/llm-graph-builder.git
16 | 
17 | > cd llm-graph-builder
18 | 
19 | 2. Install the dependencies:
20 | 
21 | > pip install -r requirements.txt
22 | 
23 | ## Run backend project using uvicorn
24 | Run the server:
25 | > uvicorn score:app --reload
26 | 
27 | ## Run project using docker
28 | ## Prerequisites
29 | Before proceeding, ensure the following software is installed on your machine:
30 | 
31 | Docker: https://www.docker.com/
32 | 
33 | 1. Build the docker image
34 |    > docker build -t your_image_name .
35 |    
36 |    Replace `your_image_name` with the meaningful name for your Docker image
37 | 
38 | 2. Run the Docker Container
39 |    > docker run -it -p 8000:8000 your_image_name
40 |    
41 |    Replace `8000` with the desired port.
42 | 
43 | ## Access the API Documentation
44 | Open your browser and navigate to
45 | http://127.0.0.1:8000/docs for Swagger UI or
46 | http://127.0.0.1:8000/redoc for ReDoc.
47 | 
48 | ## Project Structure
49 | `score.py`: Score entry point for FastAPI application
50 | 
51 | ## Configuration
52 | 
53 | Update the environment variables in the `.env` file.
54 | 
55 | `OPENAI_API_KEY`: Open AI key to use LLM
56 | 
57 | `DIFFBOT_API_KEY` : Diffbot API key to use DiffbotGraphTransformer
58 | 
59 | `NEO4J_URI` : Neo4j URL
60 | 
61 | `NEO4J_USERNAME` : Neo4J database username
62 | 
63 | `NEO4J_PASSWORD` : Neo4j database user password
64 | 
65 | `AWS_ACCESS_KEY_ID` : AWS Access key ID
66 | 
67 | `AWS_SECRET_ACCESS_KEY` : AWS secret access key
68 | 
69 | 
70 | ## Contact
71 | For questions or support, feel free to contact us at christopher.crosbie@neo4j.com or michael.hunger@neo4j.com
72 | 


--------------------------------------------------------------------------------
/frontend/src/assets/images/web-search-svgrepo-com (2).svg:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 2 | 
 3 | <!-- Uploaded to: SVG Repo, www.svgrepo.com, Transformed by: SVG Repo Mixer Tools -->
 4 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="websearch" viewBox="0 0 214.6279 198.1416" enable-background="new 0 0 214.6279 198.1416" xml:space="preserve" width="800px" height="800px" fill="#1a1b1d">
 5 | 
 6 | <g id="SVGRepo_bgCarrier" stroke-width="0"/>
 7 | 
 8 | <g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"/>
 9 | 
10 | <g id="SVGRepo_iconCarrier"> 
11 | <g> 
12 | <path fill="#02507b" d="M171.748,142.3193c0-15.4375-12.5625-28-28-28s-28,12.5625-28,28s12.5625,28,28,28 S171.748,157.7568,171.748,142.3193z"/>
13 |  <path fill="#ffffff" d="M95.748,166.3193v-76h42.989c0.0146,0.7878,0.0696,1.5579,0.0696,2.3516c0,2.2109,1.7891,4,4,4 s4-1.7891,4-4c0-0.7937-0.0603-1.5632-0.0745-2.3516h25.0159h5.6484h2.3516c0-46.3164-37.6836-84-84-84h-8v0.4053 c-42.5754,4.0447-76,39.9785-76,83.5947c0,46.3164,37.6836,84,84,84c2.2109,0,4-1.7891,4-4S97.959,166.3193,95.748,166.3193z M87.748,164.9949c-19.9287-6.438-35.0586-36.3945-35.0586-72.324c0-0.7937,0.0549-1.5637,0.0696-2.3516h34.989V164.9949z M53.1655,82.3193C55.75,51.1135,69.7729,26.157,87.748,20.3506v61.9688H53.1655z M95.748,82.3193v-63.293 c21.6829,0,39.6248,27.5808,42.5825,63.293H95.748z M171.3181,82.3193h-24.9685c-2.3884-29.9812-14.96-54.8911-31.9993-65.6013 C144.7681,24.4146,167.9641,50.417,171.3181,82.3193z M77.1458,16.718C60.1064,27.4282,47.5349,52.3381,45.1465,82.3193H20.178 C23.532,50.417,46.728,24.4146,77.1458,16.718z M19.748,90.3193h25.0159c-0.0142,0.7883-0.0745,1.5579-0.0745,2.3516 c0,28.5327,9.217,53.6572,23.1233,68.2603C39.7031,149.769,19.748,122.3567,19.748,90.3193z"/> <path fill="#ffffff" d="M191.5137,184.4287l-19.665-19.665c4.9307-6.1609,7.8994-13.9573,7.8994-22.4443 c0-19.8516-16.1484-36-36-36s-36,16.1484-36,36s16.1484,36,36,36c8.4871,0,16.2834-2.9688,22.4443-7.8994l19.665,19.665 L191.5137,184.4287z M115.748,142.3193c0-15.4375,12.5625-28,28-28s28,12.5625,28,28s-12.5625,28-28,28 S115.748,157.7568,115.748,142.3193z"/> </g>
14 |     </g>
15 | 
16 | </svg>


--------------------------------------------------------------------------------
/frontend/src/services/QnaAPI.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { UserCredentials } from '../types';
 4 | 
 5 | // Posts a question to `/chat_bot` as multipart form data; resolves to the axios response plus the
 6 | // elapsed request time in milliseconds. Failures are logged and re-thrown.
 7 | export const chatBotAPI = async (
 8 |   userCredentials: UserCredentials,
 9 |   question: string,
10 |   session_id: string,
11 |   model: string,
12 |   mode: string,
13 |   document_names?: (string | undefined)[]
14 | ) => {
15 |   try {
16 |     const formData = new FormData();
17 |     formData.append('uri', userCredentials?.uri ?? '');
18 |     formData.append('database', userCredentials?.database ?? '');
19 |     formData.append('userName', userCredentials?.userName ?? '');
20 |     formData.append('password', userCredentials?.password ?? '');
21 |     formData.append('question', question);
22 |     formData.append('session_id', session_id);
23 |     formData.append('model', model);
24 |     formData.append('mode', mode);
25 |     // JSON.stringify(undefined) is undefined, which FormData would send as the text "undefined";
26 |     // fall back to an empty list so the field always holds valid JSON.
27 |     formData.append('document_names', JSON.stringify(document_names ?? []));
28 |     const startTime = Date.now();
29 |     const response = await axios.post(`${url()}/chat_bot`, formData, {
30 |       headers: { 'Content-Type': 'multipart/form-data' },
31 |     });
32 |     return { response, timeTaken: Date.now() - startTime };
33 |   } catch (error) {
34 |     console.log('Error Posting the Question:', error);
35 |     throw error;
36 |   }
37 | };
38 | 
39 | // Posts the connection credentials and the session id to `/clear_chat_bot` as multipart
40 | // form data and resolves to the axios response. Failures are logged and re-thrown.
41 | export const clearChatAPI = async (userCredentials: UserCredentials, session_id: string) => {
42 |   try {
43 |     const formData = new FormData();
44 |     formData.append('uri', userCredentials?.uri ?? '');
45 |     formData.append('database', userCredentials?.database ?? '');
46 |     formData.append('userName', userCredentials?.userName ?? '');
47 |     formData.append('password', userCredentials?.password ?? '');
48 |     formData.append('session_id', session_id);
49 |     const response = await axios.post(`${url()}/clear_chat_bot`, formData, {
50 |       headers: {
51 |         'Content-Type': 'multipart/form-data',
52 |       },
53 |     });
54 |     return response;
55 |   } catch (error) {
56 |     // The message names this request so the log is not confused with chatBotAPI failures.
57 |     console.log('Error Clearing the Chat:', error);
58 |     throw error;
59 |   }
60 | };
61 | 


--------------------------------------------------------------------------------
/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx:
--------------------------------------------------------------------------------
 1 | import { TrashIconOutline, XMarkIconOutline } from '@neo4j-ndl/react/icons';
 2 | import ChatModeToggle from './ChatModeToggle';
 3 | import { Box, IconButton } from '@neo4j-ndl/react';
 4 | import { Messages } from '../../types';
 5 | import IconButtonWithToolTip from '../UI/IconButtonToolTip';
 6 | import { tooltips } from '../../utils/Constants';
 7 | import { useState } from 'react';
 8 | import { RiChatSettingsLine } from 'react-icons/ri';
 9 | 
10 | // Props for ExpandedChatButtonContainer:
11 | // closeChatBot is the click handler of the close button, deleteOnClick (optional) is the
12 | // click handler of the clear-chat button, and messages is the list whose length decides
13 | // whether the clear-chat button is disabled.
14 | type IconProps = { closeChatBot: () => void; deleteOnClick?: () => void; messages: Messages[] };
15 | 
16 | // Action buttons of the expanded chatbot: chat-mode menu trigger, clear-chat and close.
17 | // The chat-mode menu is anchored to the button that opened it.
18 | const ExpandedChatButtonContainer: React.FC<IconProps> = (props) => {
19 |   const { closeChatBot, deleteOnClick, messages } = props;
20 |   const [modeMenuAnchor, setModeMenuAnchor] = useState<HTMLElement | null>(null);
21 |   const [isModeMenuOpen, setIsModeMenuOpen] = useState<boolean>(false);
22 |   const closeModeMenu = () => setIsModeMenuOpen(false);
23 |   // Clearing is disabled while the list holds exactly one message.
24 |   const clearDisabled = messages.length === 1;
25 | 
26 |   return (
27 |     <div className='flex items-end justify-end'>
28 |       <ChatModeToggle
29 |         closeHandler={closeModeMenu}
30 |         anchorPortal={true}
31 |         disableBackdrop={true}
32 |         open={isModeMenuOpen}
33 |         menuAnchor={modeMenuAnchor}
34 |       />
35 |       <Box className='!h-[48px] mx-2'>
36 |         <IconButtonWithToolTip
37 |           onClick={(e) => {
38 |             // Remember the clicked button so the menu can be positioned against it.
39 |             setModeMenuAnchor(e.currentTarget);
40 |             setIsModeMenuOpen(true);
41 |           }}
42 |           clean
43 |           text='Chat mode'
44 |           placement='bottom'
45 |           label='Chat mode'
46 |         >
47 |           <RiChatSettingsLine />
48 |         </IconButtonWithToolTip>
49 |         <IconButtonWithToolTip
50 |           text={tooltips.clearChat}
51 |           aria-label='Remove chat history'
52 |           clean
53 |           onClick={deleteOnClick}
54 |           disabled={clearDisabled}
55 |           placement='bottom'
56 |           label={tooltips.clearChat}
57 |         >
58 |           <TrashIconOutline />
59 |         </IconButtonWithToolTip>
60 |         <IconButton aria-label='Remove chatbot' clean onClick={closeChatBot}>
61 |           <XMarkIconOutline />
62 |         </IconButton>
63 |       </Box>
64 |     </div>
65 |   );
66 | };
67 | 
68 | export default ExpandedChatButtonContainer;
69 | 


--------------------------------------------------------------------------------
/backend/src/shared/schema_extraction.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | from langchain_core.pydantic_v1 import BaseModel, Field
 3 | from src.llm import get_llm
 4 | from src.shared.constants import MODEL_VERSIONS
 5 | from langchain_core.prompts import ChatPromptTemplate
 6 | 
 7 | class Schema(BaseModel):
 8 |     """Knowledge Graph Schema."""
 9 |     # Passed as `schema` to llm.with_structured_output; NOTE(review): docstring/descriptions are presumably sent to the LLM - confirm before rewording.
10 |     labels: List[str] = Field(description="list of node labels or types in a graph schema")
11 |     relationshipTypes: List[str] = Field(description="list of relationship types in a graph schema")
12 | 
13 | PROMPT_TEMPLATE_WITH_SCHEMA = (
14 |     "You are an expert in schema extraction, especially for extracting graph schema information from various formats."
15 |     "Generate the generalized graph schema based on input text. Identify key entities and their relationships and "
16 |     "provide a generalized label for the overall context"
17 |     "Schema representations formats can contain extra symbols, quotes, or comments. Ignore all that extra markup."
18 |     "Only return the string types for nodes and relationships. Don't return attributes."
19 | )
20 | 
21 | PROMPT_TEMPLATE_WITHOUT_SCHEMA = (
22 |     "You are an expert in schema extraction, especially for deriving graph schema information from example texts."
23 |     "Analyze the following text and extract only the types of entities and relationships from the example prose."
24 |     "Don't return any actual entities like people's names or instances of organizations."
25 |     "Only return the string types for nodes and relationships, don't return attributes."
26 | )
27 | 
28 | def schema_extraction_from_text(input_text:str, model:str, is_schema_description_cheked:bool):
29 |     
30 |     llm, model_name = get_llm(model)
31 |     if is_schema_description_cheked:
32 |         schema_prompt = PROMPT_TEMPLATE_WITH_SCHEMA
33 |     else:
34 |         schema_prompt = PROMPT_TEMPLATE_WITHOUT_SCHEMA
35 |         
36 |     prompt = ChatPromptTemplate.from_messages(
37 |     [("system", schema_prompt), ("user", "{text}")]
38 |     )
39 |     
40 |     runnable = prompt | llm.with_structured_output(
41 |         schema=Schema,
42 |         method="function_calling",
43 |         include_raw=False,
44 |     )
45 |     
46 |     raw_schema = runnable.invoke({"text": input_text})
47 |     return raw_schema


--------------------------------------------------------------------------------
/frontend/src/services/URLScan.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { url } from '../utils/Utils';
 3 | import { ScanProps, ServerResponse } from '../types';
 4 | 
 5 | /**
 6 |  * Sends a scan request for a remote source to the backend `/url/scan` endpoint.
 7 |  *
 8 |  * Connection credentials, the source location and type, the wiki query and the optional
 9 |  * model / AWS / GCS / access-token fields of `props` are posted as form fields; optional
10 |  * fields are appended only when they are set.
11 |  *
12 |  * @param props - scan parameters supplied by the caller
13 |  * @returns the response of the axios POST
14 |  * @throws the original error, after logging it, when building or sending the request fails
15 |  */
16 | const urlScanAPI = async (props: ScanProps) => {
17 |   try {
18 |     const formData = new FormData();
19 |     // A missing urlParam is sent as '' instead of being interpolated into the text "undefined/".
20 |     let sourceUrl: string = props?.urlParam ?? '';
21 |     // S3 locations get a trailing slash when one is not already present.
22 |     if (props.source_type === 's3 bucket' && sourceUrl && !sourceUrl.endsWith('/')) {
23 |       sourceUrl = `${sourceUrl}/`;
24 |     }
25 |     formData.append('uri', props?.userCredentials?.uri ?? '');
26 |     formData.append('database', props?.userCredentials?.database ?? '');
27 |     formData.append('userName', props?.userCredentials?.userName ?? '');
28 |     formData.append('password', props?.userCredentials?.password ?? '');
29 |     formData.append('source_url', sourceUrl);
30 |     formData.append('wiki_query', decodeURIComponent(props?.wikiquery ?? ''));
31 |     formData.append('source_type', props?.source_type ?? '');
32 |     if (props.model != undefined) {
33 |       formData.append('model', props?.model);
34 |     }
35 |     if (props.accessKey?.length) {
36 |       formData.append('aws_access_key_id', props?.accessKey);
37 |     }
38 |     if (props?.secretKey?.length) {
39 |       formData.append('aws_secret_access_key', props?.secretKey);
40 |     }
41 |     if (props?.gcs_bucket_name) {
42 |       formData.append('gcs_bucket_name', props.gcs_bucket_name);
43 |     }
44 |     if (props?.gcs_bucket_folder) {
45 |       formData.append('gcs_bucket_folder', props.gcs_bucket_folder);
46 |     }
47 |     if (props?.gcs_project_id) {
48 |       formData.append('gcs_project_id', props.gcs_project_id);
49 |     }
50 |     if (props?.access_token) {
51 |       formData.append('access_token', props.access_token);
52 |     }
53 | 
54 |     const response: ServerResponse = await axios.post(`${url()}/url/scan`, formData, {
55 |       headers: {
56 |         'Content-Type': 'multipart/form-data',
57 |       },
58 |     });
59 |     return response;
60 |   } catch (error) {
61 |     console.log('Error uploading file:', error);
62 |     throw error;
63 |   }
64 | };
65 | 
66 | export { urlScanAPI };
67 | 


--------------------------------------------------------------------------------
/backend/src/groq_llama3_llm.py:
--------------------------------------------------------------------------------
 1 | from langchain_community.graphs import Neo4jGraph
 2 | from dotenv import load_dotenv
 3 | import os
 4 | import logging
 5 | import concurrent.futures
 6 | from concurrent.futures import ThreadPoolExecutor
 7 | from typing import List
 8 | from langchain_experimental.graph_transformers import LLMGraphTransformer
 9 | from langchain_core.documents import Document
10 | from src.llm import get_combined_chunks, get_llm
11 | 
12 | load_dotenv()
13 | logging.basicConfig(format='%(asctime)s - %(message)s',level='INFO')
14 | 
15 | def get_graph_from_Groq_Llama3(model_version,
16 |                             graph: Neo4jGraph,
17 |                             chunkId_chunkDoc_list: List, 
18 |                             allowedNodes, 
19 |                             allowedRelationship):
20 |     """
21 |         Extract graph from Groq Llama3 and store it in database. 
22 |         This is a wrapper for extract_and_store_graph
23 |                                 
24 |         Args:
25 |             model_version : identify the model of LLM
26 |             graph: Neo4jGraph to be extracted.
27 |             chunks: List of chunk documents created from input file
28 |         Returns: 
29 |             List of langchain GraphDocument - used to generate graph
30 |     """
31 |     logging.info(f"Get graphDocuments from {model_version}")
32 |     futures = []
33 |     graph_document_list = []
34 |     combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list)
35 |     #api_key = os.environ.get('GROQ_API_KEY') 
36 |     llm,model_name = get_llm(model_version)
37 |     llm_transformer = LLMGraphTransformer(llm=llm, node_properties=["description"], allowed_nodes=allowedNodes, allowed_relationships=allowedRelationship)
38 |     
39 |     with ThreadPoolExecutor(max_workers=10) as executor:
40 |         for chunk in combined_chunk_document_list:
41 |             chunk_doc = Document(page_content= chunk.page_content.encode("utf-8"), metadata=chunk.metadata)
42 |             futures.append(executor.submit(llm_transformer.convert_to_graph_documents,[chunk_doc]))   
43 |         
44 |         for i, future in enumerate(concurrent.futures.as_completed(futures)):
45 |             graph_document = future.result()
46 |             graph_document_list.append(graph_document[0])
47 |            
48 |     return  graph_document_list


--------------------------------------------------------------------------------
/frontend/src/components/Dropdown.tsx:
--------------------------------------------------------------------------------
 1 | import { Dropdown } from '@neo4j-ndl/react';
 2 | import { DropdownProps, OptionType } from '../types';
 3 | import { useMemo } from 'react';
 4 | import { capitalize } from '../utils/Utils';
 5 | interface ReusableDropdownProps extends DropdownProps {
 6 |   options: string[] | OptionType[]; // entries offered in the list; each is mapped to a { label, value } pair
 7 |   placeholder?: string; // shown when nothing is selected; 'Select an option' is used when omitted
 8 |   defaultValue?: string; // wrapped into a { label, value } option and passed as the default selection
 9 |   children?: React.ReactNode; // rendered below the dropdown inside the same wrapper div
10 |   view?: 'ContentView' | 'GraphView'; // 'ContentView' gives the wrapper the fixed width class w-[150px]
11 |   isDisabled: boolean; // forwarded unchanged to the select props
12 |   value?: OptionType; // forwarded unchanged to the select props
13 | }
14 | // Display label for a plain string option: drops an `LLM_MODEL_CONFIG_` prefix,
15 | // applies `capitalize` and turns underscores into spaces.
16 | const formatOptionLabel = (raw: string): string => {
17 |   const name = raw.includes('LLM_MODEL_CONFIG_') ? (raw.split('LLM_MODEL_CONFIG_').at(-1) as string) : raw;
18 |   return capitalize(name).split('_').join(' ');
19 | };
20 | /**
21 |  * Select dropdown built on the NDL `Dropdown` component.
22 |  *
23 |  * String options and `defaultValue` share one label format (`formatOptionLabel`), so a
24 |  * preselected default reads the same as its entry in the list. `OptionType` options keep
25 |  * their own `value` and are labelled with `capitalize(option.label)`.
26 |  */
27 | const DropdownComponent: React.FC<ReusableDropdownProps> = ({
28 |   options,
29 |   placeholder,
30 |   defaultValue,
31 |   onSelect,
32 |   children,
33 |   view,
34 |   isDisabled,
35 |   value,
36 | }) => {
37 |   const handleChange = (selectedOption: OptionType | null | void) => {
38 |     onSelect(selectedOption);
39 |   };
40 |   // Mapped once per `options` change; the earlier memo only returned `options` itself.
41 |   const selectOptions = useMemo(
42 |     () =>
43 |       options?.map((option) =>
44 |         typeof option === 'string'
45 |           ? { label: formatOptionLabel(option), value: option }
46 |           : { label: capitalize(option.label), value: option.value }
47 |       ),
48 |     [options]
49 |   );
50 |   return (
51 |     <>
52 |       <div className={view === 'ContentView' ? 'w-[150px]' : ''}>
53 |         <Dropdown
54 |           type='select'
55 |           aria-label='A selection dropdown'
56 |           selectProps={{
57 |             onChange: handleChange,
58 |             options: selectOptions,
59 |             placeholder: placeholder || 'Select an option',
60 |             defaultValue: defaultValue ? { label: formatOptionLabel(defaultValue), value: defaultValue } : undefined,
61 |             menuPlacement: 'auto',
62 |             isDisabled: isDisabled,
63 |             value: value,
64 |           }}
65 |           size='medium'
66 |           fluid
67 |         />
68 |         {children}
69 |       </div>
70 |     </>
71 |   );
72 | };
73 | export default DropdownComponent;
74 | 


--------------------------------------------------------------------------------
/frontend/src/components/QuickStarter.tsx:
--------------------------------------------------------------------------------
 1 | import Header from './Layout/Header';
 2 | import React, { useState } from 'react';
 3 | import { ThemeWrapperContext } from '../context/ThemeWrapper';
 4 | import PageLayout from './Layout/PageLayout';
 5 | import { FileContextProvider } from '../context/UsersFiles';
 6 | import UserCredentialsWrapper from '../context/UserCredentials';
 7 | import AlertContextWrapper from '../context/Alert';
 8 | import { MessageContextWrapper } from '../context/UserMessages';
 9 | 
10 | /**
11 |  * Wraps `Header` and `PageLayout` in the credential, file, message and alert context
12 |  * providers and owns the theme-mode, settings-panel and orphan-node-dialog state.
13 |  */
14 | const QuickStarter: React.FunctionComponent = () => {
15 |   const themeUtils = React.useContext(ThemeWrapperContext);
16 |   const [themeMode, setThemeMode] = useState<string>(themeUtils.colorMode);
17 |   const [isSettingsOpen, setIsSettingsOpen] = useState<boolean>(false);
18 |   const [isOrphanDialogOpen, setIsOrphanDialogOpen] = useState<boolean>(false);
19 | 
20 |   // Flip the locally held mode, then let the theme context toggle as well.
21 |   const toggleColorMode = () => {
22 |     setThemeMode((current) => (current === 'light' ? 'dark' : 'light'));
23 |     themeUtils.toggleColorMode();
24 |   };
25 |   const openSettingsModal = () => {
26 |     setIsSettingsOpen(true);
27 |   };
28 |   const closeSettingModal = () => {
29 |     setIsSettingsOpen(false);
30 |   };
31 |   const openOrphanNodeDeletionModal = () => {
32 |     setIsOrphanDialogOpen(true);
33 |   };
34 |   const closeOrphanNodeDeletionModal = () => {
35 |     setIsOrphanDialogOpen(false);
36 |   };
37 | 
38 |   return (
39 |     <UserCredentialsWrapper>
40 |       <FileContextProvider>
41 |         <MessageContextWrapper>
42 |           <AlertContextWrapper>
43 |             <Header themeMode={themeMode} toggleTheme={toggleColorMode} />
44 |             <PageLayout
45 |               openSettingsDialog={openSettingsModal}
46 |               isSettingPanelExpanded={isSettingsOpen}
47 |               closeSettingModal={closeSettingModal}
48 |               closeOrphanNodeDeletionModal={closeOrphanNodeDeletionModal}
49 |               showOrphanNodeDeletionModal={isOrphanDialogOpen}
50 |               openOrphanNodeDeletionModal={openOrphanNodeDeletionModal}
51 |             />
52 |           </AlertContextWrapper>
53 |         </MessageContextWrapper>
54 |       </FileContextProvider>
55 |     </UserCredentialsWrapper>
56 |   );
57 | };
58 | export default QuickStarter;
59 | 


--------------------------------------------------------------------------------
/backend/src/gemini_llm.py:
--------------------------------------------------------------------------------
 1 | from langchain_community.graphs import Neo4jGraph
 2 | from dotenv import load_dotenv
 3 | from langchain.schema import Document
 4 | import logging
 5 | import concurrent.futures
 6 | from concurrent.futures import ThreadPoolExecutor
 7 | from typing import List
 8 | from langchain_experimental.graph_transformers import LLMGraphTransformer
 9 | from langchain_core.documents import Document
10 | from typing import List
11 | import google.auth 
12 | from typing import List
13 | from langchain_core.documents import Document
14 | import vertexai
15 | from src.llm import get_graph_document_list, get_combined_chunks, get_llm
16 | 
17 | load_dotenv()
18 | logging.basicConfig(format='%(asctime)s - %(message)s',level='DEBUG')
19 | 
20 | 
21 | def get_graph_from_Gemini(model_version,
22 |                             graph: Neo4jGraph,
23 |                             chunkId_chunkDoc_list: List, 
24 |                             allowedNodes, 
25 |                             allowedRelationship):
26 |     """
27 |         Extract graph from OpenAI and store it in database. 
28 |         This is a wrapper for extract_and_store_graph
29 |                                 
30 |         Args:
31 |             model_version : identify the model of LLM
32 |             graph: Neo4jGraph to be extracted.
33 |             chunks: List of chunk documents created from input file
34 |         Returns: 
35 |             List of langchain GraphDocument - used to generate graph
36 |     """
37 |     logging.info(f"Get graphDocuments from {model_version}")
38 |     futures = []
39 |     graph_document_list = []
40 |     location = "us-central1"
41 |     #project_id = "llm-experiments-387609"                            
42 |     credentials, project_id = google.auth.default()
43 |     if hasattr(credentials, "service_account_email"):
44 |       logging.info(credentials.service_account_email)
45 |     else:
46 |         logging.info("WARNING: no service account credential. User account credential?")                           
47 |     vertexai.init(project=project_id, location=location)
48 |     
49 |     combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list)
50 |      
51 |     llm,model_name = get_llm(model_version)
52 |     return  get_graph_document_list(llm, combined_chunk_document_list, allowedNodes, allowedRelationship)
53 |            
54 |        
55 | 


--------------------------------------------------------------------------------
/frontend/src/logo.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Generator: Adobe Illustrator 27.8.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
 3 | <svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
 4 | 	 viewBox="0 0 677.5 242.4" style="enable-background:new 0 0 677.5 242.4;" xml:space="preserve">
 5 | <style type="text/css">
 6 | 	.st0{fill:#FFFFFF;}
 7 | </style>
 8 | <g>
 9 | 	<path class="st0" d="M137.8,61.9c-35.3,0-58.9,20.5-58.9,60.4v28.4c3.5-1.7,7.3-2.6,11.4-2.6s8,1,11.5,2.7v-28.5
10 | 		c0-25.8,14.2-39.1,36-39.1s36,13.3,36,39.1v62.1h22.9v-62.1C196.7,82.2,173,61.9,137.8,61.9L137.8,61.9z"/>
11 | 	<path class="st0" d="M209.2,124.7c0-36.2,26.6-62.8,64.2-62.8s63.8,26.6,63.8,62.8v8.5H233.3c3.4,21.3,19.3,33.1,40.1,33.1
12 | 		c15.5,0,26.3-4.8,33.3-15.2h25.4c-9.2,22.2-30.9,36.5-58.7,36.5C235.7,187.5,209.2,161,209.2,124.7L209.2,124.7z M313,112.7
13 | 		c-4.6-19.1-20.3-29.5-39.6-29.5s-34.8,10.6-39.4,29.5H313z"/>
14 | 	<path class="st0" d="M349.5,124.7c0-36.2,26.6-62.8,64.2-62.8s64.2,26.6,64.2,62.8c0,36.2-26.6,62.8-64.2,62.8
15 | 		S349.5,161,349.5,124.7z M454.7,124.7c0-24.2-16.4-41.5-41.1-41.5s-41.1,17.4-41.1,41.5c0,24.1,16.4,41.5,41.1,41.5
16 | 		S454.7,148.9,454.7,124.7z"/>
17 | 	<path class="st0" d="M609.1,208.1h2.7c14.7,0,20.3-6.5,20.3-23.4V65.9H655v117.3c0,29.5-11.6,44.7-41.1,44.7h-4.8L609.1,208.1
18 | 		L609.1,208.1z"/>
19 | 	<path class="st0" d="M597.6,195.9h-22.9v-28.3h-58.2c-11.6,0-21.7-5.7-26.3-14.8c-4.3-8.6-3.1-18.7,3.1-27.2L545.6,57
20 | 		c7.5-10.1,20.2-14.2,32.2-10.2c12,4,19.8,14.7,19.8,27.3v73.1h17.2v20.4h-17.2V195.9L597.6,195.9z M512.6,138.1
21 | 		c-0.7,0.9-1.1,2.1-1.1,3.4c0,3.2,2.6,5.8,5.8,5.8h57.5V73.6c0-3.8-2.8-5.2-4-5.6c-0.5-0.1-1.2-0.3-2.1-0.3c-1.4,0-3.1,0.5-4.6,2.4
22 | 		L512.6,138.1L512.6,138.1L512.6,138.1z"/>
23 | 	<path class="st0" d="M24.6,125.8c-3,1.5-5.8,4-7.8,7.3c-2,3.3-2.8,6.8-2.5,10.3c0.3,6.3,3.8,12.1,9.5,15.3c5.3,3,11.3,2.3,17,1
24 | 		c7-1.8,13-2.5,19.3,1.3c0,0,0,0,0.3,0c10.8,6.3,10.8,22.1,0,28.4c0,0,0,0-0.3,0c-6.3,3.8-12.3,3-19.3,1.3c-5.5-1.5-11.5-2.3-17,1
25 | 		c-5.8,3.3-9,9.3-9.5,15.3c-0.3,3.5,0.5,7,2.5,10.3c2,3.3,4.5,5.8,7.8,7.3c5.5,2.8,12.3,2.8,18-0.5c5.3-3,7.8-8.8,9.3-14.3
26 | 		c2-7,4.3-12.6,10.8-16.1c6.3-3.8,12.3-3,19.3-1.3c5.5,1.5,11.5,2.3,17-1c5.8-3.3,9-9.3,9.5-15.3c0-0.5,0-0.8,0-1.3
27 | 		c0-0.5,0-0.8,0-1.3c-0.3-6.3-3.8-12.1-9.5-15.3c-5.3-3-11.3-2.3-17-1c-7,1.8-13,2.5-19.3-1.3c-6.3-3.8-8.8-9.1-10.8-16.1
28 | 		c-1.5-5.5-4-11.1-9.3-14.3C36.8,123.1,30.1,123.1,24.6,125.8z"/>
29 | 	<path class="st0" d="M643.6,17.2c-10.8,0-19.6,8.8-19.6,19.6s8.8,19.6,19.6,19.6c10.8,0,19.6-8.8,19.6-19.6S654.4,17.2,643.6,17.2z
30 | 		"/>
31 | </g>
32 | </svg>


--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
  1 | # Logs
  2 | logs
  3 | *.log
  4 | npm-debug.log*
  5 | yarn-debug.log*
  6 | yarn-error.log*
  7 | lerna-debug.log*
  8 | .pnpm-debug.log*
  9 | 
 10 | # Diagnostic reports (https://nodejs.org/api/report.html)
 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 12 | 
 13 | # Runtime data
 14 | pids
 15 | *.pid
 16 | *.seed
 17 | *.pid.lock
 18 | 
 19 | # Directory for instrumented libs generated by jscoverage/JSCover
 20 | lib-cov
 21 | 
 22 | # Coverage directory used by tools like istanbul
 23 | coverage
 24 | *.lcov
 25 | 
 26 | # nyc test coverage
 27 | .nyc_output
 28 | 
 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
 30 | .grunt
 31 | 
 32 | # Bower dependency directory (https://bower.io/)
 33 | bower_components
 34 | 
 35 | # node-waf configuration
 36 | .lock-wscript
 37 | 
 38 | # Compiled binary addons (https://nodejs.org/api/addons.html)
 39 | build/Release
 40 | 
 41 | # Dependency directories
 42 | node_modules/
 43 | jspm_packages/
 44 | 
 45 | # Snowpack dependency directory (https://snowpack.dev/)
 46 | web_modules/
 47 | 
 48 | # TypeScript cache
 49 | *.tsbuildinfo
 50 | 
 51 | # Optional npm cache directory
 52 | .npm
 53 | 
 54 | # Optional eslint cache
 55 | .eslintcache
 56 | 
 57 | # Optional stylelint cache
 58 | .stylelintcache
 59 | 
 60 | # Microbundle cache
 61 | .rpt2_cache/
 62 | .rts2_cache_cjs/
 63 | .rts2_cache_es/
 64 | .rts2_cache_umd/
 65 | 
 66 | # Optional REPL history
 67 | .node_repl_history
 68 | 
 69 | # Output of 'npm pack'
 70 | *.tgz
 71 | 
 72 | # Yarn Integrity file
 73 | .yarn-integrity
 74 | 
 75 | # dotenv environment variable files
 76 | .env
 77 | .env.development.local
 78 | .env.test.local
 79 | .env.production.local
 80 | .env.local
 81 | 
 82 | # parcel-bundler cache (https://parceljs.org/)
 83 | .cache
 84 | .parcel-cache
 85 | 
 86 | # Next.js build output
 87 | .next
 88 | out
 89 | 
 90 | # Nuxt.js build / generate output
 91 | .nuxt
 92 | dist
 93 | 
 94 | # Gatsby files
 95 | .cache/
 96 | # Uncomment the "public" line below if your project uses Gatsby and not Next.js
 97 | # https://nextjs.org/blog/next-9-1#public-directory-support
 98 | # public
 99 | 
100 | # vuepress build output
101 | .vuepress/dist
102 | 
103 | # vuepress v2.x temp and cache directory
104 | .temp
105 | .cache
106 | 
107 | # Docusaurus cache and generated files
108 | .docusaurus
109 | 
110 | # Serverless directories
111 | .serverless/
112 | 
113 | # FuseBox cache
114 | .fusebox/
115 | 
116 | # DynamoDB Local files
117 | .dynamodb/
118 | 
119 | # TernJS port file
120 | .tern-port
121 | 
122 | # Stores VSCode versions used for testing VSCode extensions
123 | .vscode-test
124 | 
125 | # yarn v2
126 | .yarn/cache
127 | .yarn/unplugged
128 | .yarn/build-state.yml
129 | .yarn/install-state.gz
130 | .pnp.*
131 | package-lock.json
132 | 
133 | docs/build


--------------------------------------------------------------------------------
/frontend/src/components/UI/HoverableLink.tsx:
--------------------------------------------------------------------------------
 1 | import React, { useState, useEffect, useRef } from 'react';
 2 | import { HoverableLinkProps } from '../../types';
 3 | // True when the URL contains one of the two YouTube host names.
 4 | const isYouTubeURL = (url: string): boolean => {
 5 |   return url.includes('youtube.com') || url.includes('youtu.be');
 6 | };
 7 | // Video id captured from embed / watch / short-link style URLs; '' when nothing matches.
 8 | const extractYouTubeVideoId = (url: string): string => {
 9 |   const videoIdRegex = /(?:\/embed\/|\/watch\?v=|\/(?:embed\/|v\/|watch\?.*v=|youtu\.be\/|embed\/|v=))([^&?#]+)/;
10 |   const match = url.match(videoIdRegex);
11 |   return match ? match[1] : '';
12 | };
13 | /**
14 |  * Renders `children` with a popup preview of `url` that follows the mouse while hovering.
15 |  *
16 |  * YouTube links with an extractable video id are previewed through the YouTube embed player;
17 |  * every other link (including YouTube URLs without a video id) is previewed in a plain iframe
18 |  * whose source is set 100 ms after hovering starts.
19 |  */
20 | const HoverableLink: React.FC<HoverableLinkProps> = ({ url, children }) => {
21 |   const [hovering, setHovering] = useState(false);
22 |   const [iframeSrc, setIframeSrc] = useState<string>('');
23 |   const [mousePosition, setMousePosition] = useState({ x: 0, y: 0 });
24 |   const popupRef = useRef<HTMLDivElement>(null);
25 |   // '' for non-YouTube links and for YouTube URLs that carry no video id.
26 |   const youTubeVideoId = isYouTubeURL(url) ? extractYouTubeVideoId(url) : '';
27 |   useEffect(() => {
28 |     if (!hovering) {
29 |       return undefined;
30 |     }
31 |     setIframeSrc('');
32 |     // ReturnType<typeof setTimeout> works with both the DOM and the Node timer typings.
33 |     const timer: ReturnType<typeof setTimeout> = setTimeout(() => {
34 |       setIframeSrc(url);
35 |     }, 100);
36 |     return () => clearTimeout(timer);
37 |   }, [hovering, url]);
38 |   const handleMouseEnter = (event: React.MouseEvent) => {
39 |     setHovering(true);
40 |     setMousePosition({ x: event.clientX, y: event.clientY });
41 |   };
42 |   const handleMouseMove = (event: React.MouseEvent) => {
43 |     if (hovering) {
44 |       setMousePosition({ x: event.clientX, y: event.clientY });
45 |     }
46 |   };
47 |   const handleMouseLeave = () => {
48 |     setHovering(false);
49 |   };
50 |   return (
51 |     <div
52 |       className='hoverable-link-container'
53 |       onMouseEnter={handleMouseEnter}
54 |       onMouseMove={handleMouseMove}
55 |       onMouseLeave={handleMouseLeave}
56 |     >
57 |       {children}
58 |       {hovering && (
59 |         <div
60 |           className='popup'
61 |           ref={popupRef}
62 |           style={{ left: `${mousePosition.x}px`, top: `${mousePosition.y - (popupRef.current?.offsetHeight || 0)}px` }}
63 |         >
64 |           {youTubeVideoId ? (
65 |             <iframe
66 |               width='360'
67 |               height='215'
68 |               src={`https://www.youtube.com/embed/${encodeURIComponent(youTubeVideoId)}?controls=0`}
69 |               title='YouTube video player'
70 |               allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share'
71 |               referrerPolicy='strict-origin-when-cross-origin'
72 |               allowFullScreen
73 |             />
74 |           ) : (
75 |             iframeSrc && <iframe src={iframeSrc} title='Link Preview' className='iframe-preview' />
76 |           )}
77 |         </div>
78 |       )}
79 |       <a href={url} target='_blank' rel='noopener noreferrer' />
80 |     </div>
81 |   );
82 | };
83 | export default HoverableLink;
84 | 


--------------------------------------------------------------------------------
/frontend/src/hooks/useSse.tsx:
--------------------------------------------------------------------------------
 1 | import { useFileContext } from '../context/UsersFiles';
 2 | import { eventResponsetypes } from '../types';
 3 | import { calculateProcessingTime } from '../utils/Utils';
 4 | 
 5 | /**
 6 |  * Hook returning `updateStatusForLargeFiles`, which copies a server-sent status event into
 7 |  * the matching entry (same `name`) of the file list held by the file context.
 8 |  *
 9 |  * @param alertHandler - called with the estimated processing time while a file is processing,
10 |  *   but only when the `alertShown` flag stored in localStorage is `false`
11 |  * @param errorHandler - called with the file name when the event reports status 'Failed'
12 |  */
13 | export default function useServerSideEvent(
14 |   alertHandler: (inMinutes: boolean, minutes: number, filename: string) => void,
15 |   errorHandler: (filename: string) => void
16 | ) {
17 |   const { setFilesData } = useFileContext();
18 |   function updateStatusForLargeFiles(eventSourceRes: eventResponsetypes) {
19 |     const {
20 |       fileName,
21 |       nodeCount = 0,
22 |       processingTime,
23 |       relationshipCount = 0,
24 |       status,
25 |       total_chunks,
26 |       model,
27 |       processed_chunk = 0,
28 |       fileSize,
29 |     } = eventSourceRes;
30 |     // A malformed `alertShown` entry is treated like a missing one instead of throwing.
31 |     let alertShownStatus: unknown = null;
32 |     try {
33 |       alertShownStatus = JSON.parse(localStorage.getItem('alertShown') || 'null');
34 |     } catch {
35 |       alertShownStatus = null;
36 |     }
37 | 
38 |     if (status === 'Processing') {
39 |       if (alertShownStatus === false && total_chunks != null) {
40 |         const { minutes, seconds } = calculateProcessingTime(fileSize, 0.2);
41 |         // Report seconds when the estimate is below one minute, minutes otherwise.
42 |         alertHandler(minutes !== 0, minutes === 0 ? seconds : minutes, fileName);
43 |       }
44 |       if (total_chunks) {
45 |         setFilesData((prevfiles) => {
46 |           return prevfiles.map((curfile) => {
47 |             if (curfile.name == fileName) {
48 |               return {
49 |                 ...curfile,
50 |                 // All chunks done: show 'Completed' without waiting for the final event.
51 |                 status: total_chunks === processed_chunk ? 'Completed' : status,
52 |                 NodesCount: nodeCount,
53 |                 relationshipCount: relationshipCount,
54 |                 model: model,
55 |                 processing: processingTime?.toFixed(2),
56 |                 processingProgress: Math.floor((processed_chunk / total_chunks) * 100),
57 |               };
58 |             }
59 |             return curfile;
60 |           });
61 |         });
62 |       }
63 |     } else if (status === 'Completed' || status === 'Cancelled') {
64 |       setFilesData((prevfiles) => {
65 |         return prevfiles.map((curfile) => {
66 |           if (curfile.name == fileName) {
67 |             return {
68 |               ...curfile,
69 |               status: status,
70 |               NodesCount: nodeCount,
71 |               relationshipCount: relationshipCount,
72 |               model: model,
73 |               processing: processingTime?.toFixed(2),
74 |             };
75 |           }
76 |           return curfile;
77 |         });
78 |       });
79 |     } else if (status === 'Failed') {
80 |       setFilesData((prevfiles) => {
81 |         return prevfiles.map((curfile) => {
82 |           if (curfile.name == fileName) {
83 |             return {
84 |               ...curfile,
85 |               status: status,
86 |             };
87 |           }
88 |           return curfile;
89 |         });
90 |       });
91 |       errorHandler(fileName);
92 |     }
93 |   }
94 |   return {
95 |     updateStatusForLargeFiles,
96 |   };
97 | }
98 | 


--------------------------------------------------------------------------------
/experiments/LLM_Results_.csv:
--------------------------------------------------------------------------------
1 | LLM,File name,Page number,Node count,Relation count,Nodes,Relations
2 | OpenAI-GPT-3.5-turbo-16k,../data/December_2023_Bolt.pdf,3,28,30,"['The Patliputra Central Cooperative Bank, Bihar', 'Bank Of America', 'Rbi', 'Outstanding Credit', 'Total Bank Credit', 'Term Deposits', 'Hdfc Bank', 'Sbi Mf', 'Patan Nagarik Sahakari Bank Ltd., Patan, Gujarat', 'Non-Residents', 'September 2023', 'March 2022', 'Industry', ""The Dhrangadhra People'S Co-Operative Bank, Gujarat"", 'Www.Oliveboard.In', 'Central Bank', 'Kvb', 'Branch2', 'Branch1', 'Karur Vysya Bank', 'Penalties', 'Working Capital Loans', 'The Mandal Nagarik Sahakari Bank, Gujarat', 'Cooperative Banks', 'Banks', 'Deposits', 'Branch3', 'The Balasore Bhadrak Central Co-Operative Bank, Odisha']","['reportedBy', 'reportedBy', 'from', 'imposed on', 'penalized', 'penalized', 'penalized', 'penalized', 'penalized', 'imposed by', 'based on', 'not intended to pronounce upon', 'approval', 'stake', 'approval_required', 'has_branch', 'has_branch', 'has_branch', 'shows', 'released in', 'released in', 'has', 'data on', 'at', 'accounted for', 'of', 'at', 'increased by', 'in', 'has remained']"
3 | Diffbot,../data/December_2023_Bolt.pdf,3,0,0,[],[]
4 | OpenAI-GPT-4,../data/December_2023_Bolt.pdf,3,27,21,"['Rbi', 'Thepatliputracentralcooperativebank', 'Termdeposits', 'Private Corporate Sector', 'Sbi Mutual Fund', 'Brian Thomas Moynihan', 'Transaction', 'Thebalasorebhadrakcentralcooperativebank', 'Bank Of America, N.A.', 'Patannagariksahakaribankltd', 'Deficienciesinregulatorycompliance', 'Www.Oliveboard.In', 'Workingcapitalloans', 'Nonresidents', 'Kvb', 'Bengaluru', 'Karur Vysya Bank Ltd', 'Industryloans', 'Thedhrangadhrapeoplescooperativebank', 'Tamil Nadu', 'Outstandingcredit', 'Penalties', 'Hdfc Bank Ltd', 'Themandalnagariksahakaribank', 'Centralbank', 'Reserve Bank', 'Fivecooperativebanks']","['imposedPenalty', 'imposedPenalty', 'isCEOOf', 'imposed', 'appliedTo', 'includes', 'includes', 'includes', 'includes', 'includes', 'basedOn', 'intends to acquire stake', 'approves', 'regulates', 'hasBranchIn', 'hasBranchIn', 'reportsGrowth', 'releasedData', 'releasedData', 'includes', 'includes']"
5 | Human ,../data/December_2023_Bolt.pdf,3,22,25,"[RBI,SBI Mutual Fund,non-residents,Karur Vysya Bank,Hdfc Bank,Bank of America,Central Bank,National stock exchange of India, Interglobe Aviation, Patliputra Central Cooperative Bank, The Balasore Bhadrak Central Cooperative Bank, The Dhrangadhra Cooperative Bank, Patan Nagarik Sahakari Bank, The Mondal Nagarik Sahakari Bank, Liberalised Remittance Scheme of FEMA, regulatory compliance, Brian Thomas Moynihan, Bihar, Orissa,Gujarat,Tamil Nadu,Bengalure]","[imposed penalty on, imposed penalty on, imposed penalty on, imposed penalty on, imposed penalty on, imposed penalty on, imposed penalty on, penalty of,penalty of,is Ceo/Chairman of,Approves,announced branch in, announced branch in, Intends to acquire,violation of,violation of,violation of,violation of,violation of,violation of,located in,located in,located in,located in,located in]"
6 | 


--------------------------------------------------------------------------------
/frontend/src/assets/images/graph-search.svg:
--------------------------------------------------------------------------------
 1 | <svg width="49" height="48" viewBox="0 0 49 48" fill="none" xmlns="http://www.w3.org/2000/svg">
 2 | <g clip-path="url(#clip0_229_1385)">
 3 | <path d="M4.73735 28.5265C6.63571 32.9933 10.2308 36.5231 14.7316 38.3393C19.2325 40.1555 24.2705 40.1093 28.7373 38.2109C33.2042 36.3126 36.7339 32.7175 38.5501 28.2167C40.3663 23.7158 40.3202 18.6778 38.4218 14.211C36.5234 9.74417 32.9284 6.21441 28.4275 4.39823C23.9266 2.58205 18.8886 2.62821 14.4218 4.52657C9.95496 6.42492 6.4252 10.02 4.60901 14.5208C2.79282 19.0217 2.83899 24.0597 4.73735 28.5265V28.5265Z" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
 4 | <path d="M34.519 34.3072L45.5791 45.3689" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
 5 | <path d="M11.2783 18.7247C11.2783 19.1228 11.3567 19.517 11.5091 19.8848C11.6614 20.2525 11.8847 20.5867 12.1662 20.8682C12.4477 21.1497 12.7819 21.373 13.1497 21.5254C13.5175 21.6777 13.9117 21.7561 14.3098 21.7561C14.7079 21.7561 15.1021 21.6777 15.4699 21.5254C15.8376 21.373 16.1718 21.1497 16.4533 20.8682C16.7348 20.5867 16.9581 20.2525 17.1105 19.8848C17.2628 19.517 17.3412 19.1228 17.3412 18.7247C17.3412 18.3266 17.2628 17.9324 17.1105 17.5646C16.9581 17.1968 16.7348 16.8626 16.4533 16.5811C16.1718 16.2996 15.8376 16.0763 15.4699 15.924C15.1021 15.7716 14.7079 15.6932 14.3098 15.6932C13.9117 15.6932 13.5175 15.7716 13.1497 15.924C12.7819 16.0763 12.4477 16.2996 12.1662 16.5811C11.8847 16.8626 11.6614 17.1968 11.5091 17.5646C11.3567 17.9324 11.2783 18.3266 11.2783 18.7247V18.7247Z" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
 6 | <path d="M21.3846 26.8081C21.3846 27.6121 21.704 28.3832 22.2725 28.9517C22.841 29.5202 23.612 29.8395 24.416 29.8395C25.22 29.8395 25.9911 29.5202 26.5596 28.9517C27.1281 28.3832 27.4475 27.6121 27.4475 26.8081C27.4475 26.0041 27.1281 25.2331 26.5596 24.6646C25.9911 24.0961 25.22 23.7767 24.416 23.7767C23.612 23.7767 22.841 24.0961 22.2725 24.6646C21.704 25.2331 21.3846 26.0041 21.3846 26.8081V26.8081Z" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
 7 | <path d="M28.4571 10.6407C28.4571 11.4447 28.7765 12.2157 29.345 12.7842C29.9135 13.3527 30.6845 13.6721 31.4885 13.6721C32.2925 13.6721 33.0636 13.3527 33.6321 12.7842C34.2006 12.2157 34.52 11.4447 34.52 10.6407C34.52 9.8367 34.2006 9.06564 33.6321 8.49714C33.0636 7.92864 32.2925 7.60925 31.4885 7.60925C30.6845 7.60925 29.9135 7.92864 29.345 8.49714C28.7765 9.06564 28.4571 9.8367 28.4571 10.6407V10.6407Z" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
 8 | <path d="M16.6793 20.619L22.0496 24.9156" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
 9 | <path d="M30.2733 13.4188L25.6305 24.0302" stroke="#1A1B1D" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
10 | </g>
11 | <defs>
12 | <clipPath id="clip0_229_1385">
13 | <rect width="48" height="48" fill="white" transform="translate(0.55571)"/>
14 | </clipPath>
15 | </defs>
16 | </svg>
17 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "3"
 2 | 
 3 | services:
 4 |   backend:
 5 |     build:
 6 |       context: ./backend
 7 |       dockerfile: Dockerfile
 8 |     volumes:
 9 |       - ./backend:/app
10 |     environment:
11 |       - NEO4J_URI=${NEO4J_URI-neo4j://database:7687}
12 |       - NEO4J_PASSWORD=${NEO4J_PASSWORD-password}
13 |       - NEO4J_USERNAME=${NEO4J_USERNAME-neo4j}
14 |       - OPENAI_API_KEY=${OPENAI_API_KEY-}
15 |       - DIFFBOT_API_KEY=${DIFFBOT_API_KEY-}
16 |       - EMBEDDING_MODEL=${EMBEDDING_MODEL-all-MiniLM-L6-v2}
17 |       - LANGCHAIN_ENDPOINT=${LANGCHAIN_ENDPOINT-}
18 |       - LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2-}
19 |       - LANGCHAIN_PROJECT=${LANGCHAIN_PROJECT-}
20 |       - LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY-}
21 |       - KNN_MIN_SCORE=${KNN_MIN_SCORE-0.94}
22 |       - IS_EMBEDDING=${IS_EMBEDDING-true}
23 |       - GEMINI_ENABLED=${GEMINI_ENABLED-False}
24 |       - GCP_LOG_METRICS_ENABLED=${GCP_LOG_METRICS_ENABLED-False}
25 |       - UPDATE_GRAPH_CHUNKS_PROCESSED=${UPDATE_GRAPH_CHUNKS_PROCESSED-20}
26 |       - NUMBER_OF_CHUNKS_TO_COMBINE=${NUMBER_OF_CHUNKS_TO_COMBINE-6}
27 |       - ENTITY_EMBEDDING=${ENTITY_EMBEDDING-False}
28 |       - GCS_FILE_CACHE=${GCS_FILE_CACHE-False}
29 | #      - LLM_MODEL_CONFIG_anthropic_claude_35_sonnet=${LLM_MODEL_CONFIG_anthropic_claude_35_sonnet-}
30 | #      - LLM_MODEL_CONFIG_fireworks_llama_v3_70b=${LLM_MODEL_CONFIG_fireworks_llama_v3_70b-}
31 | #      - LLM_MODEL_CONFIG_azure_ai_gpt_4o=${LLM_MODEL_CONFIG_azure_ai_gpt_4o-}
32 | #      - LLM_MODEL_CONFIG_azure_ai_gpt_35=${LLM_MODEL_CONFIG_azure_ai_gpt_35-}
33 | #      - LLM_MODEL_CONFIG_groq_llama3_70b=${LLM_MODEL_CONFIG_groq_llama3_70b-}
34 | #      - LLM_MODEL_CONFIG_bedrock_claude_3_5_sonnet=${LLM_MODEL_CONFIG_bedrock_claude_3_5_sonnet-}
#      - LLM_MODEL_CONFIG_fireworks_qwen_72b=${LLM_MODEL_CONFIG_fireworks_qwen_72b-}
36 |       - LLM_MODEL_CONFIG_ollama_llama3=${LLM_MODEL_CONFIG_ollama_llama3-}
37 |     container_name: backend
38 |     extra_hosts:
39 |       - host.docker.internal:host-gateway
40 |     ports:
41 |       - "8000:8000"
42 |     networks:
43 |       - net
44 | 
45 |   frontend:
46 |     depends_on:
47 |       - backend
48 |     build:
49 |       context: ./frontend
50 |       dockerfile: Dockerfile
51 |       args:
52 |         - BACKEND_API_URL=${BACKEND_API_URL-http://localhost:8000}
53 |         - REACT_APP_SOURCES=${REACT_APP_SOURCES-local,youtube,wiki,s3}
54 |         - LLM_MODELS=${LLM_MODELS-diffbot,openai-gpt-3.5,openai-gpt-4o}
55 |         - GOOGLE_CLIENT_ID=${GOOGLE_CLIENT_ID-""}
56 |         - BLOOM_URL=${BLOOM_URL-https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true}
57 |         - TIME_PER_CHUNK=${TIME_PER_CHUNK-4}
58 |         - TIME_PER_PAGE=${TIME_PER_PAGE-50}
59 |         - CHUNK_SIZE=${CHUNK_SIZE-5242880}
60 |         - ENV=${ENV-DEV}
61 |         - CHAT_MODES=${CHAT_MODES-""}
62 |     volumes:
63 |       - ./frontend:/app
64 |       - /app/node_modules
65 |     container_name: frontend
66 |     ports:
67 |       - "8080:8080"
68 |     networks:
69 |       - net
70 | 
71 | networks:
72 |   net:
73 | 


--------------------------------------------------------------------------------
/frontend/src/assets/images/web-search-svgrepo-com.svg:
--------------------------------------------------------------------------------
 1 | 
 2 | <!-- Uploaded to: SVG Repo, www.svgrepo.com, Transformed by: SVG Repo Mixer Tools -->
 3 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="websearch" viewBox="0 0 214.6279 198.1416" enable-background="new 0 0 214.6279 198.1416" xml:space="preserve" width="800px" height="800px" fill="#000000">
 4 | 
 5 | <g id="SVGRepo_bgCarrier" stroke-width="0"/>
 6 | 
 7 | <g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"/>
 8 | 
 9 | <g id="SVGRepo_iconCarrier"> <g> <path fill="#ffffff" d="M171.748,142.3193c0-15.4375-12.5625-28-28-28s-28,12.5625-28,28s12.5625,28,28,28 S171.748,157.7568,171.748,142.3193z"/> <path fill="#02507b" d="M95.748,166.3193v-76h42.989c0.0146,0.7878,0.0696,1.5579,0.0696,2.3516c0,2.2109,1.7891,4,4,4 s4-1.7891,4-4c0-0.7937-0.0603-1.5632-0.0745-2.3516h25.0159h5.6484h2.3516c0-46.3164-37.6836-84-84-84h-8v0.4053 c-42.5754,4.0447-76,39.9785-76,83.5947c0,46.3164,37.6836,84,84,84c2.2109,0,4-1.7891,4-4S97.959,166.3193,95.748,166.3193z M87.748,164.9949c-19.9287-6.438-35.0586-36.3945-35.0586-72.324c0-0.7937,0.0549-1.5637,0.0696-2.3516h34.989V164.9949z M53.1655,82.3193C55.75,51.1135,69.7729,26.157,87.748,20.3506v61.9688H53.1655z M95.748,82.3193v-63.293 c21.6829,0,39.6248,27.5808,42.5825,63.293H95.748z M171.3181,82.3193h-24.9685c-2.3884-29.9812-14.96-54.8911-31.9993-65.6013 C144.7681,24.4146,167.9641,50.417,171.3181,82.3193z M77.1458,16.718C60.1064,27.4282,47.5349,52.3381,45.1465,82.3193H20.178 C23.532,50.417,46.728,24.4146,77.1458,16.718z M19.748,90.3193h25.0159c-0.0142,0.7883-0.0745,1.5579-0.0745,2.3516 c0,28.5327,9.217,53.6572,23.1233,68.2603C39.7031,149.769,19.748,122.3567,19.748,90.3193z"/> <path fill="#02507b" d="M191.5137,184.4287l-19.665-19.665c4.9307-6.1609,7.8994-13.9573,7.8994-22.4443 c0-19.8516-16.1484-36-36-36s-36,16.1484-36,36s16.1484,36,36,36c8.4871,0,16.2834-2.9688,22.4443-7.8994l19.665,19.665 L191.5137,184.4287z M115.748,142.3193c0-15.4375,12.5625-28,28-28s28,12.5625,28,28s-12.5625,28-28,28 S115.748,157.7568,115.748,142.3193z"/> </g> <path fill="#fffafa" d="M4,22.1416c-1.0234,0-2.0479-0.3906-2.8281-1.1719c-1.5625-1.5615-1.5625-4.0947,0-5.6562L15.3145,1.1719 c1.5625-1.5625,4.0957-1.5625,5.6562,0c1.5625,1.5615,1.5625,4.0947,0,5.6562L6.8281,20.9697 C6.0469,21.751,5.0234,22.1416,4,22.1416z"/> <path fill="#fffafa" d="M18.1426,22.1416c-1.0234,0-2.0469-0.3906-2.8281-1.1719L1.1719,6.8281 
c-1.5625-1.5615-1.5625-4.0947,0-5.6562c1.5605-1.5625,4.0938-1.5625,5.6562,0l14.1426,14.1416 c1.5625,1.5615,1.5625,4.0947,0,5.6562C20.1904,21.751,19.166,22.1416,18.1426,22.1416z"/> <path fill="#ffffff" d="M14,198.1416c-7.7197,0-14-6.2803-14-14s6.2803-14,14-14s14,6.2803,14,14S21.7197,198.1416,14,198.1416z M14,178.1416c-3.3086,0-6,2.6914-6,6s2.6914,6,6,6s6-2.6914,6-6S17.3086,178.1416,14,178.1416z"/> <path fill="#ffffff" d="M199.3145,56.7695c-1.0605,0-2.0781-0.4219-2.8281-1.1719l-11.3145-11.3145 c-0.75-0.75-1.1719-1.7676-1.1719-2.8281s0.4219-2.0781,1.1719-2.8281l11.3145-11.3135c1.5625-1.5625,4.0938-1.5625,5.6562,0 l11.3135,11.3135c1.5625,1.5615,1.5625,4.0947,0,5.6562l-11.3135,11.3145C201.3926,56.3477,200.375,56.7695,199.3145,56.7695z M193.6572,41.4551l5.6572,5.6572l5.6572-5.6572l-5.6572-5.6572L193.6572,41.4551z"/> </g>
10 | 
11 | </svg>


--------------------------------------------------------------------------------
/frontend/src/assets/images/web.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
 3 | <svg width="800px" height="800px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
 4 | <g id="style=stroke" clip-path="url(#clip0_1_1828)">
 5 | <g id="web">
 6 | <path id="vector (Stroke)" fill-rule="evenodd" clip-rule="evenodd" d="M10.4425 2.44429C10.0752 3.64002 9.32073 6.25569 8.89915 8.83258C9.99331 9.00921 11.0621 9.12209 12 9.12209C12.9379 9.12209 14.0067 9.00921 15.1009 8.83258C14.6793 6.25569 13.9248 3.64002 13.5575 2.44429C13.0509 2.3624 12.5307 2.31977 12 2.31977C11.4693 2.31977 10.9491 2.3624 10.4425 2.44429ZM15.3337 2.90893C15.737 4.305 16.2958 6.42828 16.6448 8.54737C18.1513 8.23703 19.5727 7.85824 20.605 7.56109C19.4986 5.42102 17.6172 3.74662 15.3337 2.90893ZM21.2129 9.01933C20.1222 9.33683 18.5423 9.76328 16.8594 10.1057C16.9295 10.7564 16.9709 11.3958 16.9709 12C16.9709 12.8816 16.8827 13.8411 16.7445 14.8058C18.759 14.3858 20.6068 13.849 21.5557 13.5575C21.6376 13.0509 21.6802 12.5307 21.6802 12C21.6802 10.959 21.5162 9.95751 21.2129 9.01933ZM21.0911 15.3337C19.9166 15.6729 18.229 16.1219 16.4634 16.4634C16.1219 18.229 15.6729 19.9166 15.3337 21.0911C17.9978 20.1138 20.1138 17.9978 21.0911 15.3337ZM13.5576 21.5557C13.849 20.6068 14.3858 18.759 14.8058 16.7445C13.8411 16.8827 12.8816 16.9709 12 16.9709C11.1184 16.9709 10.1589 16.8827 9.19423 16.7445C9.61421 18.759 10.151 20.6068 10.4425 21.5557C10.9491 21.6376 11.4693 21.6802 12 21.6802C12.5307 21.6802 13.0509 21.6376 13.5576 21.5557ZM8.66629 21.0911C8.32707 19.9166 7.8781 18.229 7.53658 16.4634C5.77099 16.1219 4.08335 15.6729 2.90891 15.3337C3.88622 17.9978 6.00216 20.1138 8.66629 21.0911ZM2.44429 13.5575C3.39316 13.849 5.24101 14.3858 7.25548 14.8058C7.1173 13.8411 7.02907 12.8816 7.02907 12C7.02907 11.3958 7.07048 10.7564 7.14056 10.1057C5.45769 9.76328 3.87779 9.33683 2.78712 9.01933C2.48383 9.95751 2.31977 10.959 2.31977 12C2.31977 12.5307 2.3624 13.0509 2.44429 13.5575ZM3.39504 7.56109C4.42731 7.85824 5.84865 8.23703 7.35522 8.54737C7.70416 6.42827 8.26303 4.305 8.66626 2.90893C6.38282 3.74662 4.50139 5.42102 3.39504 7.56109ZM8.68924 10.3888C8.63137 10.9544 8.59884 11.4968 8.59884 12C8.59884 12.9399 8.71224 14.012 8.88985 15.1102C9.98798 15.2878 
11.0601 15.4012 12 15.4012C12.9399 15.4012 14.012 15.2878 15.1102 15.1102C15.2878 14.012 15.4012 12.9399 15.4012 12C15.4012 11.4968 15.3686 10.9544 15.3108 10.3888C14.1776 10.5703 13.0348 10.6919 12 10.6919C10.9652 10.6919 9.82236 10.5703 8.68924 10.3888ZM9.67273 0.991173C10.4243 0.833026 11.2029 0.75 12 0.75C12.7971 0.75 13.5757 0.833026 14.3273 0.991174C18.0108 1.76627 21.0281 4.34097 22.42 7.75174C22.9554 9.06356 23.25 10.4983 23.25 12C23.25 12.7971 23.167 13.5757 23.0088 14.3273C22.0943 18.6736 18.6736 22.0943 14.3273 23.0088C13.5757 23.167 12.7971 23.25 12 23.25C11.2029 23.25 10.4243 23.167 9.67273 23.0088C5.32644 22.0943 1.90572 18.6736 0.991173 14.3273C0.833026 13.5757 0.75 12.7971 0.75 12C0.75 10.4972 1.04509 9.06132 1.58123 7.74866C2.97369 4.33943 5.99026 1.76604 9.67273 0.991173Z" fill="#000000"/>
 7 | </g>
 8 | </g>
 9 | <defs>
10 | <clipPath id="clip0_1_1828">
11 | <rect width="24" height="24" fill="white"/>
12 | </clipPath>
13 | </defs>
14 | </svg>


--------------------------------------------------------------------------------
/frontend/src/assets/images/web-search-darkmode-final2.svg:
--------------------------------------------------------------------------------
1 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
2 | <!-- Uploaded to: SVG Repo, www.svgrepo.com, Transformed by: SVG Repo Mixer Tools -->
3 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="websearch" viewBox="0 0 214.6279 198.1416" enable-background="new 0 0 214.6279 198.1416" xml:space="preserve" width="800px" height="800px" fill="#ffffff">
4 | <g id="SVGRepo_bgCarrier" stroke-width="0"/>
5 | <g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"/>
6 | <g id="SVGRepo_iconCarrier"> <g> <path fill="#000000" d="M171.748,142.3193c0-15.4375-12.5625-28-28-28s-28,12.5625-28,28s12.5625,28,28,28 S171.748,157.7568,171.748,142.3193z"/> <path fill="#ffffff" d="M95.748,166.3193v-76h42.989c0.0146,0.7878,0.0696,1.5579,0.0696,2.3516c0,2.2109,1.7891,4,4,4 s4-1.7891,4-4c0-0.7937-0.0603-1.5632-0.0745-2.3516h25.0159h5.6484h2.3516c0-46.3164-37.6836-84-84-84h-8v0.4053 c-42.5754,4.0447-76,39.9785-76,83.5947c0,46.3164,37.6836,84,84,84c2.2109,0,4-1.7891,4-4S97.959,166.3193,95.748,166.3193z M87.748,164.9949c-19.9287-6.438-35.0586-36.3945-35.0586-72.324c0-0.7937,0.0549-1.5637,0.0696-2.3516h34.989V164.9949z M53.1655,82.3193C55.75,51.1135,69.7729,26.157,87.748,20.3506v61.9688H53.1655z M95.748,82.3193v-63.293 c21.6829,0,39.6248,27.5808,42.5825,63.293H95.748z M171.3181,82.3193h-24.9685c-2.3884-29.9812-14.96-54.8911-31.9993-65.6013 C144.7681,24.4146,167.9641,50.417,171.3181,82.3193z M77.1458,16.718C60.1064,27.4282,47.5349,52.3381,45.1465,82.3193H20.178 C23.532,50.417,46.728,24.4146,77.1458,16.718z M19.748,90.3193h25.0159c-0.0142,0.7883-0.0745,1.5579-0.0745,2.3516 c0,28.5327,9.217,53.6572,23.1233,68.2603C39.7031,149.769,19.748,122.3567,19.748,90.3193z"/> <path fill="#ffffff" d="M191.5137,184.4287l-19.665-19.665c4.9307-6.1609,7.8994-13.9573,7.8994-22.4443 c0-19.8516-16.1484-36-36-36s-36,16.1484-36,36s16.1484,36,36,36c8.4871,0,16.2834-2.9688,22.4443-7.8994l19.665,19.665 L191.5137,184.4287z M115.748,142.3193c0-15.4375,12.5625-28,28-28s28,12.5625,28,28s-12.5625,28-28,28 S115.748,157.7568,115.748,142.3193z"/> </g> <path fill="#222325" d="M4,22.1416c-1.0234,0-2.0479-0.3906-2.8281-1.1719c-1.5625-1.5615-1.5625-4.0947,0-5.6562L15.3145,1.1719 c1.5625-1.5625,4.0957-1.5625,5.6562,0c1.5625,1.5615,1.5625,4.0947,0,5.6562L6.8281,20.9697 C6.0469,21.751,5.0234,22.1416,4,22.1416z"/> <path fill="#222325" d="M18.1426,22.1416c-1.0234,0-2.0469-0.3906-2.8281-1.1719L1.1719,6.8281 
c-1.5625-1.5615-1.5625-4.0947,0-5.6562c1.5605-1.5625,4.0938-1.5625,5.6562,0l14.1426,14.1416 c1.5625,1.5615,1.5625,4.0947,0,5.6562C20.1904,21.751,19.166,22.1416,18.1426,22.1416z"/> <path fill="#222325" d="M14,198.1416c-7.7197,0-14-6.2803-14-14s6.2803-14,14-14s14,6.2803,14,14S21.7197,198.1416,14,198.1416z M14,178.1416c-3.3086,0-6,2.6914-6,6s2.6914,6,6,6s6-2.6914,6-6S17.3086,178.1416,14,178.1416z"/> <path fill="#222325" d="M199.3145,56.7695c-1.0605,0-2.0781-0.4219-2.8281-1.1719l-11.3145-11.3145 c-0.75-0.75-1.1719-1.7676-1.1719-2.8281s0.4219-2.0781,1.1719-2.8281l11.3145-11.3135c1.5625-1.5625,4.0938-1.5625,5.6562,0 l11.3135,11.3135c1.5625,1.5615,1.5625,4.0947,0,5.6562l-11.3135,11.3145C201.3926,56.3477,200.375,56.7695,199.3145,56.7695z M193.6572,41.4551l5.6572,5.6572l5.6572-5.6572l-5.6572-5.6572L193.6572,41.4551z"/> </g>
7 | </svg>


--------------------------------------------------------------------------------
/frontend/src/utils/FileAPI.ts:
--------------------------------------------------------------------------------
  1 | import { Method } from 'axios';
  2 | import { url } from './Utils';
  3 | import { UserCredentials, ExtractParams, UploadParams } from '../types';
  4 | import { apiCall } from '../services/CommonAPI';
  5 | 
  6 | // Upload Call
  7 | export const uploadAPI = async (
  8 |   file: Blob,
  9 |   userCredentials: UserCredentials,
 10 |   model: string,
 11 |   chunkNumber: number,
 12 |   totalChunks: number,
 13 |   originalname: string
 14 | ): Promise<any> => {
 15 |   const urlUpload = `${url()}/upload`;
 16 |   const method: Method = 'post';
 17 |   const commonParams: UserCredentials = userCredentials;
 18 |   const additionalParams: UploadParams = { file, model, chunkNumber, totalChunks, originalname };
 19 |   const response = await apiCall(urlUpload, method, commonParams, additionalParams);
 20 |   return response;
 21 | };
 22 | 
 23 | // Extract call
 24 | export const extractAPI = async (
 25 |   model: string,
 26 |   userCredentials: UserCredentials,
 27 |   source_type: string,
 28 |   source_url?: string,
 29 |   aws_access_key_id?: string | null,
 30 |   aws_secret_access_key?: string | null,
 31 |   file_name?: string,
 32 |   gcs_bucket_name?: string,
 33 |   gcs_bucket_folder?: string,
 34 |   allowedNodes?: string[],
 35 |   allowedRelationship?: string[],
 36 |   gcs_project_id?: string,
 37 |   language?: string,
 38 |   access_token?: string
 39 | ): Promise<any> => {
 40 |   const urlExtract = `${url()}/extract`;
 41 |   const method: Method = 'post';
 42 |   const commonParams: UserCredentials = userCredentials;
 43 |   let additionalParams: ExtractParams;
 44 |   if (source_type === 's3 bucket') {
 45 |     additionalParams = {
 46 |       model,
 47 |       source_url,
 48 |       aws_secret_access_key,
 49 |       aws_access_key_id,
 50 |       source_type,
 51 |       file_name,
 52 |       allowedNodes,
 53 |       allowedRelationship,
 54 |     };
 55 |   } else if (source_type === 'Wikipedia') {
 56 |     additionalParams = {
 57 |       model,
 58 |       wiki_query: file_name,
 59 |       source_type,
 60 |       file_name,
 61 |       allowedNodes,
 62 |       allowedRelationship,
 63 |       language,
 64 |     };
 65 |   } else if (source_type === 'gcs bucket') {
 66 |     additionalParams = {
 67 |       model,
 68 |       gcs_blob_filename: file_name,
 69 |       gcs_bucket_folder,
 70 |       gcs_bucket_name,
 71 |       source_type,
 72 |       file_name,
 73 |       allowedNodes,
 74 |       allowedRelationship,
 75 |       gcs_project_id,
 76 |       access_token,
 77 |     };
 78 |   } else if (source_type === 'youtube') {
 79 |     additionalParams = {
 80 |       model,
 81 |       source_url,
 82 |       source_type,
 83 |       file_name,
 84 |       allowedNodes,
 85 |       allowedRelationship,
 86 |     };
 87 |   } else if (source_type === 'web-url') {
 88 |     additionalParams = {
 89 |       model,
 90 |       source_url,
 91 |       source_type,
 92 |       file_name,
 93 |       allowedNodes,
 94 |       allowedRelationship,
 95 |     };
 96 |   } else {
 97 |     additionalParams = {
 98 |       model,
 99 |       source_type,
100 |       file_name,
101 |       allowedNodes,
102 |       allowedRelationship,
103 |     };
104 |   }
105 |   const response = await apiCall(urlExtract, method, commonParams, additionalParams);
106 |   return response;
107 | };
108 | 


--------------------------------------------------------------------------------
/backend/src/document_sources/youtube.py:
--------------------------------------------------------------------------------
 1 | from langchain_community.document_loaders import YoutubeLoader
 2 | from pytube import YouTube
 3 | from youtube_transcript_api import YouTubeTranscriptApi 
 4 | import logging
 5 | from urllib.parse import urlparse,parse_qs
 6 | from difflib import SequenceMatcher
 7 | from datetime import timedelta
 8 | 
 9 | def get_youtube_transcript(youtube_id):
10 |   try:
11 |     transcript = YouTubeTranscriptApi.get_transcript(youtube_id)
12 |     return transcript
13 |   except Exception as e:
14 |     message = f"Youtube transcript is not available for youtube Id: {youtube_id}"
15 |     raise Exception(message)
16 |   
def get_youtube_combined_transcript(youtube_id):
  """Return the whole transcript of a video as one string.

  The ``text`` of every transcript segment is joined with a single space so
  that the last word of one segment does not run into the first word of the
  next one.

  Args:
    youtube_id: Video id forwarded to ``get_youtube_transcript``.

  Returns:
    str: The concatenated transcript text; an empty string when the transcript
    has no segments.

  Raises:
    Exception: If the transcript cannot be fetched or a segment lacks a
      ``text`` entry. The underlying error is attached as ``__cause__``.
  """
  try:
    segments = get_youtube_transcript(youtube_id)
    return ' '.join(segment['text'] for segment in segments)
  except Exception as e:
    message = f"Youtube transcript is not available for youtube Id: {youtube_id}"
    raise Exception(message) from e
27 | 
28 | 
def create_youtube_url(url):
    """Normalise a YouTube link to the ``https://www.youtube.com/watch?v=<id>`` form.

    The video id is taken from the ``v`` query parameter when present; otherwise
    the last non-empty path segment is used, so a trailing slash
    (``https://youtu.be/<id>/``) no longer produces an empty id.

    Args:
        url: Any URL string carrying the video id in ``?v=`` or in its path.

    Returns:
        str: The canonical watch URL. If no id can be found, the bare prefix
        ``https://www.youtube.com/watch?v=`` is returned.
    """
    you_tu_url = "https://www.youtube.com/watch?v="
    parsed = urlparse(url)

    query_ids = parse_qs(parsed.query).get('v')
    if query_ids:
        return you_tu_url + query_ids[0].strip()

    # Drop empty segments produced by leading, trailing or doubled slashes.
    segments = [part.strip() for part in parsed.path.split('/') if part.strip()]
    video_id = segments[-1] if segments else ''
    return you_tu_url + video_id
39 | 
40 |   
def get_documents_from_youtube(url):
    """Load the transcript documents and the title of a YouTube video.

    Args:
        url: Video URL given to both ``YoutubeLoader.from_youtube_url`` and ``YouTube``.

    Returns:
        tuple: ``(file_name, pages)``, where ``file_name`` is ``YouTube(url).title``
        and ``pages`` is the result of the loader's ``load()`` call.

    Raises:
        Exception: On any failure; the error is logged with its traceback and
          re-raised carrying only the original message.
    """
    # Transcript languages requested from the loader, with translation to English.
    transcript_languages = ["en-US", "en-gb", "en-ca", "en-au","zh-CN", "zh-Hans", "zh-TW", "fr-FR","de-DE","it-IT","ja-JP","pt-BR","ru-RU","es-ES"]
    try:
        loader = YoutubeLoader.from_youtube_url(
            url,
            language=transcript_languages,
            translation="en",
            add_video_info=True,
        )
        pages = loader.load()
        file_name = YouTube(url).title
    except Exception as e:
        error_message = str(e)
        logging.exception(f'Exception in reading transcript from youtube:{error_message}')
        raise Exception(error_message)
    return file_name, pages
54 | 
def get_chunks_with_timestamps(chunks, youtube_id):
  """Annotate each chunk with the approximate start/end time of its text in the video.

  For every chunk, the first and the last 40 characters of ``page_content`` are
  compared (``difflib.SequenceMatcher`` ratio) against each transcript segment.
  The best match for the head gives ``start_time``; the best match for the tail
  gives ``end_time`` (segment start + duration). Both are written to
  ``chunk.metadata`` as ``H:MM:SS`` strings.

  When the transcript is empty or no segment has a similarity above zero, the
  timestamps fall back to ``0:00:00`` instead of raising or reusing the values
  computed for the previous chunk.

  Args:
    chunks: Iterable of objects exposing ``page_content`` (str) and ``metadata`` (dict).
    youtube_id: Video id forwarded to ``get_youtube_transcript``.

  Returns:
    The same ``chunks`` object, with each element's metadata updated in place.
  """
  transcript = get_youtube_transcript(youtube_id)
  for chunk in chunks:
    start_content = chunk.page_content[:40]
    end_content = chunk.page_content[-40:]

    # Per-chunk state: reset here so nothing leaks from one chunk to the next.
    max_start_similarity = 0
    max_end_similarity = 0
    start_time = 0
    end_time = 0

    for segment in transcript:
        segment_text = segment['text']
        # ratio() is comparatively expensive; compute it once per comparison.
        start_ratio = SequenceMatcher(None, start_content, segment_text).ratio()
        end_ratio = SequenceMatcher(None, end_content, segment_text).ratio()

        if start_ratio > max_start_similarity:
            max_start_similarity = start_ratio
            start_time = segment['start']

        if end_ratio > max_end_similarity:
            max_end_similarity = end_ratio
            end_time = segment['start'] + segment['duration']

    # str(timedelta) may carry a fractional part ("0:01:15.500000"); keep H:MM:SS only.
    chunk.metadata['start_time'] = str(timedelta(seconds = start_time)).split('.')[0]
    chunk.metadata['end_time'] = str(timedelta(seconds = end_time)).split('.')[0]
  return chunks


--------------------------------------------------------------------------------
/backend/src/document_sources/s3_bucket.py:
--------------------------------------------------------------------------------
 1 | from langchain_community.document_loaders import S3DirectoryLoader
 2 | import logging
 3 | import boto3
 4 | import os
 5 | from urllib.parse import urlparse
 6 | 
 7 | def get_s3_files_info(s3_url,aws_access_key_id=None,aws_secret_access_key=None):
 8 |   try:
 9 |       # Extract bucket name and directory from the S3 URL
10 |       parsed_url = urlparse(s3_url)
11 |       bucket_name = parsed_url.netloc
12 |       directory = parsed_url.path.lstrip('/')
13 |       try:
14 |         # Connect to S3
15 |         s3 = boto3.client('s3',aws_access_key_id=aws_access_key_id,aws_secret_access_key=aws_secret_access_key)
16 | 
17 |         # List objects in the specified directory
18 |         response = s3.list_objects_v2(Bucket=bucket_name, Prefix=directory)
19 |       except Exception as e:
20 |          raise Exception("Invalid AWS credentials")
21 |       
22 |       files_info = []
23 | 
24 |       # Check each object for file size and type
25 |       for obj in response.get('Contents', []):
26 |           file_key = obj['Key']
27 |           file_name = os.path.basename(file_key)
28 |           logging.info(f'file_name : {file_name}  and file key : {file_key}')
29 |           file_size = obj['Size']
30 | 
31 |           # Check if file is a PDF
32 |           if file_name.endswith('.pdf'):
33 |             files_info.append({'file_key': file_key, 'file_size_bytes': file_size})
34 |             
35 |       return files_info
36 |   except Exception as e:
37 |     error_message = str(e)
38 |     logging.error(f"Error while reading files from s3: {error_message}")
39 |     raise Exception(error_message)
40 | 
41 | 
def get_s3_pdf_content(s3_url,aws_access_key_id=None,aws_secret_access_key=None):
    """Load and split the PDF that an S3 URL points at.

    Args:
        s3_url: URL of the form ``s3://<bucket>/<key>``.
        aws_access_key_id: Access key passed to ``S3DirectoryLoader``.
        aws_secret_access_key: Secret key passed to ``S3DirectoryLoader``.

    Returns:
        The result of ``S3DirectoryLoader(...).load_and_split()`` when the key
        ends with ``.pdf``; ``None`` for any other key.

    Raises:
        Exception: Wrapping any error raised while parsing the URL or loading.
    """
    try:
        location = urlparse(s3_url)
        bucket_name = location.netloc
        logging.info(f'bucket name : {bucket_name}')
        directory = location.path.lstrip('/')

        # Only PDF keys are supported; everything else yields no content.
        if not directory.endswith('.pdf'):
            return None

        pdf_loader = S3DirectoryLoader(
            bucket_name,
            prefix=directory,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
        )
        return pdf_loader.load_and_split()
    except Exception as e:
        logging.error(f"getting error while reading content from s3 files:{e}")
        raise Exception(e)
59 | 
60 | 
def get_documents_from_s3(s3_url, aws_access_key_id, aws_secret_access_key):
    """Fetch the pages of a single S3 object together with its file name.

    The object is first probed with ``head_object`` (its size is only logged),
    then its content is read through ``get_s3_pdf_content``.

    Args:
        s3_url: URL of the form ``s3://<bucket>/<key>``.
        aws_access_key_id: Access key used for the S3 client and the loader.
        aws_secret_access_key: Secret key used for the S3 client and the loader.

    Returns:
        tuple: ``(file_name, pages)``, where ``file_name`` is the last
        ``/``-separated part of the key and ``pages`` is whatever
        ``get_s3_pdf_content`` returns.

    Raises:
        Exception: On any failure; the error is logged with its traceback and
          re-raised carrying only the original message.
    """
    try:
        location = urlparse(s3_url)
        bucket = location.netloc
        file_key = location.path.lstrip('/')
        file_name = file_key.rsplit('/', 1)[-1]

        s3_client = boto3.client(
            's3',
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
        )
        head = s3_client.head_object(Bucket=bucket, Key=file_key)
        file_size = head['ContentLength']

        logging.info(f'bucket : {bucket},file_name:{file_name},  file key : {file_key},  file size : {file_size}')
        pages = get_s3_pdf_content(
            s3_url,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
        )
    except Exception as e:
        error_message = str(e)
        logging.exception(f'Exception in reading content from S3:{error_message}')
        raise Exception(error_message)
    return file_name, pages


--------------------------------------------------------------------------------
/frontend/src/assets/images/web-darkmode.svg:
--------------------------------------------------------------------------------
1 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
2 | <!-- Uploaded to: SVG Repo, www.svgrepo.com, Transformed by: SVG Repo Mixer Tools -->
3 | <svg width="800px" height="800px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg" stroke="#ffffff">
4 | <g id="SVGRepo_bgCarrier" stroke-width="0"/>
5 | <g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"/>
6 | <g id="SVGRepo_iconCarrier"> <g id="style=stroke" clip-path="url(#clip0_1_1828)"> <g id="web"> <path id="vector (Stroke)" fill-rule="evenodd" clip-rule="evenodd" d="M10.4425 2.44429C10.0752 3.64002 9.32073 6.25569 8.89915 8.83258C9.99331 9.00921 11.0621 9.12209 12 9.12209C12.9379 9.12209 14.0067 9.00921 15.1009 8.83258C14.6793 6.25569 13.9248 3.64002 13.5575 2.44429C13.0509 2.3624 12.5307 2.31977 12 2.31977C11.4693 2.31977 10.9491 2.3624 10.4425 2.44429ZM15.3337 2.90893C15.737 4.305 16.2958 6.42828 16.6448 8.54737C18.1513 8.23703 19.5727 7.85824 20.605 7.56109C19.4986 5.42102 17.6172 3.74662 15.3337 2.90893ZM21.2129 9.01933C20.1222 9.33683 18.5423 9.76328 16.8594 10.1057C16.9295 10.7564 16.9709 11.3958 16.9709 12C16.9709 12.8816 16.8827 13.8411 16.7445 14.8058C18.759 14.3858 20.6068 13.849 21.5557 13.5575C21.6376 13.0509 21.6802 12.5307 21.6802 12C21.6802 10.959 21.5162 9.95751 21.2129 9.01933ZM21.0911 15.3337C19.9166 15.6729 18.229 16.1219 16.4634 16.4634C16.1219 18.229 15.6729 19.9166 15.3337 21.0911C17.9978 20.1138 20.1138 17.9978 21.0911 15.3337ZM13.5576 21.5557C13.849 20.6068 14.3858 18.759 14.8058 16.7445C13.8411 16.8827 12.8816 16.9709 12 16.9709C11.1184 16.9709 10.1589 16.8827 9.19423 16.7445C9.61421 18.759 10.151 20.6068 10.4425 21.5557C10.9491 21.6376 11.4693 21.6802 12 21.6802C12.5307 21.6802 13.0509 21.6376 13.5576 21.5557ZM8.66629 21.0911C8.32707 19.9166 7.8781 18.229 7.53658 16.4634C5.77099 16.1219 4.08335 15.6729 2.90891 15.3337C3.88622 17.9978 6.00216 20.1138 8.66629 21.0911ZM2.44429 13.5575C3.39316 13.849 5.24101 14.3858 7.25548 14.8058C7.1173 13.8411 7.02907 12.8816 7.02907 12C7.02907 11.3958 7.07048 10.7564 7.14056 10.1057C5.45769 9.76328 3.87779 9.33683 2.78712 9.01933C2.48383 9.95751 2.31977 10.959 2.31977 12C2.31977 12.5307 2.3624 13.0509 2.44429 13.5575ZM3.39504 7.56109C4.42731 7.85824 5.84865 8.23703 7.35522 8.54737C7.70416 6.42827 8.26303 4.305 8.66626 2.90893C6.38282 3.74662 4.50139 5.42102 3.39504 7.56109ZM8.68924 10.3888C8.63137 
10.9544 8.59884 11.4968 8.59884 12C8.59884 12.9399 8.71224 14.012 8.88985 15.1102C9.98798 15.2878 11.0601 15.4012 12 15.4012C12.9399 15.4012 14.012 15.2878 15.1102 15.1102C15.2878 14.012 15.4012 12.9399 15.4012 12C15.4012 11.4968 15.3686 10.9544 15.3108 10.3888C14.1776 10.5703 13.0348 10.6919 12 10.6919C10.9652 10.6919 9.82236 10.5703 8.68924 10.3888ZM9.67273 0.991173C10.4243 0.833026 11.2029 0.75 12 0.75C12.7971 0.75 13.5757 0.833026 14.3273 0.991174C18.0108 1.76627 21.0281 4.34097 22.42 7.75174C22.9554 9.06356 23.25 10.4983 23.25 12C23.25 12.7971 23.167 13.5757 23.0088 14.3273C22.0943 18.6736 18.6736 22.0943 14.3273 23.0088C13.5757 23.167 12.7971 23.25 12 23.25C11.2029 23.25 10.4243 23.167 9.67273 23.0088C5.32644 22.0943 1.90572 18.6736 0.991173 14.3273C0.833026 13.5757 0.75 12.7971 0.75 12C0.75 10.4972 1.04509 9.06132 1.58123 7.74866C2.97369 4.33943 5.99026 1.76604 9.67273 0.991173Z" fill="#000000"/> </g> </g> <defs> <clipPath id="clip0_1_1828"> <rect width="24" height="24" fill="white"/> </clipPath> </defs> </g>
7 | </svg>


--------------------------------------------------------------------------------
/frontend/src/logo-color.svg:
--------------------------------------------------------------------------------
 1 | <svg width="308" height="102" viewBox="0 0 308 102" fill="none" xmlns="http://www.w3.org/2000/svg">
 2 | <path d="M57.3779 21.5162C40.5967 21.5162 29.3328 31.411 29.3328 50.5135V64.0909C30.9412 63.2854 32.7812 62.8247 34.7344 62.8247C36.6877 62.8247 38.5277 63.2854 40.2519 64.0909V50.3977C40.2519 37.9707 47.0328 31.6426 57.3779 31.6426C67.7231 31.6426 74.504 37.9707 74.504 50.3977V80.2005H85.423V50.3977C85.5388 31.182 74.1592 21.5162 57.3779 21.5162Z" fill="#1A1B1D"/>
 3 | <path d="M91.4011 51.5481C91.4011 34.1724 104.044 21.4006 121.976 21.4006C139.908 21.4006 152.322 34.1724 152.322 51.5481V55.5755H103.01C104.618 65.8152 112.205 71.4535 122.089 71.4535C129.447 71.4535 134.619 69.1529 137.952 64.2042H150.021C145.654 74.9045 135.309 81.6931 122.089 81.6931C104.158 81.6931 91.3985 68.9213 91.3985 51.5456L91.4011 51.5481ZM140.942 45.794C138.757 36.5889 131.287 31.6402 122.092 31.6402C112.897 31.6402 105.54 36.7021 103.357 45.794H140.945H140.942Z" fill="#1A1B1D"/>
 4 | <path d="M158.3 51.5481C158.3 34.1724 170.943 21.4006 188.875 21.4006C206.806 21.4006 219.449 34.1724 219.449 51.5481C219.449 68.9238 206.806 81.6957 188.875 81.6957C170.943 81.6957 158.3 68.9238 158.3 51.5481ZM208.53 51.5481C208.53 39.9266 200.715 31.6427 188.99 31.6427C177.266 31.6427 169.45 39.9266 169.45 51.5481C169.45 63.1697 177.266 71.4535 188.99 71.4535C200.715 71.4535 208.53 63.1697 208.53 51.5481Z" fill="#1A1B1D"/>
 5 | <path d="M281.978 91.5929H283.242C290.254 91.5929 292.897 88.4869 292.897 80.3164V23.3589H303.816V79.6267C303.816 93.7809 298.299 101.03 284.276 101.03H281.978V91.5959V91.5929Z" fill="#1A1B1D"/>
 6 | <path d="M276.577 85.7229H265.655V72.1458H237.955C232.437 72.1458 227.61 69.3845 225.425 65.0122C223.356 60.869 223.93 56.0362 226.92 52.0088L251.862 19.2157C255.426 14.3828 261.517 12.427 267.15 14.267C272.897 16.107 276.577 21.2847 276.577 27.3837V62.4774H284.85V72.259H276.577V85.7209V85.7229ZM236.002 57.992C235.657 58.4526 235.541 59.0265 235.541 59.6029C235.541 61.0981 236.804 62.3642 238.3 62.3642H265.655V27.0389C265.655 25.1989 264.276 24.5067 263.702 24.3934C263.473 24.2776 263.128 24.2776 262.667 24.2776C261.978 24.2776 261.172 24.5067 260.483 25.4279L236.002 57.992Z" fill="#1A1B1D"/>
 7 | <path d="M298.404 0C293.232 0 289.094 4.2564 289.094 9.4367C289.094 14.617 293.348 18.8709 298.404 18.8709C303.461 18.8709 307.715 14.6144 307.715 9.4367C307.715 4.259 303.461 0 298.404 0Z" fill="#014063"/>
 8 | <path d="M4.73608 54.312C3.35676 55.0017 2.09321 56.152 1.17192 57.6497C0.253212 59.1449 -0.0916182 60.7558 0.0216118 62.3668C0.137412 65.2439 1.7458 67.8893 4.38868 69.3845C6.80258 70.7638 9.56128 70.419 12.2042 69.8451C15.4235 69.0396 18.1822 68.6948 21.0542 70.4216H21.17C26.1135 73.2986 26.1135 80.548 21.17 83.4225H21.0542C18.1797 85.1489 15.421 84.8039 12.2042 83.9989C9.67448 83.3092 6.91578 82.9644 4.38868 84.4599C1.7458 85.9549 0.250642 88.7159 0.0216118 91.4769C-0.0941982 93.0879 0.250642 94.6989 1.17192 96.1939C2.09063 97.6899 3.24095 98.8399 4.73608 99.5319C7.26578 100.798 10.3693 100.798 13.0122 99.3029C15.4261 97.9239 16.5764 95.2759 17.2661 92.7429C18.1848 89.5219 19.2193 86.9889 22.2096 85.3779C25.0841 83.6515 27.8428 83.9989 31.0596 84.8039C33.5893 85.4939 36.348 85.8389 38.8751 84.3439C41.518 82.8486 43.0131 80.0873 43.2422 77.326V76.1757C43.1264 73.2986 41.518 70.6532 38.8751 69.158C36.4612 67.7761 33.7025 68.1235 31.0596 68.6974C27.8403 69.5029 25.0816 69.8477 22.2096 68.1209C19.3351 66.3942 18.1874 63.9777 17.2661 60.7558C16.5764 58.2236 15.4261 55.6939 13.0122 54.1962C10.3693 53.0459 7.26578 53.0459 4.73608 54.312Z" fill="#014063"/>
 9 | </svg>
10 | 


--------------------------------------------------------------------------------
/frontend/src/components/Layout/Header.tsx:
--------------------------------------------------------------------------------
 1 | import Neo4jLogoBW from '../../logo.svg';
 2 | import Neo4jLogoColor from '../../logo-color.svg';
 3 | import {
 4 |   MoonIconOutline,
 5 |   SunIconOutline,
 6 |   CodeBracketSquareIconOutline,
 7 |   InformationCircleIconOutline,
 8 | } from '@neo4j-ndl/react/icons';
 9 | import { Typography } from '@neo4j-ndl/react';
10 | import { useCallback, useEffect } from 'react';
11 | import IconButtonWithToolTip from '../UI/IconButtonToolTip';
12 | import { tooltips } from '../../utils/Constants';
13 | import { useFileContext } from '../../context/UsersFiles';
14 | 
/**
 * Top navigation bar of the application.
 *
 * Renders the Neo4j logo (variant chosen by `themeMode`) and three icon buttons:
 * a documentation link, a link to the GitHub issues page and a light/dark theme toggle.
 *
 * @param themeMode   Current colour mode; `'dark'` selects the dark-mode logo and the sun icon.
 * @param toggleTheme Callback invoked when the theme button is clicked.
 */
export default function Header({ themeMode, toggleTheme }: { themeMode: string; toggleTheme: () => void }) {
  // Open external links in a new tab. `noopener,noreferrer` prevents the opened page
  // from reaching back into this window through `window.opener`.
  const handleURLClick = useCallback((url: string) => {
    window.open(url, '_blank', 'noopener,noreferrer');
  }, []);

  const { isSchema, setIsSchema } = useFileContext();

  // NOTE(review): this writes back the value that was just read from the context,
  // so it looks redundant — confirm before removing.
  useEffect(() => {
    setIsSchema(isSchema);
  }, [isSchema]);

  return (
    <div
      className='n-bg-palette-neutral-bg-weak p-1'
      style={{ borderBottom: '2px solid rgb(var(--theme-palette-neutral-border-weak))' }}
    >
      <nav
        className='flex items-center justify-between flex-row'
        role='navigation'
        data-testid='navigation'
        id='navigation'
        aria-label='main navigation'
      >
        <section className='flex w-1/3 shrink-0 grow-0 items-center grow min-w-[200px]'>
          <Typography variant='h6' component='a' href='#app-bar-with-responsive-menu' sx={{}}>
            <img
              src={themeMode === 'dark' ? Neo4jLogoBW : Neo4jLogoColor}
              className='h-8 min-h-8 min-w-8'
              alt='Neo4j Logo'
            />
          </Typography>
        </section>
        <section className='items-center justify-end w-1/3 grow-0 flex'>
          <div>
            <div
              className='inline-flex gap-x-1'
              style={{ display: 'flex', flexGrow: 0, alignItems: 'center', gap: '4px' }}
            >
              <IconButtonWithToolTip
                text={tooltips.documentation}
                onClick={() => handleURLClick('https://neo4j.com/labs/genai-ecosystem/llm-graph-builder')}
                size='large'
                clean
                placement='left'
                label={tooltips.documentation}
              >
                <InformationCircleIconOutline className='n-size-token-7' />
              </IconButtonWithToolTip>

              <IconButtonWithToolTip
                label={tooltips.github}
                onClick={() => handleURLClick('https://github.com/neo4j-labs/llm-graph-builder/issues')}
                text={tooltips.github}
                size='large'
                clean
              >
                <CodeBracketSquareIconOutline />
              </IconButtonWithToolTip>
              <IconButtonWithToolTip
                label={tooltips.theme}
                text={tooltips.theme}
                clean
                size='large'
                onClick={toggleTheme}
                placement='left'
              >
                {/* Show the icon of the mode the button switches TO. */}
                {themeMode === 'dark' ? (
                  <span role='img' aria-label='sun'>
                    <SunIconOutline />
                  </span>
                ) : (
                  <span role='img' aria-label='moon'>
                    <MoonIconOutline />
                  </span>
                )}
              </IconButtonWithToolTip>
            </div>
          </div>
        </section>
      </nav>
    </div>
  );
}
98 | 


--------------------------------------------------------------------------------
/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx:
--------------------------------------------------------------------------------
  1 | import { Button, Dialog, Typography } from '@neo4j-ndl/react';
  2 | import { CustomFile } from '../../../types';
  3 | import LargeFilesAlert from './LargeFilesAlert';
  4 | import { useEffect, useState } from 'react';
  5 | import { useFileContext } from '../../../context/UsersFiles';
  6 | 
  7 | export default function ConfirmationDialog({
  8 |   largeFiles,
  9 |   open,
 10 |   onClose,
 11 |   loading,
 12 |   extractHandler,
 13 | }: {
 14 |   largeFiles: CustomFile[];
 15 |   open: boolean;
 16 |   onClose: () => void;
 17 |   loading: boolean;
 18 |   extractHandler: (allowLargeFiles: boolean, selectedFilesFromAllfiles: CustomFile[]) => void;
 19 | }) {
 20 |   const { setSelectedRows, filesData, setRowSelection, selectedRows } = useFileContext();
 21 |   const [checked, setChecked] = useState<string[]>([...largeFiles.map((f) => f.id)]);
 22 |   const handleToggle = (ischecked: boolean, id: string) => {
 23 |     const newChecked = [...checked];
 24 |     if (ischecked) {
 25 |       const file = filesData.find((f) => f.id === id);
 26 |       newChecked.push(id);
 27 |       setSelectedRows((prev) => {
 28 |         const fileindex = prev.findIndex((f) => JSON.parse(f).id === id);
 29 |         if (fileindex == -1) {
 30 |           return [...prev, JSON.stringify(file)];
 31 |         }
 32 |         return prev;
 33 |       });
 34 |       setRowSelection((prev) => {
 35 |         const copiedobj = { ...prev };
 36 |         for (const key in copiedobj) {
 37 |           if (key == id) {
 38 |             copiedobj[key] = true;
 39 |           }
 40 |         }
 41 |         return copiedobj;
 42 |       });
 43 |     } else {
 44 |       const currentIndex = checked.findIndex((v) => v === id);
 45 |       newChecked.splice(currentIndex, 1);
 46 |       setRowSelection((prev) => {
 47 |         const copiedobj = { ...prev };
 48 |         for (const key in copiedobj) {
 49 |           if (key == id) {
 50 |             copiedobj[key] = false;
 51 |           }
 52 |         }
 53 |         return copiedobj;
 54 |       });
 55 |       setSelectedRows((prev) => {
 56 |         const filteredrows = prev.filter((f) => JSON.parse(f).id != id);
 57 |         return filteredrows;
 58 |       });
 59 |     }
 60 |     setChecked(newChecked);
 61 |   };
 62 |   useEffect(() => {
 63 |     if (!checked.length) {
 64 |       onClose();
 65 |     }
 66 |   }, [checked]);
 67 | 
 68 |   return (
 69 |     <Dialog
 70 |       size='medium'
 71 |       open={open}
 72 |       aria-labelledby='form-dialog-title'
 73 |       onClose={() => {
 74 |         setChecked([]);
 75 |         onClose();
 76 |         extractHandler(false, []);
 77 |       }}
 78 |     >
 79 |       <Dialog.Content className='n-flex n-flex-col n-gap-token-4'>
 80 |         {largeFiles.length === 0 && loading ? (
 81 |           <Typography variant='subheading-large'>Files are under processing</Typography>
 82 |         ) : (
 83 |           <LargeFilesAlert handleToggle={handleToggle} largeFiles={largeFiles} checked={checked}></LargeFilesAlert>
 84 |         )}
 85 |       </Dialog.Content>
 86 |       <Dialog.Actions className='!mt-3'>
 87 |         <Button
 88 |           onClick={() => {
 89 |             if (selectedRows.length) {
 90 |               extractHandler(true, []);
 91 |             } else {
 92 |               const tobeProcessFiles: CustomFile[] = [];
 93 |               checked.forEach((id: string) => {
 94 |                 const file = filesData.find((f) => f.id === id);
 95 |                 if (file) {
 96 |                   tobeProcessFiles.push(file);
 97 |                 }
 98 |               });
 99 |               extractHandler(true, tobeProcessFiles);
100 |             }
101 |             setChecked([]);
102 |             onClose();
103 |           }}
104 |           size='large'
105 |         >
106 |           Continue
107 |         </Button>
108 |       </Dialog.Actions>
109 |     </Dialog>
110 |   );
111 | }
112 | 


--------------------------------------------------------------------------------
/docs/project_docs.adoc:
--------------------------------------------------------------------------------
  1 | = LLM Knowledge Graph Builder
  2 | 
  3 | == Introduction
  4 | 
  5 | This document provides comprehensive documentation for the Neo4j llm-graph-builder Project, a Python web application built with the FastAPI framework. It covers various aspects of the project, including its features, architecture, usage, development, deployment, limitations and known issues.
  6 | 
  7 | 
  8 | == Features
  9 | 
 10 | * Upload unstructured data from multiple sources to generate a structured Neo4j knowledge graph.
 11 | 
 12 | * Extract nodes and relations using multiple LLMs (OpenAI GPT-3.5, OpenAI GPT-4, Gemini 1.0-Pro and Diffbot).
 13 | 
 14 | * View complete graph or only a particular element of graph(ex: Only chunks, only entities, document and entities, etc.) 
 15 | 
 16 | * Generate embedding of chunks created from unstructured content.
 17 | 
 18 | * Generate k-nearest neighbors graph for similar chunks.
 19 | 
 20 | * Chat with graph data using chat bot.
 21 | 
 22 | == Local Setup and Execution
 23 | 
 24 | Run Docker Compose to build and start all components:
 25 | ....
 26 | docker-compose up --build
 27 | ....
 28 | 
 29 | Alternatively, run specific directories separately:
 30 | 
 31 | ** For frontend 
 32 | ....
 33 | cd frontend
 34 | yarn
 35 | yarn run dev
 36 | ....
 37 | 
 38 | ** For backend
 39 | ....
 40 | cd backend
 41 | python -m venv envName
 42 | source envName/bin/activate 
 43 | pip install -r requirements.txt
 44 | uvicorn score:app --reload
 45 | ....
 46 | 
 47 | Set up environment variables 
 48 | ....
 49 | OPENAI_API_KEY = ""
 50 | DIFFBOT_API_KEY = ""
 51 | NEO4J_URI = ""
 52 | NEO4J_USERNAME = ""
 53 | NEO4J_PASSWORD = ""
 54 | NEO4J_DATABASE = ""
 55 | AWS_ACCESS_KEY_ID =  ""
 56 | AWS_SECRET_ACCESS_KEY = ""
 57 | EMBEDDING_MODEL = ""
 58 | IS_EMBEDDING = "TRUE"
 59 | KNN_MIN_SCORE = ""
 60 | LANGCHAIN_API_KEY = ""
 61 | LANGCHAIN_PROJECT = ""
 62 | LANGCHAIN_TRACING_V2 = ""
 63 | LANGCHAIN_ENDPOINT = ""
 64 | NUMBER_OF_CHUNKS_TO_COMBINE = ""
 65 | ....
 66 | 
 67 | == Architecture
 68 | image::project architecture.png[Architecture diagram, 600, align='left']
 69 | 
 70 | == Development
 71 | 
 72 | ==== Backend
 73 | link:backend/backend_docs.adoc[backend_docs.adoc]
 74 | 
 75 | ==== Frontend
 76 | link:frontend/frontend_docs.adoc[frontend_docs.adoc]
 77 | 
 78 | == Deployment and Monitoring
 79 | * The application is deployed on Google Cloud Platform.
 80 | 
 81 |   To deploy frontend
 82 | ....
 83 | gcloud run deploy 
 84 | source location current directory > Frontend
 85 | region : 32 [us-central 1]
 86 | Allow unauthenticated request : Yes
 87 | ....
 88 | 
 89 |   To deploy backend
 90 | ....
 91 | gcloud run deploy --set-env-vars "OPENAI_API_KEY = " --set-env-vars "DIFFBOT_API_KEY = " --set-env-vars "NEO4J_URI = " --set-env-vars "NEO4J_PASSWORD = " --set-env-vars "NEO4J_USERNAME = "
 92 | source location current directory > Backend
 93 | region : 32 [us-central 1]
 94 | Allow unauthenticated request : Yes
 95 | .... 
 96 | 
 97 | * LangServe is used with FastAPI to deploy LangChain runnables and chains as a REST API.
 98 | 
 99 | * Langsmith is used to monitor and evaluate the application
100 | 
101 | 
102 | Development URL 
103 | 
104 | Production URL 
105 | 
106 | 
107 | 
108 | == Appendix
109 | 
110 | === Limitations
111 | 
112 | ** Only PDF files uploaded from the local device, an S3 bucket, or a GCS bucket can be processed.
113 | 
114 | ** Only GCS buckets present under the 1051503595507@cloudbuild.gserviceaccount.com service account can be accessed.
115 | 
116 | ** Only the first page of Wikipedia content is processed to generate the graphDocument.
117 | 
118 | 
119 | === Known issues 
120 | 
121 | ** InactiveRpcError error with Gemini 1.0 Pro -  grpc_status:13, grpc_message:"Internal error encountered."
122 | 
123 | ** ResourceExhausted error with Gemini 1.5 Pro - 429 Quota exceeded for aiplatform.googleapis.com/generate_content_requests_per_minute_per_project_per_base_model with base model: gemini-1.5-pro
124 | 
125 | ** Gemini response validation errors even after making safety_settings parameters to BLOCK_NONE. 
126 | 
127 | 


--------------------------------------------------------------------------------
/backend/src/post_processing.py:
--------------------------------------------------------------------------------
 1 | from neo4j import GraphDatabase
 2 | import logging
 3 | import time
 4 | from langchain_community.graphs import Neo4jGraph
 5 | import os
 6 | from src.shared.common_fn import load_embedding_model
 7 | 
 8 | DROP_INDEX_QUERY = "DROP INDEX entities IF EXISTS;"
 9 | LABELS_QUERY = "CALL db.labels()"
10 | FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX entities FOR (n{labels_str}) ON EACH [n.id, n.description];"
11 | FILTER_LABELS = ["Chunk","Document"]
12 | 
13 | def create_fulltext(uri, username, password, database):
14 |     start_time = time.time()
15 |     logging.info("Starting the process of creating a full-text index.")
16 | 
17 |     try:
18 |         driver = GraphDatabase.driver(uri, auth=(username, password), database=database)
19 |         driver.verify_connectivity()
20 |         logging.info("Database connectivity verified.")
21 |     except Exception as e:
22 |         logging.error(f"Failed to create a database driver or verify connectivity: {e}")
23 |         return
24 | 
25 |     try:
26 |         with driver.session() as session:
27 |             try:
28 |                 start_step = time.time()
29 |                 session.run(DROP_INDEX_QUERY)
30 |                 logging.info(f"Dropped existing index (if any) in {time.time() - start_step:.2f} seconds.")
31 |             except Exception as e:
32 |                 logging.error(f"Failed to drop index: {e}")
33 |                 return
34 |             try:
35 |                 start_step = time.time()
36 |                 result = session.run(LABELS_QUERY)
37 |                 labels = [record["label"] for record in result]
38 |                 
39 |                 for label in FILTER_LABELS:
40 |                     if label in labels:
41 |                         labels.remove(label)
42 |                 
43 |                 labels_str = ":" + "|".join([f"`{label}`" for label in labels])
44 |                 logging.info(f"Fetched labels in {time.time() - start_step:.2f} seconds.")
45 |             except Exception as e:
46 |                 logging.error(f"Failed to fetch labels: {e}")
47 |                 return
48 |             try:
49 |                 start_step = time.time()
50 |                 session.run(FULL_TEXT_QUERY.format(labels_str=labels_str))
51 |                 logging.info(f"Created full-text index in {time.time() - start_step:.2f} seconds.")
52 |             except Exception as e:
53 |                 logging.error(f"Failed to create full-text index: {e}")
54 |                 return
55 |     except Exception as e:
56 |         logging.error(f"An error occurred during the session: {e}")
57 |     finally:
58 |         driver.close()
59 |         logging.info("Driver closed.")
60 |         logging.info(f"Process completed in {time.time() - start_time:.2f} seconds.")
61 | 
62 |         
def create_entity_embedding(graph:Neo4jGraph):
    """Embed every entity returned by fetch_entities_for_embedding, 1000 rows per update call."""
    batch_size = 1000
    pending = fetch_entities_for_embedding(graph)
    offset = 0
    while offset < len(pending):
        update_embeddings(pending[offset:offset + batch_size], graph)
        offset += batch_size
67 |             
def fetch_entities_for_embedding(graph):
    """Return the entities that still need an embedding.

    Runs a Cypher query selecting nodes that are neither ``Chunk`` nor ``Document``,
    have a non-null ``id`` and no ``embedding`` property.

    Returns:
        A list of ``{"elementId": ..., "text": ...}`` dicts, where ``text`` is the
        node id, a space, and the description (empty string when absent).
    """
    query = """
                MATCH (e)
                WHERE NOT (e:Chunk OR e:Document) AND e.embedding IS NULL AND e.id IS NOT NULL
                RETURN elementId(e) AS elementId, e.id + " " + coalesce(e.description, "") AS text
                """
    entities = []
    for record in graph.query(query):
        # Keep only the two fields the embedding step uses.
        entities.append({"elementId": record["elementId"], "text": record["text"]})
    return entities
76 | 
def update_embeddings(rows, graph):
    """Compute an embedding for each row's ``text`` and store it on the matching node.

    The model is selected by the ``EMBEDDING_MODEL`` environment variable. Each dict in
    ``rows`` is modified in place: an ``embedding`` key is added before the batch is sent.

    Args:
        rows: Dicts carrying ``elementId`` and ``text``.
        graph: Object whose ``query(query, params=...)`` executes Cypher.

    Returns:
        Whatever ``graph.query`` returns for the update statement.
    """
    model_name = os.getenv('EMBEDDING_MODEL')
    embedder, _dimension = load_embedding_model(model_name)
    logging.info("update embedding for entities")
    for entry in rows:
        vector = embedder.embed_query(entry['text'])
        entry['embedding'] = vector
    query = """
      UNWIND $rows AS row
      MATCH (e) WHERE elementId(e) = row.elementId
      CALL db.create.setNodeVectorProperty(e, "embedding", row.embedding)
      """
    return graph.query(query, params={'rows': rows})


--------------------------------------------------------------------------------
/backend/src/document_sources/local_file.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import shutil
 3 | from pathlib import Path
 4 | from tempfile import NamedTemporaryFile
 5 | # from langchain_community.document_loaders import PyPDFLoader
 6 | from langchain_community.document_loaders import PyMuPDFLoader
 7 | from langchain_community.document_loaders import UnstructuredFileLoader
 8 | from langchain_core.documents import Document
 9 | 
10 | # def get_documents_from_file_by_bytes(file):
11 | #     file_name = file.filename
12 | #     logging.info(f"get_documents_from_file called for filename = {file_name}")
13 | #     suffix = Path(file.filename).suffix
14 | #     with NamedTemporaryFile(delete=True, suffix=suffix) as tmp:
15 | #         shutil.copyfileobj(file.file, tmp)
16 | #         tmp_path = Path(tmp.name)
17 | #         loader = PyPDFLoader(str(tmp_path))
18 | #         pages = loader.load_and_split()
19 | #     return file_name, pages
20 | 
def load_document_content(file_path):
    """Pick the document loader for ``file_path`` based on its extension.

    Args:
        file_path: Path of the file; the extension check is case-insensitive.

    Returns:
        A ``PyMuPDFLoader`` for ``.pdf`` files, otherwise an ``UnstructuredFileLoader``
        created with ``encoding="utf-8"`` and ``mode="elements"``.
    """
    if Path(file_path).suffix.lower() == '.pdf':
        # Debug logging instead of print(): keeps stdout clean and honours the log level.
        logging.debug("Using PyMuPDFLoader for %s", file_path)
        return PyMuPDFLoader(file_path)
    logging.debug("Using UnstructuredFileLoader for %s", file_path)
    return UnstructuredFileLoader(file_path, encoding="utf-8", mode="elements")
28 |     
def get_documents_from_file_by_path(file_path,file_name):
    """Load a local file and return its content as page documents.

    PDF files are returned exactly as the loader produces them; any other file type is
    loaded as elements and regrouped per page by ``get_pages_with_page_numbers``.

    Args:
        file_path: Location of the file on disk.
        file_name: Name used in log and error messages and returned to the caller.

    Returns:
        Tuple ``(file_name, pages, file_extension)``; the extension is lower-cased
        and includes the leading dot.

    Raises:
        Exception: If the file does not exist, or if loading fails (the original
            error is attached as ``__cause__``).
    """
    file_path = Path(file_path)
    if not file_path.exists():
        logging.info(f'File {file_name} does not exist')
        raise Exception(f'File {file_name} does not exist')

    logging.info(f'file {file_name} processing')
    file_extension = file_path.suffix.lower()
    try:
        loader = load_document_content(file_path)
        if file_extension == ".pdf":
            pages = loader.load()
        else:
            unstructured_pages = loader.load()
            pages = get_pages_with_page_numbers(unstructured_pages)
    except Exception as e:
        # Keep the generic message callers see, but log and chain the root cause.
        logging.exception(f'Error while reading the file content or metadata of {file_name}')
        raise Exception('Error while reading the file content or metadata') from e
    return file_name, pages , file_extension
48 | 
def get_pages_with_page_numbers(unstructured_pages):
    """Merge loader elements into one ``Document`` per page.

    Three kinds of element are handled:

    * Elements whose metadata has ``page_number`` are concatenated per page. When an
      element belongs to a later page, the page collected so far is emitted and the
      element starts the new page (its text is kept, not dropped).
    * An element without ``page_number`` whose ``category`` is ``'PageBreak'`` (and
      which is not the first element) emits the text collected so far.
    * Any other element is appended to the current text; if it is the last element,
      the text is emitted with ``page_number`` 1 and ``total_pages`` 1.

    Args:
        unstructured_pages: Sequence of objects exposing ``page_content`` and a
            ``metadata`` dict.

    Returns:
        List of ``Document`` objects, in input order.
    """
    pages = []
    page_number = 1
    page_content=''
    metadata = {}
    # Position-based first/last checks: comparing elements by equality can match an
    # earlier element that merely looks like the first or last one.
    last_index = len(unstructured_pages) - 1
    for index, page in enumerate(unstructured_pages):
        is_last = index == last_index
        if 'page_number' in page.metadata:
            element_page = page.metadata['page_number']
            # Total is read from the final element; fall back to the current page number
            # when that element carries no page_number.
            total_pages = unstructured_pages[-1].metadata.get('page_number', element_page)

            if element_page > page_number:
                # Emit the page collected so far, then jump straight to the element's page
                # so a gap in numbering does not produce a run of empty pages.
                if not metadata:
                    metadata = {'total_pages': total_pages}
                pages.append(Document(page_content = page_content, metadata=metadata))
                page_content = ''
                page_number = element_page

            if element_page == page_number:
                page_content += page.page_content
                metadata = {'source':page.metadata['source'],'page_number':page_number, 'filename':page.metadata['filename'],
                        'filetype':page.metadata['filetype'], 'total_pages':total_pages}

            if is_last:
                if not metadata:
                    metadata = {'total_pages': total_pages}
                pages.append(Document(page_content = page_content, metadata=metadata))

        elif page.metadata.get('category') == 'PageBreak' and index != 0:
            page_number += 1
            pages.append(Document(page_content = page_content, metadata=metadata))
            page_content = ''
            metadata = {}

        else:
            page_content += page.page_content
            metadata_with_custom_page_number = {'source':page.metadata['source'],
                            'page_number':1, 'filename':page.metadata['filename'],
                            'filetype':page.metadata['filetype'], 'total_pages':1}
            if is_last:
                pages.append(Document(page_content = page_content, metadata=metadata_with_custom_page_number))
    return pages


--------------------------------------------------------------------------------
/frontend/src/components/WebSources/GenericSourceModal.tsx:
--------------------------------------------------------------------------------
 1 | import { Box, Dialog, Tabs, Typography } from '@neo4j-ndl/react';
 2 | import youtubelightmodelogo from '../../assets/images/youtube-lightmode.svg';
 3 | import youtubedarkmodelogo from '../../assets/images/youtube-darkmode.svg';
 4 | import wikipedialogo from '../../assets/images/wikipedia.svg';
 5 | import weblogo from '../../assets/images/web.svg';
 6 | import webdarkmode from '../../assets/images/web-darkmode.svg';
 7 | import wikipediadarkmode from '../../assets/images/wikipedia-darkmode.svg';
 8 | import { useContext, useState } from 'react';
 9 | import WikipediaInput from './WikiPedia/WikipediaInput';
10 | import WebInput from './Web/WebInput';
11 | import YoutubeInput from './Youtube/YoutubeInput';
12 | import { APP_SOURCES } from '../../utils/Constants';
13 | import Neo4jDataImportFromCloud from '../../assets/images/data-from-cloud.svg';
14 | import { ThemeWrapperContext } from '../../context/ThemeWrapper';
15 | 
/**
 * Modal for adding web sources, with one tab per enabled source:
 * YouTube (tab 0), Wikipedia (tab 1) and generic web pages (tab 2).
 *
 * A tab and its panel are rendered only when `APP_SOURCES` lists the source
 * (`'youtube'`, `'wiki'`, `'web'`). While an input reports loading, all tabs are disabled.
 *
 * @param open            Whether the dialog is shown.
 * @param closeHandler    Called when the dialog requests to close.
 * @param isOnlyYoutube   Open on the YouTube tab.
 * @param isOnlyWikipedia Open on the Wikipedia tab.
 * @param isOnlyWeb       Open on the web tab.
 */
export default function GenericModal({
  open,
  closeHandler,
  isOnlyYoutube,
  isOnlyWikipedia,
  isOnlyWeb,
}: {
  open: boolean;
  closeHandler: () => void;
  isOnlyYoutube?: boolean;
  isOnlyWikipedia?: boolean;
  isOnlyWeb?: boolean;
}) {
  const themeUtils = useContext(ThemeWrapperContext);

  // Resolve the enabled sources once instead of repeating the check for every tab and panel.
  const isYoutubeEnabled = APP_SOURCES != undefined && APP_SOURCES.includes('youtube');
  const isWikipediaEnabled = APP_SOURCES != undefined && APP_SOURCES.includes('wiki');
  const isWebEnabled = APP_SOURCES != undefined && APP_SOURCES.includes('web');

  // Without an explicit request, start on the first tab that is actually rendered
  // so the dialog never opens on a tab id that has no panel.
  const firstEnabledTab = isYoutubeEnabled ? 0 : isWikipediaEnabled ? 1 : isWebEnabled ? 2 : 0;
  const [activeTab, setactiveTab] = useState<number>(
    isOnlyYoutube ? 0 : isOnlyWikipedia ? 1 : isOnlyWeb ? 2 : firstEnabledTab
  );
  const [isLoading, setIsLoading] = useState<boolean>(false);

  return (
    <Dialog open={open} onClose={closeHandler}>
      <Dialog.Header>
        <Box className='flex flex-row pb-6 items-center mb-2'>
          <img
            src={Neo4jDataImportFromCloud}
            style={{ width: 95, height: 95, marginRight: 10 }}
            loading='lazy'
            alt=''
          />
          <Box className='flex flex-col'>
            <Typography variant='h2'>Web Sources</Typography>
            <Typography variant='body-medium' className='mb-2'>
              Convert Any Web Source to Knowledge graph
            </Typography>
          </Box>
        </Box>
        <Tabs fill='underline' onChange={setactiveTab} size='large' value={activeTab}>
          {isYoutubeEnabled && (
            <Tabs.Tab tabId={0} aria-label='YouTube' disabled={isLoading}>
              <img
                src={themeUtils.colorMode === 'light' ? youtubelightmodelogo : youtubedarkmodelogo}
                className={`brandimg`}
                alt='YouTube'
              />
            </Tabs.Tab>
          )}
          {isWikipediaEnabled && (
            <Tabs.Tab tabId={1} aria-label='Wikipedia' disabled={isLoading}>
              {/* NOTE(review): unlike the YouTube and web tabs, dark mode picks the plain logo
                  and light mode the "-darkmode" asset — confirm this is intended. */}
              <img
                src={themeUtils.colorMode === 'dark' ? wikipedialogo : wikipediadarkmode}
                className={`brandimg`}
                alt='Wikipedia'
              />
            </Tabs.Tab>
          )}
          {isWebEnabled && (
            <Tabs.Tab tabId={2} aria-label='Website' disabled={isLoading}>
              <img
                src={themeUtils.colorMode === 'dark' ? webdarkmode : weblogo}
                className={`brandimg`}
                alt='Website'
              />
            </Tabs.Tab>
          )}
        </Tabs>
        {isYoutubeEnabled && (
          <Tabs.TabPanel className='n-flex n-flex-col n-gap-token-4 n-p-token-6' value={activeTab} tabId={0}>
            <YoutubeInput setIsLoading={setIsLoading} />
          </Tabs.TabPanel>
        )}
        {isWikipediaEnabled && (
          <Tabs.TabPanel className='n-flex n-flex-col n-gap-token-4 n-p-token-6' value={activeTab} tabId={1}>
            <WikipediaInput setIsLoading={setIsLoading} />
          </Tabs.TabPanel>
        )}
        {isWebEnabled && (
          <Tabs.TabPanel className='n-flex n-flex-col n-gap-token-4 n-p-token-6' value={activeTab} tabId={2}>
            <WebInput setIsLoading={setIsLoading} />
          </Tabs.TabPanel>
        )}
      </Dialog.Header>
    </Dialog>
  );
}
87 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | .DS_Store
  2 | # Byte-compiled / optimized / DLL files
  3 | __pycache__/
  4 | *.py[cod]
  5 | *$py.class
  6 | 
  7 | # C extensions
  8 | *.so
  9 | /backend/graph
 10 | # Distribution / packaging
 11 | .Python
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | cover/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | db.sqlite3
 63 | db.sqlite3-journal
 64 | 
 65 | # Flask stuff:
 66 | instance/
 67 | .webassets-cache
 68 | 
 69 | # Scrapy stuff:
 70 | .scrapy
 71 | 
 72 | # Sphinx documentation
 73 | docs/_build/
 74 | 
 75 | # PyBuilder
 76 | .pybuilder/
 77 | target/
 78 | 
 79 | # Jupyter Notebook
 80 | .ipynb_checkpoints
 81 | 
 82 | # IPython
 83 | profile_default/
 84 | ipython_config.py
 85 | 
 86 | # pyenv
 87 | #   For a library or package, you might want to ignore these files since the code is
 88 | #   intended to run in multiple environments; otherwise, check them in:
 89 | # .python-version
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # poetry
 99 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
101 | #   commonly ignored for libraries.
102 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103 | #poetry.lock
104 | 
105 | # pdm
106 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107 | #pdm.lock
108 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109 | #   in version control.
110 | #   https://pdm.fming.dev/#use-with-ide
111 | .pdm.toml
112 | 
113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114 | __pypackages__/
115 | 
116 | # Celery stuff
117 | celerybeat-schedule
118 | celerybeat.pid
119 | 
120 | # SageMath parsed files
121 | *.sage.py
122 | 
123 | # Environments
124 | .env
125 | .venv
126 | env/
127 | venv/
128 | ENV/
129 | env.bak/
130 | venv.bak/
131 | 
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 | 
136 | # Rope project settings
137 | .ropeproject
138 | 
139 | # mkdocs documentation
140 | /site
141 | 
142 | # mypy
143 | .mypy_cache/
144 | .dmypy.json
145 | dmypy.json
146 | 
147 | # Pyre type checker
148 | .pyre/
149 | 
150 | # pytype static type analyzer
151 | .pytype/
152 | 
153 | # Cython debug symbols
154 | cython_debug/
155 | 
156 | # PyCharm
157 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
160 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
161 | #.idea/
162 | .vscode/launch.json
163 | temp.pdf
164 | google-cloud-sdk
165 | google-cloud-cli-469.0.0-linux-x86_64.tar.gz
166 | /data/llm-experiments-387609-c73d512ca3b1.json
167 | /backend/src/merged_files
168 | /backend/src/chunks
169 | /backend/merged_files
170 | /backend/chunks
171 | google-cloud-cli-479.0.0-linux-x86_64.tar.gz
172 | 


--------------------------------------------------------------------------------
/backend/requirements.txt:
--------------------------------------------------------------------------------
  1 | # NOTE: asyncio is part of the Python 3 standard library; do not add the obsolete PyPI backport here.
  2 | # NOTE(review): huggingface-hub, neo4j-rust-ext, types-protobuf and types-requests are unpinned - pin them for reproducible builds.
  3 | aiohttp==3.9.3
  4 | aiosignal==1.3.1
  5 | annotated-types==0.6.0
  6 | antlr4-python3-runtime==4.9.3
  7 | anyio==4.3.0
  8 | async-timeout==4.0.3
  9 | attrs==23.2.0
 10 | backoff==2.2.1
 11 | beautifulsoup4==4.12.3
 12 | boto3==1.34.140
 13 | botocore==1.34.140
 14 | cachetools==5.3.3
 15 | certifi==2024.2.2
 16 | cffi==1.16.0
 17 | chardet==5.2.0
 18 | charset-normalizer==3.3.2
 19 | click==8.1.7
 20 | coloredlogs==15.0.1
 21 | contourpy==1.2.0
 22 | cryptography==42.0.2
 23 | cycler==0.12.1
 24 | dataclasses-json==0.6.4
 25 | dataclasses-json-speakeasy==0.5.11
 26 | Deprecated==1.2.14
 27 | distro==1.9.0
 28 | docstring_parser==0.16
 29 | effdet==0.4.1
 30 | emoji==2.10.1
 31 | exceptiongroup==1.2.0
 32 | fastapi==0.111.0
 33 | fastapi-health==0.4.0
 34 | filelock==3.13.1
 35 | filetype==1.2.0
 36 | flatbuffers==23.5.26
 37 | fonttools==4.49.0
 38 | frozenlist==1.4.1
 39 | fsspec==2024.2.0
 40 | google-api-core==2.18.0
 41 | google-auth==2.29.0
 42 | google_auth_oauthlib==1.2.0
 43 | google-cloud-aiplatform==1.58.0
 44 | google-cloud-bigquery==3.19.0
 45 | google-cloud-core==2.4.1
 46 | google-cloud-resource-manager==1.12.3
 47 | google-cloud-storage==2.17.0
 48 | google-crc32c==1.5.0
 49 | google-resumable-media==2.7.0
 50 | googleapis-common-protos==1.63.0
 51 | greenlet==3.0.3
 52 | grpc-google-iam-v1==0.13.0
 53 | grpcio==1.62.1
 54 | google-ai-generativelanguage==0.6.6
 55 | grpcio-status==1.62.1
 56 | h11==0.14.0
 57 | httpcore==1.0.4
 58 | httpx==0.27.0
 59 | huggingface-hub
 60 | humanfriendly==10.0
 61 | idna==3.6
 62 | importlib-resources==6.1.1
 63 | # NOTE(review): "install" looks like an accidental `pip install install`; confirm nothing imports it, then drop it.
 64 | install==1.3.5
 65 | iopath==0.1.10
 66 | Jinja2==3.1.3
 67 | jmespath==1.0.1
 68 | joblib==1.3.2
 69 | jsonpatch==1.33
 70 | jsonpath-python==1.0.6
 71 | jsonpointer==2.4
 72 | json-repair==0.25.2
 73 | kiwisolver==1.4.5
 74 | langchain==0.2.6
 75 | langchain-aws==0.1.9
 76 | langchain-anthropic==0.1.19
 77 | langchain-fireworks==0.1.4
 78 | langchain-google-genai==1.0.7
 79 | langchain-community==0.2.6
 80 | langchain-core==0.2.10
 81 | langchain-experimental==0.0.62
 82 | langchain-google-vertexai==1.0.6
 83 | langchain-groq==0.1.6
 84 | langchain-openai==0.1.14
 85 | langchain-text-splitters==0.2.2
 86 | langdetect==1.0.9
 87 | langsmith==0.1.83
 88 | layoutparser==0.3.4
 89 | langserve==0.2.2
 90 | #langchain-cli==0.0.25
 91 | lxml==5.1.0
 92 | MarkupSafe==2.1.5
 93 | marshmallow==3.20.2
 94 | matplotlib==3.7.2
 95 | mpmath==1.3.0
 96 | multidict==6.0.5
 97 | mypy-extensions==1.0.0
 98 | neo4j-rust-ext
 99 | networkx==3.2.1
100 | nltk==3.8.1
101 | numpy==1.26.4
102 | omegaconf==2.3.0
103 | onnx==1.16.1
104 | onnxruntime==1.18.1
105 | openai==1.35.10
106 | opencv-python==4.8.0.76
107 | orjson==3.9.15
108 | packaging==23.2
109 | pandas==2.2.0
110 | pdf2image==1.17.0
111 | pdfminer.six==20221105
112 | pdfplumber==0.10.4
113 | pikepdf==8.11.0
114 | pillow==10.2.0
115 | pillow_heif==0.15.0
116 | portalocker==2.8.2
117 | proto-plus==1.23.0
118 | protobuf==4.23.4
119 | psutil==6.0.0
120 | pyasn1==0.6.0
121 | pyasn1_modules==0.4.0
122 | pycocotools==2.0.7
123 | pycparser==2.21
124 | pydantic==2.8.2
125 | pydantic_core==2.20.1
126 | pyparsing==3.0.9
127 | pypdf==4.0.1
128 | PyPDF2==3.0.1
129 | pypdfium2==4.27.0
130 | pytesseract==0.3.10
131 | python-dateutil==2.8.2
132 | python-dotenv==1.0.1
133 | python-iso639==2024.2.7
134 | python-magic==0.4.27
135 | python-multipart==0.0.9
136 | pytube==15.0.0
137 | pytz==2024.1
138 | PyYAML==6.0.1
139 | rapidfuzz==3.6.1
140 | regex==2023.12.25
141 | requests==2.32.3
142 | rsa==4.9
143 | s3transfer==0.10.1
144 | safetensors==0.4.1
145 | scipy==1.10.1
146 | shapely==2.0.3
147 | six==1.16.0
148 | sniffio==1.3.1
149 | soupsieve==2.5
150 | SQLAlchemy==2.0.28
151 | starlette==0.37.2
152 | sse-starlette==2.1.2
153 | starlette-session==0.4.3
154 | sympy==1.12
155 | tabulate==0.9.0
156 | tenacity==8.2.3
157 | tiktoken==0.7.0
158 | timm==0.9.12
159 | tokenizers==0.19
160 | tqdm==4.66.2
161 | transformers==4.42.3
162 | types-protobuf
163 | types-requests
164 | typing-inspect==0.9.0
165 | typing_extensions==4.9.0
166 | tzdata==2024.1
167 | unstructured-client==0.23.8
168 | unstructured-inference==0.7.36
169 | unstructured.pytesseract==0.3.12
170 | unstructured[all-docs]==0.14.9
171 | urllib3==2.2.2
172 | uvicorn==0.30.1
173 | gunicorn==22.0.0
174 | wikipedia==1.4.0
175 | wrapt==1.16.0
176 | yarl==1.9.4
177 | youtube-transcript-api==0.6.2
178 | zipp==3.17.0
179 | sentence-transformers==2.7.0
180 | google-cloud-logging==3.10.0
181 | PyMuPDF==1.24.5
182 | 


--------------------------------------------------------------------------------
/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx:
--------------------------------------------------------------------------------
  1 | import { Dialog, Tabs, Box, Typography, Flex } from '@neo4j-ndl/react';
  2 | import graphenhancement from '../../../assets/images/graph-enhancements.svg';
  3 | import { useEffect, useState } from 'react';
  4 | import DeletePopUpForOrphanNodes from './DeleteTabForOrphanNodes';
  5 | import deleteOrphanAPI from '../../../services/DeleteOrphanNodes';
  6 | import { UserCredentials } from '../../../types';
  7 | import { useCredentials } from '../../../context/UserCredentials';
  8 | import EntityExtractionSettings from '../Settings/EntityExtractionSetting';
  9 | import { AlertColor, AlertPropsColorOverrides } from '@mui/material';
 10 | import { OverridableStringUnion } from '@mui/types';
 11 | import { useFileContext } from '../../../context/UsersFiles';
 12 | 
 13 | /**
 14 |  * Modal with the graph-enhancement tools, split into two tabs: entity extraction settings
 15 |  * (tab 0) and removal of disconnected (orphan) nodes (tab 1).
 16 |  *
 17 |  * @param open - whether the dialog is visible
 18 |  * @param onClose - called when the dialog is closed; also handed to the settings tab
 19 |  * @param showAlert - part of the props contract; not read by this component
 20 |  * @param closeSettingModal - called once, right after the first render of this dialog
 21 |  */
 22 | export default function GraphEnhancementDialog({
 23 |   open,
 24 |   onClose,
 25 |   closeSettingModal,
 26 | }: {
 27 |   open: boolean;
 28 |   onClose: () => void;
 29 |   showAlert: (
 30 |     alertmsg: string,
 31 |     alerttype: OverridableStringUnion<AlertColor, AlertPropsColorOverrides> | undefined
 32 |   ) => void;
 33 |   closeSettingModal: () => void;
 34 | }) {
 35 |   const [orphanDeleteAPIloading, setorphanDeleteAPIloading] = useState<boolean>(false);
 36 |   const [activeTab, setactiveTab] = useState<number>(0);
 37 |   const { setShowTextFromSchemaDialog } = useFileContext();
 38 |   const { userCredentials } = useCredentials();
 39 | 
 40 |   const orphanNodesDeleteHandler = async (selectedEntities: string[]) => {
 41 |     setorphanDeleteAPIloading(true);
 42 |     try {
 43 |       const response = await deleteOrphanAPI(userCredentials as UserCredentials, selectedEntities);
 44 |       console.log(response);
 45 |     } catch (error) {
 46 |       console.log(error);
 47 |     } finally {
 48 |       // Always clear the flag; previously a failed request left the tab stuck in its loading state.
 49 |       setorphanDeleteAPIloading(false);
 50 |     }
 51 |   };
 52 | 
 53 |   // Mount-only effect (empty dependency list): runs closeSettingModal once after the first render.
 54 |   useEffect(() => {
 55 |     closeSettingModal();
 56 |   }, []);
 57 | 
 58 |   return (
 59 |     <Dialog
 60 |       modalProps={{
 61 |         id: 'graph-enhancement-popup',
 62 |         className: 'n-p-token-4 n-rounded-lg h-[90%]',
 63 |       }}
 64 |       open={open}
 65 |       size='unset'
 66 |       disableCloseButton={false}
 67 |       onClose={onClose}
 68 |     >
 69 |       <Dialog.Header className='flex justify-between self-end !mb-0 '>
 70 |         <Box className='n-bg-palette-neutral-bg-weak px-4'>
 71 |           <Box className='flex flex-row items-center mb-2'>
 72 |             <img
 73 |               src={graphenhancement}
 74 |               alt='Graph enhancements illustration'
 75 |               style={{ width: 250, height: 250, marginRight: 10 }}
 76 |               loading='lazy'
 77 |             />
 78 |             <Box className='flex flex-col'>
 79 |               <Typography variant='h2'>Graph Enhancements</Typography>
 80 |               <Typography variant='subheading-medium' className='mb-2'>
 81 |                 This set of tools will help you enhance the quality of your Knowledge Graph by removing possible
 82 |                 duplicated entities, disconnected nodes and set a Graph Schema for improving the quality of the entity
 83 |                 extraction process
 84 |               </Typography>
 85 |               <Flex className='pt-2'>
 86 |                 <Tabs fill='underline' onChange={setactiveTab} size='large' value={activeTab}>
 87 |                   <Tabs.Tab tabId={0} aria-label='Entity Extraction Settings'>
 88 |                     Entity Extraction Settings
 89 |                   </Tabs.Tab>
 90 |                   <Tabs.Tab tabId={1} aria-label='Disconnected Nodes'>
 91 |                     Disconnected Nodes
 92 |                   </Tabs.Tab>
 93 |                 </Tabs>
 94 |               </Flex>
 95 |             </Box>
 96 |           </Box>
 97 |         </Box>
 98 |       </Dialog.Header>
 99 |       <Dialog.Content className='flex flex-col n-gap-token- grow w-[90%] mx-auto'>
100 |         <Tabs.TabPanel className='n-flex n-flex-col n-gap-token-4 n-p-token-6' value={activeTab} tabId={0}>
101 |           <div className='w-[80%] mx-auto'>
102 |             <EntityExtractionSettings
103 |               view='Tabs'
104 |               openTextSchema={() => {
105 |                 setShowTextFromSchemaDialog({ triggeredFrom: 'enhancementtab', show: true });
106 |               }}
107 |               colseEnhanceGraphSchemaDialog={onClose}
108 |               settingView='headerView'
109 |             />
110 |           </div>
111 |         </Tabs.TabPanel>
112 |         <Tabs.TabPanel className='n-flex n-flex-col n-gap-token-4 n-p-token-6' value={activeTab} tabId={1}>
113 |           <DeletePopUpForOrphanNodes deleteHandler={orphanNodesDeleteHandler} loading={orphanDeleteAPIloading} />
114 |         </Tabs.TabPanel>
115 |       </Dialog.Content>
116 |     </Dialog>
117 |   );
118 | }


--------------------------------------------------------------------------------
/frontend/src/context/UsersFiles.tsx:
--------------------------------------------------------------------------------
  1 | import { createContext, useContext, useState, Dispatch, SetStateAction, FC, useEffect } from 'react';
  2 | import { CustomFile, FileContextProviderProps, OptionType } from '../types';
  3 | import { defaultLLM } from '../utils/Constants';
  4 | import { useCredentials } from './UserCredentials';
  5 | interface showTextFromSchemaDialogType {
  6 |   show: boolean; // initialised to false by the provider
  7 |   triggeredFrom: string; // tag supplied by the opener, e.g. 'enhancementtab'; '' initially
  8 | }
  9 | interface FileContextType {
 10 |   files: (File | null)[] | [];
 11 |   setFiles: Dispatch<SetStateAction<(File | null)[]>>;
 12 |   filesData: CustomFile[] | [];
 13 |   setFilesData: Dispatch<SetStateAction<CustomFile[]>>;
 14 |   model: string; // the provider initialises this to defaultLLM
 15 |   setModel: Dispatch<SetStateAction<string>>;
 16 |   graphType: string; // the provider initialises this to 'Knowledge Graph Entities'
 17 |   setGraphType: Dispatch<SetStateAction<string>>;
 18 |   selectedNodes: readonly OptionType[];
 19 |   setSelectedNodes: Dispatch<SetStateAction<readonly OptionType[]>>;
 20 |   selectedRels: readonly OptionType[];
 21 |   setSelectedRels: Dispatch<SetStateAction<readonly OptionType[]>>;
 22 |   rowSelection: Record<string, boolean>;
 23 |   setRowSelection: Dispatch<SetStateAction<Record<string, boolean>>>;
 24 |   selectedRows: string[];
 25 |   setSelectedRows: Dispatch<SetStateAction<string[]>>;
 26 |   selectedSchemas: readonly OptionType[];
 27 |   setSelectedSchemas: Dispatch<SetStateAction<readonly OptionType[]>>;
 28 |   chatMode: string; // the provider initialises this to 'graph+vector'
 29 |   setchatMode: Dispatch<SetStateAction<string>>;
 30 |   isSchema: boolean;
 31 |   setIsSchema: Dispatch<SetStateAction<boolean>>;
 32 |   showTextFromSchemaDialog: showTextFromSchemaDialogType;
 33 |   setShowTextFromSchemaDialog: Dispatch<SetStateAction<showTextFromSchemaDialogType>>;
 34 | }
 35 | // The undefined default lets useFileContext detect a missing provider.
 36 | const FileContext = createContext<FileContextType | undefined>(undefined);
 37 | 
 38 | /** Holds the shared file, model, selection and chat-mode state and exposes it through FileContext. */
 39 | const FileContextProvider: FC<FileContextProviderProps> = ({ children }) => {
 40 |   const [files, setFiles] = useState<(File | null)[] | []>([]);
 41 |   const [filesData, setFilesData] = useState<CustomFile[] | []>([]);
 42 |   const [model, setModel] = useState<string>(defaultLLM);
 43 |   const [graphType, setGraphType] = useState<string>('Knowledge Graph Entities');
 44 |   const [selectedNodes, setSelectedNodes] = useState<readonly OptionType[]>([]);
 45 |   const [selectedRels, setSelectedRels] = useState<readonly OptionType[]>([]);
 46 |   const [selectedSchemas, setSelectedSchemas] = useState<readonly OptionType[]>([]);
 47 |   const [rowSelection, setRowSelection] = useState<Record<string, boolean>>({});
 48 |   const [selectedRows, setSelectedRows] = useState<string[]>([]);
 49 |   const [chatMode, setchatMode] = useState<string>('graph+vector');
 50 |   const { userCredentials } = useCredentials();
 51 |   const [isSchema, setIsSchema] = useState<boolean>(false);
 52 |   const [showTextFromSchemaDialog, setShowTextFromSchemaDialog] = useState<showTextFromSchemaDialogType>({
 53 |     triggeredFrom: '',
 54 |     show: false,
 55 |   });
 56 | 
 57 |   // Restore selections saved in localStorage for the connected database (matched by URI); malformed entries are skipped.
 58 |   useEffect(() => {
 59 |     const restore = (key: string, apply: (options: readonly OptionType[]) => void) => {
 60 |       try {
 61 |         const saved = JSON.parse(localStorage.getItem(key) ?? 'null');
 62 |         if (saved && userCredentials?.uri === saved.db && Array.isArray(saved.selectedOptions)) {
 63 |           apply(saved.selectedOptions);
 64 |         }
 65 |       } catch (error) {
 66 |         console.error(`Ignoring malformed "${key}" entry in localStorage`, error);
 67 |       }
 68 |     };
 69 |     restore('selectedNodeLabels', setSelectedNodes);
 70 |     restore('selectedRelationshipLabels', setSelectedRels);
 71 |   }, [userCredentials]);
 72 | 
 73 |   const value: FileContextType = {
 74 |     files,
 75 |     filesData,
 76 |     setFiles,
 77 |     setFilesData,
 78 |     model,
 79 |     setModel,
 80 |     graphType,
 81 |     setGraphType,
 82 |     selectedRels,
 83 |     setSelectedRels,
 84 |     selectedNodes,
 85 |     setSelectedNodes,
 86 |     rowSelection,
 87 |     setRowSelection,
 88 |     selectedRows,
 89 |     setSelectedRows,
 90 |     selectedSchemas,
 91 |     setSelectedSchemas,
 92 |     chatMode,
 93 |     setchatMode,
 94 |     isSchema,
 95 |     setIsSchema,
 96 |     setShowTextFromSchemaDialog,
 97 |     showTextFromSchemaDialog,
 98 |   };
 99 |   return <FileContext.Provider value={value}>{children}</FileContext.Provider>;
100 | };
101 | // Hook giving access to the shared file context value.
102 | const useFileContext = () => {
103 |   const fileContext = useContext(FileContext);
104 |   if (fileContext) return fileContext;
105 |   // No provider above the caller: fail loudly instead of handing back undefined.
106 |   throw new Error('useFileContext must be used within a FileContextProvider');
107 | };
108 | export { FileContextProvider, useFileContext };
109 | 


--------------------------------------------------------------------------------
/frontend/src/assets/schemas.json:
--------------------------------------------------------------------------------
  1 | [{
  2 |   "labels": [
  3 |     "Answer",
  4 |     "Question",
  5 |     "Tag",
  6 |     "User"
  7 |   ],
  8 |   "relationshipTypes": [
  9 |     "ACCEPTED",
 10 |     "ANSWERED",
 11 |     "POSTED",
 12 |     "SIMILAR",
 13 |     "TAGGED"
 14 |   ],
 15 |   "schema": "stackoverflow"
 16 | },
 17 | {
 18 |     "labels": [
 19 |       "Actor",
 20 |       "Director",
 21 |       "Genre",
 22 |       "Movie",
 23 |       "Person",
 24 |       "User"
 25 |     ],
 26 |     "relationshipTypes": [
 27 |       "ACTED_IN",
 28 |       "DIRECTED",
 29 |       "IN_GENRE",
 30 |       "RATED"
 31 |     ],
 32 |     "schema": "movies"
 33 |   },
 34 |   {
 35 |     "labels": [
 36 |       "Application",
 37 |       "DataCenter",
 38 |       "Egress",
 39 |       "Interface",
 40 |       "Machine",
 41 |       "Network",
 42 |       "OS",
 43 |       "Port",
 44 |       "Process",
 45 |       "Rack",
 46 |       "Router",
 47 |       "Service",
 48 |       "Software",
 49 |       "Switch",
 50 |       "Type",
 51 |       "Version",
 52 |       "Zone"
 53 |     ],
 54 |     "relationshipTypes": [
 55 |       "CONNECTS",
 56 |       "CONTAINS",
 57 |       "DEPENDS_ON",
 58 |       "EXPOSES",
 59 |       "HOLDS",
 60 |       "INSTANCE",
 61 |       "LISTENS",
 62 |       "PREVIOUS",
 63 |       "ROUTES",
 64 |       "RUNS",
 65 |       "TYPE",
 66 |       "VERSION"
 67 |     ],
 68 |     "schema": "network"
 69 |   }, 
 70 |   {
 71 |     "labels": [
 72 |       "Category",
 73 |       "Customer",
 74 |       "Order",
 75 |       "Product",
 76 |       "Supplier"
 77 |     ],
 78 |     "relationshipTypes": [
 79 |       "ORDERS",
 80 |       "PART_OF",
 81 |       "PURCHASED",
 82 |       "SUPPLIES"
 83 |     ],
 84 |     "schema": "retail"
 85 |   },{
 86 |     "labels": [
 87 |       "Tag",
 88 |       "Link",
 89 |       "Post",
 90 |       "User"
 91 |     ],
 92 |     "relationshipTypes": [
 93 |       "CONTAINS",
 94 |       "FOLLOWS",
 95 |       "MENTIONS",
 96 |       "POSTS",
 97 |       "REPLY_TO",
 98 |       "REPOST",
 99 |       "TAGS"
100 |     ],
101 |     "schema": "social"
102 |   },
103 |   {
104 |     "labels": [
105 |       "Business",
106 |       "Category",
107 |       "City",
108 |       "Label",
109 |       "Photo",
110 |       "Review",
111 |       "User"
112 |     ],
113 |     "relationshipTypes": [
114 |       "FRIENDS",
115 |       "HAS_LABEL",
116 |       "HAS_PHOTO",
117 |       "IN_CATEGORY",
118 |       "IN_CITY",
119 |       "REVIEWS",
120 |       "SIMILAR",
121 |       "WROTE",
122 |       "WROTE_TIP"
123 |     ],
124 |     "schema": "reviews"
125 |   },
126 |   {
127 |     "labels": [
128 |       "Bank",
129 |       "CashIn",
130 |       "CashOut",
131 |       "Client",
132 |       "Debit",
133 |       "Email",
134 |       "Merchant",
135 |       "Payment",
136 |       "Phone",
137 |       "SSN",
138 |       "Transaction",
139 |       "Transfer"
140 |     ],
141 |     "relationshipTypes": [
142 |       "HAS_EMAIL",
143 |       "HAS_PHONE",
144 |       "HAS_SSN",
145 |       "NEXT",
146 |       "PERFORMED",
147 |       "SENT_TO",
148 |       "RECEIVED_FROM"
149 |     ],
150 |     "schema": "payments"
151 |   },
152 |   {
153 |     "labels": [
154 |       "Area",
155 |       "Crime",
156 |       "Email",
157 |       "Location",
158 |       "Object",
159 |       "Officer",
160 |       "Person",
161 |       "Phone",
162 |       "PhoneCall",
163 |       "PostCode",
164 |       "Vehicle"
165 |     ],
166 |     "relationshipTypes": [
167 |       "CALLED",
168 |       "CALLER",
169 |       "CURRENT_ADDRESS",
170 |       "FAMILY_REL",
171 |       "HAS_EMAIL",
172 |       "HAS_PHONE",
173 |       "HAS_POSTCODE",
174 |       "INVESTIGATED_BY",
175 |       "INVOLVED_IN",
176 |       "KNOWS",
177 |       "KNOWS_LW",
178 |       "KNOWS_PHONE",
179 |       "KNOWS_SN",
180 |       "LOCATION_IN_AREA",
181 |       "OCCURRED_AT",
182 |       "PARTY_TO",
183 |       "POSTCODE_IN_AREA"
184 |     ],
185 |     "schema": "crime"
186 |   },
187 |   {
188 |     "labels": [
189 |       "Airport",
190 |       "City",
191 |       "Continent",
192 |       "Country",
193 |       "Region"
194 |     ],
195 |     "relationshipTypes": [
196 |       "HAS_ROUTE",
197 |       "IN_CITY",
198 |       "IN_COUNTRY",
199 |       "IN_REGION",
200 |       "ON_CONTINENT"
201 |     ],
202 |     "schema": "flights"
203 |   },
204 |   {
205 |     "labels": [
206 |       "AgeGroup",
207 |       "Case",
208 |       "Drug",
209 |       "Manufacturer",
210 |       "Outcome",
211 |       "Reaction",
212 |       "ReportSource",
213 |       "Therapy"
214 |     ],
215 |     "relationshipTypes": [
216 |       "FALLS_UNDER",
217 |       "HAS_REACTION",
218 |       "IS_CONCOMITANT",
219 |       "IS_INTERACTING",
220 |       "IS_PRIMARY_SUSPECT",
221 |       "IS_SECONDARY_SUSPECT",
222 |       "PRESCRIBED",
223 |       "RECEIVED",
224 |       "REGISTERED",
225 |       "REPORTED_BY",
226 |       "RESULTED_IN"
227 |     ],
228 |     "schema": "healthcare"
229 |   }
230 | ]


--------------------------------------------------------------------------------
/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx:
--------------------------------------------------------------------------------
  1 | import { Box, Checkbox, Flex, Typography } from '@neo4j-ndl/react';
  2 | import { DocumentTextIconOutline } from '@neo4j-ndl/react/icons';
  3 | import { LargefilesProps } from '../../../types';
  4 | import { List, ListItem, ListItemAvatar, ListItemButton, ListItemIcon, ListItemText } from '@mui/material';
  5 | import { FC, useContext, useMemo } from 'react';
  6 | import { chunkSize } from '../../../utils/Constants';
  7 | import BellImage from '../../../assets/images/Stopwatch-blue.svg';
  8 | import AlertIcon from '../../Layout/AlertIcon';
  9 | import wikipedialogo from '../../../assets/images/wikipedia.svg';
 10 | import youtubelogo from '../../../assets/images/youtube.svg';
 11 | import weblogo from '../../../assets/images/web.svg';
 12 | import webdarkmode from '../../../assets/images/web-darkmode.svg';
 13 | import gcslogo from '../../../assets/images/gcs.webp';
 14 | import s3logo from '../../../assets/images/s3logo.png';
 15 | import { calculateProcessingTime } from '../../../utils/Utils';
 16 | import { ThemeWrapperContext } from '../../../context/ThemeWrapper';
 17 | 
 18 | /**
 19 |  * Lists the selected large documents with a checkbox, the file size and, for local files, the
 20 |  * estimated processing time.
 21 |  *
 22 |  * @param largeFiles - documents to list
 23 |  * @param handleToggle - called with the new checkbox state and the file id
 24 |  * @param checked - ids of the currently ticked files
 25 |  */
 26 | const LargeFilesAlert: FC<LargefilesProps> = ({ largeFiles, handleToggle, checked }) => {
 27 |   const { colorMode } = useContext(ThemeWrapperContext);
 28 |   // Icons per file source; sources without an entry get the generic document icon below.
 29 |   const imageIcon: Record<string, string> = useMemo(
 30 |     () => ({
 31 |       Wikipedia: wikipedialogo,
 32 |       'gcs bucket': gcslogo,
 33 |       youtube: youtubelogo,
 34 |       's3 bucket': s3logo,
 35 |       'web-url': colorMode === 'light' ? weblogo : webdarkmode,
 36 |     }),
 37 |     [colorMode]
 38 |   );
 39 |   return (
 40 |     <Box className='n-bg-palette-neutral-bg-weak p-4'>
 41 |       <Box className='flex flex-row pb-6 items-center mb-2'>
 42 |         <img
 43 |           style={{ width: 95, height: 95, marginRight: 10, alignSelf: 'flex-start' }}
 44 |           src={BellImage}
 45 |           alt='alert icon'
 46 |         />
 47 |         <Box className='flex flex-col'>
 48 |           <Typography variant='h3'>Large Document Notice</Typography>
 49 |           <Typography variant='body-medium' sx={{ mb: 2 }}>
 50 |             One or more of your selected documents are large and may take extra time to process. Please review the
 51 |             estimated times below
 52 |           </Typography>
 53 |           <List className='max-h-80 overflow-y-auto'>
 54 |             {largeFiles.map((f) => {
 55 |               const { minutes, seconds } = calculateProcessingTime(f.size as number, 0.2);
 56 |               // Only local files show an estimate: seconds when under a minute, minutes otherwise.
 57 |               const showSeconds = minutes === 0 && typeof f.size === 'number';
 58 |               const estimate = f.fileSource !== 'local file' ? '' : showSeconds ? ` - ${seconds} Sec ` : ` - ${minutes} Min`;
 59 |               // f.id already drives the checkbox state, so it is used as the list key instead of the array index.
 60 |               return (
 61 |                 <ListItem key={f.id} disablePadding>
 62 |                   <ListItemButton role={undefined} dense>
 63 |                     <ListItemIcon>
 64 |                       <Checkbox
 65 |                         aria-label='selection checkbox'
 66 |                         onChange={(e) => handleToggle(e.target.checked, f.id)}
 67 |                         checked={checked.indexOf(f.id) !== -1}
 68 |                         tabIndex={-1}
 69 |                       />
 70 |                     </ListItemIcon>
 71 |                     <ListItemAvatar>
 72 |                       {imageIcon[f.fileSource] ? (
 73 |                         <img width={20} height={20} src={imageIcon[f.fileSource]} alt={`${f.fileSource} icon`} />
 74 |                       ) : (
 75 |                         <DocumentTextIconOutline className='n-size-token-7 mr-2' />
 76 |                       )}
 77 |                     </ListItemAvatar>
 78 |                     <ListItemText
 79 |                       primary={
 80 |                         <Flex flexDirection='row'>
 81 |                           <span className='word-break'>
 82 |                             {f.name} - {((f.size as number) / 1000).toFixed(2)}KB
 83 |                             {estimate}
 84 |                           </span>
 85 |                           {typeof f.size === 'number' && f.size > chunkSize && (
 86 |                             <span>
 87 |                               <AlertIcon />
 88 |                             </span>
 89 |                           )}
 90 |                         </Flex>
 91 |                       }
 92 |                     />
 93 |                   </ListItemButton>
 94 |                 </ListItem>
 95 |               );
 96 |             })}
 97 |           </List>
 98 |         </Box>
 99 |       </Box>
100 |     </Box>
101 |   );
102 | };
103 | export default LargeFilesAlert;


--------------------------------------------------------------------------------
/POC_Documents/V1/Local-to-global-genAI_GraphRAG_V1:
--------------------------------------------------------------------------------
 1 | Graph DB Connectors and GenAI Integrations POC_v1
 2 | 
 3 | “This is version v1: the main content is taken from Paper1 below, and some additional content is drawn from other papers and blogs.”
 4 | 
 5 | Paper1: From Local to Global: A Graph RAG Approach to
 6 | Query-Focused Summarization
 7 | 
 8 | “” Python-based implementation of both global and local Graph RAG
 9 | approaches are forthcoming at https://aka.ms/graphrag.””
10 | 
11 | Graph RAG approach uses the natural modularity of graphs to partition data for global summarization. It uses an LLM to build a graph-based text index in two stages:
12 | 1.	Derive an entity knowledge graph from the source documents, 
13 | 2.	Pre-generate community summaries for all groups of closely related entities.
14 | 
15 | It can answer questions such as “What are the main themes in the dataset?”, which is inherently a query-focused summarization (QFS) task. The Graph RAG approach improves question answering over private text corpora and scales with both the generality of user questions and the quantity of source text to be indexed. Graph RAG leads to substantial improvements in both the comprehensiveness and the diversity of generated answers.
16 | 
17 | Community descriptions provide complete coverage of the underlying graph index and of the input documents it represents. Query-focused summarization of an entire corpus is then made possible using a map-reduce approach: first using each community summary to answer the query independently and in parallel, then summarizing all relevant partial answers into a final global answer.
18 | 
19 |  
20 |         Figure 1: Graph RAG pipeline using an LLM-derived graph index of source document text
21 | 
22 | I.  Data Ingestion:
23 | 1.	Documents/chunks/Text Preprocessing: 
24 |                        To reduce document size and improve latency use text summarization for heavy documents or multi-documents with the below steps:
25 |                                Step1: LLM (use a specific LLM embedding to summarize documents)
26 |                              Step2: Knowledge graph to reduce size with entities, relationship, and their properties with subgraphs.
27 | Note: Above steps can be followed bidirectionally.
28 | 
29 | 2.	Create vector DB/Embedding/Indexing with LLM embedding.
30 | 
31 | II. Vector Embedding/Indexing Storage
32 | 
33 | Generate KG with the embedding and store in graph DB or store the embedding in FAISS/pinecone to improve latency and accuracy.
34 | or
35 | Both methods can be combined (KG + vector embedding) and stored in the DB to handle both structured and unstructured data.
36 | 
37 | Generate Graph RAG summaries for four community levels (C0, C1, C2, C3) from the embedding/KG of the document/multi-documents/embeddings by using a map-reduce text summarization approach.
38 | C0: Uses root-level community summaries (fewest in number) to answer user queries.
39 | C1: Uses high-level community summaries to answer queries. These are sub-communities
40 | of C0, if present, otherwise C0 communities projected down.
41 | C2: Uses intermediate-level community summaries to answer queries. These are subcommunities of C1, if present, otherwise C1 communities projected down.
42 | C3: Uses low-level community summaries (greatest in number) to answer queries. These
43 |  are sub-communities of C2, if present, otherwise C2 communities projected down.
44 |                                                                                          
45 | Figure 2.1 Communities’ Summary                                          Figure.2.2 Communities Graph 
46 |  
47 | Figure 3. Summarized Community Graph
48 | 
49 | III. Chat Response/Architecture:
50 | Approaches: Multi-hop RAG, memory-based response, head-to-head measures.
51 | Head-to-head measures that can be used as performance metrics with an LLM evaluator are as follows:
52 | • Comprehensiveness: How much detail does the answer provide to cover all aspects and
53 | details of the question?
54 | • Diversity: How varied and rich is the answer in providing different perspectives and insights on the question?
55 | 
56 | 
57 | For a given community level (Fig. 2.1, 2.2 & 3), the global answer to any user query is generated as follows:
58 | 
59 | • Prepare community summaries. Community summaries are randomly shuffled and divided
60 | into chunks of pre-specified token size. This ensures relevant information is distributed
61 | across chunks, rather than concentrated (and potentially lost) in a single context window.
62 | 
63 | •  Map community answers. Generate intermediate answers in parallel, one for each chunk.
64 | The LLM is also asked to generate a score between 0-100 indicating how helpful the generated
65 | answer is in answering the target question. Answers with score 0 are filtered out.
66 | 
67 | • Reduce to global answer. Intermediate community answers are sorted in descending order
68 | of helpfulness score and iteratively added into a new context window until the token limit
69 | is reached. This final context is used to generate the global answer returned to the user.
70 | 
71 | 


--------------------------------------------------------------------------------