├── .gitignore
├── .mvn
│   └── wrapper
│       ├── maven-wrapper.jar
│       └── maven-wrapper.properties
├── CHANGELOG.md
├── Dockerfile
├── README.md
├── docker-compose.yml
├── mvnw
├── mvnw.cmd
├── ollama-postman_collection.json
├── pom.xml
└── src
    ├── main
    │   ├── java
    │   │   └── org
    │   │       └── benaya
    │   │           └── ai
    │   │               └── rag
    │   │                   ├── RagApplication.java
    │   │                   ├── controller
    │   │                   │   └── AssistantController.java
    │   │                   ├── model
    │   │                   │   └── Paragraph.java
    │   │                   ├── repository
    │   │                   │   └── DocumentRepository.java
    │   │                   ├── runner
    │   │                   │   └── DatabaseInitRunner.java
    │   │                   └── service
    │   │                       ├── ChatGeneratorService.java
    │   │                       ├── CsvParserService.java
    │   │                       └── RagService.java
    │   └── resources
    │       ├── META-INF
    │       │   └── additional-spring-configuration-metadata.json
    │       ├── application.yml
    │       ├── prompts
    │       │   └── system-qa.st
    │       └── sample_nda.csv
    └── test
        └── java
            └── org
                └── benaya
                    └── ai
                        └── rag
                            └── RagApplicationTests.java
/.gitignore:
--------------------------------------------------------------------------------
1 | HELP.md
2 | target/
3 | !.mvn/wrapper/maven-wrapper.jar
4 | !**/src/main/**/target/
5 | !**/src/test/**/target/
6 |
7 | ### STS ###
8 | .apt_generated
9 | .classpath
10 | .factorypath
11 | .project
12 | .settings
13 | .springBeans
14 | .sts4-cache
15 |
16 | ### IntelliJ IDEA ###
17 | .idea
18 | *.iws
19 | *.iml
20 | *.ipr
21 |
22 | ### NetBeans ###
23 | /nbproject/private/
24 | /nbbuild/
25 | /dist/
26 | /nbdist/
27 | /.nb-gradle/
28 | build/
29 | !**/src/main/**/build/
30 | !**/src/test/**/build/
31 |
32 |
33 | /ollama-setup/
34 |
35 |
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benayat/rag-with-spring-ai/001bc687b07848907ab4e89d7a2b8b54b92db837/.mvn/wrapper/maven-wrapper.jar
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.6/apache-maven-3.9.6-bin.zip
2 | wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar
3 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | #### 0.0.1-SNAPSHOT
2 | - Initial commit
3 | - Basic project structure and code.
4 | - set up milvus-db and ollama as dependencies.
5 | - Added chat and embedding controllers for POC.
6 |
7 | #### 1.0.0-SNAPSHOT
8 | - Added basic RAG system functionality.
9 | - Removed redundant embedding and chat controllers.
10 | - Encapsulated all Vectordb operations in a repository.
11 | - cleaned application.yml file from constants, and added env vars for customization.
12 | - todo: create a better reader to match all types of documents and paragraphs.
13 |
14 | #### 1.0.0
15 | - changed to Postgres DB for simplicity, and added relevant definitions.
16 | - changed vectorStore and chatClient to generic/interface types to avoid coupling.
17 |
18 | #### 1.0.1
19 | - set up docker-compose for running the system.
20 |
21 | #### 1.0.2
22 | - changed to mistral for faster inference.
23 | - updated docker-compose to be a self-contained system.
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM eclipse-temurin:21.0.2_13-jdk-alpine
2 | COPY target/rag-1.0.2.jar /app/app.jar
3 | EXPOSE 8080
4 | ENTRYPOINT ["java","-jar","/app/app.jar"]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## RAG system demo
2 | This is a demo RAG system: a QA bot that grounds its answers in real source data rather than relying solely on its own LLM knowledge.
3 |
4 | #### Architecture
5 | The System is based on the following components:
6 | 1. Code - spring boot framework with spring ai.
7 | 2. Local models deployment, including chat and embeddings generation: ollama.
8 | 3. Vector Database, for storing embeddings and querying them: postgres pgvector.
9 | All components used are free and open source.
10 |
11 | #### Running the system
12 | ##### Prerequisites
13 | - Docker; make sure you are logged in to Docker Hub.
14 |
15 | ##### Running
16 | simply download the file "docker-compose.yml" and run `docker-compose up`.
17 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
# Stack definition: the rag application, a pgvector-enabled Postgres and an Ollama container.
version: '3.8'
services:
  rag:
    container_name: rag
    image: benaya7/rag:1.0.2
    ports:
      - "8080:8080"
    environment:
      - SPRING_DATASOURCE_URL=jdbc:postgresql://postgres:5432/postgres
      - SPRING_DATASOURCE_USERNAME=postgres
      - SPRING_DATASOURCE_PASSWORD=postgres
      - OLLAMA_BASE_URL=http://ollama:11434
      - DATABASE_INIT=true
    depends_on:
      # Start the application only after the database healthcheck passes.
      postgres:
        condition: service_healthy
  postgres:
    image: pgvector/pgvector:0.7.0-pg16
    container_name: postgres
    ports:
      - "5432:5432"
    environment:
      - PGUSER=postgres
      - POSTGRES_PASSWORD=postgres
      - POSTGRES_DB=postgres
    restart: unless-stopped
    healthcheck:
      # CMD-SHELL takes a single command string; additional list items are
      # not passed to pg_isready, so the whole command must be one string.
      test: [ "CMD-SHELL", "pg_isready -d postgres" ]
      interval: 5s
      timeout: 60s
      retries: 5
  ollama:
    container_name: ollama-offline
    image: benaya7/ollama-offline:mistral_nomic-1
    ports:
      - "11434:11434"
    environment:
      - OLLAMA_MODELS=/root/.ollama/models
    volumes:
      - ollama-2:/root/.ollama

volumes:
  ollama-2:
44 |
--------------------------------------------------------------------------------
/mvnw:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # ----------------------------------------------------------------------------
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | # ----------------------------------------------------------------------------
20 |
21 | # ----------------------------------------------------------------------------
22 | # Apache Maven Wrapper startup batch script, version 3.2.0
23 | #
24 | # Required ENV vars:
25 | # ------------------
26 | # JAVA_HOME - location of a JDK home dir
27 | #
28 | # Optional ENV vars
29 | # -----------------
30 | # MAVEN_OPTS - parameters passed to the Java VM when running Maven
31 | # e.g. to debug Maven itself, use
32 | # set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
33 | # MAVEN_SKIP_RC - flag to disable loading of mavenrc files
34 | # ----------------------------------------------------------------------------
35 |
36 | if [ -z "$MAVEN_SKIP_RC" ] ; then
37 |
38 | if [ -f /usr/local/etc/mavenrc ] ; then
39 | . /usr/local/etc/mavenrc
40 | fi
41 |
42 | if [ -f /etc/mavenrc ] ; then
43 | . /etc/mavenrc
44 | fi
45 |
46 | if [ -f "$HOME/.mavenrc" ] ; then
47 | . "$HOME/.mavenrc"
48 | fi
49 |
50 | fi
51 |
52 | # OS specific support. $var _must_ be set to either true or false.
53 | cygwin=false;
54 | darwin=false;
55 | mingw=false
56 | case "$(uname)" in
57 | CYGWIN*) cygwin=true ;;
58 | MINGW*) mingw=true;;
59 | Darwin*) darwin=true
60 | # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home
61 | # See https://developer.apple.com/library/mac/qa/qa1170/_index.html
62 | if [ -z "$JAVA_HOME" ]; then
63 | if [ -x "/usr/libexec/java_home" ]; then
64 | JAVA_HOME="$(/usr/libexec/java_home)"; export JAVA_HOME
65 | else
66 | JAVA_HOME="/Library/Java/Home"; export JAVA_HOME
67 | fi
68 | fi
69 | ;;
70 | esac
71 |
72 | if [ -z "$JAVA_HOME" ] ; then
73 | if [ -r /etc/gentoo-release ] ; then
74 | JAVA_HOME=$(java-config --jre-home)
75 | fi
76 | fi
77 |
78 | # For Cygwin, ensure paths are in UNIX format before anything is touched
79 | if $cygwin ; then
80 | [ -n "$JAVA_HOME" ] &&
81 | JAVA_HOME=$(cygpath --unix "$JAVA_HOME")
82 | [ -n "$CLASSPATH" ] &&
83 | CLASSPATH=$(cygpath --path --unix "$CLASSPATH")
84 | fi
85 |
86 | # For Mingw, ensure paths are in UNIX format before anything is touched
87 | if $mingw ; then
88 | [ -n "$JAVA_HOME" ] && [ -d "$JAVA_HOME" ] &&
89 | JAVA_HOME="$(cd "$JAVA_HOME" || (echo "cannot cd into $JAVA_HOME."; exit 1); pwd)"
90 | fi
91 |
92 | if [ -z "$JAVA_HOME" ]; then
93 | javaExecutable="$(which javac)"
94 | if [ -n "$javaExecutable" ] && ! [ "$(expr "\"$javaExecutable\"" : '\([^ ]*\)')" = "no" ]; then
95 | # readlink(1) is not available as standard on Solaris 10.
96 | readLink=$(which readlink)
97 | if [ ! "$(expr "$readLink" : '\([^ ]*\)')" = "no" ]; then
98 | if $darwin ; then
99 | javaHome="$(dirname "\"$javaExecutable\"")"
100 | javaExecutable="$(cd "\"$javaHome\"" && pwd -P)/javac"
101 | else
102 | javaExecutable="$(readlink -f "\"$javaExecutable\"")"
103 | fi
104 | javaHome="$(dirname "\"$javaExecutable\"")"
105 | javaHome=$(expr "$javaHome" : '\(.*\)/bin')
106 | JAVA_HOME="$javaHome"
107 | export JAVA_HOME
108 | fi
109 | fi
110 | fi
111 |
112 | if [ -z "$JAVACMD" ] ; then
113 | if [ -n "$JAVA_HOME" ] ; then
114 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
115 | # IBM's JDK on AIX uses strange locations for the executables
116 | JAVACMD="$JAVA_HOME/jre/sh/java"
117 | else
118 | JAVACMD="$JAVA_HOME/bin/java"
119 | fi
120 | else
121 | JAVACMD="$(\unset -f command 2>/dev/null; \command -v java)"
122 | fi
123 | fi
124 |
125 | if [ ! -x "$JAVACMD" ] ; then
126 | echo "Error: JAVA_HOME is not defined correctly." >&2
127 | echo " We cannot execute $JAVACMD" >&2
128 | exit 1
129 | fi
130 |
131 | if [ -z "$JAVA_HOME" ] ; then
132 | echo "Warning: JAVA_HOME environment variable is not set."
133 | fi
134 |
# Walks upward from the directory given as $1 towards the filesystem root and
# prints the absolute path of the first directory that contains a .mvn
# subdirectory. When none is found, the starting directory itself is printed.
# Prints a message and returns 1 when no path argument is supplied.
find_maven_basedir() {
  [ -n "$1" ] || {
    echo "Path not specified to find_maven_basedir"
    return 1
  }

  basedir="$1"
  wdir="$1"
  until [ "$wdir" = '/' ]; do
    if [ -d "$wdir"/.mvn ]; then
      basedir=$wdir
      break
    fi
    # workaround for JBEAP-8937 (on Solaris 10/Sparc): only step up to the
    # parent when the current candidate really is a directory
    [ ! -d "${wdir}" ] || wdir=$(cd "$wdir/.." || exit 1; pwd)
  done
  printf '%s' "$(cd "$basedir" || exit 1; pwd)"
}
159 |
# Prints the content of the file named by $1 as a single line, turning every
# run of CR/LF characters into one space. Prints nothing when $1 is not a
# regular file.
concat_lines() {
  [ -f "$1" ] || return 0
  # Carriage returns are translated as well so that a checkout with CRLF line
  # endings (e.g. Git Bash with auto CRLF) does not yield arguments such as
  # $'-Xarg\r' after word splitting.
  tr -s '\r\n' ' ' < "$1"
}
171 |
# Prints $1 followed by a newline, but only when MVNW_VERBOSE is exactly "true".
log() {
  case "$MVNW_VERBOSE" in
    true) printf '%s\n' "$1" ;;
  esac
}
177 |
178 | BASE_DIR=$(find_maven_basedir "$(dirname "$0")")
179 | if [ -z "$BASE_DIR" ]; then
180 | exit 1;
181 | fi
182 |
183 | MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}; export MAVEN_PROJECTBASEDIR
184 | log "$MAVEN_PROJECTBASEDIR"
185 |
186 | ##########################################################################################
187 | # Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
188 | # This allows using the maven wrapper in projects that prohibit checking in binary data.
189 | ##########################################################################################
190 | wrapperJarPath="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar"
191 | if [ -r "$wrapperJarPath" ]; then
192 | log "Found $wrapperJarPath"
193 | else
194 | log "Couldn't find $wrapperJarPath, downloading it ..."
195 |
196 | if [ -n "$MVNW_REPOURL" ]; then
197 | wrapperUrl="$MVNW_REPOURL/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar"
198 | else
199 | wrapperUrl="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar"
200 | fi
201 | while IFS="=" read -r key value; do
202 | # Remove '\r' from value to allow usage on windows as IFS does not consider '\r' as a separator ( considers space, tab, new line ('\n'), and custom '=' )
203 | safeValue=$(echo "$value" | tr -d '\r')
204 | case "$key" in (wrapperUrl) wrapperUrl="$safeValue"; break ;;
205 | esac
206 | done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties"
207 | log "Downloading from: $wrapperUrl"
208 |
209 | if $cygwin; then
210 | wrapperJarPath=$(cygpath --path --windows "$wrapperJarPath")
211 | fi
212 |
213 | if command -v wget > /dev/null; then
214 | log "Found wget ... using wget"
215 | [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--quiet"
216 | if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
217 | wget $QUIET "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath"
218 | else
219 | wget $QUIET --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath"
220 | fi
221 | elif command -v curl > /dev/null; then
222 | log "Found curl ... using curl"
223 | [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--silent"
224 | if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
225 | curl $QUIET -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath"
226 | else
227 | curl $QUIET --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath"
228 | fi
229 | else
230 | log "Falling back to using Java to download"
231 | javaSource="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.java"
232 | javaClass="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.class"
233 | # For Cygwin, switch paths to Windows format before running javac
234 | if $cygwin; then
235 | javaSource=$(cygpath --path --windows "$javaSource")
236 | javaClass=$(cygpath --path --windows "$javaClass")
237 | fi
238 | if [ -e "$javaSource" ]; then
239 | if [ ! -e "$javaClass" ]; then
240 | log " - Compiling MavenWrapperDownloader.java ..."
241 | ("$JAVA_HOME/bin/javac" "$javaSource")
242 | fi
243 | if [ -e "$javaClass" ]; then
244 | log " - Running MavenWrapperDownloader.java ..."
245 | ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$wrapperUrl" "$wrapperJarPath") || rm -f "$wrapperJarPath"
246 | fi
247 | fi
248 | fi
249 | fi
250 | ##########################################################################################
251 | # End of extension
252 | ##########################################################################################
253 |
254 | # If specified, validate the SHA-256 sum of the Maven wrapper jar file
255 | wrapperSha256Sum=""
256 | while IFS="=" read -r key value; do
257 | case "$key" in (wrapperSha256Sum) wrapperSha256Sum=$value; break ;;
258 | esac
259 | done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties"
260 | if [ -n "$wrapperSha256Sum" ]; then
261 | wrapperSha256Result=false
262 | if command -v sha256sum > /dev/null; then
263 | if echo "$wrapperSha256Sum $wrapperJarPath" | sha256sum -c > /dev/null 2>&1; then
264 | wrapperSha256Result=true
265 | fi
266 | elif command -v shasum > /dev/null; then
267 | if echo "$wrapperSha256Sum $wrapperJarPath" | shasum -a 256 -c > /dev/null 2>&1; then
268 | wrapperSha256Result=true
269 | fi
270 | else
271 | echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available."
272 | echo "Please install either command, or disable validation by removing 'wrapperSha256Sum' from your maven-wrapper.properties."
273 | exit 1
274 | fi
275 | if [ $wrapperSha256Result = false ]; then
276 | echo "Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised." >&2
277 | echo "Investigate or delete $wrapperJarPath to attempt a clean download." >&2
278 | echo "If you updated your Maven version, you need to update the specified wrapperSha256Sum property." >&2
279 | exit 1
280 | fi
281 | fi
282 |
283 | MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"
284 |
285 | # For Cygwin, switch paths to Windows format before running java
286 | if $cygwin; then
287 | [ -n "$JAVA_HOME" ] &&
288 | JAVA_HOME=$(cygpath --path --windows "$JAVA_HOME")
289 | [ -n "$CLASSPATH" ] &&
290 | CLASSPATH=$(cygpath --path --windows "$CLASSPATH")
291 | [ -n "$MAVEN_PROJECTBASEDIR" ] &&
292 | MAVEN_PROJECTBASEDIR=$(cygpath --path --windows "$MAVEN_PROJECTBASEDIR")
293 | fi
294 |
295 | # Provide a "standardized" way to retrieve the CLI args that will
296 | # work with both Windows and non-Windows executions.
297 | MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $*"
298 | export MAVEN_CMD_LINE_ARGS
299 |
300 | WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
301 |
302 | # shellcheck disable=SC2086 # safe args
303 | exec "$JAVACMD" \
304 | $MAVEN_OPTS \
305 | $MAVEN_DEBUG_OPTS \
306 | -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \
307 | "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \
308 | ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@"
309 |
--------------------------------------------------------------------------------
/mvnw.cmd:
--------------------------------------------------------------------------------
1 | @REM ----------------------------------------------------------------------------
2 | @REM Licensed to the Apache Software Foundation (ASF) under one
3 | @REM or more contributor license agreements. See the NOTICE file
4 | @REM distributed with this work for additional information
5 | @REM regarding copyright ownership. The ASF licenses this file
6 | @REM to you under the Apache License, Version 2.0 (the
7 | @REM "License"); you may not use this file except in compliance
8 | @REM with the License. You may obtain a copy of the License at
9 | @REM
10 | @REM https://www.apache.org/licenses/LICENSE-2.0
11 | @REM
12 | @REM Unless required by applicable law or agreed to in writing,
13 | @REM software distributed under the License is distributed on an
14 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | @REM KIND, either express or implied. See the License for the
16 | @REM specific language governing permissions and limitations
17 | @REM under the License.
18 | @REM ----------------------------------------------------------------------------
19 |
20 | @REM ----------------------------------------------------------------------------
21 | @REM Apache Maven Wrapper startup batch script, version 3.2.0
22 | @REM
23 | @REM Required ENV vars:
24 | @REM JAVA_HOME - location of a JDK home dir
25 | @REM
26 | @REM Optional ENV vars
27 | @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands
28 | @REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending
29 | @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven
30 | @REM e.g. to debug Maven itself, use
31 | @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
32 | @REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files
33 | @REM ----------------------------------------------------------------------------
34 |
35 | @REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on'
36 | @echo off
37 | @REM set title of command window
38 | title %0
39 | @REM enable echoing by setting MAVEN_BATCH_ECHO to 'on'
40 | @if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO%
41 |
42 | @REM set %HOME% to equivalent of $HOME
43 | if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%")
44 |
45 | @REM Execute a user defined script before this one
46 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre
47 | @REM check for pre script, once with legacy .bat ending and once with .cmd ending
48 | if exist "%USERPROFILE%\mavenrc_pre.bat" call "%USERPROFILE%\mavenrc_pre.bat" %*
49 | if exist "%USERPROFILE%\mavenrc_pre.cmd" call "%USERPROFILE%\mavenrc_pre.cmd" %*
50 | :skipRcPre
51 |
52 | @setlocal
53 |
54 | set ERROR_CODE=0
55 |
56 | @REM To isolate internal variables from possible post scripts, we use another setlocal
57 | @setlocal
58 |
59 | @REM ==== START VALIDATION ====
60 | if not "%JAVA_HOME%" == "" goto OkJHome
61 |
62 | echo.
63 | echo Error: JAVA_HOME not found in your environment. >&2
64 | echo Please set the JAVA_HOME variable in your environment to match the >&2
65 | echo location of your Java installation. >&2
66 | echo.
67 | goto error
68 |
69 | :OkJHome
70 | if exist "%JAVA_HOME%\bin\java.exe" goto init
71 |
72 | echo.
73 | echo Error: JAVA_HOME is set to an invalid directory. >&2
74 | echo JAVA_HOME = "%JAVA_HOME%" >&2
75 | echo Please set the JAVA_HOME variable in your environment to match the >&2
76 | echo location of your Java installation. >&2
77 | echo.
78 | goto error
79 |
80 | @REM ==== END VALIDATION ====
81 |
82 | :init
83 |
84 | @REM Find the project base dir, i.e. the directory that contains the folder ".mvn".
85 | @REM Fallback to current working directory if not found.
86 |
87 | set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR%
88 | IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir
89 |
90 | set EXEC_DIR=%CD%
91 | set WDIR=%EXEC_DIR%
92 | :findBaseDir
93 | IF EXIST "%WDIR%"\.mvn goto baseDirFound
94 | cd ..
95 | IF "%WDIR%"=="%CD%" goto baseDirNotFound
96 | set WDIR=%CD%
97 | goto findBaseDir
98 |
99 | :baseDirFound
100 | set MAVEN_PROJECTBASEDIR=%WDIR%
101 | cd "%EXEC_DIR%"
102 | goto endDetectBaseDir
103 |
104 | :baseDirNotFound
105 | set MAVEN_PROJECTBASEDIR=%EXEC_DIR%
106 | cd "%EXEC_DIR%"
107 |
108 | :endDetectBaseDir
109 |
110 | IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig
111 |
112 | @setlocal EnableExtensions EnableDelayedExpansion
113 | for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a
114 | @endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS%
115 |
116 | :endReadAdditionalConfig
117 |
118 | SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe"
119 | set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar"
120 | set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
121 |
122 | set WRAPPER_URL="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar"
123 |
124 | FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO (
125 | IF "%%A"=="wrapperUrl" SET WRAPPER_URL=%%B
126 | )
127 |
128 | @REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
129 | @REM This allows using the maven wrapper in projects that prohibit checking in binary data.
130 | if exist %WRAPPER_JAR% (
131 | if "%MVNW_VERBOSE%" == "true" (
132 | echo Found %WRAPPER_JAR%
133 | )
134 | ) else (
135 | if not "%MVNW_REPOURL%" == "" (
136 | SET WRAPPER_URL="%MVNW_REPOURL%/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar"
137 | )
138 | if "%MVNW_VERBOSE%" == "true" (
139 | echo Couldn't find %WRAPPER_JAR%, downloading it ...
140 | echo Downloading from: %WRAPPER_URL%
141 | )
142 |
143 | powershell -Command "&{"^
144 | "$webclient = new-object System.Net.WebClient;"^
145 | "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^
146 | "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^
147 | "}"^
148 | "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%WRAPPER_URL%', '%WRAPPER_JAR%')"^
149 | "}"
150 | if "%MVNW_VERBOSE%" == "true" (
151 | echo Finished downloading %WRAPPER_JAR%
152 | )
153 | )
154 | @REM End of extension
155 |
156 | @REM If specified, validate the SHA-256 sum of the Maven wrapper jar file
157 | SET WRAPPER_SHA_256_SUM=""
158 | FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO (
159 | IF "%%A"=="wrapperSha256Sum" SET WRAPPER_SHA_256_SUM=%%B
160 | )
161 | IF NOT %WRAPPER_SHA_256_SUM%=="" (
162 | powershell -Command "&{"^
163 | "$hash = (Get-FileHash \"%WRAPPER_JAR%\" -Algorithm SHA256).Hash.ToLower();"^
164 | "If('%WRAPPER_SHA_256_SUM%' -ne $hash){"^
165 | " Write-Output 'Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised.';"^
166 | " Write-Output 'Investigate or delete %WRAPPER_JAR% to attempt a clean download.';"^
167 | " Write-Output 'If you updated your Maven version, you need to update the specified wrapperSha256Sum property.';"^
168 | " exit 1;"^
169 | "}"^
170 | "}"
171 | if ERRORLEVEL 1 goto error
172 | )
173 |
174 | @REM Provide a "standardized" way to retrieve the CLI args that will
175 | @REM work with both Windows and non-Windows executions.
176 | set MAVEN_CMD_LINE_ARGS=%*
177 |
178 | %MAVEN_JAVA_EXE% ^
179 | %JVM_CONFIG_MAVEN_PROPS% ^
180 | %MAVEN_OPTS% ^
181 | %MAVEN_DEBUG_OPTS% ^
182 | -classpath %WRAPPER_JAR% ^
183 | "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" ^
184 | %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %*
185 | if ERRORLEVEL 1 goto error
186 | goto end
187 |
188 | :error
189 | set ERROR_CODE=1
190 |
191 | :end
192 | @endlocal & set ERROR_CODE=%ERROR_CODE%
193 |
194 | if not "%MAVEN_SKIP_RC%"=="" goto skipRcPost
195 | @REM check for post script, once with legacy .bat ending and once with .cmd ending
196 | if exist "%USERPROFILE%\mavenrc_post.bat" call "%USERPROFILE%\mavenrc_post.bat"
197 | if exist "%USERPROFILE%\mavenrc_post.cmd" call "%USERPROFILE%\mavenrc_post.cmd"
198 | :skipRcPost
199 |
200 | @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on'
201 | if "%MAVEN_BATCH_PAUSE%"=="on" pause
202 |
203 | if "%MAVEN_TERMINATE_CMD%"=="on" exit %ERROR_CODE%
204 |
205 | cmd /C exit /B %ERROR_CODE%
206 |
--------------------------------------------------------------------------------
/ollama-postman_collection.json:
--------------------------------------------------------------------------------
1 | {
2 | "info": {
3 | "_postman_id": "546e95a8-f79b-4f13-81b2-ff4c4db2c012",
4 | "name": "ollama בדיקה",
5 | "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json",
6 | "_exporter_id": "9576700"
7 | },
8 | "item": [
9 | {
10 | "name": "assistant-chat",
11 | "request": {
12 | "method": "POST",
13 | "header": [
14 | {
15 | "key": "Content-Type",
16 | "value": "text/event-stream",
17 | "type": "text",
18 | "uuid": "a0357676-b30d-4b31-ac71-6f6e2acd9623"
19 | }
20 | ],
21 | "body": {
22 | "mode": "raw",
23 | "raw": "to the law of which state does this nda apply to?",
24 | "options": {
25 | "raw": {
26 | "language": "text"
27 | }
28 | }
29 | },
30 | "url": {
31 | "raw": "http://localhost:8080/assistant/chat",
32 | "protocol": "http",
33 | "host": [
34 | "localhost"
35 | ],
36 | "port": "8080",
37 | "path": [
38 | "assistant",
39 | "chat"
40 | ]
41 | }
42 | },
43 | "response": []
44 | }
45 | ]
46 | }
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
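1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
4 |     <modelVersion>4.0.0</modelVersion>
5 |     <parent>
6 |         <groupId>org.springframework.boot</groupId>
7 |         <artifactId>spring-boot-starter-parent</artifactId>
8 |         <version>3.2.5</version>
9 |         <relativePath/>
10 |     </parent>
11 |     <groupId>org.benaya.ai</groupId>
12 |     <artifactId>rag</artifactId>
13 |     <version>1.0.2</version>
14 |     <name>rag</name>
15 |     <description>rag</description>
16 |     <properties>
17 |         <java.version>21</java.version>
18 |         <spring-ai.version>0.8.1</spring-ai.version>
19 |         <pdfbox.version>3.0.1</pdfbox.version>
20 |     </properties>
21 |     <dependencies>
22 |         <dependency>
23 |             <groupId>org.springframework.boot</groupId>
24 |             <artifactId>spring-boot-starter-web</artifactId>
25 |         </dependency>
26 |         <dependency>
27 |             <groupId>org.springframework.ai</groupId>
28 |             <artifactId>spring-ai-ollama-spring-boot-starter</artifactId>
29 |         </dependency>
30 |
31 |
32 |
33 |
34 |         <dependency>
35 |             <groupId>org.springframework.ai</groupId>
36 |             <artifactId>spring-ai-pgvector-store-spring-boot-starter</artifactId>
37 |         </dependency>
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |         <dependency>
47 |             <groupId>org.apache.commons</groupId>
48 |             <artifactId>commons-csv</artifactId>
49 |             <version>1.10.0</version>
50 |         </dependency>
51 |         <dependency>
52 |             <groupId>org.apache.pdfbox</groupId>
53 |             <artifactId>pdfbox</artifactId>
54 |             <version>${pdfbox.version}</version>
55 |             <exclusions>
56 |                 <exclusion>
57 |                     <groupId>commons-logging</groupId>
58 |                     <artifactId>commons-logging</artifactId>
59 |                 </exclusion>
60 |             </exclusions>
61 |         </dependency>
62 |         <dependency>
63 |             <groupId>org.springframework.boot</groupId>
64 |             <artifactId>spring-boot-devtools</artifactId>
65 |             <scope>runtime</scope>
66 |             <optional>true</optional>
67 |         </dependency>
68 |         <dependency>
69 |             <groupId>org.springframework.boot</groupId>
70 |             <artifactId>spring-boot-configuration-processor</artifactId>
71 |             <optional>true</optional>
72 |         </dependency>
73 |         <dependency>
74 |             <groupId>org.projectlombok</groupId>
75 |             <artifactId>lombok</artifactId>
76 |             <optional>true</optional>
77 |         </dependency>
78 |         <dependency>
79 |             <groupId>org.springframework.boot</groupId>
80 |             <artifactId>spring-boot-starter-test</artifactId>
81 |             <scope>test</scope>
82 |         </dependency>
83 |     </dependencies>
84 |     <dependencyManagement>
85 |         <dependencies>
86 |             <dependency>
87 |                 <groupId>org.springframework.ai</groupId>
88 |                 <artifactId>spring-ai-bom</artifactId>
89 |                 <version>${spring-ai.version}</version>
90 |                 <type>pom</type>
91 |                 <scope>import</scope>
92 |             </dependency>
93 |         </dependencies>
94 |     </dependencyManagement>
95 |
96 |     <build>
97 |         <plugins>
98 |             <plugin>
99 |                 <groupId>org.springframework.boot</groupId>
100 |                 <artifactId>spring-boot-maven-plugin</artifactId>
101 |                 <configuration>
102 |                     <excludes>
103 |                         <exclude>
104 |                             <groupId>org.projectlombok</groupId>
105 |                             <artifactId>lombok</artifactId>
106 |                         </exclude>
107 |                     </excludes>
108 |                 </configuration>
109 |             </plugin>
110 |         </plugins>
111 |     </build>
112 |     <repositories>
113 |         <repository>
114 |             <id>spring-milestones</id>
115 |             <name>Spring Milestones</name>
116 |             <url>https://repo.spring.io/milestone</url>
117 |             <snapshots>
118 |                 <enabled>false</enabled>
119 |             </snapshots>
120 |         </repository>
121 |     </repositories>
122 |
123 | </project>
124 |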
--------------------------------------------------------------------------------
/src/main/java/org/benaya/ai/rag/RagApplication.java:
--------------------------------------------------------------------------------
1 | package org.benaya.ai.rag;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 |
6 | @SpringBootApplication
7 | public class RagApplication {
8 | public static void main(String[] args) {
9 | SpringApplication.run(RagApplication.class, args);
10 | }
11 |
12 | }
13 |
--------------------------------------------------------------------------------
/src/main/java/org/benaya/ai/rag/controller/AssistantController.java:
--------------------------------------------------------------------------------
1 | package org.benaya.ai.rag.controller;
2 |
3 | import lombok.RequiredArgsConstructor;
4 | import org.benaya.ai.rag.service.ChatGeneratorService;
5 | import org.benaya.ai.rag.service.RagService;
6 | import org.springframework.ai.chat.ChatResponse;
7 | import org.springframework.ai.chat.prompt.Prompt;
8 | import org.springframework.web.bind.annotation.PostMapping;
9 | import org.springframework.web.bind.annotation.RequestBody;
10 | import org.springframework.web.bind.annotation.RequestMapping;
11 | import org.springframework.web.bind.annotation.RestController;
12 | import reactor.core.publisher.Flux;
13 |
14 | @RestController
15 | @RequestMapping(path = "/assistant")
16 | @RequiredArgsConstructor
17 | public class AssistantController {
18 | private final ChatGeneratorService chatGeneratorService;
19 | private final RagService ragService;
20 |
21 | @PostMapping(value = "/chat", produces = "text/event-stream")
22 | public Flux prompt(@RequestBody String clientPrompt) {
23 | Prompt prompt = ragService.generatePromptFromClientPrompt(clientPrompt);
24 | return chatGeneratorService.generateStream(prompt)
25 | .map(this::extractContentFromChatResponse);
26 | }
27 | private String extractContentFromChatResponse(ChatResponse chatResponse) {
28 | return chatResponse.getResult().getOutput().getContent();
29 | }
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/src/main/java/org/benaya/ai/rag/model/Paragraph.java:
--------------------------------------------------------------------------------
1 | package org.benaya.ai.rag.model;
2 |
3 | import lombok.*;
4 | import org.springframework.ai.document.Document;
5 |
6 | import java.util.Map;
7 |
8 | @Setter
9 | @Getter
10 | @AllArgsConstructor
11 | @Builder
12 | public class Paragraph {
13 | private long id;
14 | private int page;
15 | private String title;
16 | private String content;
17 |
18 | public Document toDocument(@NonNull Paragraph paragraph) {
19 | return new Document(paragraph.getContent(), Map.of("title", paragraph.getTitle(), "page", paragraph.getPage(), "id", paragraph.getId()));
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/java/org/benaya/ai/rag/repository/DocumentRepository.java:
--------------------------------------------------------------------------------
1 | package org.benaya.ai.rag.repository;
2 |
3 | import lombok.RequiredArgsConstructor;
4 | import org.springframework.ai.vectorstore.SearchRequest;
5 | import org.springframework.ai.vectorstore.VectorStore;
6 | import org.springframework.stereotype.Component;
7 |
8 | import org.springframework.ai.document.Document;
9 | import java.util.List;
10 |
11 | /**
12 |  * Thin wrapper around the injected {@link VectorStore}.
13 |  */
14 | @Component
15 | @RequiredArgsConstructor
16 | public class DocumentRepository {
17 |
18 |     private final VectorStore vectorStore;
19 |
20 |     /**
21 |      * Adds the given documents to the vector store.
22 |      *
23 |      * @param docsToAdd documents to store
24 |      */
25 |     public void addDocuments(List<Document> docsToAdd) {
26 |         vectorStore.add(docsToAdd);
27 |     }
28 |
29 |     /**
30 |      * Runs a similarity search against the vector store.
31 |      *
32 |      * @param prompt text used as the search query
33 |      * @param topK   top-k value set on the search request
34 |      * @return the documents returned by the vector store for that request
35 |      */
36 |     public List<Document> similaritySearchWithTopK(String prompt, int topK) {
37 |         SearchRequest searchRequest = SearchRequest.query(prompt).withTopK(topK);
38 |         return vectorStore.similaritySearch(searchRequest);
39 |     }
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/org/benaya/ai/rag/runner/DatabaseInitRunner.java:
--------------------------------------------------------------------------------
1 | package org.benaya.ai.rag.runner;
2 |
3 | import lombok.RequiredArgsConstructor;
4 | import lombok.extern.slf4j.Slf4j;
5 | import org.benaya.ai.rag.repository.DocumentRepository;
6 | import org.benaya.ai.rag.service.CsvParserService;
7 | import org.springframework.ai.document.Document;
8 | import org.springframework.beans.factory.annotation.Value;
9 | import org.springframework.boot.ApplicationArguments;
10 | import org.springframework.boot.ApplicationRunner;
11 | import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
12 | import org.springframework.core.io.Resource;
13 | import org.springframework.stereotype.Component;
14 |
15 | import java.util.List;
16 |
17 | /**
18 |  * Startup task that loads {@code sample_nda.csv} into the vector store.
19 |  * Only registered when the {@code database.init} property equals {@code true}.
20 |  */
21 | @Component
22 | @RequiredArgsConstructor
23 | @Slf4j
24 | @ConditionalOnProperty(value = {"database.init"}, havingValue = "true")
25 | public class DatabaseInitRunner implements ApplicationRunner {
26 |     private final DocumentRepository documentRepository;
27 |     private final CsvParserService csvParserService;
28 |     @Value("classpath:sample_nda.csv")
29 |     private Resource ndaResource;
30 |
31 |     /**
32 |      * Parses the CSV resource and adds every resulting document to the vector store.
33 |      *
34 |      * @param args application arguments (unused)
35 |      */
36 |     @Override
37 |     public void run(ApplicationArguments args) {
38 |         List<Document> documents = csvParserService.getContentFromCsv(ndaResource);
39 |         log.info("Adding documents to vector store");
40 |         // Skip the per-document loop entirely unless debug output is actually wanted.
41 |         if (log.isDebugEnabled()) {
42 |             documents.forEach(doc -> log.debug("Document: {}", doc));
43 |         }
44 |         documentRepository.addDocuments(documents);
45 |         log.info("done!");
46 |     }
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/org/benaya/ai/rag/service/ChatGeneratorService.java:
--------------------------------------------------------------------------------
1 | package org.benaya.ai.rag.service;
2 |
3 | import lombok.Getter;
4 | import lombok.RequiredArgsConstructor;
5 | import org.springframework.ai.chat.ChatClient;
6 | import org.springframework.ai.chat.ChatResponse;
7 | import org.springframework.ai.chat.StreamingChatClient;
8 | import org.springframework.ai.chat.prompt.Prompt;
9 | import org.springframework.stereotype.Service;
10 | import reactor.core.publisher.Flux;
11 |
12 | /**
13 |  * Facade over the blocking and the streaming chat client.
14 |  */
15 | @Service
16 | @RequiredArgsConstructor
17 | @Getter
18 | public class ChatGeneratorService {
19 |     private final StreamingChatClient streamingChatClient;
20 |     private final ChatClient chatClient;
21 |
22 |     /**
23 |      * Sends a plain-text message through the blocking chat client.
24 |      *
25 |      * @param message text to send
26 |      * @return the client's reply
27 |      */
28 |     public String generate(String message) {
29 |         return getChatClient().call(message);
30 |     }
31 |
32 |     /**
33 |      * Sends a plain-text message through the streaming chat client.
34 |      *
35 |      * @param message text to send
36 |      * @return the streamed reply as text chunks
37 |      */
38 |     public Flux<String> generateStream(String message) {
39 |         return getStreamingChatClient().stream(message);
40 |     }
41 |
42 |     /**
43 |      * Sends a prepared prompt through the streaming chat client.
44 |      *
45 |      * @param prompt prompt to send
46 |      * @return the streamed reply as chat response chunks
47 |      */
48 |     public Flux<ChatResponse> generateStream(Prompt prompt) {
49 |         return getStreamingChatClient().stream(prompt);
50 |     }
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/java/org/benaya/ai/rag/service/CsvParserService.java:
--------------------------------------------------------------------------------
1 | package org.benaya.ai.rag.service;
2 |
3 | import org.apache.commons.csv.CSVFormat;
4 | import org.apache.commons.csv.CSVParser;
5 | import org.apache.commons.csv.CSVRecord;
6 | import org.benaya.ai.rag.model.Paragraph;
7 | import org.springframework.ai.document.Document;
8 | import org.springframework.core.io.Resource;
9 | import org.springframework.stereotype.Service;
10 |
11 | import java.io.IOException;
12 | import java.io.Reader;
13 | import java.nio.file.Files;
14 | import java.nio.file.Paths;
15 | import java.util.ArrayList;
16 | import java.util.Arrays;
17 | import java.util.List;
18 | import java.util.stream.Collectors;
19 |
20 | @Service
21 | public class CsvParserService {
22 | public List getContentFromCsv(Resource resource){
23 | try (Reader reader = Files.newBufferedReader(Paths.get(resource.getURI()))
24 | ; CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT
25 | .builder()
26 | .setHeader().setSkipHeaderRecord(true)
27 | .setTrim(true)
28 | .setIgnoreEmptyLines(true)
29 | .setIgnoreHeaderCase(true)
30 | .build())) {
31 | List documentsToAdd = new ArrayList<>();
32 | for (CSVRecord csvRecord : csvParser) {
33 | Paragraph paragraph = Paragraph.builder()
34 | .id(Long.parseLong(csvRecord.get("DOC_ID")))
35 | .page(Integer.parseInt(csvRecord.get("page")))
36 | .title(Arrays.stream(csvRecord.get("display").split(" ")).filter(s -> s.equals(s.toUpperCase())).collect(Collectors.joining(" ")))
37 | .content(csvRecord.get("passage"))
38 | .build();
39 | documentsToAdd.add(paragraph.toDocument(paragraph));
40 | }
41 | return documentsToAdd;
42 | } catch (
43 | IOException e) {
44 | throw new RuntimeException(e);
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/org/benaya/ai/rag/service/RagService.java:
--------------------------------------------------------------------------------
1 | package org.benaya.ai.rag.service;
2 |
3 | import lombok.RequiredArgsConstructor;
4 | import lombok.extern.slf4j.Slf4j;
5 | import org.benaya.ai.rag.repository.DocumentRepository;
6 | import org.springframework.ai.chat.messages.Message;
7 | import org.springframework.ai.chat.messages.UserMessage;
8 | import org.springframework.ai.chat.prompt.Prompt;
9 | import org.springframework.ai.chat.prompt.SystemPromptTemplate;
10 | import org.springframework.ai.document.Document;
11 | import org.springframework.beans.factory.annotation.Value;
12 | import org.springframework.core.io.Resource;
13 | import org.springframework.stereotype.Service;
14 |
15 | import java.util.List;
16 | import java.util.Map;
17 | import java.util.stream.Collectors;
18 |
19 | /**
20 |  * Builds retrieval-augmented prompts: looks up the documents most similar to the
21 |  * client's question and places their text in the system message.
22 |  */
23 | @Service
24 | @RequiredArgsConstructor
25 | @Slf4j
26 | public class RagService {
27 |     @Value("classpath:prompts/system-qa.st")
28 |     private Resource systemNdaPrompt;
29 |     @Value("${queries.top-k:2}")
30 |     private int topK;
31 |
32 |     private final DocumentRepository documentRepository;
33 |
34 |     /**
35 |      * Creates the prompt for a client question.
36 |      *
37 |      * @param clientPrompt the client's question; also used as the similarity-search query
38 |      * @return a prompt holding the system message followed by the user message
39 |      */
40 |     public Prompt generatePromptFromClientPrompt(String clientPrompt) {
41 |         List<Document> docs = documentRepository.similaritySearchWithTopK(clientPrompt, topK);
42 |         Message systemMessage = getSystemMessage(docs);
43 |         log.info("System message: {}", systemMessage.getContent());
44 |         UserMessage userMessage = new UserMessage(clientPrompt);
45 |         return new Prompt(List.of(systemMessage, userMessage));
46 |     }
47 |
48 |     /**
49 |      * Renders the system prompt template, binding the retrieved documents' text
50 |      * (joined with newlines) to the {@code documents} placeholder.
51 |      *
52 |      * @param similarDocuments documents returned by the similarity search
53 |      * @return the rendered system message
54 |      */
55 |     private Message getSystemMessage(List<Document> similarDocuments) {
56 |         String documents = similarDocuments.stream()
57 |                 .map(Document::getContent)
58 |                 .collect(Collectors.joining("\n"));
59 |         SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(systemNdaPrompt);
60 |         return systemPromptTemplate.createMessage(Map.of("documents", documents));
61 |     }
62 | }
63 |
--------------------------------------------------------------------------------
/src/main/resources/META-INF/additional-spring-configuration-metadata.json:
--------------------------------------------------------------------------------
1 | {
2 | "properties": [
3 | {
4 | "name": "database.init",
5 | "type": "java.lang.String",
6 | "description": "Whether the sample documents are loaded into the vector store on application startup."
7 | },
8 | {
9 | "name": "queries.top-k",
10 | "type": "java.lang.String",
11 | "description": "Number of most similar documents retrieved from the vector store for each query."
12 | }
13 | ]
14 | }
--------------------------------------------------------------------------------
/src/main/resources/application.yml:
--------------------------------------------------------------------------------
1 | spring:
2 | mvc:
3 | async:
4 | request-timeout: ${SPRING_MVC_ASYNC_REQUEST-TIMEOUT:-1}
5 | application:
6 | name: rag
7 | # autoconfigure:
8 | # exclude: org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration
9 | datasource:
10 | url: ${SPRING_DATASOURCE_URL:jdbc:postgresql://localhost:5432/postgres}
11 | username: ${SPRING_DATASOURCE_USERNAME:postgres}
12 | password: ${SPRING_DATASOURCE_PASSWORD:postgres}
13 | ai:
14 | retry:
15 | backoff:
16 | initial-interval: 2000
17 | max-interval: 10000
18 | multiplier: 4
19 | max-attempts: 20
20 | ollama:
21 | embedding:
22 | model: ${EMBEDDING_MODEL:nomic-embed-text}
23 | options:
24 | top-k: ${OLLAMA_EMBEDDING_TOP-K:10}
25 | num-thread: ${EMBEDDINGS_NUM_THREAD:8}
26 | num-ctx: 8192
27 | num-batch: 768
28 | top-p: ${OLLAMA_EMBEDDING_TOP-P:0.5}
29 | enabled: true
30 | base-url: ${OLLAMA_BASE_URL:http://host.docker.internal:11434}
31 | chat:
32 | model: ${CHAT_MODEL:mistral}
33 | options:
34 | num-ctx: ${CHAT_MODEL_CONTEXT_LENGTH:32768}
35 | num-batch: 768
36 | num-thread: ${CHAT_NUM_THREAD:8}
37 | top-k: ${CHAT_TOP-K:1}
38 | top-p: ${CHAT_TOP-P:0.5}
39 | temperature: ${CHAT_TEMPERATURE:0.5}
40 | vectorstore:
41 | pgvector:
42 | index-type: ${VECTORSTORE_PGVECTOR_INDEX-TYPE:hnsw}
43 | dimensions: ${VECTORSTORE_PGVECTOR_DIMENSIONS:768}
44 | distance-type: cosine_distance
45 |
46 | # milvus:
47 | # collection-name: ${MILVUS_COLLECTION_NAME:vector_store1}
48 | # embedding-dimension: ${MILVUS_EMBEDDING_DIM:768}
49 | database:
50 | init: ${DATABASE_INIT:false}
51 | queries:
52 | top-k: ${QUERIES_TOP-K:3}
--------------------------------------------------------------------------------
/src/main/resources/prompts/system-qa.st:
--------------------------------------------------------------------------------
1 | You're a Q&A assistant for an NDA.
2 | Use the information from the DOCUMENTS section to provide accurate, simple and short answers.
3 | The answer involves referring to the relevant document and providing a summary of the requested information.
4 | If unsure, simply state that you don't know.
5 |
6 | DOCUMENTS:
7 | {documents}
--------------------------------------------------------------------------------
/src/main/resources/sample_nda.csv:
--------------------------------------------------------------------------------
1 | DOC_ID,passage,para,filename,page,display,highlight
2 | 0,confidentiality agreement this confidentiality agreement (the “agreement”) is made by and between acme. dba tothemoon inc. with offices at 2025 guadalupe st. suite 260 austin tx 78705 and starwars dba tothemars with offices at the forest moon of endor and entered as of may 3 2023 (“effective date”).,confidentiality agreement this confidentiality agreement (the “agreement”) is made by and between acme. dba tothemoon inc. with offices at 2025 guadalupe st. suite 260 austin tx 78705 and starwars dba tothemars with offices at the forest moon of endor and entered as of may 3 2023 (“effective date”).,mutual_nda_teamplate_for_testing.pdf,0,CONFIDENTIALITY AGREEMENT This Confidentiality Agreement (the “Agreement”) is made by and between ACME. dba ToTheMoon Inc. with offices at 2025 Guadalupe St. Suite 260 Austin TX 78705 and StarWars dba ToTheMars with offices at the forest moon of Endor and entered as of May 3 2023 (“Effective Date”).,"{0: [0, 1]}"
3 | 1,in consideration of the business discussions disclosure of confidential information and any future business relationship between the parties it is hereby agreed as follows: 1. confidential information. for purposes of this agreement the term “confidential information” shall mean any information business plan concept idea know-how process technique program design formula algorithm or work-in-process request for proposal (rfp) or request for information (rfi) and any responses thereto engineering manufacturing marketing technical financial data or sales information or information regarding suppliers customers employees investors or business operations and other information or materials whether disclosed in written graphic oral or electronic form whether tangible or intangible and in whatever form or medium provided or which is learned or disclosed in the course of discussions studies or other work undertaken between the parties prior to or after the effective date.,in consideration of the business discussions disclosure of confidential information and any future business relationship between the parties it is hereby agreed as follows: 1. confidential information. 
for purposes of this agreement the term “confidential information” shall mean any information business plan concept idea know-how process technique program design formula algorithm or work-in-process request for proposal (rfp) or request for information (rfi) and any responses thereto engineering manufacturing marketing technical financial data or sales information or information regarding suppliers customers employees investors or business operations and other information or materials whether disclosed in written graphic oral or electronic form whether tangible or intangible and in whatever form or medium provided or which is learned or disclosed in the course of discussions studies or other work undertaken between the parties prior to or after the effective date.,mutual_nda_teamplate_for_testing.pdf,0,In consideration of the business discussions disclosure of Confidential Information and any future business relationship between the parties it is hereby agreed as follows: 1. CONFIDENTIAL INFORMATION. For purposes of this Agreement the term “Confidential Information” shall mean any information business plan concept idea know-how process technique program design formula algorithm or work-in-process Request for Proposal (RFP) or Request for Information (RFI) and any responses thereto engineering manufacturing marketing technical financial data or sales information or information regarding suppliers customers employees investors or business operations and other information or materials whether disclosed in written graphic oral or electronic form whether tangible or intangible and in whatever form or medium provided or which is learned or disclosed in the course of discussions studies or other work undertaken between the parties prior to or after the Effective Date.,"{0: [2, 3]}"
4 | 2,2. need to know. the receiving party shall limit its disclosure of the other party’s confidential information to those of its officers and employees and subcontractors (i) to which such disclosure is necessary for purposes of the discussions contemplated by this agreement and (ii) who have agreed in writing to be bound by provisions no less restrictive than those set forth in this agreement.,2. need to know. the receiving party shall limit its disclosure of the other party’s confidential information to those of its officers and employees and subcontractors (i) to which such disclosure is necessary for purposes of the discussions contemplated by this agreement and (ii) who have agreed in writing to be bound by provisions no less restrictive than those set forth in this agreement.,mutual_nda_teamplate_for_testing.pdf,0,2. NEED TO KNOW. The receiving party shall limit its disclosure of the other party’s Confidential Information to those of its officers and employees and subcontractors (i) to which such disclosure is necessary for purposes of the discussions contemplated by this Agreement and (ii) who have agreed in writing to be bound by provisions no less restrictive than those set forth in this Agreement.,{0: [4]}
5 | 3,3. joint undertaking. each party agrees that it will not at any time disclose give or transmit in any manner or for any purpose the confidential information received from the other party to any person firm or corporation or use such confidential information for its own benefit or the benefit of anyone else or for any purpose other than to engage in discussions regarding a possible business relationship or the current business relationship involving both parties.,3. joint undertaking. each party agrees that it will not at any time disclose give or transmit in any manner or for any purpose the confidential information received from the other party to any person firm or corporation or use such confidential information for its own benefit or the benefit of anyone else or for any purpose other than to engage in discussions regarding a possible business relationship or the current business relationship involving both parties.,mutual_nda_teamplate_for_testing.pdf,0,3. JOINT UNDERTAKING. Each party agrees that it will not at any time disclose give or transmit in any manner or for any purpose the Confidential Information received from the other party to any person firm or corporation or use such Confidential Information for its own benefit or the benefit of anyone else or for any purpose other than to engage in discussions regarding a possible business relationship or the current business relationship involving both parties.,{0: [5]}
6 | 4,each party shall take all reasonable measures to preserve the confidentiality and avoid the disclosure of the other party’s confidential information including but not limited to those steps taken with respect to the party’s own confidential information of like importance. neither party shall disassemble decompile or otherwise reverse engineer any software product of the other party and to the extent any such activity may be permitted the results thereof shall be deemed confidential information subject to the requirements of this agreement.,each party shall take all reasonable measures to preserve the confidentiality and avoid the disclosure of the other party’s confidential information including but not limited to those steps taken with respect to the party’s own confidential information of like importance. neither party shall disassemble decompile or otherwise reverse engineer any software product of the other party and to the extent any such activity may be permitted the results thereof shall be deemed confidential information subject to the requirements of this agreement.,mutual_nda_teamplate_for_testing.pdf,0,Each party shall take all reasonable measures to preserve the confidentiality and avoid the disclosure of the other party’s Confidential Information including but not limited to those steps taken with respect to the party’s own Confidential Information of like importance. Neither party shall disassemble decompile or otherwise reverse engineer any software product of the other party and to the extent any such activity may be permitted the results thereof shall be deemed Confidential Information subject to the requirements of this Agreement.,{0: [5]}
7 | 5,4. return of confidential information. upon request of the other party termination of the discussions regarding a business relationship between the parties or termination of the current business relationship each party shall promptly destroy or deliver to the other party any and all documents notes and other physical embodiments of or reflecting the confidential information (including any copies thereof) that are in their possession or control. upon request of a party a responsible officer of the other party shall provide written certification of the completeness of the delivery or destruction of such materials.,4. return of confidential information. upon request of the other party termination of the discussions regarding a business relationship between the parties or termination of the current business relationship each party shall promptly destroy or deliver to the other party any and all documents notes and other physical embodiments of or reflecting the confidential information (including any copies thereof) that are in their possession or control. upon request of a party a responsible officer of the other party shall provide written certification of the completeness of the delivery or destruction of such materials.,mutual_nda_teamplate_for_testing.pdf,0,4. RETURN OF CONFIDENTIAL INFORMATION. Upon request of the other party termination of the discussions regarding a business relationship between the parties or termination of the current business relationship each party shall promptly destroy or deliver to the other party any and all documents notes and other physical embodiments of or reflecting the Confidential Information (including any copies thereof) that are in their possession or control. Upon request of a party a responsible officer of the other party shall provide written certification of the completeness of the delivery or destruction of such materials.,{0: [6]}
8 | 6,5. non conveyance. nothing in this agreement shall be construed as conveying to either party (i) any right title or interests or copyright in or to any confidential information of the other party or (ii) any license as to use sell exploit copy or further develop any such confidential information.,5. non conveyance. nothing in this agreement shall be construed as conveying to either party (i) any right title or interests or copyright in or to any confidential information of the other party or (ii) any license as to use sell exploit copy or further develop any such confidential information.,mutual_nda_teamplate_for_testing.pdf,0,5. NON CONVEYANCE. Nothing in this Agreement shall be construed as conveying to either party (i) any right title or interests or copyright in or to any Confidential Information of the other party or (ii) any license as to use sell exploit copy or further develop any such Confidential Information.,{0: [7]}
9 | 7,6. excluded information. the parties agree that confidential information of the other party shall not include any information to the extent that the information: (i) is or at any time becomes a part of the public domain through no act or omission of the receiving party; (ii) is independently discovered or developed by the receiving party without use of the disclosing party’s confidential information; (iii) is rightfully obtained from a third party without any obligation of confidentiality; or (iv) is already known by the receiving party without any obligation of confidentiality prior to obtaining the confidential information from the disclosing party.,6. excluded information. the parties agree that confidential information of the other party shall not include any information to the extent that the information: (i) is or at any time becomes a part of the public domain through no act or omission of the receiving party; (ii) is independently discovered or developed by the receiving party without use of the disclosing party’s confidential information; (iii) is rightfully obtained from a third party without any obligation of confidentiality; or (iv) is already known by the receiving party without any obligation of confidentiality prior to obtaining the confidential information from the disclosing party.,mutual_nda_teamplate_for_testing.pdf,0,6. EXCLUDED INFORMATION. 
The parties agree that Confidential Information of the other party shall not include any information to the extent that the information: (i) is or at any time becomes a part of the public domain through no act or omission of the receiving party; (ii) is independently discovered or developed by the receiving party without use of the disclosing party’s Confidential Information; (iii) is rightfully obtained from a third party without any obligation of confidentiality; or (iv) is already known by the receiving party without any obligation of confidentiality prior to obtaining the Confidential Information from the disclosing party.,"{0: [8], 1: [0]}"
10 | 8,the receiving party shall not be liable for disclosure of confidential information if made in response to a valid order of a court or authorized agency of government provided that notice is promptly given to the disclosing party so that a protective order may be sought and other efforts employed to minimize the required disclosure.the receiving party shall cooperate with the disclosing party in seeking the protective order and engaging in such other efforts.,the receiving party shall not be liable for disclosure of confidential information if made in response to a valid order of a court or authorized agency of government provided that notice is promptly given to the disclosing party so that a protective order may be sought and other efforts employed to minimize the required disclosure.the receiving party shall cooperate with the disclosing party in seeking the protective order and engaging in such other efforts.,mutual_nda_teamplate_for_testing.pdf,0,The receiving party shall not be liable for disclosure of Confidential Information if made in response to a valid order of a court or authorized agency of government provided that notice is promptly given to the disclosing party so that a protective order may be sought and other efforts employed to minimize the required disclosure.The receiving party shall cooperate with the disclosing party in seeking the protective order and engaging in such other efforts.,"{0: [8], 1: [0]}"
11 | 9,7. remedies. a violation of this agreement by either party could cause irreparable injury to the other party and as there is no adequate remedy at law for such violation the non-breaching party may in addition to any other remedies available to it at law or in equity seek injunctive relief for violating or threatening to violate this agreement.,7. remedies. a violation of this agreement by either party could cause irreparable injury to the other party and as there is no adequate remedy at law for such violation the non-breaching party may in addition to any other remedies available to it at law or in equity seek injunctive relief for violating or threatening to violate this agreement.,mutual_nda_teamplate_for_testing.pdf,1,7. REMEDIES. A violation of this Agreement by either party could cause irreparable injury to the other party and as there is no adequate remedy at law for such violation the non-breaching party may in addition to any other remedies available to it at law or in equity seek injunctive relief for violating or threatening to violate this Agreement.,{1: [1]}
12 | 10,8. no commitment. this agreement does not in any way bind the parties to enter into a business relationship of any nature with the other. nothing herein or any other verbal representations made by either party shall be construed as a binding commitment to establish a business relationship. except for a breach of this agreement neither party shall have any liability to the other if the parties do not establish a business relationship that is expressed in writing and expressly stated to be legally binding.,8. no commitment. this agreement does not in any way bind the parties to enter into a business relationship of any nature with the other. nothing herein or any other verbal representations made by either party shall be construed as a binding commitment to establish a business relationship. except for a breach of this agreement neither party shall have any liability to the other if the parties do not establish a business relationship that is expressed in writing and expressly stated to be legally binding.,mutual_nda_teamplate_for_testing.pdf,1,8. NO COMMITMENT. This Agreement does not in any way bind the parties to enter into a business relationship of any nature with the other. Nothing herein or any other verbal representations made by either party shall be construed as a binding commitment to establish a business relationship. Except for a breach of this Agreement neither party shall have any liability to the other if the parties do not establish a business relationship that is expressed in writing and expressly stated to be legally binding.,{1: [2]}
13 | 11,9. fees. in the event any action including arbitration is brought to enforce any provision of this agreement or to declare a breach of this agreement the prevailing party shall be entitled to recover in addition to any other amounts awarded reasonable attorney’s fees and other related costs and expenses.,9. fees. in the event any action including arbitration is brought to enforce any provision of this agreement or to declare a breach of this agreement the prevailing party shall be entitled to recover in addition to any other amounts awarded reasonable attorney’s fees and other related costs and expenses.,mutual_nda_teamplate_for_testing.pdf,1,9. FEES. In the event any action including arbitration is brought to enforce any provision of this Agreement or to declare a breach of this Agreement the prevailing party shall be entitled to recover in addition to any other amounts awarded reasonable attorney’s fees and other related costs and expenses.,{1: [3]}
14 | 12,10. non assignment. neither party may assign or transfer this agreement or any rights hereunder to any third party without the prior written consent of the other party provided that such consent shall not be required if either party assigns this agreement in connection with a merger acquisition joint venture or sale of all or substantially all of its assets.,10. non assignment. neither party may assign or transfer this agreement or any rights hereunder to any third party without the prior written consent of the other party provided that such consent shall not be required if either party assigns this agreement in connection with a merger acquisition joint venture or sale of all or substantially all of its assets.,mutual_nda_teamplate_for_testing.pdf,1,10. NON ASSIGNMENT. Neither party may assign or transfer this Agreement or any rights hereunder to any third party without the prior written consent of the other party provided that such consent shall not be required if either party assigns this Agreement in connection with a merger acquisition joint venture or sale of all or substantially all of its assets.,{1: [4]}
15 | 13,11. non disclosure of discussions. each party agrees that in addition to the other confidentiality obligations hereunder it will not disclose to any third party that it is having any discussions with the other party with respect to establishing a business relationship.,11. non disclosure of discussions. each party agrees that in addition to the other confidentiality obligations hereunder it will not disclose to any third party that it is having any discussions with the other party with respect to establishing a business relationship.,mutual_nda_teamplate_for_testing.pdf,1,11. NON DISCLOSURE OF DISCUSSIONS. Each party agrees that in addition to the other confidentiality obligations hereunder it will not disclose to any third party that it is having any discussions with the other party with respect to establishing a business relationship.,{1: [5]}
16 | 14,12. entire agreement. this agreement constitutes the entire agreement with respect to the subject matter hereof and supersedes all prior agreements and understandings between the parties (whether written or oral) relating to the subject matter and may not be amended or modified except in a writing signed by an authorized representative of both parties. the terms of this agreement relating to the confidentiality and non-use of confidential information shall continue after the termination of this agreement for a period of the longer of (i) five (5) years or (ii) when the confidential information no longer qualifies as a trade secret under applicable law.,12. entire agreement. this agreement constitutes the entire agreement with respect to the subject matter hereof and supersedes all prior agreements and understandings between the parties (whether written or oral) relating to the subject matter and may not be amended or modified except in a writing signed by an authorized representative of both parties. the terms of this agreement relating to the confidentiality and non-use of confidential information shall continue after the termination of this agreement for a period of the longer of (i) five (5) years or (ii) when the confidential information no longer qualifies as a trade secret under applicable law.,mutual_nda_teamplate_for_testing.pdf,1,12. ENTIRE AGREEMENT. This Agreement constitutes the entire agreement with respect to the subject matter hereof and supersedes all prior agreements and understandings between the parties (whether written or oral) relating to the subject matter and may not be amended or modified except in a writing signed by an authorized representative of both parties. The terms of this Agreement relating to the confidentiality and non-use of Confidential Information shall continue after the termination of this Agreement for a period of the longer of (i) five (5) years or (ii) when the Confidential Information no longer qualifies as a trade secret under applicable law.,{1: [6]}
17 | 15,13. severability. each party acknowledges that should any provision of this agreement be determined to be void invalid or otherwise unenforceable by any court of competent jurisdiction such determination shall not affect the remaining provisions hereof which shall remain in full force and effect.,13. severability. each party acknowledges that should any provision of this agreement be determined to be void invalid or otherwise unenforceable by any court of competent jurisdiction such determination shall not affect the remaining provisions hereof which shall remain in full force and effect.,mutual_nda_teamplate_for_testing.pdf,1,13. SEVERABILITY. Each party acknowledges that should any provision of this Agreement be determined to be void invalid or otherwise unenforceable by any court of competent jurisdiction such determination shall not affect the remaining provisions hereof which shall remain in full force and effect.,{1: [7]}
18 | 16,14. governing law. this agreement shall be construed for all purposes in accordance with the laws of the state of texas without regard to the conflicts of law provisions of any state or jurisdiction. any action or suit related to this agreement shall be brought in austin texas and each party hereby submits to the exclusive jurisdiction of such courts.,14. governing law. this agreement shall be construed for all purposes in accordance with the laws of the state of texas without regard to the conflicts of law provisions of any state or jurisdiction. any action or suit related to this agreement shall be brought in austin texas and each party hereby submits to the exclusive jurisdiction of such courts.,mutual_nda_teamplate_for_testing.pdf,1,14. GOVERNING LAW. This Agreement shall be construed for all purposes in accordance with the laws of the State of Texas without regard to the conflicts of law provisions of any state or jurisdiction. Any action or suit related to this Agreement shall be brought in Austin Texas and each party hereby submits to the exclusive jurisdiction of such courts.,{1: [8]}
19 | 17,in witness whereof this agreement has been duly executed by the parties hereto as of the latest date set forth below: acme inc. starwars inc. by: by: name: bugs bunny name: luke skywalker title: ceo title: ceo date: may 5 2023 date: may 7 2023,in witness whereof this agreement has been duly executed by the parties hereto as of the latest date set forth below: acme inc. starwars inc. by: by: name: bugs bunny name: luke skywalker title: ceo title: ceo date: may 5 2023 date: may 7 2023,mutual_nda_teamplate_for_testing.pdf,2,IN WITNESS WHEREOF this Agreement has been duly executed by the parties hereto as of the latest date set forth below: Acme Inc. StarWars Inc. By: By: Name: Bugs Bunny Name: Luke Skywalker Title: CEO Title: CEO Date: May 5 2023 Date: May 7 2023,"{2: [0, 1, 2, 3, 4, 5]}"
20 |
--------------------------------------------------------------------------------
/src/test/java/org/benaya/ai/rag/RagApplicationTests.java:
--------------------------------------------------------------------------------
1 | package org.benaya.ai.rag;
2 |
3 | import org.junit.jupiter.api.Test;
4 | import org.springframework.boot.test.context.SpringBootTest;
5 |
6 | @SpringBootTest // Spring Boot test annotation: runs this class with the application context started.
7 | class RagApplicationTests { // Smoke-test class; it declares a single test and no fields or helpers.
8 |
9 | @Test
10 | void contextLoads() { // Intentionally empty: no assertions, so it passes whenever test setup completes without throwing.
11 | }
12 |
13 | }
14 |
--------------------------------------------------------------------------------