├── .github
│   ├── scripts
│   │   └── generate-release-content.js
│   └── workflows
│       └── build-and-release.yaml
├── LICENSE
├── README.md
└── examples
    ├── CreatingAMemoryBank
    │   ├── README.md
    │   ├── recipes
    │   │   ├── wish_i_made_more__yeast_potato_lem_orange_rolls_(577).pdf
    │   │   ├── wolf_creek_inn__macaroni___cheese_(578).pdf
    │   │   ├── wolfpack__pork_sandwich_(579).pdf
    │   │   ├── world_s_best____and_easiest___teriyaki_chicken_wings_(298).pdf
    │   │   ├── world_s_best__macaroni___cheese_(580).pdf
    │   │   ├── you_bring_the_macaroni_salad___macaroni_salad_(299).pdf
    │   │   ├── you_can_t_eat_just_one__ice_box_cookies_(300).pdf
    │   │   ├── you_like_cheese___garlic_bread_spread_(581).pdf
    │   │   ├── you_ll_never_miss_the_noodles__lasagna_(584).pdf
    │   │   └── you_want_me_to_do_what_to_the_buttered_noodles_(582).pdf
    │   ├── requirements.txt
    │   └── store_files.py
    ├── PythonPDFChatbot-PythonSDK
    │   ├── .gitignore
    │   ├── README.md
    │   ├── app.py
    │   ├── requirements.txt
    │   ├── sample_prompt.md
    │   └── templates
    │       ├── general_template.md
    │       └── recipe_template.md
    └── PythonPDFChatbot-RESTAPI
        ├── .gitignore
        ├── README.md
        ├── app.py
        ├── requirements.txt
        ├── retriever.py
        ├── sample_prompt.md
        └── templates
            ├── general_template.md
            └── recipe_template.md

/.github/scripts/generate-release-content.js:
--------------------------------------------------------------------------------
const { Octokit } = require("@octokit/rest");
const fs = require("fs");

const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN });

// The triggering release is read from the workflow event payload file.
// (GITHUB_EVENT is not a real environment variable; the payload must be
// parsed from the JSON file that GITHUB_EVENT_PATH points to.)
const event = JSON.parse(fs.readFileSync(process.env.GITHUB_EVENT_PATH, "utf8"));

async function generateReleaseContent()
{
    const { data: release } = await octokit.repos.getRelease({
        owner: process.env.GITHUB_REPOSITORY.split('/')[0],
        repo: process.env.GITHUB_REPOSITORY.split('/')[1],
        release_id: event.release.id,
    });

    let content = `# ${release.name}\n\n`;
    content += release.body + "\n\n";

    content += "## Downloads\n\n";
    content += "| Platform | Download | Size | Date |\n";
    content += "|----------|----------|------|------|\n";

    const mainAssets = release.assets.filter(asset =>
        !asset.name.includes("debug") && !asset.name.includes("symbols")
    );

    for (const asset of mainAssets)
    {
        const platform = getPlatformFromAsset(asset.name);
        const size = formatSize(asset.size);
        const date = formatDate(new Date(asset.created_at));
        content += `| ${platform} | [${asset.name}](${asset.browser_download_url}) | ${size} | ${date} |\n`;
    }

    content += "\n## Additional Files\n\n";
    const additionalAssets = release.assets.filter(asset =>
        asset.name.includes("debug") || asset.name.includes("symbols")
    );

    for (const asset of additionalAssets)
    {
        const size = formatSize(asset.size);
        const date = formatDate(new Date(asset.created_at));
        content += `- [${asset.name}](${asset.browser_download_url}) (${size}, ${date})\n`;
    }

    fs.writeFileSync("release-body.md", content);
}

function getPlatformFromAsset(assetName)
{
    if (assetName.includes("windows")) return "Windows";
    if (assetName.includes("macos")) return "macOS";
    if (assetName.includes("linux")) return "Linux";
    return "Other";
}

function formatSize(bytes)
{
    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
    let size = bytes;
    let unitIndex = 0;

    while (size >= 1024 && unitIndex < units.length - 1)
    {
        size /= 1024;
        unitIndex++;
    }

    return `${size.toFixed(1)}${units[unitIndex]}`;
}

function formatDate(date)
{
    const now = new Date();
    const diffTime = Math.abs(now - date);
    // Use floor so assets less than a day old report "Today";
    // with ceil the "Today" branch below could never be reached.
    const diffDays = Math.floor(diffTime / (1000 * 60 * 60 * 24));

    if (diffDays === 0)
    {
        return "Today";
    } else if (diffDays === 1)
    {
        return "Yesterday";
    } else if (diffDays <= 7)
    {
        return `${diffDays} days ago`;
    } else
    {
        return date.toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
    }
}

generateReleaseContent().catch(console.error);
--------------------------------------------------------------------------------
/.github/workflows/build-and-release.yaml:
--------------------------------------------------------------------------------
  1 | name: Dabarqus Build and Release
  2 | 
  3 | defaults:
  4 |   run:
  5 |     shell: bash
  6 | 
  7 | on:
  8 |   repository_dispatch:
  9 |     types: [dabarqus_push]
 10 | 
 11 | jobs:
 12 |   build-and-release:
 13 |     permissions: write-all
 14 |     runs-on: ${{ matrix.runner }}
 15 |     env:
 16 |       # Set the deployment repository owner and name,
 17 |       # which is different from the current repository
 18 |       GITHUB_DEPLOYMENT_REPO_OWNER: electricpipelines
 19 |       GITHUB_DEPLOYMENT_REPO_NAME: barq
 20 |       GITHUB_SOURCE_REPO_OWNER: electricpipelines
 21 |       GITHUB_SOURCE_REPO_NAME: dabarqus
 22 |       GH_TOKEN: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }}
 23 |       GITHUB_TOKEN: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }}
 24 |       RELEASE_TYPE: release
 25 |       DOCKERHUB_USERNAME: electricpipelines
 26 |       DOCKERHUB_TOKEN: ${{ secrets.DOCKER_REPO_ACCESS_TOKEN }}
 27 | 
 28 |     strategy:
 29 |       fail-fast: false
 30 |       matrix:
 31 |         include:
 32 |           - platform: macos
 33 |             runner: [self-hosted, macos, x64]
 34 |           - platform: macos-metal
 35 |             runner: [macos-latest]
 36 |           - platform: windows
 37 |             runner: [self-hosted, windows]
 38 |           - platform: windows-nvidia
 39 |             runner: [self-hosted, windows, cuda]
 40 |           - platform: linux
 41 |             runner: [self-hosted, linux]
 42 |           - platform: linux-nvidia
 43 |             runner: [self-hosted, linux, cuda]
 44 |     steps:
 45 |       - name: Checkout Repository
 46 |         uses: actions/checkout@v4
 47 |         with:
 48 |           repository: ${{ env.GITHUB_SOURCE_REPO_OWNER }}/${{ env.GITHUB_SOURCE_REPO_NAME }}
 49 |           token: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }}
 50 |           submodules: recursive
 51 | 
 52 |       - name: Set Git Bash as default shell for Windows and Linux
 53 |         if: startsWith(matrix.platform, 'windows') || startsWith(matrix.platform, 'linux')
 54 |         shell: pwsh
 55 |         run: |
 56 |           echo "Setting up Git Bash as default shell"
 57 |           echo "C:/Program Files/Git/bin" >> $env:GITHUB_PATH
 58 | 
 59 |       - name: Check if runner is self-hosted
 60 |         id: check-runner
 61 |         run: |
 62 |           if [[ "${{ toJson(matrix.runner) }}" == *"self-hosted"* ]]; then
 63 |             echo "is_self_hosted=true" >> $GITHUB_OUTPUT
 64 |           else
 65 |             echo "is_self_hosted=false" >> $GITHUB_OUTPUT
 66 |           fi
 67 | 
 68 |       - name: Check if runner has Docker
 69 |         id: check-for-docker
 70 |         run: |
 71 |           if [[ "${{ toJson(matrix.runner) }}" == *"docker"* ]]; then
 72 |             echo "has_docker=true" >> $GITHUB_OUTPUT
 73 |           else
 74 |             echo "has_docker=false" >> $GITHUB_OUTPUT
 75 |           fi
 76 | 
 77 |       - name: Install ccache (non-self-hosted only)
 78 |         # Github's macOS runners
 79 |         if: steps.check-runner.outputs.is_self_hosted == 'false' && startsWith(matrix.platform, 'macos')
 80 |         run: brew install ccache
 81 | 
 82 |       - name: Cache ccache files (non-self-hosted only)
 83 |         # Github's macOS runners
 84 |         if: steps.check-runner.outputs.is_self_hosted == 'false' && startsWith(matrix.platform, 'macos')
 85 |         uses: actions/cache@v3
 86 |         with:
 87 |           path: .ccache
 88 |           key: ${{ runner.os }}-ccache-${{
matrix.platform }}-${{ github.sha }} 89 | restore-keys: | 90 | ${{ runner.os }}-ccache-${{ matrix.platform }}- 91 | 92 | - name: Cache ccache files (Docker builds) 93 | if: steps.check-for-docker.outputs.has_docker == 'true' 94 | uses: actions/cache@v4 95 | with: 96 | path: ~/.ccache 97 | key: ${{ runner.os }}-ccache-${{ matrix.platform }}-${{ github.sha }} 98 | restore-keys: ${{ runner.os }}-ccache-${{ matrix.platform }} 99 | 100 | - name: Extract Package Version from version.txt 101 | run: | 102 | PACKAGE_VERSION=$(cat version.txt | tr -d '\n') 103 | echo "PACKAGE_VERSION=$PACKAGE_VERSION" >> $GITHUB_ENV 104 | 105 | - name: Install the Apple developer certificates 106 | if: matrix.platform == 'macos' || matrix.platform == 'macos-metal' 107 | env: 108 | APP_CERTIFICATE_BASE64: ${{ secrets.MACOS_APP_CERTIFICATE_BASE64 }} 109 | INSTALLER_CERTIFICATE_BASE64: ${{ secrets.MACOS_INSTALLER_CERTIFICATE_BASE64 }} 110 | P12_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_P12_PASSWORD }} 111 | KEYCHAIN_PASSWORD: ${{ secrets.MACOS_KEYCHAIN_PASSWORD }} 112 | CSC_INSTALLER_LINK: ${{ secrets.MACOS_INSTALLER_CERTIFICATE_BASE64 }} 113 | CSC_INSTALLER_KEY_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_P12_PASSWORD }} 114 | run: | 115 | echo "Creating variables" 116 | APP_CERTIFICATE_PATH=$RUNNER_TEMP/app_certificate.p12 117 | INSTALLER_CERTIFICATE_PATH=$RUNNER_TEMP/installer_certificate.p12 118 | KEYCHAIN_PATH=$RUNNER_TEMP/app-signing.keychain-db 119 | 120 | echo "Importing certificates from secrets" 121 | echo -n "$APP_CERTIFICATE_BASE64" | base64 --decode -o $APP_CERTIFICATE_PATH 122 | echo -n "$INSTALLER_CERTIFICATE_BASE64" | base64 --decode -o $INSTALLER_CERTIFICATE_PATH 123 | 124 | echo "Creating temporary keychain" 125 | security -v create-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH 126 | security -v set-keychain-settings -lut 21600 $KEYCHAIN_PATH 127 | security unlock-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH 128 | 129 | echo "Importing certificates to keychain" 130 | security -v import $APP_CERTIFICATE_PATH -P "$P12_PASSWORD" -A -t cert -f pkcs12 -k $KEYCHAIN_PATH 131 | security -v import $INSTALLER_CERTIFICATE_PATH -P "$P12_PASSWORD" -A -t cert -f pkcs12 -k $KEYCHAIN_PATH 132 | 133 | echo "Setting keychain ACLs" 134 | security -v set-key-partition-list -S apple-tool:,apple: -k "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH 135 | 136 | echo "Setting default keychain" 137 | security -v list-keychain -d user -s $KEYCHAIN_PATH 138 | 139 | echo "Extracting Apple Developer Identity" 140 | IDENTITY_INFO=$(security find-identity -v -p codesigning $KEYCHAIN_PATH) 141 | if [ $? -ne 0 ]; then 142 | echo "Error: Failed to find identities. Keychain contents:" 143 | security dump-keychain $KEYCHAIN_PATH 144 | exit 1 145 | fi 146 | 147 | echo "All codesigning identities:" 148 | echo "$IDENTITY_INFO" 149 | 150 | FULL_IDENTITY=$(echo "$IDENTITY_INFO" | grep "Developer ID Application" | sed -n 's/.*"\(Developer ID Application: .*\)"/\1/p') 151 | 152 | if [ -z "$FULL_IDENTITY" ]; then 153 | echo "Error: Failed to extract Developer ID Application identity." 
154 | exit 1 155 | fi 156 | 157 | # Extract just the name and team ID without the prefix 158 | IDENTITY=$(echo "$FULL_IDENTITY" | sed 's/Developer ID Application: //') 159 | 160 | echo "CSC_NAME=$IDENTITY" >> $GITHUB_ENV 161 | echo "Extracted identity for signing: $IDENTITY" 162 | 163 | - name: Install build tools on macOS Metal via Homebrew 164 | if: matrix.platform == 'macos' || matrix.platform == 'macos-metal' 165 | run: brew install python-setuptools autoconf automake autoconf-archive 166 | 167 | - uses: actions/github-script@v7 168 | with: 169 | script: | 170 | core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); 171 | core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); 172 | 173 | - name: Install VCPKG 174 | if: matrix.platform == 'macos' || matrix.platform == 'macos-metal' 175 | run: | 176 | git clone https://github.com/microsoft/vcpkg 177 | ./vcpkg/bootstrap-vcpkg.sh 178 | export VCPKG_INSTALLATION_ROOT=$(pwd)/vcpkg 179 | echo "VCPKG_INSTALLATION_ROOT=${VCPKG_INSTALLATION_ROOT}" >> $GITHUB_ENV 180 | echo "VCPKG_ROOT=${VCPKG_INSTALLATION_ROOT}" >> $GITHUB_ENV 181 | 182 | - name: Cache npm dependencies 183 | if: steps.check-for-docker.outputs.has_docker == 'false' 184 | uses: actions/cache@v4 185 | with: 186 | path: '~/.npm' 187 | key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} 188 | restore-keys: | 189 | ${{ runner.os }}-node- 190 | 191 | - name: Set up Docker 192 | if: steps.check-for-docker.outputs.has_docker == 'true' 193 | run: | 194 | if ! command -v docker &> /dev/null; then 195 | echo "Docker not found. Please install Docker on your self-hosted runner." 196 | exit 1 197 | fi 198 | 199 | - name: Docker Build, Package and Release to GitHub 200 | if: steps.check-for-docker.outputs.has_docker == 'true' 201 | env: 202 | RELEASE_TYPE: ${{ env.RELEASE_TYPE }} 203 | run: | 204 | 205 | # Convert Windows path to Docker-compatible path 206 | WORKSPACE_PATH=$(cygpath -w "${{ github.workspace }}" | sed 's/\\/\//g') 207 | 208 | docker build -t dabarqus-builder -f Dockerfile.build . 209 | 210 | docker run --rm --gpus all \ 211 | -v "${WORKSPACE_PATH}:/dabarqus_src" \ 212 | -v "${HOME}/.ccache:/root/.ccache" \ 213 | -e BUILD_PLATFORM=${{ matrix.platform }} \ 214 | -e GH_TOKEN=${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }} \ 215 | -e GITHUB_TOKEN=${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }} \ 216 | -e GITHUB_DEPLOYMENT_REPO_OWNER=${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }} \ 217 | -e GITHUB_DEPLOYMENT_REPO_NAME=${{ env.GITHUB_DEPLOYMENT_REPO_NAME }} \ 218 | -e RELEASE_TYPE=${{ env.RELEASE_TYPE }} \ 219 | dabarqus-builder 220 | 221 | - name: Native Build, Package and Release to GitHub 222 | if: steps.check-for-docker.outputs.has_docker == 'false' 223 | shell: pwsh 224 | env: 225 | APPLE_ID: ${{ secrets.MACOS_APP_NOTARIZATION_USERID }} 226 | APPLE_PASSWORD: ${{ secrets.MACOS_APP_NOTARIZATION_PASSWORD }} 227 | APPLE_APP_SPECIFIC_PASSWORD : ${{ secrets.MACOS_APP_NOTARIZATION_PASSWORD }} 228 | APPLE_TEAM_ID: ${{ secrets.MACOS_APP_NOTARIZATION_TEAMID }} 229 | DABARQUS_FINE_GRAINED_TOKEN: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }} 230 | RELEASE_TYPE: ${{ env.RELEASE_TYPE }} 231 | run: | 232 | ./build.ps1 -BuildPlatform ${{ matrix.platform }} 233 | 234 | - name: Sign macOS Binaries and Libraries 235 | if: startsWith(matrix.platform, 'macos') 236 | run: | 237 | cd out/install/${{ matrix.platform }} 238 | 239 | # Sign all binaries recursively (not just in bin) 240 | find . 
-type f -perm +111 | while read file; do 241 | echo "Signing executable: $file" 242 | codesign --force --options runtime --timestamp --deep --sign "${{ env.CSC_NAME }}" "$file" 243 | codesign --verify --deep --strict --verbose "$file" 244 | done 245 | 246 | # Sign all dylibs recursively (not just in lib) 247 | find . -type f -name "*.dylib" | while read file; do 248 | echo "Signing library: $file" 249 | codesign --force --options runtime --timestamp --deep --sign "${{ env.CSC_NAME }}" "$file" 250 | codesign --verify --deep --strict --verbose "$file" 251 | done 252 | 253 | # Sign all bundles 254 | find . -type d -name "*.app" -or -name "*.framework" | while read bundle; do 255 | echo "Signing bundle: $bundle" 256 | codesign --force --options runtime --timestamp --deep --sign "${{ env.CSC_NAME }}" "$bundle" 257 | codesign --verify --deep --strict --verbose "$bundle" 258 | done 259 | 260 | cd ../../.. 261 | 262 | - name: Create and Notarize PKG 263 | if: startsWith(matrix.platform, 'macos') 264 | env: 265 | APPLE_ID: ${{ secrets.MACOS_APP_NOTARIZATION_USERID }} 266 | APPLE_PASSWORD: ${{ secrets.MACOS_APP_NOTARIZATION_PASSWORD }} 267 | APPLE_TEAM_ID: ${{ secrets.MACOS_APP_NOTARIZATION_TEAMID }} 268 | run: | 269 | PACKAGE_VERSION=$(cat version.txt | tr -d '\n') 270 | 271 | # Build the pkg 272 | chmod +x build-pkg.sh 273 | ./build-pkg.sh ${{ matrix.platform }} "${{ env.CSC_NAME }}" 274 | 275 | PKG_PATH="Dabarqus-${PACKAGE_VERSION}-${{ matrix.platform }}.pkg" 276 | 277 | # Submit for notarization and capture the submission ID 278 | echo "Submitting pkg for notarization..." 279 | SUBMISSION_ID=$(xcrun notarytool submit "$PKG_PATH" \ 280 | --apple-id "$APPLE_ID" \ 281 | --password "$APPLE_PASSWORD" \ 282 | --team-id "$APPLE_TEAM_ID" \ 283 | --wait \ 284 | | grep "id:" | head -n1 | awk '{print $2}') 285 | 286 | # Get detailed information about the submission 287 | echo "Getting detailed notarization info..." 288 | xcrun notarytool info "$SUBMISSION_ID" \ 289 | --apple-id "$APPLE_ID" \ 290 | --password "$APPLE_PASSWORD" \ 291 | --team-id "$APPLE_TEAM_ID" 292 | 293 | # Get the submission log 294 | echo "Getting notarization log..." 295 | xcrun notarytool log "$SUBMISSION_ID" \ 296 | --apple-id "$APPLE_ID" \ 297 | --password "$APPLE_PASSWORD" \ 298 | --team-id "$APPLE_TEAM_ID" 299 | 300 | # Check if notarization succeeded 301 | STATUS=$(xcrun notarytool info "$SUBMISSION_ID" \ 302 | --apple-id "$APPLE_ID" \ 303 | --password "$APPLE_PASSWORD" \ 304 | --team-id "$APPLE_TEAM_ID" \ 305 | | grep "status:" | awk '{print $2}') 306 | 307 | if [ "$STATUS" != "Accepted" ]; then 308 | echo "Notarization failed with status: $STATUS" 309 | exit 1 310 | fi 311 | 312 | echo "Stapling notarization ticket to $PKG_PATH" 313 | xcrun stapler staple "$PKG_PATH" 314 | echo "PKG_PATH=${PKG_PATH}" >> $GITHUB_ENV 315 | echo "PKG_NAME=${PKG_PATH}" >> $GITHUB_ENV 316 | 317 | - name: Create Zip Archive 318 | run: | 319 | PACKAGE_VERSION=$(cat version.txt | tr -d '\n') 320 | ZIP_NAME="dabarqus-${{ matrix.platform }}-${PACKAGE_VERSION}.zip" 321 | 322 | cd out/install/${{ matrix.platform }} 323 | 324 | # Remove unnecessary files 325 | if [[ "${{ matrix.platform }}" == windows* ]]; then 326 | find . -type f ! \( -name "*.dll" -o -name "*.exe" -o -path "./bin/*" \) -delete 327 | elif [[ "${{ matrix.platform }}" == linux* ]]; then 328 | find . -type f ! 
\( -name "*.so" -o -path "./bin/*" \) -delete 329 | # Ensure executables have proper permissions 330 | find ./bin -type f -exec chmod 755 {} \; 331 | # Preserve symlinks in lib directory 332 | find ./lib -type l -exec chmod 777 {} \; 333 | elif [[ "${{ matrix.platform }}" == macos* ]]; then 334 | find . -type f ! \( -name "*.dylib" -o -path "./bin/*" \) -delete 335 | # Ensure executables have proper permissions 336 | find ./bin -type f -exec chmod 755 {} \; 337 | fi 338 | 339 | find ./bin -type f -name "*.py" -delete 340 | 341 | # Remove empty directories 342 | find . -type d -empty -delete 343 | 344 | # Create the zip archive with permission and symlink preservation 345 | if [[ "${{ matrix.platform }}" == linux* ]]; then 346 | # -X preserves permissions 347 | # -r recursive 348 | zip -r -X "${ZIP_NAME}" . -x "*.DS_Store" "*.git*" 349 | elif [[ "${{ matrix.platform }}" == macos* ]]; then 350 | # zip -r -X "${ZIP_NAME}" . -x "*.DS_Store" "*.git*" 351 | # Use ditto to preserve extended attributes and resource forks 352 | ditto -c -k --sequesterRsrc --keepParent . "${ZIP_NAME}" 353 | else 354 | zip -r "${ZIP_NAME}" . -x "*.DS_Store" "*.git*" 355 | fi 356 | 357 | # For macOS platforms, notarize the zip archive 358 | if [[ "${{ matrix.platform }}" == macos* ]]; then 359 | # Notarize the zip archive 360 | xcrun notarytool submit "${ZIP_NAME}" --wait \ 361 | --apple-id "${{ secrets.MACOS_APP_NOTARIZATION_USERID }}" \ 362 | --password "${{ secrets.MACOS_APP_NOTARIZATION_PASSWORD }}" \ 363 | --team-id "${{ secrets.MACOS_APP_NOTARIZATION_TEAMID }}" 364 | 365 | # # Staple the notarization ticket to the zip archive 366 | # xcrun stapler staple "${ZIP_NAME}" 367 | fi 368 | 369 | # Move the zip file to the root of the workspace 370 | mv "${ZIP_NAME}" ../../../ 371 | 372 | ls -l ../../../${ZIP_NAME} 373 | 374 | # Go back to the root directory 375 | cd ../../../ 376 | 377 | # Find the exact path of the zip file 378 | ZIP_PATH=$(find . -name "${ZIP_NAME}") 379 | 380 | if [ -z "$ZIP_PATH" ]; then 381 | echo "Error: Zip file not found" 382 | exit 1 383 | fi 384 | 385 | echo "ZIP_PATH=${ZIP_PATH}" >> $GITHUB_ENV 386 | echo "ZIP_NAME=${ZIP_NAME}" >> $GITHUB_ENV 387 | 388 | - name: Release Zip Archive 389 | env: 390 | GH_TOKEN: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }} 391 | PUBLISH_OWNER: ${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }} 392 | PUBLISH_REPO: ${{ env.GITHUB_DEPLOYMENT_REPO_NAME }} 393 | RELEASE_TYPE: ${{ env.RELEASE_TYPE }} 394 | run: | 395 | PACKAGE_VERSION=$(cat version.txt | tr -d '\n') 396 | if gh release view "v${PACKAGE_VERSION}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" &>/dev/null; then 397 | echo "Release v${PACKAGE_VERSION} exists. Uploading zip file to existing release." 398 | gh release upload "v${PACKAGE_VERSION}" "${ZIP_NAME}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" --clobber 399 | else 400 | echo "Release v${PACKAGE_VERSION} does not exist. Creating ${RELEASE_TYPE} release and uploading zip file." 
401 | if [ "$RELEASE_TYPE" = "draft" ]; then 402 | gh release create "v${PACKAGE_VERSION}" "${ZIP_NAME}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" --draft 403 | else 404 | gh release create "v${PACKAGE_VERSION}" "${ZIP_NAME}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" 405 | fi 406 | fi 407 | 408 | - name: Release PKG 409 | if: startsWith(matrix.platform, 'macos') 410 | env: 411 | GH_TOKEN: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }} 412 | PUBLISH_OWNER: ${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }} 413 | PUBLISH_REPO: ${{ env.GITHUB_DEPLOYMENT_REPO_NAME }} 414 | RELEASE_TYPE: ${{ env.RELEASE_TYPE }} 415 | run: | 416 | PACKAGE_VERSION=$(cat version.txt | tr -d '\n') 417 | PKG_PATH="$PKG_NAME" 418 | 419 | if gh release view "v${PACKAGE_VERSION}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" &>/dev/null; then 420 | echo "Release v${PACKAGE_VERSION} exists. Uploading pkg file to existing release." 421 | gh release upload "v${PACKAGE_VERSION}" "${PKG_PATH}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" --clobber 422 | else 423 | echo "Release v${PACKAGE_VERSION} does not exist. Creating ${RELEASE_TYPE} release and uploading pkg file." 424 | if [ "$RELEASE_TYPE" = "draft" ]; then 425 | gh release create "v${PACKAGE_VERSION}" "${PKG_PATH}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" --draft 426 | else 427 | gh release create "v${PACKAGE_VERSION}" "${PKG_PATH}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" 428 | fi 429 | fi 430 | 431 | - name: Build and Push Runtime Docker Image 432 | if: matrix.platform == 'linux' || matrix.platform == 'linux-nvidia' 433 | env: 434 | PACKAGE_VERSION: ${{ env.PACKAGE_VERSION }} 435 | run: | 436 | # Determine Docker base image based on platform 437 | BASE_IMAGE="ubuntu:22.04" 438 | TAG_SUFFIX="" 439 | PACKAGE_NAME="dabarqus" 440 | if [[ "${{ matrix.platform }}" == "linux-nvidia" ]]; then 441 | BASE_IMAGE="nvidia/cuda:12.1.0-base-ubuntu22.04" 442 | TAG_SUFFIX="-nvidia" 443 | PACKAGE_NAME="dabarqus-nvidia" 444 | fi 445 | 446 | echo "Building Docker image for platform ${{ matrix.platform }}" 447 | echo "Base image: ${BASE_IMAGE}" 448 | echo "Package name: ${PACKAGE_NAME}" 449 | echo "Package version: ${{ env.PACKAGE_VERSION }}" 450 | echo "Release type: ${{ env.RELEASE_TYPE }}" 451 | 452 | # Build the runtime Docker image 453 | docker build \ 454 | --build-arg BASE_IMAGE=${BASE_IMAGE} \ 455 | --build-arg PLATFORM=${{ matrix.platform }} \ 456 | -t ${PACKAGE_NAME}:${{ env.PACKAGE_VERSION }} \ 457 | -t ${PACKAGE_NAME}:latest . 458 | 459 | # If this is a release build, push to container registry 460 | if [[ "${{ env.RELEASE_TYPE }}" == "release" ]]; then 461 | echo "Pushing Docker image to container registry..." 462 | echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin 463 | 464 | docker tag ${PACKAGE_NAME}:${{ env.PACKAGE_VERSION }} ghcr.io/${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }}/${PACKAGE_NAME}:${{ env.PACKAGE_VERSION }} 465 | docker tag ${PACKAGE_NAME}:${{ env.PACKAGE_VERSION }} ghcr.io/${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }}/${PACKAGE_NAME}:latest 466 | 467 | docker push ghcr.io/${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }}/${PACKAGE_NAME}:${{ env.PACKAGE_VERSION }} 468 | docker push ghcr.io/${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }}/${PACKAGE_NAME}:latest 469 | 470 | # Set package visibility to public 471 | echo "Setting package visibility to public..." 
472 |             gh api \
473 |               --method PATCH \
474 |               -H "Accept: application/vnd.github+json" \
475 |               -H "X-GitHub-Api-Version: 2022-11-28" \
476 |               orgs/${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }}/packages/container/${PACKAGE_NAME}/visibility \
477 |               -f visibility=public || echo "Warning: Could not set package visibility. The package may need to be manually set to public."
478 |           fi
479 | 
480 |       - name: Create and Push Git Tag if Not Exists
481 |         run: |
482 |           PACKAGE_VERSION=${{ env.PACKAGE_VERSION }}
483 |           echo "Checking for existing tag for version $PACKAGE_VERSION..."
484 | 
485 |           # Fetch tags to ensure we have the latest tags in the local git history
486 |           git fetch --tags >/dev/null 2>&1;
487 | 
488 |           # Check if the tag already exists
489 |           if git rev-parse "v$PACKAGE_VERSION" >/dev/null 2>&1; then
490 |             echo "Tag v$PACKAGE_VERSION already exists. Skipping tag creation."
491 |           else
492 |             echo "Tag v$PACKAGE_VERSION does not exist. Creating and pushing tag..."
493 |             git config user.name "github-actions"
494 |             git config user.email "github-actions@github.com"
495 |             git tag -a "v$PACKAGE_VERSION" -m "Release version $PACKAGE_VERSION"
496 |             git push origin "v$PACKAGE_VERSION"
497 |             echo "Tag v$PACKAGE_VERSION created and pushed."
498 |           fi
499 | 
500 |       - name: Save Version Tag Name
501 |         run: echo "v${{ env.PACKAGE_VERSION }}" > tag.txt
502 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
SOFTWARE LICENSE AGREEMENT

Copyright (c) 2024 Electric Pipelines

1. License Grant: Electric Pipelines grants you a non-exclusive, non-transferable license to use the Dabarqus software product and its documentation.

2. Restrictions: You may not redistribute, sell, decompile, reverse engineer, disassemble, or otherwise reduce Dabarqus to a human-perceivable form.

3. Warranty Disclaimer: Dabarqus is provided 'as is' without warranty of any kind, either express or implied.

4. Limitation of Liability: In no event shall Electric Pipelines be liable for any damages whatsoever arising out of the use of or inability to use Dabarqus.

5. Termination: This license is effective until terminated. Your rights under this license will terminate automatically without notice from Electric Pipelines if you fail to comply with any term(s) of this license.

By installing or using Dabarqus, you agree to be bound by the terms of this license.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Dabarqus

**Dabarqus** is a standalone application that implements a complete RAG solution. It is designed to be easy to use and easy to integrate with your existing applications. Dabarqus includes a REST API, a command-line interface, and an admin dashboard.

## Why Create Dabarqus

If you're a developer, building a basic RAG solution is pretty straightforward. There are tons of tutorials and how-tos, as well as Python code to reuse. But if you're deploying your RAG solution within a company, or to end-user PCs, you also have to figure out some potentially tricky deployment and maintenance issues: deploying Python, a vector database, and the right embedding AI model, plus possible licensing challenges. Dabarqus was created to address these issues with a standalone, all-in-one solution with no dependencies.
It's written in low-level C++ with built-in vector search capabilities, the flexibility to use whichever embedding AI model is best for your use case, and a REST API for easy development integration.

## Table of Contents

1. [Quick Start](#quick-start)
   - [Ubuntu](#ubuntu)
   - [macOS](#macos)
   - [Windows](#windows)
2. [Features](#features)
3. [Barq - Command-line Interface](#barq---command-line-interface-to-dabarqus)
   - [Using with the CLI](#using-with-the-cli)
     - [Store](#store)
     - [Retrieve](#retrieve)
4. [API - REST Interface](#api---rest-interface-to-dabarqus)
   - [Using the API](#using-the-api)
5. [Examples](#examples)

## **Quick start**

### Ubuntu

Dabarqus runs on the CPU alone, or can use NVIDIA CUDA for higher performance. The CPU version requires no additional software. To use the CUDA (aka NVIDIA cuBLAS) version, you will need an NVIDIA GPU with CUDA support, the NVIDIA driver installed, and the CUDA download of Dabarqus.

0. To install NVIDIA drivers on Ubuntu (if you have an NVIDIA GPU), run the following command:

   ```bash
   sudo ubuntu-drivers install
   ```

1. Unzip the Dabarqus file into a folder

   ```bash
   unzip Dabarqus-linux-DOWNLOADED_VERSION.zip
   cd Dabarqus-linux-DOWNLOADED_VERSION
   chmod +x ./bin/*
   ./bin/barq service install
   ```

2. Open a browser and go to `http://localhost:6568/admin`

### macOS

For package file downloads, do the following:

1. Double click the Dabarqus-macos-DOWNLOADED_VERSION.pkg and install
2. After installation, open your browser and navigate to `http://localhost:6568/admin`

For zip file downloads, do the following:

1. Unzip the Dabarqus file into a folder

   ```bash
   unzip Dabarqus-macos-DOWNLOADED_VERSION.zip
   cd Dabarqus-macos-DOWNLOADED_VERSION
   ./bin/barq service install
   ```

2. Open a browser and go to `http://localhost:6568/admin`

### Windows

1. Double click the Dabarqus-windows-DOWNLOADED_VERSION.exe and install
2. Double click the Dabarqus icon or navigate to `http://localhost:6568/admin`
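On any platform, you can confirm the service is up by hitting its health endpoint. Below is a minimal sketch in Python, assuming the default port and the `/api/health` route listed in the API table further down (the exact response body may vary between versions):

```python
import requests

# The Dabarqus service listens on port 6568 by default;
# /api/health reports the health status of the service.
response = requests.get("http://localhost:6568/api/health", timeout=5)
response.raise_for_status()
print("Dabarqus is up:", response.text)
```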
## Features

1. **Ingest documents, databases, and APIs**: Ingest diverse data sources like PDFs*, emails, and raw data.
   - No matter where your data resides, Dabarqus can make it available to your LLM

2. **LLM-Style Prompting**: Use simple, LLM-style prompts when speaking to your memory banks.
   - Dabarqus will retrieve relevant data using the same prompt you give your LLM
   - No need to construct special queries or learn a new query language

3. **REST API**: Comprehensive control interface for downloading models, prompting semantic indexes, and even LLM inference.
   - REST is a standard interface that enjoys wide adoption, so your team doesn't need to learn a new, complex system
   - Allows comprehensive integration with existing development tools for easy adoption

4. **Multiple Semantic Indexes (Memory Banks)**: Group your data into separate semantic indexes (memory banks).
   - Keep your data organized by subject matter, category, or whatever grouping you like
   - Memory banks are portable, so you can create and use them wherever you like

5. **SDKs**: Native SDKs in [Python](https://pypi.org/project/dabarqus/) and [JavaScript](https://www.npmjs.com/package/dabarqus).
   - Easily integrates with Python and JavaScript projects

6. **LLM-Friendly Output**: Produces LLM-ready output that works with ChatGPT, Ollama, and any other LLM provider
   - Works seamlessly with the LLM of your choice

7. **Admin Dashboard**: Monitor performance, test memory banks, and make changes in an easy-to-use UI
   - Easy access to Dabarqus features
   - Monitor app performance with real-time graphs

8. **Mac, Linux, and Windows Support**: Runs natively with zero dependencies on all platforms: macOS (Intel or Metal), Linux, and Windows (CPU or GPU)
   - Runs on whatever platform you use

9. **LLM Inference**: Chat with LLM models right through the Dabarqus API/SDKs
   - Built-in chatbot capabilities for use in your applications

\*[Dabarqus Professional Edition](https://dabarqus.com/features#dabarqus-editions) is required for email, messaging, and API support.

## Barq - Command-line interface to Dabarqus

To install: `barq service install`

To uninstall: `barq service uninstall`

### Using with the CLI

#### Store

Usage: `barq store --input-path <path> --memory-bank "<memory bank name>"`

Example: `barq store --input-path C:\docs --memory-bank documents`

#### Retrieve

Usage: `barq retrieve --memory-bank "<memory bank name>"`

- Example: `barq retrieve --memory-bank documents`
- Example: `barq retrieve --memory-bank documents --query "Tell me about the documents" --query-limit 3`
  This will display three answers to the query from the 'documents' memory bank
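The same store and retrieve operations are also available programmatically. Here is a rough sketch using the [Python SDK](https://pypi.org/project/dabarqus/); the `barq` client and the `query_semantic_search` call are the same ones used by the example apps under **examples**:

```python
from dabarqus import barq

# Connect to the locally running Dabarqus service
sdk = barq("http://localhost:6568")

# Rough equivalent of:
#   barq retrieve --memory-bank documents --query "Tell me about the documents" --query-limit 3
results = sdk.query_semantic_search(
    "Tell me about the documents",
    limit=3,
    memory_bank="documents",
)
print(results)
```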
## API - REST interface to Dabarqus

| Method | Endpoint | Description | Parameters |
|--------|----------|-------------|------------|
| GET | /health or /api/health | Check the health status of the service | None |
| GET | /admin/* | Serve the admin application | None |
| GET | /odobo/* | Serve the Odobo application | None |
| GET | /api/models | Retrieve available AI models | None |
| GET | /api/model/metadata | Get metadata for a specific model | `modelRepo`, `filePath` (optional) |
| GET | /api/downloads | Get information about downloaded items | `modelRepo` (optional), `filePath` (optional) |
| GET | /api/downloads/enqueue | Enqueue a new download | `modelRepo`, `filePath` |
| GET | /api/downloads/cancel | Cancel a download | `modelRepo`, `filePath` |
| GET | /api/downloads/remove | Remove a downloaded item | `modelRepo`, `filePath` |
| GET | /api/inference | Get information about inference items | `alias` (optional) |
| GET | /api/inference/start | Start an inference | `alias`, `modelRepo`, `filePath`, `address` (optional), `port` (optional), `contextSize` (optional), `gpuLayers` (optional), `chatTemplate` (optional) |
| GET | /api/inference/stop | Stop an inference | `alias` |
| GET | /api/inference/status | Get the status of an inference | `alias` (optional) |
| GET | /api/inference/reset | Reset an inference | `alias` |
| GET | /api/inference/restart | Restart the current inference | None |
| GET | /api/hardware or /api/hardwareinfo | Get hardware information | None |
| GET | /api/silk | Get memory status | None |
| GET | /api/silk/enable | Enable memories | None |
| GET | /api/silk/disable | Disable memories | None |
| GET | /api/silk/memorybanks | Get memory banks information | None |
| GET | /api/silk/memorybank/activate | Activate a memory bank | `memorybank` |
| GET | /api/silk/memorybank/deactivate | Deactivate a memory bank | `memorybank`, `all` |
| GET | /api/silk/query | Perform a semantic query | (Parameters handled by Silk retriever) |
| GET | /api/silk/health | Check the health of the Silk retriever | None |
| GET | /api/silk/model/metadata | Get model metadata from the Silk retriever | (Parameters handled by Silk retriever) |
| GET | /api/shutdown | Initiate server shutdown | None |
| POST | /api/utils/log | Write to log | JSON body with log details |
| POST | /api/silk/embedding | Get an embedding from the Silk retriever | (Parameters handled by Silk retriever) |

### Using the API

- Example: `curl "http://localhost:6568/api/silk/query?q=Tell%20me%20about%20the%20documents&limit=3&memorybank=docs"`

## Examples

Examples of Dabarqus in action can be found in this repo under **examples**.

- PythonPDFChatbot-RESTAPI: An example chatbot program that uses Dabarqus via the REST API to chat with your PDFs.
- PythonPDFChatbot-PythonSDK: An example chatbot program that uses Dabarqus via the [Python SDK](https://pypi.org/project/dabarqus/) to chat with your PDFs.
- CreatingAMemoryBank: A Python example of storing documents into a memory bank (semantic index) using the Python SDK

### **Notes:**

1. Dabarqus Professional Edition is required for email, messaging, and API support.
--------------------------------------------------------------------------------
/examples/CreatingAMemoryBank/README.md:
--------------------------------------------------------------------------------
# Create a Memory Bank Example

This example uses the [Dabarqus Python SDK](https://pypi.org/project/dabarqus/) to store PDFs of your choice into a memory bank (semantic index). With this memory bank, you can retrieve relevant information using Dabarqus. It is recommended to run this example first so that you have a working memory bank before you try out the other examples.

## Prerequisites

- Python 3.8+
- Dabarqus server running and accessible

## Setup

1. Create a virtual environment (**optional, but recommended**):
   `python -m venv ./venv`
   **Mac or Linux**:
   `source venv/bin/activate`
   **Windows**:
   `venv\Scripts\activate.ps1`
2. Install the required Python libraries:
   `python -m pip install -r requirements.txt`
3. Run the app:
   `python store_files.py`

## Sample Usage

Run the following:
`python ./store_files.py --memory-bank MyNewRecipeBook --input-path ./recipes/`

This will store the contents of `./recipes`, a collection of included recipes, into a new memory bank called `MyNewRecipeBook`.

After running the script:

1. You'll see progress messages as each file is processed and added to the memory bank.
2. Once complete, you'll receive a confirmation message that the memory bank has been created.

## Verifying the Memory Bank

To verify that your memory bank was created successfully:

1. Open the Dabarqus admin interface (typically at `http://localhost:6568/admin`).
2. Navigate to the "Memory Banks" section.
3. You should see your newly created memory bank (e.g., "MyNewRecipeBook") listed.
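Alternatively, you can check from Python. A minimal sketch using the same SDK call the chatbot examples in this repo rely on (`get_memory_banks` returns a list of bank records with a `name` field):

```python
from dabarqus import barq

sdk = barq("http://localhost:6568")

# List all memory banks known to the server and look for the new one
banks = [bank["name"] for bank in sdk.get_memory_banks()]
print("Memory banks:", banks)
assert "MyNewRecipeBook" in banks, "memory bank was not created"
```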
-------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/wish_i_made_more__yeast_potato_lem_orange_rolls_(577).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/wish_i_made_more__yeast_potato_lem_orange_rolls_(577).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/wolf_creek_inn__macaroni___cheese_(578).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/wolf_creek_inn__macaroni___cheese_(578).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/wolfpack__pork_sandwich_(579).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/wolfpack__pork_sandwich_(579).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/world_s_best____and_easiest___teriyaki_chicken_wings_(298).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/world_s_best____and_easiest___teriyaki_chicken_wings_(298).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/world_s_best__macaroni___cheese_(580).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/world_s_best__macaroni___cheese_(580).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/you_bring_the_macaroni_salad___macaroni_salad_(299).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/you_bring_the_macaroni_salad___macaroni_salad_(299).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/you_can_t_eat_just_one__ice_box_cookies_(300).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/you_can_t_eat_just_one__ice_box_cookies_(300).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/you_like_cheese___garlic_bread_spread_(581).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/you_like_cheese___garlic_bread_spread_(581).pdf 
--------------------------------------------------------------------------------
/examples/CreatingAMemoryBank/recipes/you_ll_never_miss_the_noodles__lasagna_(584).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/you_ll_never_miss_the_noodles__lasagna_(584).pdf
--------------------------------------------------------------------------------
/examples/CreatingAMemoryBank/recipes/you_want_me_to_do_what_to_the_buttered_noodles_(582).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/you_want_me_to_do_what_to_the_buttered_noodles_(582).pdf
--------------------------------------------------------------------------------
/examples/CreatingAMemoryBank/requirements.txt:
--------------------------------------------------------------------------------
certifi==2024.8.30
charset-normalizer==3.4.0
dabarqus==1.0.1
idna==3.10
requests==2.32.3
urllib3==2.2.3
--------------------------------------------------------------------------------
/examples/CreatingAMemoryBank/store_files.py:
--------------------------------------------------------------------------------
import argparse
import os
import sys
import time

from dabarqus import barq

def main():
    parser = argparse.ArgumentParser(description="Store documents using Dabarqus SDK")
    parser.add_argument("--memory-bank", required=True, help="Name of the memory bank")
    parser.add_argument("--input-path", required=True, help="Path to the input file or directory")
    parser.add_argument("--no-override", action="store_true", help="Append a random suffix to the memory bank name to avoid overriding an existing bank")
    parser.add_argument("--server-url", default="http://localhost:6568", help="Dabarqus server URL")
    args = parser.parse_args()

    # Initialize the SDK
    sdk = barq(args.server_url)

    # Check the health of the service
    health = sdk.check_health()
    print(f"Service health: {health}")

    memory_bank_name = args.memory_bank

    if args.no_override:
        # Append a random suffix so an existing memory bank is not overridden
        import random
        import string
        random_string = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))
        memory_bank_name = args.memory_bank + random_string

    # Convert input path to absolute path if it's relative
    input_path = args.input_path
    if not os.path.isabs(input_path):
        input_path = os.path.abspath(input_path)

    print(f"Using absolute input path: {input_path}")

    # Enqueue ingestion
    ingestion_result = sdk.enqueue_ingestion(memory_bank_name=memory_bank_name, input_path=input_path, overwrite=True)
    print(f"Ingestion result: {ingestion_result}")

    # Poll until the ingestion is complete
    ingestions = sdk.check_ingestion_progress(memory_bank_name)
    while ingestions["status"] != "complete":
        ingestions = sdk.check_ingestion_progress(memory_bank_name)
        sys.stdout.write(f"Ingestion progress: {ingestions['progress']:.2f}% \r")
        sys.stdout.flush()
        time.sleep(1)  # avoid hammering the server while polling
    print("\nIngestion complete!")

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/.gitignore:
--------------------------------------------------------------------------------
# Python virtual environment
*venv/

# Python cache files
__pycache__/
*.pyc
*.pyo
*.pyd

# Retrieval results
retrievals/

# IDE-specific files (e.g., for VSCode)
.vscode/

# Operating system files
.DS_Store
Thumbs.db

# Jupyter Notebook checkpoints
.ipynb_checkpoints/

# Logs
*.log

# Environment variables
.env

# Build directories
build/
dist/

# Temporary files
*.tmp
*.bak
*.swp

# Conversation logs
conversation*
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/README.md:
--------------------------------------------------------------------------------
# Dabarqus Python Chatbot UI Example: Python SDK

This is an example chatbot program that uses Dabarqus via the [Python SDK](https://pypi.org/project/dabarqus/) to chat with your PDFs. It uses Gradio for the frontend UI and Ollama to provide the LLMs.

There is another version of this demo that uses the REST API; it can be found under `PythonPDFChatbot-RESTAPI`.

## Features

- Interactive chat interface
- Memory bank selection
- Integration with Dabarqus API for semantic search
- Powered by Gradio for easy web deployment

## Prerequisites

- Python 3.8+
- Dabarqus server running and accessible
- [Dabarqus Python SDK](https://pypi.org/project/dabarqus/)
- [Ollama](https://ollama.com/download)

## Installation

### Dabarqus Service

Important: This chatbot requires Dabarqus to be installed and running on your machine. Before using this chatbot, please ensure that you have:

- Downloaded and installed Dabarqus
- Started the Dabarqus service on your machine

The chatbot communicates with the Dabarqus service via its API, so having Dabarqus running is essential for the chatbot to function correctly.
Once Dabarqus is set up and running, you can proceed with using this chatbot. For more information on how to start and manage the Dabarqus service, please refer to the [Dabarqus quick start](https://github.com/electricpipelines/barq?tab=readme-ov-file#quick-start).

### Chatbot installation

1. Clone the repository:
   `git clone https://github.com/electricpipelines/barq.git`
   `cd barq/examples/PythonPDFChatbot-PythonSDK`

2. Create a virtual environment (**optional but recommended**):
   `python -m venv venv`
   `source venv/bin/activate # On Windows, use 'venv\Scripts\activate'`

3. Install the required dependencies:
   `pip install -r requirements.txt`

### Ollama

If you have not already downloaded and set up Ollama:

1. Follow the installation instructions on the [Ollama](https://ollama.com/download) website

2. After installation, install at least one LLM:
   `ollama pull llama3`

## Running the Application

1. Ensure your Dabarqus server is running and accessible.
2. Start the Gradio application:
   `python app.py`
3. The application will start and provide a local URL (usually http://127.0.0.1:7860).
4. Open this URL in your web browser to access the chat interface.

### Memory Banks

You need a **memory bank** to chat with your PDFs.
You have a few options:

- Run the CreatingAMemoryBank example.
- Create a memory bank through the admin interface:
  1. Open the Dabarqus admin interface (typically at `http://localhost:6568/admin`).
  2. Navigate to the "Memory Banks" section.
  3. Create a new memory bank there and ingest your PDFs into it.

## File Structure

- `app.py`: Main application file containing the Gradio interface
- `templates/`: Directory containing prompt templates
- `sample_prompt.md`: Sample prompt file for the chatbot
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/app.py:
--------------------------------------------------------------------------------
import gradio as gr
from dabarqus import barq
from datetime import datetime
import json
import os
import ollama

# Initialize the Dabarqus SDK
sdk = barq("http://localhost:6568")

def check_dependencies():
    errors = []
    try:
        health = sdk.check_health()
        if health != "OK":
            errors.append("Dabarqus is not responding properly.")
    except Exception as e:
        errors.append(f"Dabarqus is not running or installed properly. Error: {str(e)}")
    return errors

def display_error_message(errors):
    if errors:
        error_msg = "The following errors occurred:\n" + "\n".join(errors)
        gr.Warning(error_msg)
        return gr.update(visible=True), error_msg
    return gr.update(visible=False), ""

def get_memory_banks():
    try:
        memory_banks = sdk.get_memory_banks()
        return [bank['name'] for bank in memory_banks if bank.get('name')]
    except Exception as e:
        print(f"Error fetching memory banks: {e}")
        return ["Default"]

def get_inference_models():
    try:
        inference_info = sdk.get_inference_info()
        if inference_info:
            # Return a list of tuples: (alias, full_model_object)
            return [(item.get('alias', 'Unknown'), item) for item in inference_info]
        else:
            return [("No model running", None)]
    except Exception as e:
        print(f"Error fetching inference models: {e}")
        return [("Error fetching model", None)]

def chat_function(message, history, memory_bank, model, query_limit, retrieval_prompt_template, full_prompt_template):

    # Convert the user's message to a retrieval prompt
    retrieval_prompt = convert_prompt_to_retrieval_prompt(message, model)

    # Retrieve data
    retrieved_data = sdk.query_semantic_search(retrieval_prompt, limit=int(query_limit), memory_bank=memory_bank)

    # Prepare the prompt for the LLM
    full_prompt = f"{full_prompt_template} : RAG_response {retrieved_data}, keywords: {retrieval_prompt}, original_prompt: {message}"

    # Use Ollama to generate a response
    response = ""
    stream = ollama.chat(
        model=model,
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": full_prompt}],
        stream=True,
    )

    for chunk in stream:
        response += chunk['message']['content']
        # gr.Chatbot tuples are (user message, bot reply)
        yield history + [(message, response)]
def convert_prompt_to_retrieval_prompt(prompt, model="llama3"):
    response = ollama.chat(model=model, messages=[
        {
            'role': 'user',
            'content': f"Take the user's prompt to create a prompt for a semantic database retriever. Only respond with a list of comma-separated keywords. DO NOT say anything before or after the keywords.#Prompt:{prompt}",
        },
    ])
    # ollama.chat returns a full response object; only the message text
    # is usable as a retrieval prompt
    return response['message']['content']

def save_conversation(history):
    if not history:
        gr.Warning("No conversation to save.")
        return gr.update(visible=False)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"conversation_{timestamp}.json"
    with open(filename, "w") as f:
        json.dump(history, f)

    gr.Info(f"Conversation saved as {filename}")
    return gr.update(value=filename, visible=True)

def toggle_load_file(file, chatbot):
    if file is None:
        return gr.update(visible=True), chatbot

    try:
        with open(file.name, "r") as f:
            history = json.load(f)
        gr.Info("Conversation loaded successfully.")
        return gr.update(value=None, visible=False), history
    except json.JSONDecodeError:
        gr.Warning("Invalid JSON file. Please select a valid conversation file.")
    except Exception as e:
        gr.Warning(f"Error loading conversation: {str(e)}")

    return gr.update(value=None), chatbot

def save_prompts(retrieval_prompt, full_prompt):
    prompts = {
        "retrieval_prompt": retrieval_prompt,
        "full_prompt": full_prompt
    }
    with open("custom_prompts.json", "w") as f:
        json.dump(prompts, f)
    gr.Info("Prompts saved successfully.")

def load_prompts():
    if os.path.exists("custom_prompts.json"):
        with open("custom_prompts.json", "r") as f:
            prompts = json.load(f)
        return prompts["retrieval_prompt"], prompts["full_prompt"]
    else:
        gr.Warning("No saved prompts found.")
        return None, None

def enable_input(choice):
    return gr.update(interactive=bool(choice)), gr.update(interactive=bool(choice))

# Get available Ollama models
def get_ollama_models():
    try:
        models = ollama.list()
        return [model['name'] for model in models['models']]
    except Exception as e:
        print(f"Error fetching Ollama models: {e}")
        return ["llama3"]  # Default model if fetching fails

# Styling for the interface, applied via gr.Blocks(css=...)
CUSTOM_CSS = """
.large-text-input textarea {
    font-size: 16px !important;
}
.gradio-slider input[type="number"] {
    width: 80px;
}
#component-22 {
    margin-top: -20px;
}
#component-22 .gr-button {
    min-width: 60px;
    height: 30px;
}
.advanced-settings {
    border: 1px solid #e0e0e0;
    border-radius: 8px;
    padding: 15px;
    margin-top: 20px;
}
.advanced-settings .gr-form {
    border: none;
    padding: 0;
}
"""

with gr.Blocks(title="dabarqus", css=CUSTOM_CSS) as demo:
    memory_banks = get_memory_banks()
    ollama_models = get_ollama_models()

    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("")  # Empty markdown to take up space
        with gr.Column(scale=2):
            with gr.Row():
                save_button = gr.Button("Save", size="sm")
                load_button = gr.Button("Load", size="sm")
                file_output = gr.File(label="Saved Conversation", visible=False)
                file_input = gr.File(label="Load Conversation", file_types=[".json"], visible=False)

    with gr.Row():
        memory_bank = gr.Dropdown(
            choices=memory_banks,
            label="Select Memory Bank",
            value=None,
            allow_custom_value=False,
            info="Choose a memory bank to query for relevant information."
        )
        model_selection = gr.Dropdown(
            choices=ollama_models,
            label="Select Inference Model",
            value=ollama_models[0] if ollama_models else None,
            info="Select the Ollama inference model."
        )

    query_limit = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Number of RAG results")

    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=4):
            msg = gr.Textbox(
                label="Type your message here",
                placeholder="Enter your question...",
                interactive=False,
                elem_classes="large-text-input"
            )
        with gr.Column(scale=1):
            submit = gr.Button("Send", interactive=False)

    with gr.Accordion("Advanced Settings", open=False):
        retrieval_prompt = gr.Textbox(
            label="Retrieval Prompt",
            placeholder="Enter the retrieval prompt...",
            lines=3,
            value="Take the user's prompt to create a prompt for a semantic database retriever. Only respond with a list of comma-separated keywords. DO NOT say anything before or after the keywords. User prompt: {message}"
        )

        prompt_template = gr.TextArea(
            label="Prompt Template",
            placeholder="Enter the prompt template...",
            value="Use these results from your recipe catalog to form your answer (include the file reference in your answer if you use one)"
        )
        with gr.Row():
            save_prompts_btn = gr.Button("Save Prompts")
            load_prompts_btn = gr.Button("Load Prompts")
    clear = gr.Button("Clear Chat")

    # Surface dependency problems (e.g., Dabarqus not running) when the UI first loads
    error_box = gr.Markdown(visible=False)

    def report_dependency_status():
        _, message = display_error_message(check_dependencies())
        return gr.update(visible=bool(message), value=message)

    demo.load(report_dependency_status, outputs=[error_box])

    memory_bank.change(enable_input, inputs=[memory_bank], outputs=[msg, submit])

    msg.submit(
        chat_function,
        inputs=[msg, chatbot, memory_bank, model_selection, query_limit, retrieval_prompt, prompt_template],
        outputs=[chatbot]
    )
    submit.click(
        chat_function,
        inputs=[msg, chatbot, memory_bank, model_selection, query_limit, retrieval_prompt, prompt_template],
        outputs=[chatbot]
    )
    clear.click(lambda: None, None, chatbot, queue=False)

    save_button.click(
        save_conversation,
        inputs=[chatbot],
        outputs=[file_output]
    )

    load_button.click(
        toggle_load_file,
        inputs=[file_input, chatbot],
        outputs=[file_input, chatbot]
    )

    # Wire the prompt save/load buttons to their handlers
    save_prompts_btn.click(save_prompts, inputs=[retrieval_prompt, prompt_template])
    load_prompts_btn.click(load_prompts, outputs=[retrieval_prompt, prompt_template])

if __name__ == "__main__":
    demo.launch()
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/requirements.txt:
--------------------------------------------------------------------------------
aiofiles==23.2.1
annotated-types==0.7.0
anyio==4.6.0
Brotli==1.1.0
certifi==2024.8.30
charset-normalizer==3.4.0
click==8.1.7
colorama==0.4.6
dabarqus==1.1.9
fastapi==0.115.0
ffmpy==0.4.0
filelock==3.16.1
fsspec==2024.9.0
gradio==5.0.0
gradio_client==1.4.0
h11==0.14.0
httpcore==1.0.6
httpx==0.27.2
huggingface-hub==0.25.2
idna==3.10
inflate64==1.0.0
Jinja2==3.1.4
markdown-it-py==3.0.0
MarkupSafe==2.1.5
25 | mdurl==0.1.2
26 | multivolumefile==0.2.3
27 | numpy==2.1.2
28 | ollama==0.3.3
29 | orjson==3.10.7
30 | packaging==24.1
31 | pandas==2.2.3
32 | pillow==10.4.0
33 | psutil==6.0.0
34 | py7zr==0.22.0
35 | pybcj==1.0.2
36 | pycryptodomex==3.21.0
37 | pydantic==2.9.2
38 | pydantic_core==2.23.4
39 | pydub==0.25.1
40 | Pygments==2.18.0
41 | pyppmd==1.1.0
42 | python-dateutil==2.9.0.post0
43 | python-multipart==0.0.12
44 | pytz==2024.2
45 | PyYAML==6.0.2
46 | pyzstd==0.16.2
47 | requests==2.32.3
48 | rich==13.9.2
49 | ruff==0.6.9
50 | semantic-version==2.10.0
51 | shellingham==1.5.4
52 | six==1.16.0
53 | sniffio==1.3.1
54 | starlette==0.38.6
55 | texttable==1.7.0
56 | tomlkit==0.12.0
57 | tqdm==4.66.5
58 | typer==0.12.5
59 | typing_extensions==4.12.2
60 | tzdata==2024.2
61 | urllib3==2.2.3
62 | uvicorn==0.31.1
63 | websockets==12.0
64 | 
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/sample_prompt.md:
--------------------------------------------------------------------------------
1 | Evaluate these responses from your RAG database {RAG_response} in response to this original prompt: {original_prompt}. You prompted it using these keywords: {keywords}. You have three options:
2 | 1. Seek more context
3 | Reprompt the RAG database with the same keywords, but asking for more results. Please say: REPROMPT
4 | 2. Reprompt RAG database
5 | Reprompt the RAG database with new keywords. Please say: NEW KEYWORDS: (the new keywords)
6 | 3. Accept answer from RAG database and use it to respond to the user.
7 | First, say ACCEPT. Then, on a new line, respond to the user's original prompt using the context you gathered from the RAG database.
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/templates/general_template.md:
--------------------------------------------------------------------------------
1 | Use these results from your knowledge base to answer this prompt. You are a bot made to help users with access to a library of general information. Use the information provided to you (in JSON) to help answer the user. DO NOT mention the distance, and you MUST utilize the information from your knowledge base.
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/templates/recipe_template.md:
--------------------------------------------------------------------------------
1 | Use these results from your recipe catalog to form your answer (include the file reference in your answer if you use one). DO NOT mention the distance, and ONLY use the recipes you receive from the catalog.
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/.gitignore:
--------------------------------------------------------------------------------
1 | # Python virtual environment
2 | venv/
3 | 
4 | # Python cache files
5 | __pycache__/
6 | *.pyc
7 | *.pyo
8 | *.pyd
9 | 
10 | # Retrieval results
11 | retrievals/
12 | 
13 | # IDE-specific files (e.g., for VSCode)
14 | .vscode/
15 | 
16 | # Operating system files
17 | .DS_Store
18 | Thumbs.db
19 | 
20 | # Jupyter Notebook checkpoints
21 | .ipynb_checkpoints/
22 | 
23 | # Logs
24 | *.log
25 | 
26 | # Environment variables
27 | .env
28 | 
29 | # Build directories
30 | build/
31 | dist/
32 | 
33 | # Temporary files
34 | *.tmp
35 | *.bak
36 | *.swp
37 | 
38 | # Conversation logs
39 | conversation*
40 | 
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/README.md:
--------------------------------------------------------------------------------
1 | # Dabarqus Python Chatbot UI Example: REST API
2 | 
3 | This is an example chatbot program that uses Dabarqus via the REST API to chat with your PDFs. It uses Gradio for the frontend UI and Ollama to provide the LLMs.
4 | 
5 | There is another version of this demo, PythonPDFChatbot-PythonSDK, that uses the [native Dabarqus Python SDK](https://pypi.org/project/dabarqus/) and Dabarqus's built-in inference engine.
6 | 
7 | ## Features
8 | 
9 | - Interactive chat interface
10 | - Memory bank selection
11 | - Integration with Dabarqus API for semantic search
12 | - Powered by Gradio for easy web deployment
13 | 
14 | ## Prerequisites
15 | 
16 | - Python 3.8+
17 | - Dabarqus server running and accessible
18 | - [Ollama](https://ollama.com/download)
19 | 
20 | ## Installation
21 | ### Dabarqus Service
22 | Important: This chatbot requires Dabarqus to be installed and running on your machine. Before using this chatbot, please ensure that you have:
23 | 
24 | - Downloaded and installed Dabarqus
25 | - Started the Dabarqus service on your machine
26 | 
27 | The chatbot communicates with the Dabarqus service via its API, so having Dabarqus running is essential for the chatbot to function correctly.
28 | Once Dabarqus is set up and running, you can proceed with using this chatbot. For more information on how to start and manage the Dabarqus service, please refer to the [Dabarqus quick start](https://github.com/electricpipelines/barq?tab=readme-ov-file#quick-start).
29 | 
30 | ### Chatbot installation
31 | 
32 | 1. Clone the repository:
33 | `git clone https://github.com/electricpipelines/barq.git`
34 | `cd barq/examples/PythonPDFChatbot-RESTAPI`
35 | 
36 | 2. Create a virtual environment (**optional but recommended**):
37 | `python -m venv venv`
38 | `source venv/bin/activate # On Windows, use 'venv\Scripts\activate'`
39 | 
40 | 3. Install the required dependencies:
41 | `pip install -r requirements.txt`
42 | 
43 | ### Ollama
44 | If you have not already downloaded and set up Ollama:
45 | 1. Follow the installation instructions on the [Ollama website](https://ollama.com/download).
46 | 
47 | 2. After installation, install at least one LLM:
48 | `ollama pull llama3`
49 | 
50 | ## Running the Application
51 | 
52 | 1. Ensure your Dabarqus server is running and accessible.
53 | 2. Start the Gradio application:
54 | `python app.py`
55 | 3. The application will start and provide a local URL (usually http://127.0.0.1:7860).
56 | 4. Open this URL in your web browser to access the chat interface.
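57 | 
58 | ### Verifying the Dabarqus service (optional)
59 | Before starting the chatbot, you can sanity-check the Dabarqus service from the command line. These are the same REST endpoints this example's code calls; this is a quick sketch assuming the default port 6568 used throughout this example, so adjust if your install differs:
60 | - Health check: `curl http://localhost:6568/health`
61 | - List memory banks: `curl http://localhost:6568/api/silk/memorybanks`
62 | - Run a test query (substitute one of your own memory bank names for the hypothetical "MyNewRecipeBook"): `curl "http://localhost:6568/api/silk/query?q=macaroni&limit=3&memorybank=MyNewRecipeBook"`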
63 | 
64 | ### Memory Banks
65 | You need a **memory bank** to chat with your PDFs. You have a few options:
66 | - Run the CreatingAMemoryBank example.
67 | - Create a memory bank through the admin interface:
68 | 1. Open the Dabarqus admin interface (typically at `http://localhost:6568/admin`).
69 | 2. Navigate to the "Memory Banks" section.
70 | 3. Create a new memory bank (e.g., "MyNewRecipeBook"); it will appear in the list once it has been created.
71 | 
72 | 
73 | ## File Structure
74 | 
75 | - `app.py`: Main application file containing the Gradio interface
76 | - `retriever.py`: Contains functions for interacting with the Dabarqus API
77 | - `templates/`: Directory containing prompt templates
78 | - `sample_prompt.md`: Sample prompt file for the chatbot
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/app.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | import ollama
3 | from retriever import retrieve_data, convert_prompt_to_retrieval_prompt
4 | import requests
5 | import json
6 | import os
7 | from datetime import datetime
8 | 
9 | def check_dependencies():
10 | errors = []
11 | 
12 | # Check Ollama
13 | try:
14 | ollama.list()
15 | except Exception as e:
16 | errors.append(f"Ollama is not running or installed properly. Error: {str(e)}")
17 | 
18 | # Check Dabarqus
19 | try:
20 | response = requests.get("http://localhost:6568/health")
21 | if response.status_code != 200:
22 | errors.append("Dabarqus is not responding properly.")
23 | except requests.RequestException:
24 | errors.append("Dabarqus is not running or installed properly.")
25 | 
26 | return errors
27 | 
28 | def display_error_message(errors):
29 | if errors:
30 | error_msg = "The following errors occurred:\n" + "\n".join(errors)
31 | gr.Warning(error_msg)
32 | return gr.update(visible=True, value=error_msg) # single update, so this can feed a single output component
33 | return gr.update(visible=False, value="")
34 | 
35 | # Helper functions below are defined at module level; the UI itself (including the error box) is built in the single gr.Blocks context further down.
36 | 
37 | 
38 | # Get available Ollama models
39 | def get_ollama_models():
40 | try:
41 | models = ollama.list()
42 | return [model['name'] for model in models['models']]
43 | except Exception as e:
44 | print(f"Error fetching Ollama models: {e}")
45 | return ["llama3"] # Default model if fetching fails
46 | 
47 | 
48 | def get_memory_banks():
49 | url = "http://localhost:6568/api/silk/memorybanks"
50 | try:
51 | response = requests.get(url)
52 | response.raise_for_status()
53 | memory_banks = response.json()
54 | 
55 | return [bank.get('name') for bank in memory_banks['SilkMemoryBanks'] if bank.get('name')]
56 | except requests.exceptions.RequestException as e:
57 | print(f"Error fetching memory banks: {e}")
58 | return ["Default"] # Return a default option if the API call fails
59 | 
60 | 
61 | def chat_function(message, history, memory_bank, model, query_limit, retrieval_prompt_template, full_prompt_template):
62 | # Convert the user's message to a retrieval prompt
63 | retrieval_prompt = convert_prompt_to_retrieval_prompt(message, retrieval_prompt_template)
64 | 
65 | # Retrieve data
66 | retrieved_data = retrieve_data(retrieval_prompt, memory_bank, int(query_limit))
67 | 
68 | # Prepare the prompt for the LLM
69 | full_prompt = f"{full_prompt_template} : RAG_response {retrieved_data}, keywords: {retrieval_prompt}, original_prompt: {message}"
70 | 
71 | # Use Ollama to generate a response
72 | response = ""
73 | stream = ollama.chat(
74 | model=model,
75 | messages=[{"role": "system",
"content": "You are a helpful assistant."}, 76 | {"role": "user", "content": full_prompt}], 77 | stream=True, 78 | ) 79 | 80 | for chunk in stream: 81 | response += chunk['message']['content'] 82 | yield history + [("Human", message), ("AI", response)] 83 | 84 | 85 | def save_conversation(history): 86 | if not history: 87 | gr.Warning("No conversation to save.") 88 | return None, gr.update(visible=False) 89 | 90 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 91 | filename = f"conversation_{timestamp}.json" 92 | with open(filename, "w") as f: 93 | json.dump(history, f) 94 | 95 | gr.Info(f"Conversation saved as {filename}") 96 | return filename, gr.update(visible=True) 97 | 98 | def toggle_load_file(file, chatbot): 99 | if file is None: 100 | # If no file is selected, just toggle visibility 101 | return gr.update(visible=True), chatbot 102 | 103 | try: 104 | with open(file.name, "r") as f: 105 | history = json.load(f) 106 | gr.Info("Conversation loaded successfully.") 107 | return gr.update(value=None, visible=False), history 108 | except json.JSONDecodeError: 109 | gr.Warning("Invalid JSON file. Please select a valid conversation file.") 110 | except Exception as e: 111 | gr.Warning(f"Error loading conversation: {str(e)}") 112 | 113 | return gr.update(value=None), chatbot 114 | def show_load_file(): 115 | return gr.update(visible=True) 116 | 117 | def hide_load_file(): 118 | return gr.update(visible=False) 119 | 120 | 121 | def save_prompts(retrieval_prompt, full_prompt): 122 | prompts = { 123 | "retrieval_prompt": retrieval_prompt, 124 | "full_prompt": full_prompt 125 | } 126 | with open("custom_prompts.json", "w") as f: 127 | json.dump(prompts, f) 128 | gr.Info("Prompts saved successfully.") 129 | 130 | def load_prompts(): 131 | if os.path.exists("custom_prompts.json"): 132 | with open("custom_prompts.json", "r") as f: 133 | prompts = json.load(f) 134 | return prompts["retrieval_prompt"], prompts["full_prompt"] 135 | else: 136 | gr.Warning("No saved prompts found.") 137 | return None, None 138 | 139 | 140 | 141 | def enable_input(choice): 142 | return gr.update(interactive=bool(choice)), gr.update(interactive=bool(choice)) 143 | 144 | 145 | 146 | with gr.Blocks(title="dabarqus") as demo: 147 | memory_banks = get_memory_banks() 148 | ollama_models = get_ollama_models() 149 | 150 | with gr.Row(): 151 | with gr.Column(scale=3): 152 | gr.Markdown("") # Empty markdown to take up space 153 | with gr.Column(scale=2): 154 | with gr.Row(): 155 | save_button = gr.Button("Save", size="sm") 156 | load_button = gr.Button("Load", size="sm") 157 | file_output = gr.File(label="Saved Conversation", visible=False) 158 | file_input = gr.File(label="Load Conversation", file_types=[".json"], visible=False) 159 | 160 | with gr.Row(): 161 | memory_bank = gr.Dropdown( 162 | choices=memory_banks, 163 | label="Select Memory Bank", 164 | value=None, 165 | allow_custom_value=False, 166 | info="Choose a memory bank to query for relevant information." 167 | ) 168 | model_selection = gr.Dropdown( 169 | choices=ollama_models, 170 | label="Select Inference Model", 171 | value=ollama_models[0] if ollama_models else None, 172 | info="Select the Ollama model for inference. Make sure Ollama is installed and running." 
204 | )
205 | 
206 | 
207 | 
208 | query_limit = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Number of RAG results")
209 | 
210 | chatbot = gr.Chatbot()
211 | with gr.Row():
212 | with gr.Column(scale=4):
213 | msg = gr.Textbox(
214 | label="Type your message here",
215 | placeholder="Enter your question...",
216 | interactive=False,
217 | elem_classes="large-text-input"
218 | )
219 | with gr.Column(scale=1):
220 | submit = gr.Button("Send", interactive=False)
221 | 
222 | with gr.Accordion("Advanced Settings", open=False):
223 | retrieval_prompt = gr.Textbox(
224 | label="Retrieval Prompt",
225 | placeholder="Enter the retrieval prompt...",
226 | lines=3,
227 | value="Take the user's prompt to create a prompt for a semantic database retriever. Only respond with a list of comma-separated keywords. DO NOT say anything before or after the keywords."
228 | )
229 | 
230 | prompt_template = gr.TextArea(
231 | label="Prompt Template",
232 | placeholder="Enter the prompt template...",
233 | value="Use these results from your recipe catalog to form your answer (include the file reference in your answer if you use one)"
234 | )
235 | with gr.Row():
236 | save_prompts_btn = gr.Button("Save Prompts")
237 | load_prompts_btn = gr.Button("Load Prompts")
238 | clear = gr.Button("Clear Chat")
239 | 
240 | memory_bank.change(enable_input, inputs=[memory_bank], outputs=[msg, submit])
241 | 
242 | msg.submit(
243 | chat_function,
244 | inputs=[msg, chatbot, memory_bank, model_selection, query_limit, retrieval_prompt, prompt_template],
245 | outputs=[chatbot]
246 | )
247 | submit.click(
248 | chat_function,
249 | inputs=[msg, chatbot, memory_bank, model_selection, query_limit, retrieval_prompt, prompt_template],
250 | outputs=[chatbot]
251 | )
252 | clear.click(lambda: None, None, chatbot, queue=False)
253 | 
254 | save_button.click(
255 | save_conversation,
256 | inputs=[chatbot],
257 | outputs=[file_output]
258 | )
259 | 
260 | load_button.click(
261 | toggle_load_file,
262 | inputs=[file_input, chatbot],
263 | outputs=[file_input, chatbot]
264 | )
265 | 
266 | # Run the dependency check once the UI loads (event listeners must be registered inside the Blocks context, before launch)
267 | demo.load(lambda: display_error_message(check_dependencies()), outputs=[error_box])
268 | 
269 | # Launch the app
270 | if __name__ == "__main__":
271 | demo.launch()
272 | 
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/requirements.txt:
--------------------------------------------------------------------------------
1 | aiofiles==23.2.1
2 | annotated-types==0.7.0
3 | anyio==4.4.0
4 | certifi==2024.7.4
5 | charset-normalizer==3.3.2
6 | click==8.1.7
7 | colorama==0.4.6
8 | contourpy==1.2.1
9 | cycler==0.12.1
10 | exceptiongroup==1.2.2
11 | fastapi==0.112.0
12 | 
ffmpy==0.4.0
13 | filelock==3.15.4
14 | fonttools==4.53.1
15 | fsspec==2024.6.1
16 | gradio==4.41.0
17 | gradio_client==1.3.0
18 | h11==0.14.0
19 | httpcore==1.0.5
20 | httpx==0.27.0
21 | huggingface-hub==0.24.5
22 | idna==3.7
23 | importlib_resources==6.4.0
24 | Jinja2==3.1.4
25 | kiwisolver==1.4.5
26 | markdown-it-py==3.0.0
27 | MarkupSafe==2.1.5
28 | matplotlib==3.9.1.post1
29 | mdurl==0.1.2
30 | numpy==2.0.1
31 | ollama==0.3.1
32 | orjson==3.10.7
33 | packaging==24.1
34 | pandas==2.2.2
35 | pillow==10.4.0
36 | pydantic==2.8.2
37 | pydantic_core==2.20.1
38 | pydub==0.25.1
39 | Pygments==2.18.0
40 | pyparsing==3.1.2
41 | python-dateutil==2.9.0.post0
42 | python-multipart==0.0.9
43 | pytz==2024.1
44 | PyYAML==6.0.2
45 | requests==2.32.3
46 | rich==13.7.1
47 | ruff==0.5.7
48 | semantic-version==2.10.0
49 | shellingham==1.5.4
50 | six==1.16.0
51 | sniffio==1.3.1
52 | starlette==0.37.2
53 | tomlkit==0.12.0
54 | tqdm==4.66.5
55 | typer==0.12.3
56 | typing_extensions==4.12.2
57 | tzdata==2024.1
58 | urllib3==2.2.2
59 | uvicorn==0.30.5
60 | websockets==12.0
61 | 
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/retriever.py:
--------------------------------------------------------------------------------
1 | import uuid
2 | import time
3 | import os
4 | import threading
5 | import sys
6 | import itertools
7 | import requests
8 | import ollama
9 | from colorama import Fore, Back, Style
10 | 
11 | def serialize_response(json_string, directory='./retrievals/'):
12 | # Ensure the directory exists
13 | if not os.path.exists(directory):
14 | os.makedirs(directory)
15 | 
16 | # Generate a unique filename using timestamp and UUID
17 | unique_filename = f"{int(time.time())}_{uuid.uuid4()}.json"
18 | file_path = os.path.join(directory, unique_filename)
19 | 
20 | # Write the JSON string to the file
21 | with open(file_path, 'w') as file:
22 | file.write(json_string)
23 | 
24 | # Provide the link to the file
25 | print(Fore.LIGHTBLUE_EX + f"Retrieved info has been saved to {file_path}" + Style.RESET_ALL)
26 | print("___")
27 | print()
28 | return file_path
29 | 
30 | def convert_prompt_to_retrieval_prompt(prompt, prompt_template, model="llama3"): # note: prompt_template is currently unused; the retrieval instruction below is hardcoded
31 | # llm = Ollama(
32 | # model=model,
33 | # temperature=0
34 | # )
35 | # response = llm.invoke(f"Take the user's prompt to create a prompt for a semantic database retriever. Only respond with a list of comma-separated keywords. DO NOT say anything before or after the keywords.#Prompt:{prompt}")
36 | response = ollama.chat(model=model, messages=[
37 | {
38 | 'role': 'user',
39 | 'content': f"Take the user's prompt to create a prompt for a semantic database retriever. Only respond with a list of comma-separated keywords. 
DO NOT say anything before or after the keywords.#Prompt:{prompt}",
40 | },
41 | ])
42 | return response['message']['content'] # return only the generated keyword text, not the whole response object
43 | 
44 | def display_spinner_and_wait_message(stop_event, message=""):
45 | spinner = itertools.cycle(['-', '\\', '|', '/'])
46 | while not stop_event.is_set(): # Check the stop event
47 | sys.stdout.write('\r' + Fore.YELLOW + message + next(spinner) + Fore.RESET)
48 | sys.stdout.flush()
49 | time.sleep(0.1)
50 | # Clear the spinner line when done
51 | sys.stdout.write('\r \r')
52 | sys.stdout.flush()
53 | 
54 | 
55 | def retrieve_data(prompt, memory_bank, query_limit=10):
56 | stop_event = threading.Event()
57 | t = threading.Thread(target=display_spinner_and_wait_message, args=(stop_event, "Retrieving info from database..."))
58 | t.start()
59 | 
60 | url = "http://localhost:6568/api/silk/query"
61 | params = {
62 | "q": prompt,
63 | "limit": query_limit,
64 | "memorybank": memory_bank # Changed from "memoryBank" to "memorybank"
65 | }
66 | 
67 | try:
68 | response = requests.get(url, params=params)
69 | response.raise_for_status() # Raises an HTTPError for bad responses
70 | serialize_response(response.text)
71 | return response.json()
72 | except requests.exceptions.RequestException as e:
73 | print(f"An error occurred: {e}")
74 | return None
75 | finally:
76 | stop_event.set() # Signal the spinner thread to stop
77 | t.join() # Wait for the spinner thread to finish
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/sample_prompt.md:
--------------------------------------------------------------------------------
1 | Evaluate these responses from your RAG database {RAG_response} in response to this original prompt: {original_prompt}. You prompted it using these keywords: {keywords}. You have three options:
2 | 1. Seek more context
3 | Reprompt the RAG database with the same keywords, but asking for more results. Please say: REPROMPT
4 | 2. Reprompt RAG database
5 | Reprompt the RAG database with new keywords. Please say: NEW KEYWORDS: (the new keywords)
6 | 3. Accept answer from RAG database and use it to respond to the user.
7 | First, say ACCEPT. Then, on a new line, respond to the user's original prompt using the context you gathered from the RAG database.
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/templates/general_template.md:
--------------------------------------------------------------------------------
1 | Use these results from your knowledge base to answer this prompt. You are a bot made to help users with access to a library of general information. Use the information provided to you (in JSON) to help answer the user. DO NOT mention the distance, and you MUST utilize the information from your knowledge base.
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/templates/recipe_template.md:
--------------------------------------------------------------------------------
1 | Use these results from your recipe catalog to form your answer (include the file reference in your answer if you use one). DO NOT mention the distance, and ONLY use the recipes you receive from the catalog.
--------------------------------------------------------------------------------