├── .github
│   ├── scripts
│   │   └── generate-release-content.js
│   └── workflows
│       └── build-and-release.yaml
├── LICENSE
├── README.md
└── examples
    ├── CreatingAMemoryBank
    │   ├── README.md
    │   ├── recipes
    │   │   ├── wish_i_made_more__yeast_potato_lem_orange_rolls_(577).pdf
    │   │   ├── wolf_creek_inn__macaroni___cheese_(578).pdf
    │   │   ├── wolfpack__pork_sandwich_(579).pdf
    │   │   ├── world_s_best____and_easiest___teriyaki_chicken_wings_(298).pdf
    │   │   ├── world_s_best__macaroni___cheese_(580).pdf
    │   │   ├── you_bring_the_macaroni_salad___macaroni_salad_(299).pdf
    │   │   ├── you_can_t_eat_just_one__ice_box_cookies_(300).pdf
    │   │   ├── you_like_cheese___garlic_bread_spread_(581).pdf
    │   │   ├── you_ll_never_miss_the_noodles__lasagna_(584).pdf
    │   │   └── you_want_me_to_do_what_to_the_buttered_noodles_(582).pdf
    │   ├── requirements.txt
    │   └── store_files.py
    ├── PythonPDFChatbot-PythonSDK
    │   ├── .gitignore
    │   ├── README.md
    │   ├── app.py
    │   ├── requirements.txt
    │   ├── sample_prompt.md
    │   └── templates
    │       ├── general_template.md
    │       └── recipe_template.md
    └── PythonPDFChatbot-RESTAPI
        ├── .gitignore
        ├── README.md
        ├── app.py
        ├── requirements.txt
        ├── retriever.py
        ├── sample_prompt.md
        └── templates
            ├── general_template.md
            └── recipe_template.md

/.github/scripts/generate-release-content.js:
--------------------------------------------------------------------------------
const { Octokit } = require("@octokit/rest");
const fs = require("fs");

const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN });

// The triggering release is read from the workflow event payload file.
// (GITHUB_EVENT is not a real environment variable; the payload must be
// parsed from the JSON file that GITHUB_EVENT_PATH points to.)
const event = JSON.parse(fs.readFileSync(process.env.GITHUB_EVENT_PATH, "utf8"));

async function generateReleaseContent()
{
    const { data: release } = await octokit.repos.getRelease({
        owner: process.env.GITHUB_REPOSITORY.split('/')[0],
        repo: process.env.GITHUB_REPOSITORY.split('/')[1],
        release_id: event.release.id,
    });

    let content = `# ${release.name}\n\n`;
    content += release.body + "\n\n";

    content += "## Downloads\n\n";
    content += "| Platform | Download | Size | Date |\n";
    content += "|----------|----------|------|------|\n";

    const mainAssets = release.assets.filter(asset =>
        !asset.name.includes("debug") && !asset.name.includes("symbols")
    );

    for (const asset of mainAssets)
    {
        const platform = getPlatformFromAsset(asset.name);
        const size = formatSize(asset.size);
        const date = formatDate(new Date(asset.created_at));
        content += `| ${platform} | [${asset.name}](${asset.browser_download_url}) | ${size} | ${date} |\n`;
    }

    content += "\n## Additional Files\n\n";
    const additionalAssets = release.assets.filter(asset =>
        asset.name.includes("debug") || asset.name.includes("symbols")
    );

    for (const asset of additionalAssets)
    {
        const size = formatSize(asset.size);
        const date = formatDate(new Date(asset.created_at));
        content += `- [${asset.name}](${asset.browser_download_url}) (${size}, ${date})\n`;
    }

    fs.writeFileSync("release-body.md", content);
}

function getPlatformFromAsset(assetName)
{
    if (assetName.includes("windows")) return "Windows";
    if (assetName.includes("macos")) return "macOS";
    if (assetName.includes("linux")) return "Linux";
    return "Other";
}

function formatSize(bytes)
{
    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
    let size = bytes;
    let unitIndex = 0;

    while (size >= 1024 && unitIndex < units.length - 1)
    {
        size /= 1024;
        unitIndex++;
    }

    return `${size.toFixed(1)}${units[unitIndex]}`;
}

function formatDate(date)
{
    const now = new Date();
    const diffTime = Math.abs(now - date);
    // Use floor so assets less than a day old report "Today";
    // with ceil the "Today" branch below could never be reached.
    const diffDays = Math.floor(diffTime / (1000 * 60 * 60 * 24));

    if (diffDays === 0)
    {
        return "Today";
    } else if (diffDays === 1)
    {
        return "Yesterday";
    } else if (diffDays <= 7)
    {
        return `${diffDays} days ago`;
    } else
    {
        return date.toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
    }
}

generateReleaseContent().catch(console.error);
--------------------------------------------------------------------------------
/.github/workflows/build-and-release.yaml:
--------------------------------------------------------------------------------
  1 | name: Dabarqus Build and Release
  2 | 
  3 | defaults:
  4 |   run:
  5 |     shell: bash
  6 | 
  7 | on:
  8 |   repository_dispatch:
  9 |     types: [dabarqus_push]
 10 | 
 11 | jobs:
 12 |   build-and-release:
 13 |     permissions: write-all
 14 |     runs-on: ${{ matrix.runner }}
 15 |     env:
 16 |       # Set the deployment repository owner and name,
 17 |       # which is different from the current repository
 18 |       GITHUB_DEPLOYMENT_REPO_OWNER: electricpipelines
 19 |       GITHUB_DEPLOYMENT_REPO_NAME: barq
 20 |       GITHUB_SOURCE_REPO_OWNER: electricpipelines
 21 |       GITHUB_SOURCE_REPO_NAME: dabarqus
 22 |       GH_TOKEN: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }}
 23 |       GITHUB_TOKEN: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }}
 24 |       RELEASE_TYPE: release
 25 |       DOCKERHUB_USERNAME: electricpipelines
 26 |       DOCKERHUB_TOKEN: ${{ secrets.DOCKER_REPO_ACCESS_TOKEN }}
 27 | 
 28 |     strategy:
 29 |       fail-fast: false
 30 |       matrix:
 31 |         include:
 32 |           - platform: macos
 33 |             runner: [self-hosted, macos, x64]
 34 |           - platform: macos-metal
 35 |             runner: [macos-latest]
 36 |           - platform: windows
 37 |             runner: [self-hosted, windows]
 38 |           - platform: windows-nvidia
 39 |             runner: [self-hosted, windows, cuda]
 40 |           - platform: linux
 41 |             runner: [self-hosted, linux]
 42 |           - platform: linux-nvidia
 43 |             runner: [self-hosted, linux, cuda]
 44 |     steps:
 45 |       - name: Checkout Repository
 46 |         uses: actions/checkout@v4
 47 |         with:
 48 |           repository: ${{ env.GITHUB_SOURCE_REPO_OWNER }}/${{ env.GITHUB_SOURCE_REPO_NAME }}
 49 |           token: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }}
 50 |           submodules: recursive
 51 | 
 52 |       - name: Set Git Bash as default shell for Windows and Linux
 53 |         if: startsWith(matrix.platform, 'windows') || startsWith(matrix.platform, 'linux')
 54 |         shell: pwsh
 55 |         run: |
 56 |           echo "Setting up Git Bash as default shell"
 57 |           echo "C:/Program Files/Git/bin" >> $env:GITHUB_PATH
 58 | 
 59 |       - name: Check if runner is self-hosted
 60 |         id: check-runner
 61 |         run: |
 62 |           if [[ "${{ toJson(matrix.runner) }}" == *"self-hosted"* ]]; then
 63 |             echo "is_self_hosted=true" >> $GITHUB_OUTPUT
 64 |           else
 65 |             echo "is_self_hosted=false" >> $GITHUB_OUTPUT
 66 |           fi
 67 | 
 68 |       - name: Check if runner has Docker
 69 |         id: check-for-docker
 70 |         run: |
 71 |           if [[ "${{ toJson(matrix.runner) }}" == *"docker"* ]]; then
 72 |             echo "has_docker=true" >> $GITHUB_OUTPUT
 73 |           else
 74 |             echo "has_docker=false" >> $GITHUB_OUTPUT
 75 |           fi
 76 | 
 77 |       - name: Install ccache (non-self-hosted only)
 78 |         # Github's macOS runners
 79 |         if: steps.check-runner.outputs.is_self_hosted == 'false' && startsWith(matrix.platform, 'macos')
 80 |         run: brew install ccache
 81 | 
 82 |       - name: Cache ccache files (non-self-hosted only)
 83 |         # Github's macOS runners
 84 |         if: steps.check-runner.outputs.is_self_hosted == 'false' && startsWith(matrix.platform, 'macos')
 85 |         uses: actions/cache@v3
 86 |         with:
 87 |           path: .ccache
 88 |           key: ${{ runner.os }}-ccache-${{
matrix.platform }}-${{ github.sha }} 89 | restore-keys: | 90 | ${{ runner.os }}-ccache-${{ matrix.platform }}- 91 | 92 | - name: Cache ccache files (Docker builds) 93 | if: steps.check-for-docker.outputs.has_docker == 'true' 94 | uses: actions/cache@v4 95 | with: 96 | path: ~/.ccache 97 | key: ${{ runner.os }}-ccache-${{ matrix.platform }}-${{ github.sha }} 98 | restore-keys: ${{ runner.os }}-ccache-${{ matrix.platform }} 99 | 100 | - name: Extract Package Version from version.txt 101 | run: | 102 | PACKAGE_VERSION=$(cat version.txt | tr -d '\n') 103 | echo "PACKAGE_VERSION=$PACKAGE_VERSION" >> $GITHUB_ENV 104 | 105 | - name: Install the Apple developer certificates 106 | if: matrix.platform == 'macos' || matrix.platform == 'macos-metal' 107 | env: 108 | APP_CERTIFICATE_BASE64: ${{ secrets.MACOS_APP_CERTIFICATE_BASE64 }} 109 | INSTALLER_CERTIFICATE_BASE64: ${{ secrets.MACOS_INSTALLER_CERTIFICATE_BASE64 }} 110 | P12_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_P12_PASSWORD }} 111 | KEYCHAIN_PASSWORD: ${{ secrets.MACOS_KEYCHAIN_PASSWORD }} 112 | CSC_INSTALLER_LINK: ${{ secrets.MACOS_INSTALLER_CERTIFICATE_BASE64 }} 113 | CSC_INSTALLER_KEY_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_P12_PASSWORD }} 114 | run: | 115 | echo "Creating variables" 116 | APP_CERTIFICATE_PATH=$RUNNER_TEMP/app_certificate.p12 117 | INSTALLER_CERTIFICATE_PATH=$RUNNER_TEMP/installer_certificate.p12 118 | KEYCHAIN_PATH=$RUNNER_TEMP/app-signing.keychain-db 119 | 120 | echo "Importing certificates from secrets" 121 | echo -n "$APP_CERTIFICATE_BASE64" | base64 --decode -o $APP_CERTIFICATE_PATH 122 | echo -n "$INSTALLER_CERTIFICATE_BASE64" | base64 --decode -o $INSTALLER_CERTIFICATE_PATH 123 | 124 | echo "Creating temporary keychain" 125 | security -v create-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH 126 | security -v set-keychain-settings -lut 21600 $KEYCHAIN_PATH 127 | security unlock-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH 128 | 129 | echo "Importing certificates to keychain" 130 | security -v import $APP_CERTIFICATE_PATH -P "$P12_PASSWORD" -A -t cert -f pkcs12 -k $KEYCHAIN_PATH 131 | security -v import $INSTALLER_CERTIFICATE_PATH -P "$P12_PASSWORD" -A -t cert -f pkcs12 -k $KEYCHAIN_PATH 132 | 133 | echo "Setting keychain ACLs" 134 | security -v set-key-partition-list -S apple-tool:,apple: -k "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH 135 | 136 | echo "Setting default keychain" 137 | security -v list-keychain -d user -s $KEYCHAIN_PATH 138 | 139 | echo "Extracting Apple Developer Identity" 140 | IDENTITY_INFO=$(security find-identity -v -p codesigning $KEYCHAIN_PATH) 141 | if [ $? -ne 0 ]; then 142 | echo "Error: Failed to find identities. Keychain contents:" 143 | security dump-keychain $KEYCHAIN_PATH 144 | exit 1 145 | fi 146 | 147 | echo "All codesigning identities:" 148 | echo "$IDENTITY_INFO" 149 | 150 | FULL_IDENTITY=$(echo "$IDENTITY_INFO" | grep "Developer ID Application" | sed -n 's/.*"\(Developer ID Application: .*\)"/\1/p') 151 | 152 | if [ -z "$FULL_IDENTITY" ]; then 153 | echo "Error: Failed to extract Developer ID Application identity." 
154 | exit 1 155 | fi 156 | 157 | # Extract just the name and team ID without the prefix 158 | IDENTITY=$(echo "$FULL_IDENTITY" | sed 's/Developer ID Application: //') 159 | 160 | echo "CSC_NAME=$IDENTITY" >> $GITHUB_ENV 161 | echo "Extracted identity for signing: $IDENTITY" 162 | 163 | - name: Install build tools on macOS Metal via Homebrew 164 | if: matrix.platform == 'macos' || matrix.platform == 'macos-metal' 165 | run: brew install python-setuptools autoconf automake autoconf-archive 166 | 167 | - uses: actions/github-script@v7 168 | with: 169 | script: | 170 | core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); 171 | core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); 172 | 173 | - name: Install VCPKG 174 | if: matrix.platform == 'macos' || matrix.platform == 'macos-metal' 175 | run: | 176 | git clone https://github.com/microsoft/vcpkg 177 | ./vcpkg/bootstrap-vcpkg.sh 178 | export VCPKG_INSTALLATION_ROOT=$(pwd)/vcpkg 179 | echo "VCPKG_INSTALLATION_ROOT=${VCPKG_INSTALLATION_ROOT}" >> $GITHUB_ENV 180 | echo "VCPKG_ROOT=${VCPKG_INSTALLATION_ROOT}" >> $GITHUB_ENV 181 | 182 | - name: Cache npm dependencies 183 | if: steps.check-for-docker.outputs.has_docker == 'false' 184 | uses: actions/cache@v4 185 | with: 186 | path: '~/.npm' 187 | key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} 188 | restore-keys: | 189 | ${{ runner.os }}-node- 190 | 191 | - name: Set up Docker 192 | if: steps.check-for-docker.outputs.has_docker == 'true' 193 | run: | 194 | if ! command -v docker &> /dev/null; then 195 | echo "Docker not found. Please install Docker on your self-hosted runner." 196 | exit 1 197 | fi 198 | 199 | - name: Docker Build, Package and Release to GitHub 200 | if: steps.check-for-docker.outputs.has_docker == 'true' 201 | env: 202 | RELEASE_TYPE: ${{ env.RELEASE_TYPE }} 203 | run: | 204 | 205 | # Convert Windows path to Docker-compatible path 206 | WORKSPACE_PATH=$(cygpath -w "${{ github.workspace }}" | sed 's/\\/\//g') 207 | 208 | docker build -t dabarqus-builder -f Dockerfile.build . 209 | 210 | docker run --rm --gpus all \ 211 | -v "${WORKSPACE_PATH}:/dabarqus_src" \ 212 | -v "${HOME}/.ccache:/root/.ccache" \ 213 | -e BUILD_PLATFORM=${{ matrix.platform }} \ 214 | -e GH_TOKEN=${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }} \ 215 | -e GITHUB_TOKEN=${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }} \ 216 | -e GITHUB_DEPLOYMENT_REPO_OWNER=${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }} \ 217 | -e GITHUB_DEPLOYMENT_REPO_NAME=${{ env.GITHUB_DEPLOYMENT_REPO_NAME }} \ 218 | -e RELEASE_TYPE=${{ env.RELEASE_TYPE }} \ 219 | dabarqus-builder 220 | 221 | - name: Native Build, Package and Release to GitHub 222 | if: steps.check-for-docker.outputs.has_docker == 'false' 223 | shell: pwsh 224 | env: 225 | APPLE_ID: ${{ secrets.MACOS_APP_NOTARIZATION_USERID }} 226 | APPLE_PASSWORD: ${{ secrets.MACOS_APP_NOTARIZATION_PASSWORD }} 227 | APPLE_APP_SPECIFIC_PASSWORD : ${{ secrets.MACOS_APP_NOTARIZATION_PASSWORD }} 228 | APPLE_TEAM_ID: ${{ secrets.MACOS_APP_NOTARIZATION_TEAMID }} 229 | DABARQUS_FINE_GRAINED_TOKEN: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }} 230 | RELEASE_TYPE: ${{ env.RELEASE_TYPE }} 231 | run: | 232 | ./build.ps1 -BuildPlatform ${{ matrix.platform }} 233 | 234 | - name: Sign macOS Binaries and Libraries 235 | if: startsWith(matrix.platform, 'macos') 236 | run: | 237 | cd out/install/${{ matrix.platform }} 238 | 239 | # Sign all binaries recursively (not just in bin) 240 | find . 
-type f -perm +111 | while read file; do 241 | echo "Signing executable: $file" 242 | codesign --force --options runtime --timestamp --deep --sign "${{ env.CSC_NAME }}" "$file" 243 | codesign --verify --deep --strict --verbose "$file" 244 | done 245 | 246 | # Sign all dylibs recursively (not just in lib) 247 | find . -type f -name "*.dylib" | while read file; do 248 | echo "Signing library: $file" 249 | codesign --force --options runtime --timestamp --deep --sign "${{ env.CSC_NAME }}" "$file" 250 | codesign --verify --deep --strict --verbose "$file" 251 | done 252 | 253 | # Sign all bundles 254 | find . -type d -name "*.app" -or -name "*.framework" | while read bundle; do 255 | echo "Signing bundle: $bundle" 256 | codesign --force --options runtime --timestamp --deep --sign "${{ env.CSC_NAME }}" "$bundle" 257 | codesign --verify --deep --strict --verbose "$bundle" 258 | done 259 | 260 | cd ../../.. 261 | 262 | - name: Create and Notarize PKG 263 | if: startsWith(matrix.platform, 'macos') 264 | env: 265 | APPLE_ID: ${{ secrets.MACOS_APP_NOTARIZATION_USERID }} 266 | APPLE_PASSWORD: ${{ secrets.MACOS_APP_NOTARIZATION_PASSWORD }} 267 | APPLE_TEAM_ID: ${{ secrets.MACOS_APP_NOTARIZATION_TEAMID }} 268 | run: | 269 | PACKAGE_VERSION=$(cat version.txt | tr -d '\n') 270 | 271 | # Build the pkg 272 | chmod +x build-pkg.sh 273 | ./build-pkg.sh ${{ matrix.platform }} "${{ env.CSC_NAME }}" 274 | 275 | PKG_PATH="Dabarqus-${PACKAGE_VERSION}-${{ matrix.platform }}.pkg" 276 | 277 | # Submit for notarization and capture the submission ID 278 | echo "Submitting pkg for notarization..." 279 | SUBMISSION_ID=$(xcrun notarytool submit "$PKG_PATH" \ 280 | --apple-id "$APPLE_ID" \ 281 | --password "$APPLE_PASSWORD" \ 282 | --team-id "$APPLE_TEAM_ID" \ 283 | --wait \ 284 | | grep "id:" | head -n1 | awk '{print $2}') 285 | 286 | # Get detailed information about the submission 287 | echo "Getting detailed notarization info..." 288 | xcrun notarytool info "$SUBMISSION_ID" \ 289 | --apple-id "$APPLE_ID" \ 290 | --password "$APPLE_PASSWORD" \ 291 | --team-id "$APPLE_TEAM_ID" 292 | 293 | # Get the submission log 294 | echo "Getting notarization log..." 295 | xcrun notarytool log "$SUBMISSION_ID" \ 296 | --apple-id "$APPLE_ID" \ 297 | --password "$APPLE_PASSWORD" \ 298 | --team-id "$APPLE_TEAM_ID" 299 | 300 | # Check if notarization succeeded 301 | STATUS=$(xcrun notarytool info "$SUBMISSION_ID" \ 302 | --apple-id "$APPLE_ID" \ 303 | --password "$APPLE_PASSWORD" \ 304 | --team-id "$APPLE_TEAM_ID" \ 305 | | grep "status:" | awk '{print $2}') 306 | 307 | if [ "$STATUS" != "Accepted" ]; then 308 | echo "Notarization failed with status: $STATUS" 309 | exit 1 310 | fi 311 | 312 | echo "Stapling notarization ticket to $PKG_PATH" 313 | xcrun stapler staple "$PKG_PATH" 314 | echo "PKG_PATH=${PKG_PATH}" >> $GITHUB_ENV 315 | echo "PKG_NAME=${PKG_PATH}" >> $GITHUB_ENV 316 | 317 | - name: Create Zip Archive 318 | run: | 319 | PACKAGE_VERSION=$(cat version.txt | tr -d '\n') 320 | ZIP_NAME="dabarqus-${{ matrix.platform }}-${PACKAGE_VERSION}.zip" 321 | 322 | cd out/install/${{ matrix.platform }} 323 | 324 | # Remove unnecessary files 325 | if [[ "${{ matrix.platform }}" == windows* ]]; then 326 | find . -type f ! \( -name "*.dll" -o -name "*.exe" -o -path "./bin/*" \) -delete 327 | elif [[ "${{ matrix.platform }}" == linux* ]]; then 328 | find . -type f ! 
\( -name "*.so" -o -path "./bin/*" \) -delete 329 | # Ensure executables have proper permissions 330 | find ./bin -type f -exec chmod 755 {} \; 331 | # Preserve symlinks in lib directory 332 | find ./lib -type l -exec chmod 777 {} \; 333 | elif [[ "${{ matrix.platform }}" == macos* ]]; then 334 | find . -type f ! \( -name "*.dylib" -o -path "./bin/*" \) -delete 335 | # Ensure executables have proper permissions 336 | find ./bin -type f -exec chmod 755 {} \; 337 | fi 338 | 339 | find ./bin -type f -name "*.py" -delete 340 | 341 | # Remove empty directories 342 | find . -type d -empty -delete 343 | 344 | # Create the zip archive with permission and symlink preservation 345 | if [[ "${{ matrix.platform }}" == linux* ]]; then 346 | # -X preserves permissions 347 | # -r recursive 348 | zip -r -X "${ZIP_NAME}" . -x "*.DS_Store" "*.git*" 349 | elif [[ "${{ matrix.platform }}" == macos* ]]; then 350 | # zip -r -X "${ZIP_NAME}" . -x "*.DS_Store" "*.git*" 351 | # Use ditto to preserve extended attributes and resource forks 352 | ditto -c -k --sequesterRsrc --keepParent . "${ZIP_NAME}" 353 | else 354 | zip -r "${ZIP_NAME}" . -x "*.DS_Store" "*.git*" 355 | fi 356 | 357 | # For macOS platforms, notarize the zip archive 358 | if [[ "${{ matrix.platform }}" == macos* ]]; then 359 | # Notarize the zip archive 360 | xcrun notarytool submit "${ZIP_NAME}" --wait \ 361 | --apple-id "${{ secrets.MACOS_APP_NOTARIZATION_USERID }}" \ 362 | --password "${{ secrets.MACOS_APP_NOTARIZATION_PASSWORD }}" \ 363 | --team-id "${{ secrets.MACOS_APP_NOTARIZATION_TEAMID }}" 364 | 365 | # # Staple the notarization ticket to the zip archive 366 | # xcrun stapler staple "${ZIP_NAME}" 367 | fi 368 | 369 | # Move the zip file to the root of the workspace 370 | mv "${ZIP_NAME}" ../../../ 371 | 372 | ls -l ../../../${ZIP_NAME} 373 | 374 | # Go back to the root directory 375 | cd ../../../ 376 | 377 | # Find the exact path of the zip file 378 | ZIP_PATH=$(find . -name "${ZIP_NAME}") 379 | 380 | if [ -z "$ZIP_PATH" ]; then 381 | echo "Error: Zip file not found" 382 | exit 1 383 | fi 384 | 385 | echo "ZIP_PATH=${ZIP_PATH}" >> $GITHUB_ENV 386 | echo "ZIP_NAME=${ZIP_NAME}" >> $GITHUB_ENV 387 | 388 | - name: Release Zip Archive 389 | env: 390 | GH_TOKEN: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }} 391 | PUBLISH_OWNER: ${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }} 392 | PUBLISH_REPO: ${{ env.GITHUB_DEPLOYMENT_REPO_NAME }} 393 | RELEASE_TYPE: ${{ env.RELEASE_TYPE }} 394 | run: | 395 | PACKAGE_VERSION=$(cat version.txt | tr -d '\n') 396 | if gh release view "v${PACKAGE_VERSION}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" &>/dev/null; then 397 | echo "Release v${PACKAGE_VERSION} exists. Uploading zip file to existing release." 398 | gh release upload "v${PACKAGE_VERSION}" "${ZIP_NAME}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" --clobber 399 | else 400 | echo "Release v${PACKAGE_VERSION} does not exist. Creating ${RELEASE_TYPE} release and uploading zip file." 
401 | if [ "$RELEASE_TYPE" = "draft" ]; then 402 | gh release create "v${PACKAGE_VERSION}" "${ZIP_NAME}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" --draft 403 | else 404 | gh release create "v${PACKAGE_VERSION}" "${ZIP_NAME}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" 405 | fi 406 | fi 407 | 408 | - name: Release PKG 409 | if: startsWith(matrix.platform, 'macos') 410 | env: 411 | GH_TOKEN: ${{ secrets.DABARQUS_FINE_GRAINED_TOKEN }} 412 | PUBLISH_OWNER: ${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }} 413 | PUBLISH_REPO: ${{ env.GITHUB_DEPLOYMENT_REPO_NAME }} 414 | RELEASE_TYPE: ${{ env.RELEASE_TYPE }} 415 | run: | 416 | PACKAGE_VERSION=$(cat version.txt | tr -d '\n') 417 | PKG_PATH="$PKG_NAME" 418 | 419 | if gh release view "v${PACKAGE_VERSION}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" &>/dev/null; then 420 | echo "Release v${PACKAGE_VERSION} exists. Uploading pkg file to existing release." 421 | gh release upload "v${PACKAGE_VERSION}" "${PKG_PATH}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" --clobber 422 | else 423 | echo "Release v${PACKAGE_VERSION} does not exist. Creating ${RELEASE_TYPE} release and uploading pkg file." 424 | if [ "$RELEASE_TYPE" = "draft" ]; then 425 | gh release create "v${PACKAGE_VERSION}" "${PKG_PATH}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" --draft 426 | else 427 | gh release create "v${PACKAGE_VERSION}" "${PKG_PATH}" --repo "${PUBLISH_OWNER}/${PUBLISH_REPO}" 428 | fi 429 | fi 430 | 431 | - name: Build and Push Runtime Docker Image 432 | if: matrix.platform == 'linux' || matrix.platform == 'linux-nvidia' 433 | env: 434 | PACKAGE_VERSION: ${{ env.PACKAGE_VERSION }} 435 | run: | 436 | # Determine Docker base image based on platform 437 | BASE_IMAGE="ubuntu:22.04" 438 | TAG_SUFFIX="" 439 | PACKAGE_NAME="dabarqus" 440 | if [[ "${{ matrix.platform }}" == "linux-nvidia" ]]; then 441 | BASE_IMAGE="nvidia/cuda:12.1.0-base-ubuntu22.04" 442 | TAG_SUFFIX="-nvidia" 443 | PACKAGE_NAME="dabarqus-nvidia" 444 | fi 445 | 446 | echo "Building Docker image for platform ${{ matrix.platform }}" 447 | echo "Base image: ${BASE_IMAGE}" 448 | echo "Package name: ${PACKAGE_NAME}" 449 | echo "Package version: ${{ env.PACKAGE_VERSION }}" 450 | echo "Release type: ${{ env.RELEASE_TYPE }}" 451 | 452 | # Build the runtime Docker image 453 | docker build \ 454 | --build-arg BASE_IMAGE=${BASE_IMAGE} \ 455 | --build-arg PLATFORM=${{ matrix.platform }} \ 456 | -t ${PACKAGE_NAME}:${{ env.PACKAGE_VERSION }} \ 457 | -t ${PACKAGE_NAME}:latest . 458 | 459 | # If this is a release build, push to container registry 460 | if [[ "${{ env.RELEASE_TYPE }}" == "release" ]]; then 461 | echo "Pushing Docker image to container registry..." 462 | echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin 463 | 464 | docker tag ${PACKAGE_NAME}:${{ env.PACKAGE_VERSION }} ghcr.io/${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }}/${PACKAGE_NAME}:${{ env.PACKAGE_VERSION }} 465 | docker tag ${PACKAGE_NAME}:${{ env.PACKAGE_VERSION }} ghcr.io/${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }}/${PACKAGE_NAME}:latest 466 | 467 | docker push ghcr.io/${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }}/${PACKAGE_NAME}:${{ env.PACKAGE_VERSION }} 468 | docker push ghcr.io/${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }}/${PACKAGE_NAME}:latest 469 | 470 | # Set package visibility to public 471 | echo "Setting package visibility to public..." 
472 |             gh api \
473 |               --method PATCH \
474 |               -H "Accept: application/vnd.github+json" \
475 |               -H "X-GitHub-Api-Version: 2022-11-28" \
476 |               orgs/${{ env.GITHUB_DEPLOYMENT_REPO_OWNER }}/packages/container/${PACKAGE_NAME}/visibility \
477 |               -f visibility=public || echo "Warning: Could not set package visibility. The package may need to be manually set to public."
478 |           fi
479 | 
480 |       - name: Create and Push Git Tag if Not Exists
481 |         run: |
482 |           PACKAGE_VERSION=${{ env.PACKAGE_VERSION }}
483 |           echo "Checking for existing tag for version $PACKAGE_VERSION..."
484 | 
485 |           # Fetch tags to ensure we have the latest tags in the local git history
486 |           git fetch --tags >/dev/null 2>&1;
487 | 
488 |           # Check if the tag already exists
489 |           if git rev-parse "v$PACKAGE_VERSION" >/dev/null 2>&1; then
490 |             echo "Tag v$PACKAGE_VERSION already exists. Skipping tag creation."
491 |           else
492 |             echo "Tag v$PACKAGE_VERSION does not exist. Creating and pushing tag..."
493 |             git config user.name "github-actions"
494 |             git config user.email "github-actions@github.com"
495 |             git tag -a "v$PACKAGE_VERSION" -m "Release version $PACKAGE_VERSION"
496 |             git push origin "v$PACKAGE_VERSION"
497 |             echo "Tag v$PACKAGE_VERSION created and pushed."
498 |           fi
499 | 
500 |       - name: Save Version Tag Name
501 |         run: echo "v${{ env.PACKAGE_VERSION }}" > tag.txt
502 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
SOFTWARE LICENSE AGREEMENT

Copyright (c) 2024 Electric Pipelines

1. License Grant: Electric Pipelines grants you a non-exclusive, non-transferable license to use the Dabarqus software product and its documentation.

2. Restrictions: You may not redistribute, sell, decompile, reverse engineer, disassemble, or otherwise reduce Dabarqus to a human-perceivable form.

3. Warranty Disclaimer: Dabarqus is provided 'as is' without warranty of any kind, either express or implied.

4. Limitation of Liability: In no event shall Electric Pipelines be liable for any damages whatsoever arising out of the use of or inability to use Dabarqus.

5. Termination: This license is effective until terminated. Your rights under this license will terminate automatically without notice from Electric Pipelines if you fail to comply with any term(s) of this license.

By installing or using Dabarqus, you agree to be bound by the terms of this license.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Dabarqus

**Dabarqus** is a standalone application that implements a complete RAG solution. It is designed to be easy to use and easy to integrate with your existing applications. Dabarqus includes a REST API, a command-line interface, and an admin dashboard.

## Why Create Dabarqus

If you're a developer, building a basic RAG solution is pretty straightforward. There are tons of tutorials and how-tos, as well as Python code to reuse. But if you're deploying your RAG solution within a company, or to end-user PCs, you also have to figure out some potentially tricky deployment and maintenance issues: deploying Python, a vector database, and the right embedding AI model, plus possible licensing challenges. Dabarqus was created to address these issues with a standalone, all-in-one solution with no dependencies.
It's written in low-level C++ with built-in vector search capabilities, the flexibility to use whichever embedding AI model is best for your use case, and a REST API for easy development integration.

## Table of Contents

1. [Quick Start](#quick-start)
   - [Ubuntu](#ubuntu)
   - [macOS](#macos)
   - [Windows](#windows)
2. [Features](#features)
3. [Barq - Command-line Interface](#barq---command-line-interface-to-dabarqus)
   - [Using with the CLI](#using-with-the-cli)
     - [Store](#store)
     - [Retrieve](#retrieve)
4. [API - REST Interface](#api---rest-interface-to-dabarqus)
   - [Using the API](#using-the-api)
5. [Examples](#examples)

## **Quick start**

### Ubuntu

Dabarqus runs on the CPU alone, or can use NVIDIA CUDA for higher performance. The CPU version requires no additional software. To use the CUDA (aka NVIDIA cuBLAS) version, you will need an NVIDIA GPU with CUDA support, the NVIDIA driver installed, and the CUDA download of Dabarqus.

0. To install NVIDIA drivers on Ubuntu (if you have an NVIDIA GPU), run the following command:

   ```bash
   sudo ubuntu-drivers install
   ```

1. Unzip the Dabarqus file into a folder

   ```bash
   unzip Dabarqus-linux-DOWNLOADED_VERSION.zip
   cd Dabarqus-linux-DOWNLOADED_VERSION
   chmod +x ./bin/*
   ./bin/barq service install
   ```

2. Open a browser and go to `http://localhost:6568/admin`

### macOS

For package file downloads, do the following:

1. Double click the Dabarqus-macos-DOWNLOADED_VERSION.pkg and install
2. After installation, open your browser and navigate to `http://localhost:6568/admin`

For zip file downloads, do the following:

1. Unzip the Dabarqus file into a folder

   ```bash
   unzip Dabarqus-macos-DOWNLOADED_VERSION.zip
   cd Dabarqus-macos-DOWNLOADED_VERSION
   ./bin/barq service install
   ```

2. Open a browser and go to `http://localhost:6568/admin`

### Windows

1. Double click the Dabarqus-windows-DOWNLOADED_VERSION.exe and install
2. Double click the Dabarqus icon or navigate to `http://localhost:6568/admin`
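On any platform, you can confirm the service is up by hitting its health endpoint. Below is a minimal sketch in Python, assuming the default port and the `/api/health` route listed in the API table further down (the exact response body may vary between versions):

```python
import requests

# The Dabarqus service listens on port 6568 by default;
# /api/health reports the health status of the service.
response = requests.get("http://localhost:6568/api/health", timeout=5)
response.raise_for_status()
print("Dabarqus is up:", response.text)
```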
## Features

1. **Ingest documents, databases, and APIs**: Ingest diverse data sources like PDFs*, emails, and raw data.
   - No matter where your data resides, Dabarqus can make it available to your LLM

2. **LLM-Style Prompting**: Use simple, LLM-style prompts when speaking to your memory banks.
   - Dabarqus will retrieve relevant data using the same prompt you give your LLM
   - No need to construct special queries or learn a new query language

3. **REST API**: Comprehensive control interface for downloading models, prompting semantic indexes, and even LLM inference.
   - REST is a standard interface that enjoys wide adoption, so your team doesn't need to learn a new, complex system
   - Allows comprehensive integration with existing development tools for easy adoption

4. **Multiple Semantic Indexes (Memory Banks)**: Group your data into separate semantic indexes (memory banks).
   - Keep your data organized by subject matter, category, or whatever grouping you like
   - Memory banks are portable, so you can create and use them wherever you like

5. **SDKs**: Native SDKs in [Python](https://pypi.org/project/dabarqus/) and [JavaScript](https://www.npmjs.com/package/dabarqus).
   - Easily integrates with Python and JavaScript projects

6. **LLM-Friendly Output**: Produces LLM-ready output that works with ChatGPT, Ollama, and any other LLM provider
   - Works seamlessly with the LLM of your choice

7. **Admin Dashboard**: Monitor performance, test memory banks, and make changes in an easy-to-use UI
   - Easy access to Dabarqus features
   - Monitor app performance with real-time graphs

8. **Mac, Linux, and Windows Support**: Runs natively with zero dependencies on all platforms: macOS (Intel or Metal), Linux, and Windows (CPU or GPU)
   - Runs on whatever platform you use

9. **LLM Inference**: Chat with LLM models right through the Dabarqus API/SDKs
   - Built-in chatbot capabilities for use in your applications

\*[Dabarqus Professional Edition](https://dabarqus.com/features#dabarqus-editions) is required for email, messaging, and API support.

## Barq - Command-line interface to Dabarqus

To install: `barq service install`

To uninstall: `barq service uninstall`

### Using with the CLI

#### Store

Usage: `barq store --input-path <path> --memory-bank "<memory bank name>"`

Example: `barq store --input-path C:\docs --memory-bank documents`

#### Retrieve

Usage: `barq retrieve --memory-bank "<memory bank name>"`

- Example: `barq retrieve --memory-bank documents`
- Example: `barq retrieve --memory-bank documents --query "Tell me about the documents" --query-limit 3`
  This will display three answers to the query from the 'documents' memory bank
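The same store and retrieve operations are also available programmatically. Here is a rough sketch using the [Python SDK](https://pypi.org/project/dabarqus/); the `barq` client and the `query_semantic_search` call are the same ones used by the example apps under **examples**:

```python
from dabarqus import barq

# Connect to the locally running Dabarqus service
sdk = barq("http://localhost:6568")

# Rough equivalent of:
#   barq retrieve --memory-bank documents --query "Tell me about the documents" --query-limit 3
results = sdk.query_semantic_search(
    "Tell me about the documents",
    limit=3,
    memory_bank="documents",
)
print(results)
```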
## API - REST interface to Dabarqus

| Method | Endpoint | Description | Parameters |
|--------|----------|-------------|------------|
| GET | /health or /api/health | Check the health status of the service | None |
| GET | /admin/* | Serve the admin application | None |
| GET | /odobo/* | Serve the Odobo application | None |
| GET | /api/models | Retrieve available AI models | None |
| GET | /api/model/metadata | Get metadata for a specific model | `modelRepo`, `filePath` (optional) |
| GET | /api/downloads | Get information about downloaded items | `modelRepo` (optional), `filePath` (optional) |
| GET | /api/downloads/enqueue | Enqueue a new download | `modelRepo`, `filePath` |
| GET | /api/downloads/cancel | Cancel a download | `modelRepo`, `filePath` |
| GET | /api/downloads/remove | Remove a downloaded item | `modelRepo`, `filePath` |
| GET | /api/inference | Get information about inference items | `alias` (optional) |
| GET | /api/inference/start | Start an inference | `alias`, `modelRepo`, `filePath`, `address` (optional), `port` (optional), `contextSize` (optional), `gpuLayers` (optional), `chatTemplate` (optional) |
| GET | /api/inference/stop | Stop an inference | `alias` |
| GET | /api/inference/status | Get the status of an inference | `alias` (optional) |
| GET | /api/inference/reset | Reset an inference | `alias` |
| GET | /api/inference/restart | Restart the current inference | None |
| GET | /api/hardware or /api/hardwareinfo | Get hardware information | None |
| GET | /api/silk | Get memory status | None |
| GET | /api/silk/enable | Enable memories | None |
| GET | /api/silk/disable | Disable memories | None |
| GET | /api/silk/memorybanks | Get memory banks information | None |
| GET | /api/silk/memorybank/activate | Activate a memory bank | `memorybank` |
| GET | /api/silk/memorybank/deactivate | Deactivate a memory bank | `memorybank`, `all` |
| GET | /api/silk/query | Perform a semantic query | (Parameters handled by Silk retriever) |
| GET | /api/silk/health | Check the health of the Silk retriever | None |
| GET | /api/silk/model/metadata | Get model metadata from the Silk retriever | (Parameters handled by Silk retriever) |
| GET | /api/shutdown | Initiate server shutdown | None |
| POST | /api/utils/log | Write to log | JSON body with log details |
| POST | /api/silk/embedding | Get an embedding from the Silk retriever | (Parameters handled by Silk retriever) |

### Using the API

- Example: `curl "http://localhost:6568/api/silk/query?q=Tell%20me%20about%20the%20documents&limit=3&memorybank=docs"`

## Examples

Examples of Dabarqus in action can be found in this repo under **examples**.

- PythonPDFChatbot-RESTAPI: An example chatbot program that uses Dabarqus via the REST API to chat with your PDFs.
- PythonPDFChatbot-PythonSDK: An example chatbot program that uses Dabarqus via the [Python SDK](https://pypi.org/project/dabarqus/) to chat with your PDFs.
- CreatingAMemoryBank: A Python example of storing documents into a memory bank (semantic index) using the Python SDK

### **Notes:**

1. Dabarqus Professional Edition is required for email, messaging, and API support.
--------------------------------------------------------------------------------
/examples/CreatingAMemoryBank/README.md:
--------------------------------------------------------------------------------
# Create a Memory Bank Example

This example uses the [Dabarqus Python SDK](https://pypi.org/project/dabarqus/) to store PDFs of your choice into a memory bank (semantic index). With this memory bank, you can retrieve relevant information using Dabarqus. It is recommended to run this example first so that you have a working memory bank before you try out the other examples.

## Prerequisites

- Python 3.8+
- Dabarqus server running and accessible

## Setup

1. Create a virtual environment (**optional, but recommended**):
   `python -m venv ./venv`
   **Mac or Linux**:
   `source venv/bin/activate`
   **Windows**:
   `venv\Scripts\activate.ps1`
2. Install the required Python libraries:
   `python -m pip install -r requirements.txt`
3. Run the app:
   `python store_files.py`

## Sample Usage

Run the following:
`python ./store_files.py --memory-bank MyNewRecipeBook --input-path ./recipes/`

This will store the contents of `./recipes`, a collection of included recipes, into a new memory bank called `MyNewRecipeBook`.

After running the script:

1. You'll see progress messages as each file is processed and added to the memory bank.
2. Once complete, you'll receive a confirmation message that the memory bank has been created.

## Verifying the Memory Bank

To verify that your memory bank was created successfully:

1. Open the Dabarqus admin interface (typically at `http://localhost:6568/admin`).
2. Navigate to the "Memory Banks" section.
3. You should see your newly created memory bank (e.g., "MyNewRecipeBook") listed.
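Alternatively, you can check from Python. A minimal sketch using the same SDK call the chatbot examples in this repo rely on (`get_memory_banks` returns a list of bank records with a `name` field):

```python
from dabarqus import barq

sdk = barq("http://localhost:6568")

# List all memory banks known to the server and look for the new one
banks = [bank["name"] for bank in sdk.get_memory_banks()]
print("Memory banks:", banks)
assert "MyNewRecipeBook" in banks, "memory bank was not created"
```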
-------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/wish_i_made_more__yeast_potato_lem_orange_rolls_(577).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/wish_i_made_more__yeast_potato_lem_orange_rolls_(577).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/wolf_creek_inn__macaroni___cheese_(578).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/wolf_creek_inn__macaroni___cheese_(578).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/wolfpack__pork_sandwich_(579).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/wolfpack__pork_sandwich_(579).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/world_s_best____and_easiest___teriyaki_chicken_wings_(298).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/world_s_best____and_easiest___teriyaki_chicken_wings_(298).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/world_s_best__macaroni___cheese_(580).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/world_s_best__macaroni___cheese_(580).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/you_bring_the_macaroni_salad___macaroni_salad_(299).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/you_bring_the_macaroni_salad___macaroni_salad_(299).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/you_can_t_eat_just_one__ice_box_cookies_(300).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/you_can_t_eat_just_one__ice_box_cookies_(300).pdf -------------------------------------------------------------------------------- /examples/CreatingAMemoryBank/recipes/you_like_cheese___garlic_bread_spread_(581).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/you_like_cheese___garlic_bread_spread_(581).pdf 
--------------------------------------------------------------------------------
/examples/CreatingAMemoryBank/recipes/you_ll_never_miss_the_noodles__lasagna_(584).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/you_ll_never_miss_the_noodles__lasagna_(584).pdf
--------------------------------------------------------------------------------
/examples/CreatingAMemoryBank/recipes/you_want_me_to_do_what_to_the_buttered_noodles_(582).pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/electricpipelines/barq/79567b8e0f7292865af40e7f0a8e30da85e07194/examples/CreatingAMemoryBank/recipes/you_want_me_to_do_what_to_the_buttered_noodles_(582).pdf
--------------------------------------------------------------------------------
/examples/CreatingAMemoryBank/requirements.txt:
--------------------------------------------------------------------------------
certifi==2024.8.30
charset-normalizer==3.4.0
dabarqus==1.0.1
idna==3.10
requests==2.32.3
urllib3==2.2.3
--------------------------------------------------------------------------------
/examples/CreatingAMemoryBank/store_files.py:
--------------------------------------------------------------------------------
import argparse
import os
import sys
import time

from dabarqus import barq

def main():
    parser = argparse.ArgumentParser(description="Store documents using Dabarqus SDK")
    parser.add_argument("--memory-bank", required=True, help="Name of the memory bank")
    parser.add_argument("--input-path", required=True, help="Path to the input file or directory")
    parser.add_argument("--no-override", action="store_true", help="Append a random suffix to the memory bank name to avoid overriding an existing bank")
    parser.add_argument("--server-url", default="http://localhost:6568", help="Dabarqus server URL")
    args = parser.parse_args()

    # Initialize the SDK
    sdk = barq(args.server_url)

    # Check the health of the service
    health = sdk.check_health()
    print(f"Service health: {health}")

    memory_bank_name = args.memory_bank

    if args.no_override:
        # Append a random suffix so an existing memory bank is not overridden
        import random
        import string
        random_string = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))
        memory_bank_name = args.memory_bank + random_string

    # Convert input path to absolute path if it's relative
    input_path = args.input_path
    if not os.path.isabs(input_path):
        input_path = os.path.abspath(input_path)

    print(f"Using absolute input path: {input_path}")

    # Enqueue ingestion
    ingestion_result = sdk.enqueue_ingestion(memory_bank_name=memory_bank_name, input_path=input_path, overwrite=True)
    print(f"Ingestion result: {ingestion_result}")

    # Poll until the ingestion is complete
    ingestions = sdk.check_ingestion_progress(memory_bank_name)
    while ingestions["status"] != "complete":
        ingestions = sdk.check_ingestion_progress(memory_bank_name)
        sys.stdout.write(f"Ingestion progress: {ingestions['progress']:.2f}% \r")
        sys.stdout.flush()
        time.sleep(1)  # avoid hammering the server while polling
    print("\nIngestion complete!")

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/.gitignore:
--------------------------------------------------------------------------------
# Python virtual environment
*venv/

# Python cache files
__pycache__/
*.pyc
*.pyo
*.pyd

# Retrieval results
retrievals/

# IDE-specific files (e.g., for VSCode)
.vscode/

# Operating system files
.DS_Store
Thumbs.db

# Jupyter Notebook checkpoints
.ipynb_checkpoints/

# Logs
*.log

# Environment variables
.env

# Build directories
build/
dist/

# Temporary files
*.tmp
*.bak
*.swp

# Conversation logs
conversation*
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/README.md:
--------------------------------------------------------------------------------
# Dabarqus Python Chatbot UI Example: Python SDK

This is an example chatbot program that uses Dabarqus via the [Python SDK](https://pypi.org/project/dabarqus/) to chat with your PDFs. It uses Gradio for the frontend UI and Ollama to provide the LLMs.

There is another version of this demo that uses the REST API; it can be found under `PythonPDFChatbot-RESTAPI`.

## Features

- Interactive chat interface
- Memory bank selection
- Integration with Dabarqus API for semantic search
- Powered by Gradio for easy web deployment

## Prerequisites

- Python 3.8+
- Dabarqus server running and accessible
- [Dabarqus Python SDK](https://pypi.org/project/dabarqus/)
- [Ollama](https://ollama.com/download)

## Installation

### Dabarqus Service

Important: This chatbot requires Dabarqus to be installed and running on your machine. Before using this chatbot, please ensure that you have:

- Downloaded and installed Dabarqus
- Started the Dabarqus service on your machine

The chatbot communicates with the Dabarqus service via its API, so having Dabarqus running is essential for the chatbot to function correctly.
Once Dabarqus is set up and running, you can proceed with using this chatbot. For more information on how to start and manage the Dabarqus service, please refer to the [Dabarqus quick start](https://github.com/electricpipelines/barq?tab=readme-ov-file#quick-start).

### Chatbot installation

1. Clone the repository:
   `git clone https://github.com/electricpipelines/barq.git`
   `cd barq/examples/PythonPDFChatbot-PythonSDK`

2. Create a virtual environment (**optional but recommended**):
   `python -m venv venv`
   `source venv/bin/activate # On Windows, use 'venv\Scripts\activate'`

3. Install the required dependencies:
   `pip install -r requirements.txt`

### Ollama

If you have not already downloaded and set up Ollama:

1. Follow the installation instructions on the [Ollama](https://ollama.com/download) website

2. After installation, install at least one LLM:
   `ollama pull llama3`

## Running the Application

1. Ensure your Dabarqus server is running and accessible.
2. Start the Gradio application:
   `python app.py`
3. The application will start and provide a local URL (usually http://127.0.0.1:7860).
4. Open this URL in your web browser to access the chat interface.

### Memory Banks

You need a **memory bank** to chat with your PDFs.
You have a few options:

- Run the CreatingAMemoryBank example.
- Create a memory bank through the admin interface:
  1. Open the Dabarqus admin interface (typically at `http://localhost:6568/admin`).
  2. Navigate to the "Memory Banks" section.
  3. Create a new memory bank there and ingest your PDFs into it.

## File Structure

- `app.py`: Main application file containing the Gradio interface
- `templates/`: Directory containing prompt templates
- `sample_prompt.md`: Sample prompt file for the chatbot
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/app.py:
--------------------------------------------------------------------------------
import gradio as gr
from dabarqus import barq
from datetime import datetime
import json
import os
import ollama

# Initialize the Dabarqus SDK
sdk = barq("http://localhost:6568")

def check_dependencies():
    errors = []
    try:
        health = sdk.check_health()
        if health != "OK":
            errors.append("Dabarqus is not responding properly.")
    except Exception as e:
        errors.append(f"Dabarqus is not running or installed properly. Error: {str(e)}")
    return errors

def display_error_message(errors):
    if errors:
        error_msg = "The following errors occurred:\n" + "\n".join(errors)
        gr.Warning(error_msg)
        return gr.update(visible=True), error_msg
    return gr.update(visible=False), ""

def get_memory_banks():
    try:
        memory_banks = sdk.get_memory_banks()
        return [bank['name'] for bank in memory_banks if bank.get('name')]
    except Exception as e:
        print(f"Error fetching memory banks: {e}")
        return ["Default"]

def get_inference_models():
    try:
        inference_info = sdk.get_inference_info()
        if inference_info:
            # Return a list of tuples: (alias, full_model_object)
            return [(item.get('alias', 'Unknown'), item) for item in inference_info]
        else:
            return [("No model running", None)]
    except Exception as e:
        print(f"Error fetching inference models: {e}")
        return [("Error fetching model", None)]

def chat_function(message, history, memory_bank, model, query_limit, retrieval_prompt_template, full_prompt_template):

    # Convert the user's message to a retrieval prompt
    retrieval_prompt = convert_prompt_to_retrieval_prompt(message, model)

    # Retrieve data
    retrieved_data = sdk.query_semantic_search(retrieval_prompt, limit=int(query_limit), memory_bank=memory_bank)

    # Prepare the prompt for the LLM
    full_prompt = f"{full_prompt_template} : RAG_response {retrieved_data}, keywords: {retrieval_prompt}, original_prompt: {message}"

    # Use Ollama to generate a response
    response = ""
    stream = ollama.chat(
        model=model,
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": full_prompt}],
        stream=True,
    )

    for chunk in stream:
        response += chunk['message']['content']
        # gr.Chatbot tuples are (user message, bot reply)
        yield history + [(message, response)]
def convert_prompt_to_retrieval_prompt(prompt, model="llama3"):
    response = ollama.chat(model=model, messages=[
        {
            'role': 'user',
            'content': f"Take the user's prompt to create a prompt for a semantic database retriever. Only respond with a list of comma-separated keywords. DO NOT say anything before or after the keywords.#Prompt:{prompt}",
        },
    ])
    # ollama.chat returns a full response object; only the message text
    # is usable as a retrieval prompt
    return response['message']['content']

def save_conversation(history):
    if not history:
        gr.Warning("No conversation to save.")
        return gr.update(visible=False)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"conversation_{timestamp}.json"
    with open(filename, "w") as f:
        json.dump(history, f)

    gr.Info(f"Conversation saved as {filename}")
    return gr.update(value=filename, visible=True)

def toggle_load_file(file, chatbot):
    if file is None:
        return gr.update(visible=True), chatbot

    try:
        with open(file.name, "r") as f:
            history = json.load(f)
        gr.Info("Conversation loaded successfully.")
        return gr.update(value=None, visible=False), history
    except json.JSONDecodeError:
        gr.Warning("Invalid JSON file. Please select a valid conversation file.")
    except Exception as e:
        gr.Warning(f"Error loading conversation: {str(e)}")

    return gr.update(value=None), chatbot

def save_prompts(retrieval_prompt, full_prompt):
    prompts = {
        "retrieval_prompt": retrieval_prompt,
        "full_prompt": full_prompt
    }
    with open("custom_prompts.json", "w") as f:
        json.dump(prompts, f)
    gr.Info("Prompts saved successfully.")

def load_prompts():
    if os.path.exists("custom_prompts.json"):
        with open("custom_prompts.json", "r") as f:
            prompts = json.load(f)
        return prompts["retrieval_prompt"], prompts["full_prompt"]
    else:
        gr.Warning("No saved prompts found.")
        return None, None

def enable_input(choice):
    return gr.update(interactive=bool(choice)), gr.update(interactive=bool(choice))

# Get available Ollama models
def get_ollama_models():
    try:
        models = ollama.list()
        return [model['name'] for model in models['models']]
    except Exception as e:
        print(f"Error fetching Ollama models: {e}")
        return ["llama3"]  # Default model if fetching fails

# Styling for the interface, applied via gr.Blocks(css=...)
CUSTOM_CSS = """
.large-text-input textarea {
    font-size: 16px !important;
}
.gradio-slider input[type="number"] {
    width: 80px;
}
#component-22 {
    margin-top: -20px;
}
#component-22 .gr-button {
    min-width: 60px;
    height: 30px;
}
.advanced-settings {
    border: 1px solid #e0e0e0;
    border-radius: 8px;
    padding: 15px;
    margin-top: 20px;
}
.advanced-settings .gr-form {
    border: none;
    padding: 0;
}
"""

with gr.Blocks(title="dabarqus", css=CUSTOM_CSS) as demo:
    memory_banks = get_memory_banks()
    ollama_models = get_ollama_models()

    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("")  # Empty markdown to take up space
        with gr.Column(scale=2):
            with gr.Row():
                save_button = gr.Button("Save", size="sm")
                load_button = gr.Button("Load", size="sm")
                file_output = gr.File(label="Saved Conversation", visible=False)
                file_input = gr.File(label="Load Conversation", file_types=[".json"], visible=False)

    with gr.Row():
        memory_bank = gr.Dropdown(
            choices=memory_banks,
            label="Select Memory Bank",
            value=None,
            allow_custom_value=False,
            info="Choose a memory bank to query for relevant information."
        )
        model_selection = gr.Dropdown(
            choices=ollama_models,
            label="Select Inference Model",
            value=ollama_models[0] if ollama_models else None,
            info="Select the Ollama inference model."
        )

    query_limit = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Number of RAG results")

    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=4):
            msg = gr.Textbox(
                label="Type your message here",
                placeholder="Enter your question...",
                interactive=False,
                elem_classes="large-text-input"
            )
        with gr.Column(scale=1):
            submit = gr.Button("Send", interactive=False)

    with gr.Accordion("Advanced Settings", open=False):
        retrieval_prompt = gr.Textbox(
            label="Retrieval Prompt",
            placeholder="Enter the retrieval prompt...",
            lines=3,
            value="Take the user's prompt to create a prompt for a semantic database retriever. Only respond with a list of comma-separated keywords. DO NOT say anything before or after the keywords. User prompt: {message}"
        )

        prompt_template = gr.TextArea(
            label="Prompt Template",
            placeholder="Enter the prompt template...",
            value="Use these results from your recipe catalog to form your answer (include the file reference in your answer if you use one)"
        )
        with gr.Row():
            save_prompts_btn = gr.Button("Save Prompts")
            load_prompts_btn = gr.Button("Load Prompts")
    clear = gr.Button("Clear Chat")

    # Surface dependency problems (e.g., Dabarqus not running) when the UI first loads
    error_box = gr.Markdown(visible=False)

    def report_dependency_status():
        _, message = display_error_message(check_dependencies())
        return gr.update(visible=bool(message), value=message)

    demo.load(report_dependency_status, outputs=[error_box])

    memory_bank.change(enable_input, inputs=[memory_bank], outputs=[msg, submit])

    msg.submit(
        chat_function,
        inputs=[msg, chatbot, memory_bank, model_selection, query_limit, retrieval_prompt, prompt_template],
        outputs=[chatbot]
    )
    submit.click(
        chat_function,
        inputs=[msg, chatbot, memory_bank, model_selection, query_limit, retrieval_prompt, prompt_template],
        outputs=[chatbot]
    )
    clear.click(lambda: None, None, chatbot, queue=False)

    save_button.click(
        save_conversation,
        inputs=[chatbot],
        outputs=[file_output]
    )

    load_button.click(
        toggle_load_file,
        inputs=[file_input, chatbot],
        outputs=[file_input, chatbot]
    )

    # Wire the prompt save/load buttons to their handlers
    save_prompts_btn.click(save_prompts, inputs=[retrieval_prompt, prompt_template])
    load_prompts_btn.click(load_prompts, outputs=[retrieval_prompt, prompt_template])

if __name__ == "__main__":
    demo.launch()
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/requirements.txt:
--------------------------------------------------------------------------------
aiofiles==23.2.1
annotated-types==0.7.0
anyio==4.6.0
Brotli==1.1.0
certifi==2024.8.30
charset-normalizer==3.4.0
click==8.1.7
colorama==0.4.6
dabarqus==1.1.9
fastapi==0.115.0
ffmpy==0.4.0
filelock==3.16.1
fsspec==2024.9.0
gradio==5.0.0
gradio_client==1.4.0
h11==0.14.0
httpcore==1.0.6
httpx==0.27.2
huggingface-hub==0.25.2
idna==3.10
inflate64==1.0.0
Jinja2==3.1.4
markdown-it-py==3.0.0
MarkupSafe==2.1.5
25 | mdurl==0.1.2
26 | multivolumefile==0.2.3
27 | numpy==2.1.2
28 | ollama==0.3.3
29 | orjson==3.10.7
30 | packaging==24.1
31 | pandas==2.2.3
32 | pillow==10.4.0
33 | psutil==6.0.0
34 | py7zr==0.22.0
35 | pybcj==1.0.2
36 | pycryptodomex==3.21.0
37 | pydantic==2.9.2
38 | pydantic_core==2.23.4
39 | pydub==0.25.1
40 | Pygments==2.18.0
41 | pyppmd==1.1.0
42 | python-dateutil==2.9.0.post0
43 | python-multipart==0.0.12
44 | pytz==2024.2
45 | PyYAML==6.0.2
46 | pyzstd==0.16.2
47 | requests==2.32.3
48 | rich==13.9.2
49 | ruff==0.6.9
50 | semantic-version==2.10.0
51 | shellingham==1.5.4
52 | six==1.16.0
53 | sniffio==1.3.1
54 | starlette==0.38.6
55 | texttable==1.7.0
56 | tomlkit==0.12.0
57 | tqdm==4.66.5
58 | typer==0.12.5
59 | typing_extensions==4.12.2
60 | tzdata==2024.2
61 | urllib3==2.2.3
62 | uvicorn==0.31.1
63 | websockets==12.0
64 | 
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/sample_prompt.md:
--------------------------------------------------------------------------------
1 | Evaluate these responses from your RAG database {RAG_response} in response to this original prompt: {original_prompt}. You prompted it using these keywords: {keywords}. You have three options:
2 | 1. Seek more context
3 | Reprompt the RAG database with the same keywords, but asking for more results. Please say: REPROMPT
4 | 2. Reprompt RAG database
5 | Reprompt the RAG database with new keywords. Please say: NEW KEYWORDS: (the new keywords)
6 | 3. Accept answer from RAG database and use it to respond to the user.
7 | First, say ACCEPT. Then, on a new line, respond to the user's original prompt using the context you gathered from the RAG database.
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/templates/general_template.md:
--------------------------------------------------------------------------------
1 | Use these results from your knowledge base to answer this prompt. You are a bot made to help users with access to a library of general information. Use the information provided to you (in JSON) to help answer the user. DO NOT mention the distance, and you MUST utilize the information from your knowledge base.
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-PythonSDK/templates/recipe_template.md:
--------------------------------------------------------------------------------
1 | Use these results from your recipe catalog to form your answer (include the file reference in your answer if you use one). DO NOT mention the distance, and ONLY use the recipes you receive from the catalog.
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/.gitignore:
--------------------------------------------------------------------------------
1 | # Python virtual environment
2 | venv/
3 | 
4 | # Python cache files
5 | __pycache__/
6 | *.pyc
7 | *.pyo
8 | *.pyd
9 | 
10 | # Retrieval results
11 | retrievals/
12 | 
13 | # IDE-specific files (e.g., for VSCode)
14 | .vscode/
15 | 
16 | # Operating system files
17 | .DS_Store
18 | Thumbs.db
19 | 
20 | # Jupyter Notebook checkpoints
21 | .ipynb_checkpoints/
22 | 
23 | # Logs
24 | *.log
25 | 
26 | # Environment variables
27 | .env
28 | 
29 | # Build directories
30 | build/
31 | dist/
32 | 
33 | # Temporary files
34 | *.tmp
35 | *.bak
36 | *.swp
37 | 
38 | # Conversation logs
39 | conversation*
40 | 
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/README.md:
--------------------------------------------------------------------------------
1 | # Dabarqus Python Chatbot UI Example: REST API
2 | 
3 | This is an example chatbot program that uses Dabarqus via the REST API to chat with your PDFs. It uses Gradio for the frontend UI and Ollama to provide the LLMs.
4 | 
5 | There is another version of this demo, PythonPDFChatbot-PythonSDK, that uses the [native Dabarqus Python SDK](https://pypi.org/project/dabarqus/) and Dabarqus's built-in inference engine.
6 | 
7 | ## Features
8 | 
9 | - Interactive chat interface
10 | - Memory bank selection
11 | - Integration with Dabarqus API for semantic search
12 | - Powered by Gradio for easy web deployment
13 | 
14 | ## Prerequisites
15 | 
16 | - Python 3.8+
17 | - Dabarqus server running and accessible
18 | - [Ollama](https://ollama.com/download)
19 | 
20 | ## Installation
21 | ### Dabarqus Service
22 | Important: This chatbot requires Dabarqus to be installed and running on your machine. Before using this chatbot, please ensure that you have:
23 | 
24 | - Downloaded and installed Dabarqus
25 | - Started the Dabarqus service on your machine
26 | 
27 | The chatbot communicates with the Dabarqus service via its API, so having Dabarqus running is essential for the chatbot to function correctly.
28 | Once Dabarqus is set up and running, you can proceed with using this chatbot. For more information on how to start and manage the Dabarqus service, please refer to the [Dabarqus quick start](https://github.com/electricpipelines/barq?tab=readme-ov-file#quick-start).
29 | 
30 | ### Chatbot installation
31 | 
32 | 1. Clone the repository:
33 | `git clone https://github.com/electricpipelines/barq.git`
34 | `cd barq/examples/PythonPDFChatbot-RESTAPI`
35 | 
36 | 2. Create a virtual environment (**optional but recommended**):
37 | `python -m venv venv`
38 | `source venv/bin/activate # On Windows, use 'venv\Scripts\activate'`
39 | 
40 | 3. Install the required dependencies:
41 | `pip install -r requirements.txt`
42 | 
43 | ### Ollama
44 | If you have not already downloaded and set up Ollama:
45 | 1. Follow the installation instructions on the [Ollama website](https://ollama.com/download).
46 | 
47 | 2. After installation, install at least one LLM:
48 | `ollama pull llama3`
49 | 
50 | ## Running the Application
51 | 
52 | 1. Ensure your Dabarqus server is running and accessible.
53 | 2. Start the Gradio application:
54 | `python app.py`
55 | 3. The application will start and provide a local URL (usually http://127.0.0.1:7860).
56 | 4. Open this URL in your web browser to access the chat interface.
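57 | 
58 | ### Verifying the Dabarqus service (optional)
59 | Before starting the chatbot, you can sanity-check the Dabarqus service from the command line. These are the same REST endpoints this example's code calls; this is a quick sketch assuming the default port 6568 used throughout this example, so adjust if your install differs:
60 | - Health check: `curl http://localhost:6568/health`
61 | - List memory banks: `curl http://localhost:6568/api/silk/memorybanks`
62 | - Run a test query (substitute one of your own memory bank names for the hypothetical "MyNewRecipeBook"): `curl "http://localhost:6568/api/silk/query?q=macaroni&limit=3&memorybank=MyNewRecipeBook"`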
63 | 
64 | ### Memory Banks
65 | You need a **memory bank** to chat with your PDFs. You have a few options:
66 | - Run the CreatingAMemoryBank example.
67 | - Create a memory bank through the admin interface:
68 | 1. Open the Dabarqus admin interface (typically at `http://localhost:6568/admin`).
69 | 2. Navigate to the "Memory Banks" section.
70 | 3. Create a new memory bank (e.g., "MyNewRecipeBook"); it will appear in the list once it has been created.
71 | 
72 | 
73 | ## File Structure
74 | 
75 | - `app.py`: Main application file containing the Gradio interface
76 | - `retriever.py`: Contains functions for interacting with the Dabarqus API
77 | - `templates/`: Directory containing prompt templates
78 | - `sample_prompt.md`: Sample prompt file for the chatbot
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/app.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | import ollama
3 | from retriever import retrieve_data, convert_prompt_to_retrieval_prompt
4 | import requests
5 | import json
6 | import os
7 | from datetime import datetime
8 | 
9 | def check_dependencies():
10 | errors = []
11 | 
12 | # Check Ollama
13 | try:
14 | ollama.list()
15 | except Exception as e:
16 | errors.append(f"Ollama is not running or installed properly. Error: {str(e)}")
17 | 
18 | # Check Dabarqus
19 | try:
20 | response = requests.get("http://localhost:6568/health")
21 | if response.status_code != 200:
22 | errors.append("Dabarqus is not responding properly.")
23 | except requests.RequestException:
24 | errors.append("Dabarqus is not running or installed properly.")
25 | 
26 | return errors
27 | 
28 | def display_error_message(errors):
29 | if errors:
30 | error_msg = "The following errors occurred:\n" + "\n".join(errors)
31 | gr.Warning(error_msg)
32 | return gr.update(visible=True, value=error_msg) # single update, so this can feed a single output component
33 | return gr.update(visible=False, value="")
34 | 
35 | # Helper functions below are defined at module level; the UI itself (including the error box) is built in the single gr.Blocks context further down.
36 | 
37 | 
38 | # Get available Ollama models
39 | def get_ollama_models():
40 | try:
41 | models = ollama.list()
42 | return [model['name'] for model in models['models']]
43 | except Exception as e:
44 | print(f"Error fetching Ollama models: {e}")
45 | return ["llama3"] # Default model if fetching fails
46 | 
47 | 
48 | def get_memory_banks():
49 | url = "http://localhost:6568/api/silk/memorybanks"
50 | try:
51 | response = requests.get(url)
52 | response.raise_for_status()
53 | memory_banks = response.json()
54 | 
55 | return [bank.get('name') for bank in memory_banks['SilkMemoryBanks'] if bank.get('name')]
56 | except requests.exceptions.RequestException as e:
57 | print(f"Error fetching memory banks: {e}")
58 | return ["Default"] # Return a default option if the API call fails
59 | 
60 | 
61 | def chat_function(message, history, memory_bank, model, query_limit, retrieval_prompt_template, full_prompt_template):
62 | # Convert the user's message to a retrieval prompt
63 | retrieval_prompt = convert_prompt_to_retrieval_prompt(message, retrieval_prompt_template)
64 | 
65 | # Retrieve data
66 | retrieved_data = retrieve_data(retrieval_prompt, memory_bank, int(query_limit))
67 | 
68 | # Prepare the prompt for the LLM
69 | full_prompt = f"{full_prompt_template} : RAG_response {retrieved_data}, keywords: {retrieval_prompt}, original_prompt: {message}"
70 | 
71 | # Use Ollama to generate a response
72 | response = ""
73 | stream = ollama.chat(
74 | model=model,
75 | messages=[{"role": "system",
"content": "You are a helpful assistant."}, 76 | {"role": "user", "content": full_prompt}], 77 | stream=True, 78 | ) 79 | 80 | for chunk in stream: 81 | response += chunk['message']['content'] 82 | yield history + [("Human", message), ("AI", response)] 83 | 84 | 85 | def save_conversation(history): 86 | if not history: 87 | gr.Warning("No conversation to save.") 88 | return None, gr.update(visible=False) 89 | 90 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 91 | filename = f"conversation_{timestamp}.json" 92 | with open(filename, "w") as f: 93 | json.dump(history, f) 94 | 95 | gr.Info(f"Conversation saved as {filename}") 96 | return filename, gr.update(visible=True) 97 | 98 | def toggle_load_file(file, chatbot): 99 | if file is None: 100 | # If no file is selected, just toggle visibility 101 | return gr.update(visible=True), chatbot 102 | 103 | try: 104 | with open(file.name, "r") as f: 105 | history = json.load(f) 106 | gr.Info("Conversation loaded successfully.") 107 | return gr.update(value=None, visible=False), history 108 | except json.JSONDecodeError: 109 | gr.Warning("Invalid JSON file. Please select a valid conversation file.") 110 | except Exception as e: 111 | gr.Warning(f"Error loading conversation: {str(e)}") 112 | 113 | return gr.update(value=None), chatbot 114 | def show_load_file(): 115 | return gr.update(visible=True) 116 | 117 | def hide_load_file(): 118 | return gr.update(visible=False) 119 | 120 | 121 | def save_prompts(retrieval_prompt, full_prompt): 122 | prompts = { 123 | "retrieval_prompt": retrieval_prompt, 124 | "full_prompt": full_prompt 125 | } 126 | with open("custom_prompts.json", "w") as f: 127 | json.dump(prompts, f) 128 | gr.Info("Prompts saved successfully.") 129 | 130 | def load_prompts(): 131 | if os.path.exists("custom_prompts.json"): 132 | with open("custom_prompts.json", "r") as f: 133 | prompts = json.load(f) 134 | return prompts["retrieval_prompt"], prompts["full_prompt"] 135 | else: 136 | gr.Warning("No saved prompts found.") 137 | return None, None 138 | 139 | 140 | 141 | def enable_input(choice): 142 | return gr.update(interactive=bool(choice)), gr.update(interactive=bool(choice)) 143 | 144 | 145 | 146 | with gr.Blocks(title="dabarqus") as demo: 147 | memory_banks = get_memory_banks() 148 | ollama_models = get_ollama_models() 149 | 150 | with gr.Row(): 151 | with gr.Column(scale=3): 152 | gr.Markdown("") # Empty markdown to take up space 153 | with gr.Column(scale=2): 154 | with gr.Row(): 155 | save_button = gr.Button("Save", size="sm") 156 | load_button = gr.Button("Load", size="sm") 157 | file_output = gr.File(label="Saved Conversation", visible=False) 158 | file_input = gr.File(label="Load Conversation", file_types=[".json"], visible=False) 159 | 160 | with gr.Row(): 161 | memory_bank = gr.Dropdown( 162 | choices=memory_banks, 163 | label="Select Memory Bank", 164 | value=None, 165 | allow_custom_value=False, 166 | info="Choose a memory bank to query for relevant information." 167 | ) 168 | model_selection = gr.Dropdown( 169 | choices=ollama_models, 170 | label="Select Inference Model", 171 | value=ollama_models[0] if ollama_models else None, 172 | info="Select the Ollama model for inference. Make sure Ollama is installed and running." 
204 | )
205 | 
206 | 
207 | 
208 | query_limit = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Number of RAG results")
209 | 
210 | chatbot = gr.Chatbot()
211 | with gr.Row():
212 | with gr.Column(scale=4):
213 | msg = gr.Textbox(
214 | label="Type your message here",
215 | placeholder="Enter your question...",
216 | interactive=False,
217 | elem_classes="large-text-input"
218 | )
219 | with gr.Column(scale=1):
220 | submit = gr.Button("Send", interactive=False)
221 | 
222 | with gr.Accordion("Advanced Settings", open=False):
223 | retrieval_prompt = gr.Textbox(
224 | label="Retrieval Prompt",
225 | placeholder="Enter the retrieval prompt...",
226 | lines=3,
227 | value="Take the user's prompt to create a prompt for a semantic database retriever. Only respond with a list of comma-separated keywords. DO NOT say anything before or after the keywords."
228 | )
229 | 
230 | prompt_template = gr.TextArea(
231 | label="Prompt Template",
232 | placeholder="Enter the prompt template...",
233 | value="Use these results from your recipe catalog to form your answer (include the file reference in your answer if you use one)"
234 | )
235 | with gr.Row():
236 | save_prompts_btn = gr.Button("Save Prompts")
237 | load_prompts_btn = gr.Button("Load Prompts")
238 | clear = gr.Button("Clear Chat")
239 | 
240 | memory_bank.change(enable_input, inputs=[memory_bank], outputs=[msg, submit])
241 | 
242 | msg.submit(
243 | chat_function,
244 | inputs=[msg, chatbot, memory_bank, model_selection, query_limit, retrieval_prompt, prompt_template],
245 | outputs=[chatbot]
246 | )
247 | submit.click(
248 | chat_function,
249 | inputs=[msg, chatbot, memory_bank, model_selection, query_limit, retrieval_prompt, prompt_template],
250 | outputs=[chatbot]
251 | )
252 | clear.click(lambda: None, None, chatbot, queue=False)
253 | 
254 | save_button.click(
255 | save_conversation,
256 | inputs=[chatbot],
257 | outputs=[file_output]
258 | )
259 | 
260 | load_button.click(
261 | toggle_load_file,
262 | inputs=[file_input, chatbot],
263 | outputs=[file_input, chatbot]
264 | )
265 | 
266 | # Run the dependency check once the UI loads (event listeners must be registered inside the Blocks context, before launch)
267 | demo.load(lambda: display_error_message(check_dependencies()), outputs=[error_box])
268 | 
269 | # Launch the app
270 | if __name__ == "__main__":
271 | demo.launch()
272 | 
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/requirements.txt:
--------------------------------------------------------------------------------
1 | aiofiles==23.2.1
2 | annotated-types==0.7.0
3 | anyio==4.4.0
4 | certifi==2024.7.4
5 | charset-normalizer==3.3.2
6 | click==8.1.7
7 | colorama==0.4.6
8 | contourpy==1.2.1
9 | cycler==0.12.1
10 | exceptiongroup==1.2.2
11 | fastapi==0.112.0
12 | 
ffmpy==0.4.0
13 | filelock==3.15.4
14 | fonttools==4.53.1
15 | fsspec==2024.6.1
16 | gradio==4.41.0
17 | gradio_client==1.3.0
18 | h11==0.14.0
19 | httpcore==1.0.5
20 | httpx==0.27.0
21 | huggingface-hub==0.24.5
22 | idna==3.7
23 | importlib_resources==6.4.0
24 | Jinja2==3.1.4
25 | kiwisolver==1.4.5
26 | markdown-it-py==3.0.0
27 | MarkupSafe==2.1.5
28 | matplotlib==3.9.1.post1
29 | mdurl==0.1.2
30 | numpy==2.0.1
31 | ollama==0.3.1
32 | orjson==3.10.7
33 | packaging==24.1
34 | pandas==2.2.2
35 | pillow==10.4.0
36 | pydantic==2.8.2
37 | pydantic_core==2.20.1
38 | pydub==0.25.1
39 | Pygments==2.18.0
40 | pyparsing==3.1.2
41 | python-dateutil==2.9.0.post0
42 | python-multipart==0.0.9
43 | pytz==2024.1
44 | PyYAML==6.0.2
45 | requests==2.32.3
46 | rich==13.7.1
47 | ruff==0.5.7
48 | semantic-version==2.10.0
49 | shellingham==1.5.4
50 | six==1.16.0
51 | sniffio==1.3.1
52 | starlette==0.37.2
53 | tomlkit==0.12.0
54 | tqdm==4.66.5
55 | typer==0.12.3
56 | typing_extensions==4.12.2
57 | tzdata==2024.1
58 | urllib3==2.2.2
59 | uvicorn==0.30.5
60 | websockets==12.0
61 | 
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/retriever.py:
--------------------------------------------------------------------------------
1 | import uuid
2 | import time
3 | import os
4 | import threading
5 | import sys
6 | import itertools
7 | import requests
8 | import ollama
9 | from colorama import Fore, Back, Style
10 | 
11 | def serialize_response(json_string, directory='./retrievals/'):
12 | # Ensure the directory exists
13 | if not os.path.exists(directory):
14 | os.makedirs(directory)
15 | 
16 | # Generate a unique filename using timestamp and UUID
17 | unique_filename = f"{int(time.time())}_{uuid.uuid4()}.json"
18 | file_path = os.path.join(directory, unique_filename)
19 | 
20 | # Write the JSON string to the file
21 | with open(file_path, 'w') as file:
22 | file.write(json_string)
23 | 
24 | # Provide the link to the file
25 | print(Fore.LIGHTBLUE_EX + f"Retrieved info has been saved to {file_path}" + Style.RESET_ALL)
26 | print("___")
27 | print()
28 | return file_path
29 | 
30 | def convert_prompt_to_retrieval_prompt(prompt, prompt_template, model="llama3"): # note: prompt_template is currently unused; the retrieval instruction below is hardcoded
31 | # llm = Ollama(
32 | # model=model,
33 | # temperature=0
34 | # )
35 | # response = llm.invoke(f"Take the user's prompt to create a prompt for a semantic database retriever. Only respond with a list of comma-separated keywords. DO NOT say anything before or after the keywords.#Prompt:{prompt}")
36 | response = ollama.chat(model=model, messages=[
37 | {
38 | 'role': 'user',
39 | 'content': f"Take the user's prompt to create a prompt for a semantic database retriever. Only respond with a list of comma-separated keywords. 
DO NOT say anything before or after the keywords.#Prompt:{prompt}",
40 | },
41 | ])
42 | return response['message']['content'] # return only the generated keyword text, not the whole response object
43 | 
44 | def display_spinner_and_wait_message(stop_event, message=""):
45 | spinner = itertools.cycle(['-', '\\', '|', '/'])
46 | while not stop_event.is_set(): # Check the stop event
47 | sys.stdout.write('\r' + Fore.YELLOW + message + next(spinner) + Fore.RESET)
48 | sys.stdout.flush()
49 | time.sleep(0.1)
50 | # Clear the spinner line when done
51 | sys.stdout.write('\r \r')
52 | sys.stdout.flush()
53 | 
54 | 
55 | def retrieve_data(prompt, memory_bank, query_limit=10):
56 | stop_event = threading.Event()
57 | t = threading.Thread(target=display_spinner_and_wait_message, args=(stop_event, "Retrieving info from database..."))
58 | t.start()
59 | 
60 | url = "http://localhost:6568/api/silk/query"
61 | params = {
62 | "q": prompt,
63 | "limit": query_limit,
64 | "memorybank": memory_bank # Changed from "memoryBank" to "memorybank"
65 | }
66 | 
67 | try:
68 | response = requests.get(url, params=params)
69 | response.raise_for_status() # Raises an HTTPError for bad responses
70 | serialize_response(response.text)
71 | return response.json()
72 | except requests.exceptions.RequestException as e:
73 | print(f"An error occurred: {e}")
74 | return None
75 | finally:
76 | stop_event.set() # Signal the spinner thread to stop
77 | t.join() # Wait for the spinner thread to finish
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/sample_prompt.md:
--------------------------------------------------------------------------------
1 | Evaluate these responses from your RAG database {RAG_response} in response to this original prompt: {original_prompt}. You prompted it using these keywords: {keywords}. You have three options:
2 | 1. Seek more context
3 | Reprompt the RAG database with the same keywords, but asking for more results. Please say: REPROMPT
4 | 2. Reprompt RAG database
5 | Reprompt the RAG database with new keywords. Please say: NEW KEYWORDS: (the new keywords)
6 | 3. Accept answer from RAG database and use it to respond to the user.
7 | First, say ACCEPT. Then, on a new line, respond to the user's original prompt using the context you gathered from the RAG database.
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/templates/general_template.md:
--------------------------------------------------------------------------------
1 | Use these results from your knowledge base to answer this prompt. You are a bot made to help users with access to a library of general information. Use the information provided to you (in JSON) to help answer the user. DO NOT mention the distance, and you MUST utilize the information from your knowledge base.
--------------------------------------------------------------------------------
/examples/PythonPDFChatbot-RESTAPI/templates/recipe_template.md:
--------------------------------------------------------------------------------
1 | Use these results from your recipe catalog to form your answer (include the file reference in your answer if you use one). DO NOT mention the distance, and ONLY use the recipes you receive from the catalog.
--------------------------------------------------------------------------------