├── .github
    └── workflows
    │   ├── build_and_test_mlx.yml
    │   ├── build_and_test_vectura.yml
    │   └── update-readme.yml
├── .gitignore
├── .swiftpm
    └── xcode
    │   ├── package.xcworkspace
    │       └── contents.xcworkspacedata
    │   └── xcshareddata
    │       └── xcschemes
    │           ├── VecturaKit-Package.xcscheme
    │           ├── VecturaKit.xcscheme
    │           ├── VecturaKitTests.xcscheme
    │           ├── VecturaMLXKit.xcscheme
    │           ├── VecturaMLXKitTests.xcscheme
    │           ├── vectura-cli.xcscheme
    │           └── vectura-mlx-cli.xcscheme
├── .vscode
    ├── launch.json
    └── settings.json
├── LICENSE
├── Package.resolved
├── Package.swift
├── README.md
├── Sources
    ├── VecturaCLI
    │   └── VecturaCLI.swift
    ├── VecturaKit
    │   ├── BM25Index.swift
    │   ├── FileStorageProvider.swift
    │   ├── VecturaConfig.swift
    │   ├── VecturaDocument.swift
    │   ├── VecturaError.swift
    │   ├── VecturaKit.swift
    │   ├── VecturaModelSource.swift
    │   ├── VecturaProtocol.swift
    │   ├── VecturaSearchResult.swift
    │   └── VecturaStorage.swift
    ├── VecturaMLXCLI
    │   └── VecturaMLXCLI.swift
    └── VecturaMLXKit
    │   ├── MLXEmbedder.swift
    │   └── VecturaMLXKit.swift
├── Tests
    ├── VecturaKitTests
    │   └── VecturaKitTests.swift
    └── VecturaMLXKitTests
    │   └── VecturaMLXKitTests.swift
└── scripts
    └── update_readme.py


/.github/workflows/build_and_test_mlx.yml:
--------------------------------------------------------------------------------
 1 | name: "VecturaMLX CI"  # builds and tests the VecturaMLXKit schemes with xcodebuild
 2 | 
 3 | on:  # triggers: pushes to main and every pull request
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   build-and-test:
11 |     runs-on: macos-15
12 |     env:
13 |       DEVELOPER_DIR: "/Applications/Xcode_16.1.app/Contents/Developer"  # pins the Xcode 16.1 install used by xcodebuild
14 |     steps:
15 |       - name: Checkout code
16 |         uses: actions/checkout@v4
17 |       - name: Setup Swift Toolchain  # requests a Swift 6.0 toolchain from swift-actions/setup-swift
18 |         uses: swift-actions/setup-swift@v2
19 |         with:
20 |           swift-version: "6.0"
21 |       - name: Build target VecturaMLXKit  # NOTE(review): xcodebuild instead of `swift build`, presumably for MLX's Metal shaders; confirm
22 |         run: xcodebuild -scheme "VecturaMLXKit" build -destination 'platform=macOS'
23 |       - name: Run tests  # runs the VecturaMLXKitTests scheme on the macOS destination
24 |         run: xcodebuild -scheme "VecturaMLXKitTests" test -destination 'platform=macOS'


--------------------------------------------------------------------------------
/.github/workflows/build_and_test_vectura.yml:
--------------------------------------------------------------------------------
 1 | name: "VecturaKit CI"  # builds each matrix product with SwiftPM and runs the VecturaKit test suite
 2 | 
 3 | on:  # triggers: pushes to main and every pull request
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   build-and-test:
11 |     runs-on: macos-15
12 |     env:
13 |       DEVELOPER_DIR: "/Applications/Xcode_16.1.app/Contents/Developer"  # pins the Xcode 16.1 install
14 |     strategy:
15 |       matrix:
16 |         target: ["VecturaKit", "vectura-cli"]  # SwiftPM product names, one job per product
17 |     steps:
18 |       - uses: actions/checkout@v4
19 |       - name: Build ${{ matrix.target }}
20 |         run: swift build --product "${{ matrix.target }}"
21 |       - name: Test ${{ matrix.target }}
22 |         # Only VecturaKit has a matching "<target>Tests" suite; "vectura-cliTests" would match no tests.
23 |         if: matrix.target == 'VecturaKit'
24 |         run: swift test --filter "${{ matrix.target }}Tests"


--------------------------------------------------------------------------------
/.github/workflows/update-readme.yml:
--------------------------------------------------------------------------------
 1 | name: Update README  # regenerates README.md via scripts/update_readme.py and proposes it as a PR
 2 | 
 3 | on:
 4 |   push:
 5 |     # Run on pushes to main (or any branch you prefer)
 6 |     branches: [main]
 7 | 
 8 | permissions:  # write access is needed to push the docs/update-readme branch and open the PR
 9 |   contents: write
10 |   pull-requests: write
11 | 
12 | jobs:
13 |   update-readme:
14 |     # Skip this job if the commit message indicates a merge from docs/update-readme.
15 |     # Adjust the string in the contains() check if your merge commit message is different.
16 |     if: "!contains(github.event.head_commit.message, 'docs/update-readme')"
17 |     runs-on: ubuntu-latest
18 | 
19 |     steps:
20 |       # 1. Check out the repository
21 |       - name: Checkout repository
22 |         uses: actions/checkout@v4
23 | 
24 |       # 2. Set up Python (make sure you choose a suitable version)
25 |       - name: Set up Python
26 |         uses: actions/setup-python@v5
27 |         with:
28 |           python-version: '3.x'
29 | 
30 |       # 3. Install Gemini package
31 |       - name: Install Gemini package
32 |         run: pip install google-genai
33 | 
34 |       # 4. Run the Python script that calls GeminiAI
35 |       - name: Run README updater script
36 |         env:
37 |           GEMINI_API_KEY: "${{ secrets.GEMINI_API_KEY }}"
38 |         run: python3 scripts/update_readme.py
39 | 
40 |       # 5. Open a Pull Request (with GITHUB_TOKEN, the repository must allow Actions to create pull requests)
41 |       - name: Create Pull Request
42 |         uses: peter-evans/create-pull-request@v7
43 |         with:
44 |           token: "${{ secrets.GITHUB_TOKEN }}"
45 |           commit-message: "docs: update README.md based on codebase"
46 |           title: "docs: update README.md"
47 |           body: |
48 |             This PR updates the README.md file based on the current codebase using Gemini AI.
49 | 
50 |             - Automatically generated by GitHub Actions
51 |           branch: docs/update-readme
52 |           base: main
53 |           delete-branch: true
54 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Xcode
 2 | #
 3 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
 4 | 
 5 | ## User settings
 6 | xcuserdata/
 7 | 
 8 | ## Obj-C/Swift specific
 9 | *.hmap
10 | 
11 | ## App packaging
12 | *.ipa
13 | *.dSYM.zip
14 | *.dSYM
15 | 
16 | ## Playgrounds
17 | timeline.xctimeline
18 | playground.xcworkspace
19 | 
20 | # Swift Package Manager
21 | #
22 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
23 | # Packages/
24 | # Package.pins
25 | # Package.resolved
26 | # *.xcodeproj
27 | #
28 | # Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata
29 | # hence it is not needed unless you have added a package configuration file to your project
30 | # .swiftpm
31 | 
32 | .build/
33 | 
34 | # CocoaPods
35 | #
36 | # We recommend against adding the Pods directory to your .gitignore. However
37 | # you should judge for yourself, the pros and cons are mentioned at:
38 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
39 | #
40 | # Pods/
41 | #
42 | # Add this line if you want to avoid checking in source code from the Xcode workspace
43 | # *.xcworkspace
44 | 
45 | # Carthage
46 | #
47 | # Add this line if you want to avoid checking in source code from Carthage dependencies.
48 | # Carthage/Checkouts
49 | 
50 | Carthage/Build/
51 | 
52 | # fastlane
53 | #
54 | # It is recommended to not store the screenshots in the git repo.
55 | # Instead, use fastlane to re-generate the screenshots whenever they are needed.
56 | # For more information about the recommended setup visit:
57 | # https://docs.fastlane.tools/best-practices/source-control/#source-control
58 | 
59 | fastlane/report.xml
60 | fastlane/Preview.html
61 | fastlane/screenshots/**/*.png
62 | fastlane/test_output
63 | 


--------------------------------------------------------------------------------
/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <Workspace
3 |    version = "1.0">
4 |    <FileRef
5 |       location = "self:">
6 |    </FileRef>
7 | </Workspace>
8 | 


--------------------------------------------------------------------------------
/.swiftpm/xcode/xcshareddata/xcschemes/VecturaKit-Package.xcscheme:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <Scheme
  3 |    LastUpgradeVersion = "1620"
  4 |    version = "1.7">
  5 |    <BuildAction
  6 |       parallelizeBuildables = "YES"
  7 |       buildImplicitDependencies = "YES"
  8 |       buildArchitectures = "Automatic">
  9 |       <BuildActionEntries>
 10 |          <BuildActionEntry
 11 |             buildForTesting = "YES"
 12 |             buildForRunning = "YES"
 13 |             buildForProfiling = "YES"
 14 |             buildForArchiving = "YES"
 15 |             buildForAnalyzing = "YES">
 16 |             <BuildableReference
 17 |                BuildableIdentifier = "primary"
 18 |                BlueprintIdentifier = "VecturaKit"
 19 |                BuildableName = "VecturaKit"
 20 |                BlueprintName = "VecturaKit"
 21 |                ReferencedContainer = "container:">
 22 |             </BuildableReference>
 23 |          </BuildActionEntry>
 24 |          <BuildActionEntry
 25 |             buildForTesting = "YES"
 26 |             buildForRunning = "YES"
 27 |             buildForProfiling = "YES"
 28 |             buildForArchiving = "YES"
 29 |             buildForAnalyzing = "YES">
 30 |             <BuildableReference
 31 |                BuildableIdentifier = "primary"
 32 |                BlueprintIdentifier = "VecturaMLXKit"
 33 |                BuildableName = "VecturaMLXKit"
 34 |                BlueprintName = "VecturaMLXKit"
 35 |                ReferencedContainer = "container:">
 36 |             </BuildableReference>
 37 |          </BuildActionEntry>
 38 |          <BuildActionEntry
 39 |             buildForTesting = "YES"
 40 |             buildForRunning = "YES"
 41 |             buildForProfiling = "YES"
 42 |             buildForArchiving = "YES"
 43 |             buildForAnalyzing = "YES">
 44 |             <BuildableReference
 45 |                BuildableIdentifier = "primary"
 46 |                BlueprintIdentifier = "vectura-cli"
 47 |                BuildableName = "vectura-cli"
 48 |                BlueprintName = "vectura-cli"
 49 |                ReferencedContainer = "container:">
 50 |             </BuildableReference>
 51 |          </BuildActionEntry>
 52 |          <BuildActionEntry
 53 |             buildForTesting = "YES"
 54 |             buildForRunning = "YES"
 55 |             buildForProfiling = "YES"
 56 |             buildForArchiving = "YES"
 57 |             buildForAnalyzing = "YES">
 58 |             <BuildableReference
 59 |                BuildableIdentifier = "primary"
 60 |                BlueprintIdentifier = "vectura-mlx-cli"
 61 |                BuildableName = "vectura-mlx-cli"
 62 |                BlueprintName = "vectura-mlx-cli"
 63 |                ReferencedContainer = "container:">
 64 |             </BuildableReference>
 65 |          </BuildActionEntry>
 66 |       </BuildActionEntries>
 67 |    </BuildAction>
 68 |    <TestAction
 69 |       buildConfiguration = "Debug"
 70 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
 71 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
 72 |       shouldUseLaunchSchemeArgsEnv = "YES"
 73 |       shouldAutocreateTestPlan = "YES">
 74 |       <Testables>
 75 |          <TestableReference
 76 |             skipped = "NO">
 77 |             <BuildableReference
 78 |                BuildableIdentifier = "primary"
 79 |                BlueprintIdentifier = "VecturaKitTests"
 80 |                BuildableName = "VecturaKitTests"
 81 |                BlueprintName = "VecturaKitTests"
 82 |                ReferencedContainer = "container:">
 83 |             </BuildableReference>
 84 |          </TestableReference>
 85 |          <TestableReference
 86 |             skipped = "NO">
 87 |             <BuildableReference
 88 |                BuildableIdentifier = "primary"
 89 |                BlueprintIdentifier = "VecturaMLXKitTests"
 90 |                BuildableName = "VecturaMLXKitTests"
 91 |                BlueprintName = "VecturaMLXKitTests"
 92 |                ReferencedContainer = "container:">
 93 |             </BuildableReference>
 94 |          </TestableReference>
 95 |       </Testables>
 96 |    </TestAction>
 97 |    <LaunchAction
 98 |       buildConfiguration = "Debug"
 99 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
100 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
101 |       launchStyle = "0"
102 |       useCustomWorkingDirectory = "NO"
103 |       ignoresPersistentStateOnLaunch = "NO"
104 |       debugDocumentVersioning = "YES"
105 |       debugServiceExtension = "internal"
106 |       allowLocationSimulation = "YES">
107 |       <MacroExpansion>
108 |          <BuildableReference
109 |             BuildableIdentifier = "primary"
110 |             BlueprintIdentifier = "vectura-cli"
111 |             BuildableName = "vectura-cli"
112 |             BlueprintName = "vectura-cli"
113 |             ReferencedContainer = "container:">
114 |          </BuildableReference>
115 |       </MacroExpansion>
116 |    </LaunchAction>
117 |    <ProfileAction
118 |       buildConfiguration = "Release"
119 |       shouldUseLaunchSchemeArgsEnv = "YES"
120 |       savedToolIdentifier = ""
121 |       useCustomWorkingDirectory = "NO"
122 |       debugDocumentVersioning = "YES">
123 |       <MacroExpansion>
124 |          <BuildableReference
125 |             BuildableIdentifier = "primary"
126 |             BlueprintIdentifier = "vectura-mlx-cli"
127 |             BuildableName = "vectura-mlx-cli"
128 |             BlueprintName = "vectura-mlx-cli"
129 |             ReferencedContainer = "container:">
130 |          </BuildableReference>
131 |       </MacroExpansion>
132 |    </ProfileAction>
133 |    <AnalyzeAction
134 |       buildConfiguration = "Debug">
135 |    </AnalyzeAction>
136 |    <ArchiveAction
137 |       buildConfiguration = "Release"
138 |       revealArchiveInOrganizer = "YES">
139 |    </ArchiveAction>
140 | </Scheme>
141 | 


--------------------------------------------------------------------------------
/.swiftpm/xcode/xcshareddata/xcschemes/VecturaKit.xcscheme:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <Scheme
 3 |    LastUpgradeVersion = "1620"
 4 |    version = "1.7">
 5 |    <BuildAction
 6 |       parallelizeBuildables = "YES"
 7 |       buildImplicitDependencies = "YES"
 8 |       buildArchitectures = "Automatic">
 9 |       <BuildActionEntries>
10 |          <BuildActionEntry
11 |             buildForTesting = "YES"
12 |             buildForRunning = "YES"
13 |             buildForProfiling = "YES"
14 |             buildForArchiving = "YES"
15 |             buildForAnalyzing = "YES">
16 |             <BuildableReference
17 |                BuildableIdentifier = "primary"
18 |                BlueprintIdentifier = "VecturaKit"
19 |                BuildableName = "VecturaKit"
20 |                BlueprintName = "VecturaKit"
21 |                ReferencedContainer = "container:">
22 |             </BuildableReference>
23 |          </BuildActionEntry>
24 |       </BuildActionEntries>
25 |    </BuildAction>
26 |    <TestAction
27 |       buildConfiguration = "Debug"
28 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
29 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
30 |       shouldUseLaunchSchemeArgsEnv = "YES"
31 |       shouldAutocreateTestPlan = "YES">
32 |       <Testables>
33 |          <TestableReference
34 |             skipped = "NO">
35 |             <BuildableReference
36 |                BuildableIdentifier = "primary"
37 |                BlueprintIdentifier = "VecturaKitTests"
38 |                BuildableName = "VecturaKitTests"
39 |                BlueprintName = "VecturaKitTests"
40 |                ReferencedContainer = "container:">
41 |             </BuildableReference>
42 |          </TestableReference>
43 |       </Testables>
44 |    </TestAction>
45 |    <LaunchAction
46 |       buildConfiguration = "Debug"
47 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
48 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
49 |       launchStyle = "0"
50 |       useCustomWorkingDirectory = "NO"
51 |       ignoresPersistentStateOnLaunch = "NO"
52 |       debugDocumentVersioning = "YES"
53 |       debugServiceExtension = "internal"
54 |       allowLocationSimulation = "YES">
55 |    </LaunchAction>
56 |    <ProfileAction
57 |       buildConfiguration = "Release"
58 |       shouldUseLaunchSchemeArgsEnv = "YES"
59 |       savedToolIdentifier = ""
60 |       useCustomWorkingDirectory = "NO"
61 |       debugDocumentVersioning = "YES">
62 |       <MacroExpansion>
63 |          <BuildableReference
64 |             BuildableIdentifier = "primary"
65 |             BlueprintIdentifier = "VecturaKit"
66 |             BuildableName = "VecturaKit"
67 |             BlueprintName = "VecturaKit"
68 |             ReferencedContainer = "container:">
69 |          </BuildableReference>
70 |       </MacroExpansion>
71 |    </ProfileAction>
72 |    <AnalyzeAction
73 |       buildConfiguration = "Debug">
74 |    </AnalyzeAction>
75 |    <ArchiveAction
76 |       buildConfiguration = "Release"
77 |       revealArchiveInOrganizer = "YES">
78 |    </ArchiveAction>
79 | </Scheme>
80 | 


--------------------------------------------------------------------------------
/.swiftpm/xcode/xcshareddata/xcschemes/VecturaKitTests.xcscheme:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <Scheme
 3 |    LastUpgradeVersion = "1620"
 4 |    version = "1.7">
 5 |    <BuildAction
 6 |       parallelizeBuildables = "YES"
 7 |       buildImplicitDependencies = "YES"
 8 |       buildArchitectures = "Automatic">
 9 |    </BuildAction>
10 |    <TestAction
11 |       buildConfiguration = "Debug"
12 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
13 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
14 |       shouldUseLaunchSchemeArgsEnv = "YES"
15 |       shouldAutocreateTestPlan = "YES">
16 |       <Testables>
17 |          <TestableReference
18 |             skipped = "NO">
19 |             <BuildableReference
20 |                BuildableIdentifier = "primary"
21 |                BlueprintIdentifier = "VecturaKitTests"
22 |                BuildableName = "VecturaKitTests"
23 |                BlueprintName = "VecturaKitTests"
24 |                ReferencedContainer = "container:">
25 |             </BuildableReference>
26 |          </TestableReference>
27 |       </Testables>
28 |    </TestAction>
29 |    <LaunchAction
30 |       buildConfiguration = "Debug"
31 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
32 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
33 |       launchStyle = "0"
34 |       useCustomWorkingDirectory = "NO"
35 |       ignoresPersistentStateOnLaunch = "NO"
36 |       debugDocumentVersioning = "YES"
37 |       debugServiceExtension = "internal"
38 |       allowLocationSimulation = "YES">
39 |    </LaunchAction>
40 |    <ProfileAction
41 |       buildConfiguration = "Release"
42 |       shouldUseLaunchSchemeArgsEnv = "YES"
43 |       savedToolIdentifier = ""
44 |       useCustomWorkingDirectory = "NO"
45 |       debugDocumentVersioning = "YES">
46 |    </ProfileAction>
47 |    <AnalyzeAction
48 |       buildConfiguration = "Debug">
49 |    </AnalyzeAction>
50 |    <ArchiveAction
51 |       buildConfiguration = "Release"
52 |       revealArchiveInOrganizer = "YES">
53 |    </ArchiveAction>
54 | </Scheme>
55 | 


--------------------------------------------------------------------------------
/.swiftpm/xcode/xcshareddata/xcschemes/VecturaMLXKit.xcscheme:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <Scheme
 3 |    LastUpgradeVersion = "1620"
 4 |    version = "1.7">
 5 |    <BuildAction
 6 |       parallelizeBuildables = "YES"
 7 |       buildImplicitDependencies = "YES"
 8 |       buildArchitectures = "Automatic">
 9 |       <BuildActionEntries>
10 |          <BuildActionEntry
11 |             buildForTesting = "YES"
12 |             buildForRunning = "YES"
13 |             buildForProfiling = "YES"
14 |             buildForArchiving = "YES"
15 |             buildForAnalyzing = "YES">
16 |             <BuildableReference
17 |                BuildableIdentifier = "primary"
18 |                BlueprintIdentifier = "VecturaMLXKit"
19 |                BuildableName = "VecturaMLXKit"
20 |                BlueprintName = "VecturaMLXKit"
21 |                ReferencedContainer = "container:">
22 |             </BuildableReference>
23 |          </BuildActionEntry>
24 |       </BuildActionEntries>
25 |    </BuildAction>
26 |    <TestAction
27 |       buildConfiguration = "Debug"
28 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
29 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
30 |       shouldUseLaunchSchemeArgsEnv = "YES"
31 |       shouldAutocreateTestPlan = "YES">
32 |    </TestAction>
33 |    <LaunchAction
34 |       buildConfiguration = "Debug"
35 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
36 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
37 |       launchStyle = "0"
38 |       useCustomWorkingDirectory = "NO"
39 |       ignoresPersistentStateOnLaunch = "NO"
40 |       debugDocumentVersioning = "YES"
41 |       debugServiceExtension = "internal"
42 |       allowLocationSimulation = "YES">
43 |    </LaunchAction>
44 |    <ProfileAction
45 |       buildConfiguration = "Release"
46 |       shouldUseLaunchSchemeArgsEnv = "YES"
47 |       savedToolIdentifier = ""
48 |       useCustomWorkingDirectory = "NO"
49 |       debugDocumentVersioning = "YES">
50 |       <MacroExpansion>
51 |          <BuildableReference
52 |             BuildableIdentifier = "primary"
53 |             BlueprintIdentifier = "VecturaMLXKit"
54 |             BuildableName = "VecturaMLXKit"
55 |             BlueprintName = "VecturaMLXKit"
56 |             ReferencedContainer = "container:">
57 |          </BuildableReference>
58 |       </MacroExpansion>
59 |    </ProfileAction>
60 |    <AnalyzeAction
61 |       buildConfiguration = "Debug">
62 |    </AnalyzeAction>
63 |    <ArchiveAction
64 |       buildConfiguration = "Release"
65 |       revealArchiveInOrganizer = "YES">
66 |    </ArchiveAction>
67 | </Scheme>
68 | 


--------------------------------------------------------------------------------
/.swiftpm/xcode/xcshareddata/xcschemes/VecturaMLXKitTests.xcscheme:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <Scheme
 3 |    LastUpgradeVersion = "1620"
 4 |    version = "1.7">
 5 |    <BuildAction
 6 |       parallelizeBuildables = "YES"
 7 |       buildImplicitDependencies = "YES"
 8 |       buildArchitectures = "Automatic">
 9 |    </BuildAction>
10 |    <TestAction
11 |       buildConfiguration = "Debug"
12 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
13 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
14 |       shouldUseLaunchSchemeArgsEnv = "YES"
15 |       shouldAutocreateTestPlan = "YES">
16 |       <Testables>
17 |          <TestableReference
18 |             skipped = "NO">
19 |             <BuildableReference
20 |                BuildableIdentifier = "primary"
21 |                BlueprintIdentifier = "VecturaMLXKitTests"
22 |                BuildableName = "VecturaMLXKitTests"
23 |                BlueprintName = "VecturaMLXKitTests"
24 |                ReferencedContainer = "container:">
25 |             </BuildableReference>
26 |          </TestableReference>
27 |       </Testables>
28 |    </TestAction>
29 |    <LaunchAction
30 |       buildConfiguration = "Debug"
31 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
32 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
33 |       launchStyle = "0"
34 |       useCustomWorkingDirectory = "NO"
35 |       ignoresPersistentStateOnLaunch = "NO"
36 |       debugDocumentVersioning = "YES"
37 |       debugServiceExtension = "internal"
38 |       allowLocationSimulation = "YES">
39 |    </LaunchAction>
40 |    <ProfileAction
41 |       buildConfiguration = "Release"
42 |       shouldUseLaunchSchemeArgsEnv = "YES"
43 |       savedToolIdentifier = ""
44 |       useCustomWorkingDirectory = "NO"
45 |       debugDocumentVersioning = "YES">
46 |    </ProfileAction>
47 |    <AnalyzeAction
48 |       buildConfiguration = "Debug">
49 |    </AnalyzeAction>
50 |    <ArchiveAction
51 |       buildConfiguration = "Release"
52 |       revealArchiveInOrganizer = "YES">
53 |    </ArchiveAction>
54 | </Scheme>
55 | 


--------------------------------------------------------------------------------
/.swiftpm/xcode/xcshareddata/xcschemes/vectura-cli.xcscheme:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <Scheme
 3 |    LastUpgradeVersion = "1620"
 4 |    version = "1.7">
 5 |    <BuildAction
 6 |       parallelizeBuildables = "YES"
 7 |       buildImplicitDependencies = "YES"
 8 |       buildArchitectures = "Automatic">
 9 |       <BuildActionEntries>
10 |          <BuildActionEntry
11 |             buildForTesting = "YES"
12 |             buildForRunning = "YES"
13 |             buildForProfiling = "YES"
14 |             buildForArchiving = "YES"
15 |             buildForAnalyzing = "YES">
16 |             <BuildableReference
17 |                BuildableIdentifier = "primary"
18 |                BlueprintIdentifier = "vectura-cli"
19 |                BuildableName = "vectura-cli"
20 |                BlueprintName = "vectura-cli"
21 |                ReferencedContainer = "container:">
22 |             </BuildableReference>
23 |          </BuildActionEntry>
24 |       </BuildActionEntries>
25 |    </BuildAction>
26 |    <TestAction
27 |       buildConfiguration = "Debug"
28 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
29 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
30 |       shouldUseLaunchSchemeArgsEnv = "YES"
31 |       shouldAutocreateTestPlan = "YES">
32 |    </TestAction>
33 |    <LaunchAction
34 |       buildConfiguration = "Debug"
35 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
36 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
37 |       launchStyle = "0"
38 |       useCustomWorkingDirectory = "NO"
39 |       ignoresPersistentStateOnLaunch = "NO"
40 |       debugDocumentVersioning = "YES"
41 |       debugServiceExtension = "internal"
42 |       allowLocationSimulation = "YES">
43 |       <BuildableProductRunnable
44 |          runnableDebuggingMode = "0">
45 |          <BuildableReference
46 |             BuildableIdentifier = "primary"
47 |             BlueprintIdentifier = "vectura-cli"
48 |             BuildableName = "vectura-cli"
49 |             BlueprintName = "vectura-cli"
50 |             ReferencedContainer = "container:">
51 |          </BuildableReference>
52 |       </BuildableProductRunnable>
53 |    </LaunchAction>
54 |    <ProfileAction
55 |       buildConfiguration = "Release"
56 |       shouldUseLaunchSchemeArgsEnv = "YES"
57 |       savedToolIdentifier = ""
58 |       useCustomWorkingDirectory = "NO"
59 |       debugDocumentVersioning = "YES">
60 |       <BuildableProductRunnable
61 |          runnableDebuggingMode = "0">
62 |          <BuildableReference
63 |             BuildableIdentifier = "primary"
64 |             BlueprintIdentifier = "vectura-cli"
65 |             BuildableName = "vectura-cli"
66 |             BlueprintName = "vectura-cli"
67 |             ReferencedContainer = "container:">
68 |          </BuildableReference>
69 |       </BuildableProductRunnable>
70 |    </ProfileAction>
71 |    <AnalyzeAction
72 |       buildConfiguration = "Debug">
73 |    </AnalyzeAction>
74 |    <ArchiveAction
75 |       buildConfiguration = "Release"
76 |       revealArchiveInOrganizer = "YES">
77 |    </ArchiveAction>
78 | </Scheme>
79 | 


--------------------------------------------------------------------------------
/.swiftpm/xcode/xcshareddata/xcschemes/vectura-mlx-cli.xcscheme:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <Scheme
 3 |    LastUpgradeVersion = "1620"
 4 |    version = "1.7">
 5 |    <BuildAction
 6 |       parallelizeBuildables = "YES"
 7 |       buildImplicitDependencies = "YES"
 8 |       buildArchitectures = "Automatic">
 9 |       <BuildActionEntries>
10 |          <BuildActionEntry
11 |             buildForTesting = "YES"
12 |             buildForRunning = "YES"
13 |             buildForProfiling = "YES"
14 |             buildForArchiving = "YES"
15 |             buildForAnalyzing = "YES">
16 |             <BuildableReference
17 |                BuildableIdentifier = "primary"
18 |                BlueprintIdentifier = "vectura-mlx-cli"
19 |                BuildableName = "vectura-mlx-cli"
20 |                BlueprintName = "vectura-mlx-cli"
21 |                ReferencedContainer = "container:">
22 |             </BuildableReference>
23 |          </BuildActionEntry>
24 |       </BuildActionEntries>
25 |    </BuildAction>
26 |    <TestAction
27 |       buildConfiguration = "Debug"
28 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
29 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
30 |       shouldUseLaunchSchemeArgsEnv = "YES"
31 |       shouldAutocreateTestPlan = "YES">
32 |    </TestAction>
33 |    <LaunchAction
34 |       buildConfiguration = "Debug"
35 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
36 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
37 |       launchStyle = "0"
38 |       useCustomWorkingDirectory = "NO"
39 |       ignoresPersistentStateOnLaunch = "NO"
40 |       debugDocumentVersioning = "YES"
41 |       debugServiceExtension = "internal"
42 |       allowLocationSimulation = "YES">
43 |       <BuildableProductRunnable
44 |          runnableDebuggingMode = "0">
45 |          <BuildableReference
46 |             BuildableIdentifier = "primary"
47 |             BlueprintIdentifier = "vectura-mlx-cli"
48 |             BuildableName = "vectura-mlx-cli"
49 |             BlueprintName = "vectura-mlx-cli"
50 |             ReferencedContainer = "container:">
51 |          </BuildableReference>
52 |       </BuildableProductRunnable>
53 |       <CommandLineArguments>
54 |          <CommandLineArgument
55 |             argument = "mock"
56 |             isEnabled = "YES">
57 |          </CommandLineArgument>
58 |       </CommandLineArguments>
59 |    </LaunchAction>
60 |    <ProfileAction
61 |       buildConfiguration = "Release"
62 |       shouldUseLaunchSchemeArgsEnv = "YES"
63 |       savedToolIdentifier = ""
64 |       useCustomWorkingDirectory = "NO"
65 |       debugDocumentVersioning = "YES">
66 |       <BuildableProductRunnable
67 |          runnableDebuggingMode = "0">
68 |          <BuildableReference
69 |             BuildableIdentifier = "primary"
70 |             BlueprintIdentifier = "vectura-mlx-cli"
71 |             BuildableName = "vectura-mlx-cli"
72 |             BlueprintName = "vectura-mlx-cli"
73 |             ReferencedContainer = "container:">
74 |          </BuildableReference>
75 |       </BuildableProductRunnable>
76 |    </ProfileAction>
77 |    <AnalyzeAction
78 |       buildConfiguration = "Debug">
79 |    </AnalyzeAction>
80 |    <ArchiveAction
81 |       buildConfiguration = "Release"
82 |       revealArchiveInOrganizer = "YES">
83 |    </ArchiveAction>
84 | </Scheme>
85 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "configurations": [
 3 |         {
 4 |             "type": "swift",
 5 |             "request": "launch",
 6 |             "args": [],
 7 |             "cwd": "${workspaceFolder:VecturaKit}",
 8 |             "name": "Debug vectura-cli",
 9 |             "program": "${workspaceFolder:VecturaKit}/.build/debug/vectura-cli",
10 |             "preLaunchTask": "swift: Build Debug vectura-cli"
11 |         },
12 |         {
13 |             "type": "swift",
14 |             "request": "launch",
15 |             "args": [],
16 |             "cwd": "${workspaceFolder:VecturaKit}",
17 |             "name": "Release vectura-cli",
18 |             "program": "${workspaceFolder:VecturaKit}/.build/release/vectura-cli",
19 |             "preLaunchTask": "swift: Build Release vectura-cli"
20 |         },
21 |         {
22 |             "type": "swift",
23 |             "request": "launch",
24 |             "args": [],
25 |             "cwd": "${workspaceFolder:VecturaKit}",
26 |             "name": "Debug vectura-mlx-cli",
27 |             "program": "${workspaceFolder:VecturaKit}/.build/debug/vectura-mlx-cli",
28 |             "preLaunchTask": "swift: Build Debug vectura-mlx-cli"
29 |         },
30 |         {
31 |             "type": "swift",
32 |             "request": "launch",
33 |             "args": [],
34 |             "cwd": "${workspaceFolder:VecturaKit}",
35 |             "name": "Release vectura-mlx-cli",
36 |             "program": "${workspaceFolder:VecturaKit}/.build/release/vectura-mlx-cli",
37 |             "preLaunchTask": "swift: Build Release vectura-mlx-cli"
38 |         }
39 |     ]
40 | }


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {}


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Rudrank Riyam
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Package.resolved:
--------------------------------------------------------------------------------
  1 | {
  2 |   "originHash" : "6db3b74627697ac586e400fabd67629791185e08ed51fb68cf82ab175e2330c2",
  3 |   "pins" : [
  4 |     {
  5 |       "identity" : "gzipswift",
  6 |       "kind" : "remoteSourceControl",
  7 |       "location" : "https://github.com/1024jp/GzipSwift",
  8 |       "state" : {
  9 |         "revision" : "731037f6cc2be2ec01562f6597c1d0aa3fe6fd05",
 10 |         "version" : "6.0.1"
 11 |       }
 12 |     },
 13 |     {
 14 |       "identity" : "jinja",
 15 |       "kind" : "remoteSourceControl",
 16 |       "location" : "https://github.com/johnmai-dev/Jinja",
 17 |       "state" : {
 18 |         "revision" : "bbddb92fc51ae420b87300298370fd1dfc308f73",
 19 |         "version" : "1.1.1"
 20 |       }
 21 |     },
 22 |     {
 23 |       "identity" : "mlx-swift",
 24 |       "kind" : "remoteSourceControl",
 25 |       "location" : "https://github.com/ml-explore/mlx-swift",
 26 |       "state" : {
 27 |         "revision" : "70dbb62128a5a1471a5ab80363430adb33470cab",
 28 |         "version" : "0.21.2"
 29 |       }
 30 |     },
 31 |     {
 32 |       "identity" : "mlx-swift-examples",
 33 |       "kind" : "remoteSourceControl",
 34 |       "location" : "https://github.com/ml-explore/mlx-swift-examples/",
 35 |       "state" : {
 36 |         "branch" : "main",
 37 |         "revision" : "cb66b4bc6bc1a69663837881e7f1260cd49d6b59"
 38 |       }
 39 |     },
 40 |     {
 41 |       "identity" : "swift-argument-parser",
 42 |       "kind" : "remoteSourceControl",
 43 |       "location" : "https://github.com/apple/swift-argument-parser.git",
 44 |       "state" : {
 45 |         "revision" : "0fbc8848e389af3bb55c182bc19ca9d5dc2f255b",
 46 |         "version" : "1.4.0"
 47 |       }
 48 |     },
 49 |     {
 50 |       "identity" : "swift-collections",
 51 |       "kind" : "remoteSourceControl",
 52 |       "location" : "https://github.com/apple/swift-collections.git",
 53 |       "state" : {
 54 |         "revision" : "671108c96644956dddcd89dd59c203dcdb36cec7",
 55 |         "version" : "1.1.4"
 56 |       }
 57 |     },
 58 |     {
 59 |       "identity" : "swift-embeddings",
 60 |       "kind" : "remoteSourceControl",
 61 |       "location" : "https://github.com/jkrukowski/swift-embeddings.git",
 62 |       "state" : {
 63 |         "revision" : "419c52ea50238435218c587e3bebfe290ee91287",
 64 |         "version" : "0.0.13"
 65 |       }
 66 |     },
 67 |     {
 68 |       "identity" : "swift-numerics",
 69 |       "kind" : "remoteSourceControl",
 70 |       "location" : "https://github.com/apple/swift-numerics",
 71 |       "state" : {
 72 |         "revision" : "0a5bc04095a675662cf24757cc0640aa2204253b",
 73 |         "version" : "1.0.2"
 74 |       }
 75 |     },
 76 |     {
 77 |       "identity" : "swift-safetensors",
 78 |       "kind" : "remoteSourceControl",
 79 |       "location" : "https://github.com/jkrukowski/swift-safetensors.git",
 80 |       "state" : {
 81 |         "revision" : "718b0f38f912e0bf9d92130fa1e1fe2ae5136dd6",
 82 |         "version" : "0.0.7"
 83 |       }
 84 |     },
 85 |     {
 86 |       "identity" : "swift-sentencepiece",
 87 |       "kind" : "remoteSourceControl",
 88 |       "location" : "https://github.com/jkrukowski/swift-sentencepiece",
 89 |       "state" : {
 90 |         "revision" : "36a8b2b45733f6adb3092100f16e4c7d38a10a7c",
 91 |         "version" : "0.0.6"
 92 |       }
 93 |     },
 94 |     {
 95 |       "identity" : "swift-transformers",
 96 |       "kind" : "remoteSourceControl",
 97 |       "location" : "https://github.com/huggingface/swift-transformers",
 98 |       "state" : {
 99 |         "revision" : "be855fac725dbae27264e47a3eb535cc422a4ba8",
100 |         "version" : "0.1.18"
101 |       }
102 |     }
103 |   ],
104 |   "version" : 3
105 | }
106 | 


--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
 1 | // swift-tools-version: 6.0
 2 | // The swift-tools-version declares the minimum version of Swift required to build this package.
 3 | 
 4 | import PackageDescription
 5 | 
 6 | let package = Package(  // SwiftPM manifest for the VecturaKit package
 7 |   name: "VecturaKit",
 8 |   platforms: [  // minimum OS version declared per platform
 9 |     .macOS(.v14),
10 |     .iOS(.v17),
11 |     .tvOS(.v17),
12 |     .visionOS(.v1),
13 |     .watchOS(.v10),
14 |   ],
15 |   products: [  // two libraries and two command-line executables
16 |     .library(
17 |       name: "VecturaKit",
18 |       targets: ["VecturaKit"]
19 |     ),
20 |     .library(
21 |       name: "VecturaMLXKit",
22 |       targets: ["VecturaMLXKit"]
23 |     ),
24 |     .executable(
25 |       name: "vectura-cli",
26 |       targets: ["VecturaCLI"]
27 |     ),
28 |     .executable(
29 |       name: "vectura-mlx-cli",
30 |       targets: ["VecturaMLXCLI"]
31 |     ),
32 |   ],
33 |   dependencies: [
34 |     .package(url: "https://github.com/jkrukowski/swift-embeddings.git", from: "0.0.10"),
35 |     .package(url: "https://github.com/apple/swift-argument-parser.git", from: "1.4.0"),
36 |     .package(url: "https://github.com/ml-explore/mlx-swift-examples/", branch: "main"),  // tracks a branch, not a version range
37 |   ],
38 |   targets: [
39 |     .target(
40 |       name: "VecturaKit",  // core library; its only package dependency is Embeddings
41 |       dependencies: [
42 |         .product(name: "Embeddings", package: "swift-embeddings")
43 |       ],
44 |       cSettings: [  // NOTE(review): presumably opts into Accelerate's newer LAPACK interface with 64-bit integers; confirm
45 |         .define("ACCELERATE_NEW_LAPACK"),
46 |         .define("ACCELERATE_LAPACK_ILP64"),
47 |       ]
48 |     ),
49 |     .target(
50 |       name: "VecturaMLXKit",  // MLX variant; depends on the core library plus MLXEmbedders
51 |       dependencies: [
52 |         "VecturaKit",
53 |         .product(name: "MLXEmbedders", package: "mlx-swift-examples"),
54 |       ]
55 |     ),
56 |     .executableTarget(
57 |       name: "VecturaCLI",  // sources behind the vectura-cli executable product
58 |       dependencies: [
59 |         "VecturaKit",
60 |         .product(name: "ArgumentParser", package: "swift-argument-parser"),
61 |       ]
62 |     ),
63 |     .executableTarget(
64 |       name: "VecturaMLXCLI",  // sources behind the vectura-mlx-cli executable product
65 |       dependencies: [
66 |         "VecturaMLXKit",
67 |         .product(name: "ArgumentParser", package: "swift-argument-parser"),
68 |       ]
69 |     ),
70 |     .testTarget(
71 |       name: "VecturaKitTests",
72 |       dependencies: ["VecturaKit"]
73 |     ),
74 |     .testTarget(
75 |       name: "VecturaMLXKitTests",
76 |       dependencies: ["VecturaMLXKit"]
77 |     ),
78 |   ]
79 | )
80 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # VecturaKit
  2 | 
  3 | VecturaKit is a Swift-based vector database designed for on-device applications, enabling advanced user experiences through local vector storage and retrieval. Inspired by [Dripfarm's SVDB](https://github.com/Dripfarm/SVDB), **VecturaKit** utilizes `MLTensor` and [`swift-embeddings`](https://github.com/jkrukowski/swift-embeddings) for generating and managing embeddings. The framework offers two primary modules: `VecturaKit`, which supports diverse embedding models via `swift-embeddings`, and `VecturaMLXKit`, which leverages Apple's MLX framework for accelerated processing.
  4 | 
  5 | ## Support
  6 | 
  7 | Love this project? Check out my books to explore more of AI and iOS development:
  8 | - [Exploring AI for iOS Development](https://academy.rudrank.com/product/ai)
  9 | - [Exploring AI-Assisted Coding for iOS Development](https://academy.rudrank.com/product/ai-assisted-coding)
 10 | 
 11 | Your support helps to keep this project growing!
 12 | 
 13 | ## Key Features
 14 | 
 15 | -   **On-Device Storage:** Stores and manages vector embeddings locally, enhancing privacy and reducing latency.
 16 | -   **Hybrid Search:** Combines vector similarity with BM25 text search for comprehensive and relevant search results (`VecturaKit`).
 17 | -   **Batch Processing:** Indexes documents in parallel for faster data ingestion.
 18 | -   **Persistent Storage:** Automatically saves and loads document data, preserving the database state across app sessions.
 19 | -   **Configurable Search:** Customizes search behavior with adjustable thresholds, result limits, and hybrid search weights.
 20 | -   **Custom Storage Location:** Specifies a custom directory for database storage.
 21 | -   **MLX Support:** Employs Apple's MLX framework for accelerated embedding generation and search operations (`VecturaMLXKit`).
 22 | -   **CLI Tool:** Includes a command-line interface (CLI) for database management, testing, and debugging for both `VecturaKit` and `VecturaMLXKit`.
 23 | 
 24 | ## Supported Platforms
 25 | 
 26 | -   macOS 14.0 or later
 27 | -   iOS 17.0 or later
 28 | -   tvOS 17.0 or later
 29 | -   visionOS 1.0 or later
 30 | -   watchOS 10.0 or later
 31 | 
 32 | ## Installation
 33 | 
 34 | ### Swift Package Manager
 35 | 
 36 | To integrate VecturaKit into your project using Swift Package Manager, add the following dependency in your `Package.swift` file:
 37 | 
 38 | ```swift
 39 | dependencies: [
 40 |     .package(url: "https://github.com/rryam/VecturaKit.git", branch: "main"),
 41 | ],
 42 | ```
 43 | 
 44 | ### Dependencies
 45 | 
 46 | VecturaKit relies on the following Swift packages:
 47 | 
 48 | -   [swift-embeddings](https://github.com/jkrukowski/swift-embeddings): Used in `VecturaKit` for generating text embeddings using various models.
 49 | -   [swift-argument-parser](https://github.com/apple/swift-argument-parser): Used for creating the command-line interface.
 50 | -   [mlx-swift-examples](https://github.com/ml-explore/mlx-swift-examples): Provides MLX-based embeddings and vector search capabilities, specifically for `VecturaMLXKit`.
 51 | 
 52 | ## Usage
 53 | 
 54 | ### Core VecturaKit
 55 | 
 56 | 1.  **Import VecturaKit**
 57 | 
 58 |     ```swift
 59 |     import VecturaKit
 60 |     ```
 61 | 
 62 | 2.  **Create Configuration and Initialize Database**
 63 | 
 64 |     ```swift
 65 |     import Foundation
 66 |     import VecturaKit
 67 | 
 68 |     let config = VecturaConfig(
 69 |         name: "my-vector-db",
 70 |         directoryURL: nil,  // Optional custom storage location
 71 |         dimension: 384,     // Matches the default BERT model dimension
 72 |         searchOptions: VecturaConfig.SearchOptions(
 73 |             defaultNumResults: 10,
 74 |             minThreshold: 0.7,
 75 |             hybridWeight: 0.5,  // Balance between vector and text search
 76 |             k1: 1.2,           // BM25 parameters
 77 |             b: 0.75
 78 |         )
 79 |     )
 80 | 
 81 |     let vectorDB = try await VecturaKit(config: config)
 82 |     ```
 83 | 
 84 | 3.  **Add Documents**
 85 | 
 86 |     Single document:
 87 | 
 88 |     ```swift
 89 |     let text = "Sample text to be embedded"
 90 |     let documentId = try await vectorDB.addDocument(
 91 |         text: text,
 92 |         id: UUID(),  // Optional, will be generated if not provided
 93 |         model: .id("sentence-transformers/all-MiniLM-L6-v2")  // Optional, this is the default
 94 |     )
 95 |     ```
 96 | 
 97 |     Multiple documents in batch:
 98 | 
 99 |     ```swift
100 |     let texts = [
101 |         "First document text",
102 |         "Second document text",
103 |         "Third document text"
104 |     ]
105 |     let documentIds = try await vectorDB.addDocuments(
106 |         texts: texts,
107 |         ids: nil,  // Optional array of UUIDs
108 |         model: .id("sentence-transformers/all-MiniLM-L6-v2")  // Optional model
109 |     )
110 |     ```
111 | 
112 | 4.  **Search Documents**
113 | 
114 |     Search by text (hybrid search):
115 | 
116 |     ```swift
117 |     let results = try await vectorDB.search(
118 |         query: "search query",
119 |         numResults: 5,      // Optional
120 |         threshold: 0.8,     // Optional
121 |         model: .id("sentence-transformers/all-MiniLM-L6-v2")  // Optional
122 |     )
123 | 
124 |     for result in results {
125 |         print("Document ID: \(result.id)")
126 |         print("Text: \(result.text)")
127 |         print("Similarity Score: \(result.score)")
128 |         print("Created At: \(result.createdAt)")
129 |     }
130 |     ```
131 | 
132 |     Search by vector embedding:
133 | 
134 |     ```swift
135 |     let results = try await vectorDB.search(
136 |         query: embeddingArray,  // [Float] matching config.dimension
137 |         numResults: 5,  // Optional
138 |         threshold: 0.8  // Optional
139 |     )
140 |     ```
141 | 
142 | 5.  **Document Management**
143 | 
144 |     Update document:
145 | 
146 |     ```swift
147 |     try await vectorDB.updateDocument(
148 |         id: documentId,
149 |         newText: "Updated text",
150 |         model: .id("sentence-transformers/all-MiniLM-L6-v2")  // Optional
151 |     )
152 |     ```
153 | 
154 |     Delete documents:
155 | 
156 |     ```swift
157 |     try await vectorDB.deleteDocuments(ids: [documentId1, documentId2])
158 |     ```
159 | 
160 |     Reset database:
161 | 
162 |     ```swift
163 |     try await vectorDB.reset()
164 |     ```
165 | 
166 | ### VecturaMLXKit (MLX Version)
167 | 
168 | VecturaMLXKit harnesses Apple's MLX framework for accelerated processing, delivering optimized performance for on-device machine learning tasks.
169 | 
170 | 1.  **Import VecturaMLXKit**
171 | 
172 |     ```swift
173 |     import VecturaMLXKit
174 |     ```
175 | 
176 | 2.  **Initialize Database**
177 | 
178 |     ```swift
179 |     import VecturaMLXKit
180 |     import MLXEmbedders
181 | 
182 |     let config = VecturaConfig(
183 |       name: "my-mlx-vector-db",
184 |       dimension: 768 //  nomic_text_v1_5 model outputs 768-dimensional embeddings
185 |     )
186 |     let vectorDB = try await VecturaMLXKit(config: config, modelConfiguration: .nomic_text_v1_5)
187 |     ```
188 | 
189 | 3.  **Add Documents**
190 | 
191 |     ```swift
192 |     let texts = [
193 |       "First document text",
194 |       "Second document text",
195 |       "Third document text"
196 |     ]
197 |     let documentIds = try await vectorDB.addDocuments(texts: texts)
198 |     ```
199 | 
200 | 4.  **Search Documents**
201 | 
202 |     ```swift
203 |     let results = try await vectorDB.search(
204 |         query: "search query",
205 |         numResults: 5,      // Optional
206 |         threshold: 0.8     // Optional
207 |     )
208 | 
209 |     for result in results {
210 |         print("Document ID: \(result.id)")
211 |         print("Text: \(result.text)")
212 |         print("Similarity Score: \(result.score)")
213 |         print("Created At: \(result.createdAt)")
214 |     }
215 |     ```
216 | 
217 | 5.  **Document Management**
218 | 
219 |     Update document:
220 | 
221 |     ```swift
222 |     try await vectorDB.updateDocument(
223 |         id: documentId,
224 |         newText: "Updated text"
225 |     )
226 |     ```
227 | 
228 |     Delete documents:
229 | 
230 |     ```swift
231 |     try await vectorDB.deleteDocuments(ids: [documentId1, documentId2])
232 |     ```
233 | 
234 |     Reset database:
235 | 
236 |     ```swift
237 |     try await vectorDB.reset()
238 |     ```
239 | 
240 | ## Command Line Interface
241 | 
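242 | VecturaKit includes a command-line interface for both the standard and MLX versions, facilitating easy database management. The package builds these executables as `vectura-cli` and `vectura-mlx-cli` (for example, `swift run vectura-cli mock`); the examples below assume they are installed or aliased as `vectura` and `vectura-mlx`.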
243 | 
244 | **Standard CLI Tool**
245 | 
246 | ```bash
247 | # Add documents
248 | vectura add "First document" "Second document" "Third document" \
249 |   --db-name "my-vector-db" \
250 |   --dimension 384 \
251 |   --model-id "sentence-transformers/all-MiniLM-L6-v2"
252 | 
253 | # Search documents
254 | vectura search "search query" \
255 |   --db-name "my-vector-db" \
256 |   --dimension 384 \
257 |   --threshold 0.7 \
258 |   --num-results 5 \
259 |   --model-id "sentence-transformers/all-MiniLM-L6-v2"
260 | 
261 | # Update document
262 | vectura update <document-uuid> "Updated text content" \
263 |   --db-name "my-vector-db" \
264 |   --dimension 384 \
265 |   --model-id "sentence-transformers/all-MiniLM-L6-v2"
266 | 
267 | # Delete documents
268 | vectura delete <document-uuid-1> <document-uuid-2> \
269 |   --db-name "my-vector-db" \
270 |   --dimension 384
271 | 
272 | # Reset database
273 | vectura reset \
274 |   --db-name "my-vector-db" \
275 |   --dimension 384
276 | 
277 | # Run demo with sample data
278 | vectura mock \
279 |   --db-name "my-vector-db" \
280 |   --dimension 384 \
281 |   --threshold 0.7 \
282 |   --num-results 10 \
283 |   --model-id "sentence-transformers/all-MiniLM-L6-v2"
284 | ```
285 | 
286 | Common options:
287 | 
288 | -   `--db-name, -d`: Database name (default: "vectura-cli-db")
289 | -   `--dimension, -v`: Vector dimension (default: 384)
290 | -   `--threshold, -t`: Minimum similarity threshold (default: 0.7)
291 | -   `--num-results, -n`: Number of results to return (default: 10)
292 | -   `--model-id, -m`: Model ID for embeddings (default: "sentence-transformers/all-MiniLM-L6-v2")
293 | 
294 | **MLX CLI Tool**
295 | 
296 | ```bash
297 | # Add documents
298 | vectura-mlx add "First document" "Second document" "Third document" --db-name "my-mlx-vector-db"
299 | 
300 | # Search documents
301 | vectura-mlx search "search query" --db-name "my-mlx-vector-db" --threshold 0.7 --num-results 5
302 | 
303 | # Update document
304 | vectura-mlx update <document-uuid> "Updated text content" --db-name "my-mlx-vector-db"
305 | 
306 | # Delete documents
307 | vectura-mlx delete <document-uuid-1> <document-uuid-2> --db-name "my-mlx-vector-db"
308 | 
309 | # Reset database
310 | vectura-mlx reset --db-name "my-mlx-vector-db"
311 | 
312 | # Run demo with sample data
313 | vectura-mlx mock --db-name "my-mlx-vector-db"
314 | ```
315 | 
316 | ## License
317 | 
318 | VecturaKit is released under the MIT License. See the [LICENSE](LICENSE) file for more information. Copyright (c) 2025 Rudrank Riyam.
319 | 
320 | ## Contributing
321 | 
322 | Contributions are welcome! Please fork the repository and submit a pull request with your improvements.
323 | 
324 | ### Development
325 | 
326 | The project is structured as a Swift Package. It includes the following key targets:
327 | 
328 | - `VecturaKit`: The core vector database library.
329 | - `VecturaMLXKit`: The MLX-accelerated version of the library.
330 | - `vectura-cli`: The command-line interface for `VecturaKit`.
331 | - `vectura-mlx-cli`: The command-line interface for `VecturaMLXKit`.
332 | 
333 | To build and test the project, use the following commands:
334 | 
335 | ```bash
336 | swift build
337 | swift test
338 | ```
339 | 
340 | The project also includes CI workflows defined in `.github/workflows` to automate building and testing on pull requests and pushes to the `main` branch. The workflows require Xcode 16.1 and Swift 6.0.
341 | 
342 | Debugging configurations are provided in `.vscode/launch.json` for both `vectura-cli` and `vectura-mlx-cli`, each with a Debug and a Release build. These can be used to launch either CLI with the debugger attached.
343 | 
344 | ### Continuous Integration
345 | 
346 | The project uses GitHub Actions for continuous integration. The following workflows are defined:
347 | 
348 | - `.github/workflows/build_and_test_mlx.yml`: Builds and tests the `VecturaMLXKit` target.
349 | - `.github/workflows/build_and_test_vectura.yml`: Builds and tests the `VecturaKit` and `vectura-cli` targets.
350 | - `.github/workflows/update-readme.yml`: Automatically updates the `README.md` file using a Python script that calls the Gemini AI model. This workflow is triggered on pushes to the `main` branch and creates a pull request with the updated README.
351 | 


--------------------------------------------------------------------------------
/Sources/VecturaCLI/VecturaCLI.swift:
--------------------------------------------------------------------------------
  1 | import ArgumentParser
  2 | import Foundation
  3 | import VecturaKit
  4 | 
  5 | /// Root `vectura` command: declares the subcommands and the database setup they share.
  6 | @available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, watchOS 11.0, *)
  7 | @main
  8 | struct VecturaCLI: AsyncParsableCommand {
  9 |     /// A document identifier parsed from a UUID string given on the command line.
 10 |     struct DocumentID: ExpressibleByArgument, Decodable {
 11 |         let uuid: UUID
 12 | 
 13 |         init(_ uuid: UUID) { self.uuid = uuid }
 14 | 
 15 |         /// Returns `nil` when `argument` is not a valid UUID string.
 16 |         init?(argument: String) {
 17 |             guard let parsed = UUID(uuidString: argument) else { return nil }
 18 |             self.init(parsed)
 19 |         }
 20 |     }
 21 | 
 22 |     static let configuration = CommandConfiguration(
 23 |         commandName: "vectura",
 24 |         abstract: "A CLI tool for VecturaKit vector database",
 25 |         subcommands: [Add.self, Search.self, Update.self, Delete.self, Reset.self, Mock.self]
 26 |     )
 27 | 
 28 |     /// Builds a `VecturaConfig` named `dbName` with the given vector `dimension`, using
 29 |     /// `numResults` and `threshold` as its search options, and initializes `VecturaKit` from it.
 30 |     static func setupDB(
 31 |         dbName: String, dimension: Int, numResults: Int, threshold: Float
 32 |     ) async throws -> VecturaKit {
 33 |         let searchOptions = VecturaConfig.SearchOptions(
 34 |             defaultNumResults: numResults,
 35 |             minThreshold: threshold
 36 |         )
 37 |         let config = VecturaConfig(name: dbName, dimension: dimension, searchOptions: searchOptions)
 38 |         return try await VecturaKit(config: config)
 39 |     }
 40 | }
 41 | 
 42 | @available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, watchOS 11.0, *)
 43 | extension VecturaCLI {
 44 |     /// Demo subcommand that exercises the database end to end with built-in sample data:
 45 |     /// reset, add, search, update the first document, delete the last one.
 46 |     struct Mock: AsyncParsableCommand {
 47 |         static let configuration = CommandConfiguration(
 48 |             abstract: "Run a mock demonstration with sample data"
 49 |         )
 50 | 
 51 |         @Option(name: [.long, .customShort("d")], help: "Database name")
 52 |         var dbName: String = "vectura-cli-db"
 53 | 
 54 |         @Option(name: [.long, .customShort("v")], help: "Vector dimension")
 55 |         var dimension: Int = 384
 56 | 
 57 |         @Option(name: [.long, .customShort("t")], help: "Minimum similarity threshold")
 58 |         var threshold: Float = 0.7
 59 | 
 60 |         @Option(name: [.long, .customShort("n")], help: "Number of results to return")
 61 |         var numResults: Int = 10
 62 | 
 63 |         @Option(name: [.long, .customShort("m")], help: "Model ID for embeddings")
 64 |         var modelId: String = "sentence-transformers/all-MiniLM-L6-v2"
 65 | 
 66 |         /// Runs the demo steps in order and prints the outcome of each one.
 67 |         /// Any error thrown by a database call ends the demo at that step.
 68 |         mutating func run() async throws {
 69 |             let database = try await VecturaCLI.setupDB(
 70 |                 dbName: dbName, dimension: dimension, numResults: numResults, threshold: threshold
 71 |             )
 72 | 
 73 |             // Step 1: reset the database before adding anything.
 74 |             print("\n🧹 Resetting database...")
 75 |             try await database.reset()
 76 | 
 77 |             // Step 2: add the sample documents and echo each ID next to its text.
 78 |             print("\n📝 Adding sample documents...")
 79 |             let samples = [
 80 |                 "The quick brown fox jumps over the lazy dog",
 81 |                 "To be or not to be, that is the question",
 82 |                 "All that glitters is not gold",
 83 |                 "A journey of a thousand miles begins with a single step",
 84 |                 "Where there's smoke, there's fire",
 85 |             ]
 86 |             let ids = try await database.addDocuments(texts: samples, modelId: modelId)
 87 |             print("Added \(ids.count) documents:")
 88 |             zip(ids, samples).forEach { id, sample in
 89 |                 print("ID: \(id)")
 90 |                 print("Text: \(sample)")
 91 |                 print("---")
 92 |             }
 93 | 
 94 |             // Step 3: search, passing the same result limit and threshold that were
 95 |             // used to configure the database.
 96 |             print("\n🔍 Searching for 'journey'...")
 97 |             let matches = try await database.search(
 98 |                 query: "journey",
 99 |                 numResults: numResults,
100 |                 threshold: threshold,
101 |                 modelId: modelId
102 |             )
103 |             print("Found \(matches.count) results:")
104 |             matches.forEach { match in
105 |                 print("ID: \(match.id)")
106 |                 print("Text: \(match.text)")
107 |                 print("Score: \(match.score)")
108 |                 print("Created: \(match.createdAt)")
109 |                 print("---")
110 |             }
111 | 
112 |             // Step 4: replace the text of the first document that was added.
113 |             if let first = ids.first {
114 |                 print("\n✏️ Updating first document...")
115 |                 let replacement = "The quick red fox jumps over the sleeping dog"
116 |                 try await database.updateDocument(id: first, newText: replacement, modelId: modelId)
117 |                 print("Updated document \(first) with new text: \(replacement)")
118 |             }
119 | 
120 |             // Step 5: delete the last document that was added.
121 |             if let last = ids.last {
122 |                 print("\n🗑️ Deleting last document...")
123 |                 try await database.deleteDocuments(ids: [last])
124 |                 print("Deleted document \(last)")
125 |             }
126 | 
127 |             print("\n✨ Mock demonstration completed!")
128 |         }
129 |     }
130 |     
131 |     /// Subcommand that adds one or more texts to the database and prints the ID given to each.
132 |     struct Add: AsyncParsableCommand {
133 |         static let configuration = CommandConfiguration(
134 |             abstract: "Add documents to the vector database"
135 |         )
136 | 
137 |         @Option(name: [.long, .customShort("d")], help: "Database name")
138 |         var dbName: String = "vectura-cli-db"
139 | 
140 |         @Option(name: [.long, .customShort("v")], help: "Vector dimension")
141 |         var dimension: Int = 384
142 | 
143 |         @Option(name: [.long, .customShort("m")], help: "Model ID for embeddings")
144 |         var modelId: String = "sentence-transformers/all-MiniLM-L6-v2"
145 | 
146 |         @Argument(help: "Text content to add")
147 |         var text: [String]
148 | 
149 |         /// Adds every positional `text` argument in one call and echoes each ID with its text.
150 |         mutating func run() async throws {
151 |             // This command never searches, so the search options are fixed at 10 results / 0.7.
152 |             let database = try await VecturaCLI.setupDB(
153 |                 dbName: dbName, dimension: dimension, numResults: 10, threshold: 0.7
154 |             )
155 |             let ids = try await database.addDocuments(texts: text, modelId: modelId)
156 |             print("Added \(ids.count) documents:")
157 |             zip(ids, text).forEach { id, body in
158 |                 print("ID: \(id)")
159 |                 print("Text: \(body)")
160 |                 print("---")
161 |             }
162 |         }
163 |     }
164 |     
165 |     /// Subcommand that runs a text query against the database and prints every
166 |     /// returned result with its ID, text, score and creation date.
167 |     struct Search: AsyncParsableCommand {
168 |         static let configuration = CommandConfiguration(
169 |             abstract: "Search documents in the vector database"
170 |         )
171 | 
172 |         @Option(name: [.long, .customShort("d")], help: "Database name")
173 |         var dbName: String = "vectura-cli-db"
174 | 
175 |         @Option(name: [.long, .customShort("v")], help: "Vector dimension")
176 |         var dimension: Int = 384
177 | 
178 |         @Option(name: [.long, .customShort("t")], help: "Minimum similarity threshold")
179 |         var threshold: Float = 0.7
180 | 
181 |         @Option(name: [.long, .customShort("n")], help: "Number of results to return")
182 |         var numResults: Int = 10
183 | 
184 |         @Option(name: [.long, .customShort("m")], help: "Model ID for embeddings")
185 |         var modelId: String = "sentence-transformers/all-MiniLM-L6-v2"
186 | 
187 |         @Argument(help: "Search query")
188 |         var query: String
189 | 
190 |         /// Sets up the database, searches it for `query` using `modelId`, and prints
191 |         /// the number of results followed by one entry per result.
192 |         mutating func run() async throws {
193 |             // The same result limit and threshold are used twice: as the database's
194 |             // search options and as explicit arguments to the search call.
195 |             let database = try await VecturaCLI.setupDB(
196 |                 dbName: dbName, dimension: dimension, numResults: numResults, threshold: threshold
197 |             )
198 |             let matches = try await database.search(
199 |                 query: query, numResults: numResults, threshold: threshold, modelId: modelId
200 |             )
201 | 
202 |             print("Found \(matches.count) results:")
203 |             matches.forEach { match in
204 |                 print("ID: \(match.id)")
205 |                 print("Text: \(match.text)")
206 |                 print("Score: \(match.score)")
207 |                 print("Created: \(match.createdAt)")
208 |                 print("---")
209 |             }
210 |         }
211 |     }
212 |     
213 |     struct Update: AsyncParsableCommand, Decodable {
214 |         static let configuration = CommandConfiguration(
215 |             abstract: "Update a document in the vector database"
216 |         )
217 | 
218 |         @Option(name: [.long, .customShort("d")], help: "Database name")
219 |         var dbName: String = "vectura-cli-db"
220 | 
221 |         @Option(name: [.long, .customShort("v")], help: "Vector dimension")
222 |         var dimension: Int = 384
223 | 
224 |         @Option(name: [.long, .customShort("m")], help: "Model ID for embeddings")
225 |         var modelId: String = "sentence-transformers/all-MiniLM-L6-v2"
226 | 
227 |         @Argument(help: "Document ID to update")
228 |         var id: DocumentID
229 | 
230 |         @Argument(help: "New text content")
231 |         var newText: String
232 | 
233 |         /// Replaces the text of document `id` with `newText` and prints a confirmation.
234 |         mutating func run() async throws {
235 |             // No query is run here, so fixed numResults/threshold values are passed.
236 |             let database = try await VecturaCLI.setupDB(
237 |                 dbName: dbName, dimension: dimension, numResults: 10, threshold: 0.7
238 |             )
239 |             let documentID = id.uuid
240 |             try await database.updateDocument(id: documentID, newText: newText, modelId: modelId)
241 |             print("Updated document \(documentID) with new text: \(newText)")
242 |         }
243 |     }
244 |     
245 |     struct Delete: AsyncParsableCommand, Decodable {
246 |         static let configuration = CommandConfiguration(
247 |             abstract: "Delete documents from the vector database"
248 |         )
249 | 
250 |         @Option(name: [.long, .customShort("d")], help: "Database name")
251 |         var dbName: String = "vectura-cli-db"
252 | 
253 |         @Option(name: [.long, .customShort("v")], help: "Vector dimension")
254 |         var dimension: Int = 384
255 | 
256 |         @Argument(help: "Document IDs to delete")
257 |         var ids: [DocumentID]
258 | 
259 |         /// Deletes every listed document and prints how many IDs were given.
260 |         mutating func run() async throws {
261 |             let database = try await VecturaCLI.setupDB(
262 |                 dbName: dbName, dimension: dimension, numResults: 10, threshold: 0.7
263 |             )
264 |             // Unwrap each DocumentID argument to its `uuid` value.
265 |             let uuids = ids.map { $0.uuid }
266 |             try await database.deleteDocuments(ids: uuids)
267 |             print("Deleted \(ids.count) documents")
268 |         }
269 |     }
270 |     
271 |     struct Reset: AsyncParsableCommand {
272 |         static let configuration = CommandConfiguration(
273 |             abstract: "Reset the vector database"
274 |         )
275 | 
276 |         @Option(name: [.long, .customShort("d")], help: "Database name")
277 |         var dbName: String = "vectura-cli-db"
278 | 
279 |         @Option(name: [.long, .customShort("v")], help: "Vector dimension")
280 |         var dimension: Int = 384
281 | 
282 |         /// Opens the named database, calls `reset()` on it, and prints a
283 |         /// confirmation once that call returns without throwing.
284 |         mutating func run() async throws {
285 |             let database = try await VecturaCLI.setupDB(
286 |                 dbName: dbName, dimension: dimension, numResults: 10, threshold: 0.7
287 |             )
288 | 
289 |             try await database.reset()
290 |             print("Database reset successfully")
291 |         }
292 |     }
293 | }
294 | 


--------------------------------------------------------------------------------
/Sources/VecturaKit/BM25Index.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  BM25Index.swift
  3 | //  VecturaKit
  4 | //
  5 | //  Created by Rudrank Riyam on 1/19/25.
  6 | //
  7 | 
  8 | import Foundation
  9 | 
 10 | private func tokenize(_ text: String) -> [String] {
 11 |     // Lowercase, strip diacritics, split on non-alphanumerics, drop empty pieces.
 12 |     let folded = text.lowercased().folding(options: .diacriticInsensitive, locale: .current)
 13 |     let pieces = folded.components(separatedBy: CharacterSet.alphanumerics.inverted)
 14 |     return pieces.filter { !$0.isEmpty }
 15 | }
 16 | 
 17 | /// An index for BM25-based text search over VecturaDocuments
 18 | public struct BM25Index {
 19 |     private let k1: Float
 20 |     private let b: Float
 21 |     private var documents: [VecturaDocument]
 22 |     private var documentFrequencies: [String: Int]
 23 |     private var documentLengths: [UUID: Int]
 24 |     private var averageDocumentLength: Float
 25 |     /// Term counts per document, cached when indexing so searches never re-tokenize.
 26 |     private var termCounts: [UUID: [String: Int]]
 27 | 
 28 |     /// Creates a new BM25 index for the given documents
 29 |     ///
 30 |     /// - Parameters:
 31 |     ///   - documents: The documents to index (may be empty)
 32 |     ///   - k1: BM25 k1 parameter (default: 1.2)
 33 |     ///   - b: BM25 b parameter (default: 0.75)
 34 |     public init(documents: [VecturaDocument], k1: Float = 1.2, b: Float = 0.75) {
 35 |         self.k1 = k1
 36 |         self.b = b
 37 |         self.documents = []
 38 |         self.documentFrequencies = [:]
 39 |         self.documentLengths = [:]
 40 |         self.termCounts = [:]
 41 |         self.averageDocumentLength = 0
 42 |         // Index each document once; the average is derived after all lengths are known.
 43 |         for document in documents { index(document) }
 44 |         refreshAverageLength()
 45 |     }
 46 | 
 47 |     /// Searches the index using BM25 scoring
 48 |     ///
 49 |     /// - Parameters:
 50 |     ///   - query: The search query
 51 |     ///   - topK: Maximum number of results to return; values below 1 yield no results
 52 |     /// - Returns: Documents with a positive BM25 score, best first
 53 |     public func search(query: String, topK: Int = 10) -> [(document: VecturaDocument, score: Float)] {
 54 |         let queryTerms = tokenize(query)
 55 |         guard topK > 0, !queryTerms.isEmpty, !documents.isEmpty else { return [] }
 56 | 
 57 |         // Use the non-negative IDF log(1 + (N - df + 0.5) / (df + 0.5)). The plain form goes
 58 |         // negative once a term is in more than half of the documents, which would rank a
 59 |         // matching document below a non-matching one (or drop it from the results).
 60 |         let total = Float(documents.count)
 61 |         var idfByTerm: [String: Float] = [:]
 62 |         for term in Set(queryTerms) {
 63 |             let df = Float(documentFrequencies[term] ?? 0)
 64 |             idfByTerm[term] = log(1 + (total - df + 0.5) / (df + 0.5))
 65 |         }
 66 | 
 67 |         var scored: [(document: VecturaDocument, score: Float)] = []
 68 |         for document in documents {
 69 |             guard let counts = termCounts[document.id] else { continue }
 70 |             let docLength = Float(documentLengths[document.id] ?? 0)
 71 |             // The average is 0 when every document is empty; avoid computing 0 / 0.
 72 |             let lengthRatio = averageDocumentLength > 0 ? docLength / averageDocumentLength : 0
 73 |             let lengthNorm = k1 * (1 - b + b * lengthRatio)
 74 | 
 75 |             var score: Float = 0
 76 |             for term in queryTerms {  // a repeated query term counts once per occurrence
 77 |                 guard let count = counts[term], let idf = idfByTerm[term] else { continue }
 78 |                 let tf = Float(count)
 79 |                 score += idf * (tf * (k1 + 1) / (tf + lengthNorm))
 80 |             }
 81 |             if score > 0 { scored.append((document: document, score: score)) }
 82 |         }
 83 | 
 84 |         return Array(scored.sorted { $0.score > $1.score }.prefix(topK))
 85 |     }
 86 | 
 87 |     /// Add a new document to the index
 88 |     ///
 89 |     /// - Parameter document: The document to add
 90 |     public mutating func addDocument(_ document: VecturaDocument) {
 91 |         index(document)
 92 |         refreshAverageLength()
 93 |     }
 94 | 
 95 |     /// Tokenizes `document` once and records its length, term counts and document frequencies.
 96 |     private mutating func index(_ document: VecturaDocument) {
 97 |         let tokens = tokenize(document.text)
 98 |         let counts = tokens.reduce(into: [String: Int]()) { $0[$1, default: 0] += 1 }
 99 |         documents.append(document)
100 |         documentLengths[document.id] = tokens.count
101 |         termCounts[document.id] = counts
102 |         for term in counts.keys { documentFrequencies[term, default: 0] += 1 }
103 |     }
104 | 
105 |     /// Recomputes the mean token count per document; 0 when the index is empty.
106 |     private mutating func refreshAverageLength() {
107 |         let totalLength = documentLengths.values.reduce(0, +)
108 |         averageDocumentLength = documents.isEmpty ? 0 : Float(totalLength) / Float(documents.count)
109 |     }
110 | }
111 | 
112 | extension VecturaDocument {
113 |     /// Blends a vector-similarity score and a BM25 score into one ranking value.
114 |     /// - Parameters:
115 |     ///   - vectorScore: The vector similarity score
116 |     ///   - bm25Score: The BM25 score; it is divided by 10 and clamped to 0...1 before blending
117 |     ///   - weight: Weight of `vectorScore` (0.0-1.0); the clamped BM25 value gets `1 - weight`
118 |     /// - Returns: `weight * vectorScore + (1 - weight) * clampedBM25`
119 |     public func hybridScore(vectorScore: Float, bm25Score: Float, weight: Float = 0.5) -> Float {
120 |         let scaledBM25 = bm25Score / 10.0
121 |         let clampedBM25 = min(max(scaledBM25, 0), 1)
122 |         return weight * vectorScore + (1 - weight) * clampedBM25
123 |     }
124 | }
125 | 


--------------------------------------------------------------------------------
/Sources/VecturaKit/FileStorageProvider.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import Accelerate
  3 | 
  4 | /// A file‑based storage provider that implements VecturaStorage using JSON files.
  5 | /// This provider maintains an in‑memory cache of documents while persisting them
  6 | /// to a specified storage directory.
  7 | public class FileStorageProvider: VecturaStorage {
  8 |     /// The storage directory where JSON files are stored.
  9 |     private let storageDirectory: URL
 10 | 
 11 |     /// In-memory cache of documents keyed by their UUID.
 12 |     private var documents: [UUID: VecturaDocument] = [:]
 13 | 
 14 |     /// In-memory cache of normalized embeddings for each document.
 15 |     private var normalizedEmbeddings: [UUID: [Float]] = [:]
 16 | 
 17 |     /// Initializes the provider with the target storage directory.
 18 |     ///
 19 |     /// The directory is created when missing, then every readable `.json` file in it is
 20 |     /// decoded into the in-memory caches.
 21 |     ///
 22 |     /// - Parameter storageDirectory: The directory URL where documents will be saved and loaded.
 23 |     /// - Throws: If the directory cannot be created or its contents cannot be listed.
 24 |     public init(storageDirectory: URL) throws {
 25 |         self.storageDirectory = storageDirectory
 26 |         try FileStorageProvider.ensureDirectoryExists(at: storageDirectory)
 27 |         try loadDocumentsFromStorage()
 28 |     }
 29 | 
 30 |     /// Ensures that the storage directory exists.
 31 |     public func createStorageDirectoryIfNeeded() async throws {
 32 |         try FileStorageProvider.ensureDirectoryExists(at: storageDirectory)
 33 |     }
 34 | 
 35 |     /// Returns the documents held in the in-memory cache (loaded at init, updated on save/delete).
 36 |     public func loadDocuments() async throws -> [VecturaDocument] {
 37 |         Array(documents.values)
 38 |     }
 39 | 
 40 |     /// Saves a document by encoding it to JSON and writing it to disk, then caches it.
 41 |     ///
 42 |     /// The file is written first, and atomically, so a failed write leaves neither a
 43 |     /// truncated JSON file on disk nor a cached document that was never persisted.
 44 |     ///
 45 |     /// - Parameter document: The document to persist; an existing file with the same ID is replaced.
 46 |     /// - Throws: Any encoding or file-system error; the caches are untouched in that case.
 47 |     public func saveDocument(_ document: VecturaDocument) async throws {
 48 |         let encoder = JSONEncoder()
 49 |         encoder.outputFormatting = .prettyPrinted
 50 |         let data = try encoder.encode(document)
 51 |         try data.write(to: fileURL(for: document.id), options: .atomic)
 52 |         cache(document)
 53 |     }
 54 | 
 55 |     /// Deletes a document by removing it from the in-memory caches and deleting its file.
 56 |     ///
 57 |     /// - Throws: If the file cannot be removed; the cache entries are dropped regardless.
 58 |     public func deleteDocument(withID id: UUID) async throws {
 59 |         documents.removeValue(forKey: id)
 60 |         normalizedEmbeddings.removeValue(forKey: id)
 61 |         try FileManager.default.removeItem(at: fileURL(for: id))
 62 |     }
 63 | 
 64 |     /// Updates an existing document.
 65 |     /// This is implemented by saving the updated document, which overwrites the existing file.
 66 |     public func updateDocument(_ document: VecturaDocument) async throws {
 67 |         try await saveDocument(document)
 68 |     }
 69 | 
 70 |     // MARK: - Private Helper Methods
 71 | 
 72 |     /// Creates `directory` (and missing parents) unless something already exists at that path.
 73 |     private static func ensureDirectoryExists(at directory: URL) throws {
 74 |         guard !FileManager.default.fileExists(atPath: directory.path) else { return }
 75 |         try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true)
 76 |     }
 77 | 
 78 |     /// The JSON file that stores the document with the given ID.
 79 |     private func fileURL(for id: UUID) -> URL {
 80 |         storageDirectory.appendingPathComponent("\(id).json")
 81 |     }
 82 | 
 83 |     /// Stores `document` and its normalized embedding in the in-memory caches.
 84 |     private func cache(_ document: VecturaDocument) {
 85 |         documents[document.id] = document
 86 |         // The small epsilon keeps an all-zero embedding from dividing by zero.
 87 |         var divisor = l2Norm(document.embedding) + 1e-9
 88 |         var normalized = [Float](repeating: 0, count: document.embedding.count)
 89 |         vDSP_vsdiv(document.embedding, 1, &divisor, &normalized, 1, vDSP_Length(document.embedding.count))
 90 |         normalizedEmbeddings[document.id] = normalized
 91 |     }
 92 | 
 93 |     /// Loads all JSON-encoded documents from disk into memory.
 94 |     /// Files that cannot be read or decoded are reported and skipped, not treated as fatal.
 95 |     private func loadDocumentsFromStorage() throws {
 96 |         let fileURLs = try FileManager.default.contentsOfDirectory(at: storageDirectory, includingPropertiesForKeys: nil)
 97 |         let decoder = JSONDecoder()
 98 | 
 99 |         for url in fileURLs where url.pathExtension.lowercased() == "json" {
100 |             do {
101 |                 try cache(decoder.decode(VecturaDocument.self, from: Data(contentsOf: url)))
102 |             } catch {
103 |                 print("Failed to load \(url.lastPathComponent): \(error.localizedDescription)")
104 |             }
105 |         }
106 |     }
107 | 
108 |     /// Computes the L2 norm of a vector.
109 |     private func l2Norm(_ vector: [Float]) -> Float {
110 |         var sumSquares: Float = 0
111 |         vDSP_svesq(vector, 1, &sumSquares, vDSP_Length(vector.count))
112 |         return sqrt(sumSquares)
113 |     }
114 | }
115 | 


--------------------------------------------------------------------------------
/Sources/VecturaKit/VecturaConfig.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | 
 3 | /// Configuration options for Vectura vector database.
 4 | public struct VecturaConfig {
 5 | 
 6 |   /// Name identifying this database instance.
 7 |   public let name: String
 8 | 
 9 |   /// Optional custom parent directory for the database files.
10 |   /// It is created when missing; the contents live in a subdirectory named after ``name``.
11 |   public let directoryURL: URL?
12 | 
13 |   /// Number of components of the vectors to be stored.
14 |   public let dimension: Int
15 | 
16 |   /// Tunable defaults for similarity and hybrid search.
17 |   public struct SearchOptions {
18 |     /// How many results to return when a search is given no explicit limit.
19 |     public var defaultNumResults: Int = 10
20 | 
21 |     /// Lowest similarity a result may have; `nil` means no configured cut-off.
22 |     public var minThreshold: Float?
23 | 
24 |     /// Share of the hybrid score taken from vector similarity (0.0-1.0);
25 |     /// BM25 contributes the remaining `1 - hybridWeight`.
26 |     public var hybridWeight: Float = 0.5
27 | 
28 |     /// BM25 `k1` and `b` parameters.
29 |     public var k1: Float = 1.2
30 |     public var b: Float = 0.75
31 | 
32 |     /// Creates search options; each argument defaults to the value shown on its property.
33 |     public init(
34 |       defaultNumResults: Int = 10, minThreshold: Float? = nil,
35 |       hybridWeight: Float = 0.5, k1: Float = 1.2, b: Float = 0.75
36 |     ) {
37 |       self.b = b
38 |       self.k1 = k1
39 |       self.hybridWeight = hybridWeight
40 |       self.minThreshold = minThreshold
41 |       self.defaultNumResults = defaultNumResults
42 |     }
43 |   }
44 | 
45 |   /// Options applied to searches on this database.
46 |   public var searchOptions: SearchOptions
47 | 
48 |   /// Creates a configuration for the database `name` holding `dimension`-sized vectors.
49 |   ///
50 |   /// `directoryURL` defaults to `nil` (no custom location) and `searchOptions` to
51 |   /// `SearchOptions()`.
52 |   public init(
53 |     name: String, directoryURL: URL? = nil,
54 |     dimension: Int, searchOptions: SearchOptions = SearchOptions()
55 |   ) {
56 |     self.searchOptions = searchOptions
57 |     self.dimension = dimension
58 |     self.directoryURL = directoryURL
59 |     self.name = name
60 |   }
61 | }
62 | 


--------------------------------------------------------------------------------
/Sources/VecturaKit/VecturaDocument.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | 
 3 | /// A document stored in the vector database.
 4 | public struct VecturaDocument: Identifiable, Codable, Sendable {
 5 |     /// Identifier that uniquely names this document.
 6 |     public let id: UUID
 7 | 
 8 |     /// Text held by the document.
 9 |     public let text: String
10 | 
11 |     /// Vector embedding stored with the document.
12 |     public let embedding: [Float]
13 | 
14 |     /// Creation timestamp; the initializer sets it to the current date.
15 |     public let createdAt: Date
16 | 
17 |     /// Builds a document stamped with the current date.
18 |     /// - Parameters:
19 |     ///   - id: Identifier to use; a fresh `UUID` is generated when `nil`.
20 |     ///   - text: The text content of the document.
21 |     ///   - embedding: The vector embedding of the document.
22 |     public init(id: UUID? = nil, text: String, embedding: [Float]) {
23 |         self.createdAt = Date()
24 |         self.embedding = embedding
25 |         self.text = text
26 |         self.id = id ?? UUID()
27 |     }
28 | 
29 |     // MARK: - Codable
30 |     enum CodingKeys: String, CodingKey {
31 |         case id, text, embedding, createdAt
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/Sources/VecturaKit/VecturaError.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | 
 3 | /// Errors that can occur when using VecturaKit.
 4 | public enum VecturaError: LocalizedError {
 5 |     /// A collection with the given name already exists.
 6 |     case collectionAlreadyExists(String)
 7 | 
 8 |     /// No collection with the given name exists.
 9 |     case collectionNotFound(String)
10 | 
11 |     /// A vector had `got` components where `expected` were required.
12 |     case dimensionMismatch(expected: Int, got: Int)
13 | 
14 |     /// Collection data could not be loaded; the payload is the reason.
15 |     case loadFailed(String)
16 | 
17 |     /// Input validation failed; the payload is the reason.
18 |     case invalidInput(String)
19 | 
20 |     public var errorDescription: String? {
21 |         switch self {
22 |         case let .collectionAlreadyExists(name):
23 |             return "A collection named '\(name)' already exists."
24 |         case let .collectionNotFound(name):
25 |             return "Collection '\(name)' not found."
26 |         case let .dimensionMismatch(expected, got):
27 |             return "Vector dimension mismatch. Expected \(expected) but got \(got)."
28 |         case let .loadFailed(reason):
29 |             return "Failed to load collection: \(reason)"
30 |         case let .invalidInput(reason):
31 |             return "Invalid input: \(reason)"
32 |         }
33 |     }
34 | }
35 | 


--------------------------------------------------------------------------------
/Sources/VecturaKit/VecturaKit.swift:
--------------------------------------------------------------------------------
  1 | import Accelerate
  2 | import CoreML
  3 | import Embeddings
  4 | import Foundation
  5 | 
  6 | @available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, watchOS 11.0, *)
  7 | /// A vector database implementation that stores and searches documents using their vector embeddings.
  8 | public class VecturaKit: VecturaProtocol {
  9 | 
 10 |     /// The configuration for this vector database instance.
 11 |     private let config: VecturaConfig
 12 | 
 13 |     /// In-memory cache of all documents.
 14 |     private var documents: [UUID: VecturaDocument]
 15 | 
 16 |     /// The storage directory for documents.
 17 |     private let storageDirectory: URL
 18 | 
 19 |     /// The storage provider that handles document persistence.
 20 |     private let storageProvider: VecturaStorage
 21 | 
 22 |     /// Cached normalized embeddings for faster searches.
 23 |     private var normalizedEmbeddings: [UUID: [Float]] = [:]
 24 | 
 25 |     /// BM25 index for text search
 26 |     private var bm25Index: BM25Index?
 27 | 
 28 |     /// Swift-Embeddings model bundle that you can reuse (e.g. BERT, XLM-R, CLIP, etc.)
 29 |     private var bertModel: Bert.ModelBundle?
 30 | 
 31 |     // MARK: - Initialization
 32 | 
 33 |     /// Opens the database described by `config`, creating its directory when needed,
 34 |     /// and loads every stored document into memory.
 35 |     ///
 36 |     /// - Throws: `VecturaError.loadFailed` if no documents directory is available, plus
 37 |     ///   any error raised while creating the directory or loading stored documents.
 38 |     public init(config: VecturaConfig) async throws {
 39 |         self.config = config
 40 |         self.documents = [:]
 41 | 
 42 |         let directory: URL
 43 |         if let customStorageDirectory = config.directoryURL {
 44 |             directory = customStorageDirectory.appending(path: config.name)
 45 |         } else if let documentsDirectory = FileManager.default
 46 |             .urls(for: .documentDirectory, in: .userDomainMask).first {
 47 |             directory = documentsDirectory
 48 |                 .appendingPathComponent("VecturaKit")
 49 |                 .appendingPathComponent(config.name)
 50 |         } else {
 51 |             // Report a missing documents directory as an error rather than crashing.
 52 |             throw VecturaError.loadFailed("No documents directory is available")
 53 |         }
 54 |         self.storageDirectory = directory
 55 | 
 56 |         // A single call suffices: it creates missing parents and succeeds if the directory exists.
 57 |         try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true)
 58 |         self.storageProvider = try FileStorageProvider(storageDirectory: directory)
 59 | 
 60 |         // Mirror the stored documents and cache their normalized embeddings for search.
 61 |         for doc in try await storageProvider.loadDocuments() {
 62 |             self.documents[doc.id] = doc
 63 |             var divisor = l2Norm(doc.embedding) + 1e-9
 64 |             var normalized = [Float](repeating: 0, count: doc.embedding.count)
 65 |             vDSP_vsdiv(doc.embedding, 1, &divisor, &normalized, 1, vDSP_Length(doc.embedding.count))
 66 |             self.normalizedEmbeddings[doc.id] = normalized
 67 |         }
 68 |     }
 69 | 
 70 |     /// Adds multiple documents to the vector store in batch.
 71 |     ///
 72 |     /// Embeddings for all texts are computed in one batch. The JSON files are written
 73 |     /// before any in-memory state changes, so a failed write leaves the caches untouched.
 74 |     ///
 75 |     /// - Parameters:
 76 |     ///   - texts: The texts to embed and store; an empty array adds nothing.
 77 |     ///   - ids: Optional IDs, one per text; a new UUID is generated for each text when `nil`.
 78 |     ///   - model: Model loaded if none is loaded yet; an already loaded model is reused.
 79 |     /// - Returns: The IDs of the added documents, in the order of `texts`.
 80 |     /// - Throws: `VecturaError.invalidInput` or `VecturaError.dimensionMismatch` for bad
 81 |     ///   input or an unexpected embedding shape, plus model, encoding and file errors.
 82 |     public func addDocuments(
 83 |         texts: [String],
 84 |         ids: [UUID]? = nil,
 85 |         model: VecturaModelSource = .default
 86 |     ) async throws -> [UUID] {
 87 |         if let ids = ids, ids.count != texts.count {
 88 |             throw VecturaError.invalidInput("Number of IDs must match number of texts")
 89 |         }
 90 |         // Nothing to embed or store.
 91 |         guard !texts.isEmpty else { return [] }
 92 | 
 93 |         if bertModel == nil {
 94 |             bertModel = try await Bert.loadModelBundle(from: model)
 95 |         }
 96 |         guard let modelBundle = bertModel else {
 97 |             throw VecturaError.invalidInput("Failed to load BERT model: \(model)")
 98 |         }
 99 | 
100 |         let embeddingsTensor = try modelBundle.batchEncode(texts)
101 |         let shape = embeddingsTensor.shape
102 |         // One row per text is required; the row slicing below would otherwise go out of bounds.
103 |         guard shape.count == 2, shape[0] == texts.count else {
104 |             throw VecturaError.invalidInput("Expected shape [N, D], got \(shape)")
105 |         }
106 |         guard shape[1] == config.dimension else {
107 |             throw VecturaError.dimensionMismatch(
108 |                 expected: config.dimension,
109 |                 got: shape[1]
110 |             )
111 |         }
112 | 
113 |         let embeddingShapedArray = await embeddingsTensor.cast(to: Float.self).shapedArray(
114 |             of: Float.self)
115 |         let allScalars = embeddingShapedArray.scalars
116 |         let dimension = config.dimension
117 | 
118 |         // Row i of the [N, D] tensor is the embedding of texts[i].
119 |         let newDocuments = texts.enumerated().map { index, text in
120 |             VecturaDocument(
121 |                 id: ids?[index] ?? UUID(),
122 |                 text: text,
123 |                 embedding: Array(allScalars[(index * dimension)..<((index + 1) * dimension)])
124 |             )
125 |         }
126 | 
127 |         // Persist first, one task per document; any failed write aborts before state changes.
128 |         let directory = storageDirectory
129 |         try await withThrowingTaskGroup(of: Void.self) { group in
130 |             for doc in newDocuments {
131 |                 group.addTask {
132 |                     let documentURL = directory.appendingPathComponent("\(doc.id).json")
133 |                     let encoder = JSONEncoder()
134 |                     encoder.outputFormatting = .prettyPrinted
135 |                     let data = try encoder.encode(doc)
136 |                     try data.write(to: documentURL, options: .atomic)
137 |                 }
138 |             }
139 |             try await group.waitForAll()
140 |         }
141 | 
142 |         for doc in newDocuments {
143 |             // Cache the normalized embedding; the epsilon guards against a zero norm.
144 |             var divisor = l2Norm(doc.embedding) + 1e-9
145 |             var normalized = [Float](repeating: 0, count: doc.embedding.count)
146 |             vDSP_vsdiv(doc.embedding, 1, &divisor, &normalized, 1, vDSP_Length(doc.embedding.count))
147 |             normalizedEmbeddings[doc.id] = normalized
148 |             documents[doc.id] = doc
149 |         }
150 | 
151 |         // Rebuild the BM25 index over all documents so text search sees the additions.
152 |         bm25Index = BM25Index(
153 |             documents: Array(documents.values),
154 |             k1: config.searchOptions.k1,
155 |             b: config.searchOptions.b
156 |         )
157 | 
158 |         return newDocuments.map(\.id)
159 |     }
160 | 
161 |     /// Returns the stored documents most similar to `queryEmbedding`, best first.
162 |     ///
163 |     /// Scores are dot products of length-normalized vectors, i.e. cosine similarities.
164 |     ///
165 |     /// - Parameters:
166 |     ///   - queryEmbedding: Query vector; must have `config.dimension` components.
167 |     ///   - numResults: Maximum number of results; defaults to `searchOptions.defaultNumResults`.
168 |     ///   - threshold: Minimum score; defaults to `searchOptions.minThreshold` (no filter if both are `nil`).
169 |     /// - Throws: `VecturaError.dimensionMismatch` if the query has the wrong dimension.
170 |     public func search(
171 |         query queryEmbedding: [Float],
172 |         numResults: Int? = nil,
173 |         threshold: Float? = nil
174 |     ) async throws -> [VecturaSearchResult] {
175 |         if queryEmbedding.count != config.dimension {
176 |             throw VecturaError.dimensionMismatch(
177 |                 expected: config.dimension,
178 |                 got: queryEmbedding.count
179 |             )
180 |         }
181 | 
182 |         // Normalize the query vector
183 |         var divisor = l2Norm(queryEmbedding) + 1e-9
184 |         var normalizedQuery = [Float](repeating: 0, count: queryEmbedding.count)
185 |         vDSP_vsdiv(queryEmbedding, 1, &divisor, &normalizedQuery, 1, vDSP_Length(queryEmbedding.count))
186 | 
187 |         // Build a matrix of normalized document embeddings in row-major order.
188 |         // Rows must all be `config.dimension` wide: a cached embedding of another length
189 |         // would misalign the rows and could make sgemv read past the end of the matrix,
190 |         // so such documents are skipped.
191 |         var docIds = [UUID]()
192 |         var matrix = [Float]()
193 |         docIds.reserveCapacity(documents.count)
194 |         matrix.reserveCapacity(documents.count * config.dimension)
195 |         for doc in documents.values {
196 |             guard let normalized = normalizedEmbeddings[doc.id],
197 |                   normalized.count == config.dimension else { continue }
198 |             docIds.append(doc.id)
199 |             matrix.append(contentsOf: normalized)
200 |         }
201 | 
202 |         let rowCount = docIds.count           // rows: one per document
203 |         let columnCount = config.dimension    // columns: embedding dimension
204 |         if rowCount == 0 {
205 |             return []
206 |         }
207 |         var similarities = [Float](repeating: 0, count: rowCount)
208 | 
209 |         // Compute all similarities at once: similarities = matrix * normalizedQuery
210 |         cblas_sgemv(
211 |             CblasRowMajor,    // matrix layout
212 |             CblasNoTrans,     // no transpose needed for row-major
213 |             rowCount,         // number of rows (documents)
214 |             columnCount,      // number of columns (dimension)
215 |             1.0,              // alpha scaling factor
216 |             matrix,           // matrix
217 |             columnCount,      // leading dimension
218 |             normalizedQuery,  // vector
219 |             1,                // vector increment
220 |             0.0,              // beta scaling factor
221 |             &similarities,    // result vector
222 |             1                 // result increment
223 |         )
224 | 
225 |         // Construct the results
226 |         let minimumScore = threshold ?? config.searchOptions.minThreshold
227 |         var results = [VecturaSearchResult]()
228 |         results.reserveCapacity(rowCount)
229 |         for (id, similarity) in zip(docIds, similarities) {
230 |             if let minimum = minimumScore, similarity < minimum { continue }
231 |             guard let doc = documents[id] else { continue }
232 |             results.append(
233 |                 VecturaSearchResult(
234 |                     id: doc.id,
235 |                     text: doc.text,
236 |                     score: similarity,
237 |                     createdAt: doc.createdAt
238 |                 )
239 |             )
240 |         }
241 | 
242 |         results.sort { $0.score > $1.score }
243 |         // A negative limit would trap in prefix(_:), so clamp it to zero.
244 |         let limit = max(0, numResults ?? config.searchOptions.defaultNumResults)
245 |         return Array(results.prefix(limit))
246 |     }
247 | 
248 |     /// Hybrid text search: ranks documents by a weighted blend of vector similarity and BM25.
249 |     ///
250 |     /// - Parameters:
251 |     ///   - query: The query text.
252 |     ///   - numResults: Maximum number of results; defaults to `searchOptions.defaultNumResults`.
253 |     ///   - threshold: Minimum hybrid score; defaults to `searchOptions.minThreshold`.
254 |     ///   - model: Model loaded if none is loaded yet; an already loaded model is reused.
255 |     /// - Returns: Results sorted by descending hybrid score.
256 |     public func search(
257 |         query: String,
258 |         numResults: Int? = nil,
259 |         threshold: Float? = nil,
260 |         model: VecturaModelSource = .default
261 |     ) async throws -> [VecturaSearchResult] {
262 |         if bertModel == nil {
263 |             bertModel = try await Bert.loadModelBundle(from: model)
264 |         }
265 |         guard let modelBundle = bertModel else {
266 |             throw VecturaError.invalidInput("Failed to load BERT model: \(model)")
267 |         }
268 | 
269 |         // Initialize BM25 index if needed
270 |         if bm25Index == nil {
271 |             bm25Index = BM25Index(
272 |                 documents: Array(documents.values),
273 |                 k1: config.searchOptions.k1,
274 |                 b: config.searchOptions.b
275 |             )
276 |         }
277 | 
278 |         // Score every document by vector similarity. The limit and threshold belong to the
279 |         // hybrid score, so they must not be applied here: passing nil would silently cap the
280 |         // candidates at the configured defaults before BM25 is taken into account.
281 |         let queryEmbeddingTensor = try modelBundle.encode(query)
282 |         let queryEmbeddingFloatArray = await tensorToArray(queryEmbeddingTensor)
283 |         let vectorResults = try await search(
284 |             query: queryEmbeddingFloatArray,
285 |             numResults: documents.count,
286 |             threshold: -Float.infinity
287 |         )
288 | 
289 |         let bm25Results = bm25Index?.search(query: query, topK: documents.count) ?? []
290 | 
291 |         // Create a map of document IDs to their BM25 scores
292 |         let bm25Scores = Dictionary(
293 |             bm25Results.map { ($0.document.id, $0.score) },
294 |             uniquingKeysWith: { first, _ in first }
295 |         )
296 | 
297 |         // hybridScore(...) does not depend on its receiver, so one placeholder document serves all results.
298 |         let scorer = VecturaDocument(text: "", embedding: [])
299 |         let hybridWeight = config.searchOptions.hybridWeight
300 |         var hybridResults = vectorResults.map { result in
301 |             VecturaSearchResult(
302 |                 id: result.id,
303 |                 text: result.text,
304 |                 score: scorer.hybridScore(
305 |                     vectorScore: result.score,
306 |                     bm25Score: bm25Scores[result.id] ?? 0,
307 |                     weight: hybridWeight
308 |                 ),
309 |                 createdAt: result.createdAt
310 |             )
311 |         }
312 | 
313 |         hybridResults.sort { $0.score > $1.score }
314 | 
315 |         if let threshold = threshold ?? config.searchOptions.minThreshold {
316 |             hybridResults = hybridResults.filter { $0.score >= threshold }
317 |         }
318 | 
319 |         // A negative limit would trap in prefix(_:), so clamp it to zero.
320 |         let limit = max(0, numResults ?? config.searchOptions.defaultNumResults)
321 |         return Array(hybridResults.prefix(limit))
322 |     }
323 | 
324 |     @_disfavoredOverload
325 |     public func search(
326 |         query: String,
327 |         numResults: Int? = nil,
328 |         threshold: Float? = nil,
329 |         modelId: String = VecturaModelSource.defaultModelId
330 |     ) async throws -> [VecturaSearchResult] {
331 |         try await search(
332 |             query: query, numResults: numResults, threshold: threshold, model: .id(modelId))
333 |     }
334 | 
335 |     public func reset() async throws {
336 |         documents.removeAll()
337 |         normalizedEmbeddings.removeAll()
338 | 
339 |         let files = try FileManager.default.contentsOfDirectory(
340 |             at: storageDirectory, includingPropertiesForKeys: nil)
341 |         for fileURL in files {
342 |             try FileManager.default.removeItem(at: fileURL)
343 |         }
344 |     }
345 | 
346 |     public func deleteDocuments(ids: [UUID]) async throws {
347 |         if bm25Index != nil {
348 |             let remainingDocs = documents.values.filter { !ids.contains($0.id) }
349 |             bm25Index = BM25Index(
350 |                 documents: Array(remainingDocs),
351 |                 k1: config.searchOptions.k1,
352 |                 b: config.searchOptions.b
353 |             )
354 |         }
355 | 
356 |         for id in ids {
357 |             documents[id] = nil
358 |             normalizedEmbeddings[id] = nil
359 | 
360 |             let documentURL = storageDirectory.appendingPathComponent("\(id).json")
361 |             try FileManager.default.removeItem(at: documentURL)
362 |         }
363 |     }
364 | 
365 |     public func updateDocument(
366 |         id: UUID,
367 |         newText: String,
368 |         model: VecturaModelSource = .default
369 |     ) async throws {
370 |         try await deleteDocuments(ids: [id])
371 | 
372 |         _ = try await addDocument(text: newText, id: id, model: model)
373 |     }
374 | 
375 |     @_disfavoredOverload
376 |     public func updateDocument(
377 |         id: UUID,
378 |         newText: String,
379 |         modelId: String = VecturaModelSource.defaultModelId
380 |     ) async throws {
381 |         try await updateDocument(id: id, newText: newText, model: .id(modelId))
382 |     }
383 | 
384 |     // MARK: - Private
385 | 
386 |     private func tensorToArray(_ tensor: MLTensor) async -> [Float] {
387 |         let shaped = await tensor.cast(to: Float.self).shapedArray(of: Float.self)
388 |         return shaped.scalars
389 |     }
390 | 
391 |     private func dotProduct(_ a: [Float], _ b: [Float]) -> Float {
392 |         var result: Float = 0
393 |         vDSP_dotpr(a, 1, b, 1, &result, vDSP_Length(a.count))
394 |         return result
395 |     }
396 | 
397 |     private func l2Norm(_ v: [Float]) -> Float {
398 |         var sumSquares: Float = 0
399 |         vDSP_svesq(v, 1, &sumSquares, vDSP_Length(v.count))
400 |         return sqrt(sumSquares)
401 |     }
402 | }
403 | 
@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, watchOS 11.0, *)
extension Bert {
    /// Loads a model bundle from either kind of ``VecturaModelSource``.
    ///
    /// Dispatches to the loader that takes a model identifier or to the one that
    /// takes a local folder URL, depending on the case of `source`.
    static func loadModelBundle(from source: VecturaModelSource) async throws -> Bert.ModelBundle {
        switch source {
        case .id(let identifier):
            return try await loadModelBundle(from: identifier)
        case .folder(let directory):
            return try await loadModelBundle(from: directory)
        }
    }
}
415 | 


--------------------------------------------------------------------------------
/Sources/VecturaKit/VecturaModelSource.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | 
 3 | /// Specifies where to obtain the resources for an embedding model.
 4 | public enum VecturaModelSource: Sendable, CustomStringConvertible {
 5 |     /// Automatically fetch the model from a remote repository based on its id.
 6 |     case id(_ id: String)
 7 |     /// Load a local model from the specified directory URL.
 8 |     case folder(_ url: URL)
 9 | }
10 | 
public extension VecturaModelSource {
    /// Identifier of the model used when the caller does not specify one.
    static let defaultModelId: String = "sentence-transformers/all-MiniLM-L6-v2"

    /// Model source used when the caller does not specify one; wraps ``defaultModelId``.
    static let `default`: VecturaModelSource = .id(Self.defaultModelId)
}
18 | 
public extension VecturaModelSource {
    /// Textual form of the source: the model id, or the folder's non-percent-encoded path.
    var description: String {
        switch self {
        case .id(let identifier):
            return identifier
        case .folder(let directory):
            return directory.path(percentEncoded: false)
        }
    }
}
27 | 


--------------------------------------------------------------------------------
/Sources/VecturaKit/VecturaProtocol.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | 
 3 | /// A protocol defining the requirements for a vector database instance.
 4 | public protocol VecturaProtocol {
 5 | 
 6 |     /// Adds multiple documents to the vector store in batch.
 7 |     ///
 8 |     /// - Parameters:
 9 |     ///   - texts: The text contents of the documents.
10 |     ///   - ids: Optional unique identifiers for the documents.
11 |     ///   - model: A ``VecturaModelSource`` specifying how to load the model.
12 |     ///              (e.g.,`.id("sentence-transformers/all-MiniLM-L6-v2")`).
13 |     /// - Returns: The IDs of the added documents.
14 |     func addDocuments(
15 |         texts: [String],
16 |         ids: [UUID]?,
17 |         model: VecturaModelSource
18 |     ) async throws -> [UUID]
19 | 
20 |     /// Searches for similar documents using a *pre-computed query embedding*.
21 |     ///
22 |     /// - Parameters:
23 |     ///   - query: The query vector to search with.
24 |     ///   - numResults: Maximum number of results to return.
25 |     ///   - threshold: Minimum similarity threshold.
26 |     /// - Returns: An array of search results ordered by similarity.
27 |     func search(
28 |         query: [Float],
29 |         numResults: Int?,
30 |         threshold: Float?
31 |     ) async throws -> [VecturaSearchResult]
32 | 
33 |     /// Removes all documents from the vector store.
34 |     func reset() async throws
35 | }
36 | 
37 | // MARK: - Default Implementations
38 | 
public extension VecturaProtocol {

    /// Embeds and stores a single document.
    ///
    /// Forwards to the batch `addDocuments(texts:ids:model:)` requirement with a
    /// one-element batch and returns the first identifier it reports.
    ///
    /// - Parameters:
    ///   - text: Text content of the document.
    ///   - id: Identifier to assign, or `nil` to let the implementation choose.
    ///   - model: The ``VecturaModelSource`` describing how to load the embedding model
    ///     (for example `.id("sentence-transformers/all-MiniLM-L6-v2")`).
    /// - Returns: The identifier of the added document.
    func addDocument(
        text: String,
        id: UUID? = nil,
        model: VecturaModelSource = .default
    ) async throws -> UUID {
        let explicitIds: [UUID]? = id.map { [$0] }
        let addedIds = try await addDocuments(texts: [text], ids: explicitIds, model: model)
        return addedIds[0]
    }

    /// Embeds and stores a single document, selecting the model by identifier string.
    ///
    /// - Parameters:
    ///   - text: Text content of the document.
    ///   - id: Identifier to assign, or `nil` to let the implementation choose.
    ///   - modelId: Identifier of the embedding model
    ///     (for example `"sentence-transformers/all-MiniLM-L6-v2"`).
    /// - Returns: The identifier of the added document.
    @_disfavoredOverload
    func addDocument(
        text: String,
        id: UUID?,
        modelId: String = VecturaModelSource.defaultModelId
    ) async throws -> UUID {
        let source = VecturaModelSource.id(modelId)
        return try await addDocument(text: text, id: id, model: source)
    }

    /// Embeds and stores a batch of documents, selecting the model by identifier string.
    ///
    /// - Parameters:
    ///   - texts: Text content of each document to add.
    ///   - ids: Identifiers to assign, or `nil` to let the implementation choose.
    ///   - modelId: Identifier of the embedding model
    ///     (for example `"sentence-transformers/all-MiniLM-L6-v2"`).
    /// - Returns: The identifiers of the documents that were added.
    func addDocuments(
        texts: [String],
        ids: [UUID]? = nil,
        modelId: String = VecturaModelSource.defaultModelId
    ) async throws -> [UUID] {
        let source = VecturaModelSource.id(modelId)
        return try await addDocuments(texts: texts, ids: ids, model: source)
    }
}
95 | 


--------------------------------------------------------------------------------
/Sources/VecturaKit/VecturaSearchResult.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | 
 3 | /// Represents a search result from the vector database.
 4 | public struct VecturaSearchResult: Identifiable, Sendable {
 5 | 
 6 |     /// The unique identifier of the matching document.
 7 |     public let id: UUID
 8 |     
 9 |     /// The text content of the matching document.
10 |     public let text: String
11 |     
12 |     /// The similarity score between the query and the document.
13 |     public let score: Float
14 |     
15 |     /// The timestamp when the document was created.
16 |     public let createdAt: Date
17 |     
18 |     /// Creates a new search result with the given properties.
19 |     ///
20 |     /// - Parameters:
21 |     ///   - id: The unique identifier of the matching document.
22 |     ///   - text: The text content of the matching document.
23 |     ///   - score: The similarity score between the query and the document.
24 |     ///   - createdAt: The timestamp when the document was created.
25 |     public init(id: UUID, text: String, score: Float, createdAt: Date) {
26 |         self.id = id
27 |         self.text = text
28 |         self.score = score
29 |         self.createdAt = createdAt
30 |     }
31 | }
32 | 


--------------------------------------------------------------------------------
/Sources/VecturaKit/VecturaStorage.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | 
 3 | /// VecturaStorage protocol abstracts the persistence layer for VecturaDocuments.
 4 | ///
 5 | /// It allows for multiple underlying storage implementations (e.g., File-based or SQLite)
 6 | /// without changing the higher-level API used in VecturaKit.
 7 | public protocol VecturaStorage {
 8 |     /// Prepares or creates the storage location for documents if needed.
 9 |     func createStorageDirectoryIfNeeded() async throws
10 |     
11 |     /// Loads the persisted documents.
12 |     ///
13 |     /// - Returns: An array of VecturaDocument.
14 |     func loadDocuments() async throws -> [VecturaDocument]
15 |     
16 |     /// Saves a document.
17 |     ///
18 |     /// - Parameter document: The document to save.
19 |     func saveDocument(_ document: VecturaDocument) async throws
20 |     
21 |     /// Deletes a document by its unique identifier.
22 |     ///
23 |     /// - Parameter id: The identifier of the document to be deleted.
24 |     func deleteDocument(withID id: UUID) async throws
25 |     
26 |     /// Updates an existing document. The document is replaced or modified as needed.
27 |     ///
28 |     /// - Parameter document: The updated document.
29 |     func updateDocument(_ document: VecturaDocument) async throws
30 | }
31 | 


--------------------------------------------------------------------------------
/Sources/VecturaMLXCLI/VecturaMLXCLI.swift:
--------------------------------------------------------------------------------
  1 | import ArgumentParser
  2 | import Foundation
  3 | import MLXEmbedders
  4 | import VecturaKit
  5 | import VecturaMLXKit
  6 | 
  7 | @available(macOS 14.0, iOS 17.0, tvOS 17.0, visionOS 1.0, watchOS 10.0, *)
  8 | @main
  9 | struct VecturaMLXCLI: AsyncParsableCommand {
 10 |     struct DocumentID: ExpressibleByArgument, Decodable {
 11 |         let uuid: UUID
 12 |         
 13 |         init(_ uuid: UUID) {
 14 |             self.uuid = uuid
 15 |         }
 16 |         
 17 |         init?(argument: String) {
 18 |             guard let uuid = UUID(uuidString: argument) else { return nil }
 19 |             self.uuid = uuid
 20 |         }
 21 |     }
 22 |     
 23 |     static let configuration = CommandConfiguration(
 24 |         commandName: "vectura-mlx",
 25 |         abstract: "A CLI tool for VecturaMLXKit vector database using MLX",
 26 |         subcommands: [Add.self, Search.self, Update.self, Delete.self, Reset.self, Mock.self]
 27 |     )
 28 |     
 29 |     static func setupDB(
 30 |         dbName: String, modelConfiguration: MLXEmbedders.ModelConfiguration = .nomic_text_v1_5
 31 |     )
 32 |     async throws
 33 |     -> VecturaMLXKit
 34 |     {
 35 |         let config = VecturaConfig(
 36 |             name: dbName,
 37 |             dimension: 768  // nomic_text_v1_5 model outputs 768-dimensional embeddings
 38 |         )
 39 |         return try await VecturaMLXKit(config: config, modelConfiguration: modelConfiguration)
 40 |     }
 41 | }
 42 | 
 43 | @available(macOS 14.0, iOS 17.0, tvOS 17.0, visionOS 1.0, watchOS 10.0, *)
 44 | extension VecturaMLXCLI {
 45 |     struct Mock: AsyncParsableCommand {
 46 |         static let configuration = CommandConfiguration(
 47 |             abstract: "Run a mock demonstration with sample data"
 48 |         )
 49 |         
 50 |         @Option(name: [.long, .customShort("d")], help: "Database name")
 51 |         var dbName: String = "vectura-mlx-cli-db"
 52 |         
 53 |         mutating func run() async throws {
 54 |             print("Starting mock command...")
 55 |             
 56 |             print("Setting up database...")
 57 |             let db = try await VecturaMLXCLI.setupDB(dbName: dbName)
 58 |             print("Database setup complete")
 59 |             
 60 |             // First, reset the database
 61 |             print("\n🧹 Resetting database...")
 62 |             try await db.reset()
 63 |             print("Reset complete")
 64 |             
 65 |             // Add sample documents
 66 |             print("\n📝 Adding sample documents...")
 67 |             let sampleTexts = [
 68 |                 "The quick brown fox jumps over the lazy dog",
 69 |                 "To be or not to be, that is the question",
 70 |                 "All that glitters is not gold",
 71 |                 "A journey of a thousand miles begins with a single step",
 72 |                 "Where there's smoke, there's fire",
 73 |             ]
 74 |             
 75 |             let ids = try await db.addDocuments(texts: sampleTexts)
 76 |             print("Added \(ids.count) documents:")
 77 |             for (id, text) in zip(ids, sampleTexts) {
 78 |                 print("ID: \(id)")
 79 |                 print("Text: \(text)")
 80 |                 print("---")
 81 |             }
 82 |             
 83 |             // Search for documents
 84 |             print("\n🔍 Searching for 'journey'...")
 85 |             let results = try await db.search(query: "journey")
 86 |             
 87 |             print("Found \(results.count) results:")
 88 |             for result in results {
 89 |                 print("ID: \(result.id)")
 90 |                 print("Text: \(result.text)")
 91 |                 print("Score: \(result.score)")
 92 |                 print("Created: \(result.createdAt)")
 93 |                 print("---")
 94 |             }
 95 |             
 96 |             // Update a document
 97 |             if let firstId = ids.first {
 98 |                 print("\n✏️ Updating first document...")
 99 |                 let newText = "The quick red fox jumps over the sleeping dog"
100 |                 try await db.updateDocument(id: firstId, newText: newText)
101 |                 print("Updated document \(firstId) with new text: \(newText)")
102 |             }
103 |             
104 |             // Delete last document
105 |             if let lastId = ids.last {
106 |                 print("\n🗑️ Deleting last document...")
107 |                 try await db.deleteDocuments(ids: [lastId])
108 |                 print("Deleted document \(lastId)")
109 |             }
110 |             
111 |             print("\n✨ Mock demonstration completed!")
112 |         }
113 |     }
114 |     
115 |     struct Add: AsyncParsableCommand {
116 |         static let configuration = CommandConfiguration(
117 |             abstract: "Add documents to the vector database"
118 |         )
119 |         
120 |         @Option(name: [.long, .customShort("d")], help: "Database name")
121 |         var dbName: String = "vectura-mlx-cli-db"
122 |         
123 |         @Argument(help: "Text content to add")
124 |         var text: [String]
125 |         
126 |         mutating func run() async throws {
127 |             let db = try await VecturaMLXCLI.setupDB(dbName: dbName)
128 |             let ids = try await db.addDocuments(texts: text)
129 |             print("Added \(ids.count) documents:")
130 |             for (id, text) in zip(ids, text) {
131 |                 print("ID: \(id)")
132 |                 print("Text: \(text)")
133 |                 print("---")
134 |             }
135 |         }
136 |     }
137 |     
138 |     struct Search: AsyncParsableCommand {
139 |         static let configuration = CommandConfiguration(
140 |             abstract: "Search documents in the vector database"
141 |         )
142 |         
143 |         @Option(name: [.long, .customShort("d")], help: "Database name")
144 |         var dbName: String = "vectura-mlx-cli-db"
145 |         
146 |         @Option(name: [.long, .customShort("t")], help: "Minimum similarity threshold")
147 |         var threshold: Float?
148 |         
149 |         @Option(name: [.long, .customShort("n")], help: "Number of results to return")
150 |         var numResults: Int?
151 |         
152 |         @Argument(help: "Search query")
153 |         var query: String
154 |         
155 |         mutating func run() async throws {
156 |             guard !query.isEmpty else {
157 |                 print("Error: Query cannot be empty.")
158 |                 throw ExitCode.failure
159 |             }
160 |             
161 |             let db = try await VecturaMLXCLI.setupDB(dbName: dbName)
162 |             let results = try await db.search(
163 |                 query: query,
164 |                 numResults: numResults,
165 |                 threshold: threshold
166 |             )
167 |             
168 |             print("Found \(results.count) results:")
169 |             for result in results {
170 |                 print("ID: \(result.id)")
171 |                 print("Text: \(result.text)")
172 |                 print("Score: \(result.score)")
173 |                 print("Created: \(result.createdAt)")
174 |                 print("---")
175 |             }
176 |         }
177 |     }
178 |     
179 |     struct Update: AsyncParsableCommand, Decodable {
180 |         static let configuration = CommandConfiguration(
181 |             abstract: "Update a document in the vector database"
182 |         )
183 |         
184 |         @Option(name: [.long, .customShort("d")], help: "Database name")
185 |         var dbName: String = "vectura-mlx-cli-db"
186 |         
187 |         @Argument(help: "Document ID to update")
188 |         var id: DocumentID
189 |         
190 |         @Argument(help: "New text content")
191 |         var newText: String
192 |         
193 |         mutating func run() async throws {
194 |             let db = try await VecturaMLXCLI.setupDB(dbName: dbName)
195 |             try await db.updateDocument(id: id.uuid, newText: newText)
196 |             print("Updated document \(id.uuid) with new text: \(newText)")
197 |         }
198 |     }
199 |     
200 |     struct Delete: AsyncParsableCommand, Decodable {
201 |         static let configuration = CommandConfiguration(
202 |             abstract: "Delete documents from the vector database"
203 |         )
204 |         
205 |         @Option(name: [.long, .customShort("d")], help: "Database name")
206 |         var dbName: String = "vectura-mlx-cli-db"
207 |         
208 |         @Argument(help: "Document IDs to delete")
209 |         var ids: [DocumentID]
210 |         
211 |         mutating func run() async throws {
212 |             let db = try await VecturaMLXCLI.setupDB(dbName: dbName)
213 |             try await db.deleteDocuments(ids: ids.map(\.uuid))
214 |             print("Deleted \(ids.count) documents")
215 |         }
216 |     }
217 |     
218 |     struct Reset: AsyncParsableCommand {
219 |         static let configuration = CommandConfiguration(
220 |             abstract: "Reset the vector database"
221 |         )
222 |         
223 |         @Option(name: [.long, .customShort("d")], help: "Database name")
224 |         var dbName: String = "vectura-mlx-cli-db"
225 |         
226 |         mutating func run() async throws {
227 |             let db = try await VecturaMLXCLI.setupDB(dbName: dbName)
228 |             try await db.reset()
229 |             print("Database reset successfully")
230 |         }
231 |     }
232 | }
233 | 


--------------------------------------------------------------------------------
/Sources/VecturaMLXKit/MLXEmbedder.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | import MLX
 3 | import MLXEmbedders
 4 | import VecturaKit
 5 | 
 6 | @available(macOS 14.0, iOS 17.0, tvOS 17.0, visionOS 1.0, watchOS 10.0, *)
 7 | public class MLXEmbedder {
 8 |   private let modelContainer: ModelContainer
 9 |   private let configuration: ModelConfiguration
10 | 
11 |   public init(configuration: ModelConfiguration = .nomic_text_v1_5) async throws {
12 |     self.configuration = configuration
13 |     self.modelContainer = try await MLXEmbedders.loadModelContainer(configuration: configuration)
14 |   }
15 | 
16 |   public func embed(texts: [String]) async -> [[Float]] {
17 |     await modelContainer.perform { (model: EmbeddingModel, tokenizer, pooling) -> [[Float]] in
18 |       let inputs = texts.map {
19 |         tokenizer.encode(text: $0, addSpecialTokens: true)
20 |       }
21 | 
22 |       // Pad to longest
23 |       let maxLength = inputs.reduce(into: 16) { acc, elem in
24 |         acc = max(acc, elem.count)
25 |       }
26 | 
27 |       let padded = stacked(
28 |         inputs.map { elem in
29 |           MLXArray(
30 |             elem
31 |               + Array(
32 |                 repeating: tokenizer.eosTokenId ?? 0,
33 |                 count: maxLength - elem.count))
34 |         })
35 | 
36 |       let mask = (padded .!= tokenizer.eosTokenId ?? 0)
37 |       let tokenTypes = MLXArray.zeros(like: padded)
38 | 
39 |       let result = pooling(
40 |         model(padded, positionIds: nil, tokenTypeIds: tokenTypes, attentionMask: mask),
41 |         normalize: true, applyLayerNorm: true
42 |       )
43 | 
44 |       return result.map { $0.asArray(Float.self) }
45 |     }
46 |   }
47 | 
48 |   public func embed(text: String) async throws -> [Float] {
49 |     let embeddings = await embed(texts: [text])
50 |     return embeddings[0]
51 |   }
52 | }
53 | 


--------------------------------------------------------------------------------
/Sources/VecturaMLXKit/VecturaMLXKit.swift:
--------------------------------------------------------------------------------
  1 | import Accelerate
  2 | import Foundation
  3 | import MLXEmbedders
  4 | import VecturaKit
  5 | 
  6 | @available(macOS 14.0, iOS 17.0, tvOS 17.0, visionOS 1.0, watchOS 10.0, *)
  7 | public class VecturaMLXKit {
  8 |     private let config: VecturaConfig
  9 |     private let embedder: MLXEmbedder
 10 |     private var documents: [UUID: VecturaDocument] = [:]
 11 |     private var normalizedEmbeddings: [UUID: [Float]] = [:]
 12 |     private let storageDirectory: URL
 13 |     
 14 |     public init(config: VecturaConfig, modelConfiguration: ModelConfiguration = .nomic_text_v1_5)
 15 |     async throws
 16 |     {
 17 |         self.config = config
 18 |         self.embedder = try await MLXEmbedder(configuration: modelConfiguration)
 19 |         
 20 |         if let customStorageDirectory = config.directoryURL {
 21 |             let databaseDirectory = customStorageDirectory.appending(path: config.name)
 22 |             
 23 |             if !FileManager.default.fileExists(atPath: databaseDirectory.path(percentEncoded: false)) {
 24 |                 try FileManager.default.createDirectory(
 25 |                     at: databaseDirectory, withIntermediateDirectories: true)
 26 |             }
 27 |             
 28 |             self.storageDirectory = databaseDirectory
 29 |         } else {
 30 |             // Create default storage directory
 31 |             self.storageDirectory = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
 32 |                 .first!
 33 |                 .appendingPathComponent("VecturaKit")
 34 |                 .appendingPathComponent(config.name)
 35 |         }
 36 |         
 37 |         try FileManager.default.createDirectory(at: storageDirectory, withIntermediateDirectories: true)
 38 |         
 39 |         // Attempt to load existing docs
 40 |         try loadDocuments()
 41 |     }
 42 |     
 43 |     public func addDocuments(texts: [String], ids: [UUID]? = nil) async throws -> [UUID] {
 44 |         if let ids = ids, ids.count != texts.count {
 45 |             throw VecturaError.invalidInput("Number of IDs must match number of texts")
 46 |         }
 47 |         
 48 |         let embeddings = await embedder.embed(texts: texts)
 49 |         var documentIds = [UUID]()
 50 |         var documentsToSave = [VecturaDocument]()
 51 |         
 52 |         for (index, text) in texts.enumerated() {
 53 |             let docId = ids?[index] ?? UUID()
 54 |             let doc = VecturaDocument(id: docId, text: text, embedding: embeddings[index])
 55 |             
 56 |             // Normalize embedding for cosine similarity
 57 |             let norm = l2Norm(doc.embedding)
 58 |             var divisor = norm + 1e-9
 59 |             var normalized = [Float](repeating: 0, count: doc.embedding.count)
 60 |             vDSP_vsdiv(doc.embedding, 1, &divisor, &normalized, 1, vDSP_Length(doc.embedding.count))
 61 |             
 62 |             normalizedEmbeddings[doc.id] = normalized
 63 |             documents[doc.id] = doc
 64 |             documentIds.append(docId)
 65 |             documentsToSave.append(doc)
 66 |         }
 67 |         
 68 |         try await withThrowingTaskGroup(of: Void.self) { group in
 69 |             let directory = self.storageDirectory
 70 |             
 71 |             for doc in documentsToSave {
 72 |                 group.addTask {
 73 |                     let documentURL = directory.appendingPathComponent("\(doc.id).json")
 74 |                     let encoder = JSONEncoder()
 75 |                     encoder.outputFormatting = .prettyPrinted
 76 |                     
 77 |                     let data = try encoder.encode(doc)
 78 |                     try data.write(to: documentURL)
 79 |                 }
 80 |             }
 81 |             
 82 |             try await group.waitForAll()
 83 |         }
 84 |         
 85 |         return documentIds
 86 |     }
 87 |     
 88 |     public func search(query: String, numResults: Int? = nil, threshold: Float? = nil) async throws
 89 |     -> [VecturaSearchResult]
 90 |     {
 91 |         guard !query.isEmpty else {
 92 |             throw VecturaError.invalidInput("Query cannot be empty")
 93 |         }
 94 |         
 95 |         let queryEmbedding = try await embedder.embed(text: query)
 96 |         
 97 |         let norm = l2Norm(queryEmbedding)
 98 |         var divisorQuery = norm + 1e-9
 99 |         var normalizedQuery = [Float](repeating: 0, count: queryEmbedding.count)
100 |         vDSP_vsdiv(
101 |             queryEmbedding, 1, &divisorQuery, &normalizedQuery, 1, vDSP_Length(queryEmbedding.count))
102 |         
103 |         var results: [VecturaSearchResult] = []
104 |         
105 |         for doc in documents.values {
106 |             guard let normDoc = normalizedEmbeddings[doc.id] else { continue }
107 |             let similarity = dotProduct(normalizedQuery, normDoc)
108 |             
109 |             if let minT = threshold ?? config.searchOptions.minThreshold, similarity < minT {
110 |                 continue
111 |             }
112 |             
113 |             results.append(
114 |                 VecturaSearchResult(
115 |                     id: doc.id,
116 |                     text: doc.text,
117 |                     score: similarity,
118 |                     createdAt: doc.createdAt
119 |                 )
120 |             )
121 |         }
122 |         
123 |         results.sort { $0.score > $1.score }
124 |         
125 |         let limit = numResults ?? config.searchOptions.defaultNumResults
126 |         return Array(results.prefix(limit))
127 |     }
128 |     
129 |     public func deleteDocuments(ids: [UUID]) async throws {
130 |         for id in ids {
131 |             documents[id] = nil
132 |             normalizedEmbeddings[id] = nil
133 |             
134 |             let documentURL = storageDirectory.appendingPathComponent("\(id).json")
135 |             try FileManager.default.removeItem(at: documentURL)
136 |         }
137 |     }
138 |     
139 |     public func updateDocument(id: UUID, newText: String) async throws {
140 |         try await deleteDocuments(ids: [id])
141 |         _ = try await addDocuments(texts: [newText], ids: [id])
142 |     }
143 |     
144 |     public func reset() async throws {
145 |         documents.removeAll()
146 |         normalizedEmbeddings.removeAll()
147 |         
148 |         let files = try FileManager.default.contentsOfDirectory(
149 |             at: storageDirectory, includingPropertiesForKeys: nil)
150 |         for fileURL in files {
151 |             try FileManager.default.removeItem(at: fileURL)
152 |         }
153 |     }
154 |     
155 |     // MARK: - Private
156 |     
157 |     private func loadDocuments() throws {
158 |         let fileURLs = try FileManager.default.contentsOfDirectory(
159 |             at: storageDirectory, includingPropertiesForKeys: nil)
160 |         
161 |         let decoder = JSONDecoder()
162 |         var loadErrors: [String] = []
163 |         
164 |         for fileURL in fileURLs where fileURL.pathExtension == "json" {
165 |             do {
166 |                 let data = try Data(contentsOf: fileURL)
167 |                 let doc = try decoder.decode(VecturaDocument.self, from: data)
168 |                 
169 |                 // Rebuild normalized embeddings
170 |                 let norm = l2Norm(doc.embedding)
171 |                 var divisor = norm + 1e-9
172 |                 var normalized = [Float](repeating: 0, count: doc.embedding.count)
173 |                 vDSP_vsdiv(doc.embedding, 1, &divisor, &normalized, 1, vDSP_Length(doc.embedding.count))
174 |                 normalizedEmbeddings[doc.id] = normalized
175 |                 documents[doc.id] = doc
176 |             } catch {
177 |                 loadErrors.append(
178 |                     "Failed to load \(fileURL.lastPathComponent): \(error.localizedDescription)")
179 |             }
180 |         }
181 |         
182 |         if !loadErrors.isEmpty {
183 |             throw VecturaError.loadFailed(loadErrors.joined(separator: "\n"))
184 |         }
185 |     }
186 |     
187 |     private func dotProduct(_ a: [Float], _ b: [Float]) -> Float {
188 |         var result: Float = 0
189 |         vDSP_dotpr(a, 1, b, 1, &result, vDSP_Length(a.count))
190 |         return result
191 |     }
192 |     
193 |     private func l2Norm(_ v: [Float]) -> Float {
194 |         var sumSquares: Float = 0
195 |         vDSP_svesq(v, 1, &sumSquares, vDSP_Length(v.count))
196 |         return sqrt(sumSquares)
197 |     }
198 | }
199 | 


--------------------------------------------------------------------------------
/Tests/VecturaKitTests/VecturaKitTests.swift:
--------------------------------------------------------------------------------
  1 | import XCTest
  2 | 
  3 | @testable import VecturaKit
  4 | import Embeddings
  5 | 
  6 | @available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, watchOS 11.0, *)
  7 | final class VecturaKitTests: XCTestCase {
  8 |     var vectura: VecturaKit!
  9 |     var config: VecturaConfig!
 10 |     
 11 |     override func setUp() async throws {
 12 |         config = VecturaConfig(name: "test-db", dimension: 384)
 13 |         vectura = try VecturaKit(config: config)
 14 |     }
 15 |     
 16 |     override func tearDown() async throws {
 17 |         try await vectura.reset()
 18 |         vectura = nil
 19 |     }
 20 |     
 21 |     func testAddAndSearchDocument() async throws {
 22 |         let text = "This is a test document"
 23 |         let id = try await vectura.addDocument(text: text)
 24 |         
 25 |         let results = try await vectura.search(query: "test document")
 26 |         XCTAssertEqual(results.count, 1)
 27 |         XCTAssertEqual(results[0].id, id)
 28 |         XCTAssertEqual(results[0].text, text)
 29 |     }
 30 |     
 31 |     func testAddMultipleDocuments() async throws {
 32 |         let documents = [
 33 |             "The quick brown fox jumps over the lazy dog",
 34 |             "Pack my box with five dozen liquor jugs",
 35 |             "How vexingly quick daft zebras jump",
 36 |         ]
 37 |         
 38 |         let ids = try await vectura.addDocuments(texts: documents)
 39 |         XCTAssertEqual(ids.count, 3)
 40 |         
 41 |         let results = try await vectura.search(query: "quick jumping animals")
 42 |         XCTAssertGreaterThanOrEqual(results.count, 2)
 43 |         XCTAssertTrue(results[0].score > results[1].score)
 44 |     }
 45 |     
 46 |     func testPersistence() async throws {
 47 |         // Add documents
 48 |         let texts = ["Document 1", "Document 2"]
 49 |         let ids = try await vectura.addDocuments(texts: texts)
 50 |         
 51 |         // Create new instance with same config
 52 |         let config = VecturaConfig(name: "test-db", dimension: 384)
 53 |         let newVectura = try VecturaKit(config: config)
 54 |         
 55 |         // Search should work with new instance
 56 |         let results = try await newVectura.search(query: "Document")
 57 |         XCTAssertEqual(results.count, 2)
 58 |         XCTAssertTrue(ids.contains(results[0].id))
 59 |         XCTAssertTrue(ids.contains(results[1].id))
 60 |     }
 61 |     
 62 |     func testSearchThreshold() async throws {
 63 |         let documents = [
 64 |             "Very relevant document about cats",
 65 |             "Somewhat relevant about pets",
 66 |             "Completely irrelevant about weather",
 67 |         ]
 68 |         _ = try await vectura.addDocuments(texts: documents)
 69 |         
 70 |         // With high threshold, should get fewer results
 71 |         let results = try await vectura.search(query: "cats and pets", threshold: 0.8)
 72 |         XCTAssertLessThan(results.count, 3)
 73 |     }
 74 |     
 75 |     func testCustomIds() async throws {
 76 |         let customId = UUID()
 77 |         let text = "Document with custom ID"
 78 |         
 79 |         let resultId = try await vectura.addDocument(text: text, id: customId)
 80 |         XCTAssertEqual(customId, resultId)
 81 |         
 82 |         let results = try await vectura.search(query: text)
 83 |         XCTAssertEqual(results[0].id, customId)
 84 |     }
 85 |     
 86 |     func testModelReuse() async throws {
 87 |         // Multiple operations should reuse the same model
 88 |         let start = Date()
 89 |         for i in 1...5 {
 90 |             _ = try await vectura.addDocument(text: "Test document \(i)")
 91 |         }
 92 |         let duration = Date().timeIntervalSince(start)
 93 |         
 94 |         // If model is being reused, this should be relatively quick
 95 |         XCTAssertLessThan(duration, 5.0)  // Adjust threshold as needed
 96 |     }
 97 |     
 98 |     func testEmptySearch() async throws {
 99 |         let results = try await vectura.search(query: "test query")
100 |         XCTAssertEqual(results.count, 0, "Search on empty database should return no results")
101 |     }
102 |     
103 |     func testDimensionMismatch() async throws {
104 |         // Test with wrong dimension config
105 |         let wrongConfig = VecturaConfig(name: "wrong-dim-db", dimension: 128)
106 |         let wrongVectura = try VecturaKit(config: wrongConfig)
107 |         
108 |         let text = "Test document"
109 |         
110 |         do {
111 |             _ = try await wrongVectura.addDocument(text: text)
112 |             XCTFail("Expected dimension mismatch error")
113 |         } catch let error as VecturaError {
114 |             // Should throw dimension mismatch since BERT model outputs 384 dimensions
115 |             switch error {
116 |             case .dimensionMismatch(let expected, let got):
117 |                 XCTAssertEqual(expected, 128)
118 |                 XCTAssertEqual(got, 384)
119 |             default:
120 |                 XCTFail("Wrong error type: \(error)")
121 |             }
122 |         }
123 |     }
124 |     
125 |     func testDuplicateIds() async throws {
126 |         let id = UUID()
127 |         let text1 = "First document"
128 |         let text2 = "Second document"
129 |         
130 |         // Add first document
131 |         _ = try await vectura.addDocument(text: text1, id: id)
132 |         
133 |         // Adding second document with same ID should overwrite
134 |         _ = try await vectura.addDocument(text: text2, id: id)
135 |         
136 |         let results = try await vectura.search(query: text2)
137 |         XCTAssertEqual(results.count, 1)
138 |         XCTAssertEqual(results[0].text, text2)
139 |     }
140 |     
141 |     func testSearchThresholdEdgeCases() async throws {
142 |         let documents = ["Test document"]
143 |         _ = try await vectura.addDocuments(texts: documents)
144 |         
145 |         // Test with threshold = 1.0 (exact match only)
146 |         let perfectResults = try await vectura.search(query: "Test document", threshold: 1.0)
147 |         XCTAssertEqual(perfectResults.count, 0)  // Should find no perfect matches due to encoding differences
148 |         
149 |         // Test with threshold = 0.0 (all matches)
150 |         let allResults = try await vectura.search(query: "completely different", threshold: 0.0)
151 |         XCTAssertEqual(allResults.count, 1)  // Should return all documents
152 |     }
153 |     
154 |     func testLargeNumberOfDocuments() async throws {
155 |         let documentCount = 100
156 |         var documents: [String] = []
157 |         
158 |         for i in 0..<documentCount {
159 |             documents.append("Test document number \(i)")
160 |         }
161 |         
162 |         let ids = try await vectura.addDocuments(texts: documents)
163 |         XCTAssertEqual(ids.count, documentCount)
164 |         
165 |         let results = try await vectura.search(query: "document", numResults: 10)
166 |         XCTAssertEqual(results.count, 10)
167 |     }
168 |     
169 |     func testPersistenceAfterReset() async throws {
170 |         // Add a document
171 |         let text = "Test document"
172 |         _ = try await vectura.addDocument(text: text)
173 |         
174 |         // Reset the database
175 |         try await vectura.reset()
176 |         
177 |         // Verify search returns no results
178 |         let results = try await vectura.search(query: text)
179 |         XCTAssertEqual(results.count, 0)
180 |         
181 |         // Create new instance and verify it's empty
182 |         let newVectura = try VecturaKit(config: config)
183 |         let newResults = try await newVectura.search(query: text)
184 |         XCTAssertEqual(newResults.count, 0)
185 |     }
186 |     
187 |     func testFolderURLModelSource() async throws {
188 |         /// First load the model from a remote source in order to make it available in the local filesystem.
189 |         _ = try await Bert.loadModelBundle(from: .default)
190 |         
191 |         /// Local model will be downloaded to a predictable location (this may break if `swift-transformers` updates where it downloads models).
192 |         let url = try FileManager.default.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: false)
193 |             .appending(path: "huggingface/models/\(VecturaModelSource.defaultModelId)")
194 |         
195 |         XCTAssertTrue(FileManager.default.fileExists(atPath: url.path(percentEncoded: false)), "Expected downloaded model to be available locally at \(url.path())")
196 |         
197 |         let documents = [
198 |             "The quick brown fox jumps over the lazy dog",
199 |             "Pack my box with five dozen liquor jugs",
200 |             "How vexingly quick daft zebras jump",
201 |         ]
202 |         
203 |         /// Proceed as usual now, but loading the model directly from the local directory instead of downloading it.
204 |         let ids = try await vectura.addDocuments(texts: documents, model: .folder(url))
205 |         XCTAssertEqual(ids.count, 3)
206 |         
207 |         let results = try await vectura.search(query: "quick jumping animals")
208 |         XCTAssertGreaterThanOrEqual(results.count, 2)
209 |         XCTAssertTrue(results[0].score > results[1].score)
210 |     }
211 |     
212 |     func testCustomStorageDirectory() async throws {
213 |         let customDirectoryURL = URL(filePath: NSTemporaryDirectory()).appending(path: "VecturaKitTest")
214 |         defer { try? FileManager.default.removeItem(at: customDirectoryURL) }
215 |         
216 |         let instance = try VecturaKit(config: .init(name: "test", directoryURL: customDirectoryURL, dimension: 384))
217 |         let text = "Test document"
218 |         let id = UUID()
219 |         _ = try await instance.addDocument(text: text, id: id)
220 |         
221 |         let documentPath = customDirectoryURL.appending(path: "test/\(id).json").path(percentEncoded: false)
222 |         XCTAssertTrue(FileManager.default.fileExists(atPath: documentPath), "Custom storage directory inserted document doesn't exist at \(documentPath)")
223 |     }
224 | }
225 | 


--------------------------------------------------------------------------------
/Tests/VecturaMLXKitTests/VecturaMLXKitTests.swift:
--------------------------------------------------------------------------------
  1 | import XCTest
  2 | import Foundation
  3 | @testable import VecturaMLXKit
  4 | @testable import VecturaKit
  5 | 
  6 | @available(macOS 14.0, iOS 17.0, tvOS 17.0, watchOS 10.0, *)
  7 | final class VecturaMLXKitTests: XCTestCase {
  8 |     
  9 |     var testDirectory: URL!
 10 |     // Set a dimension matching your model expectation (e.g., 768)
 11 |     let testDimension = 768
 12 |     
 13 |     override func setUpWithError() throws {
 14 |         // Create a temporary directory for testing.
 15 |         let temp = FileManager.default.temporaryDirectory
 16 |         testDirectory = temp.appendingPathComponent("VecturaMLXKitTests", isDirectory: true)
 17 |         if FileManager.default.fileExists(atPath: testDirectory.path) {
 18 |             try FileManager.default.removeItem(at: testDirectory)
 19 |         }
 20 |         try FileManager.default.createDirectory(at: testDirectory, withIntermediateDirectories: true)
 21 |     }
 22 |     
 23 |     override func tearDownWithError() throws {
 24 |         // Clean up the temporary directory.
 25 |         if FileManager.default.fileExists(atPath: testDirectory.path) {
 26 |             try FileManager.default.removeItem(at: testDirectory)
 27 |         }
 28 |     }
 29 |     
 30 |     func testAddAndSearch() async throws {
 31 |         // Create a test config with a minThreshold of 0 so any document is returned.
 32 |         let config = VecturaConfig(
 33 |             name: "TestDB",
 34 |             directoryURL: testDirectory,
 35 |             dimension: testDimension,
 36 |             searchOptions: VecturaConfig.SearchOptions(defaultNumResults: 10, minThreshold: 0, hybridWeight: 0.5, k1: 1.2, b: 0.75)
 37 |         )
 38 |         let kit = try await VecturaMLXKit(config: config, modelConfiguration: .nomic_text_v1_5)
 39 |         
 40 |         let text = "Hello world"
 41 |         let ids = try await kit.addDocuments(texts: [text])
 42 |         XCTAssertEqual(ids.count, 1, "Should add exactly one document.")
 43 |         
 44 |         // Perform a search using the same text.
 45 |         let results = try await kit.search(query: text)
 46 |         XCTAssertEqual(results.count, 1, "The search should return one result after adding one document.")
 47 |         XCTAssertEqual(results.first?.text, text, "The text of the returned document should match the added text.")
 48 |     }
 49 |     
 50 |     func testDeleteDocuments() async throws {
 51 |         let config = VecturaConfig(
 52 |             name: "TestDB",
 53 |             directoryURL: testDirectory,
 54 |             dimension: testDimension,
 55 |             searchOptions: VecturaConfig.SearchOptions(defaultNumResults: 10, minThreshold: 0, hybridWeight: 0.5, k1: 1.2, b: 0.75)
 56 |         )
 57 |         let kit = try await VecturaMLXKit(config: config, modelConfiguration: .nomic_text_v1_5)
 58 |         
 59 |         let text = "Delete me"
 60 |         let ids = try await kit.addDocuments(texts: [text])
 61 |         XCTAssertEqual(ids.count, 1, "Should add exactly one document.")
 62 |         
 63 |         try await kit.deleteDocuments(ids: ids)
 64 |         
 65 |         let results = try await kit.search(query: text)
 66 |         XCTAssertTrue(results.isEmpty, "After deletion, the document should not be returned in search results.")
 67 |     }
 68 |     
 69 |     func testUpdateDocument() async throws {
 70 |         let config = VecturaConfig(
 71 |             name: "TestDB",
 72 |             directoryURL: testDirectory,
 73 |             dimension: testDimension,
 74 |             searchOptions: VecturaConfig.SearchOptions(defaultNumResults: 10, minThreshold: 0, hybridWeight: 0.5, k1: 1.2, b: 0.75)
 75 |         )
 76 |         let kit = try await VecturaMLXKit(config: config, modelConfiguration: .nomic_text_v1_5)
 77 |         
 78 |         let originalText = "Original text"
 79 |         let updatedText = "Updated text"
 80 |         let ids = try await kit.addDocuments(texts: [originalText])
 81 |         XCTAssertEqual(ids.count, 1, "Should add exactly one document.")
 82 |         
 83 |         let documentID = ids.first!
 84 |         try await kit.updateDocument(id: documentID, newText: updatedText)
 85 |         
 86 |         let results = try await kit.search(query: updatedText)
 87 |         XCTAssertEqual(results.count, 1, "One document should be returned after update.")
 88 |         XCTAssertEqual(results.first?.text, updatedText, "The document text should be updated in the search results.")
 89 |     }
 90 |     
 91 |     func testReset() async throws {
 92 |         let config = VecturaConfig(
 93 |             name: "TestDB",
 94 |             directoryURL: testDirectory,
 95 |             dimension: testDimension,
 96 |             searchOptions: VecturaConfig.SearchOptions(defaultNumResults: 10, minThreshold: 0, hybridWeight: 0.5, k1: 1.2, b: 0.75)
 97 |         )
 98 |         let kit = try await VecturaMLXKit(config: config, modelConfiguration: .nomic_text_v1_5)
 99 |         
100 |         _ = try await kit.addDocuments(texts: ["Doc1", "Doc2"])
101 |         try await kit.reset()
102 |         
103 |         let results = try await kit.search(query: "Doc")
104 |         XCTAssertTrue(results.isEmpty, "After a reset, search should return no results.")
105 |     }
106 |     
107 |     // MARK: - Robust Search Tests
108 |     
109 |     func testSearchMultipleDocuments() async throws {
110 |         let config = VecturaConfig(
111 |             name: "TestMLXDB",
112 |             directoryURL: testDirectory,
113 |             dimension: testDimension,
114 |             searchOptions: VecturaConfig.SearchOptions(defaultNumResults: 10, minThreshold: 0, hybridWeight: 0.5, k1: 1.2, b: 0.75)
115 |         )
116 |         let kit = try await VecturaMLXKit(config: config, modelConfiguration: .nomic_text_v1_5)
117 |         
118 |         // Add several documents with overlapping keywords.
119 |         let texts = [
120 |             "The quick brown fox jumps over the lazy dog",
121 |             "A fast brown fox leaps over lazy hounds",
122 |             "An agile brown fox",
123 |             "Lazy dogs sleep all day",
124 |             "Quick and nimble foxes"
125 |         ]
126 |         _ = try await kit.addDocuments(texts: texts)
127 |         
128 |         // Search for an expression close to "brown fox".
129 |         let results = try await kit.search(query: "brown fox")
130 |         
131 |         // We expect at least two results related to 'brown fox'.
132 |         XCTAssertGreaterThanOrEqual(results.count, 2, "Should return at least two documents related to 'brown fox'.")
133 |         
134 |         // Verify that results are sorted in descending order by score.
135 |         for i in 1..<results.count {
136 |             XCTAssertGreaterThanOrEqual(results[i - 1].score, results[i].score, "Search results are not sorted in descending order by score.")
137 |         }
138 |     }
139 |     
140 |     func testSearchNumResultsLimiting() async throws {
141 |         let config = VecturaConfig(
142 |             name: "TestMLXDB",
143 |             directoryURL: testDirectory,
144 |             dimension: testDimension,
145 |             searchOptions: VecturaConfig.SearchOptions(defaultNumResults: 10, minThreshold: 0, hybridWeight: 0.5, k1: 1.2, b: 0.75)
146 |         )
147 |         let kit = try await VecturaMLXKit(config: config, modelConfiguration: .nomic_text_v1_5)
148 |         
149 |         // Add more documents.
150 |         let texts = [
151 |             "Document one about testing",
152 |             "Document two about testing",
153 |             "Document three about testing",
154 |             "Document four about testing",
155 |             "Document five about testing"
156 |         ]
157 |         _ = try await kit.addDocuments(texts: texts)
158 |         
159 |         // Request only 3 results.
160 |         let results = try await kit.search(query: "testing", numResults: 3)
161 |         XCTAssertEqual(results.count, 3, "Should limit the search results to exactly 3 documents.")
162 |     }
163 |     
164 |     func testSearchWithHighThreshold() async throws {
165 |         // Set a high threshold so that only nearly identical matches return.
166 |         let config = VecturaConfig(
167 |             name: "TestMLXDB",
168 |             directoryURL: testDirectory,
169 |             dimension: testDimension,
170 |             searchOptions: VecturaConfig.SearchOptions(defaultNumResults: 10, minThreshold: 0, hybridWeight: 0.5, k1: 1.2, b: 0.75)
171 |         )
172 |         let kit = try await VecturaMLXKit(config: config, modelConfiguration: .nomic_text_v1_5)
173 |         
174 |         // Add documents that are expected to have high similarity for 'apple'.
175 |         let texts = [
176 |             "Apple pie recipe",
177 |             "Delicious apple tart",
178 |             "Banana bread instructions"
179 |         ]
180 |         _ = try await kit.addDocuments(texts: texts)
181 |         
182 |         // Use a high threshold (e.g., 0.99) to filter out less-similar documents.
183 |         let highThreshold: Float = 0.99
184 |         let results = try await kit.search(query: "apple", threshold: highThreshold)
185 |         
186 |         // Verify that all returned documents have a similarity score meeting or exceeding the threshold.
187 |         for result in results {
188 |             XCTAssertGreaterThanOrEqual(result.score, highThreshold, "Result score \(result.score) is below the high threshold \(highThreshold).")
189 |         }
190 |     }
191 |     
192 |     func testSearchNoMatches() async throws {
193 |         let config = VecturaConfig(
194 |             name: "TestMLXDB",
195 |             directoryURL: testDirectory,
196 |             dimension: testDimension,
197 |             searchOptions: VecturaConfig.SearchOptions(defaultNumResults: 10, minThreshold: 0, hybridWeight: 0.5, k1: 1.2, b: 0.75)
198 |         )
199 |         let kit = try await VecturaMLXKit(config: config, modelConfiguration: .nomic_text_v1_5)
200 |         
201 |         // Add a document.
202 |         _ = try await kit.addDocuments(texts: ["Some random content"])
203 |         
204 |         // Use a query that should not match with a high threshold.
205 |         let results = try await kit.search(query: "completely different query text", threshold: 0.9)
206 |         XCTAssertTrue(results.isEmpty, "Search should return no results when the query does not match any document.")
207 |     }
208 | }
209 | 


--------------------------------------------------------------------------------
/scripts/update_readme.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import os
 3 | import subprocess
 4 | from google import genai
 5 | 
 6 | # The system prompt instructs the model on what to do.
 7 | SYSTEM_PROMPT = (
 8 |     "You are a helpful assistant that reads the entire code base and rewrites the README.md file "
 9 |     "to provide clear instructions, describe the package, list dependencies, and usage examples. "
10 |     "Please analyze the code and produce an updated README."
11 | )
12 | 
def get_codebase_summary():
    """
    Build a single string containing every git-tracked file and its content.

    Files are listed with ``git ls-files -z`` relative to the current working
    directory. Each file contributes a ``--- <path> ---`` header followed by its
    UTF-8 text; a file that cannot be read or decoded contributes an
    ``Error reading file: ...`` note in place of its content.

    Returns:
        str: The concatenated sections (empty when no files are tracked).

    Raises:
        subprocess.CalledProcessError: If ``git ls-files`` exits non-zero.
        OSError: If ``git`` cannot be executed.
    """
    # -z yields NUL-terminated, unquoted paths. Without it git C-quotes paths
    # containing non-ASCII or special characters, and open() cannot find them.
    listing = subprocess.check_output(["git", "ls-files", "-z"]).decode("utf-8")
    sections = []
    for file in filter(None, listing.split("\0")):
        try:
            with open(file, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file; ValueError covers UnicodeDecodeError
            # raised for binary or non-UTF-8 content.
            content = f"Error reading file: {e}"
        sections.append(f"--- {file} ---\n{content}\n\n")
    return "".join(sections)
28 | 
def call_geminiai(prompt, model='gemini-2.0-flash-exp'):
    """
    Send ``prompt`` to a Gemini model through the Google GenAI SDK and return its text.

    The API key is read from the ``GEMINI_API_KEY`` environment variable.

    Args:
        prompt: Full prompt text passed as the request contents.
        model: Identifier of the model to query. Defaults to the model this
            script has always used, so existing callers are unaffected.

    Returns:
        The ``text`` attribute of the SDK response. ``main`` treats a falsy
        value as a failed generation.
    """
    client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
    response = client.models.generate_content(model=model, contents=prompt)
    return response.text
37 | 
def main():
    """
    Regenerate README.md from the current codebase.

    Collects the tracked files, asks the model for an updated README and, when
    a non-empty answer comes back, overwrites README.md in the current
    working directory.

    Returns:
        int: 0 when README.md was written, 1 when the model returned nothing.
    """
    # Gather codebase context to inform the README generation.
    codebase_context = get_codebase_summary()
    # Build the user prompt with the context from the codebase.
    full_prompt = f"{SYSTEM_PROMPT}\n\nCodebase files:\n{codebase_context}"

    updated_readme = call_geminiai(full_prompt)
    if not updated_readme:
        print("Failed to update README.md.")
        return 1

    # Write as UTF-8 explicitly: the sources are read as UTF-8, and the platform
    # default encoding may be unable to represent characters in the generated text.
    with open("README.md", "w", encoding="utf-8") as f:
        f.write(updated_readme)
    print("README.md has been updated.")
    return 0

if __name__ == "__main__":
    # Propagate the status so a failed generation is visible to the calling workflow.
    raise SystemExit(main())


--------------------------------------------------------------------------------