├── .editorconfig ├── .eslintignore ├── .github ├── FUNDING.yml └── workflows │ └── build.yml ├── .gitignore ├── .npmrc ├── .nvmrc ├── .prettierignore ├── .prettierrc.yaml ├── LICENSE ├── README.md ├── build ├── entitlements.mac.plist ├── ico2048.png ├── icon.icns ├── icon.ico ├── icon.png ├── icon1024.png ├── icon256.png └── icon512.png ├── dev-app-update.yml ├── docs └── img │ ├── mac-codesigning-approval-dialog.png │ ├── mac-codesigning-errormessage.png │ ├── mac-codesigning-install.png │ ├── mac-codesigning-rightclick-menu.png │ └── search-result.png ├── e2e ├── features │ ├── api-key-status.feature │ ├── initial_application_view.feature │ ├── search-page.feature │ ├── settings-page.feature │ └── upload-process.feature ├── step-definitions │ ├── api-key-status.steps.ts │ ├── common.steps.ts │ ├── initial_application_view.steps.ts │ ├── search_page.steps.ts │ ├── settings_page.steps.ts │ └── upload_process.steps.ts └── test-storage │ ├── constellation-test.csv │ └── newline-test.csv ├── electron-builder.yml ├── electron.vite.config.ts ├── eslint.config.cjs ├── package-lock.json ├── package.json ├── python-prototype ├── embed.ipynb ├── embed.py ├── non_stupid_csv_reader.py ├── non_stupid_sentence_splitter.py ├── preprocessing.ipynb ├── requirements.txt ├── search.ipynb ├── search.py └── understanding_sentence_splitter.ipynb ├── resources ├── icon.icns └── icon.png ├── src ├── docs │ └── img │ │ └── groceries_screenshot.png ├── main │ ├── DocumentSetManager.ts │ ├── DocumentSetService.ts │ ├── api │ │ ├── embedding.test.ts │ │ └── embedding.ts │ ├── index.ts │ ├── services │ │ ├── csvLoader.test.ts │ │ ├── csvLoader.ts │ │ ├── embeddings.test.ts │ │ ├── embeddings.ts │ │ ├── loggingOpenAIEmbedding.ts │ │ ├── mockEmbedding.ts │ │ ├── optional_trim_sentence_tokenizer.js │ │ ├── sentenceSplitter.test.ts │ │ ├── sentenceSplitter.ts │ │ ├── sploder.ts │ │ └── weaviateService.ts │ ├── types │ │ └── index.ts │ └── utils.ts ├── preload.ts ├── preload │ ├── index.d.ts │ 
└── index.ts └── renderer │ ├── index.html │ └── src │ ├── App.svelte │ ├── assets │ ├── base.css │ ├── electron.svg │ └── main.css │ ├── components │ ├── ApiKeyPage.svelte │ ├── ApiKeyStatus.svelte │ ├── CsvUpload.svelte │ ├── ExistingDatabases.svelte │ ├── FrontPage.svelte │ ├── HelpPage.svelte │ ├── Preview.svelte │ ├── Results.svelte │ ├── SearchPage.svelte │ └── Table.svelte │ ├── env.d.ts │ └── main.ts ├── svelte.config.mjs ├── tsconfig.json ├── tsconfig.node.json └── wdio.conf.ts /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | indent_style = space 6 | indent_size = 2 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | out 4 | .gitignore 5 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: jeremybmerrill 14 | 
thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build Distributions 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | workflow_dispatch: 8 | 9 | jobs: 10 | build-apple: 11 | name: Build macOS (Apple Silicon) 12 | runs-on: macos-latest 13 | environment: "mac build" 14 | outputs: 15 | artifact-folder: ${{ steps.upload.outputs.artifact-folder }} 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Configure Node caching 20 | uses: actions/cache@v4 21 | with: 22 | path: ~/.npm 23 | key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }} 24 | restore-keys: ${{ runner.os }}-npm- 25 | 26 | - name: Install dependencies 27 | run: npm install 28 | 29 | - name: Run tests 30 | run: npm test 31 | timeout-minutes: 5 32 | 33 | - name: Build distribution (macOS ARM) 34 | env: 35 | CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }} 36 | CSC_LINK: ${{ secrets.CSC_LINK }} 37 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 38 | run: npm run build:mac 39 | 40 | - name: Create README 41 | run: | 42 | echo "More information: https://github.com/jeremybmerrill/meaningfully" > dist/README.txt 43 | 44 | - name: Upload artifact (macOS) 45 | id: upload 46 | uses: actions/upload-artifact@v4 47 | with: 48 | name: meaningfully-macOS 49 | path: | 50 | dist/meaningfully-${{ github.ref_name }}.arm64.dmg 51 | dist/README.txt 52 | 53 | build-intel: 54 | name: Build macOS (Intel) 55 | runs-on: macos-13 56 | environment: "mac build" 57 | outputs: 58 | artifact-folder: ${{ steps.upload.outputs.artifact-folder }} 59 | steps: 60 | - uses: actions/checkout@v4 61 | 62 | - name: Configure Node caching 63 | uses: actions/cache@v4 64 | with: 65 | path: ~/.npm 66 | key: ${{ runner.os }}-npm-${{ 
hashFiles('**/package-lock.json') }} 67 | restore-keys: ${{ runner.os }}-npm- 68 | 69 | - name: Install brew dependencies 70 | run: brew install pkg-config cairo pango libjpeg giflib librsvg 71 | 72 | - name: Install npm deps 73 | run: npm install 74 | 75 | - name: Run tests 76 | run: npm test 77 | timeout-minutes: 5 78 | 79 | - name: Build distribution (macOS Intel) 80 | env: 81 | CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }} 82 | CSC_LINK: ${{ secrets.CSC_LINK }} 83 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 84 | run: npm run build:mac -- --publish always 85 | 86 | - name: Create README 87 | run: 'echo "More information: https://github.com/jeremybmerrill/meaningfully" > dist/README.txt' 88 | 89 | - name: Upload artifact (macOS Intel) 90 | id: upload 91 | uses: actions/upload-artifact@v4 92 | with: 93 | name: meaningfully-macOS-intel 94 | path: | 95 | dist/meaningfully-${{ github.ref_name }}.x64.dmg 96 | dist/README.txt 97 | 98 | build-linux: 99 | name: Build Linux 100 | runs-on: ubuntu-latest 101 | environment: "mac build" 102 | steps: 103 | - uses: actions/checkout@v4 104 | 105 | - name: Configure Node caching 106 | uses: actions/cache@v4 107 | with: 108 | path: ~/.npm 109 | key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }} 110 | restore-keys: ${{ runner.os }}-npm- 111 | 112 | - name: Install Linux Build Dependencies 113 | run: | 114 | sudo apt-get update 115 | sudo apt-get install -y build-essential libcairo2-dev libpango1.0-dev libjpeg-dev libgif-dev librsvg2-dev libarchive-tools libfuse2 libgtk-3-0 libnss3 libxshmfence1 libatk-bridge2.0-0 libx11-xcb1 libxcb-dri3-0 libxcomposite1 libxcursor1 libxdamage1 libxfixes3 libxi6 libxrandr2 libxtst6 libgbm1 libpangocairo-1.0-0 libpango-1.0-0 libcairo2 libatspi2.0-0 116 | # sudo snap install snapcraft --classic 117 | 118 | - name: Install dependencies 119 | run: npm install 120 | 121 | - name: Run tests 122 | run: npm test 123 | timeout-minutes: 5 124 | 125 | - name: Build distribution (Linux) 126 | 
env: 127 | CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }} 128 | CSC_LINK: ${{ secrets.CSC_LINK }} 129 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 130 | # SNAPCRAFT_STORE_CREDENTIALS: ${{ secrets.SNAPCRAFT_STORE_CREDENTIALS }} 131 | run: npm run build:linux -- --publish always 132 | 133 | - name: Create README 134 | run: 'echo "More information: https://github.com/jeremybmerrill/meaningfully" > dist/README.txt' 135 | 136 | - name: Upload artifact (Linux) 137 | uses: actions/upload-artifact@v4 138 | with: 139 | name: meaningfully-linux 140 | 141 | # add back later under path. dist/meaningfully-${{ github.ref_name }}.snap 142 | path: | 143 | dist/meaningfully-${{ github.ref_name }}.deb 144 | dist/meaningfully-${{ github.ref_name }}.AppImage 145 | dist/README.txt 146 | integration-test: 147 | name: Integration Test 148 | runs-on: ubuntu-latest 149 | needs: [build-apple, build-intel, build-linux] 150 | steps: 151 | - uses: actions/checkout@v4 152 | 153 | - name: Configure Node caching 154 | uses: actions/cache@v4 155 | with: 156 | path: ~/.npm 157 | key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }} 158 | restore-keys: ${{ runner.os }}-npm- 159 | 160 | - name: Install Linux Build Dependencies 161 | run: | 162 | sudo apt-get update 163 | sudo apt-get install -y build-essential libcairo2-dev libpango1.0-dev libjpeg-dev libgif-dev librsvg2-dev libarchive-tools libfuse2 libgtk-3-0 libnss3 libxshmfence1 libatk-bridge2.0-0 libx11-xcb1 libxcb-dri3-0 libxcomposite1 libxcursor1 libxdamage1 libxfixes3 libxi6 libxrandr2 libxtst6 libgbm1 libpangocairo-1.0-0 libpango-1.0-0 libcairo2 libatspi2.0-0 164 | # sudo snap install snapcraft --classic 165 | 166 | - name: Install dependencies 167 | run: npm install 168 | 169 | - name: Download Linux artifact 170 | uses: actions/download-artifact@v4 171 | with: 172 | name: meaningfully-linux 173 | path: ./dist 174 | - name: Run integration tests 175 | run: npm run wdio 176 | release: 177 | name: Create GitHub Release 178 | 
runs-on: ubuntu-latest 179 | environment: "mac build" 180 | needs: 181 | - build-apple 182 | - build-intel 183 | # - build-windows 184 | - build-linux 185 | - integration-test 186 | steps: 187 | - uses: actions/checkout@v4 188 | 189 | - name: Download macOS artifact 190 | uses: actions/download-artifact@v4 191 | with: 192 | name: meaningfully-macOS 193 | path: ./artifacts/macOS 194 | 195 | - name: Download macOS (Intel) artifact 196 | uses: actions/download-artifact@v4 197 | with: 198 | name: meaningfully-macOS-intel 199 | path: ./artifacts/macOS-intel 200 | 201 | - name: Download Linux artifact 202 | uses: actions/download-artifact@v4 203 | with: 204 | name: meaningfully-linux 205 | path: ./artifacts/linux 206 | 207 | - name: Create Release 208 | uses: softprops/action-gh-release@v2 209 | env: 210 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 211 | with: 212 | tag_name: ${{ github.ref_name }} 213 | name: Release ${{ github.ref_name }} 214 | draft: false 215 | prerelease: false 216 | # add back later under files: ./artifacts/linux/meaningfully-${{ github.ref_name }}.snap 217 | files: | 218 | ./artifacts/linux/meaningfully-${{ github.ref_name }}.deb 219 | ./artifacts/linux/meaningfully-${{ github.ref_name }}.AppImage 220 | ./artifacts/macOS-intel/meaningfully-${{ github.ref_name }}.x64.dmg 221 | ./artifacts/macOS/meaningfully-${{ github.ref_name }}.arm64.dmg 222 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .env 3 | sample-data/ 4 | .ipynb_checkpoints/ 5 | *.duckdb 6 | python-prototype/chroma/ 7 | __pycache__ 8 | js/meaningfully/storage 9 | e2e/test-storage/metadata.db 10 | .vscode/ 11 | node_modules 12 | dist 13 | out 14 | .DS_Store 15 | *.log* 16 | e2e/test-storage/metadata.db 17 | certs/ 18 | -------------------------------------------------------------------------------- /.npmrc: 
-------------------------------------------------------------------------------- 1 | electron_mirror=https://npmmirror.com/mirrors/electron/ 2 | electron_builder_binaries_mirror=https://npmmirror.com/mirrors/electron-builder-binaries/ 3 | -------------------------------------------------------------------------------- /.nvmrc: -------------------------------------------------------------------------------- 1 | 23 2 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | out 2 | dist 3 | pnpm-lock.yaml 4 | LICENSE.md 5 | tsconfig.json 6 | tsconfig.*.json 7 | -------------------------------------------------------------------------------- /.prettierrc.yaml: -------------------------------------------------------------------------------- 1 | singleQuote: true 2 | semi: false 3 | printWidth: 100 4 | trailingComma: none 5 | plugins: 6 | - prettier-plugin-svelte 7 | overrides: 8 | - files: '*.svelte' 9 | options: 10 | parser: svelte 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2025 Jeremy B. F. Merrill 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Meaningfully (is still in pre-alpha but you can try it!) 2 | 3 | Meaningfully is a semantic search tool for text data in spreadsheets. 4 | 5 | Keyword searching in Excel or Google Sheets is painful because text data is displayed awkwardly and because keywords miss circumlocutions, typos, unexpected wording and foreign-language data. Semantic search solves all of that. Meaningfully works best for *semi-structured* data, where you have thousands of instances of a type and want to find certain instances. 6 | 7 | For example: 8 | 9 | - consumer complaints about a product or business 10 | - credit card transactions 11 | - descriptions of government contracts 12 | - responses to a survey 13 | 14 | ## Who is this for? 15 | 16 | Journalists, researchers, academics, people who do surveys or solicit submissions, anybody. 17 | 18 | ## What is semantic search? 19 | 20 | It's a middle-ground between AI chatbot search and keyword search. It uses the smarts of AI to 'understand' human language, but doesn't risk making stuff up like AI. 21 | 22 | ## Is Meaningfully ready to use? 23 | 24 | Not really, but you can try it! It is kind of the minimum viable semantic search app. If people like it, I hope to sand down the rough edges and build extra features. Right now, I make zero promises about whether it will work. 
**Please email me or open a ticket to tell me about how Meaningfully worked (or didn't work) for you.** 25 | 26 | In particular, Meaningfully is _slow_ and can't handle large document sets (>10,000 rows, let's say) yet. 27 | 28 | ## How do I search with meaningfully? 29 | 30 | Once you've uploaded a CSV with a text column, search is simple. 31 | 32 | ![a screenshot of the search page, with a query "he got fired" and a result saying "There are no modifications to Mr. Smith's compensation arrangements in connection with his departure. He will not receive severance and will forfeit all equity that has not vested as of his termination date."](https://github.com/jeremybmerrill/meaningfully/blob/main/docs/img/search-result.png) 33 | 34 | 35 | 1. 🤔 Just imagine what you're looking for, then imagine a phrase or sentence from that perfect result. Like, "he got fired." 36 | 2. Type the imagined phrase or sentence in the search box, and then click search. 37 | 3. Hopefully the closest results from the CSV will appear at the top of the search results. 38 | 39 | You can also filter by metadata attributes. 40 | 41 | 46 | 47 | ## Is meaningfully free? 48 | 49 | Mostly. Semantic search requires "embedding" snippets of your document into numbers. You can do this on your own computer for free (apart from your electric bill), but it's very slow. I recommend you get an OpenAI API key, put it into meaningfully, and use that; you'll be responsible for the OpenAI charges, but meaningfully doesn't cost any extra on top of that. (And it's generally very cheap. Most spreadsheets, even with tens of thousands of rows, will cost a few pennies.) 50 | 51 | Eventually, meaningfully may include some paid options. 52 | 53 | ## How can I run this app myself? 54 | 55 | ### Install 56 | 57 | Visit meaningfully's [release page](https://github.com/jeremybmerrill/meaningfully/releases), download the appropriate installer for your platform, and install it. 
58 | 59 | There may be some platform-specific instructions. 60 | 61 | #### Mac-specific instructions: 62 | 63 | Install `meaningfully-<version>.arm64.dmg` (with `arm64`) if your Mac has Apple Silicon. Install the `x64` version if your Mac has an Intel chip. 64 | 65 | I haven't yet set up code-signing for this app, so once you install the app, you might get an error message that says "'meaningfully' cannot be opened because the developer cannot be verified." (pictured below). 66 | 67 | ![a screenshot of a warning that meaningfully cannot be opened because the developer cannot be verified.]( 68 | https://github.com/jeremybmerrill/meaningfully/blob/main/docs/img/mac-codesigning-errormessage.png | width=300) 69 | 70 | ##### Here are the steps to work around this error 71 | 72 | 1. Install the app as usual, by copying it from the disk image (dmg) to your Applications folder. 73 | 74 | ![a screenshot of a Finder folder with the meaningfully icon and the Applications folder](https://github.com/jeremybmerrill/meaningfully/blob/main/docs/img/mac-codesigning-install.png | width=300) 75 | 76 | 2. Right-click (or control-click) the app, then click "Open". 77 | 78 | ![a screenshot of the right-click menu you get when you right-click on the meaningfully app, with the Open option](https://github.com/jeremybmerrill/meaningfully/blob/main/docs/img/mac-codesigning-rightclick-menu.png | width=300) 79 | 80 | 3. Then click "Open" on the pop-up dialog that says "macOS cannot verify the developer of 'meaningfully'. Are you sure you want to open it?" 81 | 82 | ![a dialog that says macOS cannot verify the developer of 'meaningfully'. Are you sure you want to open it](https://github.com/jeremybmerrill/meaningfully/blob/main/docs/img/mac-codesigning-approval-dialog.png | width=300) 83 | 84 | Sometimes you might have to try several times. But once it works, it should stay working until you update the app. 
If you'd like to eliminate this obstacle, please consider sponsoring this project -- as the code-signing workflow for Macs costs like $100, and I don't want to spend that until I'm sure that this project benefits people. 85 | 86 | #### Windows 87 | 88 | I couldn't get the Windows builds to work. If you use Windows and want to try meaningfully, please try development mode below, or help me get the Windows builds working. 89 | 90 | #### Linux 91 | 92 | Snaps coming soon, I hope. 93 | 94 | ### Development mode 95 | You'll need Node v22 or higher. You might try installing [nvm](https://github.com/nvm-sh/nvm) and then running `nvm install 22` and `nvm use 22`, but troubleshooting and other methods are outside the scope of this document. 96 | 97 | ``` 98 | npm install 99 | npm run dev 100 | ``` 101 | 102 | There's a weird bug where sometimes I think the storage directory isn't created right. If you get weird errors like `Error searching document set: Error: ENOENT: no such file or directory`, maybe try running `mkdir -p ~/Library/Application\ Support/meaningfully/simple_vector_store/` and trying again. I'm trying to fix it. :D 103 | 104 | ### Testing: 105 | 106 | Run the unit tests for the backend with `npm test`. Run the integration tests for the frontend by first building the app (`npm run build:mac` or `npm run build:linux`) and then running `npm run wdio`; to run a single feature file, use `CUCUMBER_TEST_ONLY_FEATURE=upload-process npm run wdio`. 107 | 108 | ## My documents are PDFs, not spreadsheets. Can I use Meaningfully? 109 | 110 | Try [Semantra](https://github.com/freedmand/semantra). 
-------------------------------------------------------------------------------- /build/entitlements.mac.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | com.apple.security.cs.allow-jit 6 | 7 | com.apple.security.cs.allow-unsigned-executable-memory 8 | 9 | com.apple.security.cs.allow-dyld-environment-variables 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /build/ico2048.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/build/ico2048.png -------------------------------------------------------------------------------- /build/icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/build/icon.icns -------------------------------------------------------------------------------- /build/icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/build/icon.ico -------------------------------------------------------------------------------- /build/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/build/icon.png -------------------------------------------------------------------------------- /build/icon1024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/build/icon1024.png -------------------------------------------------------------------------------- /build/icon256.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/build/icon256.png -------------------------------------------------------------------------------- /build/icon512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/build/icon512.png -------------------------------------------------------------------------------- /dev-app-update.yml: -------------------------------------------------------------------------------- 1 | provider: generic 2 | url: https://example.com/auto-updates 3 | updaterCacheDirName: meaningfully-updater 4 | -------------------------------------------------------------------------------- /docs/img/mac-codesigning-approval-dialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/docs/img/mac-codesigning-approval-dialog.png -------------------------------------------------------------------------------- /docs/img/mac-codesigning-errormessage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/docs/img/mac-codesigning-errormessage.png -------------------------------------------------------------------------------- /docs/img/mac-codesigning-install.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/docs/img/mac-codesigning-install.png -------------------------------------------------------------------------------- /docs/img/mac-codesigning-rightclick-menu.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/docs/img/mac-codesigning-rightclick-menu.png -------------------------------------------------------------------------------- /docs/img/search-result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremybmerrill/meaningfully/f2cb3bc2c4627556e2c93818330108f47635dbff/docs/img/search-result.png -------------------------------------------------------------------------------- /e2e/features/api-key-status.feature: -------------------------------------------------------------------------------- 1 | Feature: API Key Status Banner 2 | 3 | Scenario: Verify API Key Status Banner is shown if setting store is empty 4 | Given the application has started 5 | When the settings store is empty 6 | And the page has been reloaded 7 | Then the "API Key Status" component should be visible 8 | 9 | Scenario: Verify API Key Status Banner is not shown if API Key is set 10 | Given the application has started 11 | When the setting store has an OpenAI API Key value 12 | And the page has been reloaded 13 | Then the "API Key Status" component should not be visible 14 | -------------------------------------------------------------------------------- /e2e/features/initial_application_view.feature: -------------------------------------------------------------------------------- 1 | Feature: Initial Application View 2 | 3 | Scenario: Verify initial components are displayed 4 | Given the application has started 5 | Then the "Upload a Spreadsheet" component should be visible 6 | And the "Existing Spreadsheets" component should be visible 7 | 8 | Scenario: Verify empty state for Existing Spreadsheets 9 | Given the application has started 10 | And the metadata store is empty 11 | And the page has been reloaded 12 | Then the "Existing Spreadsheets" 
component should be visible 13 | And no datasets should be listed 14 | 15 | Scenario: Verify Existing Spreadsheets with 1 dataset 16 | Given the application has started 17 | 18 | # set an API key 19 | 20 | And the settings store is empty 21 | And the app is navigated to the 'Settings / API Keys' link 22 | And the uploadCsv function has been mocked 23 | And the OpenAI API Key value is set on the page 24 | And the "Save" component has been clicked 25 | 26 | And a file has been selected in the "Upload a Spreadsheet" component 27 | And the column "paragraph" has been selected as column to embed 28 | And the metadata column with name "font-size" has been selected 29 | And the metadata column with name "cik" has been selected 30 | And the "Dataset Name input" component has been set to "Test Dataset 1" 31 | And the "Upload button" component has been clicked 32 | And the app is navigated to the "Home" link 33 | Then the "Existing Spreadsheets" component should be visible 34 | And 1 datasets should be listed 35 | 36 | And the dataset "Test Dataset 1" should be listed 37 | 38 | # And the page has been reloaded 39 | # Then the "Existing Spreadsheets" component should be visible 40 | # And 2 datasets should be listed 41 | # And the dataset "Test Dataset 1" should be listed 42 | # And the dataset "Test Dataset 2" should be listed -------------------------------------------------------------------------------- /e2e/features/search-page.feature: -------------------------------------------------------------------------------- 1 | Feature: Search page 2 | 3 | Scenario: Verify initial components are displayed 4 | Given the application has started 5 | And the settings store is empty 6 | And the app is navigated to the 'Settings / API Keys' link 7 | And the uploadCsv function has been mocked 8 | And the OpenAI API Key value is set on the page 9 | And the "Save" component has been clicked 10 | 11 | And a file has been selected in the "Upload a Spreadsheet" component 12 | And the column 
"paragraph" has been selected as column to embed 13 | And the metadata column with name "font-size" has been selected 14 | And the metadata column with name "cik" has been selected 15 | And the "Dataset Name input" component has been set to "Test Dataset 1" 16 | And the "Upload button" component has been clicked 17 | # And the app is navigated to the "Home" link 18 | 19 | # And the app is navigated to the "Test Dataset 1" dataset link 20 | Then the "Document Set name" component should be visible 21 | And the "Search bar" component should be visible 22 | And the "Search button" component should be visible 23 | 24 | Scenario: Verify search button is disabled if there is no query 25 | Given the application has started 26 | And the app is navigated to the 'Home' link 27 | And the app is navigated to the "Test Dataset 1" dataset link 28 | And no search query has been entered 29 | Then the "Search button" component should be visible 30 | And the "Search button" component should be disabled 31 | 32 | Scenario: Verify search button is enabled if there is a query 33 | Given the application has started 34 | And the app is navigated to the 'Home' link 35 | And the app is navigated to the "Test Dataset 1" dataset link 36 | And a search query has been entered 37 | Then the "Search button" component should be enabled 38 | 39 | Scenario: Verify results are shown 40 | Given the application has started 41 | And the app is navigated to the 'Home' link 42 | And the app is navigated to the "Test Dataset 1" dataset link 43 | And a search query has been entered 44 | And the search button has been clicked 45 | Then the "Results" component should be visible 46 | And the "Results" component should have multiple rows shown 47 | 48 | Scenario: Verify the result modal is shown 49 | Given the application has started 50 | And the app is navigated to the 'Home' link 51 | And the app is navigated to the "Test Dataset 1" dataset link 52 | And a search query has been entered 53 | And the search 
button has been clicked 54 | And a result row modal button has been clicked 55 | Then the "Details" component should be visible 56 | # And the details component should be scrollable # can't be handled by the current test case whose text is too short to need scrolling -------------------------------------------------------------------------------- /e2e/features/settings-page.feature: -------------------------------------------------------------------------------- 1 | Feature: Settings Page 2 | 3 | Scenario: OpenAI API Key value is empty if settings store is empty 4 | Given the application has started 5 | When the settings store is empty 6 | And the app is navigated to the 'Settings / API Keys' link 7 | Then the "OpenAI API Key input" component should be visible 8 | And the "OpenAI API Key input" component should be empty 9 | 10 | Scenario: new OpenAI API Key values are persisted and masked 11 | Given the application has started 12 | When the settings store is empty 13 | And the app is navigated to the 'Settings / API Keys' link 14 | And the OpenAI API Key value is set on the page 15 | And the "Save" component has been clicked 16 | And the app is navigated to the 'Settings / API Keys' link 17 | Then the "OpenAI API Key input" component should be visible 18 | And the text of the "OpenAI API Key input" component is masked 19 | And the text of the "OpenAI API Key input" component is a masked version of the set value. 
20 | 21 | Scenario: after save, the app is navigated back to the home page 22 | Given the application has started 23 | When the settings store is empty 24 | And the app is navigated to the 'Settings / API Keys' link 25 | And the OpenAI API Key value is set on the page 26 | And the "Save" component has been clicked 27 | Then the "Upload a Spreadsheet" component should be visible 28 | And the "Existing Spreadsheets" component should be visible -------------------------------------------------------------------------------- /e2e/features/upload-process.feature: -------------------------------------------------------------------------------- 1 | Feature: Upload page 2 | 3 | Scenario: Verify upload page is shown once a file is selected 4 | Given the application has started 5 | And a file has been selected in the "Upload a Spreadsheet" component 6 | Then the "CSV Upload Settings" component should be visible 7 | And the "Preview" component should not be visible 8 | 9 | Scenario: Verify preview is shown if a column is selected 10 | Given the application has started 11 | And a file has been selected in the "Upload a Spreadsheet" component 12 | And the column "paragraph" has been selected as column to embed 13 | Then the "CSV Upload Settings" component should be visible 14 | And the "Preview" component should be visible 15 | And the "Cost Estimate" component should be visible 16 | And the "Preview" component should contain a header row with name "paragraph" 17 | And the "Preview" component should contain HTML linebreaks not unescaped newlines 18 | 19 | Scenario: Verify metadata columns are shown if metadata columns are selected 20 | Given the application has started 21 | And a file has been selected in the "Upload a Spreadsheet" component 22 | And the column "paragraph" has been selected as column to embed 23 | And the metadata column with name "font-size" has been selected 24 | And the metadata column with name "cik" has been selected 25 | Then the "CSV Upload Settings" 
component should be visible 26 | And the "Preview" component should be visible 27 | And the "Preview" component should contain a header row with name "font-size" 28 | And the "Preview" component should contain a header row with name "cik" 29 | 30 | Scenario: Verify upload button is disabled if no column is selected 31 | Given the application has started 32 | # todo there needs to be a navigation to the homepage first, to reset things. 33 | And a file has been selected in the "Upload a Spreadsheet" component 34 | And no column has been selected as column to embed 35 | Then the "CSV Upload Settings" component should be visible 36 | And the "Preview" component should not be visible 37 | And the "Upload button" component should be disabled 38 | 39 | Scenario: Verify upload button is enabled if a column is selected 40 | Given the application has started 41 | And a file has been selected in the "Upload a Spreadsheet" component 42 | And the column "paragraph" has been selected as column to embed 43 | Then the "CSV Upload Settings" component should be visible 44 | And the "Preview" component should be visible 45 | And the "Upload button" component should be enabled 46 | 47 | -------------------------------------------------------------------------------- /e2e/step-definitions/api-key-status.steps.ts: -------------------------------------------------------------------------------- 1 | import { Given, When, Then } from '@wdio/cucumber-framework'; 2 | import { expect, $$, $ } from '@wdio/globals'; 3 | import { execSync } from 'child_process'; 4 | 5 | 6 | //execSync('sqlite3 ./e2e/test-storage/metadata.db "CREATE TABLE IF NOT EXISTS meaningfully_settings (settings_id INTEGER PRIMARY KEY AUTOINCREMENT, settings TEXT NOT NULL );" "CREATE TABLE IF NOT EXISTS document_sets ( set_id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL UNIQUE, upload_date TEXT NOT NULL, parameters TEXT NOT NULL, total_documents INTEGER NOT NULL DEFAULT 0);"'); 7 | 8 | // --- Steps --- 9 | 10 | 
Given("the settings store is empty", async () => { 11 | //execSync('sqlite3 ./e2e/test-storage/metadata.db "DELETE FROM meaningfully_settings"'); 12 | await browser.execute(() => { 13 | // @ts-ignore 14 | if (window.api && window.api.setSettings) { 15 | window.api.setSettings({ openAIKey: null, oLlamaModelType: null, oLlamaBaseURL: null }); 16 | } 17 | }); 18 | await browser.pause(500); // Optional: Adjust as needed 19 | }); 20 | 21 | Given("the setting store has an OpenAI API Key value", async () => { 22 | // execSync('sqlite3 ./e2e/test-storage/metadata.db "DELETE FROM meaningfully_settings"'); 23 | // execSync(`sqlite3 ./e2e/test-storage/metadata.db "INSERT OR REPLACE INTO meaningfully_settings (settings_id, settings) VALUES (1, '{\"openAIKey\":\"\"}')"`); 24 | await browser.execute(() => { 25 | // @ts-ignore 26 | if (window.api && window.api.setSettings) { 27 | window.api.setSettings({ openAIKey: "sk-proj-meaningfullytesting-1234567890123456789012345678901234567890", oLlamaModelType: null, oLlamaBaseURL: null }); 28 | } 29 | }); 30 | await browser.pause(500); // Optional: Adjust as needed 31 | }); 32 | 33 | // TODO: use APIs instead of shelling out to sqlite3 34 | // // Step: Simulate empty settings store. 35 | // When("the settings store is empty", async () => { 36 | // // Implement your method to clear the settings store. 37 | // // For example, using browser.execute to call your electronAPI: 38 | // await browser.execute(() => { 39 | // // @ts-ignore 40 | // if (window.api && window.api.clearSettings) { 41 | // window.api.clearSettings(); 42 | // } 43 | // }); 44 | // await browser.pause(500); 45 | // }); 46 | 47 | // // Step: Simulate settings store with an OpenAI API Key value. 48 | // When("the setting store has an OpenAI API Key value", async () => { 49 | // // Implement your method to add a key. 50 | // await browser.execute(() => { 51 | // // @ts-ignore 52 | // if (window.api && window.api.setSettings) { 53 | // // Set a dummy API key. 
54 | // window.api.setSettings({ openAIKey: "sk-dummyapikeyvalue", oLlamaModelType: "", oLlamaBaseURL: "" }); 55 | // } 56 | // }); 57 | // await browser.pause(500); 58 | // }); -------------------------------------------------------------------------------- /e2e/step-definitions/common.steps.ts: -------------------------------------------------------------------------------- 1 | import { Given, When, Then } from '@wdio/cucumber-framework'; 2 | import { expect, $$, $ } from '@wdio/globals'; 3 | 4 | Given("the application has started", async () => { 5 | // WebdriverIO Electron service typically handles app launch automatically. 6 | // You might add a small wait here if needed for the UI to stabilize. 7 | await browser.pause(500); // Optional: Adjust as needed 8 | }); 9 | 10 | Given("the page has been reloaded", async () => { 11 | // Reload the current page 12 | // await browser.reloadSession(); 13 | const currentUrl = await browser.getUrl(); 14 | await browser.url(currentUrl); 15 | }); 16 | 17 | 18 | // Step: Simulate clicking the Save button. 19 | When('the {string} component has been clicked', async (componentName: string) => { 20 | let selector: string = `[data-testid="${componentName.toLowerCase().replace(/ /g, '-')}"]`; 21 | const btn = await $(selector); 22 | await btn.waitForDisplayed({ timeout: 5000 }); 23 | await btn.click(); 24 | await browser.pause(500); 25 | }); 26 | 27 | 28 | 29 | // These depend on the idea that the Feature file specifies a name that, 30 | // by convention, is the same as the data-testid attribute in the component 31 | // subject to lowercasing and spaces-to-dashes. 
32 | Then("the {string} component should be visible", async (componentName: string) => { 33 | let selector: string = `[data-testid="${componentName.toLowerCase().replace(/ /g, '-')}"]`; 34 | const component = await $(selector); 35 | await expect(component).toBeDisplayed(); 36 | }); 37 | 38 | Then("the {string} component should not be visible", async (componentName: string) => { 39 | let selector: string = `[data-testid="${componentName.toLowerCase().replace(/ /g, '-')}"]`; 40 | const component = await $(selector); 41 | await expect(component).not.toBeDisplayed(); 42 | }); 43 | 44 | // Navigation step: go to the search page. 45 | // Adjust the URL as needed for your Electron app. 46 | Given("the app is navigated to the {string} link", async (linkText: string) => { 47 | // Example: navigate to a search page with a document set id of 1. 48 | const settingsLink = await $('.navbar').$(`a*=${linkText}`); 49 | await settingsLink.click(); 50 | // Wait for the search bar to be displayed as indicator of page load. 51 | }); 52 | 53 | const DATASET_ROW_SELECTOR = '[data-testid="existing-spreadsheet-row"]'; // Selector for a single dataset row/item 54 | Given("the app is navigated to the {string} dataset link", async (linkText: string) => { 55 | // Example: navigate to a search page with a document set id of 1. 
56 | //const settingsLink = await (await $$(DATASET_ROW_SELECTOR).filter((elem) => !!elem.$(`a*=${linkText}`))).map((row) => row.$(`a*=${linkText}`) )[0]; 57 | //const datasetLink = await $(`a*=${linkText}`); 58 | const anchorElement = await $(`a=${linkText}`); 59 | await anchorElement.click(); 60 | // const datasetRow = await $$(DATASET_ROW_SELECTOR); 61 | // const filteredRows = await datasetRow.filter((elem) => !!elem.$(`a*="${linkText}"`)); 62 | // if (filteredRows.length === 0) { 63 | // throw new Error(`No link found with text: ${linkText}`); 64 | // } 65 | // const link = await filteredRows[0].$(`a*=${linkText}`); 66 | // await link.waitForDisplayed({ timeout: 5000 }); 67 | // await link.click(); 68 | // Wait for the search bar to be displayed as indicator of page load. 69 | }); 70 | 71 | When("the {string} component has been set to {string}", async (componentName: string, val: string) => { 72 | let selector = `[data-testid="${componentName.toLowerCase().replace(/ /g, '-')}"]`; 73 | const input = await $(selector); 74 | await input.waitForDisplayed({ timeout: 5000 }); 75 | // Clear existing value and set a new one. 76 | await input.clearValue(); 77 | // Provide a new key value. 
78 | await input.setValue(val); 79 | await browser.pause(500); 80 | }); 81 | 82 | -------------------------------------------------------------------------------- /e2e/step-definitions/initial_application_view.steps.ts: -------------------------------------------------------------------------------- 1 | import { Given, When, Then } from '@wdio/cucumber-framework'; 2 | import { expect, $$, $ } from '@wdio/globals'; 3 | import { execSync } from 'child_process'; 4 | import path from 'path'; 5 | 6 | // // --- Selectors --- 7 | const DATASET_ROW_SELECTOR = '[data-testid="existing-spreadsheet-row"]'; // Selector for a single dataset row/item 8 | 9 | // execSync('sqlite3 ./e2e/test-storage/metadata.db "CREATE TABLE IF NOT EXISTS meaningfully_settings (settings_id INTEGER PRIMARY KEY AUTOINCREMENT, settings TEXT NOT NULL );" "CREATE TABLE IF NOT EXISTS document_sets ( set_id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL UNIQUE, upload_date TEXT NOT NULL, parameters TEXT NOT NULL, total_documents INTEGER NOT NULL DEFAULT 0);"'); 10 | 11 | Given("the metadata store is empty", async () => { 12 | // execSync('sqlite3 ./e2e/test-storage/metadata.db "DELETE FROM document_sets"'); 13 | // starts empty! 14 | 1+1 15 | }); 16 | 17 | Given("the uploadCsv function has been mocked", async () => { 18 | // This step is to ensure that the uploadCsv function is mocked in the browser context. 19 | // It might be set up in your test environment or application code. 
20 | return browser.execute(() => { 21 | // @ts-ignore 22 | if (window.testHooks && window.testHooks.overrideUploadCsv) { 23 | const originalUploadCsv = window.api.uploadCsv; 24 | window.testHooks.overrideUploadCsv(async (formData: { 25 | file: File, 26 | datasetName: string, 27 | description: string, 28 | textColumns: string[], 29 | metadataColumns: string[], 30 | splitIntoSentences: boolean, 31 | combineSentencesIntoChunks: boolean, 32 | sploderMaxSize: number, 33 | chunkSize: number, 34 | chunkOverlap: number, 35 | modelName: string, 36 | modelProvider: string 37 | }) => { 38 | // Mock implementation of uploadCsv 39 | console.log("Mock uploadCsv called with:", formData); 40 | formData["modelProvider"] = "mock"; // Ensure modelProvider is set to "mock" so we don't hit a paid API. 41 | return originalUploadCsv(formData); 42 | }); 43 | 44 | } 45 | }); 46 | }); 47 | // And a dataset "Test Dataset 1" has been uploaded 48 | // And a dataset "Test Dataset 2" has been uploaded 49 | // Given("a dataset {string} has been uploaded", async (datasetName: string) => { 50 | // const localFilePath = path.resolve(process.cwd(), 'e2e/test-storage/constellation-test.csv'); 51 | // // Upload the file to the Selenium/Electron server. 52 | // const remoteFilePath = await browser.uploadFile(localFilePath); 53 | 54 | // await browser.execute((index, remotePath) => { 55 | // // In the browser context, use fetch to retrieve the uploaded file as a blob. 56 | // fetch(remotePath) 57 | // .then(response => response.blob()) 58 | // .then(blob => { 59 | // const file = new File([blob], 'constellation-test.csv', { type: "text/csv" }); 60 | // // Use your app’s API to simulate the upload. 
61 | // if (window.api && window.api.uploadCsv) { 62 | // window.api.uploadCsv({ 63 | // file: file, 64 | // datasetName: datasetName, 65 | // description: "", 66 | // textColumns: ["paragraph"], 67 | // metadataColumns: ["cik", "classification"], 68 | // splitIntoSentences: true, 69 | // combineSentencesIntoChunks: true, 70 | // sploderMaxSize: 500, 71 | // chunkSize: 100, 72 | // chunkOverlap: 20, 73 | // modelName: "text-embedding-3-small", 74 | // modelProvider: "mock" 75 | // }); 76 | // } 77 | // }); 78 | // }, remoteFilePath, datasetName); 79 | // await browser.pause(500); 80 | // }); 81 | 82 | 83 | Given("the metadata store contains {int} entries", async (count: number) => { 84 | // Resolve the local path for the CSV you want to upload. 85 | const localFilePath = path.resolve(process.cwd(), 'e2e/test-storage/constellation-test.csv'); 86 | // Upload the file to the Selenium/Electron server. 87 | const remoteFilePath = await browser.uploadFile(localFilePath); 88 | 89 | for (let i = 0; i < count; i++) { 90 | // Pass the remote file path into browser.execute. 91 | await browser.execute((index, remotePath) => { 92 | // In the browser context, use fetch to retrieve the uploaded file as a blob. 93 | fetch(remotePath) 94 | .then(response => response.blob()) 95 | .then(blob => { 96 | const file = new File([blob], 'constellation-test.csv', { type: "text/csv" }); 97 | // Use your app’s API to simulate the upload. 
98 | if (window.api && window.api.uploadCsv) { 99 | window.api.uploadCsv({ 100 | file: file, 101 | datasetName: `Test ${index + 1}`, 102 | description: "", 103 | textColumns: ["paragraph"], 104 | metadataColumns: ["cik", "classification"], 105 | splitIntoSentences: true, 106 | combineSentencesIntoChunks: true, 107 | sploderMaxSize: 500, 108 | chunkSize: 100, 109 | chunkOverlap: 20, 110 | modelName: "text-embedding-3-small", 111 | modelProvider: "mock" 112 | }); 113 | } 114 | }); 115 | }, i, remoteFilePath); 116 | } 117 | // Pause a bit to let the uploads process. 118 | await browser.pause(500); 119 | }); 120 | 121 | Then("no datasets should be listed", async () => { 122 | const datasets = await $$(DATASET_ROW_SELECTOR); 123 | await expect(datasets).toBeElementsArrayOfSize(0); 124 | }); 125 | 126 | Then("{int} datasets should be listed", async (expectedCount: number) => { 127 | const datasets = await $$(DATASET_ROW_SELECTOR); 128 | await expect(datasets).toBeElementsArrayOfSize(expectedCount); 129 | }); 130 | 131 | Then("the dataset {string} should be listed", async (datasetName: string) => { 132 | const datasetNames = await $$(DATASET_ROW_SELECTOR).map((datasetRow) => datasetRow.$$('td')[0].getText()); 133 | expect(datasetNames).toContain(datasetName); 134 | }); 135 | 136 | /////////////////////////////////////////////////////////////////////////////////////////// 137 | 138 | 139 | 140 | 141 | // Given("the metadata store is empty", async () => { 142 | // // TODO: Implement logic to ensure the metadata store is empty. 143 | // // This might involve: 144 | // // - Calling a specific function via browser.execute: 145 | // // await browser.execute(() => (window as any).electronAPI.clearMetadataStore()); 146 | // // - Interacting with the UI to clear data if applicable. 147 | // // - Restarting the app in a clean state (might be handled by wdio setup). 
148 | // console.warn("Step 'the metadata store is empty' requires implementation."); 149 | // }); 150 | 151 | // Given("the metadata store contains {int} entries", async (count: number) => { 152 | // // TODO: Implement logic to populate the metadata store with 'count' entries. 153 | // // Similar to the empty state, this might involve: 154 | // // - Calling a function via browser.execute: 155 | // // await browser.execute((num) => (window as any).electronAPI.addMockMetadata(num), count); 156 | // // - UI interactions to add data. 157 | // console.warn(`Step 'the metadata store contains ${count} entries' requires implementation.`); 158 | // // Add a small pause if data loading is asynchronous 159 | // await browser.pause(200); 160 | // }); 161 | 162 | 163 | // Then("no datasets should be listed", async () => { 164 | // const datasets = await $$(DATASET_ROW_SELECTOR); 165 | // await expect(datasets).toBeElementsArrayOfSize(0); 166 | // }); 167 | 168 | // Then("{int} datasets should be listed", async (expectedCount: number) => { 169 | // const datasets = await $$(DATASET_ROW_SELECTOR); 170 | // await expect(datasets).toBeElementsArrayOfSize(expectedCount); 171 | // }); -------------------------------------------------------------------------------- /e2e/step-definitions/search_page.steps.ts: -------------------------------------------------------------------------------- 1 | import { Given, When, Then } from '@wdio/cucumber-framework'; 2 | import { expect, $, $$ } from '@wdio/globals'; 3 | 4 | 5 | // Step: Enter a search query. 6 | When("a search query has been entered", async () => { 7 | const searchInput = await $('[data-testid="search-bar"]'); 8 | await searchInput.waitForDisplayed({ timeout: 5000 }); 9 | // Enter a sample query. 
10 | await searchInput.setValue("test search query"); 11 | await browser.pause(500); 12 | }); 13 | 14 | When("no search query has been entered", async () => { 15 | const searchInput = await $('[data-testid="search-bar"]'); 16 | await searchInput.waitForDisplayed({ timeout: 5000 }); 17 | // Enter a sample query. 18 | await searchInput.clearValue(); 19 | await browser.pause(500); 20 | }); 21 | 22 | 23 | // Step: Verify search button state. 24 | Then("the search button is {string}", async (state: string) => { 25 | const searchButton = await $('[data-testid="search-button"]'); 26 | await searchButton.waitForDisplayed({ timeout: 5000 }); 27 | const isDisabled = await searchButton.getAttribute("disabled"); 28 | if (state === "disabled") { 29 | expect(isDisabled).not.toBeNull(); 30 | } else if (state === "enabled") { 31 | expect(isDisabled).toBeNull(); 32 | } else { 33 | throw new Error(`Unknown state: ${state}`); 34 | } 35 | }); 36 | 37 | // Step: Click the search button. 38 | When("the search button has been clicked", async () => { 39 | const searchButton = await $('[data-testid="search-button"]'); 40 | await searchButton.waitForDisplayed({ timeout: 5000 }); 41 | await searchButton.click(); 42 | // Allow search results to load. 43 | await browser.pause(1000); 44 | }); 45 | 46 | // Step: Verify that the Results component has multiple rows. 47 | // For this example, we assume that results are rendered as multiple elements within the Results component. 48 | Then("the {string} component should have multiple rows shown", async (componentName: string) => { 49 | let selector = ""; 50 | if (componentName === "Results") { 51 | // In your Results component, assume each result row has a common class or data attribute. 52 | // Adjust this selector to match your implementation. 53 | selector = '[data-testid="results"] tr'; 54 | } else { 55 | throw new Error(`Unknown component for rows: ${componentName}`); 56 | } 57 | const rows = await $$(selector); 58 | // Expect at least 2 rows. 
59 | expect(rows.length).toBeGreaterThan(1); 60 | }); 61 | 62 | // Step: Click a result row modal button. 63 | When("a result row modal button has been clicked", async () => { 64 | // In your Results component, assume each row has a button to open the modal with data-testid="result-modal-button". 65 | const modalButtons = await $('[data-testid="result-modal-button"]'); 66 | // if (modalButtons.length === 0) { 67 | // throw new Error("No modal button found in results."); 68 | // } 69 | // Click the first result modal button. 70 | await modalButtons.click(); 71 | await browser.pause(1000); 72 | }); 73 | 74 | // Step: Verify the details component is scrollable. 75 | Then("the details component should be scrollable", async () => { 76 | const details = await $('[data-testid="details"]'); 77 | await details.waitForDisplayed({ timeout: 5000 }); 78 | // Check that scrollHeight is greater than clientHeight. 79 | const scrollHeight = await details.getProperty("scrollHeight"); 80 | const clientHeight = await details.getProperty("clientHeight"); 81 | expect(scrollHeight).toBeGreaterThan(clientHeight); 82 | }); -------------------------------------------------------------------------------- /e2e/step-definitions/settings_page.steps.ts: -------------------------------------------------------------------------------- 1 | import { Given, When, Then } from '@wdio/cucumber-framework'; 2 | import { expect, $ } from '@wdio/globals'; 3 | 4 | // Selectors 5 | const OPENAI_API_KEY_INPUT = '[data-testid="openai-api-key-input"]'; 6 | const SAVE_BUTTON = '[data-testid="save"]'; 7 | const FAKE_API_KEY = "sk-proj-meaningfullytesting-1234567890123456789012345678901234567890" 8 | 9 | // Step: Simulate entering an OpenAI API Key on the page. 10 | When("the OpenAI API Key value is set on the page", async () => { 11 | const input = await $(OPENAI_API_KEY_INPUT); 12 | await input.waitForDisplayed({ timeout: 5000 }); 13 | // Clear existing value and set a new one. 
14 | await input.clearValue(); 15 | // Provide a new key value. 16 | await input.setValue(FAKE_API_KEY); 17 | await browser.pause(500); 18 | }); 19 | 20 | // // Step: Simulate clicking the Save button. 21 | // When('the "Save" component has been clicked', async () => { 22 | // const btn = await $(SAVE_BUTTON); 23 | // await btn.waitForDisplayed({ timeout: 5000 }); 24 | // await btn.click(); 25 | // await browser.pause(500); 26 | // }); 27 | 28 | Then('the {string} component should be empty', async (componentName: string) => { 29 | let selector = `[data-testid="${componentName.toLowerCase().replace(/ /g, '-')}"]`; 30 | const input = await $(selector); 31 | await input.waitForDisplayed({ timeout: 5000 }); 32 | const value = await input.getValue(); 33 | // Verify that the input is empty. 34 | expect(value).toBe(""); 35 | }); 36 | 37 | // Then: Verify that the OpenAI API Key's text is masked. 38 | Then('the text of the {string} component is masked', async (componentName: string) => { 39 | let selector = `[data-testid="${componentName.toLowerCase().replace(/ /g, '-')}"]`; 40 | const input = await $(selector); 41 | await input.waitForDisplayed({ timeout: 5000 }); 42 | const value = await input.getValue(); 43 | // We assume the masking inserts "*******" into the displayed value. 44 | expect(value).toContain("*******"); 45 | }); 46 | 47 | // Then: Verify that the text of the "OpenAI API Key input" component is a masked version of the set value. 48 | Then('the text of the {string} component is a masked version of the set value.', async (componentName: string) => { 49 | // This step may be similar to the previous, but you can add further checks if needed. 50 | let selector = `[data-testid="${componentName.toLowerCase().replace(/ /g, '-')}"]`; 51 | const input = await $(selector); 52 | await input.waitForDisplayed({ timeout: 5000 }); 53 | const value = await input.getValue(); 54 | // Check that the value both contains "*******" and does not equal the plain key. 
55 | expect(value).toContain("*******"); 56 | expect(value).not.toEqual(FAKE_API_KEY); 57 | }); -------------------------------------------------------------------------------- /e2e/step-definitions/upload_process.steps.ts: -------------------------------------------------------------------------------- 1 | import { Given, When, Then } from '@wdio/cucumber-framework'; 2 | import path from 'path'; 3 | import { expect, $$, $ } from '@wdio/globals'; 4 | 5 | // Selectors – adjust these if needed. 6 | const UPLOAD_COMPONENT_SELECTOR = '[data-testid="upload-a-spreadsheet"]'; 7 | const CSV_UPLOAD_PAGE_SELECTOR = '[data-testid="csv-upload-settings"]'; 8 | const PREVIEW_COMPONENT_SELECTOR = '[data-testid="preview"]'; 9 | 10 | const TEST_CSV_FILE_NAME = "newline-test.csv"; // The name of the test CSV file to use. 11 | const INDEX_OF_COLUMN_TO_EMBED = 4; 12 | 13 | // Step: Simulate file selection using the test CSV file. 14 | Given( 15 | "a file has been selected in the {string} component", 16 | async (componentName: string) => { 17 | // Locate the file input inside the specified component. 18 | const fileInputSelector = `[data-testid="${componentName 19 | .toLowerCase() 20 | .replace(/ /g, '-')}"] input[type="file"]`; 21 | const fileInput = await $(fileInputSelector); 22 | // Resolve path to the test CSV file. 23 | const filePath = path.resolve(process.cwd(), `e2e/test-storage/${TEST_CSV_FILE_NAME}`); 24 | // Upload the file (this copies the file to a temporary location on the Selenium server). 25 | const remoteFilePath = await browser.uploadFile(filePath); 26 | await fileInput.setValue(remoteFilePath); 27 | // Trigger change event if necessary. 28 | await browser.execute((input: HTMLInputElement) => { 29 | const event = new Event('change', { bubbles: true }); 30 | input.dispatchEvent(event); 31 | }, fileInput); 32 | // Allow time for the file selection to process. 33 | await browser.pause(1000); 34 | } 35 | ); 36 | 37 | // Step: Simulate choosing a column to embed. 
38 | When("the column {string} has been selected as column to embed", async (columnName: string) => { 39 | // Assume the CSV Upload page contains a 42 | 43 |
44 |

Ollama

45 |

Ollama lets you run embedding models on your computer. This is free (except for electricity, wear-and-tear, etc.).

46 | 47 | 48 |
49 | 50 | 51 | 52 | 53 | 54 | 79 | -------------------------------------------------------------------------------- /src/renderer/src/components/ApiKeyStatus.svelte: -------------------------------------------------------------------------------- 1 | 10 | 11 | {#if !validApiKeysSet } 12 |
13 |

No OpenAI API key is set. Please add one (or details for another provider) in order to use Meaningfully.

14 |
15 | {/if} 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/renderer/src/components/ExistingDatabases.svelte: -------------------------------------------------------------------------------- 1 | 70 | 71 | {#if hidden} 72 |
73 |
74 | {:else if loading} 75 |
76 |
77 |
78 | {:else if error} 79 |
80 | {error} 81 |
82 | {:else} 83 |
84 |

Existing Spreadsheets

85 | {#if documentSets.length === 0} 86 |

No spreadsheets found. Upload one to get started.

87 | {:else} 88 |
89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | {#each documentSets as set} 101 | 105 | 113 | 114 | 115 | 125 | 138 | 139 | {/each} 140 | 141 |
NameUpload DateDocumentsParametersActions
106 | 110 | {set.name} 111 | 112 | {set.uploadDate.toLocaleString()}{set.totalDocuments} 116 | {#if Object.keys(set.parameters).length > 0} 117 |
118 | View Parameters 119 |
{JSON.stringify(set.parameters, null, 2)}
120 |
121 | {:else} 122 | None 123 | {/if} 124 |
126 | 137 |
142 | 143 | 144 |
145 |
146 | Showing page {currentPage} of {totalPages} 147 |
148 |
149 | 156 | 163 |
164 |
165 |
166 | {/if} 167 |
168 | {/if} 169 | -------------------------------------------------------------------------------- /src/renderer/src/components/FrontPage.svelte: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 |
15 | {databasesComponent.hide()}} 18 | uploadComplete={() => { 19 | databasesComponent.loadDocumentSets(); 20 | databasesComponent.show(); 21 | }}/> 22 | 23 |
24 | -------------------------------------------------------------------------------- /src/renderer/src/components/HelpPage.svelte: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 |
6 |

Help

7 |
8 |

What is Semantic Search?

9 |

10 | Keyword search has been the only kind of search for decades. Sometimes it fails: 11 |

12 | 13 |
    14 |
  • Ambiguity and synonyms (people getting fired and things catching on fire, cars and automobiles)
  • 15 |
  • Circumlocutions and legalese
  • 16 |
  • Documents written by laypeople describing complex situations in widely-varied language
  • 17 |
  • Typos
  • 18 |
  • Multilingual documents
  • 19 |
20 |

21 | Semantic search works better in these situations, by finding results that mean something 22 | similar to the query. Even if the words are completely different, semantic search can still 23 | surface the results you need. 24 |

25 |
26 |
27 |

Uploading a CSV

28 |

29 | You can select a CSV from your computer to "upload" it to Meaningfully. Then, select one column 30 | from the CSV to search semantically, and any number of other columns to be shown alongside it in results. 31 |

32 |

33 | Once you upload the CSV, each entry in the chosen column will be embedded, with the results stored on your 34 | computer. If you choose a remote embedding API -- like OpenAI's text-embedding-3-small or text-embedding-3-large -- 35 | then the entries in your column will be sent 36 | to that service; if you choose a local one, then the data will not leave your computer. 37 |

38 |

39 | Meaningfully provides additional options: 40 |

41 |
    42 |
  • Split long text into sentences. When a single row contains many ideas, splitting it by sentence helps the search surface 43 | results that match a single idea. 44 |
  • 45 |
  • Combine short sentences into chunks. You can adjust how large the chunks are and how much overlap exists between chunks.
  • 46 |
47 |

48 | Embedding can take a while, especially for large CSVs with 10,000 or more rows. Once it finishes, you'll 49 | be able to search your CSV. 50 |

51 |
52 |
53 |

How should I write my search query?

54 |

55 | Do: Imagine the perfect version of what you're looking for, that you wish existed in your spreadsheet. My car caught on fire as I was driving on the highway. 56 |

57 |

58 | Don't: Just write keywords. crash OR fire OR aflame 59 |

60 |

61 | Don't: Ask a question, like you would to a chatbot. Please find me examples about cars catching on fire. 62 |

63 | 64 |
65 |
66 |

How much does it cost?

67 |

68 | Generally less than a dollar per document set, but you're paying OpenAI, not me, and you're responsible for all costs, no matter what. 69 |

70 |

71 | Eventually, some Meaningfully features may require payment. 72 |

73 |
74 |
75 |

How can I support development of Meaningfully?

76 |

77 | You can Buy Me A Coffee. 78 |

79 | 80 |
81 |
82 | 83 | 120 | -------------------------------------------------------------------------------- /src/renderer/src/components/Preview.svelte: -------------------------------------------------------------------------------- 1 | 18 | 19 |
20 | {#if loading} 21 |
22 |
23 |
24 | { :else } 25 |
26 |

Preview

27 |
28 | 34 | 35 | 36 | {/if} 37 | -------------------------------------------------------------------------------- /src/renderer/src/components/Results.svelte: -------------------------------------------------------------------------------- 1 | 32 | 33 |
34 |

Search Results

35 | 36 | {#if loading} 37 |
38 |
39 |
40 | {:else if results.length === 0} 41 |
42 |

No results found. Is it possible there is no data in the dataset?

43 |
44 | {:else} 45 |
46 |
54 | 55 | 56 | {#if displayCount < results.length} 57 |
58 | 64 |
65 | {/if} 66 | {/if} 67 | -------------------------------------------------------------------------------- /src/renderer/src/components/SearchPage.svelte: -------------------------------------------------------------------------------- 1 | 102 | 103 |
104 |
105 | 114 |
115 | 116 | {#if documentSetLoading} 117 |

Loading document set...

118 | {:else if !documentSet} 119 |

Document set not found. {documentSetId}

120 | {:else} 121 |
122 |

{documentSet.name}

123 |

124 | {documentSet.totalDocuments} documents • Uploaded {documentSet.uploadDate.toLocaleDateString()} 125 |

126 |
127 | 128 |
129 | 130 |
131 | 134 |

135 | Imagine the perfect document that you hope might exist in your spreadsheet. Type it here. Meaningfully will find the real documents that mean 136 | about the same thing -- even if they have no keywords in common. 137 |

138 |
139 | 147 | 155 |
156 |
157 | 158 | 159 | {#if metadataColumns.length > 0} 160 |
161 |

162 | Search only records that match... 163 |

164 |
165 | {#each metadataFilters as filter, index} 166 |
167 | 173 | 188 | 194 | 197 |
198 | {/each} 199 | 202 |
203 |
204 | {/if} 205 |
206 | 207 | 208 | {#if error} 209 |
210 | {error} 211 |
212 | {/if} 213 | {#if (searchQuery != blankSearchQuery || metadataFilters.length > 0) && hasResults} 214 | 215 |
216 | 223 |
224 | {/if} 225 | {/if} 226 |
227 | 228 | 229 | {#if showModal && modalContent} 230 |
231 |
232 |

Original Document

233 |
234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | {#each metadataColumns as key} 244 | 245 | 246 | 247 | 248 | {/each} 249 | 250 |
Original text{modalContent.text}
{key}{modalContent.metadata[key]}
251 | 252 |
253 |
254 | {/if} -------------------------------------------------------------------------------- /src/renderer/src/components/Table.svelte: -------------------------------------------------------------------------------- 1 | 39 | 40 |
41 | 42 | 43 | 44 | {#each columns as column} 45 | 46 | {/each} 47 | {#if showShowOriginal} 48 | 49 | {/if} 50 | 51 | 52 | 53 | {#each data as row} 54 | 55 | {#each columns as column} 56 | 65 | {/each} 66 | {#if showShowOriginal} 67 | 70 | {/if} 71 | 72 | {/each} 73 | 74 |
{column}
57 | {#if column === 'similarity' && row[column] !== undefined} 58 | {(row[column] * 100).toFixed(1)}% 59 | {:else if column === textColumn} 60 | {@html sanitizeAndFormatText(row[column] || '')} 61 | {:else} 62 | {(row[column]) || ''} 63 | {/if} 64 | 68 | 69 |
75 |
-------------------------------------------------------------------------------- /src/renderer/src/env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | /// 3 | 4 | 5 | interface Settings { 6 | openAIKey: string; 7 | oLlamaModelType: string; 8 | oLlamaBaseURL: string; 9 | } 10 | interface SearchResult { 11 | content: string; 12 | similarity: number; 13 | [key: string]: any; // For metadata fields 14 | sourceNodeId: string | undefined; 15 | } 16 | 17 | interface Window { 18 | electron: { 19 | ipcRenderer: { 20 | send: (channel: string, ...args: any[]) => void 21 | } 22 | process: { 23 | versions: Record 24 | } 25 | } 26 | api: { 27 | listDocumentSets: (page: number, pageSize: number) => Promise<{documents: DocumentSetMetadata[], total: number}> , 28 | uploadCsv: (formData: { 29 | file: File, 30 | datasetName: string, 31 | description: string, 32 | textColumns: string[], 33 | metadataColumns: string[], 34 | splitIntoSentences: boolean, 35 | combineSentencesIntoChunks: boolean, 36 | sploderMaxSize: number, 37 | chunkSize: number, 38 | chunkOverlap: number, 39 | modelName: string, 40 | modelProvider: string 41 | }) => Promise<{ success: true, documentSetId: number }>, 42 | generatePreviewData: (formData: { 43 | file: File, 44 | datasetName: string, // not really needed 45 | description: string, // not really needed 46 | textColumns: string[], 47 | metadataColumns: string[], 48 | splitIntoSentences: boolean, 49 | combineSentencesIntoChunks: boolean, 50 | sploderMaxSize: number, 51 | chunkSize: number, 52 | chunkOverlap: number, 53 | modelName: string, 54 | modelProvider: string 55 | }) => Promise<{ success: boolean, nodes: Record[], estimatedPrice: number, tokenCount: number, pricePer1M: number }>, 56 | searchDocumentSet: (params: { 57 | documentSetId: number; 58 | query: string; 59 | n_results: number; 60 | filters?: { 61 | key: string, 62 | operator: "==" | "in" | ">" | "<" | "!=" | ">=" | "<=" | "nin" | "any" | "all" | 
"text_match" | "contains" | "is_empty", 63 | value: any 64 | }[]; 65 | }) => Promise; 66 | getDocument: (params: {documentSetId: number, documentId: string}) => Promise<{ text: string, metadata: Record }>; 67 | getSettings: () => Promise; 68 | setSettings: (settings: Settings) => Promise<{success: boolean}>; 69 | deleteDocumentSet: (documentSetId: number) => Promise; 70 | getDocumentSet: (documentSetId: number) => Promise; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/renderer/src/main.ts: -------------------------------------------------------------------------------- 1 | import './assets/main.css' 2 | 3 | import App from './App.svelte' 4 | import { mount } from "svelte"; 5 | 6 | const app = mount(App, { 7 | target: document.getElementById('app') 8 | }) 9 | 10 | export default app 11 | 12 | export interface DocumentSet { 13 | documentSetId: number; 14 | name: string; 15 | uploadDate: Date; 16 | parameters: Record; 17 | totalDocuments: number; 18 | } 19 | -------------------------------------------------------------------------------- /svelte.config.mjs: -------------------------------------------------------------------------------- 1 | import { vitePreprocess } from '@sveltejs/vite-plugin-svelte' 2 | 3 | export default { 4 | // Consult https://svelte.dev/docs#compile-time-svelte-preprocess 5 | // for more information about preprocessors 6 | preprocess: vitePreprocess() 7 | } 8 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@electron-toolkit/tsconfig/tsconfig.json", 3 | "include": [ 4 | "src/renderer/src/env.d.ts", 5 | "src/renderer/src/**/*", 6 | "src/renderer/src/**/*.svelte", 7 | "src/preload/*.d.ts" 8 | ], 9 | "compilerOptions": { 10 | "verbatimModuleSyntax": true, 11 | "useDefineForClassFields": true, 12 | "strict": false, 13 | "allowJs": true, 14 | 
"checkJs": true, 15 | "lib": ["ESNext", "DOM", "DOM.Iterable"] 16 | }, 17 | "references": [{ "path": "./tsconfig.node.json" }] 18 | } 19 | -------------------------------------------------------------------------------- /tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@electron-toolkit/tsconfig/tsconfig.node.json", 3 | "include": ["electron.vite.config.*", "src/main/**/*", "src/preload/**/*"], 4 | "compilerOptions": { 5 | "composite": true, 6 | "types": ["electron-vite/node"], 7 | "moduleResolution": "bundler" 8 | } 9 | } 10 | --------------------------------------------------------------------------------