├── .github └── workflows │ └── deploy.yml ├── .gitignore ├── LICENSE ├── README.md ├── index.html ├── package-lock.json ├── package.json ├── src └── js │ ├── index.js │ └── worker.js └── webpack.config.js /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v2 15 | 16 | - name: Set up Node.js 17 | uses: actions/setup-node@v2 18 | with: 19 | node-version: '14' 20 | 21 | - name: Install dependencies 22 | run: npm install 23 | 24 | - name: Build 25 | run: npm run build 26 | 27 | - name: Deploy to GitHub Pages 28 | uses: peaceiris/actions-gh-pages@v3 29 | with: 30 | personal_token: ${{ secrets.GH_PAGES_TOKEN }} 31 | publish_dir: ./dist 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 
42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # Docusaurus cache and generated files 108 | .docusaurus 109 | 110 | # Serverless directories 111 | .serverless/ 112 | 113 | # FuseBox cache 114 | .fusebox/ 115 | 116 | # DynamoDB Local files 117 | .dynamodb/ 118 | 119 | # TernJS port file 120 | .tern-port 121 | 122 | # Stores VSCode versions used for testing VSCode extensions 123 | .vscode-test 124 | 125 | # yarn v2 126 | .yarn/cache 127 | .yarn/unplugged 128 | .yarn/build-state.yml 129 | .yarn/install-state.gz 130 | .pnp.* 131 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 
Dominik Weckmüller 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Qdrant Frontend 2 | A universal Qdrant table frontend based on transformers.js 3 | 4 | ![Qdrant SDG snapshot in qdrant-frontend](https://github.com/do-me/qdrant-frontend/assets/47481567/92d58512-5aca-4b5d-a4f0-2708a20de871) 5 | 6 | ## Usage 7 | Simply go to https://do-me.github.io/qdrant-frontend/ and type in your Qdrant URL: 8 | - On localhost: http://localhost:6333/collections/yourcollection 9 | - Remote connection: http://yourserver.com:6333/collections/yourcollection 10 | In the latter case make sure that your reverse proxy (nginx, caddy etc.) is configured to allow CORS. 
11 | 12 | Choose the model you used in the collection (unfortunately afaik there is no collection metadata property for this yet) and decide whether you want to use smaller models (quantized) or larger but more accurate models. 13 | Enter your search query and a row limit. 14 | 15 | Note that you can just bookmark the URL so that all input fields are filled out for you next time! 16 | 17 | ## Idea 18 | The model is loaded to your browser with transformers.js so your search query gets inferenced on the fly. The resulting vector is then used in the Qdrant search query. 19 | 20 | ## Motivation 21 | Qdrant's Web-UI is great but unfortunately it still requires an additional server for inferencing as Qdrant's fastembed is not yet integrated in a convenient way [1](https://github.com/qdrant/fastembed/discussions/117), [2](https://github.com/qdrant/qdrant-web-ui/issues/162). This static page is supposed to provide a minimalistic interface for quickly querying Qdrant collections. 22 | 23 | ## Installation 24 | If you want to develop or build locally, clone the repo and run: 25 | - `npm install` 26 | - `npm run start` for development or 27 | - `npm run build` for a local build resulting in an index.html and index.js file you can deploy anywhere. 28 | 29 | ## To Do 30 | - Improve UI 31 | - Use Qdrant JS client instead of hardcoding the request 32 | - Testing 33 | - Add dimensionality reduction feature based on bhtsne-wasm for the top results like in https://do-me.github.io/SemanticFinder/ 34 | 35 | PRs very welcome! 36 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Qdrant Frontend 6 | 7 | 8 | 10 | 13 | 14 | 78 | 79 | 80 | 81 |
82 |
83 |

Qdrant Frontend (GitHub)

84 |
A universal frontend for Qdrant built with transformers.js to allow
for 86 | quick semantic search without the need for server-side inferencing.
87 |

Download a Qdrant snapshot about SDGs here and give it a try!

88 |
89 |
90 |
91 |
92 |
Settings
93 |
94 |
95 | 97 | 98 |
99 |
100 |
101 |
102 |
103 |
104 | 106 | 107 |
108 |
109 |
110 | 111 |
112 | 113 |
114 |
115 | 116 |
117 |
118 |
119 |
120 | 121 | 122 |
123 |
124 | 125 |
Weighted Queries
126 | 127 | 128 |
129 |
130 | 131 |
132 |
133 | 135 | 136 |
137 |
138 | 139 |
140 |
141 | 143 | 144 |
145 |
146 | 147 |
148 | 149 |
150 | 151 |
152 |
153 | 154 |
155 | 156 | 165 | 166 | 167 |
168 | 169 | 170 |
171 |
172 | 173 | 174 | 175 | 176 |
177 |
178 |
179 | 183 |
184 |
185 |
186 | 187 |
188 |
189 | 200 |
201 |
202 | 203 | 204 | 205 |
206 |
207 | 208 |
209 | 210 | 211 |
212 | 216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |

Dimensionality Reduction

225 |

Using a fast wasm implementation of Barnes-Hut tSNE (wasm-bhtSNE).

227 | 228 |
229 | 231 |
232 |
233 |
234 | 236 | 237 |
238 |
239 |
240 |
241 | 243 | 244 |
245 |
246 |
247 |
248 | 250 | 252 |
253 |
254 |
255 |
256 | 257 | 258 |
259 |
260 |
261 | 262 |
263 | 264 |
265 |
266 |
267 |
268 | 269 | 270 | 271 | 272 |
273 | 274 | 275 |
276 |
277 | 278 |
279 |
280 |
281 |
282 | 283 | 284 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "qdrant-frontend", 3 | "version": "1.0.0", 4 | "description": "A universal Qdrant table frontend based on transformers.js", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1", 8 | "start": "webpack serve --mode development", 9 | "build": "webpack --config webpack.config.js" 10 | }, 11 | "author": "Dominik Weckmüller", 12 | "license": "MIT", 13 | "dependencies": { 14 | "@xenova/transformers": "^2.15.0", 15 | "ag-grid-community": "^31.1.0", 16 | "bootstrap": "^5.3.3", 17 | "deck.gl": "^8.9.35", 18 | "distinct-colors": "^3.0.0", 19 | "marked": "^12.0.1", 20 | "pako": "^2.1.0", 21 | "wasm-bhtsne": "^0.3.3", 22 | "xlsx": "^0.18.5" 23 | }, 24 | "devDependencies": { 25 | "css-loader": "^6.10.0", 26 | "html-webpack-plugin": "^5.6.0", 27 | "mini-css-extract-plugin": "^2.8.0", 28 | "webpack": "^5.90.3", 29 | "webpack-cli": "^5.1.4", 30 | "webpack-dev-server": "^5.0.2" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/js/index.js: -------------------------------------------------------------------------------- 1 | import { pipeline } from '@xenova/transformers'; 2 | import { createGrid } from 'ag-grid-community'; 3 | import * as XLSX from 'xlsx'; 4 | import 'ag-grid-community/styles/ag-grid.css'; 5 | import 'ag-grid-community/styles/ag-theme-quartz.css'; 6 | import { Deck } from '@deck.gl/core'; 7 | import { ScatterplotLayer, LineLayer } from '@deck.gl/layers'; 8 | import distinctColors from 'distinct-colors'; 9 | 10 | const worker = new Worker(new URL('./worker.js', import.meta.url), { 11 | type: 'module' 12 | }); 13 | 14 | window.semanticWorker = worker; 15 | 16 | const loadingElement = document.getElementById("loading"); 17 | const submitButton = 
document.getElementById("submit_button"); 18 | const submit_button_text = document.getElementById("submit_button_text") 19 | const filterTextBox = document.getElementById('filter-text-box'); 20 | const eGridDiv = document.querySelector('#myGrid'); 21 | 22 | let searchResults; 23 | let embedder; 24 | let loadedModelName = null; 25 | let quantizedFlag = true; 26 | let thisCollection; 27 | let lastCollection = ""; 28 | let queryEmbedding; 29 | 30 | const special_vector = [] // A special vector can be hardcoded here, so that instead of calculating it with the model, this vector is used. Will be displayed bold. 31 | 32 | // Function to update URL with form parameters 33 | function updateURL() { 34 | const qdrantURL = document.getElementById('QdrantURL').value; 35 | const qdrantLimit = document.getElementById('QdrantLimit').value; 36 | const hfModel = document.getElementById('HFModel').value; 37 | const quantizedToggle = document.getElementById('quantizedToggle').checked; 38 | 39 | // Select all query containers 40 | const queryContainers = document.querySelectorAll('.queryContainer'); 41 | let queries = []; 42 | 43 | queryContainers.forEach((container, index) => { 44 | // Adjusted selectors to match the updated HTML 45 | const inputText = container.querySelector('.inputText').value; 46 | const queryWeight = container.querySelector('.queryWeight').value; 47 | const activeState = container.querySelector(`.activeToggle`).checked; 48 | 49 | // Create an object for each query with its parameters and index 50 | const query = { 51 | index, // Add the index of the query row 52 | inputText, 53 | queryWeight, 54 | activeState 55 | }; 56 | 57 | // Add the query object to the queries array 58 | queries.push(query); 59 | }); 60 | 61 | // Convert the queries array to a string for the URL parameters 62 | const queriesString = JSON.stringify(queries); 63 | 64 | const params = new URLSearchParams({ 65 | qdrantURL, 66 | queries: queriesString, // This now includes the index of each query 
row 67 | qdrantLimit, 68 | hfModel, 69 | quantizedToggle 70 | }); 71 | 72 | window.history.replaceState({}, '', `?${params}`); 73 | } 74 | 75 | function setFormInputsFromURL() { 76 | // Parse the current URL 77 | const url = new URL(window.location.href); 78 | const urlParams = url.searchParams; 79 | 80 | // Check if there are any parameters in the URL 81 | if (urlParams.toString() === "") { 82 | return; // If no parameters, do nothing 83 | } 84 | 85 | // Update form inputs with URL parameters 86 | const qdrantURLParam = urlParams.get('qdrantURL'); 87 | document.getElementById('QdrantURL').value = qdrantURLParam || ''; 88 | 89 | const qdrantLimitParam = urlParams.get('qdrantLimit'); 90 | document.getElementById('QdrantLimit').value = qdrantLimitParam || ''; 91 | 92 | const hfModelParam = urlParams.get('hfModel'); 93 | document.getElementById('HFModel').value = hfModelParam || ''; 94 | 95 | const quantizedToggleParam = urlParams.get('quantizedToggle'); 96 | document.getElementById('quantizedToggle').checked = quantizedToggleParam === 'true'; 97 | 98 | // Handle query parameters 99 | const queriesParam = urlParams.get('queries'); 100 | if (queriesParam) { 101 | const queries = JSON.parse(queriesParam); 102 | let rowCount = 1; // Reset row count for dynamic rows 103 | 104 | // Directly update the first row if it's part of the queries 105 | if (queries.length > 0 && queries[0].hasOwnProperty('inputText')) { 106 | const firstQuery = queries.shift(); // Remove the first query from the array 107 | 108 | const inputText0 = document.getElementById('inputText0') 109 | inputText0.value = firstQuery.inputText || ''; 110 | 111 | if (inputText0.value === "special_vector") { 112 | // If the condition is met, apply italic text and grey background 113 | inputText0.style.fontStyle = 'italic'; 114 | //event.target.style.backgroundColor = 'grey'; 115 | } else { 116 | // If the condition is not met, remove italic text and grey background 117 | inputText0.style.fontStyle = 'normal'; 118 
| //event.target.style.backgroundColor = ''; 119 | } 120 | 121 | document.getElementById('weight0').value = firstQuery.queryWeight || ''; 122 | document.getElementById('activeToggle0').checked = firstQuery.activeState; 123 | } 124 | 125 | // Remove existing query rows 126 | //const queryRowsContainer = document.getElementById('queryRowsContainer'); 127 | // Assuming you want to clear all existing dynamic rows before adding new ones 128 | //queryRowsContainer.innerHTML = ''; 129 | 130 | // Dynamically create query rows based on URL parameters 131 | queries.forEach((query, index) => { 132 | addRow(query, index + 1); // Pass query data and the new row number 133 | }); 134 | } 135 | } 136 | 137 | // Function to remove a row 138 | function removeRow(rowToRemove) { 139 | rowToRemove.remove(); 140 | // Adjust IDs of all remaining rows 141 | let remainingRows = document.querySelectorAll('.queryContainer'); 142 | for (let i = 0; i < remainingRows.length; i++) { 143 | remainingRows[i].querySelectorAll('input, button').forEach(function (element) { 144 | const currentId = element.id; 145 | const newId = currentId.replace(/\d+$/, i + 1); // Adjust the ID to reflect the new row count 146 | element.id = newId; 147 | }); 148 | } 149 | } 150 | 151 | function addRow(queryData, rowNumber) { 152 | const originalRow = document.getElementById('initialQueryContainer'); 153 | const clone = originalRow.cloneNode(true); 154 | clone.id = 'queryContainer' + rowNumber; // Adjust the ID of the cloned row 155 | 156 | // Adjust IDs of all elements within the cloned row 157 | clone.querySelectorAll('input, button').forEach(function (element) { 158 | const currentId = element.id; 159 | const newId = currentId.replace(/\d+$/, rowNumber); // Replace the last digit(s) with the current rowNumber 160 | element.id = newId; 161 | }); 162 | 163 | // Set values from queryData 164 | clone.querySelector('.inputText').value = queryData.inputText || ''; 165 | 166 | // Set values from queryData 167 | const 
inputTextX = clone.querySelector('.inputText') 168 | inputTextX.value = queryData.inputText || ''; 169 | 170 | if (inputTextX.value === "special_vector") { 171 | // If the condition is met, apply italic text and grey background 172 | inputTextX.style.fontStyle = 'italic'; 173 | } else { 174 | // If the condition is not met, remove italic text and grey background 175 | inputTextX.style.fontStyle = 'normal'; 176 | } 177 | 178 | clone.querySelector('.queryWeight').value = queryData.queryWeight || ''; 179 | clone.querySelector('.activeToggle').checked = queryData.activeState; 180 | 181 | const minusButton = clone.querySelector('.queryButton'); 182 | // must use SVG here as emoji create problems with npm 183 | minusButton.innerHTML = ` 184 | 185 | ` 186 | minusButton.title = 'Remove query'; 187 | minusButton.addEventListener('click', function () { removeRow(clone); }); // Attach event listener to the minus button 188 | 189 | document.getElementById('queryRowsContainer').appendChild(clone); 190 | } 191 | 192 | async function loadModel(model, quantized = true) { 193 | if (model !== loadedModelName || quantized !== quantizedFlag) { // Check if model or quantized flag changed 194 | submitButton.setAttribute("disabled", ""); 195 | loadingElement.style.display = ""; 196 | submit_button_text.textContent = "Loading model..."; 197 | 198 | embedder = await pipeline("feature-extraction", model, { quantized: quantized }); 199 | loadedModelName = model; 200 | quantizedFlag = quantized; // Update quantized flag 201 | console.log("Model loaded:", loadedModelName, " quantized: ", quantized); 202 | } else { 203 | console.log("Model already loaded:", loadedModelName, " quantized: ", quantized); 204 | } 205 | } 206 | 207 | submitButton.onclick = () => { 208 | const modelName = document.getElementById("HFModel").value; 209 | const quantized = document.getElementById("quantizedToggle").checked; 210 | loadModel(modelName, quantized).then(() => { 211 | sendRequest(); 212 | }); 213 | }; 214 | 
215 | download_csv.onclick = () => { 216 | exportData(searchResults, "csv"); 217 | }; 218 | 219 | download_xlsx.onclick = () => { 220 | exportData(searchResults, "excel"); 221 | }; 222 | 223 | download_json.onclick = () => { 224 | exportData(searchResults, "json"); 225 | }; 226 | 227 | //////////////////////////////////////////////////////////////////// 228 | 229 | async function searchPoints(collectionName, vectorData, filter, limit, offset, withPayload, withVector, scoreThreshold) { 230 | var reqBody = JSON.stringify({ 231 | vector: vectorData, 232 | filter: filter, 233 | limit: limit, 234 | offset: offset, 235 | with_payload: withPayload, 236 | with_vector: withVector, 237 | score_threshold: scoreThreshold, 238 | }); 239 | 240 | const requestOptions = { 241 | method: "POST", 242 | headers: { "Content-Type": "application/json" }, 243 | body: reqBody, 244 | }; 245 | 246 | thisCollection = document.getElementById("QdrantURL").value 247 | const response = await fetch(`${thisCollection}/points/search`, requestOptions); 248 | const data = await response.json(); 249 | return data; 250 | } 251 | 252 | function getQueryTextsAndWeigths() { 253 | const queryContainers = document.querySelectorAll('.queryContainer'); 254 | 255 | const activeQueries = Array.from(queryContainers).filter(container => { 256 | const activeToggle = container.querySelector('.activeToggle'); 257 | return activeToggle.checked; 258 | }).map(container => { 259 | const inputText = container.querySelector('.inputText').value.trim(); 260 | const weight = container.querySelector('.queryWeight').value; 261 | return { inputText, weight }; 262 | }); 263 | 264 | const jsonObject = JSON.stringify(activeQueries); 265 | 266 | return jsonObject 267 | } 268 | 269 | async function processInputText(inputText) { 270 | 271 | if (inputText == "special_vector") { 272 | return special_vector 273 | } 274 | else { 275 | const output = await embedder(inputText, { pooling: 'mean', normalize: true }); 276 | const vectorArray = 
Array.from(output["data"]); 277 | queryEmbedding = vectorArray; 278 | return vectorArray; 279 | } 280 | } 281 | 282 | async function processQueries() { 283 | const jsonObject = getQueryTextsAndWeigths(); 284 | const queries = JSON.parse(jsonObject); 285 | 286 | // Step 1: Calculate the vector for each text 287 | const vectors = await Promise.all(queries.map(async query => { 288 | const { inputText } = query 289 | return await processInputText(inputText) 290 | })); 291 | 292 | // Step 2: Calculate the weighted average vector 293 | const weightedAverageVector = vectors.reduce((acc, vector, index) => { 294 | const weight = queries[index].weight; 295 | return acc.map((val, i) => val + vector[i] * weight); 296 | }, new Array(vectors[0].length).fill(0)); 297 | 298 | // Normalize the weighted average vector 299 | const magnitude = Math.sqrt(weightedAverageVector.reduce((sum, val) => sum + val * val, 0)); 300 | const normalizedWeightedAverageVector = weightedAverageVector.map(val => val / magnitude); 301 | 302 | return normalizedWeightedAverageVector 303 | } 304 | 305 | // Define global variables for the grid API and options 306 | let gridApi; 307 | let gridOptions; 308 | 309 | async function sendRequest() { 310 | try { 311 | gridApi.showLoadingOverlay(); 312 | } 313 | catch (error) { 314 | } 315 | loadingElement.style.display = ""; 316 | submit_button_text.textContent = "Loading results..."; 317 | submitButton.setAttribute("disabled", ""); 318 | 319 | let inputText = document.getElementsByClassName("inputText")[0].value.trim(); 320 | 321 | if (inputText !== "") { 322 | //let output = await embedder(inputText, { pooling: 'mean', normalize: true }); 323 | const collectionName = "test_collection"; 324 | const vectorData = await processQueries(); 325 | const filter = {}; 326 | const limit = parseInt(document.getElementById("QdrantLimit").value); 327 | const offset = 0; 328 | const withPayload = true; 329 | const withVector = true; 330 | const scoreThreshold = null; 331 | 332 
| try { 333 | 334 | searchResults = await searchPoints(collectionName, vectorData, filter, limit, offset, withPayload, withVector, scoreThreshold); 335 | 336 | // Extract payload keys 337 | const payloadKeys = Object.keys(searchResults.result[0].payload); 338 | 339 | function isHyperlink(value) { 340 | return /^https?:\/\//.test(value); 341 | } 342 | 343 | // Custom cell renderer 344 | function customRenderer(params) { 345 | const nestedKey = Object.keys(params.data.payload).find(key => typeof params.data.payload[key] === 'object'); 346 | const value = params.data.payload[params.colDef.field.split('.')[1]]; 347 | 348 | if (params.colDef.field.endsWith(`.${nestedKey}`)) { 349 | return typeof value === 'object' ? JSON.stringify(value) : value; // Render nested element as string 350 | } else if (isHyperlink(value)) { 351 | return `${value}`; 352 | } else { 353 | return value; 354 | } 355 | } 356 | 357 | // Update your column definition 358 | const columnDefs = [ 359 | { headerName: 'id', field: 'id' }, 360 | { headerName: 'score', field: 'score' }, 361 | ...payloadKeys.map(key => ({ 362 | headerName: key, 363 | field: `payload.${key}`, 364 | maxWidth: 300, 365 | editable: true, 366 | valueGetter: params => params.data.payload[key], 367 | tooltipValueGetter: (params) => params.value, 368 | filter: true, 369 | autoHeight: true, 370 | cellRenderer: customRenderer // Use the custom cell renderer 371 | })), 372 | ]; 373 | 374 | // Check if the grid has already been initialized 375 | if (gridApi && thisCollection === lastCollection) { 376 | // If the grid is already initialized, update the row data 377 | gridApi.setRowData(searchResults.result); 378 | } else { 379 | 380 | try { 381 | if (thisCollection !== lastCollection) { 382 | // update column headers if needed 383 | gridApi.updateGridOptions({ columnDefs: columnDefs }) 384 | } 385 | gridApi.setRowData(searchResults.result); 386 | loadingElement.style.display = "none"; 387 | submit_button_text.textContent = "Submit"; 388 
| submitButton.removeAttribute("disabled"); 389 | 390 | lastCollection = thisCollection 391 | return 392 | } 393 | catch { } 394 | 395 | // If the grid is not initialized, create the grid 396 | gridOptions = { 397 | autoSizeStrategy: { 398 | type: 'fitCellContents' 399 | }, 400 | domLayout: 'autoHeight', // Add this line to enable auto height 401 | columnDefs: columnDefs, 402 | rowData: searchResults.result, 403 | tooltipShowDelay: 0, 404 | overlayLoadingTemplate: 405 | '
', 406 | overlayNoRowsTemplate: 407 | 'This is a custom \'no rows\' overlay', 408 | 409 | }; 410 | 411 | gridApi = createGrid(eGridDiv, gridOptions); 412 | document.getElementById("exportDropdown").removeAttribute("disabled") 413 | document.getElementById("quickFilter").style.display = ""; 414 | 415 | } 416 | 417 | loadingElement.style.display = "none"; 418 | submit_button_text.textContent = "Submit"; 419 | submitButton.removeAttribute("disabled"); 420 | 421 | // on first click add the quick filter listener 422 | if (lastCollection == "" && !filterTextBox._listenerInitialized) { 423 | function onFilterTextBoxChanged() { 424 | gridApi.setGridOption( 425 | 'quickFilterText', 426 | document.getElementById('filter-text-box').value 427 | ); 428 | } 429 | 430 | filterTextBox.addEventListener('input', () => { 431 | onFilterTextBoxChanged(); 432 | }); 433 | 434 | // Mark listener as initialized 435 | filterTextBox._listenerInitialized = true; 436 | 437 | } 438 | lastCollection = thisCollection 439 | 440 | } catch (error) { 441 | console.error(error); 442 | } 443 | } 444 | } 445 | 446 | async function exportData(data, format) { 447 | // Function to flatten the payload object within each object 448 | function flattenPayload(jsonData) { 449 | // Check if jsonData is an array 450 | if (!Array.isArray(jsonData)) { 451 | console.error('jsonData is not an array'); 452 | return jsonData; 453 | } 454 | 455 | // Check if every element in the array is an object 456 | if (!jsonData.every(item => typeof item === 'object')) { 457 | console.error('One or more elements in jsonData are not objects'); 458 | return jsonData; 459 | } 460 | 461 | // Map over the array and flatten the payload object 462 | return jsonData.map(item => { 463 | const { payload, ...rest } = item; 464 | return { ...rest, ...payload }; 465 | }); 466 | } 467 | 468 | // Flatten the payload object within each object 469 | let jsonData = flattenPayload(data.result); 470 | 471 | // Based on the format parameter, generate 
the output accordingly 472 | if (format === 'excel') { 473 | // Create a new workbook 474 | const workbook = XLSX.utils.book_new(); 475 | 476 | // Convert JSON to worksheet 477 | const worksheet = XLSX.utils.json_to_sheet(jsonData); 478 | 479 | // Add the worksheet to the workbook 480 | XLSX.utils.book_append_sheet(workbook, worksheet, 'Sheet1'); 481 | 482 | // Generate a blob from the workbook 483 | const excelBlob = new Blob([XLSX.write(workbook, { type: 'array', bookType: 'xlsx' })], { 484 | type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' 485 | }); 486 | 487 | // Create a temporary URL for the blob 488 | const excelUrl = URL.createObjectURL(excelBlob); 489 | 490 | // Create a link element 491 | const link = document.createElement('a'); 492 | link.href = excelUrl; 493 | link.download = `${document.getElementById("inputText").value.trim()}.xlsx`; 494 | 495 | // Append the link to the document body and trigger the download 496 | document.body.appendChild(link); 497 | link.click(); 498 | 499 | // Clean up 500 | URL.revokeObjectURL(excelUrl); 501 | document.body.removeChild(link); 502 | } else if (format === 'csv') { 503 | // Convert JSON to CSV 504 | const csvContent = jsonData.map(row => { 505 | return Object.values(row).map(value => { 506 | if (typeof value === 'string') { 507 | // Escape double quotes within the value and enclose it in double quotes 508 | return '"' + value.replace(/"/g, '""') + '"'; 509 | } 510 | return value; 511 | }).join(','); 512 | }).join('\n'); 513 | 514 | // Create a blob from the CSV content 515 | const csvBlob = new Blob([csvContent], { type: 'text/csv' }); 516 | const csvUrl = URL.createObjectURL(csvBlob); 517 | 518 | // Create a link element 519 | const link = document.createElement('a'); 520 | link.href = csvUrl; 521 | link.download = `${document.getElementById("inputText").value.trim()}.csv`; 522 | 523 | // Append the link to the document body and trigger the download 524 | document.body.appendChild(link); 
525 | link.click(); 526 | 527 | // Clean up 528 | URL.revokeObjectURL(csvUrl); 529 | document.body.removeChild(link); 530 | 531 | } else if (format === 'json') { 532 | // Convert JSON to string 533 | const jsonString = JSON.stringify(jsonData, null, 2); 534 | 535 | // Create a blob from the JSON string 536 | const jsonBlob = new Blob([jsonString], { type: 'application/json' }); 537 | 538 | // Create a temporary URL for the blob 539 | const jsonUrl = URL.createObjectURL(jsonBlob); 540 | 541 | // Create a link element 542 | const link = document.createElement('a'); 543 | link.href = jsonUrl; 544 | link.download = `${document.getElementById("inputText").value.trim()}.json`; 545 | 546 | // Append the link to the document body and trigger the download 547 | document.body.appendChild(link); 548 | link.click(); 549 | 550 | // Clean up 551 | URL.revokeObjectURL(jsonUrl); 552 | document.body.removeChild(link); 553 | } else { 554 | console.error('Unsupported format'); 555 | } 556 | } 557 | 558 | async function tsne() { 559 | semanticWorker.postMessage({ 560 | type: "tsne", 561 | data: { 562 | "queryEmbedding": queryEmbedding, 563 | "searchResults": searchResults, 564 | "iterations": document.getElementById("dimReductionIterations").value, 565 | "dimensionalityReductionSimilarityThreshold": document.getElementById("dimensionalityReductionSimilarityThreshold").value, 566 | "colorBy": document.getElementById("colorBy").value 567 | } 568 | 569 | }); 570 | } 571 | 572 | document.getElementById('copyURLButton').addEventListener('click', function () { 573 | var urlToCopy = window.location.href; 574 | navigator.clipboard.writeText(urlToCopy) 575 | .then(function () { 576 | }) 577 | .catch(function (err) { 578 | console.error('Failed to copy URL to clipboard: ', err); 579 | }); 580 | }); 581 | 582 | document.addEventListener('DOMContentLoaded', function () { 583 | // Initialize a counter for the current row count 584 | let rowCount = 1; // Assuming the initial row is already present 
585 | 586 | // Function to clone the row and replace the plus button with a minus button 587 | function TaddRow() { 588 | const originalRow = document.getElementById('initialQueryContainer'); 589 | const clone = originalRow.cloneNode(true); 590 | clone.id = 'queryContainer' + rowCount; // Adjust the ID of the cloned row 591 | 592 | // Adjust IDs of all elements within the cloned row 593 | clone.querySelectorAll('input, button').forEach(function (element) { 594 | const currentId = element.id; 595 | const newId = currentId.replace(/\d+$/, rowCount); // Replace the last digit(s) with the current rowCount 596 | element.id = newId; 597 | }); 598 | 599 | const minusButton = clone.querySelector('.queryButton'); 600 | minusButton.innerHTML = ` 601 | 602 | ` 603 | minusButton.title = 'Remove query'; 604 | minusButton.addEventListener('click', function () { removeRow(clone); }); // Attach event listener to the minus button 605 | 606 | document.getElementById('queryRowsContainer').appendChild(clone); 607 | rowCount++; // Increment the row count 608 | } 609 | 610 | // Add event listener to the plus button 611 | const plusButton = document.querySelector('.btn-light'); 612 | plusButton.addEventListener('click', TaddRow); 613 | }); 614 | 615 | var URLModeHidden = document.getElementById("copyURLButton").hidden; 616 | 617 | if (URLModeHidden) { 618 | 619 | } else { 620 | // Call the function initially to set form inputs from URL parameters 621 | document.addEventListener('DOMContentLoaded', function () { 622 | // Your code here 623 | setFormInputsFromURL(); 624 | 625 | // Use event delegation to handle inputs dynamically added 626 | document.body.addEventListener('input', function (event) { 627 | if (event.target.matches('.form-control, .form-check-input')) { 628 | updateURL(event); 629 | } 630 | 631 | if (event.target.matches('.inputText')) { 632 | // Check if the trimmed value of the input is "special_vector" 633 | if (event.target.value.trim() === "special_vector") { 634 | // 
// RGBA colour given to every point when the data holds at most one colour class.
const SINGLE_CLASS_COLOR = [13, 10, 253, 1];

// Smallest and largest value of `pick(item)` over `items`, found in a single
// pass. Avoids `Math.min(...array)`, which passes every element as a call
// argument and throws a RangeError once the array gets large.
function computeExtent(items, pick) {
    let min = Infinity;
    let max = -Infinity;
    for (const item of items) {
        const value = pick(item);
        if (value < min) {
            min = value;
        }
        if (value > max) {
            max = value;
        }
    }
    return { min, max };
}

// Maps `value` from [extent.min, extent.max] onto [0, 1]. Returns `fallback`
// when the range is empty or has zero width, instead of dividing by zero.
function rescale(value, extent, fallback) {
    const span = extent.max - extent.min;
    return span > 0 ? (value - extent.min) / span : fallback;
}

// Builds a function that maps a colour class to its RGBA array. One palette
// entry is requested per distinct class; lookups go through a Map so colouring
// n points costs O(n) rather than one indexOf scan per point.
function createColorLookup(data) {
    const classes = [...new Set(data.map((item) => item.colorClass))];
    if (classes.length <= 1) {
        return () => SINGLE_CLASS_COLOR;
    }

    const palette = distinctColors({ count: classes.length });
    // NOTE(review): reads the `_rgb` field of each palette entry exactly as the
    // previous code did - confirm whether a public accessor should be used.
    const colorByClass = new Map(
        classes.map((colorClass, index) => [colorClass, palette[index]._rgb])
    );
    return (colorClass) => colorByClass.get(colorClass);
}

// Shared hover/click handler: shows the tooltip beside the picked point, or
// hides it when the event carries no picked object.
function updateTooltip(info) {
    const tooltip = document.getElementById('tooltip');

    if (!info.object) {
        tooltip.style.display = 'none';
        return;
    }

    const rect = document.getElementById('deckgl').getBoundingClientRect();

    // Event position plus the canvas rectangle offset and the page scroll,
    // nudged 30px right and 50px up.
    const left = window.scrollX + info.x + rect.left + 30;
    const top = window.scrollY + info.y + rect.top - 50;

    // NOTE(review): the label is inserted as HTML; if labels can contain
    // untrusted text it must be escaped first. The separator before
    // "Similarity" is a line break as it appears in this source - confirm
    // whether an HTML line-break tag was intended.
    tooltip.innerHTML = `${info.object.label}\nSimilarity: ${info.object.similarity.toFixed(2)}`;
    tooltip.style.left = `${left}px`;
    tooltip.style.top = `${top}px`;
    tooltip.style.display = 'block';
}

/**
 * Replaces the current scatterplot with a new one drawn from `data`.
 *
 * x and y are rescaled to [0, 1]; when all values on an axis are equal
 * (including the single-point case) the coordinate falls back to 0.5 instead
 * of becoming NaN. Points are coloured per distinct `colorClass`, and the
 * alpha channel either follows the rescaled similarity or is fully opaque.
 * The input items are not modified.
 *
 * @param {Array<{x: number, y: number, similarity: number, label: string, colorClass: *}>} data
 *     Points to draw.
 * @param {boolean} similarityOpacity When truthy, alpha is the similarity
 *     rescaled to 0-255 (255 when all similarities are equal); otherwise 255.
 * @returns {Promise<void>} Settles once the Deck instance has been created.
 */
export async function loadScatterplot(data, similarityOpacity) {
    removeScatterplot();

    const xExtent = computeExtent(data, (item) => item.x);
    const yExtent = computeExtent(data, (item) => item.y);
    const similarityExtent = computeExtent(data, (item) => item.similarity);
    const colorFor = createColorLookup(data);

    const points = data.map((item) => {
        const alpha = similarityOpacity
            ? Math.floor(Math.min(1, Math.max(0, rescale(item.similarity, similarityExtent, 1))) * 255)
            : 255;

        return {
            coordinates: [rescale(item.x, xExtent, 0.5), rescale(item.y, yExtent, 0.5)],
            // Keep the class colour's RGB channels and substitute the alpha.
            color: [...colorFor(item.colorClass).slice(0, -1), alpha],
            similarity: item.similarity,
            label: item.label,
        };
    });

    // Centre the view on the bounding box of the rescaled points; with no
    // points at all, use the middle of the unit square rather than NaN.
    const centerOf = (extent) => (
        Number.isFinite(extent.min) && Number.isFinite(extent.max)
            ? (extent.min + extent.max) / 2
            : 0.5
    );
    const longitude = centerOf(computeExtent(points, (point) => point.coordinates[0]));
    const latitude = centerOf(computeExtent(points, (point) => point.coordinates[1]));

    deckgl = new Deck({
        canvas: 'deckgl',
        container: 'plot-container',
        initialViewState: {
            latitude,
            longitude,
            zoom: 9
        },
        controller: true,
        pickingRadius: 25,
        layers: [
            new ScatterplotLayer({
                id: 'scatterplot',
                data: points,
                getPosition: (d) => d.coordinates,
                // Read once, when the plot is built.
                getRadius: Number.parseInt(document.getElementById("scatterplotRadius").value, 10),
                getFillColor: (d) => d.color,
                pickable: true, // Needed for the hover/click tooltip
                onHover: updateTooltip,
                onClick: updateTooltip
            })
        ]
    });

    plotContainer.style.height = "700px";
}
// Smallest number of vectors handed to the t-SNE encoder. Shorter inputs are
// padded with random vectors (workaround: the wasm module has no perplexity
// parameter yet) and the padding is dropped again from the result.
const MIN_TSNE_VECTORS = 61;

/**
 * Worker message handler.
 *
 * For a message of type 'tsne', `message.data` is read for:
 *   - queryEmbedding: stored in the module-level `queryEmbedding` used by
 *     calculateCosineSimilarity,
 *   - searchResults.result: items with a `payload` object and a `vector`,
 *   - iterations: passed to the encoder's barnes_hut call,
 *   - dimensionalityReductionSimilarityThreshold: points whose cosine
 *     similarity to the query is below it are left out,
 *   - colorBy: payload field used as each point's colour class.
 *
 * Replies with `{ type: 'tsne', plotDataArray }`, where every entry holds
 * x, y, label, similarity and colorClass. Label, vector and colour class are
 * kept together per point, so items sharing a label (the last one wins, as
 * before) or labels that look like integers can no longer shift colours onto
 * the wrong point. An empty result list yields an empty `plotDataArray`.
 */
self.onmessage = async (event) => {
    const message = event.data;
    switch (message.type) {
        case 'tsne': {
            const start = performance.now();
            const { searchResults, colorBy } = message.data;
            // Form values arrive as strings; convert them once up front.
            const iterations = Number(message.data.iterations);
            const similarityThreshold = Number(message.data.dimensionalityReductionSimilarityThreshold);
            queryEmbedding = message.data.queryEmbedding;

            // One entry per distinct label, in first-seen order.
            const pointsByLabel = new Map();
            for (const item of searchResults.result) {
                // The label is every payload value joined together.
                // NOTE(review): the separator appears as a line break in this
                // source; if the tooltip relies on an HTML line-break tag,
                // restore it here.
                const label = Object.values(item.payload).join('\n');
                pointsByLabel.set(label, {
                    vector: item.vector,
                    colorClass: item.payload[colorBy],
                });
            }
            const points = [...pointsByLabel].map(([label, point]) => ({ label, ...point }));

            // Nothing to project: answer with an empty plot instead of
            // failing on the first vector's length below.
            if (points.length === 0) {
                self.postMessage({ type: 'tsne', plotDataArray: [] });
                break;
            }

            const vectors = convertFloat32ArraysToArrays(points.map((point) => point.vector));
            const realCount = vectors.length;
            const paddingCount = Math.max(0, MIN_TSNE_VECTORS - realCount);

            if (paddingCount > 0) {
                const vectorLength = vectors[0].length; // Assuming all vectors have the same length
                console.log("added: ", paddingCount);
                for (let i = 0; i < paddingCount; i++) {
                    vectors.push(Array.from({ length: vectorLength }, () => Math.random()));
                }
            }

            const tsneEncoder = new tSNE(vectors);
            let compressedVectors = tsneEncoder.barnes_hut(iterations);
            if (paddingCount > 0) {
                // Drop the projections of the random padding vectors.
                compressedVectors = compressedVectors.slice(0, realCount);
            }

            const end = performance.now();
            console.log('BHtSNE Execution time:', Math.round(end - start), 'ms');

            const plotDataArray = [];
            points.forEach((point, index) => {
                const similarity = calculateCosineSimilarity(point.vector);
                if (similarity >= similarityThreshold) {
                    const [x, y] = compressedVectors[index];
                    plotDataArray.push({
                        x,
                        y,
                        label: point.label,
                        similarity,
                        colorClass: point.colorClass,
                    });
                }
            });

            self.postMessage({
                type: 'tsne',
                plotDataArray
            });
            break;
        }

        default:
            console.error(`Unknown message type: ${message.type}`);
    }
};