├── .eslintrc.cjs
├── .github
│   └── workflows
│       └── static.yml
├── .gitignore
├── .npmignore
├── LICENSE
├── README.md
├── demos
│   ├── HorizontalLinkList.jsx
│   ├── paragraphs_as_options
│   │   ├── App.jsx
│   │   ├── context.jsx
│   │   ├── data.json
│   │   └── main.jsx
│   └── simple_autocomplete
│       ├── App.jsx
│       └── main.jsx
├── gif-20240430-032634.gif
├── index.html
├── package-lock.json
├── package.json
├── src
│   ├── SemanticAutocomplete.jsx
│   └── worker.js
└── vite.config.js

--------------------------------------------------------------------------------
/.eslintrc.cjs:
--------------------------------------------------------------------------------
module.exports = {
  root: true,
  env: { browser: true, es2020: true },
  extends: [
    'eslint:recommended',
    'plugin:react/recommended',
    'plugin:react/jsx-runtime',
    'plugin:react-hooks/recommended',
  ],
  ignorePatterns: ['dist', '.eslintrc.cjs'],
  parserOptions: { ecmaVersion: 'latest', sourceType: 'module' },
  settings: { react: { version: '18.2' } },
  plugins: ['react-refresh'],
  rules: {
    'react/jsx-no-target-blank': 'off',
    'react-refresh/only-export-components': [
      'warn',
      { allowConstantExport: true },
    ],
  },
}

--------------------------------------------------------------------------------
/.github/workflows/static.yml:
--------------------------------------------------------------------------------
# Simple workflow for deploying static content to GitHub Pages
name: Deploy static content to Pages

on:
  # Runs on pushes targeting the default branch
  push:
    branches: ["gh-pages"]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
  contents: read
  pages: write
  id-token: write

# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  # Single deploy job since we're just deploying
  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup Pages
        uses: actions/configure-pages@v5
      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          # Upload entire repository
          path: '.'
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*

node_modules
dist
dist-ssr
*.local

# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*

node_modules
dist
dist-ssr
*.local

# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

public
index.html
.eslintrc.cjs
.gitignore
src/App.jsx
src/main.jsx
package-lock.json
vite.config.js

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright 2024 Mihai Chirculescu

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# semantic-autocomplete

semantic-autocomplete is a React component that extends [MUI's v6 Autocomplete](https://v6.mui.com/material-ui/react-autocomplete/) and performs **semantic similarity search** using a small, quantized machine learning (ML) model that runs client-side. The model is downloaded once and afterwards served from the browser's cache. The full functionality is provided within this React component!

## Demo

**Sort paragraphs of a webpage by meaning:**

https://mihaiii.github.io/semantic-autocomplete/

![](https://raw.githubusercontent.com/Mihaiii/semantic-autocomplete/main/gif-20240430-032634.gif)

## v5 MUI support

This component works with both v5 and v6 MUI. It has not been tested by the author on lower MUI versions.

## How to install

Install:

`npm install --save semantic-autocomplete`

Then import:

`import SemanticAutocomplete from "semantic-autocomplete";`

## Run locally from source code

```
npm install
npm run dev
```

## Usage

Since semantic-autocomplete extends [MUI's Autocomplete](https://v6.mui.com/material-ui/react-autocomplete/), the entire [v6 MUI Autocomplete API](https://v6.mui.com/material-ui/api/autocomplete/) also works on semantic-autocomplete. The only exception is the [filterOptions prop](https://mui.com/material-ui/react-autocomplete/#custom-filter), which the component overrides internally so it can do its own semantic sorting and filtering.
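A minimal drop-in usage looks like this (a sketch: the `Example` component name and the option data are illustrative; any option shape works as long as the default `getOptionLabel`, which reads `option.label`, or one you supply can extract a text label):

```
import SemanticAutocomplete from "semantic-autocomplete";
import { TextField } from "@mui/material";

export default function Example() {
  // illustrative options; a query like "cutlery" matches these by meaning
  const options = [
    { label: "Spoon", value: 1 },
    { label: "Fork", value: 2 },
    { label: "Knife", value: 3 },
  ];

  return (
    <SemanticAutocomplete
      options={options}
      renderInput={(params) => <TextField {...params} label="Search" />}
    />
  );
}
```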
**If you're already using `Autocomplete` in your project, just replace the tag name and you're done.** 🙌

You can see the component being used in code [here](https://github.com/Mihaiii/semantic-autocomplete/blob/6d312a6264b7c3b79d053e23d3cdb4cf226196a1/demos/paragraphs_as_options/App.jsx#L26-L34) and [here](https://github.com/Mihaiii/semantic-autocomplete/blob/6d312a6264b7c3b79d053e23d3cdb4cf226196a1/demos/simple_autocomplete/App.jsx#L107-L112).

[See this page for how you can use MUI's Autocomplete — and therefore semantic-autocomplete too](https://v6.mui.com/material-ui/react-autocomplete/).

Besides MUI's Autocomplete API, the following props are provided:

- `threshold`: if set, the component filters out options whose cosine similarity to the input falls below this value. Defaults to unset, meaning no filtering, only sorting. [Click for code example](https://github.com/Mihaiii/semantic-autocomplete/blob/6d312a6264b7c3b79d053e23d3cdb4cf226196a1/demos/simple_autocomplete/App.jsx#L110).

- `onResult`: callback invoked once the sorting/filtering of the options is done, receiving the resulting options array as its first parameter. [Click for code example](https://github.com/Mihaiii/semantic-autocomplete/blob/6d312a6264b7c3b79d053e23d3cdb4cf226196a1/demos/paragraphs_as_options/App.jsx#L29).

- `model`: the name of the Hugging Face model repo. The repo has to contain an ONNX embeddings model, laid out in the standard folder structure used by transformers.js. If you're interested in changing the default model, you might find [this filter](https://huggingface.co/models?pipeline_tag=sentence-similarity&library=onnx&sort=trending) useful. [I made a bunch of small models for this component. Try them out and see what works best for your use case](https://huggingface.co/collections/Mihaiii/pokemons-662ce912d64b8a3bee518b7f). Default value: `Mihaiii/Venusaur` (pointing to [this repo](https://huggingface.co/Mihaiii/Venusaur)), which loads the quantized ONNX model of **~15 MB**. [Click here for code example](https://github.com/Mihaiii/semantic-autocomplete/blob/b16115492466eb1502107cf4581a804cb1dcbbe4/demos/simple_autocomplete/App.jsx#L115).

- `pipelineParams`: the params passed to [transformers.js](https://github.com/xenova/transformers.js) when computing the embeddings. Default value: `{ pooling: "mean", normalize: true }`. For more info, please [see this page](https://huggingface.co/docs/transformers.js/api/pipelines#module_pipelines.FeatureExtractionPipeline).
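Put together, a sketch of all four props (assuming `options` and `TextField` as in the earlier snippet; the `threshold` value and the `onResult` handler are illustrative choices, while `model` and `pipelineParams` are shown with their documented defaults):

```
<SemanticAutocomplete
  options={options}
  // drop options whose cosine similarity to the input is below 0.6 (illustrative)
  threshold={0.6}
  // receives the sorted (and possibly filtered) options array
  onResult={(sortedOptions) => console.log(sortedOptions)}
  // default model repo; any transformers.js-compatible ONNX embeddings repo works
  model="Mihaiii/Venusaur"
  // defaults, forwarded to the transformers.js feature-extraction call
  pipelineParams={{ pooling: "mean", normalize: true }}
  renderInput={(params) => <TextField {...params} label="Search" />}
/>
```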
## Thanks / credit

- [xenova](https://x.com/xenovacom?t=Mw1h_1joKgfrUXR_wl9Wrg&s=09) for building [transformers.js](https://github.com/xenova/transformers.js), providing clear and detailed documentation, always being willing to help out, and having [lots of demos](https://github.com/xenova/transformers.js/tree/main/examples) on [his HF account](https://huggingface.co/Xenova). The work for this component is based on his tutorial on [how to build a React component using transformers.js](https://huggingface.co/docs/transformers.js/en/tutorials/react).
- [andersonbcdefg](https://x.com/andersonbcdefg?t=0Nkr_SRk-fMUrU_Kp0Wm5w&s=09) for building many small models like [gte-tiny](https://huggingface.co/TaylorAI/gte-tiny) or [bge-micro-v2](https://huggingface.co/TaylorAI/bge-micro-v2) and for providing me with some guidance prior to making [Venusaur](https://huggingface.co/Mihaiii/Venusaur).

--------------------------------------------------------------------------------
/demos/HorizontalLinkList.jsx:
--------------------------------------------------------------------------------
import React from 'react';
import { List, ListItem, Link } from '@mui/material';

const links = [
  { href: 'https://github.com/Mihaiii/semantic-autocomplete', title: 'GitHub' },
  { href: 'https://huggingface.co/Mihaiii/Venusaur', title: 'Model' },
  { href: 'https://www.npmjs.com/package/semantic-autocomplete', title: 'npm' },
  { href: 'https://mihaiii.github.io/semantic-autocomplete/', title: 'Demo' },
];

const HorizontalLinkList = () => {
  const listStyle = {
    display: 'flex',
    flexDirection: 'row',
    padding: 0
  };

  return (
    <List style={listStyle}>
      {links.map((link, index) => (
        <ListItem key={index}>
          <Link href={link.href} target="_blank">
            {link.title}
          </Link>
        </ListItem>
      ))}
    </List>
  );
};

export default HorizontalLinkList;

--------------------------------------------------------------------------------
/demos/paragraphs_as_options/App.jsx:
--------------------------------------------------------------------------------
import SemanticAutocomplete from "../../src/SemanticAutocomplete";
import { TextField, ListItem, ListItemText, List } from "@mui/material";
import React, { useContext, useMemo } from 'react'
const SemanticAutocompleteMemoized = React.memo(SemanticAutocomplete)
import { SortedOptionsContext } from './context.jsx'
import jsonData from './data.json';
import HorizontalLinkList from '../HorizontalLinkList.jsx'

function App() {
  const options = useMemo(() => jsonData, []);
  const { sortedOptions, setSortedOptions } = useContext(SortedOptionsContext);

  const ResultsList = () => {
    return (
      <List>
        {sortedOptions.map(op => (
          <ListItem key={op.value}>
            <ListItemText primary={op.label} />
          </ListItem>
        ))}
      </List>
    );
  }

  return (
    <div>
      <HorizontalLinkList />
      <SemanticAutocompleteMemoized
        onResult={setSortedOptions}
        options={options}
        renderInput={(params) => <TextField {...params} label="Semantic search" />}
        open={false}
        popupIcon={null}
      />
      <ResultsList />
    </div>
  );
}

export default App;

--------------------------------------------------------------------------------
/demos/paragraphs_as_options/context.jsx:
--------------------------------------------------------------------------------
import React, { createContext, useState } from 'react';
import jsonData from './data.json';

const SortedOptionsContext = createContext();

export const SortedOptionsProvider = ({ children }) => {
  const [sortedOptions, setSortedOptions] = useState(jsonData);

  return (
    <SortedOptionsContext.Provider value={{ sortedOptions, setSortedOptions }}>
      {children}
    </SortedOptionsContext.Provider>
  );
};

export { SortedOptionsContext };

--------------------------------------------------------------------------------
/demos/paragraphs_as_options/data.json:
--------------------------------------------------------------------------------
[
  {
    "label": "Word embeddings are a type of word representation that allows words to be represented as vectors in a continuous vector space. The primary goal is to capture the semantic meaning of words so that words with similar meanings are located close to each other in this space. This is achieved by transforming sparse, high-dimensional word vectors into lower-dimensional spaces while preserving semantic relationships.",
    "value": 1
  },
  {
    "label": "Embeddings are used extensively across various NLP tasks. Some common applications include text classification, sentiment analysis, language modeling, and machine translation. They are also integral to more complex tasks like question-answering systems, chatbots, and content recommendation systems. Beyond NLP, embeddings find applications in image and video analysis, where they help in tasks like image classification and facial recognition.",
    "value": 2
  },
  {
    "label": "Embeddings are used because they provide a dense and efficient representation of words, capturing complex patterns in language that are not apparent at the surface level. Unlike one-hot encoding, which treats words as isolated units without any notion of similarity, embeddings map words into a vector space based on their usage and context. This allows models to understand synonyms, analogies, and the overall semantics of text, leading to more nuanced and intelligent processing.",
    "value": 3
  },
  {
    "label": "Embeddings are typically created using models like Word2Vec, GloVe, or FastText, which learn representations by analyzing word co-occurrences and relationships in large corpora of text. These models apply algorithms to adjust the position of each word in the vector space, such that the distance between vectors captures semantic relationships between words. For example, similar words are placed closer together, whereas unrelated words are positioned farther apart.",
    "value": 4
  },
  {
    "label": "While embeddings are powerful, they also present challenges. One major concern is bias, as embeddings can perpetuate and amplify biases present in the training data. This requires careful consideration and mitigation strategies during model development and deployment. Additionally, creating and fine-tuning embeddings for specific domains or languages with limited resources can be challenging, necessitating innovative approaches to leverage embeddings effectively across diverse contexts.",
    "value": 5
  },
  {
    "label": "Traditional word embeddings, like Word2Vec and GloVe, generate a single representation for each word, regardless of its context. This means that words with multiple meanings are represented by the same vector across different uses. Contextual embeddings, introduced by models such as BERT and ELMo, represent words as vectors that vary depending on the word's context within a sentence. This allows these models to capture the nuances of language more effectively, distinguishing between different meanings of a word based on its usage.",
    "value": 6
  },
  {
    "label": "While primarily designed to capture semantic relationships between words, embeddings can also encode aspects of syntax and grammar to a certain extent. For example, embeddings can reflect syntactic categories like part of speech, and models trained on sentence-level tasks can learn representations that implicitly encode grammatical structures. However, explicit modeling of syntax and grammar often requires architectures designed specifically for these aspects, such as syntactic parsing models.",
    "value": 7
  },
  {
    "label": "Embeddings are a cornerstone of transfer learning in NLP. Pre-trained embeddings, generated from large-scale language models on extensive corpora, can be used as the starting point for training on specific tasks. This approach allows models to leverage general linguistic knowledge learned from the broader language use, significantly improving performance on tasks with limited training data. Transfer learning with embeddings accelerates model development and enhances capabilities in domain-specific applications.",
    "value": 8
  },
  {
    "label": "Evaluating the quality of embeddings involves assessing how well they capture semantic and syntactic relationships. This is often done through intrinsic methods, like analogy solving (e.g., \"king\" is to \"man\" as \"queen\" is to \"woman\") and similarity assessments, or through extrinsic methods, where embeddings are evaluated based on their performance in downstream tasks like text classification or sentiment analysis. Both approaches provide insights into the effectiveness of embeddings in encoding linguistic properties.",
    "value": 9
  },
  {
    "label": "Significant efforts are underway to develop and refine embeddings for a wide range of languages beyond English. This includes both multilingual models, which learn embeddings capable of representing multiple languages in a single vector space, and language-specific models that cater to the unique characteristics of individual languages. Challenges in this area include dealing with low-resource languages and adapting models to capture linguistic features unique to each language.",
    "value": 10
  },
  {
    "label": "Future developments in embeddings may focus on several areas, including improving the handling of polysemy and context, reducing biases in embeddings, and enhancing the efficiency and scalability of embedding models for large-scale applications. Additionally, there's a growing interest in cross-modal embeddings, which can represent data from different modalities (e.g., text and images) in a unified vector space, opening up new possibilities for multimodal applications and AI systems.",
    "value": 11
  },
  {
    "label": "Graph embeddings aim to represent nodes, edges, and possibly whole subgraphs of a graph in a continuous vector space. These embeddings capture the structure of the graph as well as node-level and edge-level properties. Applications of graph embeddings include social network analysis, where they can predict connections or recommend content; knowledge graph completion, where they can infer missing relations; and in bioinformatics, for example, to predict protein interactions.",
    "value": 12
  },
  {
    "label": "Embeddings can be adapted for time-series data by creating representations that capture temporal dynamics in addition to the underlying patterns. This involves training embeddings not just on the static features of data points but also on their changes over time, enabling models to understand periodic trends, anomalies, and long-term shifts in data. Applications include financial market analysis, weather forecasting, and predictive maintenance, where understanding the temporal dimension is crucial.",
    "value": 13
  },
  {
    "label": "Scaling embedding models presents several challenges, including computational demands, memory requirements, and maintaining the quality of embeddings as the size of the data and the model increases. Solutions to these challenges include more efficient model architectures, quantization techniques to reduce the size of embeddings, and distributed computing strategies. Addressing these issues is key to enabling the application of embeddings to ever-larger datasets and more complex problems.",
    "value": 14
  }
]

--------------------------------------------------------------------------------
/demos/paragraphs_as_options/main.jsx:
--------------------------------------------------------------------------------
import React from 'react'
import ReactDOM from 'react-dom/client'
import App from './App.jsx'
import { SortedOptionsProvider } from './context.jsx';

ReactDOM.createRoot(document.getElementById('root')).render(
  <React.StrictMode>
    <SortedOptionsProvider>
      <App />
    </SortedOptionsProvider>
  </React.StrictMode>
)

--------------------------------------------------------------------------------
/demos/simple_autocomplete/App.jsx:
--------------------------------------------------------------------------------
import SemanticAutocomplete from "../../src/SemanticAutocomplete";
import { TextField } from "@mui/material";
import HorizontalLinkList from '../HorizontalLinkList.jsx'
import React from 'react'

function App() {
  const options = [
    { label: "Spoon", value: 1 },
    { label: "Fork", value: 2 },
    { label: "Knife", value: 3 },
    { label: "Plate", value: 4 },
    { label: "Cup", value: 5 },
    { label: "Mug", value: 6 },
    { label: "Bowl", value: 7 },
    { label: "Teapot", value: 8 },
    { label: "Frying Pan", value: 9 },
    { label: "Saucepan", value: 10 },
    { label: "Spatula", value: 11 },
    { label: "Whisk", value: 12 },
    { label: "Oven Mitt", value: 13 },
    { label: "Cutting Board", value: 14 },
    { label: "Measuring Cup", value: 15 },
    { label: "Blender", value: 16 },
    { label: "Toaster", value: 17 },
    { label: "Microwave", value: 18 },
    { label: "Refrigerator", value: 19 },
    { label: "Dishwasher", value: 20 },
    { label: "Table", value: 21 },
    { label: "Chair", value: 22 },
    { label: "Sofa", value: 23 },
    { label: "Lamp", value: 24 },
    { label: "Bookshelf", value: 25 },
    { label: "Bed", value: 26 },
    { label: "Mattress", value: 27 },
    { label: "Pillow", value: 28 },
    { label: "Blanket", value: 29 },
    { label: "Dresser", value: 30 },
    { label: "Mirror", value: 31 },
    { label: "Alarm Clock", value: 32 },
    { label: "Curtains", value: 33 },
label: "Rug", value: 34 }, 42 | { label: "Trash Can", value: 35 }, 43 | { label: "Laundry Basket", value: 36 }, 44 | { label: "Washing Machine", value: 37 }, 45 | { label: "Dryer", value: 38 }, 46 | { label: "Iron", value: 39 }, 47 | { label: "Vacuum Cleaner", value: 40 }, 48 | { label: "Broom", value: 41 }, 49 | { label: "Mop", value: 42 }, 50 | { label: "Bucket", value: 43 }, 51 | { label: "Garden Hose", value: 44 }, 52 | { label: "Rake", value: 45 }, 53 | { label: "Shovel", value: 46 }, 54 | { label: "Lawn Mower", value: 47 }, 55 | { label: "Hammer", value: 48 }, 56 | { label: "Screwdriver", value: 49 }, 57 | { label: "Wrench", value: 50 }, 58 | { label: "Drill", value: 51 }, 59 | { label: "Saw", value: 52 }, 60 | { label: "Nails", value: 53 }, 61 | { label: "Screws", value: 54 }, 62 | { label: "Bolts", value: 55 }, 63 | { label: "Paint Brush", value: 56 }, 64 | { label: "Roller", value: 57 }, 65 | { label: "Paint", value: 58 }, 66 | { label: "Vase", value: 59 }, 67 | { label: "Picture Frame", value: 60 }, 68 | { label: "Candle", value: 61 }, 69 | { label: "Book", value: 62 }, 70 | { label: "Magazine", value: 63 }, 71 | { label: "Remote Control", value: 64 }, 72 | { label: "TV", value: 65 }, 73 | { label: "Speaker", value: 66 }, 74 | { label: "Laptop", value: 67 }, 75 | { label: "Phone", value: 68 }, 76 | { label: "Charger", value: 69 }, 77 | { label: "Flashlight", value: 70 }, 78 | { label: "Bicycle", value: 71 }, 79 | { label: "Skateboard", value: 72 }, 80 | { label: "Helmet", value: 73 }, 81 | { label: "Ball", value: 74 }, 82 | { label: "Gloves", value: 75 }, 83 | { label: "Scarf", value: 76 }, 84 | { label: "Umbrella", value: 77 }, 85 | { label: "Backpack", value: 78 }, 86 | { label: "Wallet", value: 79 }, 87 | { label: "Keys", value: 80 }, 88 | { label: "Sunglasses", value: 81 }, 89 | { label: "Watch", value: 82 }, 90 | { label: "Fitness Tracker", value: 83 }, 91 | { label: "Yoga Mat", value: 84 }, 92 | { label: "Treadmill", value: 85 }, 93 | { label: "Weights", value: 86 }, 94 | { label: "Swimsuit", value: 87 }, 95 | { label: "Towel", value: 88 }, 96 | { label: "Shampoo", value: 89 }, 97 | { label: "Soap", value: 90 }, 98 | { label: "Toothbrush", value: 91 }, 99 | { label: "Toothpaste", value: 92 }, 100 | { label: "Floss", value: 93 }, 101 | { label: "Razor", value: 94 }, 102 | { label: "Deodorant", value: 95 }, 103 | { label: "Perfume", value: 96 }, 104 | { label: "Makeup", value: 97 }, 105 | { label: "Hairbrush", value: 98 }, 106 | ]; 107 | 108 | return ( 109 |

  return (
    <div>
      <HorizontalLinkList />
      {/* the threshold value here is illustrative */}
      <SemanticAutocomplete
        options={options}
        threshold={0.5}
        renderInput={(params) => (
          <TextField {...params} label="Semantic search" />
        )}
      />
    </div>
  );
}

export default App;

--------------------------------------------------------------------------------
/demos/simple_autocomplete/main.jsx:
--------------------------------------------------------------------------------
import React from 'react'
import ReactDOM from 'react-dom/client'
import App from './App.jsx'

ReactDOM.createRoot(document.getElementById('root')).render(
  <React.StrictMode>
    <App />
  </React.StrictMode>,
)

--------------------------------------------------------------------------------
/gif-20240430-032634.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mihaiii/semantic-autocomplete/725535c75abf9658426bbf7f2ac0dbb2e26419e9/gif-20240430-032634.gif

--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>SemanticAutocomplete demo</title>
  </head>
  <body>
    <div id="root"></div>
    <!-- demo entry point -->
    <script type="module" src="/demos/paragraphs_as_options/main.jsx"></script>
  </body>
</html>
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "semantic-autocomplete",
  "description": "A blazing-fast semantic search React component. Match by meaning, not just by letters. Search as you type without waiting (no debounce needed). Rank by cosine similarity.",
  "version": "3.1.0",
  "homepage": "https://mihaiii.github.io/semantic-autocomplete/",
  "type": "module",
  "main": "dist/semantic-autocomplete.umd.js",
  "module": "dist/semantic-autocomplete.es.js",
  "scripts": {
    "deploy": "gh-pages -d ghpages",
    "dev": "vite",
    "build": "vite build",
    "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0",
    "preview": "vite preview"
  },
  "repository": {
    "type": "git",
    "url": "https://github.com/Mihaiii/semantic-autocomplete.git"
  },
  "files": [
    "dist/*",
    "README.md",
    "LICENSE"
  ],
  "dependencies": {
    "@emotion/styled": "^11.11.0",
    "@mui/material": ">=5.15.12",
    "@xenova/transformers": "^2.16.0",
    "react": "^18.2.0",
    "react-dom": "^18.2.0"
  },
  "devDependencies": {
    "@types/react": "^18.2.56",
    "@types/react-dom": "^18.2.19",
    "@vitejs/plugin-react": "^4.2.1",
    "eslint": "^8.56.0",
    "eslint-plugin-react": "^7.33.2",
    "eslint-plugin-react-hooks": "^4.6.0",
    "eslint-plugin-react-refresh": "^0.4.5",
    "gh-pages": "^6.1.1",
    "vite": "^5.1.4"
  },
  "keywords": [
    "semantic",
    "search",
    "nlp",
    "cosine",
    "similarity",
    "react",
    "component",
    "autocomplete",
    "mui",
    "material",
    "ui",
    "materialui"
  ],
  "author": "Mihai Chirculescu",
  "license": "MIT"
}

--------------------------------------------------------------------------------
/src/SemanticAutocomplete.jsx:
--------------------------------------------------------------------------------
import React, { useState, useRef, useEffect } from "react";
import { Autocomplete, TextField, CircularProgress } from "@mui/material";
import { cos_sim } from "@xenova/transformers";
import EmbeddingsWorker from "./worker?worker&inline";

const SemanticAutocomplete = React.forwardRef((props, ref) => {
  const {
    loading: userLoading,
    onInputChange: userOnInputChange,
    onOpen: userOnOpen,
    onClose: userOnClose,
  } = props;
  const { onResult, threshold, pipelineParams, model, ...restOfProps } = props;
  const [options, setOptions] = useState([]);
  const [isOpen, setIsOpen] = useState(false);
  const [isLoading, setLoading] = useState(true);
  const [parentSize, setParentSize] = useState(0);
  const worker = useRef(null);
  const optionsWithEmbeddings = useRef([]);
  const userInput = useRef("");
  const loading = userLoading ? true : isOpen && isLoading;
  const getOptionLabel = props.getOptionLabel || ((option) => option.label);

  useEffect(() => {
    if (!worker.current) {
      worker.current = new EmbeddingsWorker();

      worker.current.postMessage({
        type: "init",
        pipelineParams: pipelineParams,
        model: model,
      });
    }

    const onMessageReceived = (e) => {
      switch (e.data.status) {
        case "completeOptions":
          optionsWithEmbeddings.current = e.data.optionsWithEmbeddings;
          setOptions(props.options);
          setLoading(false);
          // if the user typed text before the embeddings were computed
          if (userInput.current) {
            worker.current.postMessage({
              type: "computeInputText",
              text: userInput.current,
            });
          }
          break;

        case "completeInputText":
          let sortedOptions = optionsWithEmbeddings.current
            .map((option) => ({
              ...option,
              sim: cos_sim(option.embeddings, e.data.inputTextEmbeddings),
            }))
            .sort((optionA, optionB) => {
              const containsA = includesCaseInsensitive(
                optionA.labelSemAutoCom,
                e.data.inputText
              );
              const containsB = includesCaseInsensitive(
                optionB.labelSemAutoCom,
                e.data.inputText
              );

              // exact substring matches rank first; ties break on similarity
              if (containsA == containsB) {
                return optionB.sim - optionA.sim;
              }
              return containsA ? -1 : 1;
            });

          if (threshold && e.data.inputText) {
            let index = sortedOptions.findIndex(
              (op) =>
                includesCaseInsensitive(
                  op.labelSemAutoCom,
                  e.data.inputText
                ) == false && op.sim < threshold
            );
            // findIndex returns -1 when no option falls below the threshold
            if (index !== -1) {
              sortedOptions = sortedOptions.slice(0, index);
            }
          }
          setOptions(sortedOptions);
          if (onResult) {
            onResult(sortedOptions);
          }
          break;
      }
    };

    worker.current.addEventListener("message", onMessageReceived);
    return () =>
      worker.current.removeEventListener("message", onMessageReceived);
  });

  useEffect(() => {
    setLoading(true);
    worker.current.postMessage({
      type: "computeOptions",
      options: props.options.map((op) => ({
        ...op,
        labelSemAutoCom: getOptionLabel(op),
      })),
    });
  }, [props.options]);

  const includesCaseInsensitive = (fullText, lookupValue) => {
    return fullText.toLowerCase().includes(lookupValue.toLowerCase());
  };

  const handleInputChange = (event, value, reason) => {
    userInput.current = value;

    worker.current.postMessage({
      type: "computeInputText",
      text: value,
    });

    if (userOnInputChange) {
      userOnInputChange(event, value, reason);
    }
  };

  const handleOnOpen = (event) => {
    setIsOpen(true);

    if (userOnOpen) {
      userOnOpen(event);
    }
  };

  const handleOnClose = (event) => {
    setIsOpen(false);

    if (userOnClose) {
      userOnClose(event);
    }
  };

  const renderLoadingInput = (params) => (
    <TextField
      {...params}
      InputProps={{
        ...params.InputProps,
        endAdornment: (
          <React.Fragment>
            {/* spinner sized to fit the measured input element */}
            <CircularProgress color="inherit" size={parentSize} />
            {params.InputProps.endAdornment}
          </React.Fragment>
        ),
      }}
      ref={(node) => {
        if (node && parentSize == 0) {
          const inputElement = node.querySelector('input');
          if (inputElement) {
            // https://stackoverflow.com/a/62721389
            const { clientHeight, clientWidth } = inputElement;
            setParentSize(Math.min(clientHeight, clientWidth));
          }
        }
      }}
    />
  );

  return (
    <Autocomplete
      {...restOfProps}
      options={options}
      filterOptions={(x) => x}
      onInputChange={handleInputChange}
      loading={loading}
      onOpen={handleOnOpen}
      onClose={handleOnClose}
      ref={ref}
      {...(loading ? { renderInput: renderLoadingInput } : {})}
    />
  );
});

export default SemanticAutocomplete;

--------------------------------------------------------------------------------
/src/worker.js:
--------------------------------------------------------------------------------
import { env, pipeline } from "@xenova/transformers";

let configs = {
  pipelineParams: { pooling: "mean", normalize: true },
  model: "Mihaiii/Venusaur",
};

class MyEmbeddingsPipeline {
  static task = "embeddings";
  static instance = null;

  static async getInstance(progress_callback = null) {
    if (this.instance === null) {
      // we get the models from huggingface. Ex: https://huggingface.co/Mihaiii/Venusaur
      env.allowLocalModels = false;
      this.instance = pipeline(this.task, configs.model, { progress_callback });
    }

    return this.instance;
  }
}

self.addEventListener("message", async (event) => {
  switch (event.data.type) {
    case "init":
      configs.pipelineParams =
        event.data.pipelineParams || configs.pipelineParams;
      configs.model = event.data.model || configs.model;
      break;

    case "computeOptions": {
      let embeddingsPipeline = await MyEmbeddingsPipeline.getInstance();
      const optionPromises = event.data.options.map(async (option) => {
        return {
          ...option,
          embeddings: await embeddingsPipeline(
            option.labelSemAutoCom,
            configs.pipelineParams
          ),
        };
      });
      let optionsWithEmbeddings = await Promise.all(optionPromises);
      let optionsWithEmbeddingsData = optionsWithEmbeddings.map((op) => ({
        ...op,
        embeddings: op.embeddings.data,
      }));
      self.postMessage({
        status: "completeOptions",
        optionsWithEmbeddings: optionsWithEmbeddingsData,
      });
      break;
    }
    case "computeInputText": {
      let embeddingsPipeline = await MyEmbeddingsPipeline.getInstance();
      let embeddings = await embeddingsPipeline(
        event.data.text,
        configs.pipelineParams
      );

      self.postMessage({
        status: "completeInputText",
        inputTextEmbeddings: embeddings.data,
        inputText: event.data.text,
      });
      break;
    }
  }
});

--------------------------------------------------------------------------------
/vite.config.js:
--------------------------------------------------------------------------------
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';

export default defineConfig({
  plugins: [react()],
  build: {
    lib: {
      entry: 'src/SemanticAutocomplete.jsx',
      name: 'SemanticAutocomplete',
      // Output format options: "umd", "iife", "es"
      formats: ['es', 'umd'],
      fileName: (format) => `semantic-autocomplete.${format}.js`
    },
    rollupOptions: {
      // Explicitly specify external dependencies (if any)
      external: ['react', 'react-dom'],
      output: {
        globals: {
          react: 'React',
          'react-dom': 'ReactDOM'
        }
      }
    },
  }
});

--------------------------------------------------------------------------------