├── .env.example
├── .eslintrc.json
├── .gitignore
├── README.md
├── app
│   ├── api
│   │   └── chat
│   │       └── route.ts
│   ├── favicon.ico
│   ├── globals.css
│   ├── hooks
│   │   └── useConfiguration.ts
│   ├── layout.tsx
│   └── page.tsx
├── components
│   ├── Bubble.tsx
│   ├── Configure.tsx
│   ├── Dropdown.tsx
│   ├── Footer.tsx
│   ├── PromptSuggestions
│   │   ├── PromptSuggestionButton.tsx
│   │   └── PromptSuggestionsRow.tsx
│   ├── ThemeButton.tsx
│   └── Toggle.tsx
├── next-env.d.ts
├── next.config.js
├── package-lock.json
├── package.json
├── postcss.config.js
├── scripts
│   ├── populateDb.ts
│   └── sample_data.json
├── tailwind.config.js
└── tsconfig.json

--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=REPLACE_ME
2 | ASTRA_DB_API_ENDPOINT=REPLACE_ME
3 | ASTRA_DB_APPLICATION_TOKEN=REPLACE_ME
4 | ASTRA_DB_NAMESPACE=default_keyspace
5 | 
--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": ["next", "prettier"]
3 | }
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | wheels/
22 | share/python-wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 | 
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .nox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *.cover
48 | *.py,cover
49 | .hypothesis/
50 | .pytest_cache/
51 | cover/
52 | 
53 | # Translations
54 | *.mo
55 | *.pot
56 | 
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 | db.sqlite3-journal
62 | 
63 | # Flask stuff:
64 | instance/
65 | .webassets-cache
66 | 
67 | # Scrapy stuff:
68 | .scrapy
69 | 
70 | # Sphinx documentation
71 | docs/_build/
72 | 
73 | # PyBuilder
74 | .pybuilder/
75 | target/
76 | 
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 | 
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 | 
84 | # pyenv
85 | # For a library or package, you might want to ignore these files since the code is
86 | # intended to run in multiple environments; otherwise, check them in:
87 | # .python-version
88 | 
89 | # pipenv
90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
93 | # install all needed dependencies.
94 | #Pipfile.lock
95 | 
96 | # poetry
97 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
98 | # This is especially recommended for binary packages to ensure reproducibility, and is more
99 | # commonly ignored for libraries.
100 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
101 | #poetry.lock
102 | 
103 | # pdm
104 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
105 | #pdm.lock
106 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
107 | # in version control.
108 | # https://pdm.fming.dev/#use-with-ide
109 | .pdm.toml
110 | 
111 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
112 | __pypackages__/
113 | 
114 | # Celery stuff
115 | celerybeat-schedule
116 | celerybeat.pid
117 | 
118 | # SageMath parsed files
119 | *.sage.py
120 | 
121 | # Environments
122 | .env
123 | .venv
124 | env/
125 | venv/
126 | ENV/
127 | env.bak/
128 | venv.bak/
129 | 
130 | # Spyder project settings
131 | .spyderproject
132 | .spyproject
133 | 
134 | # Rope project settings
135 | .ropeproject
136 | 
137 | # mkdocs documentation
138 | /site
139 | 
140 | # mypy
141 | .mypy_cache/
142 | .dmypy.json
143 | dmypy.json
144 | 
145 | # Pyre type checker
146 | .pyre/
147 | 
148 | # pytype static type analyzer
149 | .pytype/
150 | 
151 | # Cython debug symbols
152 | cython_debug/
153 | 
154 | # PyCharm
155 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
156 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
157 | # and can be added to the global gitignore or merged into this file. For a more nuclear
158 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
159 | .idea/
160 | 
161 | node_modules
162 | .next
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RAGBot Starter
2 | 
3 | This project is a starter for creating a chatbot using Astra DB and OpenAI. It's designed to be easy to deploy and use, with a focus on performance and usability.
4 | 
5 | ## Features
6 | 
7 | - **Astra DB Integration**: Store and retrieve data from your Astra DB database with ease.
8 | - **OpenAI Integration**: Leverage the power of OpenAI to generate intelligent responses.
9 | - **Easy Deployment**: Deploy your chatbot to Vercel with just a few clicks.
10 | - **Customizable**: Modify and extend the chatbot to suit your needs.
11 | 
12 | ## Getting Started
13 | 
14 | ### Prerequisites
15 | 
16 | - An Astra DB account. You can [create one here](https://astra.datastax.com/register).
17 | - An Astra DB vector database.
18 | - An OpenAI account. You can [create one here](https://platform.openai.com/).
19 | 
20 | ## Deployment
21 | 
22 | Easily deploy your chatbot to Vercel by clicking the button below:
23 | 
24 | [![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https://github.com/datastax/ragbot-starter&integration-ids=oac_HrgeXUSyqANAtm3MAOaTJ43a&env=OPENAI_API_KEY)
25 | 
26 | Configure the Astra DB integration and add your `OPENAI_API_KEY` environment variable.
27 | 
28 | Note: Before deploying to production, you will want to remove the seed script (`npm run seed`) from the build step; see the example below.
29 | 
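For reference, once seeding is removed from the build, a trimmed `scripts` block in `package.json` might look like this (a suggested tweak, not something the starter ships with):

```json
{
  "scripts": {
    "dev": "next dev",
    "build": "next build",
    "start": "next start",
    "seed": "ts-node ./scripts/populateDb.ts",
    "lint": "next lint"
  }
}
```

You can still refresh the sample data at any time with a manual `npm run seed`.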
30 | ### Local Development
31 | 
32 | 1. Clone this repository to your local machine.
33 | 2. Install the dependencies by running `npm install` in your terminal.
34 | 3. Set up the following environment variables in your IDE or `.env` file:
35 |    - `OPENAI_API_KEY`: Your API key for OpenAI
36 |    - `ASTRA_DB_API_ENDPOINT`: Your Astra DB vector database endpoint
37 |    - `ASTRA_DB_APPLICATION_TOKEN`: The generated app token for your Astra database
38 |      - To create a new token, go to your database's `Connect` tab and click `Generate Token`. (Your Application Token begins with `AstraCS:...`.)
39 |    - `ASTRA_DB_NAMESPACE`: (Optional) An existing Astra namespace/keyspace **_in a vector-enabled DB_**
40 | 4. Populate your database with sample data by running `npm run seed` in your terminal.
41 | 
42 | ### Running the Project
43 | 
44 | To start the development server, run `npm run dev` in your terminal. Open [http://localhost:3000](http://localhost:3000) to view the chatbot in your browser.
45 | 
--------------------------------------------------------------------------------
/app/api/chat/route.ts:
--------------------------------------------------------------------------------
1 | import OpenAI from 'openai';
2 | import { OpenAIStream, StreamingTextResponse } from 'ai';
3 | import { AstraDB } from "@datastax/astra-db-ts";
4 | 
5 | const openai = new OpenAI({
6 |   apiKey: process.env.OPENAI_API_KEY,
7 | });
8 | 
9 | const astraDb = new AstraDB(process.env.ASTRA_DB_APPLICATION_TOKEN, process.env.ASTRA_DB_API_ENDPOINT, process.env.ASTRA_DB_NAMESPACE);
10 | 
11 | export async function POST(req: Request) {
12 |   try {
13 |     const { messages, useRag, llm, similarityMetric } = await req.json();
14 | 
15 |     const latestMessage = messages[messages?.length - 1]?.content;
16 | 
17 |     let docContext = '';
18 |     if (useRag) { // embed the latest question and retrieve similar chunks from Astra DB
19 |       const { data } = await openai.embeddings.create({ input: latestMessage, model: 'text-embedding-ada-002' });
20 | 
21 |       const collection = await astraDb.collection(`chat_${similarityMetric}`);
22 | 
23 |       const cursor = collection.find(null, { // vector sort on the stored $vector field
24 |         sort: {
25 |           $vector: data[0]?.embedding,
26 |         },
27 |         limit: 5,
28 |       });
29 | 
30 |       const documents = await cursor.toArray();
31 | 
32 |       docContext = `
33 |         START CONTEXT
34 |         ${documents?.map(doc => doc.content).join("\n")}
35 |         END CONTEXT
36 |       `;
37 |     }
38 |     const ragPrompt = [
39 |       {
40 |         role: 'system',
41 |         content: `You are an AI assistant answering questions about Cassandra and Astra DB. Format responses using markdown where applicable.
42 |         ${docContext}
43 |         If the answer is not provided in the context, the AI assistant will say, "I'm sorry, I don't know the answer".
44 |         `,
45 |       },
46 |     ];
47 | 
48 | 
49 |     const response = await openai.chat.completions.create(
50 |       {
51 |         model: llm ?? 
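/* llm comes straight from the request body sent by the UI, e.g. { "messages": [{ "role": "user", "content": "What is vector search?" }], "useRag": true, "llm": "gpt-4", "similarityMetric": "cosine" } (an illustrative payload; the fields mirror the destructuring above). When llm is absent, fall back to */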
'gpt-3.5-turbo',
52 |         stream: true,
53 |         messages: [...ragPrompt, ...messages],
54 |       }
55 |     );
56 |     const stream = OpenAIStream(response);
57 |     return new StreamingTextResponse(stream);
58 |   } catch (e) {
59 |     console.error(e); // surface the failure in server logs before propagating it
60 |     throw e;
61 |   }
62 | }
--------------------------------------------------------------------------------
/app/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datastax/ragbot-starter/fc0e279dbb79565e400d0b18b0d7f2b645461e75/app/favicon.ico
--------------------------------------------------------------------------------
/app/globals.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 | 
5 | :root { /* light-theme palette; the .dark block below overrides these variables */
6 |   --text-primary: #FFFFFF;
7 |   --text-primary-inverse: #090909;
8 |   --text-primary-main: #7724AA;
9 |   --text-secondary: #A6AAAE;
10 |   --text-secondary-inverse: #494A4D;
11 |   --background-bubble-primary: #611C9B;
12 |   --background-bubble-secondary: #F7F7F7;
13 |   --border-primary: #CED0D2;
14 |   --background-soft: #F3E5F5;
15 |   --background-start-rgb: 214, 219, 220;
16 |   --background-end-rgb: 255, 255, 255;
17 |   --text-tertiary: #6B6F73;
18 |   --shadow-primary: 0px 6px 16px 0px #0A0A0A1F, 0px 4px 14px 0px #0A0A0A14, 0px 8px 10px 0px #0A0A0A14;
19 |   --processing-dot-size: 4px;
20 | }
21 | 
22 | 
23 | .dark { /* dark-theme overrides, toggled via the .dark class that ThemeButton sets on <html> */
24 |   --text-primary: #090909;
25 |   --text-primary-inverse: #FFFFFF;
26 |   --text-primary-main: #9946B9;
27 |   --text-secondary: #494A4D;
28 |   --text-secondary-inverse: #A6AAAE;
29 |   --background-bubble-primary: #BA68C8;
30 |   --background-bubble-secondary: #232324;
31 |   --background-soft: #200033;
32 |   --border-primary: #262626;
33 |   --background-start-rgb: 23, 23, 23;
34 |   --background-end-rgb: 28, 28, 28;
35 | }
36 | 
37 | 
38 | body {
39 |   background: linear-gradient(to bottom,
40 |     transparent,
41 |     rgb(var(--background-end-rgb))) rgb(var(--background-start-rgb));
42 | }
43 | 
44 | .chatbot-section {
45 |   background-color: var(--text-primary);
46 |   box-shadow: var(--shadow-primary);
47 |   border: 1px solid var(--border-primary);
48 | }
49 | 
50 | .chatbot-header {
51 |   border-bottom: 1px solid var(--border-primary);
52 | }
53 | 
54 | .chatbot-header svg {
55 |   fill: var(--text-primary-inverse);
56 | }
57 | 
58 | .chatbot-text-primary {
59 |   color: var(--text-primary-inverse);
60 | }
61 | 
62 | .chatbot-text-secondary {
63 |   color: var(--text-secondary);
64 | }
65 | 
66 | .chatbot-text-secondary-inverse {
67 |   color: var(--text-secondary-inverse);
68 | }
69 | 
70 | .chatbot-text-tertiary {
71 |   color: var(--text-tertiary);
72 | }
73 | 
74 | .chatbot-input {
75 |   color: var(--text-primary-inverse);
76 |   border: 1px solid var(--border-primary);
77 | }
78 | 
79 | .chatbot-input::placeholder {
80 |   color: var(--text-secondary);
81 | }
82 | 
83 | .chatbot-send-button {
84 |   background-color: var(--text-primary-inverse);
85 |   color: var(--text-primary);
86 | }
87 | 
88 | .chatbot-send-button svg {
89 |   fill: var(--text-primary);
90 | }
91 | 
92 | .chatbot-button-primary {
93 |   background-color: var(--text-primary-inverse);
94 |   color: var(--text-primary);
95 | }
96 | 
97 | .chatbot-button-secondary {
98 |   border: 1px solid var(--border-primary);
99 |   color: var(--text-primary-inverse);
100 | }
101 | 
102 | .chatbot-faq-link {
103 |   border: 1px solid var(--border-primary);
104 |   border-radius: 24px;
105 |   color: var(--background-bubble-primary);
106 | }
107 | 
108 | .chatbot-faq-link svg {
109 |   fill: var(--text-tertiary);
110 | 
} 111 | 112 | .talk-bubble { 113 | text-align: left; 114 | display: inline-block; 115 | position: relative; 116 | color: var(--text-secondary-inverse); 117 | background-color: var(--background-bubble-secondary); 118 | border-radius: 10px; 119 | border-bottom-left-radius: 0px; 120 | } 121 | 122 | .talk-bubble.user { 123 | text-align: right; 124 | color: var(--text-primary); 125 | background-color: var(--background-bubble-primary); 126 | border-radius: 10px; 127 | border-bottom-right-radius: 0px; 128 | } 129 | 130 | .talk-bubble svg { 131 | position: absolute; 132 | left: -1px; 133 | bottom: 2px; 134 | transform: translateY(100%) rotateY(180deg); 135 | fill: var(--background-bubble-secondary); 136 | } 137 | 138 | .talk-bubble.user svg { 139 | right: -1px; 140 | left: auto; 141 | bottom: 2px; 142 | transform: translateY(100%); 143 | fill: var(--background-bubble-primary); 144 | } 145 | 146 | .dot-flashing { 147 | position: relative; 148 | /* padding-left: 8px; */ 149 | /* padding-right: 8px; */ 150 | width: var(--processing-dot-size); 151 | height: var(--processing-dot-size); 152 | border-radius: 100%; 153 | background-color: var(--text-tertiary); 154 | color: var(--text-tertiary); 155 | animation: dot-flashing 1s infinite linear alternate; 156 | animation-delay: 0.5s; 157 | } 158 | 159 | .dot-flashing::before, 160 | .dot-flashing::after { 161 | content: ""; 162 | display: inline-block; 163 | position: absolute; 164 | top: 0; 165 | } 166 | 167 | .dot-flashing::before { 168 | left: -6px; 169 | width: var(--processing-dot-size); 170 | height: var(--processing-dot-size); 171 | border-radius: 100%; 172 | background-color: var(--text-tertiary); 173 | color: var(--text-tertiary); 174 | animation: dot-flashing 1s infinite alternate; 175 | animation-delay: 0s; 176 | } 177 | 178 | .dot-flashing::after { 179 | left: 6px; 180 | width: var(--processing-dot-size); 181 | height: var(--processing-dot-size); 182 | border-radius: 100%; 183 | background-color: var(--text-tertiary); 184 | color: var(--text-tertiary); 185 | animation: dot-flashing 1s infinite alternate; 186 | animation-delay: 1s; 187 | } 188 | 189 | @keyframes dot-flashing { 190 | 0% { 191 | background-color: var(--text-tertiary); 192 | } 193 | 194 | 50%, 195 | 100% { 196 | background-color: rgba(152, 128, 255, 0.2); 197 | } 198 | } 199 | 200 | .prompt-button { 201 | background-color: var(--background-soft); 202 | color: var(--text-primary-main); 203 | } 204 | 205 | /* Toggle Styles */ 206 | .toggle-background { 207 | background-color: var(--background-bubble-primary); 208 | } 209 | 210 | .toggle-boarder { 211 | border: 1px solid var(--background-bubble-primary); 212 | } 213 | 214 | .vercel-link { 215 | color: var(--text-primary-inverse); 216 | background-color: var(--text-primary); 217 | border-color: var(--border-primary); 218 | } 219 | 220 | .vercel-link hr { 221 | border-color: var(--border-primary); 222 | } -------------------------------------------------------------------------------- /app/hooks/useConfiguration.ts: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import { useState, useEffect } from 'react'; 4 | 5 | export type SimilarityMetric = "cosine" | "euclidean" | "dot_product"; 6 | 7 | const useConfiguration = () => { 8 | // Safely get values from localStorage 9 | const getLocalStorageValue = (key: string, defaultValue: any) => { 10 | if (typeof window !== 'undefined') { 11 | const storedValue = localStorage.getItem(key); 12 | if (storedValue !== null) { 13 | return 
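/* localStorage holds strings only, so callers coerce types, e.g. getLocalStorageValue('useRag', 'true') === 'true' below */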
storedValue;
14 |     }
15 |   }
16 |   return defaultValue;
17 | };
18 | 
19 | const [useRag, setUseRag] = useState(() => getLocalStorageValue('useRag', 'true') === 'true');
20 | const [llm, setLlm] = useState(() => getLocalStorageValue('llm', 'gpt-3.5-turbo'));
21 | const [similarityMetric, setSimilarityMetric] = useState(
22 |   () => getLocalStorageValue('similarityMetric', 'cosine') as SimilarityMetric
23 | );
24 | 
25 | const setConfiguration = (rag: boolean, llm: string, similarityMetric: SimilarityMetric) => {
26 |   setUseRag(rag);
27 |   setLlm(llm);
28 |   setSimilarityMetric(similarityMetric);
29 | }
30 | 
31 | // Persist to localStorage
32 | useEffect(() => {
33 |   if (typeof window !== 'undefined') {
34 |     localStorage.setItem('useRag', JSON.stringify(useRag));
35 |     localStorage.setItem('llm', llm);
36 |     localStorage.setItem('similarityMetric', similarityMetric);
37 |   }
38 | }, [useRag, llm, similarityMetric]);
39 | 
40 | return {
41 |   useRag,
42 |   llm,
43 |   similarityMetric,
44 |   setConfiguration,
45 | };
46 | }
47 | 
48 | export default useConfiguration;
49 | 
--------------------------------------------------------------------------------
/app/layout.tsx:
--------------------------------------------------------------------------------
1 | import { GeistSans } from "geist/font/sans";
2 | import "./globals.css";
3 | 
4 | export const metadata = {
5 |   title: "RAGBot Starter",
6 |   description: "RAGBot Starter - Powered by DataStax and Vercel",
7 | };
8 | 
9 | export default function RootLayout({ children }) {
10 |   return (
11 |     <html lang="en">
12 |       <body className={GeistSans.className}>{children}</body>
13 |     </html>
14 |   );
15 | }
16 | 
--------------------------------------------------------------------------------
/app/page.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 | import { useEffect, useRef, useState } from 'react';
3 | import Bubble from '../components/Bubble';
4 | import { useChat, Message } from 'ai/react';
5 | import Footer from '../components/Footer';
6 | import Configure from '../components/Configure';
7 | import PromptSuggestionRow from '../components/PromptSuggestions/PromptSuggestionsRow';
8 | import ThemeButton from '../components/ThemeButton';
9 | import useConfiguration from './hooks/useConfiguration';
10 | 
11 | 
12 | export default function Home() {
13 |   const { append, messages, input, handleInputChange, handleSubmit } = useChat();
14 |   const { useRag, llm, similarityMetric, setConfiguration } = useConfiguration();
15 | 
16 |   const messagesEndRef = useRef(null);
17 |   const [configureOpen, setConfigureOpen] = useState(false);
18 | 
19 |   const scrollToBottom = () => {
20 |     messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
21 |   };
22 | 
23 |   useEffect(() => {
24 |     scrollToBottom();
25 |   }, [messages]);
26 | 
27 |   const handleSend = (e) => { // forward the current configuration with every submit
28 |     handleSubmit(e, { options: { body: { useRag, llm, similarityMetric } } });
29 |   }
30 | 
31 |   const handlePrompt = (promptText) => {
32 |     const msg: Message = { id: crypto.randomUUID(), content: promptText, role: 'user' };
33 |     append(msg, { options: { body: { useRag, llm, similarityMetric } } });
34 |   };
35 | 
36 |   return (
37 |     <>
38 | 
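{/* chat panel layout: header (title, theme toggle, settings button), intro copy, the message list rendered via Bubble, prompt suggestions while the chat is empty, and the send form */}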
39 |
40 |
41 |
42 |
43 | 44 | 45 | 46 |

Chatbot

47 |
48 |
49 | 50 | 55 |
56 |
57 |

Chatting with the Astra chatbot is a breeze! Simply type your questions or requests in a clear and concise manner. Responses are sourced from Astra documentation and a link for further reading is provided.

58 |
59 |
60 |
61 | {messages.map((message, index) => )} 62 |
63 |
64 | {!messages || messages.length === 0 && ( 65 | 66 | )} 67 |
68 | 69 | 75 |
76 |
77 |
78 |
79 | setConfigureOpen(false)} 82 | useRag={useRag} 83 | llm={llm} 84 | similarityMetric={similarityMetric} 85 | setConfiguration={setConfiguration} 86 | /> 87 | 88 | ) 89 | } -------------------------------------------------------------------------------- /components/Bubble.tsx: -------------------------------------------------------------------------------- 1 | import Link from "next/link"; 2 | import {forwardRef, JSXElementConstructor, useMemo, RefObject} from "react"; 3 | import Markdown from "react-markdown"; 4 | import remarkGfm from "remark-gfm"; 5 | 6 | const Bubble:JSXElementConstructor = forwardRef(function Bubble({ content }, ref) { 7 | const { role } = content; 8 | const isUser = role === "user" 9 | 10 | return ( 11 |
} className={`block mt-4 md:mt-6 pb-[7px] clear-both ${isUser ? 'float-right' : 'float-left'}`}> 12 |
13 |
14 | {content.processing ? ( 15 |
16 |
17 |
18 | ) : ( 19 | 25 | {children} 26 | 27 | ) 28 | } 29 | }} 30 | > 31 | {content?.content} 32 | 33 | )} 34 | 35 | 36 | 37 |
38 |
39 | {content.url ? ( 40 |
41 |
42 | Source: 43 | 44 |
45 | 46 | 47 | 48 | Astra DB FAQs 49 |
50 | 51 |
52 |
53 | ) : 54 | null 55 | } 56 |
57 | ) 58 | }) 59 | 60 | export default Bubble; -------------------------------------------------------------------------------- /components/Configure.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | import Dropdown from "./Dropdown"; 3 | import Toggle from "./Toggle"; 4 | import Footer from "./Footer"; 5 | import { SimilarityMetric } from "../app/hooks/useConfiguration"; 6 | 7 | interface Props { 8 | isOpen: boolean; 9 | onClose: () => void; 10 | useRag: boolean; 11 | llm: string; 12 | similarityMetric: SimilarityMetric; 13 | setConfiguration: (useRag: boolean, llm: string, similarityMetric: SimilarityMetric) => void; 14 | } 15 | 16 | const Configure = ({ isOpen, onClose, useRag, llm, similarityMetric, setConfiguration }: Props) => { 17 | const [rag, setRag] = useState(useRag); 18 | const [selectedLlm, setSelectedLlm] = useState(llm); 19 | const [selectedSimilarityMetric, setSelectedSimilarityMetric] = useState(similarityMetric); 20 | 21 | if (!isOpen) return null; 22 | 23 | const llmOptions = [ 24 | { label: 'GPT 3.5 Turbo', value: 'gpt-3.5-turbo' }, 25 | { label: 'GPT 4', value: 'gpt-4' } 26 | ]; 27 | 28 | const similarityMetricOptions = [ 29 | { label: 'Cosine Similarity', value: 'cosine' }, 30 | { label: 'Euclidean Distance', value: 'euclidean' }, 31 | { label: 'Dot Product', value: 'dot_product' } 32 | ]; 33 | 34 | const handleSave = () => { 35 | setConfiguration( 36 | rag, 37 | selectedLlm, 38 | selectedSimilarityMetric 39 | ); 40 | onClose(); 41 | }; 42 | 43 | return ( 44 |
45 |
46 |
47 |
48 |

Configure

49 | 55 |
56 |
57 | 64 | setRag(!rag)} /> 65 |
66 | 73 |
74 |
75 |
76 | 82 | 88 |
89 |
90 |
91 |
92 |
93 | ); 94 | }; 95 | 96 | export default Configure; 97 | -------------------------------------------------------------------------------- /components/Dropdown.tsx: -------------------------------------------------------------------------------- 1 | const Dropdown = ({ fieldId, label, options, value, onSelect }) => { 2 | return ( 3 |
4 | {label && ( 5 | 8 | )} 9 |
10 | 22 |
23 | 24 | 25 | 26 |
27 |
28 |
29 | ); 30 | }; 31 | 32 | export default Dropdown; 33 | -------------------------------------------------------------------------------- /components/Footer.tsx: -------------------------------------------------------------------------------- 1 | 2 | const Footer = () => { 3 | return ( 4 |
5 | 10 | 11 |
12 | Deploy 13 |
14 |
15 | Powered by 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | and 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 |
56 |
57 | ); 58 | }; 59 | 60 | export default Footer; 61 | -------------------------------------------------------------------------------- /components/PromptSuggestions/PromptSuggestionButton.tsx: -------------------------------------------------------------------------------- 1 | 2 | const PromptSuggestionButton = ({ text, onClick }) => { 3 | return ( 4 | 10 | ); 11 | }; 12 | 13 | export default PromptSuggestionButton; 14 | -------------------------------------------------------------------------------- /components/PromptSuggestions/PromptSuggestionsRow.tsx: -------------------------------------------------------------------------------- 1 | import PromptSuggestionButton from "./PromptSuggestionButton"; 2 | 3 | const PromptSuggestionRow = ({ onPromptClick }) => { 4 | const prompts = [ 5 | 'How does similarity search work with a Vector DB?', 6 | 'What is DataStax Enterprise?', 7 | 'How does CassIO work?', 8 | 'What are some common FAQs about Astra?', 9 | ]; 10 | 11 | return ( 12 |
13 | {prompts.map((prompt, index) => ( 14 | onPromptClick(prompt)} /> 15 | ))} 16 |
17 | ); 18 | }; 19 | 20 | export default PromptSuggestionRow; 21 | -------------------------------------------------------------------------------- /components/ThemeButton.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useState } from "react"; 2 | 3 | const ToggleButton = () => { 4 | const [theme, setTheme] = useState(null); 5 | 6 | useEffect(() => { 7 | const getLocalValue = () => { 8 | const storedValue = localStorage.getItem('theme'); 9 | if (storedValue !== null) { 10 | return storedValue; 11 | } 12 | return window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light'; 13 | }; 14 | 15 | const initialTheme = getLocalValue(); 16 | setTheme(initialTheme); 17 | 18 | if (initialTheme === 'dark') { 19 | document.documentElement.classList.add('dark'); 20 | } else { 21 | document.documentElement.classList.remove('dark'); 22 | } 23 | }, []); 24 | 25 | useEffect(() => { 26 | if (theme !== null) { 27 | localStorage.setItem('theme', theme); 28 | if (theme === 'dark') { 29 | document.documentElement.classList.add('dark'); 30 | } else { 31 | document.documentElement.classList.remove('dark'); 32 | } 33 | } 34 | }, [theme]); 35 | 36 | const handleToggle = () => { 37 | setTheme(theme === 'dark' ? 'light' : 'dark'); 38 | }; 39 | 40 | if (theme === null) { 41 | return null; 42 | } 43 | 44 | return ( 45 | 57 | ) 58 | }; 59 | 60 | export default ToggleButton; 61 | -------------------------------------------------------------------------------- /components/Toggle.tsx: -------------------------------------------------------------------------------- 1 | const Toggle = ({ enabled, label, onChange }) => { 2 | return ( 3 |
4 | 22 |
23 | ); 24 | }; 25 | 26 | export default Toggle; 27 | -------------------------------------------------------------------------------- /next-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | /// 3 | 4 | // NOTE: This file should not be edited 5 | // see https://nextjs.org/docs/basic-features/typescript for more information. 6 | -------------------------------------------------------------------------------- /next.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ragbot-starter", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build && npm run seed", 8 | "start": "next start", 9 | "seed": "ts-node ./scripts/populateDb.ts", 10 | "lint": "next lint" 11 | }, 12 | "dependencies": { 13 | "@datastax/astra-db-ts": "0.1.4", 14 | "@types/node": "^20.8.10", 15 | "ai": "^2.2.20", 16 | "geist": "^1.1.0", 17 | "langchain": "^0.0.179", 18 | "next": "14.0.1", 19 | "openai": "^4.14.2", 20 | "react": "^18", 21 | "react-dom": "^18", 22 | "react-markdown": "^9.0.0", 23 | "remark-gfm": "^4.0.0", 24 | "ts-node": "^10.9.1", 25 | "typescript": "5.2.2" 26 | }, 27 | "devDependencies": { 28 | "@types/react": "^18.2.37", 29 | "autoprefixer": "^10", 30 | "dotenv": "^16.3.1", 31 | "eslint": "^8", 32 | "eslint-config-next": "13.5.5", 33 | "eslint-config-prettier": "^9.0.0", 34 | "postcss": "^8", 35 | "tailwindcss": "^3" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } 7 | -------------------------------------------------------------------------------- /scripts/populateDb.ts: -------------------------------------------------------------------------------- 1 | import { AstraDB } from "@datastax/astra-db-ts"; 2 | import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; 3 | import 'dotenv/config' 4 | import sampleData from './sample_data.json'; 5 | import OpenAI from 'openai'; 6 | import { SimilarityMetric } from "../app/hooks/useConfiguration"; 7 | 8 | const openai = new OpenAI({ 9 | apiKey: process.env.OPENAI_API_KEY, 10 | }); 11 | 12 | const {ASTRA_DB_APPLICATION_TOKEN, ASTRA_DB_API_ENDPOINT, ASTRA_DB_NAMESPACE } = process.env; 13 | 14 | const astraDb = new AstraDB(ASTRA_DB_APPLICATION_TOKEN, ASTRA_DB_API_ENDPOINT, ASTRA_DB_NAMESPACE); 15 | 16 | const splitter = new RecursiveCharacterTextSplitter({ 17 | chunkSize: 1000, 18 | chunkOverlap: 200, 19 | }); 20 | 21 | const similarityMetrics: SimilarityMetric[] = [ 22 | 'cosine', 23 | 'euclidean', 24 | 'dot_product', 25 | ] 26 | 27 | const createCollection = async (similarity_metric: SimilarityMetric = 'cosine') => { 28 | try { 29 | const res = await astraDb.createCollection(`chat_${similarity_metric}`, { 30 | vector: { 31 | dimension: 1536, 32 | metric: similarity_metric, 33 | } 34 | }); 35 | console.log(res); 36 | } catch (e) { 37 | console.log(`chat_${similarity_metric} already exists`); 38 | } 39 | }; 40 | 41 | const loadSampleData = async (similarity_metric: SimilarityMetric = 'cosine') => { 42 | const collection = await 
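/* grabs a handle to the per-metric collection created by createCollection above; the chat route queries this same chat_<metric> collection */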
astraDb.collection(`chat_${similarity_metric}`); 43 | for await (const { url, title, content} of sampleData) { 44 | const chunks = await splitter.splitText(content); 45 | let i = 0; 46 | for await (const chunk of chunks) { 47 | const {data} = await openai.embeddings.create({input: chunk, model: 'text-embedding-ada-002'}); 48 | 49 | const res = await collection.insertOne({ 50 | document_id: `${url}-${i}`, 51 | $vector: data[0]?.embedding, 52 | url, 53 | title, 54 | content: chunk 55 | }); 56 | i++; 57 | } 58 | } 59 | console.log('data loaded'); 60 | }; 61 | 62 | similarityMetrics.forEach(metric => { 63 | createCollection(metric).then(() => loadSampleData(metric)); 64 | }); 65 | -------------------------------------------------------------------------------- /scripts/sample_data.json: -------------------------------------------------------------------------------- 1 | [{"url": "https://www.datastax.com/services/support/premium-support/faq", "title": "FAQ | DataStax Premium Support", "content": "What is DataStax Premium Support?\nPremium Support consists of three add-on offerings available to enhance your existing DataStax support experience\nThere are three Premium offerings currently available:\nPremium Cloud Engineer\nNamed Engineer\nTechnical Account Manager\nCan I purchase a Premium Support subscription for any DataStax product? \nYou can purchase Premium Support as an add-on to DataStax products that are generally available. Please contact PremiumSupport@datastax.com if you have any questions.\nWhat is included in each of the different DataStax technical support offerings?\nPlease refer to the DataStax \nSupport Comparison Matrix\n.\nCan I combine DataStax Premium Support options?\nYes.\nYou can combine add-on options to meet your business needs. The choice is entirely up to you.\nHow long is the Premium subscription period?\nTypical Premium subscriptions are annual.\nIf my support needs are different from what you currently offer, what should I do?\nContact \nPremiumSupport@datastax.com\n and we will be in touch with you to discuss possible alternatives."}, {"url": "https://www.datastax.com/blog/introducing-vector-search-empowering-cassandra-astra-db-developers-to-build-generative-ai-applications", "title": "Introducing Vector Search: Empowering Cassandra / Astra DB developers to build generative AI applications | DataStax", "content": "In the age of AI, Apache Cassandra\n\u00ae\n has emerged as a powerful and scalable distributed database solution. With its ability to handle massive amounts of data and provide high availability, Cassandra has become a go-to choice for many AI applications including \nUber\n, \nNetflix\n, and \nPriceline\n. However, with the introduction of generative AI and large language models (LLMs), new query capabilities are needed.\u00a0\nEnter vector search, a revolutionary new feature that empowers Cassandra with enhanced search and retrieval functionalities for generative AI applications. As a preview for our community, we\u2019ve made it available in DataStax Astra DB to try out and provide us with feedback. Get started by \nsigning up\n and then \ntrying the demo\n.\u00a0\nWhat is vector search?\nVector search\n is a cutting-edge approach to searching and retrieving data that leverages the power of vector similarity calculations. Unlike traditional keyword-based search, which matches documents based on the occurrence of specific terms, vector search focuses on the semantic meaning and similarity of data points. 
By representing data as vectors in a high-dimensional space, vector search enables more accurate and intuitive search results.\u00a0\nFor example, vector search easily identifies the semantics in these examples that term-based search would struggle with:\nFalse positive: \u201cMan bites dog\u201d and \u201cdog bites man\u201d include the same words but have opposite semantics.\nFalse negative: \u201cTourism numbers are collapsing\u201d and \u201cTravel industry fears Covid-19 crisis will cause more companies to enter bankruptcy\u201d have very similar meanings but different word choices and specificity.\nFalse negative: \u201cI need a new phone\u201d and \u201cMy old device is broken\u201d have related meanings but no common words.\nIntegrating vector search with Cassandra\nThe integration of vector search with Cassandra (for details, see \nCEP-30\n) offers several\u00a0 benefits. It opens up exciting possibilities for applications that require similarity-based queries\u2014and not just for text. Applications as diverse as recommendation systems, fraud detection, image recognition, and natural language processing can all benefit from vector search.\nHere are some key advantages of incorporating vector search into Cassandra:\nUnstructured data queries\nPrior to vector search, Cassandra was limited to searching structured data (floats, integers, or full strings). Vector search now opens the possibilities to query unstructured data, including text, audio, pictures, and videos. This makes Cassandra the one-stop-shop for high-scale database applications.\u00a0\nEnhanced search accuracy\nVector search allows for similarity-based queries, enabling more accurate and relevant search results. By considering the semantic meaning of data points, it can uncover hidden relationships and patterns that traditional keyword searches might miss.\nEfficient query processing\u00a0\nWith vector search, Cassandra can perform similarity calculations and ranking directly within the database. This eliminates the need to transfer large amounts of data to external systems, reducing latency and improving overall query performance. Furthermore, you can combine vector search with other Cassandra indexes for even more powerful queries to find exactly the data you need.\nScalability and distributed processing\nCassandra's distributed architecture aligns perfectly with vector search requirements. As data volumes grow, vector search can leverage Cassandra's scalability and distributed processing capabilities to handle large-scale similarity queries efficiently.\nBroad applicability\nVector search provides the flexibility to compute similarity across various types of data, including text, numerical values, images, and \nembeddings\n. This versatility enables developers to build advanced applications that span multiple domains and data types, all within the Cassandra ecosystem.\nVector search use cases\nIt seems that not a day goes by when a new, innovative application of generative AI is invented. Almost all generative AI use cases are enhanced by vector search because it allows developers to create more relevant prompts. Use cases of vector search for generative AI include:\nQuestion answering\u00a0\nConverting documents to text embeddings can be combined with modern natural language processing (NLP) to deliver full text answers to questions. This approach spares users from studying lengthy manuals and empowers your teams to provide answers more quickly. 
A \"question answering\" generative AI model can take the text-embedding representation for both the knowledge base of documents and your current question to deliver the closest match as an \"answer.\" (\ncode\n)\nSemantic search\nVector search powers semantic or similarity search. Because the meaning and context is captured in the embedding, vector search finds what users mean, without requiring an exact keyword match. It works with textual data (documents), images, and audio, to, for example, easily and quickly help users find products that are similar or related to their query.\nSemantic caching\u00a0\nAs your generative application grows in popularity and encounters higher traffic levels, the expenses related to LLM API calls can become substantial. Additionally, LLM services might exhibit slow response times, especially when dealing with a significant number of requests. Caching LLM responses can significantly increase response times, and lower the cost of using generative AI. However, to match the input of an LLM to previous input requires performing a semantic match rather than an exact match. Vector search provides users with that ability. (\ncode\n)\nA brief overview of transformers\u2014and their challenges\nTo gain deeper insight into the value of vector search in the domain of generative AI, it\u2019s important to understand what transformers are and what their limitations are. Transformers are designed to understand the context and semantics of language by taking in a sequence of tokens (words, or parts of words) and outputting a corresponding sequence. These models pay attention to each input token and the relationships between them, using a mechanism known as self-attention or scaled dot-product attention. This enables them to understand complex linguistic constructs and generate coherent and contextually accurate responses.\nDespite their capabilities, transformers face a significant challenge: the token limit. This constraint arises due to memory limitations in the computational hardware. With large input sequences, the self-attention mechanism requires storing each token's relationship with every other token, which can quickly exhaust available memory. As a result, transformers like GPT-3 or GPT-4 have a limit on the number of tokens they can process in a single pass; it\u2019s typically a few thousand tokens.\u00a0\nVector search solves this problem by retrieving the most semantically relevant data so that we can get the most value possible from the limited token window. Take for example a Q&A chatbot for a software product. Instead of passing the Q&A chatbot the entire Q&A repository for a software product, or using term-based search that can easily retrieve unrelated information, vector search enables you to selectively query for semantically relevant content to use the token limit effectively. (\ncode\n)\nAnother example is chat history. LLMs use chat history to give context to what the user has discussed with the LLM in the past. Simply using a fixed number of the last interactions to prevent going over the token limit would result in LLMs forgetting the previous context of the conversations. Vector search allows for only relevant historical conversations to be passed to the LLM (\ncode\n).\nBeyond the technical constraint, there's a cost aspect as well. Each token processed by the transformer uses computational resources, which directly translates into financial cost. 
Therefore, when using transformers in your applications, it's important to maximize the value extracted from each token within the given limit. Vector search reduces the cost aspect by first optimizing the content that is populated in the prompt, then also intelligently caching its responses.(\ncode\n)\nCassIO: integrating vector search into your generative AI app\nTo accelerate the integration of vector search into your app, we\u2019ve also created a library called CassIO. This software framework integrates seamlessly with popular LLM software such as \nLangChain\n, making it easy to leverage vector search in your database. CassIO can maintain chat history, create prompt templates, and cache LLM responses. To learn more, check out\u00a0 the \nCassIO website\n.\nGet started!\nCassandra and Astra DB developers can take a great leap forward with vector search. Today, Cassandra is the number one database for querying both structured and unstructured data. Understanding the strengths and limitations of transformers and using advanced data management technologies like DataStax can greatly enhance the effectiveness and cost-efficiency of generative AI applications. Try out a preview of vector search by \nsigning up\n now, and register for our \nJune 15 webinar on vector search\n.\nWant to go deeper with vector search, LLMs and GenAI? Join us on July 11, 2023, for a free virtual GenAI summit for architects and practitioners: \nAgent X: Architecture for GenAI\n. In two hours, we'll unpack and demonstrate how you can craft inspiring AI agents and GenAI experiences with your unique datasets."}, {"url": "https://www.datastax.com/legal/datastax-faq-direct-licensing", "title": "DataStax Licensing \u2013 FAQs | DataStax", "content": "How are software subscriptions licensed?\nDataStax Enterprise is available on a subscription basis: a subscription is the DataStax software licence for a specified term plus the right to receive support and maintenance during that same term.\nTo purchase a DataStax subscription you will need to sign an order schedule that itemizes your purchase (amount, start date, etc) and references our \nEnterprise Terms\n; this is your contract.\nNo license key is needed to get started with DataStax software \u2013 but customers will receive a welcome guide and on-boarding documentation confirming what they have purchased.\nWhat is the unit of measure for purchases of DataStax software?\nDataStax software Subscriptions are sold on a per-Node or per-Core basis. 
Each Node may run on a maximum of 16 Cores:\n\u201cNode\u201d means a Java Virtual Machine (a platform-independent execution environment that converts Java bytecode into machine language and executes it) that runs an instance of the software\n\u201cCore\u201d means an independent physical processing unit on a CPU responsible for executing programs\nVirtualised CPUs do not count as Cores for these purposes and so a Node may have any number of virtualised CPUs provided that the physical CPUs do not exceed 16\nFor DataStax Enterprise version 6.0 or higher, customers can use the system settings (tpc_cores:16) to comply with the above limitation.\nHow long is a subscription for DataStax Enterprise?\nDataStax software Subscriptions are typically offered for a subscription period of no less than one year, which is paid for up-front.\nDoes DataStax offer a trial or evaluation licence for DataStax Enterprise?\nYes: you can use unsupported DataStax Enterprise\u00a0once for free for a period of ninety (90) days (unless otherwise agreed to by DataStax in writing)\u00a0if you use it for non-production (test/development) purposes (please refer to our \u201cLimited No-Fee License\u201d terms available in the \nEnterprise Terms\n).\nDoes DataStax offer a free license for educational institutions or classroom use?\nEducational institutions may use DataStax Enterprise as part of a classroom curriculum with written permission from DataStax. Educators wishing to use DataStax Enterprise as part of their course instruction may contact DataStax using the information available \nhere\n. Other uses by educational institutions require a paid subscription.\nWhat usage of DataStax Software is permitted? What is prohibited?\nIf you have a DataStax Enterprise license you may:\ninstall and use DataStax Enterprise inside your organization\ninstall and use DataStax Enterprise on external servers that are hosted on your behalf (e.g. in AWS)\ntake advantage of the free \u2018bursting use\u2019 benefit, enabling you to increase your DataStax Enterprise usage by 20% for up to two months in each 12 month period\nspin up dedicated DataStax Enterprise clusters to store OpsCenter storage metrics at no additional cost\nYou may not:\nhave DSE clusters containing different types of DSE Subscription (i.e. all Nodes in each Cluster must be uniformly licensed to use the same level of DSE Subscription)\nincorporate DataStax Enterprise within a new software application that you distribute to other organizations\nuse DataStax Enterprise to provide a database-as-a-service product made available to other organizations\npurchase DataStax Enterprise on behalf of someone else\nThe above examples are indicative and for guidance purposes only \u2013 please refer to the full DataStax licence terms available \nhere\n.\nHow can I verify that I have the right number of subscriptions?\nCustomers that have purchased DataStax Enterprise will receive a welcome guide and on-boarding documentation that detail how many Nodes of subscriptions they have purchased, along with any applicable restrictions.\nYou are responsible for ensuring that your usage of DataStax Enterprise complies with the quantity and type of licenses you have purchased. 
If you think you may exceed or have exceeded your licensed usage, you should take one of the following steps:\nidentify any (additional) Nodes running DataStax Software and inform your DataStax relationship manager\nif this is not possible, request assistance from DataStax to enable you to run scanning tools to identify any (additional) Nodes running DataStax Enterprise within your organization.\nCustomers that are using free DataStax Software under the No-Fee Licence terms must comply with the relevant licence terms (please refer to question 4).\nPlease note that DataStax reserves the right to verify compliance, including by conducting audits on randomly selected users from time to time.\nCan I transfer subscriptions within my organization?\nAssignment of subscriptions to a member of the same corporate family is generally permitted on request (note that the assignor will have to delete the software). You should refer to the \nEnterprise Terms\n for more detail.\nWhat services, training and support does DataStax provide?\nServices.\n DataStax provides a range of services designed to help our customers be successful by accelerating adoption of the DataStax platform and features. For further information on the services we offer, see \nhere\n. Please note that DataStax does not provide custom application/code development.\nTraining.\n DataStax provides \ntraining\n for users of DataStax Enterprise, and Apache Cassandra at all levels. We offer online training via DataStax Academy and private training courses. For the current schedule of DataStax training and other events (including Cassandra events) see \nhere\n.\nSupport.\n All paid DataStax Software subscription customers benefit from expert DataStax Support. Support is provided remotely by DataStax experts based in the USA, UK, and Australia. Please note that the DataStax Support team does not have access to the data stored within customer\u2019s DataStax Enterprise databases other than data specifically provided by the customer. Customers should avoid including any personal, sensitive, or restrictive data in connection with any support requests.\nPremium Support.\n In addition to standard support, DataStax also offers DataStax Enterprise customers a premium support offering. More information is available \nhere\n.\nSupport for Apache Cassandra.\n DataStax also offers support for Apache Cassandra users. For more information, see the DataStax Luna \npage\n.\nWhat happens at the end of my subscription?\nDataStax will contact you to discuss the renewal of your subscription.\nIf your subscription is not renewed then you must delete the software at the end of the subscription period.\nWhat terms and conditions apply to usage of DataStax Software?\nDataStax Enterprise is made available under the DataStax \nEnterprise Terms\n. The Enterprise Terms also include:\nthe \nSupport Policy\n \u2013 which describes how support is provided; and\nthe \nService Terms\n \u2013 which describe how consultancy and training are provided, and describe the service packages offered by DataStax.\nWhat license terms apply to these DataStax software products: OpsCenter; DevCenter; Studio; Drivers?\nOpsCenter:\n OpsCenter is licensed as part of DataStax Enterprise, but it can also be downloaded and installed separately. When it is not used as part of a DataStax Enterprise subscription the applicable license terms can be found \nhere\n.\nDevCenter:\n DevCenter is licensed as part of DataStax Enterprise, but it can also be downloaded and installed separately. 
The license terms for DevCenter are part of our Enterprise Terms, and can be found \nhere\n.\nStudio:\n Studio is licensed as part of DataStax Enterprise, but it can also be downloaded and installed separately. When it is not used as part of a DataStax Enterprise subscription the applicable license terms can be found \nhere\n.\nDrivers:\n DataStax includes drivers as part of DataStax Enterprise, but many can also be downloaded and installed separately. For full information please refer to our drivers page \nhere\n.\nWhat is the US export classification of DataStax Enterprise?\nAs of July 6 2016 the US export classification of DataStax Enterprise is as set out below. The corresponding CCATS reference is G166668.\nProduct\nECCN\nLicense Exception\nAuthorized For Export (See Definitions List Below)\nEncryption Reporting Requirement\nDataStax Enterprise\n5D992.c\nNo License Required (\u201cNLR\u201d)\nAll countries, except Embargoed Countries, and the Crimea Region of Ukraine\nAnnual\nDataStax OpsCenter\n5D992.c\nNo License Required (\u201cNLR\u201d)\nAll countries, except Embargoed Countries, and the Crimea Region of Ukraine\nAnnual\nDataStax DevCenter\n5D992.c\nNo License Required (\u201cNLR\u201d)\nAll countries, except Embargoed Countries, and the Crimea Region of Ukraine\nAnnual\nDataStax Drivers and Utilities\n5D992.c\nNo License Required (\u201cNLR\u201d)\nAll countries, except Embargoed Countries, and the Crimea Region of Ukraine\nNone\nDataStax Source Code\n5D002.c1\nENC/(b)(2)\nAll countries, except: \u2013 Government End Users of Non-Supp3 Countries \u2013 Embargoed countries and the Crimea Region of Ukraine\nSemi-Annual\nDataStax Encryption Technology\n5E002(a)\nENC/(b)(2)\nAll countries, except: \u2013 Government End Users of non-Supp3 Countries \u2013 End Users in D:1 Countries (unless HQ\u2019ed in a Supp3 Country) \u2013 Embargoed Countries and the Crimea Region of Ukraine\nSemi-Annual\nIs DataStax software covered by patents?\nYes, the technology within DataStax\u2019s software is covered by patents (and pending patents).\nWhat third party software is contained in DataStax Enterprise?\nDataStax Enterprise incorporates open source components and other third party software (including \nApache Cassandra\u2122\n) that are scanned and approved in accordance with the DataStax Software Development Security Program (available \nhere\n). For a list of the included software including the applicable licenses please see \nhere\n.\nWhere is the documentation for DataStax Enterprise?\nDocumentation for DataStax Enterprise can be found \nhere\n. You will find document sets that include release notes, installation and configuration guides, and an overview of the latest features.\nDoes DataStax operate an anti-piracy program?\nDataStax audits the usage of its Software from time to time. Please refer to question 11 for further information.\nWhat is the relationship between DataStax and Apache Cassandra?\nApache Cassandra\u2122 is an operational database that is an \nopen source project\n. Cassandra has been an Apache Software Foundation top level project since \nFebruary 2010\n.\nDataStax takes the software developed by the community within the Apache Software Foundation, enhances it, does extensive quality assurance on it and adds enterprise features (e.g. 
Advanced security) before delivering it as a finished package (\nDataStax Enterprise\n) to our customers.\nWho can I contact with other questions?\nPlease contact us using the information available \nhere\n."}, {"url": "https://docs.datastax.com/en/mission-control/docs/overview/faq.html", "title": "Frequently Asked Questions (FAQ)s :: DataStax Project Mission Control", "content": "Frequently Asked Questions (FAQ)s\nContents\nWhat is DataStax Mission Control?\nWhat are the main components of DataStax Mission Control?\nWho is the target audience?\nWhat is the pricing model?\nIs there support available?\nWhat is a Control Plane Cluster?\nWhat is a Data Plane Cluster?\nWhat is a Kubernetes Cluster?\nWhat is a DataStax Enterprise Cluster?\nWhat is the mapping of Kubernetes terms to DataStax Enterprise / Apache Cassandra Terms?\nDataStax Mission Control is current in Private Preview. It is subject to the beta agreement executed between you and DataStax. DataStax Mission Control is not intended for production use, has not been certified for production workloads, and might contain bugs and other functional issues. There is no guarantee that DataStax Mission Control will ever become generally available. DataStax Mission Control is provided on an \u201cAS IS\u201d basis, without warranty or indemnity of any kind.\nIf you are interested in trying out DataStax Mission Control please contact your DataStax account team.\nWhat is DataStax Mission Control?\nDataStax Mission Control provides everything needed to run DSE clusters of any scale with reduced complexity and integrations with centralized services within the enterprise.\nDataStax Mission Control manages the entire operational surface of DataStax Enterprise, DSE, across a diverse array of hosting options, from self-managed bare-metal to cloud-provided virtual machines. DataStax Mission Control provides always-on, automated operations of DSE clusters based on expertise running DataStax Astra. It assists DataStax customers with their own on-premises deployments. It vastly simplifies lifecycle management, observability, best practice configuration, and advanced operations.\nWhat are the main components of DataStax Mission Control?\nLifecycle Management\nSimplify deploying and configuring DataStax Enterprise (DSE) clusters in Kubernetes.\nObservability\nFollow logs and metrics and use monitoring tools to track the operations of your DSE system.\nHealth & Best Practices\nKubernetes uses kubelet to probe the need to restart a container, while deployments use readiness probes to check a pod\u2019s ability to receive traffic.\nSecurity\nSecure the build and restore processes from nodes to datacenters from the cloud.\nAdvanced Operations\nPerform node repairs, compaction, streaming, and backup & restore.\nWho is the target audience?\nExisting and new DataStax Enterprise (DSE) users.\nBrowser-Based User Interface\nUsers connect to a web service running within the DataStax Mission Control environment. From here they are presented with an Astra-like experience for deploying and managing services running within their infrastructure. 
Based on user permissions multiple views are available for various parts of the system including connection and health information for developers, advanced observability and operations controls for database administrators, and hardware capacity and usage for infrastructure engineers.\nKubernetes API Endpoints\nAny task that can be run within the DataStax Mission Control user interface is also available via Kubernetes APIs and Custom Resource Definitions (CRD). This allows any Kubernetes client or enabled project to automate and interface with DataStax Mission Control. From GitOps workflows with Flux and Argo to template-based Helm charts, DataStax Mission Control provides extreme flexibility for integration with existing systems.\nWhat is the pricing model?\nPricing for DataStax Mission Control is included in the DSE license and all existing DSE customers will get a free download to replace OpsCenter.\nIs there support available?\nYes. Contact your account team for a license file and download links if the welcome email is inaccessible. The links include a method to provide feedback to DataStax and ask questions.\nWhat is a \nControl Plane\n Cluster?\nA \nControl Plane\n is the management layer that establishes and controls all key operations related to management of database components, provides access to its functions via APIs, and handles the ongoing lifecycle of and health of the whole system.\nWhat is a \nData Plane\n Cluster?\nWhile the \nControl Plane\n establishes policies for all key operations, the \nData Plane\n is the remaining infrastructure architecture that carries out the operational policies. In Kubernetes, worker nodes, along with their pods and containers, comprise the \nData Plane\n. A Kubelet is a small application that runs on each node in the cluster and executes actions.\nWhat is a Kubernetes Cluster?\nIt is a set of nodes that run containerized applications. An application is packaged into a container along with its dependencies and necessary services.\nIn Kubernetes, a pod is a wrapper around a single container, and that pod is what Kubernetes manages.\nWhat is a DataStax Enterprise Cluster?\nA cluster comprised of one or more nodes that exists as an always-on data platform for cloud applications that is powered by Apache Cassandra\nTM\n. It uses the OpsCenter Web application to monitor and run administrative operations on the nodes. DataStax Mission Control is intended to supersede OpsCenter functionality as Kubernetes clusters move, in part or wholly, to the Cloud.\nWhat is the mapping of Kubernetes terms to DataStax Enterprise / Apache Cassandra Terms?\nTable 1. Mapping of Terms\nTerm\nDescription\nKubernetes\nApache Cassandra \nTM\nContainer\na way to package an application along with its libraries and its dependencies\n\u221a\n\u221a\nDocker\nmost popular contnainer runtime software running containerization of applications\n\u221a\n\u221a\nManifest\ntypically either a JSON or YAML file that specifies a desired state of a Kubernetes API object such as a pod, deployment, or service.\n\u221a\n\u221a\nNamespace\nEquivalent with virtual cluster, providing a way to divide a physical cluster into multiple virtual clusters. It is also a way to provide organization to objects in a cluster.\n\u221a\n\u221a\nNode\neither physical or virtual machines in the cluster from which applications run\n\u221a controlled by the Kubernetes \nControl Plane\n\u221a\nLogging\nLogs are the list of events that are recorded by a cluster or application. 
They help us understand how data is flowing through applications as well as spot when and where errors occur.\n\u221a In Kubernetes, your application should output logs to \nstdout\n and \nstderr\n.\n\u221a\nProxy\nA server that acts as an intermediary for a remote service, taking client requests and copying client data to the server, and sending the server replies to the client.\n\u221a kube-proxy is the network proxy that performs Kubernetes networking services in and out of the cluster\n\u221a\nRBAC\nRole-Based Access Control uses roles, which grant the required level of access to sets of users in the cluster.\n\u221a Managed through the Kubernetes API\n\u221a\nSecret\nA Kubernetes object that stores sensitive information such as passwords, API keys, and SSH keys so that pods can use that information without the data being shown. Sensitive data is exposed to containers either as a file in a volume mount or through environment variables.\n\u221a\n\u221a"}, {"url": "https://docs.datastax.com/en/astra-classic/docs/migrate/faqs.html", "title": "Frequently Asked Questions :: DataStax Astra DB Classic Documentation", "content": "Frequently Asked Questions\nContents\nWhat is meant by Zero Downtime Migration?\nCan you illustrate the overall workflow and phases of a migration?\nDo you offer an interactive self-guided lab to help me learn about ZDM migrations at my own pace?\nWhat components are provided with ZDM?\nWhat exactly is ZDM Proxy?\nWhat are the benefits of Zero Downtime Migration and its use cases?\nWhich releases of Cassandra or DSE are supported for migrations?\nDoes ZDM migrate clusters?\nWhat challenges does ZDM solve?\nWhat is the pricing model?\nIs there support available if I have questions or issues during our migration?\nWhere are the public GitHub repos?\nDoes ZDM Proxy support Transport Layer Security (TLS)?\nHow does ZDM Proxy handle Lightweight Transactions (LWTs)?\nCan ZDM Proxy be deployed as a sidecar?\nWhat are the benefits of using a cloud-native database?\nIf you\u2019re new to the DataStax Zero Downtime Migration features, these FAQs are for you.\nWhat is meant by Zero Downtime Migration?\nZero Downtime Migration (ZDM) means the ability for you to reliably migrate client applications and data between CQL clusters with no interruption of service.\nZDM lets you accomplish migrations without the need to change your client application code, and with only minimal configuration changes. While in some cases you may need to make some minor changes at the client application level, these changes will be minimal and non-invasive, especially if your client application uses an externalized property configuration for contact points.\nThe suite of ZDM tools enables you to migrate the real-time activity generated by your client applications, as well as transfer your existing data, always with a simple rollback strategy that does not require any downtime.\nIt is important to note that the Zero Downtime Migration process requires you to be able to perform rolling restarts of your client applications during the migration.\nIn the context of migrating between clusters (client applications and data), the examples in this guide sometimes refer to the migration to our cloud-native database environment, DataStax Astra DB.\nHowever, it is important to emphasize that the ZDM Proxy can be freely used to migrate without downtime between any combination of CQL clusters of any type.
In addition to Astra DB, examples include Apache Cassandra\u00ae or DataStax Enterprise (DSE).\nCan you illustrate the overall workflow and phases of a migration?\nSee the diagrams of the ZDM \nmigration phases\n.\nDo you offer an interactive self-guided lab to help me learn about ZDM migrations at my own pace?\nYes! Here\u2019s a fun way to learn.\nNow that you\u2019ve seen a conceptual overview of the process, let\u2019s put what you learned into practice.\nWe\u2019ve built a complementary learning resource that is a companion to this comprehensive ZDM documentation. It\u2019s the Zero Downtime Migration Interactive Lab, available for you here:\nhttps://www.datastax.com/dev/zdm\nAll you need is a browser and a GitHub account.\nThere\u2019s nothing to install for the lab, which opens in a pre-configured GitPod environment.\nYou\u2019ll learn about a full migration without leaving your browser!\nTo run the lab, all major browsers are supported, except Safari. For more, see the lab\u2019s \nstart page\n.\nWe encourage you to explore this free hands-on interactive lab from DataStax Academy. It\u2019s an excellent, detailed view of the migration process. The lab describes and demonstrates all the steps and automation performed to prepare for, and complete, a migration from any Cassandra/DSE/Astra DB database to another Cassandra/DSE/Astra DB database across clusters.\nThe interactive lab spans the pre-migration prerequisites and each of the five key migration phases.\nWhat components are provided with ZDM?\nDataStax Zero Downtime Migration includes the following:\nZDM Proxy\n is a service that operates between \nOrigin\n, which is your existing cluster, and \nTarget\n, which is the cluster to which you are migrating.\nZDM Proxy Automation\n is an Ansible-based tool that allows you to deploy and manage the ZDM Proxy instances and associated monitoring stack. To simplify its setup, the suite includes the ZDM Utility. This interactive utility creates a Docker container acting as the Ansible Control Host. The Ansible playbooks constitute the ZDM Proxy Automation.\nCassandra Data Migrator\n is designed to:\nConnect to your clusters and compare the data between Origin and Target\nReport differences in a detailed log file\nReconcile any missing records and fix any data inconsistencies between Origin and Target, if you enable \nautocorrect\n in a configuration file\nDSBulk Migrator\n is provided to migrate smaller amounts of data from Origin to Target\nWell-defined steps in this migration documentation, organized as a sequence of phases.\nWhat exactly is ZDM Proxy?\nZDM Proxy is a component designed to seamlessly handle the real-time client application activity while a migration is in progress. See \nhere\n for an overview.\nWhat are the benefits of Zero Downtime Migration and its use cases?\nMigrating client applications between clusters is a need that arises in many scenarios. 
For example, you may want to:\nMove to a cloud-native, managed service such as Astra DB.\nMigrate your client application to a brand new cluster, on a more recent version and perhaps on new infrastructure, or even a different CQL database entirely, without intermediate upgrade steps and ensuring that you always have an easy way to roll back in case of issues.\nSeparate out a client application from a shared cluster to a dedicated one.\nConsolidate client applications, currently running on separate clusters, into fewer clusters or even a single one.\nBottom line: You want to migrate your critical database infrastructure without risk or concern that your users' experiences will be affected.\nWhich releases of Cassandra or DSE are supported for migrations?\nOverall, you can use ZDM Proxy to migrate:\nFrom:\n Any Cassandra 2.1.6 or higher release, or from any DSE 4.7.1 or higher release\nTo:\n Any equivalent or higher release of Cassandra, or to any equivalent or higher release of DSE, or to Astra DB\nThere are many reasons why you may decide to migrate your data and client applications from one cluster to another, for example:\nMoving to a different type of CQL database, for example, an on-demand cloud-based proposition such as Astra DB.\nUpgrading a cluster to a newer version, or newer infrastructure, in as little as one step while leaving your existing cluster untouched throughout the process.\nMoving one or more client applications out of a shared cluster and onto a dedicated one, in order to manage and configure each cluster independently.\nConsolidating client applications, which may be currently running on separate clusters, onto a shared one in order to reduce overall database footprint and maintenance overhead.\nHere are just a few examples of migration scenarios that are supported when moving from one type of CQL-based database to another:\nFrom an existing self-managed Cassandra or DSE cluster to cloud-native Astra DB. For example:\nCassandra 2.1.6+, 3.11.x, 4.0.x, or 4.1.x to Astra DB\nDSE 4.7.1+, 4.8.x, 5.1.x, 6.7.x or 6.8.x to Astra DB\nFrom an existing Cassandra or DSE cluster to another Cassandra or DSE cluster. For example:\nCassandra 2.1.6+ or 3.11.x to Cassandra 4.0.x or 4.1.x\nDSE 4.7.1+, 4.8.x, 5.1.x or 6.7.x to DSE 6.8.x\nCassandra 2.1.6+, 3.11.x, 4.0.x, or 4.1.x to DSE 6.8.x\nDSE 4.7.1+ or 4.8.x to Cassandra 4.0.x or 4.1.x\nFrom \nAstra DB Classic\n to \nAstra DB Serverless\nFrom any CQL-based database type/version to the equivalent CQL-based database type/version.\nDoes ZDM migrate clusters?\nZDM does not migrate clusters. With ZDM, we are migrating data and applications \nbetween clusters\n.
At the end of the migration, your application will be running on your new cluster, which will have been populated with all the relevant data.\nWhat challenges does ZDM solve?\nBefore DataStax Zero Downtime Migration was available, migrating client applications between clusters involved granular and intrusive client application code changes, extensive migration preparation, and a window of downtime for the client application\u2019s end users.\nZDM allows you to leverage mature migration tools that have been used with large-scale enterprises and applications to make migrations easy and transparent to end users.\nWhat is the pricing model?\nThe suite of Zero Downtime Migration tools from DataStax is free and open source.\nIs there support available if I have questions or issues during our migration?\nIf needed, ZDM Proxy and related software tools in the migration suite include technical assistance from \nDataStax Support\n if you have a DataStax Luna service contract with DataStax.\nLuna\n is a subscription to the Apache Cassandra support and expertise at DataStax.\nFor any observed problems with the ZDM Proxy, submit a \nGitHub Issue\n in the ZDM Proxy GitHub repo.\nAdditional examples serve as templates, from which you can learn about migrations. DataStax does not assume responsibility for making the templates work for specific use cases.\nWhere are the public GitHub repos?\nAll the DataStax Zero Downtime Migration GitHub repos are public and open source. You are welcome to read the code and submit feedback via GitHub Issues per repo. In addition to sending feedback, you may submit Pull Requests (PRs) for potential inclusion. To submit PRs, you must first agree to the \nDataStax Contribution License Agreement (CLA)\n.\nZDM Proxy\n repo for ZDM Proxy.\nZDM Proxy Automation\n repo for the Ansible-based ZDM Proxy Automation, which includes the ZDM Utility.\ncassandra-data-migrator\n repo for the tool that supports migrating larger data quantities as well as detailed verifications and reconciliation options.\ndsbulk-migrator\n repo for the tool that allows simple data migrations without validation and reconciliation capabilities.\nDoes ZDM Proxy support Transport Layer Security (TLS)?\nYes, and here\u2019s a summary:\nFor application-to-proxy TLS, the application is the TLS client and the ZDM Proxy is the TLS server. One-way TLS and Mutual TLS are both supported.\nFor proxy-to-cluster TLS, the ZDM Proxy acts as the TLS client and the cluster as the TLS server. One-way TLS and Mutual TLS are both supported.\nWhen the ZDM Proxy connects to Astra DB clusters, it always implicitly uses Mutual TLS. This is done through the Secure Connect Bundle (SCB) and does not require any extra configuration.\nFor TLS details, see \nConfigure Transport Layer Security (TLS)\n.\nHow does ZDM Proxy handle Lightweight Transactions (LWTs)?\nZDM Proxy handles LWTs as write operations. The proxy sends the LWT to Origin and Target clusters concurrently, and waits for a response from both. ZDM Proxy will return a \nsuccess\n status to the client if both Origin and Target send successful acknowledgements, or otherwise will return a \nfailure\n status if one or both do not return an acknowledgement.\nWhat sets LWTs apart from regular writes is that they are conditional.
For important details, including the client context for a returned \napplied\n flag, see \nLightweight Transactions and the \napplied\n flag\n.\nCan ZDM Proxy be deployed as a sidecar?\nZDM Proxy should not be deployed as a sidecar.\nZDM Proxy was designed to mimic a Cassandra cluster. For this reason, we recommend deploying multiple ZDM Proxy instances, each running on a dedicated machine, instance, or VM.\nFor best performance, this deployment should be close to the client applications (ideally on the same local network) but not co-deployed on the same machines as the client applications. This way, each client application instance can connect to all ZDM Proxy instances, just as it would connect to all nodes in a Cassandra cluster (or datacenter).\nThis deployment model gives maximum resilience and failure tolerance guarantees and allows the client application driver to continue using the same load balancing and retry mechanisms that it would normally use.\nConversely, deploying a single ZDM Proxy instance would undermine this resilience mechanism and create a single point of failure, which could affect the client applications if one or more nodes of the underlying clusters (Origin or Target) go offline. In a sidecar deployment, each client application instance would be connecting to a single ZDM Proxy instance, and would therefore be exposed to this risk.\nFor more information, see \nChoosing where to deploy the proxy\n.\nWhat are the benefits of using a cloud-native database?\nWhen moving your client applications and data from on-premises Cassandra Query Language (CQL) based data stores (Cassandra or DSE) to a cloud-native database (CNDB) like Astra DB, it\u2019s important to acknowledge the fundamental differences ahead. With on-premises infrastructure, of course, you have total control of the datacenter\u2019s physical infrastructure, software configurations, and your custom procedures. At the same time, with on-premises clusters you take on the cost of infrastructure resources, maintenance, operations, and personnel.\nRanging from large enterprises to small teams, IT managers, operators, and developers are realizing that the Total Cost of Ownership (TCO) with cloud solutions is much lower than the cost of continuing to run on-premises physical data centers.\nA CNDB like Astra DB is a different environment. Running on proven cloud providers like AWS, Google Cloud, and Azure, Astra DB greatly reduces complexity and increases convenience by surfacing a subset of configurable settings, providing a well-designed UI known as Astra Portal, plus a set of APIs and commands to interact with your Astra DB organizations and databases."}, {"url": "https://docs.datastax.com/en/astra-serverless/docs/astra-faq.html", "title": "Astra DB FAQs :: DataStax Astra DB Serverless Documentation", "content": "Astra DB FAQs\nContents\nWhat tools does Astra DB include?\nHow is Astra DB different from DSE?\nWhich version of Apache Cassandra\u2122 is compatible with Astra DB?\nWhat is a Vector Search?\nWhich cloud vendors does Astra DB support?\nHow do I connect to Astra DB databases?\nWhat does DataStax consider \u201crecent\u201d when listing Recent Resources?\nWhich DataStax drivers work with Astra DB?\nHow do I manage billing for Astra DB?\nWhat happens when my monthly credit runs out?\nWhere do I find the Database ID and Organization ID?\nHow do I navigate to a CQL shell?\nI am trying to create a keyspace in the CQL shell, and I am running into an error.
How do I fix this?\nHow do I delete a keyspace?\nShould I use materialized views?\nShould I use Storage Attached Indexing?\nWhat is the practical limit for the amount of data stored in an Astra DB database?\nHow can I fix a \u201cMissing Correct Permission Error\u201d?\nI created my table in Astra Portal. Now, when I run a select statement, I get an error. What do I do?\nI need help with the Workshop Project. Where do I go for help?\nWhere can I find Astra DB project tutorials?\nHow do I open a support ticket?\nCan I change the keyspace name?\nWhat if my CSV file is too large for the Data Loader?\nCan I use cfstats or nodetool?\nCan I use strict security settings, such as an ad blocker, with Astra DB?\nWhere can I find help?\nWhich cloud regions are currently available for serverless databases?\nIs VPC peering possible for serverless databases?\nHow is pricing calculated for serverless databases?\nHow do I create a token?\nHow do I know which role to choose for my token?\nWhere can I find the Client ID, Client Secret, and Token?\nWhat values do I pass to username and password when connecting to Astra Drivers?\nCan I restore my data if my database is suspended?\nWhich ports are used by serverless databases?\nWhat can I update with my avatar on Astra?\nFrequently asked questions about DataStax Astra DB.\nWhat tools does Astra DB include?\nStargate APIs:\n The Stargate APIs are available as the \nAstra DB APIs\n. Create an \napplication token\n and begin interacting with your database.\nCQLSH:\n Each Astra DB database includes an integrated CQL shell for interactive Cassandra Query Language (CQL) commands.\nUse CQLSH to create tables, insert data, and modify data in your keyspace.\nAstra DB Data Loader\n: From the console, use \nData Loader\n to load the following into your Astra DB database:\nCSV data\nExample datasets\nExtracted DynamoDB data that resides in an Amazon S3 bucket\nDataStax Bulk Loader:\n Use DataStax Bulk Loader (dsbulk) to \nload and unload\n data into your Astra DB database.\nYou can load data from a variety of sources and unload data from a DSE database for transfer, use, or storage of data.\nMetrics dashboards:\n Use the integrated metrics dashboards to \nview health metrics\n that include information regarding latency and throughput to the database.\nThese metrics provide insights into the performance of the database and how workloads are distributed.\nIntegrations\n:\n Connect a third-party tool of your choice to Astra DB for data management, machine learning, analytics, and more.\nHow is Astra DB different from DSE?\nWhile you can run DataStax Enterprise (DSE) both on premises and in the cloud, Astra DB allows you to create databases in minutes, with reduced complexity, fewer operational concerns, and an approachable entry point for developing your applications.\nAstra DB is a true database-as-a-service (DBaaS) offering that provides simplified cluster management with flexible pricing.\nDSE also includes Advanced Workloads (DSE Search, Analytics, and Graph), which are not available in Astra DB. Read \nthis whitepaper\n for an in-depth understanding of the cloud-native architecture of Astra DB.\nWhich version of Apache Cassandra\u2122 is compatible with Astra DB?\nApache Cassandra version 3.11 is compatible with Astra DB databases.\nWhat is a Vector Search?\nA Vector Search reviews data in a database to determine the distance between vectors. The closer they are, the more similar the data.
The greater the distance, the less similar the data.\nWhich cloud vendors does Astra DB support?\nAstra DB supports creating databases on Amazon Web Services (AWS), Google Cloud, and Microsoft Azure.\nHow do I connect to Astra DB databases?\nOptions\nDescription\nI don\u2019t want to create or manage a \nschema\n. Just let me get started.\nUse schemaless JSON Documents with the \nDocument API\n.\nI want to start using my database now with APIs.\nUse the \nREST API\n or \nGraphQL API\n to begin interacting with your database and self-manage the \nschema\n.\nI have an application and want to use the DataStax drivers.\nInitialize one of the \nDataStax drivers\n to manage database connections for your application.\nI know CQL and want to connect quickly to use my database.\nUse the integrated CQL shell or the \nstandalone CQLSH tool\n to interact with your database using CQL.\nWhat does DataStax consider \u201crecent\u201d when listing Recent Resources?\nRecent Resources includes the five most recent databases and streaming tenant resources used by someone in your organization.\nWhich DataStax drivers work with Astra DB?\nEach of the supported DSE and Apache Cassandra drivers is compatible.\nChoose the DataStax driver for your preferred language and configure it to connect to your Astra DB database.\nSee \nConnecting to Astra DB databases using DataStax drivers\n.\nHow do I manage billing for Astra DB?\nAstra DB handles billing through an integration with Stripe, and displays all related billing information on the \nBilling Summary\n page.\nEnter a credit card number and associated billing information to begin creating Astra DB databases.\nSee \nBilling\n for more information.\nWhat happens when my monthly credit runs out?\nYou are prompted to add a credit card when your credit limit is running low. You also receive this notification by email and in Astra Portal.\nIf your application uses all $25 in credit, the database temporarily pauses until you add a credit card to restart the database. Alternatively, you can wait until the next month for the $25 credit to reload.\nUnused credits do not roll over. A $25 credit is added to your organization on the 1st of every month.\nView your credit balance in the Dashboard.\nWhere do I find the Database ID and Organization ID?\nWhile logged in to Astra, view the URL:\nastra.datastax.com/\n{organization_id}\nNavigate to your database dashboard:\nastra.datastax.com/org/{organization_id}/database/\n{database_id}\nAlternatively, you can view your Database ID in the Databases listing on the Astra DB dashboard. Use the clipboard icon to copy the ID.\nIf you need the Datacenter ID (or IDs, if you have a multi-region database), click the database name to see its details and to locate the ID of each datacenter next to its name in the list.\nHow do I navigate to a CQL shell?\nSelect your current database on the left panel. Navigate to the CQL console tab.\nHere, you can run CQL commands. These are a few to test out:\nUSE <keyspace_name>;\n navigates to your keyspace\nDESC tables;\n describes the tables within your current keyspace\nSELECT * FROM <table_name> LIMIT 10;\n selects all columns from a table, limited to 10 rows\nI am trying to create a keyspace in the CQL shell, and I am running into an error. How do I fix this?\nKeyspaces need to be created in Astra Portal. Navigate to the \nDashboard overview\n and select \nAdd Keyspace\n.\nHow do I delete a keyspace?\nWe do not allow users to delete the last keyspace to avoid accidental deletion of data.
This is the error you will see in the CQL shell:\ntoken@cqlsh> drop keyspace test_db;\nUnauthorized: Error from server: code=2100 [Unauthorized] message=\"Missing correct permission on test_db.\"\nTo avoid any storage costs associated with the keyspace, you can drop tables from the keyspace by running:\ndrop table <keyspace_name>.<table_name>;\nShould I use materialized views?\nMaterialized views are not available on Astra Serverless.\nShould I use Storage Attached Indexing?\nStorage Attached Indexing is a great feature of Astra Serverless. Storage-Attached Indexing (SAI) is a highly scalable, globally-distributed index for Apache Cassandra\u00ae that is available for DataStax Astra and DataStax Enterprise (DSE) databases.\nExplore this deep dive on \nStorage Attached Indexing\n.\nWhat is the practical limit for the amount of data stored in an Astra DB database?\nThere is no practical limit to the number of bytes an Astra DB database can store.\nHow can I fix a \u201cMissing Correct Permission Error\u201d?\nA permission error occurs for two reasons.\nThe first reason is that Astra places certain limitations on the user with \nAstra DB database guardrails and limits\n.\nThe second reason is that you don\u2019t have the appropriate permissions on your account. Learn more about \nuser permissions\n.\ntoken@cqlsh> select * from test_db.data;\nUnauthorized: Error from server: code=2100 [Unauthorized] message=\"No SELECT permission on \"\nI created my table in Astra Portal. Now, when I run a select statement, I get an error. What do I do?\nThis is the error: \nInvalid Request: Error from server: code=2200 [Invalid query] message=\"table {table_name} does not exist\"\nWhen you upload a table through Astra Portal, any table name, keyspace name, or column name with case-sensitive characters can be accessed by using double quotes.\nFor example, see the select statement below:\ntoken@cqlsh:test> desc tables;\ntoken@cqlsh:test> select * from \"Case_Sensitive\";\nI need help with the Workshop Project. Where do I go for help?\nCheck the YouTube description for the Workshop you attended. Use the Discord group or the YouTube chat to ask questions.\nAdditional links:\nDataStaxDevs Discord: \nhttps://bit.ly/cassandra-workshop\nBackup Stream (alternative to YouTube): \nhttps://www.twitch.tv/datastaxdevs\nStack Overflow for dev-related questions: \nFollow the \ncassandra\n tag\nDBA StackExchange for admin/ops-related questions: \nFollow the \ncassandra\n tag\nEvent Alerts: \nSubscribe for news\nWhere can I find Astra DB project tutorials?\nYou can find sample projects in our Sample App Gallery located on the left-hand side of your Astra dashboard or the \nGitHub Repository\n.\nAdditionally, you can follow along with video tutorials on our DataStax Developers YouTube page: \nhttps://www.youtube.com/c/DataStaxDevs\nHow do I open a support ticket?\nOpen your Astra Portal and select the \nHelp Center\n. A panel for the Help Center appears on the right side of the screen.\nIn the bottom right-hand corner, select \nSubmit a Ticket\n. You are redirected to the DataStax Support portal.\nSelect \nSubmit a Request\n at the top. You must complete the fields marked with an asterisk.
When finished, select \nCreate\n.\nFor more, see \nGet support for Astra DB\n.\nCan I change the keyspace name?\nNo, you cannot alter the keyspace name. You can create an entirely new keyspace with a different name.\nWhat if my CSV file is too large for the Data Loader?\nIf the file is over 40 MB, you can upload data with our DSBulk Loader.\nFor more, see how to get started with \nDSBulk Loader\n.\nCan I use \ncfstats\n or \nnodetool\n?\nNo, Astra is a zero-operations database-as-a-service (DBaaS) offering. Some operational Cassandra tools are not relevant. We offer access to a limited set of database metrics. You can view them in the \nHealth\n tab in Astra Portal.\nFor more, see the \nAstra DB database guardrails and limits\n.\nCan I use strict security settings, such as an ad blocker, with Astra DB?\nIf you are using strict security settings in your web browser, you might not see everything in Astra Portal, such as messages and certain functionality that simplifies your experience.\nWhere can I find help?\nThere are several places to get help:\nUse this product documentation to get answers about the capabilities of your database and information about how to use your databases.\nPractice using the Astra DB database with our \nsample apps\n and \nintegrations\n.\nWithin the Astra Portal Home, there are guides to help you get started more easily, quickly, and efficiently. These guides include step-by-step instructions in the Astra Portal, along with prerequisites and procedures.\nTo send us feedback or get additional help, use the chat feature in Astra Portal.\nWhich cloud regions are currently available for serverless databases?\nAstra DB supports single-region and multi-region deployments. The \navailable serverless regions\n vary based on the selected cloud provider, and more are added over time.\nWithin a region, data is replicated across three availability zones to ensure high availability.\nTo discuss region needs, use the chat icon in Astra Portal to talk with DataStax for guidance.\nIs VPC peering possible for serverless databases?\nVPC peering is not supported for serverless databases at this time. \nPrivate endpoints\n are available.\nHow is pricing calculated for serverless databases?\nPricing for DataStax Astra serverless databases is based on units of measure, cloud provider, and region. See \nPricing\n for more information.\nHow do I create a token?\nSelect your \nOrganization settings\n.\nNavigate to \nToken management\n.\nSelect a role.\nCreate a token.\nFor more, see \nManaging application tokens\n.\nHow do I know which role to choose for my token?\nDefault and \ncustom roles\n allow admins to manage unique permissions for users based on your organization and database requirements.\nFor more, see \nUser permissions\n.\nWhere can I find the \nClient ID\n, \nClient Secret\n, and \nToken\n?\nAfter generating a token, you can find the \nClient ID\n, \nClient Secret\n, and \nToken\n.\nYou can download the CSV only when you generate the token. Make sure to download the CSV and save it.
Store these credentials in a secure location.\nWhat values do I pass to username and password when connecting to Astra Drivers?\nIf the connection documentation suggests passing a username and password, use the \nClient ID\n for the username and the \nClient Secret\n for the password:\ncredentials: { username: \"\", password: \"\" }\nCan I restore my data if my database is suspended?\nYes, your data will be restored when your database becomes active again. If your database is suspended, you must add a credit card or wait for your credits to be renewed before your database will be active again.\nWhich ports are used by serverless databases?\nIf you need to make firewall exceptions, the following ports are used by Astra services:\nAPIs such as REST and GraphQL, and the Astra DB Health dashboard: 443\nCQL: 29042\nMetadata service: 29080\nWhat can I update with my avatar on Astra?\nClick your avatar to review your current account settings, update your username, and reset your password."}, {"url": "https://docs.datastax.com/en/dse68-security/docs/secFAQ.html", "title": "DSE Advanced Security FAQs :: DataStax Enterprise 6.8 Security Guide", "content": "DSE Advanced Security FAQs\nContents\nGeneral\nAuthentication and Authorization\nRow-level Access Control (RLAC)\nEncryption\nAuditing\nQuestions and answers about DSE Advanced Security are provided in these categories:\nGeneral\nAuthentication and Authorization\nEncryption\nAuditing\nGeneral\nWhat communication protocols are used?\n \nAll communication occurs over TCP sockets and can be secured by using the standard Java Security SSL/TLS implementation in the JVM.\nAdditional application-specific protocols like gossip and the CQL Binary Protocol rely on these sockets for transport; for a list of ports used by DSE, see \nSecuring DataStax Enterprise ports\n.\nAuthentication and Authorization\nWhat are the restrictions on the default cassandra user?\n \nThe cassandra default account has access to all database resources.\nWhen logging in or performing an action, DSE sets the consistency level to \nQUORUM\n for this account.\nIn a production environment, using the cassandra account may negatively affect performance and lead to failures.\nDataStax recommends \nAdding a superuser login\n immediately after enabling DSE Unified Authentication with role-based access control.\nHow are user permissions managed?\n \nSuperuser permissions allow creation and deletion of other users, and the ability to grant or revoke permissions.\nUse the default cassandra user only to assist with the initial setup of new users and superusers, and then disable it.\nThe DSE Role Manager determines which roles to assign to authenticated users.\nSee \nAbout Role Based Access Control\n.\nHow are user groups supported?\n \nDSE supports role management based on LDAP group membership.\nAll permissions granted to roles that reflect LDAP groups to which the user\nbelongs \u2013 directly or indirectly \u2013 are \ninherited\n.\nThe inherited permissions include login permission, scheme permissions,\nproxy execution permissions, and object permissions.\nConfigure an LDAP scheme with group lookup, set the Role Management mode option to LDAP, and create roles that match the group names, and then assign permissions.\nSee \nDefining an LDAP scheme\n.\nFor efficiency, DataStax recommends using the \nmemberof\n search method for group lookup; however, DataStax also supports
directory search.\nHow are user-action permissions supported?\n \nDSE supports standard object permission management to assign roles specific permissions at the table and row level.\nPermissions to access all keyspaces, a named keyspace, a table, function, or MBean can be granted to a role.\nSee \nSetting up logins and users\n.\nWhat authentication mechanisms are supported?\n \nInternal: Connections provide credentials for a role that has an internally stored password. No additional configuration is required. See \nSetting up logins and users\n.\nLDAP: Connections provide LDAP credentials. DSE passes the credentials for verification to LDAP. See \nDefining an LDAP scheme\n.\nKerberos: Connections provide a Kerberos ticket. DSE is configured as a Service Principal (see \nSetting up Kerberos\n) and passes the tickets to the Key Distribution Center (KDC) for verification. See \nDefining a Kerberos scheme\n.\nWhat LDAP servers are supported?\n \nMicrosoft Active Directory, OpenLDAP, and Oracle Directory Server Enterprise Edition. See \nDefining an LDAP scheme\n.\nCan access be restricted using IP allowlisting and blocklisting?\n \nIn general, arbitrary client programs do not access the database.\nDatabase access by the general user population is controlled at the application layer.\nApplication node to database node access should be controlled by using conventional firewall mechanisms, such as Linux iptables.\nHowever, database administrators are an exception to allow connections from DBA hosts.\nWhat granularity of access to specific elements of data is supported?\n \nAuthorization is granted or revoked at the row level for data.\nWhat is the difference between RBAC and RLAC?\n \nRole-based access control (RBAC) refers to authorization for any database resource, including row-level access control (RLAC).\nRow-level access control refers to the feature that allows permissions to be granted/revoked on rows within a table by filtering a text-based partition column.\nRow-level Access Control (RLAC)\nFor details, see \nsetting up row-level permissions\n.\nHow do I restrict access to a row?\n \nEach table can have a single UTF-8 partition key column on which you build filters to grant access (separate command) to rows within the table.\nRESTRICT only sets the filtering column name:\nRESTRICT ROWS ON [<keyspace_name>.]<table_name>\nUSING <filtering_column>;\nAfter setting the column name, use the GRANT command to configure access to rows.\nCan I unrestrict access to rows in a table with RLAC authorization?\n \nThe partition key to filter on using GRANT can be unselected from the table:\nUNRESTRICT ROWS ON [<keyspace_name>.]<table_name>\nUSING <filtering_column>;\nUse the LIST command on the table to display all roles that have been granted permissions.\nUnrestricting the column does not grant access to all columns within the table; it invalidates existing filters.\nUsers that were granted access with a filter are then unable to access any rows within the table.\nTo grant permissions to all rows, grant permission on the table to the role.\nHow do I grant permissions for rows in a table?\n \nConfigure access to rows within a table by specifying a filtering string that is applied to the partition key column selected in the \nRESTRICT\n command.\nUse case-sensitive literal text in the filter string.\nRow-level authorization applies only to rows that exactly match the filtering_data.\nYou can create as many \nRLAC\n grant variations as required by your security policies.\nTo allow access to rows within a table:\nGRANT <permission>\nON '<filtering_data>' ROWS IN <keyspace_name>.<table_name>\nTO <role_name>;\nUse the LIST command to display all permissions a role
has on a resource.\nHow do I revoke permissions for rows in a table?\n \nRow permissions are stored based on the filtering string; to remove a permission, use the REVOKE command with the exact filtering string you want to remove:\nREVOKE <permission>\nON '<filtering_data>' ROWS IN <keyspace_name>.<table_name>\nFROM <role_name>;\nLIST ALL PERMISSIONS ON TABLE shows all filters granted to roles.\nWhat happens if you run a RESTRICT command on a table that already has a restriction?\n \nTables have only a single restriction.\nRunning the \nRESTRICT\n command replaces the existing restriction.\nUse \nDESCRIBE TABLE\n to view the existing restrictions on the table.\nWhat happens if a role has access at the keyspace/table level and I grant row access?\n \nPermissions are hierarchical: if permission was also granted at the keyspace or table level, the user has access to all rows in the table.\nThe \nRLAC\n permissions have no effect.\nIs RLAC supported for use with DSE Graph?\n \nNo.\nAlthough permissions are shown and errors are not thrown with this statement:\nGRANT SELECT ON 'custom_key' ROWS IN graph_keyspace.graph_table to 'alice';\nPermissions are not enforced.\nRLAC is not supported for use with DSE Graph.\nGranting access on rows in a table provides access to data in all graph keyspaces.\nEncryption\nHow are encryption keys secured and managed?\n \nEncryption keys can be managed off-server or locally:\nKMIP\n (Key Management Interoperability Protocol) encryption for encryption keys stored on another server; keys are cached locally in the memory heap when used by DSE.\nUse \nlocal encryption\n keys and secure them using Linux permissions to restrict access.\nCan the client-to-node encryption be configured as two-way SSL?\n \nYes, although the client certificate DN is not used as a database user principal.\nClient-to-node encryption protects in-flight data from client machines to a\ndatabase cluster using SSL (Secure Sockets Layer) and establishes a secure\nchannel between the client and the coordinator node.\nHow is encryption of \nat-rest data\n supported?\n \nDSE protects sensitive at-rest data using a local encryption key file or a remotely stored and managed Key Management Interoperability Protocol (KMIP) encryption key.\nCan encryption keys be changed for a particular table?\n \nYes, by designating transparent data encryption (TDE) on a per-table basis.\nUsing encryption, your application can read and write to SSTables that use different encryption algorithms or use no encryption at all.\nUse a single \nALTER TABLE\n statement to set encryption and compression.\nWould encryption of EBS in AWS be a good replacement for using TDE, or is EBS better as a supplement to TDE (or neither)?\n \nEBS encryption is another way to encrypt the data files.\nEBS encryption ensures encryption of audit logs, system logs, and the SSTable index files, which have partition keys in plain text when using TDE.\nIn general, EBS encryption may be operationally simpler.\nPrimarily, use TDE when full-disk encryption is cost-prohibitive or not feasible.\nIs encryption supported at granular data layers?
For example, record-level or column- or field-level?\n \nNo.\nDesignate transparent data encryption (TDE) only on a per-table basis.\nAuditing\nWhich user actions and events are logged?\n \nWhen you \nconfigure audit logging\n, you can include or exclude categories of database activity such as querying or DML; see \nFiltering event categories\n.\nWhere are audit logs stored and who has access?\n \nAudit logs can be written to either file system log files using \nlogback\n, or to a database table.\nAudit events stored in database tables can be secured like any other database table using RBAC.\nFile-based audit logs are stored per-node and can be secured with standard Linux file system permissions."}] -------------------------------------------------------------------------------- /tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: [ 4 | "./components/**/*.{js,ts,jsx,tsx,mdx}", 5 | "./app/**/*.{js,ts,jsx,tsx,mdx}", 6 | ], 7 | future: { 8 | hoverOnlyWhenSupported: true, 9 | }, 10 | theme: { 11 | extend: { 12 | fontFamily: { 13 | sans: ["var(--font-geist-sans)"], 14 | }, 15 | screens: { 16 | origin: "800px", 17 | }, 18 | }, 19 | }, 20 | plugins: [], 21 | }; 22 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "lib": [ 4 | "dom", 5 | "dom.iterable", 6 | "esnext" 7 | ], 8 | "allowJs": true, 9 | "skipLibCheck": true, 10 | "strict": false, 11 | "noEmit": true, 12 | "incremental": true, 13 | "esModuleInterop": true, 14 | "module": "esnext", 15 | "moduleResolution": "node", 16 | "resolveJsonModule": true, 17 | "isolatedModules": true, 18 | "jsx": "preserve", 19 | "plugins": [ 20 | { 21 | "name": "next" 22 | } 23 | ] 24 | }, 25 | "include": [ 26 | "next-env.d.ts", 27 | ".next/types/**/*.ts", 28 | "**/*.ts", 29 | "**/*.tsx" 30 | ], 31 | "ts-node": { 32 | // these options are overrides used only by ts-node 33 | // same as the --compilerOptions flag and the TS_NODE_COMPILER_OPTIONS environment variable 34 | "compilerOptions": { 35 | "module": "commonjs" 36 | } 37 | }, 38 | "exclude": [ 39 | "node_modules" 40 | ] 41 | } 42 | --------------------------------------------------------------------------------
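A note on the `ts-node` block in `tsconfig.json` above: ts-node merges that nested `compilerOptions` over the base options, so scripts executed directly with ts-node (such as the seeding script under `scripts/`) compile as CommonJS, while the Next.js app itself builds with `"module": "esnext"`. The sketch below is illustrative only and is not a file in this repo; the file name and the logging are assumptions. It shows how a CommonJS-compiled script could read and sanity-check the `scripts/sample_data.json` entries, whose shape (`url`, `title`, `content`) appears above.

```ts
// inspectSampleData.ts — hypothetical helper, not part of this repo.
// Run with: npx ts-node scripts/inspectSampleData.ts
// The "ts-node" override in tsconfig.json compiles this file as CommonJS,
// which is why the CJS global __dirname is available here.
import fs from "fs";
import path from "path";

// Shape of each entry in scripts/sample_data.json, as seen above.
interface SampleDoc {
  url: string;     // source page, e.g. a docs.datastax.com FAQ
  title: string;   // page title
  content: string; // scraped page text used to seed the vector store
}

const file = path.join(__dirname, "sample_data.json");
const docs: SampleDoc[] = JSON.parse(fs.readFileSync(file, "utf8"));

// Sanity-check the corpus before seeding the database.
for (const doc of docs) {
  console.log(`${doc.title} — ${doc.url} (${doc.content.length} chars)`);
}
```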