├── Best-Fit Embedding Demo.ipynb
├── Building_a_Gen_AI_App_with_LlamaIndex_and_SingleStore.ipynb
├── Building_a_Gen_AI_App_with_Vertex_AI_and_SingleStore (1).ipynb
├── CLIP.ipynb
├── Claude 3 Multimodal.ipynb
├── Deploying AI-Driven Personalization Engines.ipynb
├── Finetune Embedding (1).ipynb
├── Getting started with SingleStore webinar.ipynb
├── Getting started with SingleStore.ipynb
├── Google Gemini Audio Webinar.ipynb
├── Groq Webinar Code.ipynb
├── Langchain Multimodal Webinar.ipynb
├── Llama 3 Demo.ipynb
├── Llama_2_vs_Mistral_for_Fully_Private_GenAI_Apps (1).ipynb
├── RAG_Titan_Embeddings_Claude.ipynb
├── README.md
├── SingleStore + LangChain for Multimodal Apps.ipynb
├── chat-with-plg-data
    ├── .gitignore
    ├── README.md
    ├── assets
    │   ├── s2-get-creds-1.png
    │   ├── s2-get-creds-2.png
    │   └── s2-get-creds-3.png
    ├── chat_with_plg.ipynb
    ├── create_table_actions.sql
    ├── create_table_customers.sql
    ├── generator
    │   ├── .env.sample
    │   ├── Dockerfile
    │   ├── app.py
    │   └── requirements.txt
    └── webhook_server
    │   ├── .env.sample
    │   ├── .gitignore
    │   ├── Dockerfile
    │   ├── app.py
    │   └── requirements.txt
├── events.csv
├── fraud_detection.ipynb
├── kai-product-rec
    ├── README.md
    ├── books_scifi.txt
    └── webinar-product-rec-kai.ipynb
├── langchain-lift-off
    ├── README.md
    ├── langchain_demo1.ipynb
    └── langchain_demo2.ipynb
├── llama-2-local
    ├── .gitignore
    ├── LICENSE
    ├── README.md
    ├── assets
    │   ├── diagram_flow.png
    │   └── qa_output.png
    ├── config
    │   └── config.yml
    ├── data
    │   └── manu-20f-2022-09-24.pdf
    ├── db_build.py
    ├── main.py
    ├── requirements.txt
    └── src
    │   ├── __init__.py
    │   ├── llm.py
    │   ├── prompts.py
    │   └── utils.py
├── loan_approvals.ipynb
├── mktg-email-flow
    ├── README.md
    ├── docs
    │   ├── add-tracking-to-website.md
    │   ├── conversion-tracking.md
    │   ├── custom-event-tracking.md
    │   ├── dashboard-customization.md
    │   ├── data-export.md
    │   ├── funnels.md
    │   ├── generate-tracking-code.md
    │   ├── getting-started.md
    │   ├── real-time-analytics.md
    │   └── user-segmentation.md
    └── marketing-emails-webinar.ipynb
├── nodejs-with-singlestore
    ├── .env.example
    ├── .gitignore
    ├── .prettierrc.js
    ├── README.md
    ├── app.js
    ├── dataset.csv
    ├── lib
    │   ├── db.js
    │   └── load.js
    ├── package.json
    └── vercel.json
├── openai-plugin-webinar
    └── create_table_and_insert_from_csv.ipynb
├── real-time-plg-webinar
    └── Real-timePLG.ipynb
├── spark_webinar_1.ipynb
├── spark_webinar_2.ipynb
├── spark_webinar_3.ipynb
└── ucsb_datathon.ipynb


/Best-Fit Embedding Demo.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"attachments":{},"cell_type":"markdown","metadata":{"id":"0etRtS83RcWS"},"source":"# Choosing Best-Fit Embeddings"},{"attachments":{},"cell_type":"markdown","metadata":{"id":"r1IzNLho-NqV"},"source":"This notebook provides an example of how to choose the best-fit embeddings for your specific use case and store those embeddings in SingleStore."},{"cell_type":"code","execution_count":120,"metadata":{"execution":{"iopub.execute_input":"2024-06-05T16:36:24.120953Z","iopub.status.busy":"2024-06-05T16:36:24.120645Z","iopub.status.idle":"2024-06-05T16:36:32.428275Z","shell.execute_reply":"2024-06-05T16:36:32.426335Z","shell.execute_reply.started":"2024-06-05T16:36:24.120928Z"},"language":"python","trusted":true},"outputs":[],"source":"!pip install -q -U langchain langchain-community singlestoredb langchain-openai langchain-huggingface --quiet"},{"cell_type":"code","execution_count":147,"metadata":{"execution":{"iopub.execute_input":"2024-06-05T16:45:50.710353Z","iopub.status.busy":"2024-06-05T16:45:50.709984Z","iopub.status.idle":"2024-06-05T16:46:18.339568Z","shell.execute_reply":"2024-06-05T16:46:18.338860Z","shell.execute_reply.started":"2024-06-05T16:45:50.710328Z"},"language":"python","trusted":true},"outputs":[{"name":"stdin","output_type":"stream","text":" ········\n"}],"source":"import getpass\n\nOPENAI_API_KEY = getpass.getpass()\n\nos.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"},{"cell_type":"code","execution_count":141,"metadata":{"execution":{"iopub.execute_input":"2024-06-05T16:43:59.101284Z","iopub.status.busy":"2024-06-05T16:43:59.100958Z","iopub.status.idle":"2024-06-05T16:43:59.770587Z","shell.execute_reply":"2024-06-05T16:43:59.769974Z","shell.execute_reply.started":"2024-06-05T16:43:59.101259Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain.chains import LLMChain\nfrom langchain_openai import ChatOpenAI\nfrom langchain_core.prompts import PromptTemplate\nfrom langchain_huggingface import HuggingFaceEmbeddings"},{"attachments":{},"cell_type":"markdown","metadata":{"language":"python"},"source":"## Loading a Hugging Face Transformers model from the MTEB Leaderboard\n\n(https://huggingface.co/spaces/mteb/leaderboard)\n\n"},{"cell_type":"code","execution_count":null,"metadata":{"language":"python","scrolled":true,"trusted":true},"outputs":[],"source":"model_name = \"mixedbread-ai/mxbai-embed-large-v1\"\nhf_embeddings = HuggingFaceEmbeddings(\n    model_name=model_name,\n)\ntexts = [\"Hello, world!\", \"How are you?\"]\nhf_embeddings.embed_documents(texts)"},{"cell_type":"markdown","metadata":{"language":"python"},"source":"## Benchmarking over a test data set\n\nHere we will construct a test data set and benchmark our chosen model against it."},{"cell_type":"code","execution_count":130,"metadata":{"execution":{"iopub.execute_input":"2024-06-05T16:43:15.878049Z","iopub.status.busy":"2024-06-05T16:43:15.877745Z","iopub.status.idle":"2024-06-05T16:43:15.883181Z","shell.execute_reply":"2024-06-05T16:43:15.882688Z","shell.execute_reply.started":"2024-06-05T16:43:15.878024Z"},"language":"python","trusted":true},"outputs":[],"source":"cars = [\n    {\n        \"name\": \"Sedan\",\n        \"description\": \"A classic four-door car with a spacious interior and smooth ride, perfect for daily commutes or family trips.\"\n    },\n    {\n        \"name\": \"SUV\",\n        \"description\": \"A versatile vehicle with ample seating and cargo space. Enjoy off-road adventures or city driving with ease.\"\n    },\n    {\n        \"name\": \"Sports Car\",\n        \"description\": \"Indulge in high-speed thrills with this sleek, aerodynamic vehicle. Experience powerful performance and dynamic handling.\"\n    },\n    {\n        \"name\": \"Convertible\",\n        \"description\": \"Enjoy open-air driving with this stylish car. Whether you prefer sunny days or starry nights, it's a ride for all seasons.\"\n    },\n    {\n        \"name\": \"Hatchback\",\n        \"description\": \"Compact and practical, this car offers easy maneuverability and ample storage space for urban living or weekend getaways.\"\n    },\n    {\n        \"name\": \"Pickup Truck\",\n        \"description\": \"Robust and reliable, this truck is built for hauling and towing. Perfect for work or outdoor adventures.\"\n    },\n    {\n        \"name\": \"Minivan\",\n        \"description\": \"Spacious and family-friendly, this vehicle offers comfortable seating and modern amenities for long road trips.\"\n    },\n    {\n        \"name\": \"Coupe\",\n        \"description\": \"A stylish two-door car with a sporty design. Ideal for those who appreciate performance and aesthetics in a compact form.\"\n    }\n]"},{"cell_type":"code","execution_count":132,"metadata":{"execution":{"iopub.execute_input":"2024-06-05T16:43:21.686274Z","iopub.status.busy":"2024-06-05T16:43:21.685913Z","iopub.status.idle":"2024-06-05T16:43:21.690939Z","shell.execute_reply":"2024-06-05T16:43:21.690275Z","shell.execute_reply.started":"2024-06-05T16:43:21.686247Z"},"language":"python","trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":"[Document(page_content='A classic four-door car with a spacious interior and smooth ride, perfect for daily commutes or family trips.', metadata={'name': 'Sedan'}), Document(page_content='A versatile vehicle with ample seating and cargo space. Enjoy off-road adventures or city driving with ease.', metadata={'name': 'SUV'}), Document(page_content='Indulge in high-speed thrills with this sleek, aerodynamic vehicle. Experience powerful performance and dynamic handling.', metadata={'name': 'Sports Car'}), Document(page_content=\"Enjoy open-air driving with this stylish car. Whether you prefer sunny days or starry nights, it's a ride for all seasons.\", metadata={'name': 'Convertible'}), Document(page_content='Compact and practical, this car offers easy maneuverability and ample storage space for urban living or weekend getaways.', metadata={'name': 'Hatchback'}), Document(page_content='Robust and reliable, this truck is built for hauling and towing. Perfect for work or outdoor adventures.', metadata={'name': 'Pickup Truck'}), Document(page_content='Spacious and family-friendly, this vehicle offers comfortable seating and modern amenities for long road trips.', metadata={'name': 'Minivan'}), Document(page_content='A stylish two-door car with a sporty design. Ideal for those who appreciate performance and aesthetics in a compact form.', metadata={'name': 'Coupe'})]\n"}],"source":"from langchain.schema.document import Document\n\ndocs = []\n\nfor car in cars:\n    d = Document(page_content=car[\"description\"], metadata={\"name\": car[\"name\"]})\n    docs.append(d)\n\nprint(docs)"},{"cell_type":"code","execution_count":134,"metadata":{"execution":{"iopub.execute_input":"2024-06-05T16:43:33.028837Z","iopub.status.busy":"2024-06-05T16:43:33.028277Z","iopub.status.idle":"2024-06-05T16:43:33.032474Z","shell.execute_reply":"2024-06-05T16:43:33.031788Z","shell.execute_reply.started":"2024-06-05T16:43:33.028806Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain.vectorstores import SingleStoreDB\nimport os\n\nos.environ[\"SINGLESTOREDB_URL\"] = f'{connection_user}:{connection_password}@{connection_host}:{connection_port}/{connection_default_database}'"},{"cell_type":"code","execution_count":152,"metadata":{"execution":{"iopub.execute_input":"2024-06-05T16:48:47.630308Z","iopub.status.busy":"2024-06-05T16:48:47.629944Z","iopub.status.idle":"2024-06-05T16:48:48.121227Z","shell.execute_reply":"2024-06-05T16:48:48.120722Z","shell.execute_reply.started":"2024-06-05T16:48:47.630275Z"},"language":"python","trusted":true},"outputs":[],"source":"vectorstore=SingleStoreDB.from_documents(documents=docs, table_name=\"embedding_test\", embedding=hf_embeddings, use_vector_index=True)"},{"cell_type":"code","execution_count":148,"metadata":{"execution":{"iopub.execute_input":"2024-06-05T16:46:21.304883Z","iopub.status.busy":"2024-06-05T16:46:21.304505Z","iopub.status.idle":"2024-06-05T16:46:23.156660Z","shell.execute_reply":"2024-06-05T16:46:23.156045Z","shell.execute_reply.started":"2024-06-05T16:46:21.304849Z"},"language":"python","trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":"/opt/conda/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The method `Chain.__call__` was deprecated in langchain 0.1.0 and will be removed in 0.3.0. Use invoke instead.\n  warn_deprecated(\n"},{"data":{"text/plain":"{'query': 'What cars are best suited for families?',\n 'result': 'The classic four-door car with a spacious interior and smooth ride, as well as the spacious and family-friendly vehicle with comfortable seating and modern amenities would be best suited for families.'}"},"execution_count":148,"metadata":{},"output_type":"execute_result"}],"source":"from langchain.chains import RetrievalQA\n\nllm=ChatOpenAI()\nqa_chain = RetrievalQA.from_chain_type(llm,retriever=vectorstore.as_retriever())\nqa_chain({\"query\": \"What cars are best suited for families?\"})"},{"cell_type":"code","execution_count":170,"metadata":{"execution":{"iopub.execute_input":"2024-06-05T16:56:36.141666Z","iopub.status.busy":"2024-06-05T16:56:36.141287Z","iopub.status.idle":"2024-06-05T16:56:36.416417Z","shell.execute_reply":"2024-06-05T16:56:36.415900Z","shell.execute_reply.started":"2024-06-05T16:56:36.141633Z"},"language":"python","trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":"Minivan\nSpacious and family-friendly, this vehicle offers comfortable seating and modern amenities for long road trips.\n"}],"source":"vectorResults = vectorstore.similarity_search(\n    \"family car\",\n    k=4,\n    search_strategy=SingleStoreDB.SearchStrategy.VECTOR_ONLY,\n)\nprint(vectorResults[0].metadata[\"name\"])\nprint(vectorResults[0].page_content)"},{"cell_type":"code","execution_count":171,"metadata":{"execution":{"iopub.execute_input":"2024-06-05T16:56:38.581689Z","iopub.status.busy":"2024-06-05T16:56:38.581384Z","iopub.status.idle":"2024-06-05T16:56:38.738953Z","shell.execute_reply":"2024-06-05T16:56:38.738328Z","shell.execute_reply.started":"2024-06-05T16:56:38.581664Z"},"language":"python","trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":"Coupe\nA stylish two-door car with a sporty design. Ideal for those who appreciate performance and aesthetics in a compact form.\n"}],"source":"vectorResults = vectorstore.similarity_search(\n    \"sporty car\",\n    k=4,\n    search_strategy=SingleStoreDB.SearchStrategy.VECTOR_ONLY,\n)\nprint(vectorResults[0].metadata[\"name\"])\nprint(vectorResults[0].page_content)"},{"cell_type":"markdown","metadata":{"execution":{"iopub.execute_input":"2024-06-05T16:55:35.325145Z","iopub.status.busy":"2024-06-05T16:55:35.324778Z","iopub.status.idle":"2024-06-05T16:55:35.331115Z","shell.execute_reply":"2024-06-05T16:55:35.330264Z","shell.execute_reply.started":"2024-06-05T16:55:35.325112Z"},"language":"sql"},"source":"## Query 1: \"Family vehicle\"\n\n| Rank | mixedbread-ai/mxbai-embed-large-v1  |\n|------|-------------------------------------|\n| 1    | ✅ SUV                              |\n| 2    | ❌ Sedan                            |\n| 3    | ✅ Minivan                          |\n| 4    | ❌ Pickup Truck                     |\n\n**Precision**: 2/4 = 0.50\n\n**Recall**: 2/3 = 0.67\n\n## Query 2: \"Sporty car\"\n\n| Rank | mixedbread-ai/mxbai-embed-large-v1  |\n|------|-------------------------------------|\n| 1    | ✅ Sports Car                       |\n| 2    | ❌ Sedan                            |\n| 3    | ✅ Convertible                      |\n| 4    | ❌ Hatchback                        |\n\n**Precision**: 2/4 = 0.50\n\n**Recall**: 2/3 = 0.67"}],"metadata":{"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.6"},"singlestore_cell_default_language":"sql","singlestore_connection":{"connectionID":"f3d98a31-c85c-4db3-9e5c-64b5051c6439","defaultDatabase":"database_58ed3"}},"nbformat":4,"nbformat_minor":4}


--------------------------------------------------------------------------------
/Getting started with SingleStore webinar.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"markdown","id":"85a217b8-503f-4082-9942-abf5d00a8663","metadata":{"language":"sql"},"source":"# Quick start to SingleStore using Stages"},{"cell_type":"markdown","id":"494a5d82-dff7-42a9-99c1-30108375943f","metadata":{"language":"sql"},"source":"What we will learn in this notebook\n1. Sign up for a free trial\n2. Create a database and assign compute resources\n3. Load your own dataset using Stages\n4. Query data\n\nYou may also like to see a [recording of this webinar] (https://www.singlestore.com/resources/webinar-getting-started-with-singlestoredb/)"},{"cell_type":"markdown","id":"e67f2db9-6276-417d-829b-6f5661ead762","metadata":{"language":"sql"},"source":"# Step 1- Sign up for a free trial\n1. Go to https://www.singlestore.com/cloud-trial/\n2. Just answer a few questions\n3. Verify your email\n4. Log in to portal.singlestore.com"},{"cell_type":"markdown","id":"9affb7d0-2739-4b8e-ae91-83f251ccfc9c","metadata":{"language":"sql"},"source":"# Step 2- Create a workspace and attach a database to it\nIn the portal:\n1. Create a workspace group by choosing a cloud provider and region\n2. On the next page, create a workspace. Change the size of the cluster if needed\n3. On the next page, wait for the workspace to be deployed. This may take a few minutes.\n4. Once the workspace is deployed, click on \" + Create Database\"\n5. Name your new database and make sure to attach it to the workspace you just created"},{"cell_type":"markdown","id":"6f7026d1-5960-4bb0-97be-398e79c7c51c","metadata":{},"source":"# Step 3- Load your own dataset using Stages\n\n## Step 3.1\n1. Find the feature \"Stages\" either on left sidebar under your workspace group or top nav bar for your workspace group. \n2. Click on \"Upload New File\"\n3. Drag and drop your CSV into Stages. You will see \"File uploaded to Stages successfully\". In this example, I use a dataset called foodhub_order.csv, which can be found here\nhttps://s2webinardemos.s3.us-west-2.amazonaws.com/foodhub_order.csv\n4. Under Actions for this Stages file, click on \"Load to Database\"\n5. On the \"Load Data\" screen, choose the same workspace and database that you created previously. Hit \"Generate Notebook\"\n6. This creates a notebook. You will see a \"Success\" message pop.\n\nThe following sections contain code mostly generated by Stages."},{"cell_type":"markdown","id":"b75d2652-983c-4b93-ab57-14f581132ec7","metadata":{"language":"sql"},"source":"## Step 3.2 - Completed for you by Stages- Load data to foodhub_order\nThis notebook provides step by step instructions for ingesting 'foodhub_order.csv' file to a database.\n\nStart by creating a table to store the ingested data:"},{"cell_type":"code","execution_count":369,"id":"e0ec7e57-c1f3-41c4-832a-f6204157aa0f","metadata":{"execution":{"iopub.execute_input":"2023-11-01T22:41:57.406966Z","iopub.status.busy":"2023-11-01T22:41:57.406690Z","iopub.status.idle":"2023-11-01T22:41:57.699393Z","shell.execute_reply":"2023-11-01T22:41:57.698962Z","shell.execute_reply.started":"2023-11-01T22:41:57.406949Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n    <thead>\n        <tr>\n        </tr>\n    </thead>\n    <tbody>\n    </tbody>\n</table>","text/plain":"++\n||\n++\n++"},"execution_count":369,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\n%%sql\n# If rerunning, clean up previously created resources\nDROP PIPELINE IF EXISTS foodhub_database.`foodhub_order`;\nDROP TABLE IF EXISTS foodhub_database.`foodhub_order`;"},{"cell_type":"markdown","id":"f9a856b2-14ba-4150-a059-0bd3cafdc21d","metadata":{"language":"sql"},"source":"## Step 3.3 - Completed for you by Stages- Create a table"},{"cell_type":"code","execution_count":372,"id":"2d10d5bc-f7c7-497a-a0ad-db50eabe4d3c","metadata":{"execution":{"iopub.execute_input":"2023-11-01T22:42:08.371756Z","iopub.status.busy":"2023-11-01T22:42:08.371459Z","iopub.status.idle":"2023-11-01T22:42:08.741103Z","shell.execute_reply":"2023-11-01T22:42:08.740610Z","shell.execute_reply.started":"2023-11-01T22:42:08.371738Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n    <thead>\n        <tr>\n        </tr>\n    </thead>\n    <tbody>\n    </tbody>\n</table>","text/plain":"++\n||\n++\n++"},"execution_count":372,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\n%%sql\nUSE foodhub_database;\nCREATE TABLE foodhub_database.`foodhub_order` (\n\t`order_id` bigint(20) NULL,\n\t`customer_id` bigint(20) NULL,\n\t`restaurant_name` text CHARACTER SET utf8 COLLATE utf8_general_ci NULL,\n\t`cuisine_type` text CHARACTER SET utf8 COLLATE utf8_general_ci NULL,\n\t`cost_of_the_order` double NULL,\n\t`day_of_the_week` text CHARACTER SET utf8 COLLATE utf8_general_ci NULL,\n\t`rating` text CHARACTER SET utf8 COLLATE utf8_general_ci NULL,\n\t`food_preparation_time` bigint(20) NULL,\n\t`delivery_time` bigint(20) NULL,\n\t SHARD KEY ()\n);"},{"cell_type":"markdown","id":"00211a7a-2ccf-4763-a038-891571b2e797","metadata":{},"source":"## Step 3.4 - Completed for you by Stages- Create a pipeline that will load file into the database\n### [Learn about Load Data with Pipeline through our documentation](https://docs.singlestore.com/managed-service/en/load-data/load-data-with-pipelines.html)"},{"cell_type":"code","execution_count":374,"id":"c6de784c-6f04-4ceb-b796-89adde953557","metadata":{"execution":{"iopub.execute_input":"2023-11-01T22:42:13.990604Z","iopub.status.busy":"2023-11-01T22:42:13.990352Z","iopub.status.idle":"2023-11-01T22:42:14.683611Z","shell.execute_reply":"2023-11-01T22:42:14.682972Z","shell.execute_reply.started":"2023-11-01T22:42:13.990588Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n    <thead>\n        <tr>\n        </tr>\n    </thead>\n    <tbody>\n    </tbody>\n</table>","text/plain":"++\n||\n++\n++"},"execution_count":374,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\n%%sql\nUSE foodhub_database;\nCREATE PIPELINE foodhub_database.`foodhub_order`\nAS LOAD DATA STAGE 'foodhub_order.csv'\nBATCH_INTERVAL 2500\nDISABLE OUT_OF_ORDER OPTIMIZATION\nDISABLE OFFSETS METADATA GC\nSKIP DUPLICATE KEY ERRORS -- SKIP ALL ERRORS can be used to skip all errors that can be tracked through \"Monitor the pipeline for errors\"\nINTO TABLE `foodhub_order`\nFIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\\\\' \nLINES TERMINATED BY '\\r\\n' STARTING BY '' \nIGNORE 1 LINES\nFORMAT CSV\n(\n\t`foodhub_order`.`order_id`,\n\t`foodhub_order`.`customer_id`,\n\t`foodhub_order`.`restaurant_name`,\n\t`foodhub_order`.`cuisine_type`,\n\t`foodhub_order`.`cost_of_the_order`,\n\t`foodhub_order`.`day_of_the_week`,\n\t`foodhub_order`.`rating`,\n\t`foodhub_order`.`food_preparation_time`,\n\t`foodhub_order`.`delivery_time`\n);"},{"cell_type":"markdown","id":"22094aad-1e2e-4b66-b5fe-cbfb5578d8de","metadata":{},"source":"## Step 3.5 - Completed for you by Stages- Start the pipeline"},{"cell_type":"code","execution_count":378,"id":"104d25c8-60cb-4a1e-9a34-71e65403f300","metadata":{"execution":{"iopub.execute_input":"2023-11-01T22:42:43.860168Z","iopub.status.busy":"2023-11-01T22:42:43.859954Z","iopub.status.idle":"2023-11-01T22:42:44.002338Z","shell.execute_reply":"2023-11-01T22:42:44.001794Z","shell.execute_reply.started":"2023-11-01T22:42:43.860150Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n    <thead>\n        <tr>\n        </tr>\n    </thead>\n    <tbody>\n    </tbody>\n</table>","text/plain":"++\n||\n++\n++"},"execution_count":378,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\n%%sql\nSTART PIPELINE foodhub_database.`foodhub_order`;"},{"cell_type":"markdown","id":"56d98912-8ac8-4f6c-94ea-d900f469ce41","metadata":{},"source":"### Monitor the pipeline for errors"},{"cell_type":"code","execution_count":379,"id":"b7aa029a-fbda-4ae8-b9dc-ed53917aa6d8","metadata":{"execution":{"iopub.execute_input":"2023-11-01T22:42:46.316900Z","iopub.status.busy":"2023-11-01T22:42:46.316508Z","iopub.status.idle":"2023-11-01T22:42:46.578015Z","shell.execute_reply":"2023-11-01T22:42:46.577443Z","shell.execute_reply.started":"2023-11-01T22:42:46.316881Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n    <thead>\n        <tr>\n            <th>DATABASE_NAME</th>\n            <th>PIPELINE_NAME</th>\n            <th>ERROR_UNIX_TIMESTAMP</th>\n            <th>ERROR_TYPE</th>\n            <th>ERROR_CODE</th>\n            <th>ERROR_MESSAGE</th>\n            <th>ERROR_KIND</th>\n            <th>STD_ERROR</th>\n            <th>LOAD_DATA_LINE</th>\n            <th>LOAD_DATA_LINE_NUMBER</th>\n            <th>BATCH_ID</th>\n            <th>ERROR_ID</th>\n            <th>BATCH_SOURCE_PARTITION_ID</th>\n            <th>BATCH_EARLIEST_OFFSET</th>\n            <th>BATCH_LATEST_OFFSET</th>\n            <th>HOST</th>\n            <th>PORT</th>\n            <th>PARTITION</th>\n        </tr>\n    </thead>\n    <tbody>\n    </tbody>\n</table>","text/plain":"+---------------+---------------+----------------------+------------+------------+---------------+------------+-----------+----------------+-----------------------+----------+----------+---------------------------+-----------------------+---------------------+------+------+-----------+\n| DATABASE_NAME | PIPELINE_NAME | ERROR_UNIX_TIMESTAMP | ERROR_TYPE | ERROR_CODE | ERROR_MESSAGE | ERROR_KIND | STD_ERROR | LOAD_DATA_LINE | LOAD_DATA_LINE_NUMBER | BATCH_ID | ERROR_ID | BATCH_SOURCE_PARTITION_ID | BATCH_EARLIEST_OFFSET | BATCH_LATEST_OFFSET | HOST | PORT | PARTITION |\n+---------------+---------------+----------------------+------------+------------+---------------+------------+-----------+----------------+-----------------------+----------+----------+---------------------------+-----------------------+---------------------+------+------+-----------+\n+---------------+---------------+----------------------+------------+------------+---------------+------------+-----------+----------------+-----------------------+----------+----------+---------------------------+-----------------------+---------------------+------+------+-----------+"},"execution_count":379,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\n%%sql\nUSE foodhub_database;\nSELECT * FROM information_schema.pipelines_errors\nWHERE pipeline_name ='foodhub_order';"},{"cell_type":"markdown","id":"13fea3e0-356e-474f-bfa8-1e5d8c730c3c","metadata":{},"source":"### Check that the data has loaded"},{"cell_type":"code","execution_count":380,"id":"092a4b8e-4df8-4ce6-8dc3-2377be0da12f","metadata":{"execution":{"iopub.execute_input":"2023-11-01T22:42:50.016452Z","iopub.status.busy":"2023-11-01T22:42:50.016082Z","iopub.status.idle":"2023-11-01T22:42:50.222688Z","shell.execute_reply":"2023-11-01T22:42:50.222119Z","shell.execute_reply.started":"2023-11-01T22:42:50.016433Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n    <thead>\n        <tr>\n            <th>order_id</th>\n            <th>customer_id</th>\n            <th>restaurant_name</th>\n            <th>cuisine_type</th>\n            <th>cost_of_the_order</th>\n            <th>day_of_the_week</th>\n            <th>rating</th>\n            <th>food_preparation_time</th>\n            <th>delivery_time</th>\n        </tr>\n    </thead>\n    <tbody>\n        <tr>\n            <td>1476849</td>\n            <td>42052</td>\n            <td>Pepe Rosso To Go</td>\n            <td>Italian</td>\n            <td>13.73</td>\n            <td>Weekend</td>\n            <td>Not given</td>\n            <td>21</td>\n            <td>28</td>\n        </tr>\n        <tr>\n            <td>1476821</td>\n            <td>80434</td>\n            <td>Bareburger</td>\n            <td>American</td>\n            <td>12.18</td>\n            <td>Weekend</td>\n            <td>Not given</td>\n            <td>33</td>\n            <td>18</td>\n        </tr>\n        <tr>\n            <td>1477535</td>\n            <td>125123</td>\n            <td>S'MAC</td>\n            <td>American</td>\n            <td>15.57</td>\n            <td>Weekend</td>\n            <td>5</td>\n            <td>34</td>\n            <td>28</td>\n        </tr>\n        <tr>\n            <td>1476915</td>\n            <td>142273</td>\n            <td>Pepe Rosso To Go</td>\n            <td>Italian</td>\n            <td>15.57</td>\n            <td>Weekend</td>\n            <td>5</td>\n            <td>29</td>\n            <td>27</td>\n        </tr>\n        <tr>\n            <td>1476637</td>\n            <td>49695</td>\n            <td>Blue Ribbon Fried Chicken</td>\n            <td>American</td>\n            <td>18.24</td>\n            <td>Weekend</td>\n            <td>Not given</td>\n            <td>21</td>\n            <td>16</td>\n        </tr>\n    </tbody>\n</table>","text/plain":"+----------+-------------+---------------------------+--------------+-------------------+-----------------+-----------+-----------------------+---------------+\n| order_id | customer_id |      restaurant_name      | cuisine_type | cost_of_the_order | day_of_the_week |   rating  | food_preparation_time | delivery_time |\n+----------+-------------+---------------------------+--------------+-------------------+-----------------+-----------+-----------------------+---------------+\n| 1476849  |    42052    |      Pepe Rosso To Go     |   Italian    |       13.73       |     Weekend     | Not given |           21          |       28      |\n| 1476821  |    80434    |         Bareburger        |   American   |       12.18       |     Weekend     | Not given |           33          |       18      |\n| 1477535  |    125123   |           S'MAC           |   American   |       15.57       |     Weekend     |     5     |           34          |       28      |\n| 1476915  |    142273   |      Pepe Rosso To Go     |   Italian    |       15.57       |     Weekend     |     5     |           29          |       27      |\n| 1476637  |    49695    | Blue Ribbon Fried Chicken |   American   |       18.24       |     Weekend     | Not given |           21          |       16      |\n+----------+-------------+---------------------------+--------------+-------------------+-----------------+-----------+-----------------------+---------------+"},"execution_count":380,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\n%%sql\nSELECT * FROM foodhub_database.`foodhub_order`\nLIMIT 5;"},{"cell_type":"markdown","id":"e8746cd2-d274-4714-9c41-ca1d5c94613a","metadata":{"language":"sql"},"source":"# Step 4- Query the data"},{"cell_type":"markdown","id":"e0c44687-5814-432a-b56f-58bcdf591fe6","metadata":{"language":"sql"},"source":"## Step 4.1- Run your own queries\nExamples below"},{"cell_type":"code","execution_count":429,"id":"171b183c-18cf-4876-a77f-6f7fed408707","metadata":{"execution":{"iopub.execute_input":"2023-11-01T22:48:53.772761Z","iopub.status.busy":"2023-11-01T22:48:53.772338Z","iopub.status.idle":"2023-11-01T22:48:53.926631Z","shell.execute_reply":"2023-11-01T22:48:53.926077Z","shell.execute_reply.started":"2023-11-01T22:48:53.772740Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n    <thead>\n        <tr>\n            <th>AVG (cost_of_the_order)</th>\n        </tr>\n    </thead>\n    <tbody>\n        <tr>\n            <td>16.49885142255006</td>\n        </tr>\n    </tbody>\n</table>","text/plain":"+-------------------------+\n| AVG (cost_of_the_order) |\n+-------------------------+\n|    16.49885142255006    |\n+-------------------------+"},"execution_count":429,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\n%%sql\n# What is the average order value?\n\nSELECT AVG (cost_of_the_order)\nFROM foodhub_database.`foodhub_order`;"},{"cell_type":"code","execution_count":430,"id":"b4d8d7c5-2c82-4c50-9214-8fd263a36a4b","metadata":{"execution":{"iopub.execute_input":"2023-11-01T22:48:55.508490Z","iopub.status.busy":"2023-11-01T22:48:55.508126Z","iopub.status.idle":"2023-11-01T22:48:55.663189Z","shell.execute_reply":"2023-11-01T22:48:55.662643Z","shell.execute_reply.started":"2023-11-01T22:48:55.508472Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n    <thead>\n        <tr>\n            <th>COUNT (order_id)</th>\n        </tr>\n    </thead>\n    <tbody>\n        <tr>\n            <td>1898</td>\n        </tr>\n    </tbody>\n</table>","text/plain":"+------------------+\n| COUNT (order_id) |\n+------------------+\n|       1898       |\n+------------------+"},"execution_count":430,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\n%%sql\n# What is the total number of orders?\nSELECT COUNT (order_id)\nFROM foodhub_database.`foodhub_order`;"},{"cell_type":"markdown","id":"67b5e8fe-b747-4655-9c4e-0a6485309095","metadata":{"language":"sql"},"source":"## Step 4.2 (Optional)- Use SQrL to generate queries\n1. On the top right, toggle to enable \"Code with SQRrL\". Type what you'd like to accomplish with the query. Something like this\n\"I have a table foodhub_database.`foodhub_order` with the fields order_id customer_id restaurant_name cuisine_type\tcost_of_the_order. Write a query to find out the most popular cuisine type.\"\n2. Hit \"Add Cell\" to add this code to your notebook. You should see something similar to the query below"},{"cell_type":"code","execution_count":null,"id":"b1f0232c-e1d1-4d62-97a8-6ffc453d210d","metadata":{"language":"sql","trusted":true},"outputs":[],"source":"%%sql\n%%sql\n# SQrL created query- What is the most popular cuisine?\nSELECT cuisine_type, COUNT(*) as orders_count\nFROM foodhub_database.foodhub_order\nGROUP BY cuisine_type\nORDER BY orders_count DESC LIMIT 1;"}],"metadata":{"jupyterlab":{"notebooks":{"version_major":6,"version_minor":4}},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.4"},"singlestore_cell_default_language":"sql","singlestore_connection":{"connectionID":"810f97bd-7e0b-4b2a-b624-594de59bc570","defaultDatabase":"foodhub_database"}},"nbformat":4,"nbformat_minor":5}


--------------------------------------------------------------------------------
/Getting started with SingleStore.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"markdown","id":"85a217b8-503f-4082-9942-abf5d00a8663","metadata":{"language":"sql"},"source":"# Quick start to SingleStore using Stages"},{"cell_type":"markdown","id":"494a5d82-dff7-42a9-99c1-30108375943f","metadata":{"language":"sql"},"source":"What we will learn in this notebook\n1. Sign up for a free trial\n2. Create a database and assign compute resources\n3. Loading data using Stages\n4. Query data"},{"cell_type":"markdown","id":"e67f2db9-6276-417d-829b-6f5661ead762","metadata":{"language":"sql"},"source":"## Step 1- Sign up for a free trial\n1. Go to https://www.singlestore.com/cloud-trial/\n2. Just answer a few questions\n3. Verify your email"},{"cell_type":"markdown","id":"9affb7d0-2739-4b8e-ae91-83f251ccfc9c","metadata":{"language":"sql"},"source":"## Step 2- Create a database and assign compute resources\n"},{"cell_type":"markdown","id":"6f7026d1-5960-4bb0-97be-398e79c7c51c","metadata":{},"source":"## Step 3- Load data to foodhub_order\n\nThis notebook provides step by step instructions for ingesting 'foodhub_order.csv' file to a database.\n\nStart by creating a table to store the ingested data:"},{"cell_type":"code","execution_count":null,"id":"2d10d5bc-f7c7-497a-a0ad-db50eabe4d3c","metadata":{"language":"sql"},"outputs":[],"source":"%%sql\n# Clean up previously created resources\nDROP PIPELINE IF EXISTS foodhub_database.`foodhub_order`;\nDROP TABLE IF EXISTS foodhub_database.`foodhub_order`;\n\nUSE foodhub_database;\nCREATE TABLE foodhub_database.`foodhub_order` (\n\t`order_id` bigint(20) NULL,\n\t`customer_id` bigint(20) NULL,\n\t`restaurant_name` text CHARACTER SET utf8 COLLATE utf8_general_ci NULL,\n\t`cuisine_type` text CHARACTER SET utf8 COLLATE utf8_general_ci NULL,\n\t`cost_of_the_order` double NULL,\n\t`day_of_the_week` text CHARACTER SET utf8 COLLATE utf8_general_ci NULL,\n\t`rating` text CHARACTER SET utf8 COLLATE utf8_general_ci NULL,\n\t`food_preparation_time` bigint(20) NULL,\n\t`delivery_time` bigint(20) NULL,\n\t SHARD KEY ()\n);"},{"cell_type":"markdown","id":"00211a7a-2ccf-4763-a038-891571b2e797","metadata":{},"source":"## Create a pipeline that will load file into the database\n### [Learn about Load Data with Pipeline through our documentation](https://docs.singlestore.com/managed-service/en/load-data/load-data-with-pipelines.html)"},{"cell_type":"code","execution_count":null,"id":"c6de784c-6f04-4ceb-b796-89adde953557","metadata":{"language":"sql"},"outputs":[],"source":"%%sql\nUSE foodhub_database;\nCREATE PIPELINE foodhub_database.`foodhub_order`\nAS LOAD DATA STAGE 'foodhub_order.csv'\nBATCH_INTERVAL 2500\nDISABLE OUT_OF_ORDER OPTIMIZATION\nDISABLE OFFSETS METADATA GC\nSKIP DUPLICATE KEY ERRORS -- SKIP ALL ERRORS can be used to skip all errors that can be tracked through \"Monitor the pipeline for errors\"\nINTO TABLE `foodhub_order`\nFIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\\\\' \nLINES TERMINATED BY '\\r\\n' STARTING BY '' \nIGNORE 1 LINES\nFORMAT CSV\n(\n\t`foodhub_order`.`order_id`,\n\t`foodhub_order`.`customer_id`,\n\t`foodhub_order`.`restaurant_name`,\n\t`foodhub_order`.`cuisine_type`,\n\t`foodhub_order`.`cost_of_the_order`,\n\t`foodhub_order`.`day_of_the_week`,\n\t`foodhub_order`.`rating`,\n\t`foodhub_order`.`food_preparation_time`,\n\t`foodhub_order`.`delivery_time`\n);"},{"cell_type":"markdown","id":"22094aad-1e2e-4b66-b5fe-cbfb5578d8de","metadata":{},"source":"## Start the pipeline"},{"cell_type":"code","execution_count":null,"id":"104d25c8-60cb-4a1e-9a34-71e65403f300","metadata":{"language":"sql"},"outputs":[],"source":"%%sql\nSTART PIPELINE foodhub_database.`foodhub_order`;"},{"cell_type":"markdown","id":"56d98912-8ac8-4f6c-94ea-d900f469ce41","metadata":{},"source":"## Monitor the pipeline for errors"},{"cell_type":"code","execution_count":null,"id":"b7aa029a-fbda-4ae8-b9dc-ed53917aa6d8","metadata":{"language":"sql"},"outputs":[],"source":"%%sql\nUSE foodhub_database;\nSELECT * FROM information_schema.pipelines_errors\nWHERE pipeline_name ='foodhub_order';"},{"cell_type":"markdown","id":"13fea3e0-356e-474f-bfa8-1e5d8c730c3c","metadata":{},"source":"## Check that the data has loaded"},{"cell_type":"code","execution_count":null,"id":"092a4b8e-4df8-4ce6-8dc3-2377be0da12f","metadata":{"language":"sql"},"outputs":[],"source":"%%sql\nSELECT * FROM foodhub_database.`foodhub_order`\nLIMIT 100;"},{"cell_type":"code","execution_count":null,"id":"171b183c-18cf-4876-a77f-6f7fed408707","metadata":{"language":"sql","trusted":true},"outputs":[],"source":"%%sql\n# Query the data\n\n# What is the average order value?\n\nSELECT AVG (cost_of_the_order)\nFROM foodhub_database.`foodhub_order`;\n\n# What is the total number of orders?\nSELECT COUNT (order_id)\nFROM foodhub_database.`foodhub_order`;\n\n\n#Using SQrL to generate queries\n# I have a table foodhub_database.`foodhub_order` with the fields order_id customer_id restaurant_name cuisine_type\tcost_of_the_order. \nWrite a query to find out the most popular cuisine type.  \n\n# SQrL created queries\n# What is the most popular cuisine?\nSELECT cuisine_type, COUNT(*) as orders_count\nFROM foodhub_database.foodhub_order\nGROUP BY cuisine_type\nORDER BY orders_count DESC LIMIT 1;"}],"metadata":{"jupyterlab":{"notebooks":{"version_major":6,"version_minor":4}},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.4"},"singlestore_cell_default_language":"sql","singlestore_connection":{"connectionID":"810f97bd-7e0b-4b2a-b624-594de59bc570","defaultDatabase":"foodhub_database"}},"nbformat":4,"nbformat_minor":5}


--------------------------------------------------------------------------------
/Google Gemini Audio Webinar.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"attachments":{},"cell_type":"markdown","metadata":{"id":"0etRtS83RcWS"},"source":"# Gemini API: Audio Quickstart"},{"attachments":{},"cell_type":"markdown","metadata":{"id":"r1IzNLho-NqV"},"source":"This notebook provides an example of how to prompt Gemini 1.5 Pro using an audio file. "},{"cell_type":"code","execution_count":159,"metadata":{"execution":{"iopub.execute_input":"2024-04-26T09:03:53.160734Z","iopub.status.busy":"2024-04-26T09:03:53.160355Z","iopub.status.idle":"2024-04-26T09:03:56.233395Z","shell.execute_reply":"2024-04-26T09:03:56.232731Z","shell.execute_reply.started":"2024-04-26T09:03:53.160719Z"},"id":"Y6eH_Aq_NyNi","language":"python","trusted":true},"outputs":[],"source":"!pip install -q -U google-generativeai langchain langchain-google-genai langchain-openai singlestoredb --quiet"},{"cell_type":"code","execution_count":110,"metadata":{"execution":{"iopub.execute_input":"2024-04-26T08:55:38.215642Z","iopub.status.busy":"2024-04-26T08:55:38.215174Z","iopub.status.idle":"2024-04-26T08:55:38.217869Z","shell.execute_reply":"2024-04-26T08:55:38.217441Z","shell.execute_reply.started":"2024-04-26T08:55:38.215628Z"},"id":"LSe1pMEpR2L2","language":"python","trusted":true},"outputs":[],"source":"import google.generativeai as genai"},{"attachments":{},"cell_type":"markdown","metadata":{"id":"TXiv-NeZR5WA"},"source":"## Configure your API key\n\nThis API key will be from aistudio.google.com"},{"cell_type":"code","execution_count":111,"metadata":{"execution":{"iopub.execute_input":"2024-04-26T08:55:41.177966Z","iopub.status.busy":"2024-04-26T08:55:41.177496Z","iopub.status.idle":"2024-04-26T08:55:41.180370Z","shell.execute_reply":"2024-04-26T08:55:41.179981Z","shell.execute_reply.started":"2024-04-26T08:55:41.177950Z"},"id":"dm-iaNMGPdid","language":"python","trusted":true},"outputs":[],"source":"import os\n\nos.environ['GOOGLE_API_KEY']='ap'\n\ngenai.configure(api_key=os.environ['GOOGLE_API_KEY'])"},{"attachments":{},"cell_type":"markdown","metadata":{"id":"2YoxMrCdR7hf"},"source":"## Upload an audio file with the File API\n\nTo use an audio file in your prompt, you must first upload it using the [File API](https://github.com/google-gemini/cookbook/blob/main/quickstarts/File_API.ipynb).\n"},{"cell_type":"code","execution_count":112,"metadata":{"execution":{"iopub.execute_input":"2024-04-26T08:55:42.502583Z","iopub.status.busy":"2024-04-26T08:55:42.502121Z","iopub.status.idle":"2024-04-26T08:55:42.504725Z","shell.execute_reply":"2024-04-26T08:55:42.504342Z","shell.execute_reply.started":"2024-04-26T08:55:42.502569Z"},"id":"OHvNLws4RRjx","language":"python","trusted":true},"outputs":[],"source":"URL = \"https://ia803402.us.archive.org/14/items/lp_mozart-divertimento17-k-334-horn-quintet-k_wolfgang-amadeus-mozart-members-of-the-ber/disc1/01.03.%20Divertmento%20In%20D%20Major%2C%20K.%20334%20Menuetto.mp3\""},{"cell_type":"code","execution_count":114,"metadata":{"execution":{"iopub.execute_input":"2024-04-26T08:55:43.968356Z","iopub.status.busy":"2024-04-26T08:55:43.967887Z","iopub.status.idle":"2024-04-26T08:55:48.422874Z","shell.execute_reply":"2024-04-26T08:55:48.422250Z","shell.execute_reply.started":"2024-04-26T08:55:43.968341Z"},"id":"Cxq31LDwSFH6","language":"python","trusted":true},"outputs":[],"source":"!wget -q $URL -O sample.mp3"},{"cell_type":"code","execution_count":115,"metadata":{"execution":{"iopub.execute_input":"2024-04-26T08:55:49.566063Z","iopub.status.busy":"2024-04-26T08:55:49.565548Z","iopub.status.idle":"2024-04-26T08:55:50.743497Z","shell.execute_reply":"2024-04-26T08:55:50.743033Z","shell.execute_reply.started":"2024-04-26T08:55:49.566039Z"},"id":"MAObE0BpaAwG","language":"python","trusted":true},"outputs":[],"source":"your_file = genai.upload_file(path='sample.mp3')"},{"attachments":{},"cell_type":"markdown","metadata":{"id":"m01XDoo4UQvN"},"source":"## Use the file in your prompt"},{"cell_type":"code","execution_count":122,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":73},"execution":{"iopub.execute_input":"2024-04-26T08:56:33.801544Z","iopub.status.busy":"2024-04-26T08:56:33.801267Z","iopub.status.idle":"2024-04-26T08:56:35.979751Z","shell.execute_reply":"2024-04-26T08:56:35.979079Z","shell.execute_reply.started":"2024-04-26T08:56:33.801529Z"},"id":"YmISEsqpafRb","language":"python","outputId":"b5873da2-7f3c-4fc7-9ec5-f5c74aad212f","trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":"The audio file contains a series of classical music pieces featuring piano, strings, and woodwinds. \n\n"}],"source":"prompt = \"Listen carefully to the following audio file. Provide a one sentence summary.\"\nmodel = genai.GenerativeModel('models/gemini-1.5-pro-latest')\nresponse = model.generate_content([prompt, your_file])\nprint(response.text)"},{"attachments":{},"cell_type":"markdown","metadata":{"execution":{"iopub.execute_input":"2024-04-26T08:44:11.362703Z","iopub.status.busy":"2024-04-26T08:44:11.362215Z","iopub.status.idle":"2024-04-26T08:44:11.364950Z","shell.execute_reply":"2024-04-26T08:44:11.364515Z","shell.execute_reply.started":"2024-04-26T08:44:11.362679Z"},"language":"python"},"source":"## RAG over audio files using SingleStoreDB\n\nNow we will embed the text descriptions of the audio file(s). This allows us to search and retrieve relevant files for RAG later."},{"cell_type":"code","execution_count":172,"metadata":{"execution":{"iopub.execute_input":"2024-04-26T09:06:00.761738Z","iopub.status.busy":"2024-04-26T09:06:00.761467Z","iopub.status.idle":"2024-04-26T09:06:00.764687Z","shell.execute_reply":"2024-04-26T09:06:00.764281Z","shell.execute_reply.started":"2024-04-26T09:06:00.761721Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain.vectorstores import SingleStoreDB\nimport os\n\nfrom langchain_google_genai import GoogleGenerativeAIEmbeddings\n\nembeddings = GoogleGenerativeAIEmbeddings(model=\"models/embedding-001\")\n\nos.environ[\"SINGLESTOREDB_URL\"] = f'{connection_user}:{connection_password}@{connection_host}:{connection_port}/{connection_default_database}'"},{"cell_type":"code","execution_count":244,"metadata":{"execution":{"iopub.execute_input":"2024-04-26T09:18:44.313877Z","iopub.status.busy":"2024-04-26T09:18:44.313602Z","iopub.status.idle":"2024-04-26T09:18:45.005645Z","shell.execute_reply":"2024-04-26T09:18:45.005187Z","shell.execute_reply.started":"2024-04-26T09:18:44.313862Z"},"language":"python","trusted":true},"outputs":[],"source":"vectorstore=SingleStoreDB(table_name=\"audio1\", embedding=embeddings)"},{"cell_type":"code","execution_count":248,"metadata":{"execution":{"iopub.execute_input":"2024-04-26T09:18:46.410577Z","iopub.status.busy":"2024-04-26T09:18:46.410062Z","iopub.status.idle":"2024-04-26T09:18:47.338408Z","shell.execute_reply":"2024-04-26T09:18:47.338025Z","shell.execute_reply.started":"2024-04-26T09:18:46.410562Z"},"language":"python","trusted":true},"outputs":[{"data":{"text/plain":"[]"},"execution_count":248,"metadata":{},"output_type":"execute_result"}],"source":"from langchain_core.documents import Document\n\nmozart_doc = Document(page_content=response.text, metadata={'path': 'sample.mp3'})\n\nvectorstore.add_documents([mozart_doc])   \n\nvectorstore.add_texts(['foo', 'bar'])"},{"cell_type":"code","execution_count":259,"metadata":{"execution":{"iopub.execute_input":"2024-04-26T09:19:39.907371Z","iopub.status.busy":"2024-04-26T09:19:39.906838Z","iopub.status.idle":"2024-04-26T09:19:40.246762Z","shell.execute_reply":"2024-04-26T09:19:40.246307Z","shell.execute_reply.started":"2024-04-26T09:19:39.907355Z"},"language":"python","trusted":true},"outputs":[],"source":"query = \"beethoven\"\ndocs = vectorstore.similarity_search(query)  # Find documents that correspond to the query"},{"cell_type":"code","execution_count":260,"metadata":{"execution":{"iopub.execute_input":"2024-04-26T09:19:41.271503Z","iopub.status.busy":"2024-04-26T09:19:41.271084Z","iopub.status.idle":"2024-04-26T09:19:41.274037Z","shell.execute_reply":"2024-04-26T09:19:41.273647Z","shell.execute_reply.started":"2024-04-26T09:19:41.271489Z"},"language":"python","trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":"The audio file contains a series of classical music pieces featuring piano, strings, and woodwinds. \n\n"}],"source":"print(docs[-1].page_content)"}],"metadata":{"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.6"},"singlestore_cell_default_language":"python","singlestore_connection":{"connectionID":"6efce35a-2db5-4ae3-bc0b-c3d5810a45f4","defaultDatabase":"langchain_multimodal"}},"nbformat":4,"nbformat_minor":4}


--------------------------------------------------------------------------------
/Groq Webinar Code.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"attachments":{},"cell_type":"markdown","metadata":{"id":"0etRtS83RcWS"},"source":"# SingleStore and Groq RAG Quickstart"},{"attachments":{},"cell_type":"markdown","metadata":{"id":"r1IzNLho-NqV"},"source":"This notebook provides an example of how to use SingleStore as a vector database in conjunction with Groq, the world's fastest LLM."},{"cell_type":"code","execution_count":7,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:03:54.919609Z","iopub.status.busy":"2024-05-02T07:03:54.919230Z","iopub.status.idle":"2024-05-02T07:04:09.803928Z","shell.execute_reply":"2024-05-02T07:04:09.791624Z","shell.execute_reply.started":"2024-05-02T07:03:54.919573Z"},"language":"python","trusted":true},"outputs":[],"source":"!pip install -q -U langchain langchain-groq singlestoredb langchain-openai --quiet"},{"cell_type":"code","execution_count":12,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:04:45.525503Z","iopub.status.busy":"2024-05-02T07:04:45.525125Z","iopub.status.idle":"2024-05-02T07:05:04.099932Z","shell.execute_reply":"2024-05-02T07:05:04.098886Z","shell.execute_reply.started":"2024-05-02T07:04:45.525474Z"},"language":"python","trusted":true},"outputs":[{"name":"stdin","output_type":"stream","text":" ········\n"}],"source":"from getpass import getpass\n\nimport os\n\nGROQ_API_KEY = getpass()\n\nos.environ[\"GROQ_API_KEY\"] = GROQ_API_KEY"},{"cell_type":"code","execution_count":30,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:07:30.774357Z","iopub.status.busy":"2024-05-02T07:07:30.773770Z","iopub.status.idle":"2024-05-02T07:07:35.700618Z","shell.execute_reply":"2024-05-02T07:07:35.700002Z","shell.execute_reply.started":"2024-05-02T07:07:30.774327Z"},"language":"python","trusted":true},"outputs":[{"name":"stdin","output_type":"stream","text":" ········\n"}],"source":"OPENAI_API_KEY = getpass()\n\nos.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"},{"cell_type":"code","execution_count":21,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:07:01.938444Z","iopub.status.busy":"2024-05-02T07:07:01.938072Z","iopub.status.idle":"2024-05-02T07:07:02.500894Z","shell.execute_reply":"2024-05-02T07:07:02.500153Z","shell.execute_reply.started":"2024-05-02T07:07:01.938411Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain_groq import ChatGroq\nfrom langchain_core.prompts import ChatPromptTemplate"},{"attachments":{},"cell_type":"markdown","metadata":{"id":"m01XDoo4UQvN"},"source":"## Initialize GroqChat"},{"cell_type":"code","execution_count":55,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:13:12.578759Z","iopub.status.busy":"2024-05-02T07:13:12.578301Z","iopub.status.idle":"2024-05-02T07:13:12.616178Z","shell.execute_reply":"2024-05-02T07:13:12.615557Z","shell.execute_reply.started":"2024-05-02T07:13:12.578720Z"},"language":"python","trusted":true},"outputs":[],"source":"groq = ChatGroq(temperature=0, model_name=\"llama3-8b-8192\")"},{"cell_type":"code","execution_count":48,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:11:40.087810Z","iopub.status.busy":"2024-05-02T07:11:40.087468Z","iopub.status.idle":"2024-05-02T07:11:40.120633Z","shell.execute_reply":"2024-05-02T07:11:40.119896Z","shell.execute_reply.started":"2024-05-02T07:11:40.087785Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain_openai import ChatOpenAI\n\nopenai = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")"},{"cell_type":"markdown","metadata":{"language":"python"},"source":"# Now let's test groq!"},{"cell_type":"code","execution_count":66,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:14:56.734805Z","iopub.status.busy":"2024-05-02T07:14:56.734397Z","iopub.status.idle":"2024-05-02T07:14:57.739193Z","shell.execute_reply":"2024-05-02T07:14:57.738711Z","shell.execute_reply.started":"2024-05-02T07:14:56.734767Z"},"language":"python","trusted":true},"outputs":[{"data":{"text/plain":"AIMessage(content=\"Large Language Models (LLMs) have revolutionized the field of natural language processing (NLP) by enabling applications such as language translation, text summarization, and chatbots. However, traditional LLMs often suffer from high latency, which can be a significant limitation in many applications. Low latency LLMs, on the other hand, offer several advantages that make them crucial for various use cases. Here are some reasons why low latency LLMs are important:\\n\\n1. **Real-time applications**: In applications like chatbots, virtual assistants, and real-time language translation, low latency is essential to provide a seamless user experience. Low latency LLMs enable faster response times, reducing the delay between user input and the AI's response.\\n2. **Interactive systems**: Interactive systems like language-based games, quizzes, or educational platforms require low latency to ensure a smooth user experience. Low latency LLMs can process user input quickly, providing instant feedback and maintaining user engagement.\\n3. **Real-time analytics and insights**: In industries like finance, healthcare, or customer service, low latency LLMs can analyze large amounts of data in real-time, providing valuable insights and enabling data-driven decision-making.\\n4. **Edge computing and IoT**: With the increasing adoption of edge computing and IoT devices, low latency LLMs can process data locally, reducing latency and improving response times in applications like smart homes, industrial automation, or autonomous vehicles.\\n5. **Improved user experience**: Low latency LLMs can provide a more responsive and engaging user experience, reducing frustration and increasing user satisfaction in applications like language translation, text summarization, or chatbots.\\n6. **Competitive advantage**: In competitive industries like customer service, finance, or e-commerce, low latency LLMs can provide a competitive advantage by enabling faster response times, improved customer satisfaction, and increased conversions.\\n7. **Scalability and efficiency**: Low latency LLMs can be designed to scale more efficiently, reducing the computational resources required to process large amounts of data, which is particularly important in cloud-based or distributed computing environments.\\n8. **Enhanced security**: Low latency LLMs can be designed with enhanced security features, such as encryption and secure communication protocols, to protect sensitive data and prevent unauthorized access.\\n\\nIn summary, low latency LLMs are crucial for applications that require real-time processing, interactive systems, and real-time analytics. They offer a competitive advantage, improved user experience, and enhanced security, making them essential for various industries and use cases.\", response_metadata={'token_usage': {'completion_time': 0.613, 'completion_tokens': 508, 'prompt_time': 0.014, 'prompt_tokens': 32, 'queue_time': None, 'total_time': 0.627, 'total_tokens': 540}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_af05557ca2', 'finish_reason': 'stop', 'logprobs': None}, id='run-8a0eb815-113c-4981-a979-9a60c5745cdb-0')"},"execution_count":66,"metadata":{},"output_type":"execute_result"}],"source":"system = \"You are a helpful assistant.\"\nhuman = \"{text}\"\nprompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n\nchain = prompt | groq\nchain.invoke({\"text\": \"Explain the importance of low latency LLMs.\"})"},{"cell_type":"code","execution_count":null,"metadata":{"language":"python","trusted":true},"outputs":[],"source":"# OpenAI GPT 3.5 Turbo for comparison"},{"cell_type":"code","execution_count":65,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:14:47.035597Z","iopub.status.busy":"2024-05-02T07:14:47.034830Z","iopub.status.idle":"2024-05-02T07:14:52.763091Z","shell.execute_reply":"2024-05-02T07:14:52.762095Z","shell.execute_reply.started":"2024-05-02T07:14:47.035561Z"},"language":"python","trusted":true},"outputs":[{"data":{"text/plain":"AIMessage(content='Low latency LLMs, or Low-Latency Memory Modules, are important in high-performance computing environments where fast data access is critical. Here are some reasons why low latency LLMs are important:\\n\\n1. Reduced response time: Low latency LLMs provide faster access to data, reducing the time it takes for the CPU to retrieve information from memory. This results in quicker response times for applications, leading to improved overall system performance.\\n\\n2. Enhances system efficiency: By minimizing latency, low latency LLMs help in reducing data access bottlenecks and increasing the efficiency of data processing. This is particularly important in real-time applications where timely data retrieval is crucial.\\n\\n3. Increased throughput: Low latency LLMs can improve the overall throughput of a system by allowing data to be accessed and processed more quickly. This can be beneficial for applications that require high data transfer rates, such as in financial trading or scientific research.\\n\\n4. Better user experience: In consumer applications, low latency LLMs can improve the user experience by reducing loading times and improving the responsiveness of applications. This can lead to higher customer satisfaction and retention.\\n\\nOverall, low latency LLMs play a crucial role in enhancing system performance, improving data access speeds, and providing a better user experience in various computing environments.', response_metadata={'token_usage': {'completion_tokens': 259, 'prompt_tokens': 28, 'total_tokens': 287}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': 'fp_a450710239', 'finish_reason': 'stop', 'logprobs': None}, id='run-aa35104e-8007-4a7b-9e7e-9319397bfc1e-0')"},"execution_count":65,"metadata":{},"output_type":"execute_result"}],"source":"system = \"You are a helpful assistant.\"\nhuman = \"{text}\"\nprompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n\nchain = prompt | openai\nchain.invoke({\"text\": \"Explain the importance of low latency LLMs.\"})"},{"attachments":{},"cell_type":"markdown","metadata":{"execution":{"iopub.execute_input":"2024-04-26T08:44:11.362703Z","iopub.status.busy":"2024-04-26T08:44:11.362215Z","iopub.status.idle":"2024-04-26T08:44:11.364950Z","shell.execute_reply":"2024-04-26T08:44:11.364515Z","shell.execute_reply.started":"2024-04-26T08:44:11.362679Z"},"language":"python"},"source":"## RAG using SingleStoreDB"},{"cell_type":"code","execution_count":71,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:15:24.937182Z","iopub.status.busy":"2024-05-02T07:15:24.936798Z","iopub.status.idle":"2024-05-02T07:15:24.941820Z","shell.execute_reply":"2024-05-02T07:15:24.941153Z","shell.execute_reply.started":"2024-05-02T07:15:24.937156Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain.vectorstores import SingleStoreDB\nimport os\n\nfrom langchain_openai import OpenAIEmbeddings\n\nos.environ[\"SINGLESTOREDB_URL\"] = f'{connection_user}:{connection_password}@{connection_host}:{connection_port}/{connection_default_database}'"},{"cell_type":"code","execution_count":70,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:15:23.102499Z","iopub.status.busy":"2024-05-02T07:15:23.102071Z","iopub.status.idle":"2024-05-02T07:15:23.362855Z","shell.execute_reply":"2024-05-02T07:15:23.362094Z","shell.execute_reply.started":"2024-05-02T07:15:23.102460Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain.document_loaders import WebBaseLoader\n\nloader = WebBaseLoader(\"https://python.langchain.com/docs/integrations/chat/groq/\")\ndata = loader.load()"},{"cell_type":"code","execution_count":72,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:15:25.943232Z","iopub.status.busy":"2024-05-02T07:15:25.942826Z","iopub.status.idle":"2024-05-02T07:15:25.949098Z","shell.execute_reply":"2024-05-02T07:15:25.948374Z","shell.execute_reply.started":"2024-05-02T07:15:25.943198Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain.text_splitter import RecursiveCharacterTextSplitter\n\ntext_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)\nall_splits = text_splitter.split_documents(data)"},{"cell_type":"code","execution_count":74,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:15:33.693898Z","iopub.status.busy":"2024-05-02T07:15:33.693518Z","iopub.status.idle":"2024-05-02T07:15:36.589102Z","shell.execute_reply":"2024-05-02T07:15:36.588349Z","shell.execute_reply.started":"2024-05-02T07:15:33.693869Z"},"language":"python","trusted":true},"outputs":[],"source":"vectorstore=SingleStoreDB.from_documents(documents=all_splits, table_name=\"test9\", embedding=OpenAIEmbeddings())"},{"cell_type":"code","execution_count":75,"metadata":{"execution":{"iopub.execute_input":"2024-05-02T07:15:55.771195Z","iopub.status.busy":"2024-05-02T07:15:55.770797Z","iopub.status.idle":"2024-05-02T07:15:57.433926Z","shell.execute_reply":"2024-05-02T07:15:57.433268Z","shell.execute_reply.started":"2024-05-02T07:15:55.771161Z"},"language":"python","trusted":true},"outputs":[{"data":{"text/plain":"{'query': 'Please show a simple example of how to chat with Groq with Langchain in python.',\n 'result': 'Here is a simple example of how to chat with Groq using Langchain in Python:\\n\\n```\\nfrom langchain.groq import ChatGroq\\nfrom langchain.prompts import ChatPromptTemplate\\n\\n# Initialize the ChatGroq class\\nchat = ChatGroq(temperature=0, model_name=\"mixtral-8x7b-32768\")\\n\\n# Define the system message\\nsystem = \"You are a helpful assistant.\"\\n\\n# Define the human message\\nhuman = \"What is the definition of Groq?\"\\n\\n# Create a prompt template\\nprompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\\n\\n# Invoke ChatGroq to create completions\\nchain = prompt | chat.invoke({\"text\": \"Explain the definition of Groq.\"})\\n\\n# Print the response\\nprint(chain)\\n```\\n\\nThis code initializes the `ChatGroq` class with a temperature of 0 and a model name of \"mixtral-8x7b-32768\". It then defines a system message and a human message, creates a prompt template using these messages, and invokes the `ChatGroq` class to generate a response to the human message. Finally, it prints the response.'}"},"execution_count":75,"metadata":{},"output_type":"execute_result"}],"source":"from langchain.chains import RetrievalQA\n\nqa_chain = RetrievalQA.from_chain_type(llm,retriever=vectorstore.as_retriever())\nqa_chain({\"query\": \"Please show a simple example of how to chat with Groq with Langchain in python.\"})"}],"metadata":{"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.6"},"singlestore_cell_default_language":"python","singlestore_connection":{"connectionID":"6efce35a-2db5-4ae3-bc0b-c3d5810a45f4","defaultDatabase":"llama3_demo"}},"nbformat":4,"nbformat_minor":4}


--------------------------------------------------------------------------------
/Llama 3 Demo.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"attachments":{},"cell_type":"markdown","metadata":{"id":"0etRtS83RcWS"},"source":"# SingleStore and Llama 3 RAG Quickstart"},{"attachments":{},"cell_type":"markdown","metadata":{"id":"r1IzNLho-NqV"},"source":"This notebook provides an example of how to use SingleStore as a vector database in conjunction with Llama 3. We'll be using Replicate to host an instance of Llama 3 ov"},{"cell_type":"code","execution_count":133,"metadata":{"execution":{"iopub.execute_input":"2024-04-30T16:23:26.745533Z","iopub.status.busy":"2024-04-30T16:23:26.745247Z","iopub.status.idle":"2024-04-30T16:23:29.724848Z","shell.execute_reply":"2024-04-30T16:23:29.724195Z","shell.execute_reply.started":"2024-04-30T16:23:26.745517Z"},"language":"python","trusted":true},"outputs":[],"source":"!pip install -q -U langchain singlestoredb langchain-openai --quiet"},{"cell_type":"code","execution_count":110,"metadata":{"execution":{"iopub.execute_input":"2024-04-30T16:15:34.889051Z","iopub.status.busy":"2024-04-30T16:15:34.888805Z","iopub.status.idle":"2024-04-30T16:15:36.873714Z","shell.execute_reply":"2024-04-30T16:15:36.873113Z","shell.execute_reply.started":"2024-04-30T16:15:34.889035Z"},"language":"python","trusted":true},"outputs":[],"source":"!pip install replicate --quiet"},{"cell_type":"code","execution_count":125,"metadata":{"execution":{"iopub.execute_input":"2024-04-30T16:20:57.102796Z","iopub.status.busy":"2024-04-30T16:20:57.102518Z","iopub.status.idle":"2024-04-30T16:21:15.655746Z","shell.execute_reply":"2024-04-30T16:21:15.655228Z","shell.execute_reply.started":"2024-04-30T16:20:57.102779Z"},"language":"python","trusted":true},"outputs":[{"name":"stdin","output_type":"stream","text":" ········\n"}],"source":"from getpass import getpass\n\nimport os\n\nREPLICATE_API_TOKEN = getpass()\n\nos.environ[\"REPLICATE_API_TOKEN\"] = REPLICATE_API_TOKEN"},{"cell_type":"code","execution_count":138,"metadata":{"execution":{"iopub.execute_input":"2024-04-30T16:25:44.319879Z","iopub.status.busy":"2024-04-30T16:25:44.319607Z","iopub.status.idle":"2024-04-30T16:25:45.720634Z","shell.execute_reply":"2024-04-30T16:25:45.720135Z","shell.execute_reply.started":"2024-04-30T16:25:44.319863Z"},"language":"python","trusted":true},"outputs":[{"name":"stdin","output_type":"stream","text":" ········\n"}],"source":"OPENAI_API_KEY = getpass()\n\nos.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"},{"cell_type":"code","execution_count":126,"metadata":{"execution":{"iopub.execute_input":"2024-04-30T16:21:22.269448Z","iopub.status.busy":"2024-04-30T16:21:22.269189Z","iopub.status.idle":"2024-04-30T16:21:22.272649Z","shell.execute_reply":"2024-04-30T16:21:22.272009Z","shell.execute_reply.started":"2024-04-30T16:21:22.269430Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain.chains import LLMChain\nfrom langchain_community.llms import Replicate\nfrom langchain_core.prompts import PromptTemplate"},{"attachments":{},"cell_type":"markdown","metadata":{"id":"m01XDoo4UQvN"},"source":"## Initialize Llama 3 in Replicate"},{"cell_type":"code","execution_count":131,"metadata":{"execution":{"iopub.execute_input":"2024-04-30T16:23:00.001757Z","iopub.status.busy":"2024-04-30T16:23:00.001504Z","iopub.status.idle":"2024-04-30T16:23:08.353593Z","shell.execute_reply":"2024-04-30T16:23:08.353034Z","shell.execute_reply.started":"2024-04-30T16:23:00.001741Z"},"language":"python","trusted":true},"outputs":[{"data":{"text/plain":"'SingleStoreDB is a distributed relational database that is designed to handle large amounts of data and scale horizontally. It is optimized for real-time analytics and machine learning workloads.\\n\\nSingleStoreDB is built from the ground up to be a cloud-native database. It is designed to take advantage of cloud computing, including scalability, high availability, and cost-effectiveness.\\n\\nSome of the key features of SingleStoreDB include:\\n\\n* Scalability: SingleStoreDB can scale horizontally to handle large amounts of data and increasing workloads.\\n* High availability: SingleStoreDB is designed to provide high availability, ensuring that your database is always available and accessible.\\n*'"},"execution_count":131,"metadata":{},"output_type":"execute_result"}],"source":"llm = Replicate(\n    model=\"meta/meta-llama-3-8b-instruct\",\n    model_kwargs={\"temperature\": 0.75, \"max_length\": 500, \"top_p\": 1},\n)\nprompt = \"\"\"\nUser: What is SingleStoreDB?\nAssistant:\n\"\"\"\nllm(prompt)"},{"attachments":{},"cell_type":"markdown","metadata":{"execution":{"iopub.execute_input":"2024-04-26T08:44:11.362703Z","iopub.status.busy":"2024-04-26T08:44:11.362215Z","iopub.status.idle":"2024-04-26T08:44:11.364950Z","shell.execute_reply":"2024-04-26T08:44:11.364515Z","shell.execute_reply.started":"2024-04-26T08:44:11.362679Z"},"language":"python"},"source":"## RAG over audio files using SingleStoreDB\n\nNow we will embed the text descriptions of the audio file(s). This allows us to search and retrieve relevant files for RAG later."},{"cell_type":"code","execution_count":164,"metadata":{"execution":{"iopub.execute_input":"2024-04-30T16:31:34.107933Z","iopub.status.busy":"2024-04-30T16:31:34.107351Z","iopub.status.idle":"2024-04-30T16:31:34.432853Z","shell.execute_reply":"2024-04-30T16:31:34.432358Z","shell.execute_reply.started":"2024-04-30T16:31:34.107910Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain.vectorstores import SingleStoreDB\nimport os\n\nfrom langchain_openai import OpenAIEmbeddings\n\nos.environ[\"SINGLESTOREDB_URL\"] = f'{connection_user}:{connection_password}@{connection_host}:{connection_port}/{connection_default_database}'"},{"cell_type":"code","execution_count":223,"metadata":{"execution":{"iopub.execute_input":"2024-04-30T16:47:35.474206Z","iopub.status.busy":"2024-04-30T16:47:35.473942Z","iopub.status.idle":"2024-04-30T16:47:42.860963Z","shell.execute_reply":"2024-04-30T16:47:42.860466Z","shell.execute_reply.started":"2024-04-30T16:47:35.474190Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain.document_loaders import WebBaseLoader\n\nloader = WebBaseLoader(\"https://aws.amazon.com/blogs/aws/metas-llama-3-models-are-now-available-in-amazon-bedrock/\")\ndata = loader.load()"},{"cell_type":"code","execution_count":224,"metadata":{"execution":{"iopub.execute_input":"2024-04-30T16:47:43.868724Z","iopub.status.busy":"2024-04-30T16:47:43.868477Z","iopub.status.idle":"2024-04-30T16:47:43.872090Z","shell.execute_reply":"2024-04-30T16:47:43.871572Z","shell.execute_reply.started":"2024-04-30T16:47:43.868710Z"},"language":"python","trusted":true},"outputs":[],"source":"from langchain.text_splitter import RecursiveCharacterTextSplitter\n\ntext_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)\nall_splits = text_splitter.split_documents(data)"},{"cell_type":"code","execution_count":225,"metadata":{"execution":{"iopub.execute_input":"2024-04-30T16:47:45.382362Z","iopub.status.busy":"2024-04-30T16:47:45.382119Z","iopub.status.idle":"2024-04-30T16:47:48.153998Z","shell.execute_reply":"2024-04-30T16:47:48.153403Z","shell.execute_reply.started":"2024-04-30T16:47:45.382348Z"},"language":"python","trusted":true},"outputs":[],"source":"vectorstore=SingleStoreDB.from_documents(documents=all_splits, table_name=\"test6\", embedding=OpenAIEmbeddings())"},{"cell_type":"code","execution_count":227,"metadata":{"execution":{"iopub.execute_input":"2024-04-30T16:48:44.854397Z","iopub.status.busy":"2024-04-30T16:48:44.854136Z","iopub.status.idle":"2024-04-30T16:48:46.955494Z","shell.execute_reply":"2024-04-30T16:48:46.954897Z","shell.execute_reply.started":"2024-04-30T16:48:44.854376Z"},"language":"python","trusted":true},"outputs":[{"data":{"text/plain":"{'query': 'How do you use Llama 3 models in the AWS console?',\n 'result': 'To use Llama 3 models in the AWS console, you can follow these steps:\\n\\n1. Log in to the AWS Management Console.\\n2. Navigate to the Amazon SageMaker console.\\n3. Click on the \"Models\" tab.\\n4. Click on the Llama 3 model you want to use.\\n5. In the \"Model details\" page, scroll down to the \"Use in a SageMaker notebook\" section.\\n6. Click on the \"Use\" button.\\n7. Review the following steps from the previous response.\\n\\nYou can also use the Llama 3 model to generate a new Llama 3 model to'}"},"execution_count":227,"metadata":{},"output_type":"execute_result"}],"source":"from langchain.chains import RetrievalQA\n\nqa_chain = RetrievalQA.from_chain_type(llm,retriever=vectorstore.as_retriever())\nqa_chain({\"query\": \"How do you use Llama 3 models in the AWS console?\"})"}],"metadata":{"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.6"},"singlestore_cell_default_language":"python","singlestore_connection":{"connectionID":"6efce35a-2db5-4ae3-bc0b-c3d5810a45f4","defaultDatabase":"llama3_demo"}},"nbformat":4,"nbformat_minor":4}


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Webinar Code Examples
2 | 
3 | **Attention**: The code in this repository is intended for experimental use only and is not fully tested, documented, or supported by SingleStore. Visit the [SingleStore Forums](https://www.singlestore.com/forum/) to ask questions about this repository.
4 | 
5 | In this repository you'll find sub-folders for each webinar that we host that has follow along code examples.
6 | 


--------------------------------------------------------------------------------
/chat-with-plg-data/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | ../.DS_Store
3 | 


--------------------------------------------------------------------------------
/chat-with-plg-data/README.md:
--------------------------------------------------------------------------------
  1 | # ChatGPT for PLG: Talk with Your Salesforce or Segment Data
  2 | 
  3 | In today’s competitive landscape, Product Led Growth (PLG) is emerging as a crucial strategy for scaling your business. Central to PLG is the ability to deeply understand your data and user activity. Knowing what campaigns are working, and why, is essential for high-growth and successful products. Join us for an exclusive webinar where we will walk you through building a powerful application that allows you to interact with your Salesforce or Segment data using ChatGPT. This hands-on session, powered by Langchain and a vector database, will guide you on keeping your internal data private and secure. Learn how to seamlessly handle multi-model data in different formats from various sources, empowering your Marketing, Demand Gen, Product, and Engineering teams to make data-driven decisions with confidence.
  4 | 
  5 | ## What You'll Learn
  6 | 
  7 | - Hands-on Demo: Building an app using Langchain and a vector database
  8 | - Data Privacy & Security: Best practices for keeping your internal data private and secure
  9 | - Multi-Model Data Handling: Strategies for integrating and querying data in different formats from various sources
 10 | 
 11 | ## Featured Speakers
 12 | - [Madhukar Kumar](https://github.com/madhukarkumar/), Chief Developer Evangelist at SingleStore
 13 | - [Wes Kennedy](https://github.com/wesdottoday/), Principal Technical Evangelist at Singlestore
 14 | 
 15 | ## Instructions
 16 | 
 17 | Below you'll find the instructions you need to follow in order to deploy this yourself.
 18 | 
 19 | ### Common Steps
 20 | 
 21 | 1. Clone this repo onto a VPS or something which you can reverse proxy to safely
 22 |     ``` shell
 23 |     git clone git@github.com:singlestore-labs/webinar-code-examples.git
 24 |     cd chat-with-plg-data
 25 |     ```
 26 | 
 27 | ### SingleStore
 28 | 
 29 | 1. Sign up for SingleStoreDB using [this Cloud Trial link](https://bit.ly/chatgpt-for-plg-raffle) to receive $600 in credit!
 30 | 2. Accept the Terms of Use
 31 | 3. Create a New Workspace.
 32 |     - The workspaces allow you to choose your preferred Cloud Provider, Region, Version of SingleStoreDB
 33 |     - You can also set the auto-terminate feature, which will suspend your workspace when it has been determined inactive for a period of time you choose. (This helps save your credits!)
 34 | 4. Go to SQL Editor, under Develop and create your database:
 35 |     - Choose your workspace in the dropdown at the top
 36 |     - Run the following SQL to create your DB
 37 | 
 38 |     ``` sql
 39 |     CREATE DATABASE plg_data;
 40 |     ```
 41 |     - Click Run
 42 | 5. Create the Actions table
 43 |     - Choose your workspace and database in the dropdown at the top
 44 |     - Copy the contents of `create_table_actions.sql` and paste into SQL Editor
 45 |     - Click Run
 46 | 6. Create the Customers table
 47 |     - Choose your workspace and database in the dropdown at the top
 48 |     - Copy the contents of `create_table_customers.sql` and paste into SQL Editor
 49 |     - Click Run
 50 | 
 51 | ### Segment
 52 | 
 53 | 1. Create a Segment Account
 54 | 2. Create a Segment Source (Python), save the Access Key it gives you in a safe place
 55 | 3. Create a Segment Destination (Webhook), do not fill anything out right now, except choosing the source (which you defined in the last step)
 56 | 
 57 | ### Deploy Data Generator
 58 | 
 59 | 1. Create the `.env` file for the Data Generator
 60 |     ``` shell
 61 |     cd generator
 62 |     mv .env.sample .env
 63 |     ```
 64 | 2. Edit the `.env` to set the `SEGMENT_WRITE_KEY` to what you received from Segment when creating the Python Generator Source
 65 | 3. Build the docker container locally
 66 |     ``` shell
 67 |     docker build -t gpt4plg_generator:0.0.1 .
 68 |     ```
 69 | 4. Run the docker container to see if it works
 70 |     ``` shell
 71 |     docker run gpg4plg_generator:0.0.1 .
 72 |     ```
 73 |     - Validate that the service starts and services are listening from your host at Port 5000.
 74 |     > Note: SingleStore is not responsible for the security ramifications from testing this demo. When in doubt, please work with your security team to use a proper testing environment.
 75 |     - Log into Segment > Sources > Your Python Generator > Debug
 76 |     - You should see new events coming in to Segment
 77 | 
 78 | ### Deploy Webhook Server
 79 | 
 80 | 1. In a new terminal window, cd into the `webinar/chat-with-plg-data` folder.
 81 | 2. Create the `.env` file for the Webhook Server
 82 |     ``` shell
 83 |     cd webhook_server
 84 |     mv .env.sample .env
 85 |     ```
 86 | 3. Edit the `.env` file to set the variables to what you need
 87 | 4. Retrieve SingleStoreDB credentials
 88 |     - Click on your cloud group
 89 |         ![SingleStore Cloud Group](assets/s2-get-creds-1.png)
 90 |     - Click on your Workspace/Connect drop down, then choose "Connect Directly"
 91 |         ![SingleStore Connections](assets/s2-get-creds-2.png)
 92 |     - Get your Username and Host URL
 93 |         ![SingleStore Connect Directly](assets/s2-get-creds-3.png)
 94 |    > Note:
 95 |    >    - The user will almost always be `admin`, so set that aside
 96 |    >    - Following user, you'll see the connection URI (Hint: it'll begin with `svc...`), copy that and set aside
 97 |    - Take those two items `user` and `host` and then edit your `.env` file with those variables
 98 |    ``` shell
 99 |    vi .env # use whatever editor you're comfy with
100 |    ```
101 |    ``` shell
102 |    S2_HOST=svc...
103 |    S2_USER=admin
104 |    S2_PASS=
105 |    OPENAI_API_KEY=
106 |    ```
107 |    - Choose your Cloud Group again, Click Access, then set your admin password. Update your .env file to reflect.
108 |    ``` shell
109 |    S2_HOST=svc...
110 |    S2_USER=admin
111 |    S2_PASS=thisismypassword
112 |    OPENAI_API_KEY=
113 |    ```
114 | 5. Retrieve your OpenAI key
115 |     - Create an OpenAI account
116 |     - Click on your user, then click API Keys
117 |     - Generate an API key
118 |     - update `.env` to reflect this API key
119 |    ``` shell
120 |    S2_HOST=svc...
121 |    S2_USER=admin
122 |    S2_PASS=thisismypassword
123 |    OPENAI_API_KEY=thisismyapikey
124 |    ```
125 | 
126 | 6. Build the docker container
127 |     ``` shell
128 |     docker build -t gpg4plg_webhook:0.0.1 -t gpg4plg_webhook:latest .
129 |     ```
130 | 7. Deploy the docker container
131 | 
132 |     ``` shell
133 |     docker run -p 0:0:0:0:5000/:5000 gpg4plg_webhook:0.0.1 .
134 |     ```
135 | 
136 | ### Re-run the generator
137 | 
138 | At this point you should be able to re-run the generator container and see data begin populating in your database.


--------------------------------------------------------------------------------
/chat-with-plg-data/assets/s2-get-creds-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singlestore-labs/webinar-code-examples/70e860e51e3346d044134f187bd16bf4bfef614e/chat-with-plg-data/assets/s2-get-creds-1.png


--------------------------------------------------------------------------------
/chat-with-plg-data/assets/s2-get-creds-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singlestore-labs/webinar-code-examples/70e860e51e3346d044134f187bd16bf4bfef614e/chat-with-plg-data/assets/s2-get-creds-2.png


--------------------------------------------------------------------------------
/chat-with-plg-data/assets/s2-get-creds-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singlestore-labs/webinar-code-examples/70e860e51e3346d044134f187bd16bf4bfef614e/chat-with-plg-data/assets/s2-get-creds-3.png


--------------------------------------------------------------------------------
/chat-with-plg-data/chat_with_plg.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"markdown","id":"dd024475-4cdc-4fd7-aac7-4a4a81f1d1f6","metadata":{},"source":"# Have a conversation with your PLG data"},{"cell_type":"code","execution_count":null,"id":"f6fc04f7-3a35-429a-beca-5e707b139041","metadata":{"tags":[],"trusted":true},"outputs":[],"source":"!pip install openai --quiet\n!pip install langchain --quiet"},{"cell_type":"code","execution_count":null,"id":"a8c79220-ef1e-422c-bd27-f3d0e000594a","metadata":{"tags":[],"trusted":true},"outputs":[],"source":"import os\nimport getpass\nimport openai\nfrom sqlalchemy import *\n\ndb_conn = create_engine(connection_url)\n\nos.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"},{"cell_type":"markdown","id":"ad75e7e9-3327-4276-8107-36702ae9ea01","metadata":{},"source":"## Generate Embeddings Function"},{"cell_type":"code","execution_count":null,"id":"5f86c005-9345-4ee7-a34a-37999f34f5d1","metadata":{"tags":[],"trusted":true},"outputs":[],"source":"openai.api_key = os.getenv('OPENAI_API_KEY')\nmodel_id = 'text-embedding-ada-002'\n\ndef create_embedding(content,model_id):\n    try:\n        response = openai.Embedding.create(input=str(content),model=model_id)\n        embedding = response['data'][0]['embedding']\n        tokens = response['usage']['total_tokens']\n        status = 'success'\n        return embedding\n    except Exception as e:\n        print(e)\n        embedding = ''\n        tokens = 0\n        status = 'failed'\n        return embedding"},{"cell_type":"markdown","id":"5c364fe7-01d4-46b8-9058-8671fe1a4040","metadata":{},"source":"## Chat w/ your data"},{"cell_type":"code","execution_count":null,"id":"06c4cf01-4a2d-4c3e-b22b-7e4f2bfebe8e","metadata":{"tags":[],"trusted":true},"outputs":[],"source":"query = input(\"How can I help you?\")\n\nquery_embedding = create_embedding(query,model_id)\n\nsql_stmt = \"\"\"\n    SELECT\n        customers.user_id,\n        customers.first_name,\n        customers.last_name,\n        customers.email,\n        actions.event,\n        actions.seg_timestamp,\n        DOT_PRODUCT(JSON_ARRAY_PACK(%s), actions.embedding) AS similarity\n    FROM actions\n    INNER JOIN customers ON customers.user_id = actions.user_id\n    ORDER BY similarity DESC\n    LIMIT 150;\n\"\"\"\n\nresults = db_conn.execute(sql_stmt, str(query_embedding))\n\nr_list = []\nfor r in results:\n    r_dict = {}\n    r_dict['user_id'] = r['user_id']\n    r_dict['first_name'] = r['first_name']\n    r_dict['last_name'] = r['last_name']\n    r_dict['email'] = r['email']\n    r_dict['event'] = r['event']\n    r_dict['seg_timestamp'] = r['seg_timestamp']\n    r_dict['similarity'] = r['similarity']\n    r_list.append(r_dict)\n\ndata = str(r_list)\ndata = data[:9000]\n\nprompt = f\"The user asked: {query}. From the following context, please help answer the query: {data}\"\n\nresponse = openai.ChatCompletion.create(\n    model=\"gpt-3.5-turbo\",\n    messages=[\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant working for Super Duper ToDo who can help us sort out details about our customers, write emails about our award winning todo app, and who is excellent at understanding how to get users to re-engage with our application. You will always deliver your answers in plain language without referencing the raw data output. When refering to users, always use their first and last name, not their user_id.\"},\n        {\"role\": \"user\", \"content\": prompt}\n    ]\n)\n\nprint(response['choices'][0]['message']['content'])\n"},{"cell_type":"code","execution_count":null,"id":"91cee5ee-dce3-4aec-9967-a4cb2ab242f3","metadata":{},"outputs":[],"source":""}],"metadata":{"jupyterlab":{"notebooks":{"version_major":6,"version_minor":4}},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.9"},"singlestore_connection":{"connectionID":"7ef1b138-b97d-4dd8-8612-6876cdfdf589","defaultDatabase":"plg_data"},"singlestore_row_limit":300},"nbformat":4,"nbformat_minor":5}


--------------------------------------------------------------------------------
/chat-with-plg-data/create_table_actions.sql:
--------------------------------------------------------------------------------
 1 | CREATE TABLE IF NOT EXISTS actions (
 2 |     id INTEGER PRIMARY KEY AUTO_INCREMENT,
 3 |     event VARCHAR(255),
 4 |     seg_message_id VARCHAR(255),
 5 |     seg_original_timestamp DATETIME,
 6 |     seg_received_at DATETIME,
 7 |     seg_sent_at DATETIME,
 8 |     seg_timestamp DATETIME,
 9 |     user_id VARCHAR(255),
10 |     embedding BLOB NOT NULL
11 | );


--------------------------------------------------------------------------------
/chat-with-plg-data/create_table_customers.sql:
--------------------------------------------------------------------------------
 1 | CREATE TABLE IF NOT EXISTS customers (
 2 |     id INTEGER PRIMARY KEY AUTO_INCREMENT,
 3 |     created_at DATETIME,
 4 |     email VARCHAR(255),
 5 |     first_name VARCHAR(255),
 6 |     last_name VARCHAR(255),
 7 |     phone VARCHAR(255),
 8 |     user_id VARCHAR(255),
 9 |     seg_message_id VARCHAR(255),
10 |     seg_received_at DATETIME,
11 |     seg_sent_at DATETIME,
12 |     seg_timestamp DATETIME,
13 |     embedding BLOB NOT NULL
14 | );


--------------------------------------------------------------------------------
/chat-with-plg-data/generator/.env.sample:
--------------------------------------------------------------------------------
1 | SEGMENT_WRITE_KEY=


--------------------------------------------------------------------------------
/chat-with-plg-data/generator/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.10-slim@sha256:cc91315c3561d0b87d0525cb814d430cfbc70f10ca54577def184da80e87c1db
 2 | 
 3 | WORKDIR /usr/app
 4 | 
 5 | COPY requirements.txt .
 6 | 
 7 | RUN pip3 install -r requirements.txt
 8 | 
 9 | COPY .env ./.env
10 | 
11 | COPY . .
12 | 
13 | CMD [ "python", "app.py" ]


--------------------------------------------------------------------------------
/chat-with-plg-data/generator/app.py:
--------------------------------------------------------------------------------
  1 | import segment.analytics as analytics
  2 | from dotenv import load_dotenv
  3 | from faker import Faker
  4 | from datetime import datetime
  5 | import random
  6 | import os
  7 | import time
  8 | 
  9 | 
 10 | fake = Faker()
 11 | load_dotenv()
 12 | 
 13 | analytics.write_key = os.getenv('SEGMENT_WRITE_KEY')
 14 | 
 15 | customers = []
 16 | signup_actions = ['sign_up', 'log_in', 'confirm_email', 'completed_setup', 'added_payment_info', 'start_subscription', 'end_subscription']
 17 | app_actions = ['opened_app', 'viewed_dashboard', 'viewed_todo', 'add_todo', 'complete_todo', 'delete_todo', 'edit_todo']
 18 | 
 19 | # a function that uses faker to fake a user with first/last name, email, and phone number. also add a way to generate a 16 character alphanumeric id
 20 | 
 21 | def fake_user():
 22 |     return {
 23 |         "userId": fake.uuid4(),
 24 |         "firstName": fake.first_name(),
 25 |         "lastName": fake.last_name(),
 26 |         "email": fake.email(),
 27 |         "phone": fake.phone_number()
 28 |     }
 29 | 
 30 | # a function that appends a fake user to the customers list
 31 | 
 32 | def add_fake_user():
 33 |     customers.append(fake_user())
 34 | 
 35 | # a function that outputs a random number between 0 and 6
 36 | 
 37 | def random_action():
 38 |     return random.randint(0, 6)
 39 | 
 40 | # a function that selects the indexed item using an argument integer and the list name as argument
 41 | 
 42 | def select_action(index, list_name):
 43 |     return list_name[index]
 44 | 
 45 | # a function that records a user action using the segment analytics library. the function should take a user id, action name, and a dictionary of properties as arguments
 46 | 
 47 | def record_action(user_id, action_name, properties):
 48 |     analytics.track(user_id, action_name, properties)
 49 | 
 50 | # the signup_actions list contains all the actions that a user can take when signing up for the app and going through the setup steps. they should be considered successive and completed in order. end_subscription should be rare.
 51 | # a function that loops through the users in the customers list and creates records for each of the signup actions for each user. the users should not all complete all of the signup actions, but they should be completed in order.
 52 | # the function should output each user and their signup actions to the console in a dictionary format
 53 | 
 54 | def signup():
 55 |     for customer in customers:
 56 |         action_counter = 0
 57 |         action_count = random_action()
 58 |         analytics.identify(customer['userId'], {
 59 |             "firstName": customer['firstName'],
 60 |             "lastName": customer['lastName'],
 61 |             "email": customer['email'],
 62 |             "phone": customer['phone'],
 63 |             'created_at': '{}'.format(datetime.now())
 64 |         })
 65 |         time.sleep(3)
 66 |         customer['signup_actions'] = []
 67 |         while action_counter <= action_count:
 68 |             action = select_action(action_counter, signup_actions)
 69 |             record_action(customer['userId'], action, {
 70 |                 'time': datetime.now()
 71 |             })
 72 |             time.sleep(1.1)
 73 |             action_counter += 1
 74 |             customer['signup_actions'].append(action)
 75 |         # add a small delay
 76 |         time.sleep(1.1)
 77 |     print(customers)
 78 | 
 79 | # a function that loops through each customer in the customers list and then creates a random number of app actions for each user. Each app action should be recorded with a timestamp. Only generate app actions for users that have signed up and logged in.
 80 | 
 81 | def taking_actions(customer):
 82 |     if 'signup_actions' in customer and 'log_in' in customer['signup_actions']:
 83 |         action_counter = 0
 84 |         action_count = random_action()
 85 |         customer['app_actions'] = []
 86 |         while action_counter <= action_count:
 87 |             action = select_action(action_counter, app_actions)
 88 |             record_action(customer['userId'], action, {
 89 |                 'time': datetime.now()
 90 |             })
 91 |             customer['app_actions'].append(action)
 92 |             action_counter += 1
 93 |             # add a small delay
 94 |             time.sleep(random.randint(1, 9))
 95 | 
 96 | # a function that generates n number of fake users
 97 | 
 98 | def generate_users(n):
 99 |     for i in range(n):
100 |         add_fake_user()
101 | 
102 | def fake_actions_loop(customers):
103 |     while True:
104 |         for customer in customers:
105 |                 taking_actions(customer)
106 | 
107 | # a function that generates n number of fake users and then runs the signup and app actions functions
108 | 
109 | def generate_data(n):
110 |     generate_users(n)
111 |     signup()
112 |     fake_actions_loop(customers)
113 | 
114 | generate_data(100)


--------------------------------------------------------------------------------
/chat-with-plg-data/generator/requirements.txt:
--------------------------------------------------------------------------------
1 | Faker==19.1.0
2 | python-dotenv==1.0.0
3 | segment_analytics_python==2.2.3


--------------------------------------------------------------------------------
/chat-with-plg-data/webhook_server/.env.sample:
--------------------------------------------------------------------------------
1 | S2_HOST=
2 | S2_USER=
3 | S2_PASS=
4 | OPENAI_API_KEY=


--------------------------------------------------------------------------------
/chat-with-plg-data/webhook_server/.gitignore:
--------------------------------------------------------------------------------
1 | .env


--------------------------------------------------------------------------------
/chat-with-plg-data/webhook_server/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.10-slim@sha256:cc91315c3561d0b87d0525cb814d430cfbc70f10ca54577def184da80e87c1db
 2 | 
 3 | WORKDIR /usr/app
 4 | 
 5 | COPY requirements.txt .
 6 | 
 7 | RUN pip3 install -r requirements.txt
 8 | 
 9 | COPY .env ./.env
10 | 
11 | COPY . .
12 | 
13 | CMD [ "python", "app.py" ]


--------------------------------------------------------------------------------
/chat-with-plg-data/webhook_server/app.py:
--------------------------------------------------------------------------------
  1 | import uvicorn
  2 | from fastapi import FastAPI, Depends, Request
  3 | import logging
  4 | import sys
  5 | from sqlalchemy import *
  6 | from dotenv import load_dotenv
  7 | import os
  8 | from pydantic import BaseModel
  9 | from datetime import datetime
 10 | import openai
 11 | 
 12 | class Action(BaseModel):
 13 |     anonymousId: str = None
 14 |     channel: str
 15 |     context: dict
 16 |     event: str
 17 |     integrations: dict
 18 |     messageId: str
 19 |     originalTimestamp: str
 20 |     projectId: str
 21 |     properties: dict
 22 |     receivedAt: str
 23 |     sentAt: str
 24 |     timestamp: str
 25 |     type: str
 26 |     userId: str
 27 |     version: str
 28 |     writeKey: str
 29 | 
 30 | class Customer(BaseModel):
 31 |     anonymousId: str = None
 32 |     channel: str
 33 |     context: dict
 34 |     integrations: dict
 35 |     messageId: str
 36 |     originalTimestamp: str
 37 |     projectId: str
 38 |     receivedAt: str
 39 |     sentAt: str
 40 |     timestamp: str
 41 |     traits: dict
 42 |     type: str
 43 |     userId: str
 44 |     version: str
 45 |     writeKey: str
 46 | 
 47 | logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
 48 | 
 49 | logger = logging.getLogger(__name__)
 50 | logger.setLevel(logging.DEBUG)
 51 | 
 52 | load_dotenv()
 53 | s2_user = os.getenv('S2_USER')
 54 | s2_pass = os.getenv('S2_PASS')
 55 | s2_host = os.getenv('S2_HOST')
 56 | print(s2_host)
 57 | model_id = 'text-embedding-ada-002'
 58 | openai.api_key = os.getenv('OPENAI_API_KEY')
 59 | 
 60 | connection_url = "mysql://{}:{}@{}:3306/plg_data".format(s2_user,s2_pass,s2_host)
 61 | conn = create_engine(connection_url)
 62 | 
 63 | app = FastAPI()
 64 | 
 65 | def create_embeddings(content,model_id):
 66 |     try:
 67 |         response = openai.Embedding.create(input=str(content),model=model_id)
 68 | 
 69 |         embedding = response['data'][0]['embedding']
 70 |         tokens = response['usage']['total_tokens']
 71 |         status = 'success'
 72 |         return embedding,tokens,status
 73 |     except Exception as e:
 74 |         print(e)
 75 |         embedding = ''
 76 |         tokens = 0
 77 |         status = 'failed'
 78 |         return embedding,tokens,status
 79 | 
 80 | def format_timestamp(date_time_string):
 81 |     try:
 82 |         # Attempt to parse with 'Z' format
 83 |         parsed_datetime = datetime.strptime(date_time_string, "%Y-%m-%dT%H:%M:%S.%fZ")
 84 |     except ValueError:
 85 |         try:
 86 |             # If parsing with 'Z' format fails, attempt to parse with timezone offset
 87 |             parsed_datetime = datetime.fromisoformat(date_time_string)
 88 |         except ValueError:
 89 |             raise ValueError("Invalid date-time format")
 90 | 
 91 |     # Format the parsed datetime as a string in the required format for SQL
 92 |     formatted_datetime = parsed_datetime.strftime("%Y-%m-%d %H:%M:%S.%f")
 93 |     return formatted_datetime
 94 | 
 95 | def write_action_to_db(payload):
 96 |     event = payload['event']
 97 |     seg_message_id = payload['messageId']
 98 |     seg_original_timestamp = format_timestamp(payload['originalTimestamp'])
 99 |     seg_received_at = format_timestamp(payload['receivedAt'])
100 |     seg_sent_at = format_timestamp(payload['sentAt'])
101 |     seg_timestamp = format_timestamp(payload['timestamp'])
102 |     user_id = payload['userId']
103 | 
104 |     new_payload = {}
105 |     new_payload['event'] = event
106 |     new_payload['seg_message_id'] = seg_message_id
107 |     new_payload['seg_original_timestamp'] = seg_original_timestamp
108 |     new_payload['seg_received_at'] = seg_received_at
109 |     new_payload['seg_sent_at'] = seg_sent_at
110 |     new_payload['seg_timestamp'] = seg_timestamp
111 |     new_payload['user_id'] = user_id
112 | 
113 |     logger.info("Creating Embedding for {}".format(new_payload))
114 |     embedding,tokens,status = create_embeddings(new_payload,model_id)
115 |     logger.info("Writing to DB")
116 |     try:
117 |         with conn.connect() as connection:
118 |             connection.execute(text("INSERT INTO \
119 |                         actions (event, seg_message_id, seg_original_timestamp, \
120 |                         seg_received_at, seg_sent_at, seg_timestamp, user_id, embedding) \
121 |                         VALUES ('{}', '{}', '{}', '{}', '{}', '{}', '{}', JSON_ARRAY_PACK('{}'))".format(event, seg_message_id, seg_original_timestamp, seg_received_at, seg_sent_at, seg_timestamp, user_id, str(embedding))))
122 |             connection.commit()
123 |             return {"status": "successfully wrote action to db"}
124 |     except Exception as e:
125 |         logger.error(e)
126 | 
127 | def write_customer_to_db(payload):
128 | 
129 |     cust_created_timestamp = format_timestamp(payload['traits']['created_at'])
130 |     cust_email = payload['traits']['email']
131 |     cust_first_name = payload['traits']['firstName']
132 |     cust_last_name = payload['traits']['lastName']
133 |     cust_phone = payload['traits']['phone']
134 |     cust_user_id = payload['userId']
135 |     seg_message_id = payload['messageId']
136 |     seg_received_at = format_timestamp(payload['receivedAt'])
137 |     seg_sent_at = format_timestamp(payload['sentAt'])
138 |     seg_timestamp = format_timestamp(payload['timestamp'])
139 | 
140 |     new_payload = {}
141 |     new_payload['cust_created_timestamp'] = cust_created_timestamp
142 |     new_payload['cust_email'] = cust_email
143 |     new_payload['cust_first_name'] = cust_first_name
144 |     new_payload['cust_last_name'] = cust_last_name
145 |     new_payload['cust_phone'] = cust_phone
146 |     new_payload['cust_user_id'] = cust_user_id
147 |     new_payload['seg_message_id'] = seg_message_id
148 |     new_payload['seg_received_at'] = seg_received_at
149 |     new_payload['seg_sent_at'] = seg_sent_at
150 |     new_payload['seg_timestamp'] = seg_timestamp
151 | 
152 |     logger.info("Creating Embedding for {}".format(new_payload))
153 |     embedding,tokens,status = create_embeddings(new_payload,model_id)
154 |     logger.info("Writing customer to DB")
155 |     try:
156 |         with conn.connect() as connection:
157 |             connection.execute(text("INSERT INTO \
158 |                         customers (created_at, email, first_name, \
159 |                         last_name, phone, user_id, seg_message_id, \
160 |                         seg_received_at, seg_sent_at, seg_timestamp, embedding) \
161 |                         VALUES ('{}', '{}', '{}', '{}', '{}', '{}', \
162 |                     '{}', '{}', '{}', '{}', JSON_ARRAY_PACK('{}'))".format(cust_created_timestamp, cust_email, cust_first_name, cust_last_name, cust_phone, cust_user_id, seg_message_id, seg_received_at, seg_sent_at, seg_timestamp, str(embedding))))
163 |             connection.commit()
164 |             return {"status": "successfully wrote customer to db"}
165 |     except Exception as e:
166 |         logger.error(e)
167 | 
168 | @app.get("/")
169 | async def root():
170 |     return {"message": "Hello World"}
171 | 
172 | @app.post("/action")
173 | async def webhook(action: Action):
174 |     payload = action.dict()
175 |     logger.info("Writing to DB")
176 |     write_action_to_db(payload)
177 |     logger.info("Received action: {}/{}".format(action.event,action.userId))
178 |     return {"status": "ok"}
179 | 
180 | @app.post("/identify")
181 | async def identify(customer: Customer):
182 |     payload = customer.dict()
183 |     logger.info("Writing to DB")
184 |     write_customer_to_db(payload)
185 |     logger.info("Identified customer {} in database".format(customer.userId))
186 |     return {"status": "okay"}
187 | 
188 | @app.post("/test")
189 | async def get_body(request: Request):
190 |     logger.info(await request.body())
191 |     return {"status": "ok"}
192 | 
193 | if __name__ == "__main__":
194 |     logger.info('Starting Server')
195 |     uvicorn.run(app, host="0.0.0.0", port=5000)


--------------------------------------------------------------------------------
/chat-with-plg-data/webhook_server/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi==0.103.0
2 | pydantic==1.10.7
3 | python-dotenv==1.0.0
4 | SQLAlchemy==2.0.15
5 | uvicorn==0.23.2
6 | mysqlclient==2.1.1


--------------------------------------------------------------------------------
/kai-product-rec/README.md:
--------------------------------------------------------------------------------
 1 | # Using GPT for Product Recommendation Engines
 2 | 
 3 | Join us on June 27th for an exciting webinar titled "Using GPT for Product Recommendation Engines," featuring [@wesdottoday](https://github.com/wesdottoday) from SingleStore. In this webinar, we will explore how GPT (Generative Pre-trained Transformer) can revolutionize your product recommendation engines, providing personalized and relevant suggestions to your customers.
 4 | 
 5 | Don't miss this opportunity to gain valuable insights from [@wesdottoday](https://github.com/wesdottoday) and learn how to leverage GPT for powerful product recommendation engines. Register now to secure your spot for this informative webinar!
 6 | 
 7 | Make sure to sign up for a [Free Trial of SingleStoreDB](https://www.singlestore.com/cloud-trial/?utm_campaign=7014X000002eefLQAQ&utm_medium=webinar&utm_source=singlestore&utm_content=repo-link) today!
 8 | 
 9 | ## Discussion Topics
10 | 
11 | - Understand the power of GPT in transforming product recommendation engines and enhancing customer experiences.
12 | - Discover effective strategies for implementing GPT-based recommendation systems, including data preprocessing, model training, and evaluation techniques.
13 | - Learn how to leverage SingleStore's advanced capabilities to scale and optimize your GPT-powered recommendation engine.
14 | 


--------------------------------------------------------------------------------
/kai-product-rec/webinar-product-rec-kai.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"markdown","id":"d977a96a-791b-4e4f-9690-51b4d79112eb","metadata":{},"source":"# Using GPT for Product Recommendation Engines\n\nToday we will be using SingleStore Kai™ for MongoDB, along with OpenAI, to put together a simple product recommendation engine in Python."},{"cell_type":"markdown","id":"854151ef-f222-4d65-a07e-391e4aae7b7d","metadata":{},"source":"## API Keys\n\nBefore you get started, make sure you drop in your API key for OPENAI and your OPENAI ORG.\n\n- You can find your `OPENAI_API_KEY` [here](https://platform.openai.com/account/api-keys), if you don't have one, generate a project specific one.\n- You can find your `OPENAI_ORG` [here](https://platform.openai.com/account/org-settings) under `Organization ID`."},{"cell_type":"code","execution_count":null,"id":"db397f6b-15cf-451b-b52c-ba82e6d08764","metadata":{"tags":[],"trusted":true},"outputs":[],"source":"OPENAI_API_KEY = ''\nOPENAI_ORG = ''"},{"cell_type":"markdown","id":"4c6998dc-9a1c-452d-8bba-4b631ec189da","metadata":{},"source":"## Import Dataset\n\nFirst, we'll import a dataset into our database. Let's use [Open Library's Works](https://openlibrary.org/data/ol_dump_works_latest.txt.gz) dataset.\n\nFor the sake of brevity for the webinar, we've downloaded the extremely large dataset linked above and restricted it to just the Science Fiction novels using the following commands:"},{"cell_type":"code","execution_count":null,"id":"a2121dbe-2c4e-42d7-b02c-596a1de720db","metadata":{},"outputs":[],"source":"wget https://openlibrary.org/data/ol_dump_works_latest.txt.gz\ngunzip ol_dump_works_latest.txt\ncat ol_dump_works_latest.txt | grep -i \"science fiction\" | grep -i \"description\" | cut -f5 \u003e books_scifi.txt"},{"cell_type":"markdown","id":"1f5897b3-14c6-46fc-981e-ea07c99d2b00","metadata":{},"source":"#### Example Text Line"},{"cell_type":"code","execution_count":null,"id":"0ac45639-895c-47c7-9304-5d54b15e6899","metadata":{},"outputs":[],"source":"{\n    \"created\": {\n        \"type\": \"/type/datetime\",\n        \"value\": \"2009-12-11T01:41:04.053897\"\n    },\n    \"latest_revision\": 4,\n    \"description\": \"A science fiction suspense story. The setting starts on Earth, but then branches off to other realms with interesting\\r\\ncharacters.\",\n    \"key\": \"/works/OL9889262W\",\n    \"title\": \"Dark Paladin\",\n    \"authors\": [\n        {\n            \"type\": {\n                \"key\": \"/type/author_role\"\n            },\n            \"author\": {\n                \"key\": \"/authors/OL3871508A\"\n            }\n        }\n    ],\n    \"type\": {\n        \"key\": \"/type/work\"\n    },\n    \"last_modified\": {\n        \"type\": \"/type/datetime\",\n        \"value\": \"2012-07-11T20:19:14.469842\"\n    },\n    \"covers\": [\n        2940133\n    ],\n    \"revision\": 4\n}"},{"cell_type":"markdown","id":"5b35a875-8245-462f-b02e-e0a9f97c9ac4","metadata":{},"source":"### Download Dataset\n\nI have hosted the slimmed down dataset on the [GitHub Repo](https://github.com/singlestore-labs/webinar-code-examples/tree/main/kai-product-rec) for this webinar. We'll download it from there.\n\nUsing the `requests` library, we will download the txt file, iterate through it in chunks, writing those chunks to a local file.\n\nBelow, you'll see us setting the output of the `download_file()` function to the variable `local_dataset`."},{"cell_type":"code","execution_count":null,"id":"8485cfce-9287-4513-a30b-bae0d60df011","metadata":{"tags":[],"trusted":true},"outputs":[],"source":"import requests\n\ndataset_url = 'https://raw.githubusercontent.com/singlestore-labs/webinar-code-examples/main/kai-product-rec/books_scifi.txt'\n\ndef download_file(dataset_url):\n    local_filename = dataset_url.split('/')[-1]\n    with requests.get(dataset_url, stream=True) as r:\n        r.raise_for_status()\n        with open(local_filename, 'wb') as f:\n            for chunk in r.iter_content(chunk_size=8192): \n                # If you have chunk encoded response uncomment if\n                # and set chunk_size parameter to None.\n                #if chunk: \n                f.write(chunk)\n    return local_filename\n\nlocal_dataset = download_file(dataset_url)"},{"cell_type":"markdown","id":"08df354e-a408-4b83-955c-40eb75760d09","metadata":{},"source":"### Read text file into variable\n\nThe file created in the previous step will now be opened and read line-by-line into the variable `data`."},{"cell_type":"code","execution_count":null,"id":"a9583590-f3dc-4ec1-b325-4068177a9e02","metadata":{"tags":[],"trusted":true},"outputs":[],"source":"file = open(local_dataset)\ndata = file.readlines()\nfile.close()"},{"cell_type":"markdown","id":"5e764abb-eea8-486c-958a-7ae666aaad2d","metadata":{},"source":"### Track Token Usage\n\nQuerying OpenAI a bunch of times can get expensive, so you'll want to keep track of your token usage. In our case, I looked at the [Pricing Page](https://openai.com/pricing). specifically for the model that we will be using to create our vectors (`text-embedding-ada-002`).\n\nThe function below will take the dollar limit you set for the project and do the math to determine the max number of tokens you can expend, while the function (`budget_status()` will let us know throughout our project how we're doing on token usage."},{"cell_type":"code","execution_count":null,"id":"2594abfa-2268-4330-8b33-3fe1e3485c70","metadata":{"tags":[],"trusted":true},"outputs":[],"source":"cost_per_1k = 0.0001\ndollar_limit = 20.00\nbudget_tokens = (dollar_limit / cost_per_1k) * 1000\ntoken_usage = 0\n\ndef budget_status(token_usage):\n    if budget_tokens \u003e token_usage:\n        return 'ok'\n    else:\n        return 'spent'"},{"cell_type":"markdown","id":"48186a20-4e4a-44cd-8105-b82a9976cc1f","metadata":{"tags":[]},"source":"### Create SQL Table\n\nHere we are creating the table to store our books in. We will have an auto incrementing `_id` as the primary key, while storing the book title and the embedding generated from OpenAI."},{"cell_type":"code","execution_count":null,"id":"5908bfd7-e9bb-48f1-a1f0-a2b9d56896db","metadata":{"tags":[],"trusted":true},"outputs":[],"source":"%%sql\nCREATE TABLE IF NOT EXISTS products (\n  _id INT AUTO_INCREMENT PRIMARY KEY,\n  title VARCHAR(255) NOT NULL,\n  embedding BLOB NOT NULL\n);"},{"cell_type":"markdown","id":"2da984d0-6899-4150-b39a-1c246e386ece","metadata":{},"source":"### Create Embeddings and Load into S2\n\nThis is a fairly complex process, so we'll break everything out as best as we can here. Here are the high-level steps we need to cover:\n\n1. Install `openai` and import required libraries. Additionally set some default configuration settings and create the database connection.\n\n\u003e Note: `connection_url` is unique to SingleStore Notebooks, as it's a variable that contains the connection string for your databse.\n\n2. Create the functions needed to query OpenAI for the Embeddings and to Write to the Database.\n3. Loop through the books dataset (which is stored in `data`), calling for the creation of embedding, then store it to the database."},{"cell_type":"markdown","id":"fa2d683c-11b5-4559-8401-02bad5187cff","metadata":{"execution":{"iopub.execute_input":"2023-06-27T18:52:37.630667Z","iopub.status.busy":"2023-06-27T18:52:37.630183Z","iopub.status.idle":"2023-06-27T18:52:37.633477Z","shell.execute_reply":"2023-06-27T18:52:37.632756Z","shell.execute_reply.started":"2023-06-27T18:52:37.630644Z"},"tags":[]},"source":"#### Step 1: Install and import libraries, adjust configuration"},{"cell_type":"code","execution_count":null,"id":"3e90767b-12c9-4511-bd66-2831691b9a83","metadata":{"tags":[],"trusted":true},"outputs":[],"source":"!pip install openai\n\nimport openai\nimport ast\nfrom sqlalchemy import *\n\nopenai.organization = OPENAI_ORG\nopenai.api_key = OPENAI_API_KEY\n\nconn = create_engine(connection_url)\nupdate_interval = 500 # How often to update you in the terminal of status\n\nmodel_id = 'text-embedding-ada-002'"},{"cell_type":"markdown","id":"29ec57d8-0c07-48c2-9c44-f0eadef55bdb","metadata":{},"source":"#### Step 2: Create the functions to query OpenAI and Write to Database"},{"cell_type":"code","execution_count":null,"id":"72e4b31a-970f-40e1-a9ca-ad085dcf4a0e","metadata":{},"outputs":[],"source":"ds_with_embeddings = []\ntotal_items = len(data)\n\ndef request_embedding(text, token_usage):\n    \n    budget = budget_status(token_usage)\n    \n    if budget == 'ok':\n        #print('Budget status: OK\\nTokens: {}/{}'.format(token_usage,budget_tokens))\n        try:\n            if OPENAI_API_KEY:\n                response = openai.Embedding.create(input=text,model=model_id)\n                embedding = response['data'][0]['embedding']\n                tokens = response['usage']['total_tokens']\n                status = 'success'\n                #print(embedding)\n                return embedding,tokens,status\n            else:\n                print('You need to set your OpenAI API Key to the variable OPENAI_API_KEY')\n        except Exception as e:\n            print(e)\n            embedding = ''\n            tokens = 0\n            status = 'failed'\n            return embedding,tokens,status\n    else:\n        print('Budget Spent: {}/{}'.format(token_usage,budget_tokens))\n        embedding = ''\n        tokens = 0\n        status = 'budget_spent'\n        return embedding,tokens,status\n\ndef write_to_db(data):\n    keys = [\"title\", \"embedding\" ];\n    query = \"INSERT INTO products (title, embedding) VALUES (%s, JSON_ARRAY_PACK_F32(%s))\"\n    \n    try:\n        with conn:\n            conn.execute(query, (data[keys[0]].replace(\"'\",\"\"), str(data[keys[1]])))\n            print(\"Wrote item\")\n    except Exception as e:\n        print(e)"},{"cell_type":"markdown","id":"b3882bc5-16c9-401d-8237-9cf06238643f","metadata":{},"source":"#### Step 4: Loop through dataset requesting embedding, append embedding to dataset, write dataset to database"},{"cell_type":"code","execution_count":null,"id":"527542d7-9732-43f1-bf65-b0864de90450","metadata":{},"outputs":[],"source":"loop_counter = 0\nprint('Requesting embeddings. I will update you every {} embeddings.'.format(str(update_interval))\nfor b in data:\n    try:\n        embedding,tokens,status = request_embedding(b, token_usage)\n        if status != 'failed' and status != 'budget_spent':\n            book = ast.literal_eval(b)\n            book['embedding'] = embedding\n            write_to_db(book)\n            token_usage += tokens\n            #print('Completed {}/{}'.format(len(ds_with_embeddings),total_items))\n            loop_counter += 1\n            if loop_counter == update_interval:\n                print('Completed {}/{}'.format(len(ds_with_embeddings),total_items))\n                print('Token usage: {}/{}'.format(token_usage,budget_tokens))\n                loop_counter = 0\n        elif status == 'budget_spent':\n            print('Getting embedding failed because the budget is spent.')\n        else:\n            print('Getting embedding for this book failed:\\n{}'.format(b))\n    except Exception as e:\n        print(e)\n        \nconn.close()"},{"cell_type":"code","execution_count":null,"id":"b23b6746-8a17-4441-abd4-9ae6d7d8b648","metadata":{},"outputs":[],"source":"query = 'The Martian'\n\n\n\nsql_query = 'SELECT title FROM products WHERE EUCLIDEAN_DISTANCE(vector, JSON_ARRAY_PACK('query')) ;\n\nSELECT EUCLIDEAN_DISTANCE(vector, JSON_ARRAY_PACK('[5.9,3,5.1,1.8]')) AS euclidean_distance, title\nFROM products\nORDER BY euclidean_distance\nLIMIT 5;\n\n"},{"cell_type":"code","execution_count":null,"id":"1906aa2c-8929-4e70-b64b-6843359323ec","metadata":{},"outputs":[],"source":""}],"metadata":{"jupyterlab":{"notebooks":{"version_major":6,"version_minor":4}},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.9"},"singlestore_connection":{"connectionID":"00336d64-e4db-4635-aa2b-4ab9f330727a","defaultDatabase":"kai_product_rec"},"singlestore_row_limit":300},"nbformat":4,"nbformat_minor":5}


--------------------------------------------------------------------------------
/langchain-lift-off/README.md:
--------------------------------------------------------------------------------
 1 | # LangChain Lift-Off: Launching Open-Source Apps on Private Network
 2 | 
 3 | Get ready for "LangChain Lift-off: Launch Your Open Source GPT Apps Today". This is the future of AI, where internal data including documents, wikis, code, and meeting notes are at your AI's fingertips. Our ideal tool for the job, LangChain, can store vector data, perform semantic searches, and pull data from various sources without extensive ETL.
 4 | 
 5 | Join us on June 22nd for an exclusive webinar featuring Akmal Chaudhri. This event is perfect for developers, data engineers, and anyone interested in building intelligent GPT applications.
 6 | 
 7 | Don't miss this chance to learn from the expert and gain valuable insights on how to build a GPT App on an open-source stack using LangChain. Register now!
 8 | 
 9 | ## Discussion Topics
10 | 
11 | - Dive deep into building a GPT App using LangChain, with hands-on examples and live coding.
12 | - Uncover LangChain’s native support for efficient vector functions to power Generative AI with simple SQL queries.
13 | - Absorb practical techniques and strategies for building intelligent GPT applications.
14 | - Delve into the power of LangChain's scalable, distributed architecture and OpenAI's advanced machine learning models for GPT.
15 | 
16 | ## Free Trial!
17 | Don't forget to try this for yourself by signing up for a [free trial of SingleStoreDB](https://www.singlestore.com/cloud-trial/?utm_campaign=7014X000002edsdQAA&utm_medium=webinar&utm_source=singlestore&utm_content=webinar-github) today!
18 | 


--------------------------------------------------------------------------------
/llama-2-local/.gitignore:
--------------------------------------------------------------------------------
  1 | # GGML Models
  2 | models/*.bin
  3 | 
  4 | # PPT decks
  5 | *.pptx
  6 | 
  7 | # Byte-compiled / optimized / DLL files
  8 | __pycache__/
  9 | *.py[cod]
 10 | *$py.class
 11 | 
 12 | # C extensions
 13 | *.so
 14 | 
 15 | # Distribution / packaging
 16 | .Python
 17 | build/
 18 | develop-eggs/
 19 | dist/
 20 | downloads/
 21 | eggs/
 22 | .eggs/
 23 | lib/
 24 | lib64/
 25 | parts/
 26 | sdist/
 27 | var/
 28 | wheels/
 29 | share/python-wheels/
 30 | *.egg-info/
 31 | .installed.cfg
 32 | *.egg
 33 | MANIFEST
 34 | 
 35 | # PyInstaller
 36 | #  Usually these files are written by a python script from a template
 37 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 38 | *.manifest
 39 | *.spec
 40 | 
 41 | # Installer logs
 42 | pip-log.txt
 43 | pip-delete-this-directory.txt
 44 | 
 45 | # Unit test / coverage reports
 46 | htmlcov/
 47 | .tox/
 48 | .nox/
 49 | .coverage
 50 | .coverage.*
 51 | .cache
 52 | nosetests.xml
 53 | coverage.xml
 54 | *.cover
 55 | *.py,cover
 56 | .hypothesis/
 57 | .pytest_cache/
 58 | cover/
 59 | 
 60 | # Translations
 61 | *.mo
 62 | *.pot
 63 | 
 64 | # Django stuff:
 65 | *.log
 66 | local_settings.py
 67 | db.sqlite3
 68 | db.sqlite3-journal
 69 | 
 70 | # Flask stuff:
 71 | instance/
 72 | .webassets-cache
 73 | 
 74 | # Scrapy stuff:
 75 | .scrapy
 76 | 
 77 | # Sphinx documentation
 78 | docs/_build/
 79 | 
 80 | # PyBuilder
 81 | .pybuilder/
 82 | target/
 83 | 
 84 | # Jupyter Notebook
 85 | .ipynb_checkpoints
 86 | 
 87 | # IPython
 88 | profile_default/
 89 | ipython_config.py
 90 | 
 91 | # pyenv
 92 | #   For a library or package, you might want to ignore these files since the code is
 93 | #   intended to run in multiple environments; otherwise, check them in:
 94 | # .python-version
 95 | 
 96 | # pipenv
 97 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 98 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 99 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
100 | #   install all needed dependencies.
101 | #Pipfile.lock
102 | 
103 | # poetry
104 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
106 | #   commonly ignored for libraries.
107 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | 
110 | # pdm
111 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | #   in version control.
115 | #   https://pdm.fming.dev/#use-with-ide
116 | .pdm.toml
117 | 
118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
119 | __pypackages__/
120 | 
121 | # Celery stuff
122 | celerybeat-schedule
123 | celerybeat.pid
124 | 
125 | # SageMath parsed files
126 | *.sage.py
127 | 
128 | # Environments
129 | .env
130 | .venv
131 | env/
132 | venv/
133 | ENV/
134 | env.bak/
135 | venv.bak/
136 | 
137 | # Spyder project settings
138 | .spyderproject
139 | .spyproject
140 | 
141 | # Rope project settings
142 | .ropeproject
143 | 
144 | # mkdocs documentation
145 | /site
146 | 
147 | # mypy
148 | .mypy_cache/
149 | .dmypy.json
150 | dmypy.json
151 | 
152 | # Pyre type checker
153 | .pyre/
154 | 
155 | # pytype static type analyzer
156 | .pytype/
157 | 
158 | # Cython debug symbols
159 | cython_debug/
160 | 
161 | # PyCharm
162 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
165 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
166 | #.idea/
167 | 


--------------------------------------------------------------------------------
/llama-2-local/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Kenneth Leung
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/llama-2-local/README.md:
--------------------------------------------------------------------------------
 1 | # Running Llama 2 Locally for Document Q&A
 2 | 
 3 | ## Quickstart
 4 | - Ensure you have downloaded the GGML binary file from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML and placed it into the `models/` folder
 5 | - run `pip install -r requirements.txt`
 6 | - To start parsing user queries into the application, launch the terminal from the project directory and run the following command:
 7 | `python main.py "<user query>" 2>/dev/null`
 8 | - For example, `python main.py "What is the minimum guarantee payable by Adidas?" 2>/dev/null`
 9 | 
10 | <br><br>
11 | 
12 | Reference: https://github.com/kennethleungty/Llama-2-Open-Source-LLM-CPU-Inference
13 | 
14 | 


--------------------------------------------------------------------------------
/llama-2-local/assets/diagram_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singlestore-labs/webinar-code-examples/70e860e51e3346d044134f187bd16bf4bfef614e/llama-2-local/assets/diagram_flow.png


--------------------------------------------------------------------------------
/llama-2-local/assets/qa_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singlestore-labs/webinar-code-examples/70e860e51e3346d044134f187bd16bf4bfef614e/llama-2-local/assets/qa_output.png


--------------------------------------------------------------------------------
/llama-2-local/config/config.yml:
--------------------------------------------------------------------------------
 1 | RETURN_SOURCE_DOCUMENTS: True
 2 | VECTOR_COUNT: 2
 3 | CHUNK_SIZE: 500
 4 | CHUNK_OVERLAP: 50
 5 | DATA_PATH: 'data/'
 6 | DB_FAISS_PATH: 'vectorstore/db_faiss'
 7 | # MODEL_TYPE: 'mpt'
 8 | # MODEL_BIN_PATH: 'models/mpt-7b-instruct.ggmlv3.q8_0.bin'
 9 | MODEL_TYPE: 'llama'
10 | MODEL_BIN_PATH: 'models/llama-2-7b-chat.ggmlv3.q8_0.bin'
11 | MAX_NEW_TOKENS: 256
12 | TEMPERATURE: 0.01


--------------------------------------------------------------------------------
/llama-2-local/data/manu-20f-2022-09-24.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singlestore-labs/webinar-code-examples/70e860e51e3346d044134f187bd16bf4bfef614e/llama-2-local/data/manu-20f-2022-09-24.pdf


--------------------------------------------------------------------------------
/llama-2-local/db_build.py:
--------------------------------------------------------------------------------
 1 | # =========================
 2 | #  Module: Vector DB Build
 3 | # =========================
 4 | import box
 5 | import yaml
 6 | from langchain.vectorstores import FAISS
 7 | from langchain.text_splitter import RecursiveCharacterTextSplitter
 8 | from langchain.document_loaders import PyPDFLoader, DirectoryLoader
 9 | from langchain.embeddings import HuggingFaceEmbeddings
10 | 
11 | # Import config vars
12 | with open('config/config.yml', 'r', encoding='utf8') as ymlfile:
13 |     cfg = box.Box(yaml.safe_load(ymlfile))
14 | 
15 | 
16 | # Build vector database
17 | def run_db_build():
18 |     loader = DirectoryLoader(cfg.DATA_PATH,
19 |                              glob='*.pdf',
20 |                              loader_cls=PyPDFLoader)
21 |     documents = loader.load()
22 |     text_splitter = RecursiveCharacterTextSplitter(chunk_size=cfg.CHUNK_SIZE,
23 |                                                    chunk_overlap=cfg.CHUNK_OVERLAP)
24 |     texts = text_splitter.split_documents(documents)
25 | 
26 |     embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
27 |                                        model_kwargs={'device': 'cpu'})
28 | 
29 |     vectorstore = FAISS.from_documents(texts, embeddings)
30 |     vectorstore.save_local(cfg.DB_FAISS_PATH)
31 | 
32 | if __name__ == "__main__":
33 |     run_db_build()
34 | 


--------------------------------------------------------------------------------
/llama-2-local/main.py:
--------------------------------------------------------------------------------
 1 | from urllib import response
 2 | import box
 3 | import timeit
 4 | import yaml
 5 | import argparse
 6 | from dotenv import find_dotenv, load_dotenv
 7 | from src.utils import setup_dbqa
 8 | 
 9 | # Load environment variables from .env file
10 | load_dotenv(find_dotenv())
11 | 
12 | # Import config vars
13 | with open('config/config.yml', 'r', encoding='utf8') as ymlfile:
14 |     cfg = box.Box(yaml.safe_load(ymlfile))
15 | 
16 | 
17 | if __name__ == "__main__":
18 |     parser = argparse.ArgumentParser()
19 |     parser.add_argument('input',
20 |                         type=str,
21 |                         default='How much is the minimum guarantee payable by adidas?',
22 |                         help='Enter the query to pass into the LLM')
23 |     args = parser.parse_args()
24 | 
25 |     # Setup DBQA
26 |     start = timeit.default_timer()
27 |     dbqa = setup_dbqa()
28 |     response = dbqa({'query': args.input})
29 |     end = timeit.default_timer()
30 | 
31 |     # Process source documents
32 |     source_docs = response['source_documents']
33 |     for i, doc in enumerate(source_docs):
34 |         print(f'\nSource Document {i+1}\n')
35 |         print(f'Source Text: {doc.page_content}')
36 |         print(f'Document Name: {doc.metadata["source"]}')
37 |         print(f'Page Number: {doc.metadata["page"]}\n')
38 |         print('='* 60)
39 |     
40 |     print(f'\nAnswer: {response["result"]}')
41 |     print('='*50)
42 | 
43 |     print(f"Time to retrieve response: {end - start}")
44 |     dbqa = setup_dbqa()
45 |     response = dbqa({'query': 'How much is the minimum guarantee payable by adidas?'})
46 | 


--------------------------------------------------------------------------------
/llama-2-local/requirements.txt:
--------------------------------------------------------------------------------
  1 | aiofiles==23.1.0
  2 | aiohttp==3.8.5
  3 | aiosignal==1.3.1
  4 | anyio==3.7.1
  5 | appnope==0.1.3
  6 | asttokens==2.2.1
  7 | async-timeout==4.0.2
  8 | attrs==23.1.0
  9 | backcall==0.2.0
 10 | blinker==1.5
 11 | brotlipy==0.7.0
 12 | build==0.10.0
 13 | certifi==2023.5.7
 14 | cffi==1.15.1
 15 | charset-normalizer==2.0.4
 16 | click==8.1.3
 17 | comm==0.1.3
 18 | cryptography==39.0.1
 19 | ctransformers==0.2.5
 20 | dataclasses-json==0.5.13
 21 | debugpy==1.6.7
 22 | decorator==5.1.1
 23 | diskcache==5.6.1
 24 | executing==1.2.0
 25 | faiss-cpu==1.7.4
 26 | fastapi==0.100.0
 27 | filelock==3.12.2
 28 | frozenlist==1.4.0
 29 | fsspec==2023.6.0
 30 | h11==0.14.0
 31 | h2==4.1.0
 32 | hpack==4.0.0
 33 | huggingface-hub==0.16.4
 34 | hypercorn==0.14.3
 35 | hyperframe==6.0.1
 36 | idna==3.4
 37 | ipykernel==6.25.0
 38 | ipython==8.14.0
 39 | itsdangerous==2.1.2
 40 | jedi==0.18.2
 41 | Jinja2==3.1.2
 42 | joblib==1.3.1
 43 | jupyter_client==8.3.0
 44 | jupyter_core==5.3.1
 45 | langchain==0.0.236
 46 | langchainplus-sdk==0.0.20
 47 | llama-cpp-python==0.1.77
 48 | MarkupSafe==2.1.2
 49 | marshmallow==3.20.1
 50 | matplotlib-inline==0.1.6
 51 | mpmath==1.3.0
 52 | multidict==6.0.4
 53 | mypy-extensions==1.0.0
 54 | nest-asyncio==1.5.6
 55 | networkx==3.1
 56 | nltk==3.8.1
 57 | numexpr==2.8.4
 58 | numpy==1.25.1
 59 | openapi-schema-pydantic==1.2.4
 60 | packaging==23.1
 61 | parso==0.8.3
 62 | pexpect==4.8.0
 63 | pickleshare==0.7.5
 64 | Pillow==10.0.0
 65 | pip==23.0.1
 66 | platformdirs==3.9.1
 67 | priority==2.0.0
 68 | prompt-toolkit==3.0.39
 69 | psutil==5.9.5
 70 | ptyprocess==0.7.0
 71 | pure-eval==0.2.2
 72 | pycparser==2.21
 73 | pydantic==1.10.12
 74 | Pygments==2.15.1
 75 | PyJWT==2.8.0
 76 | PyMySQL==1.1.0
 77 | pyOpenSSL==23.0.0
 78 | pypdf==3.8.1
 79 | pyproject_hooks==1.0.0
 80 | PySocks==1.7.1
 81 | python-box==7.0.1
 82 | python-dateutil==2.8.2
 83 | python-dotenv==1.0.0
 84 | PyYAML==6.0.1
 85 | pyzmq==25.1.0
 86 | quart==0.18.4
 87 | quart-cors==0.6.0
 88 | regex==2023.6.3
 89 | requests==2.29.0
 90 | safetensors==0.3.1
 91 | scikit-learn==1.3.0
 92 | scipy==1.11.1
 93 | sentence-transformers==2.2.2
 94 | sentencepiece==0.1.99
 95 | setuptools==66.0.0
 96 | singlestoredb==0.8.1
 97 | six==1.16.0
 98 | sniffio==1.3.0
 99 | SQLAlchemy==2.0.19
100 | sqlparams==5.1.0
101 | stack-data==0.6.2
102 | starlette==0.27.0
103 | sympy==1.12
104 | tenacity==8.2.2
105 | threadpoolctl==3.2.0
106 | tokenizers==0.13.3
107 | toml==0.10.2
108 | tomlkit==0.12.1
109 | torch==2.0.1
110 | torchvision==0.15.2
111 | tornado==6.3.2
112 | tqdm==4.65.0
113 | traitlets==5.9.0
114 | transformers==4.31.0
115 | typing_extensions==4.7.1
116 | typing-inspect==0.9.0
117 | urllib3==1.26.15
118 | uvicorn==0.23.1
119 | wcwidth==0.2.6
120 | Werkzeug==2.3.4
121 | wheel==0.38.4
122 | wsproto==1.2.0
123 | yarl==1.9.2
124 | 


--------------------------------------------------------------------------------
/llama-2-local/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singlestore-labs/webinar-code-examples/70e860e51e3346d044134f187bd16bf4bfef614e/llama-2-local/src/__init__.py


--------------------------------------------------------------------------------
/llama-2-local/src/llm.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | ===========================================
 3 |         Module: Open-source LLM Setup
 4 | ===========================================
 5 | '''
 6 | from langchain.llms import CTransformers, LlamaCpp
 7 | from dotenv import find_dotenv, load_dotenv
 8 | import box
 9 | import yaml
10 | 
11 | # Load environment variables from .env file
12 | load_dotenv(find_dotenv())
13 | 
14 | # Import config vars
15 | with open('config/config.yml', 'r', encoding='utf8') as ymlfile:
16 |     cfg = box.Box(yaml.safe_load(ymlfile))
17 | 
18 | 
19 | def build_llm():
20 |         n_gpu_layers = 1  # Metal set to 1 is enough.
21 |         n_batch = 1024  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.
22 | 
23 |         # Make sure the model path is correct for your system!
24 |         llm = LlamaCpp(
25 |                 model_path="./models/llama-2-7b-chat.ggmlv3.q2_K.bin",
26 |                 n_gpu_layers=n_gpu_layers,
27 |                 n_batch=n_batch,
28 |                 f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
29 |                 verbose=False,
30 |         )
31 | 
32 |         return llm
33 | 


--------------------------------------------------------------------------------
/llama-2-local/src/prompts.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | ===========================================
 3 |         Module: Prompts collection
 4 | ===========================================
 5 | '''
 6 | # Note: Precise formatting of spacing and indentation of the prompt template is important for Llama-2-7B-Chat,
 7 | # as it is highly sensitive to whitespace changes. For example, it could have problems generating
 8 | # a summary from the pieces of context if the spacing is not done correctly
 9 | 
10 | qa_template = """Use the following pieces of information to answer the user's question.
11 | If you don't know the answer, just say that you don't know, don't try to make up an answer.
12 | 
13 | Context: {context}
14 | Question: {question}
15 | 
16 | Only return the helpful answer below and nothing else.
17 | Helpful answer:
18 | """
19 | 


--------------------------------------------------------------------------------
/llama-2-local/src/utils.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | ===========================================
 3 |         Module: Util functions
 4 | ===========================================
 5 | '''
 6 | import box
 7 | import yaml
 8 | import os
 9 | 
10 | from langchain import PromptTemplate
11 | from langchain.chains import RetrievalQA
12 | from langchain.embeddings import HuggingFaceEmbeddings
13 | from langchain.vectorstores import FAISS, SingleStoreDB
14 | from langchain.text_splitter import RecursiveCharacterTextSplitter
15 | from langchain.document_loaders import PyPDFLoader, DirectoryLoader
16 | from src.prompts import qa_template
17 | from src.llm import build_llm
18 | 
19 | # Import config vars
20 | with open('config/config.yml', 'r', encoding='utf8') as ymlfile:
21 |     cfg = box.Box(yaml.safe_load(ymlfile))
22 | 
23 | 
24 | def set_qa_prompt():
25 |     """
26 |     Prompt template for QA retrieval for each vectorstore
27 |     """
28 |     prompt = PromptTemplate(template=qa_template,
29 |                             input_variables=['context', 'question'])
30 |     return prompt
31 | 
32 | 
33 | def build_retrieval_qa(llm, prompt, vectordb):
34 |     dbqa = RetrievalQA.from_chain_type(llm=llm,
35 |                                        chain_type='stuff',
36 |                                        retriever=vectordb.as_retriever(search_kwargs={'k': cfg.VECTOR_COUNT}),
37 |                                        return_source_documents=cfg.RETURN_SOURCE_DOCUMENTS,
38 |                                        chain_type_kwargs={'prompt': prompt}
39 |                                        )
40 |     return dbqa
41 | 
42 | 
43 | def setup_dbqa():
44 |     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
45 |                                        model_kwargs={'device': 'cpu'})
46 |     
47 |     loader = DirectoryLoader(cfg.DATA_PATH,
48 |                              glob='*.pdf',
49 |                              loader_cls=PyPDFLoader)
50 |     documents = loader.load()
51 |     text_splitter = RecursiveCharacterTextSplitter(chunk_size=cfg.CHUNK_SIZE,
52 |                                                    chunk_overlap=cfg.CHUNK_OVERLAP)
53 |     texts = text_splitter.split_documents(documents)
54 | 
55 |     os.environ["SINGLESTOREDB_URL"] = "placeholder"
56 |     vectorstore = SingleStoreDB(embeddings, distance_strategy="DOT_PRODUCT", table_name="demo0")
57 | 
58 |     # if you want to build a new vectorstore, use the following line instead of the above
59 |     # vectorstore = SingleStoreDB.from_documents(texts, embeddings, distance_strategy="DOT_PRODUCT", table_name="demo0")
60 | 
61 |     llm = build_llm()
62 |     qa_prompt = set_qa_prompt()
63 |     dbqa = build_retrieval_qa(llm, qa_prompt, vectorstore)
64 | 
65 |     return dbqa
66 | 


--------------------------------------------------------------------------------
/mktg-email-flow/README.md:
--------------------------------------------------------------------------------
 1 | # Beyond Vectors: How to Build a ChatGPT Super App
 2 | 
 3 | Join us for this upcoming webinar exploring how sales and marketing technology has evolved over time, and what’s now possible in today’s era of LLMs and Generative AI.
 4 | 
 5 | From the age-old "spray and pray" approach to sophisticated personalization strategies driven by AI and vector databases, our Head of Solution Engineering Sarung Tripathi will present an overview of how versatile general-purpose databases (with vector capabilities) can enhance prospect engagement and conversion rates. Sarung will also showcase how multi-model capabilities (covering JSON, NoSQL, and other data types) within LLMs can offer stronger contextual insights from various data stores (website and app data, etc) for even more personalized communications.
 6 | 
 7 | The spotlight of the webinar, however, will be Sarung’s demo of a modern general-purpose database application that can use vector embeddings, structured data and unstructured data to generate mass personalization at scale in milliseconds. In this insightful session, you'll grasp the real-world applications and advantages of this technology, setting a new standard for real-time, data-driven decision making.
 8 | 
 9 | ## Discussion Topics
10 | 
11 | - Discover Generative AI and LLM capabilities for application development, focusing on user experience, engagement, and conversion rates optimization.
12 | - Understand how to harness vector embeddings for mass personalization at scale, delivering real-time, tailored experiences.
13 | - Navigate the critical considerations around privacy and security in AI-powered applications, ensuring data integrity and robustness.
14 | 
15 | Sign up for a [free trial](http://bit.ly/beyond-vectors-raffle) of SingleStoreDB with a $600 Credit today!
16 | 


--------------------------------------------------------------------------------
/mktg-email-flow/docs/add-tracking-to-website.md:
--------------------------------------------------------------------------------
 1 | # Add Tracking to Your Website
 2 | 
 3 | This page guides you through the process of adding the Awesome Web Analytics tracking code to your website. It provides code examples and instructions for popular platforms and frameworks like WordPress, Django, and Node.js. It also includes troubleshooting tips in case you encounter any issues during the implementation.
 4 | 
 5 | ## Getting Started
 6 | 
 7 | ### Add Tracking to Your Website
 8 | 
 9 | This page provides a step-by-step guide on how to add the Awesome Web Analytics tracking code to your website. By following these instructions, you will be able to start tracking valuable data and gaining insights into your website's performance.
10 | 
11 | #### Why Tracking is Important
12 | 
13 | Before we dive into the implementation process, let's briefly understand why tracking is crucial for your website. Awesome Web Analytics allows you to gather valuable data that can help you make informed decisions about your website's design, content, and marketing strategies. By tracking user behavior, you can identify areas of improvement, optimize conversion rates, and ultimately enhance the user experience.
14 | 
15 | #### Implementation Instructions
16 | 
17 | To get started with tracking your website using Awesome Web Analytics, follow the instructions below. We have provided code examples and instructions for popular platforms and frameworks like WordPress, Django, and Node.js to ensure compatibility with your chosen technology stack. 
18 | 
19 | 1. **WordPress:**
20 | 
21 |    If you are using WordPress, follow these steps to add the Awesome Web Analytics tracking code:
22 | 
23 |    - Login to your WordPress admin dashboard.
24 |    - Navigate to the "Appearance" section and click on "Editor."
25 |    - Locate your theme's header.php file and open it for editing.
26 |    - Find the closing head tag (`</head>`) and paste the Awesome Web Analytics tracking code just before it.
27 |    - Save the changes, and you're done! Awesome Web Analytics is now tracking your WordPress website.
28 | 
29 | 2. **Django:**
30 | 
31 |    For Django websites, the process of adding the Awesome Web Analytics tracking code is as follows:
32 | 
33 |    - Open your Django project's base.html file.
34 |    - Locate the closing head tag (`</head>`) and insert the Awesome Web Analytics tracking code just before it.
35 |    - Save the file, and you're all set! Awesome Web Analytics will now track your Django website.
36 | 
37 | 3. **Node.js:**
38 | 
39 |    If you're using Node.js as your backend framework, here's how you can add the Awesome Web Analytics tracking code:
40 | 
41 |    - Open your main server file (e.g., `server.js` or `app.js`).
42 |    - Require the Awesome Web Analytics module at the top of your file.
43 |    - Initialize the Awesome Web Analytics tracking code with your unique tracking ID.
44 |    - Add the middleware function to track incoming requests.
45 |    - Save the file, and your Node.js website is now being tracked by Awesome Web Analytics.
46 | 
47 | #### Troubleshooting Tips
48 | 
49 | If you encounter any issues during the implementation process or if the tracking code is not functioning correctly, refer to the troubleshooting tips below:
50 | 
51 | - Double-check that you have correctly copied and pasted the Awesome Web Analytics tracking code. Even a small typo or missing character can cause issues.
52 | - Ensure that the tracking code is placed within the `<head>` section of your HTML document.
53 | - Verify that you have a stable internet connection to ensure the tracking data is being sent to the Awesome Web Analytics servers.
54 | - Clear your browser cache and test the website again to rule out any caching-related problems.
55 | - If you're still experiencing difficulties, don't hesitate to reach out to our support team for further assistance. We're here to help!
56 | 
57 | Congratulations! You have successfully added the Awesome Web Analytics tracking code to your website. Now you can start analyzing valuable data and gaining insights into your website's performance using the powerful features of Awesome Web Analytics.
58 | 


--------------------------------------------------------------------------------
/mktg-email-flow/docs/conversion-tracking.md:
--------------------------------------------------------------------------------
 1 | # Conversion Tracking
 2 | This page provides information on how to set up conversion tracking with Awesome Web Analytics. It explains how to define and track conversions, such as sign-ups, purchases, or specific actions on your website. It covers the configuration steps in the dashboard and demonstrates how to interpret the conversion data to optimize your website's performance.
 3 | 
 4 | This page provides detailed information on how to set up and utilize the conversion tracking feature with Awesome Web Analytics. It will guide you through the process of defining and tracking conversions, such as sign-ups, purchases, or specific actions on your website. Additionally, it covers the configuration steps in the Awesome Web Analytics dashboard and provides insights on how to interpret the conversion data to optimize your website's performance.
 5 | 
 6 | ## Table of Contents
 7 | 
 8 | 1. Introduction
 9 | 2. Setting up Conversion Tracking
10 |    - Defining Conversions
11 |    - Tracking Conversions
12 | 3. Configuring Conversion Tracking in the Awesome Web Analytics Dashboard
13 |    - Step 1: Accessing the Conversion Tracking Settings
14 |    - Step 2: Defining Conversion Goals
15 |    - Step 3: Implementing Conversion Tracking Code
16 | 4. Interpreting Conversion Data
17 |    - Conversion Reports
18 |    - Analyzing Conversion Funnel
19 | 5. Optimizing Website Performance with Conversion Data
20 |    - A/B Testing
21 |    - Conversion Rate Optimization
22 | 
23 | ## 1. Introduction
24 | 
25 | Conversion tracking is a crucial feature in Awesome Web Analytics that enables you to measure and analyze the success of your website's goals and objectives. By accurately tracking conversions, you can gain valuable insights into user behavior, identify areas for improvement, and make data-driven decisions to enhance your website's performance.
26 | 
27 | This documentation will walk you through the step-by-step process of setting up conversion tracking, configuring the necessary settings in the Awesome Web Analytics dashboard, interpreting conversion data, and utilizing it to optimize your website's performance.
28 | 
29 | ## 2. Setting up Conversion Tracking
30 | 
31 | ### Defining Conversions
32 | 
33 | Before you begin tracking conversions, it is essential to define what actions or events on your website you consider as conversions. Common examples of conversions include sign-ups, purchases, form submissions, or any specific action that indicates a desired user behavior. In this section, we will guide you on how to define the conversions that align with your business goals.
34 | 
35 | ### Tracking Conversions
36 | 
37 | Once you have defined your conversions, Awesome Web Analytics provides you with an easy-to-implement tracking mechanism. By integrating a small snippet of code into the relevant pages or events on your website, Awesome Web Analytics will be able to accurately track when these conversions occur. This section will guide you on how to track conversions effectively using Awesome Web Analytics.
38 | 
39 | ## 3. Configuring Conversion Tracking in the Awesome Web Analytics Dashboard
40 | 
41 | To start utilizing conversion tracking in Awesome Web Analytics, you need to configure the relevant settings in your Awesome Web Analytics dashboard. The following steps will guide you through the configuration process:
42 | 
43 | ### Step 1: Accessing the Conversion Tracking Settings
44 | 
45 | Login to your Awesome Web Analytics account and navigate to the "Settings" section in the dashboard. From there, locate the "Conversion Tracking" option and click on it to access the conversion tracking settings.
46 | 
47 | ### Step 2: Defining Conversion Goals
48 | 
49 | In this step, you will define the specific conversion goals you want to track. Enter a descriptive name for each goal and specify the relevant details, such as the URL or event associated with the conversion. Awesome Web Analytics allows you to track multiple conversion goals simultaneously.
50 | 
51 | ### Step 3: Implementing Conversion Tracking Code
52 | 
53 | Awesome Web Analytics provides you with a unique tracking code snippet for each conversion goal you define. Copy the corresponding code snippet and paste it into the relevant pages or events on your website. This code will enable Awesome Web Analytics to track the occurrence of the defined conversions accurately.
54 | 
55 | ## 4. Interpreting Conversion Data
56 | 
57 | After setting up conversion tracking and collecting data, Awesome Web Analytics offers various reports and tools to help you interpret and analyze your conversion data effectively. This section will cover:
58 | 
59 | ### Conversion Reports
60 | 
61 | Awesome Web Analytics generates comprehensive reports that provide an overview of your conversion data, including the number of conversions, conversion rates, and trends over time. These reports will give you a clear understanding of how your website is performing in terms of achieving your defined conversion goals.
62 | 
63 | ### Analyzing Conversion Funnel
64 | 
65 | Understanding the conversion funnel is crucial for identifying potential areas of improvement and optimizing your website. Awesome Web Analytics's conversion funnel analysis allows you to visualize the steps users take before completing a conversion, enabling you to pinpoint any bottlenecks or drop-off points in the conversion process.
66 | 
67 | ## 5. Optimizing Website Performance with Conversion Data
68 | 
69 | Conversion data can be a powerful tool for optimizing your website's performance and achieving better results. This section will explore two key strategies for utilizing conversion data effectively:
70 | 
71 | ### A/B Testing
72 | 
73 | Awesome Web Analytics's A/B testing feature allows you to compare different versions of your website or specific elements to determine which version generates more conversions. By experimenting with different variations and analyzing the conversion data, you can make informed decisions about the most effective design, content, or user experience for your target audience.
74 | 
75 | ### Conversion Rate Optimization
76 | 
77 | Awesome Web Analytics provides insights and recommendations for optimizing your website's conversion rate. By analyzing user behavior, identifying potential barriers, and implementing best practices, you can make data-driven improvements to your website that result in higher conversion rates.
78 | 
79 | ## Conclusion
80 | 
81 | By following the steps outlined in this documentation, you will be able to effectively set up and utilize conversion tracking with Awesome Web Analytics. Tracking conversions, interpreting the data, and leveraging it for optimization will empower you to make informed decisions and continuously improve the performance of your website.
82 | 


--------------------------------------------------------------------------------
/mktg-email-flow/docs/custom-event-tracking.md:
--------------------------------------------------------------------------------
 1 | # Custom Event Tracking
 2 | 
 3 | This documentation explains how to track custom events with Awesome Web Analytics. It covers the process of defining and implementing custom events to track specific user interactions on your website, such as button clicks, form submissions, or video plays. It provides code examples and explains how to analyze the event data in the dashboard to gain insights into user behavior.
 4 | 
 5 | ## Introduction
 6 | 
 7 | This documentation provides a comprehensive guide on how to effectively track custom events using Awesome Web Analytics. Custom event tracking allows you to monitor and analyze specific user interactions on your website, such as button clicks, form submissions, or video plays. By implementing custom event tracking, you can gain valuable insights into user behavior and make data-driven decisions to improve your website's performance.
 8 | 
 9 | ## Table of Contents
10 | 
11 | - [Getting Started](#getting-started)
12 | - [Defining Custom Events](#defining-custom-events)
13 | - [Implementing Custom Event Tracking](#implementing-custom-event-tracking)
14 | - [Analyzing Event Data](#analyzing-event-data)
15 | 
16 | ## Getting Started
17 | 
18 | Before diving into custom event tracking, ensure that you have successfully integrated Awesome Web Analytics into your website. If you haven't done so yet, refer to our [Integration Guide](permalink:integration-guide) for step-by-step instructions.
19 | 
20 | ## Defining Custom Events
21 | 
22 | Custom events provide a way to track specific user interactions that are important to your business. These events can be anything from button clicks, form submissions, to video plays. Defining custom events helps you focus on capturing the data that matters most to your website's performance.
23 | 
24 | To define custom events with Awesome Web Analytics, follow these steps:
25 | 
26 | 1. Log in to your Awesome Web Analytics dashboard.
27 | 2. Navigate to the **Events** section.
28 | 3. Click on the **Custom Events** tab.
29 | 4. Click on the **Add Custom Event** button.
30 | 5. Provide a name and description for your custom event.
31 | 6. Select the appropriate event category (e.g., button click, form submission).
32 | 7. Define any additional event properties that you wish to capture.
33 | 8. Save your custom event definition.
34 | 
35 | ## Implementing Custom Event Tracking
36 | 
37 | Once you have defined your custom events, it's time to implement event tracking on your website. Awesome Web Analytics provides an easy-to-use JavaScript library that allows you to track events seamlessly.
38 | 
39 | To implement custom event tracking, follow these steps:
40 | 
41 | 1. Include the Awesome Web Analytics JavaScript library in your website's HTML code.
42 |    ```html
43 |    <script src="Awesome Web Analytics.js"></script>
44 |    ```
45 | 
46 | 2. Identify the user interactions you want to track and use the following code to send custom event data to Awesome Web Analytics:
47 |    ```javascript
48 |    Awesome Web Analytics.trackEvent('custom_event_name', { 
49 |      property1: 'value1',
50 |      property2: 'value2',
51 |      // Add any additional properties as needed
52 |    });
53 |    ```
54 | 
55 | 3. Replace `'custom_event_name'` with the name of the custom event you defined in the Awesome Web Analytics dashboard.
56 | 4. Customize the `property1`, `value1`, `property2`, `value2` placeholders with the relevant properties and values of your custom event.
57 | 5. Repeat this code snippet for each user interaction you want to track.
58 | 
59 | ## Analyzing Event Data
60 | 
61 | Once you have successfully implemented custom event tracking, you can analyze the event data in your Awesome Web Analytics dashboard. This analysis will provide valuable insights into user behavior and help you make informed decisions to optimize your website's performance.
62 | 
63 | To analyze event data, follow these steps:
64 | 
65 | 1. Log in to your Awesome Web Analytics dashboard.
66 | 2. Navigate to the **Events** section.
67 | 3. Click on the **Custom Events** tab.
68 | 4. Select the custom event you want to analyze.
69 | 5. Explore the available metrics, such as event count, conversion rate, and average duration.
70 | 6. Utilize filters and segmentation options to narrow down your analysis.
71 | 7. Generate reports or visualize the data using charts and graphs.
72 | 8. Draw conclusions and identify areas for improvement based on the insights gained.
73 | 
74 | ## Conclusion
75 | 
76 | Custom event tracking with Awesome Web Analytics empowers you to better understand user behavior on your website. By defining and implementing custom events, you can gather valuable data and gain insights that drive performance improvements. Use this documentation as a guide to effectively track custom events and make data-driven decisions to enhance your website's user experience.
77 | 


--------------------------------------------------------------------------------
/mktg-email-flow/docs/dashboard-customization.md:
--------------------------------------------------------------------------------
  1 | # Dashboard Customization
  2 | This documentation explains how to customize the Awesome Web Analytics dashboard to suit your preferences and needs. It covers options like rearranging widgets, creating custom dashboards, and setting up personalized reports. It also provides tips on utilizing the available widgets and visualizations to create insightful and tailored analytics views.
  3 | 
  4 | ## Overview
  5 | This documentation provides a comprehensive guide on how to customize the Awesome Web Analytics dashboard to suit your preferences and needs. Whether you want to rearrange widgets, create custom dashboards, or set up personalized reports, this guide will walk you through the process step-by-step. Additionally, it offers valuable tips on leveraging the available widgets and visualizations to create insightful and tailored analytics views.
  6 | 
  7 | ## Table of Contents
  8 | 1. Introduction
  9 | 2. Getting Started
 10 |    - Accessing the Dashboard Customization Options
 11 | 3. Rearranging Widgets
 12 |    - Drag and Drop Method
 13 |    - Widget Menu Method
 14 | 4. Creating Custom Dashboards
 15 |    - Adding New Dashboards
 16 |    - Modifying Dashboard Layout
 17 |    - Setting Dashboard Permissions
 18 | 5. Personalized Reports
 19 |    - Creating a Report Template
 20 |    - Customizing Report Content
 21 |    - Scheduling and Sharing Reports
 22 | 6. Utilizing Widgets and Visualizations
 23 |    - Available Widgets and Visualizations
 24 |    - Best Practices for Widget Selection
 25 |    - Advanced Widget Configuration
 26 | 
 27 | ## 1. Introduction
 28 | The Awesome Web Analytics Dashboard Customization feature empowers you to tailor your analytics experience according to your unique requirements. By customizing the dashboard, you can gain deeper insights into your website's performance, track specific metrics, and monitor key goals effortlessly.
 29 | 
 30 | In this guide, you will learn how to:
 31 | 
 32 | - Rearrange widgets to prioritize the most important information.
 33 | - Create custom dashboards to organize and visualize data that is relevant to specific teams or projects.
 34 | - Set up personalized reports that deliver timely and meaningful insights to stakeholders.
 35 | - Utilize the available widgets and visualizations effectively to create impactful analytics views.
 36 | 
 37 | ## 2. Getting Started
 38 | To access the Dashboard Customization options, follow these steps:
 39 | 
 40 | 1. Log in to your Awesome Web Analytics account.
 41 | 2. Navigate to the main dashboard.
 42 | 3. Locate the "Customize" or "Settings" button in the top-right corner of the screen.
 43 | 4. Click on the button to access the customization menu.
 44 | 
 45 | ## 3. Rearranging Widgets
 46 | Customizing the widget arrangement allows you to prioritize the information that matters most to you. Awesome Web Analytics offers two methods for rearranging widgets: the Drag and Drop method and the Widget Menu method.
 47 | 
 48 | ### 3.1 Drag and Drop Method
 49 | To rearrange widgets using the Drag and Drop method, follow these steps:
 50 | 
 51 | 1. Access the customization menu as described in the previous section.
 52 | 2. Click and hold on the widget you want to move.
 53 | 3. Drag the widget to the desired location on the dashboard.
 54 | 4. Release the mouse button to drop the widget into its new position.
 55 | 
 56 | ### 3.2 Widget Menu Method
 57 | The Widget Menu method provides an alternative way to rearrange widgets. To use this method, follow these steps:
 58 | 
 59 | 1. Access the customization menu.
 60 | 2. Locate the widget you want to move.
 61 | 3. Click on the widget's menu icon (typically represented by three dots or a gear icon).
 62 | 4. From the menu options, select "Move" or "Rearrange."
 63 | 5. Choose the new position for the widget from the available options.
 64 | 
 65 | ## 4. Creating Custom Dashboards
 66 | Custom dashboards allow you to focus on specific metrics or areas of your website's performance. You can create multiple dashboards tailored to different teams, projects, or campaigns. Follow the steps below to create and modify custom dashboards.
 67 | 
 68 | ### 4.1 Adding New Dashboards
 69 | To add a new custom dashboard, follow these steps:
 70 | 
 71 | 1. Access the customization menu.
 72 | 2. Click on the "Add Dashboard" or "New Dashboard" button.
 73 | 3. Provide a name for the new dashboard.
 74 | 4. Optionally, select a layout template that suits your needs.
 75 | 5. Click "Save" to create the new dashboard.
 76 | 
 77 | ### 4.2 Modifying Dashboard Layout
 78 | Once you have created a custom dashboard, you can modify its layout to display the relevant widgets.
 79 | 
 80 | 1. Access the customization menu.
 81 | 2. Select the custom dashboard you want to modify.
 82 | 3. Click on the "Edit Layout" or "Customize Layout" option.
 83 | 4. Drag and drop widgets onto the dashboard from the available options.
 84 | 5. Rearrange the widgets as desired.
 85 | 6. Click "Save" to apply the changes.
 86 | 
 87 | ### 4.3 Setting Dashboard Permissions
 88 | Awesome Web Analytics allows you to control dashboard access and permissions. You can determine who can view and edit specific dashboards.
 89 | 
 90 | 1. Access the customization menu.
 91 | 2. Select the custom dashboard for which you want to set permissions.
 92 | 3. Click on the "Permissions" or "Access Control" option.
 93 | 4. Specify the users or teams who should have access to the dashboard.
 94 | 5. Define their permission levels (view-only, edit, etc.).
 95 | 6. Save the changes to apply the permissions.
 96 | 
 97 | ## 5. Personalized Reports
 98 | Awesome Web Analytics enables you to generate personalized reports that deliver valuable insights to stakeholders. You can customize report content, schedule automated deliveries, and share reports with relevant team members.
 99 | 
100 | ### 5.1 Creating a Report Template
101 | To create a report template, follow these steps:
102 | 
103 | 1. Access the customization menu.
104 | 2. Select the "Reports" or "Report Templates" option.
105 | 3. Click "Create New Template" or "Add Template."
106 | 4. Define the report's title, scope, and frequency.
107 | 5. Select the desired widgets and visualizations to include in the report.
108 | 6. Customize the report layout and branding.
109 | 7. Save the template for future use.
110 | 
111 | ### 5.2 Customizing Report Content
112 | After creating a report template, you can customize its content to focus on specific metrics or insights.
113 | 
114 | 1. Access the customization menu.
115 | 2. Open the report template you want to modify.
116 | 3. Click on the "Edit Content" or "Customize Content" option.
117 | 4. Add or remove widgets and visualizations as needed.
118 | 5. Configure widget settings, filters, and time ranges.
119 | 6. Save the changes to update the report content.
120 | 
121 | ### 5.3 Scheduling and Sharing Reports
122 | Once you have customized a report template, you can schedule its automated delivery and share it with relevant stakeholders.
123 | 
124 | 1. Access the customization menu.
125 | 2. Open the report template you want to schedule and share.
126 | 3. Click on the "Schedule" or "Delivery Options" option.
127 | 4. Define the recipients and delivery frequency.
128 | 5. Set the preferred report format (PDF, email, etc.).
129 | 6. Save the settings to schedule and share the report.
130 | 
131 | ## 6. Utilizing Widgets and Visualizations
132 | Awesome Web Analytics offers a wide range of widgets and visualizations to help you gain comprehensive insights into your website's performance. This section provides an overview of the available options and best practices for widget selection.
133 | 
134 | ### 6.1 Available Widgets and Visualizations
135 | Awesome Web Analytics provides the following widgets and visualizations:
136 | 
137 | - Line Charts
138 | - Bar Charts
139 | - Pie Charts
140 | - Funnel Charts
141 | - Heatmaps
142 | - Tables
143 | - Metrics Cards
144 | - Conversion Tracking Widgets
145 | 
146 | ### 6.2 Best Practices for Widget Selection
147 | To maximize the effectiveness of your custom dashboards and reports, consider the following best practices when selecting widgets:
148 | 
149 | - Choose widgets that align with your specific goals and metrics.
150 | - Prioritize widgets that provide real-time or near-real-time data.
151 | - Use a mix of different visualizations to present data from various perspectives.
152 | - Experiment with different widgets to find the most informative and visually appealing combination.
153 | 
154 | ### 6.3 Advanced Widget Configuration
155 | Awesome Web Analytics allows for advanced widget configuration, enabling you to fine-tune the displayed data, set filters, and customize the widget appearance.
156 | 
157 | 1. Access the customization menu.
158 | 2. Select the widget you want to configure.
159 | 3. Click on the widget's menu icon (three dots or gear icon).
160 | 4. Choose "Configure" or "Settings" from the menu options.
161 | 5. Adjust the widget's settings, filters, and appearance.
162 | 6. Save the changes to update the widget.
163 | 
164 | ## Conclusion
165 | By customizing your Awesome Web Analytics dashboard, you can create a personalized analytics experience that meets your specific needs. Whether it's rearranging widgets, creating custom dashboards, setting up personalized reports, or utilizing the available widgets and visualizations effectively, this guide has provided you with the necessary knowledge to make the most out of the Dashboard Customization feature. Start exploring the customization options today and unlock deeper insights into your website's performance.
166 | 


--------------------------------------------------------------------------------
/mktg-email-flow/docs/data-export.md:
--------------------------------------------------------------------------------
 1 | # Data Export
 2 | This page provides information on how to export your analytics data from Awesome Web Analytics. It explains the available data export options, including CSV and JSON formats, and guides you through the process of exporting data for further analysis or integration with other tools. It also covers any limitations or considerations when exporting large datasets.
 3 | 
 4 | **Available Data Export Options**
 5 | 
 6 | Awesome Web Analytics offers two popular data export formats: CSV (Comma Separated Values) and JSON (JavaScript Object Notation). Both formats are widely supported and can be easily imported into various analysis and visualization tools.
 7 | 
 8 | **Exporting Data in CSV Format**
 9 | 
10 | To export your data in CSV format, follow these simple steps:
11 | 
12 | 1. Log in to your Awesome Web Analytics account.
13 | 2. Navigate to the "Data Export" section.
14 | 3. Select the desired date range or any other filtering options you need.
15 | 4. Click on the "Export" button, and choose "CSV" as the export format.
16 | 5. The export process will begin, and a download link will be provided once the export is complete.
17 | 6. Click on the download link to save the CSV file to your local machine.
18 | 
19 | **Exporting Data in JSON Format**
20 | 
21 | To export your data in JSON format, follow these steps:
22 | 
23 | 1. Log in to your Awesome Web Analytics account.
24 | 2. Go to the "Data Export" section.
25 | 3. Choose the date range or any other filters you want to apply.
26 | 4. Click on the "Export" button, and select "JSON" as the export format.
27 | 5. The export process will start, and a download link will be provided when the export is finished.
28 | 6. Click on the download link to save the JSON file to your local machine.
29 | 
30 | **Limitations and Considerations**
31 | 
32 | When exporting large datasets from Awesome Web Analytics, please keep the following limitations and considerations in mind:
33 | 
34 | 1. **Export Speed**: Exporting large datasets may take some time, depending on the size of your data. Please be patient and avoid interrupting the export process.
35 | 2. **File Size**: The exported files may be large, especially for extensive date ranges or complex data sets. Ensure that you have enough available storage space on your local machine or the destination system.
36 | 3. **Data Accuracy**: While we strive to provide accurate and reliable data exports, there may be minor discrepancies between real-time data and exported data due to the asynchronous nature of data processing.
37 | 4. **Data Privacy**: Ensure that you comply with any applicable data privacy regulations when exporting and handling analytics data.
38 | 
39 | We hope this documentation has provided you with a comprehensive understanding of how to export your analytics data from Awesome Web Analytics. If you have any further questions or issues, please don't hesitate to reach out to our support team.
40 | 


--------------------------------------------------------------------------------
/mktg-email-flow/docs/funnels.md:
--------------------------------------------------------------------------------
 1 | # Funnels
 2 | This documentation explains how to create and analyze funnels in Awesome Web Analytics. It provides step-by-step instructions on defining the stages of a funnel, setting up the necessary events or goals, and visualizing the funnel flow in the dashboard. It also discusses the insights gained from funnel analysis and how to identify areas for improvement in your conversion process.
 3 | 
 4 | ## Introduction
 5 | 
 6 | Welcome to the Funnels documentation for Awesome Web Analytics! This guide will walk you through the process of creating and analyzing funnels in Awesome Web Analytics. Funnels are a powerful tool that can help you understand the conversion process and identify areas for improvement in your website or app.
 7 | 
 8 | ## Table of Contents
 9 | 
10 | 1. [Overview](#overview)
11 | 2. [Creating a Funnel](#creating-a-funnel)
12 | 3. [Defining Funnel Stages](#defining-funnel-stages)
13 | 4. [Setting up Events or Goals](#setting-up-events-or-goals)
14 | 5. [Visualizing Funnel Flow](#visualizing-funnel-flow)
15 | 6. [Analyzing Funnel Insights](#analyzing-funnel-insights)
16 | 7. [Identifying Areas for Improvement](#identifying-areas-for-improvement)
17 | 
18 | ## Overview <a name="overview"></a>
19 | 
20 | The Funnels feature in Awesome Web Analytics allows you to track and analyze the conversion process of your users. By defining stages and setting up events or goals, you can gain valuable insights into how users interact with your website or app.
21 | 
22 | ## Creating a Funnel <a name="creating-a-funnel"></a>
23 | 
24 | To create a funnel in Awesome Web Analytics, follow these steps:
25 | 
26 | 1. Log in to your Awesome Web Analytics account and navigate to the Funnels section.
27 | 2. Click on the "Create New Funnel" button.
28 | 3. Give your funnel a descriptive name and select the website or app you want to track.
29 | 4. Save your funnel.
30 | 
31 | ## Defining Funnel Stages <a name="defining-funnel-stages"></a>
32 | 
33 | Defining stages is crucial for creating an accurate funnel visualization. Each stage represents a step in the conversion process. To define stages for your funnel:
34 | 
35 | 1. Select the funnel you want to edit.
36 | 2. Click on the "Edit Stages" button.
37 | 3. Add stages by providing a name and a description for each step.
38 | 4. Save your changes.
39 | 
40 | ## Setting up Events or Goals <a name="setting-up-events-or-goals"></a>
41 | 
42 | Events or goals are the actions you want to track in your funnel. These can be specific user interactions or conversions. To set up events or goals for your funnel:
43 | 
44 | 1. Select the funnel you want to edit.
45 | 2. Click on the "Edit Events/Goals" button.
46 | 3. Add events or goals by specifying the criteria for each action.
47 | 4. Save your changes.
48 | 
49 | ## Visualizing Funnel Flow <a name="visualizing-funnel-flow"></a>
50 | 
51 | Once you have defined stages and set up events or goals, you can visualize the flow of your funnel in the Awesome Web Analytics dashboard. The funnel visualization provides a clear overview of how users progress through each stage.
52 | 
53 | ## Analyzing Funnel Insights <a name="analyzing-funnel-insights"></a>
54 | 
55 | Awesome Web Analytics provides detailed insights into your funnel performance. You can analyze conversion rates, drop-off points, and identify areas of improvement. The funnel analysis helps you understand user behavior and optimize your conversion process.
56 | 
57 | ## Identifying Areas for Improvement <a name="identifying-areas-for-improvement"></a>
58 | 
59 | By examining the funnel insights, you can identify areas where users are dropping off or encountering obstacles. This information can guide your optimization efforts, allowing you to improve the user experience and increase conversion rates.
60 | 
61 | ## Conclusion
62 | 
63 | Congratulations! You now have a comprehensive understanding of how to create and analyze funnels in Awesome Web Analytics. By leveraging the power of funnels, you can gain valuable insights into your conversion process and make data-driven decisions to improve your website or app.
64 | 


--------------------------------------------------------------------------------
/mktg-email-flow/docs/generate-tracking-code.md:
--------------------------------------------------------------------------------
 1 | # Generate Tracking Code
 2 | This documentation explains how to generate a unique tracking code for your website. It provides step-by-step instructions on where to find the tracking code generation feature in the Awesome Web Analytics dashboard and how to customize the code to suit your needs. It also includes information on how to implement the tracking code on your website.
 3 | 
 4 | ## Getting Started: Generate Tracking Code
 5 | 
 6 | ### Overview
 7 | This documentation will guide you through the process of generating a unique tracking code for your website using Awesome Web Analytics. The tracking code is essential for collecting and analyzing data about your website visitors. By following the step-by-step instructions provided, you will learn how to generate the tracking code, customize it to meet your specific requirements, and implement it successfully on your website.
 8 | 
 9 | ### Table of Contents
10 | 1. Introduction
11 | 2. Accessing the Tracking Code Generation Feature
12 | 3. Customizing the Tracking Code
13 | 4. Implementing the Tracking Code on Your Website
14 | 5. Troubleshooting
15 | 
16 | ### 1. Introduction
17 | Before you begin, it's crucial to understand the significance of the tracking code. The tracking code is a small snippet of JavaScript that needs to be added to every page of your website. It enables Awesome Web Analytics to collect various data points, such as page views, visitor demographics, and user behavior, helping you gain valuable insights into your website's performance.
18 | 
19 | ### 2. Accessing the Tracking Code Generation Feature
20 | To generate your unique tracking code, follow these steps:
21 | 
22 | 1. Log into your Awesome Web Analytics account.
23 | 2. Navigate to the Awesome Web Analytics dashboard.
24 | 3. In the sidebar menu, locate the "Tracking Code" section and click on it.
25 | 4. You will be redirected to the Tracking Code Generation page.
26 | 
27 | ### 3. Customizing the Tracking Code
28 | Awesome Web Analytics allows you to customize the tracking code to suit your specific needs. Here's how you can do it:
29 | 
30 | 1. Select the tracking options you want to enable or disable. These options include:
31 |    - Page Views
32 |    - Click Tracking
33 |    - Form Submissions
34 |    - E-commerce Tracking
35 |    - Custom Events
36 | 
37 | 2. Once you have chosen your desired options, click on the "Generate Code" button.
38 | 3. The system will generate a unique tracking code snippet based on your preferences.
39 | 
40 | ### 4. Implementing the Tracking Code on Your Website
41 | After generating your tracking code, you need to implement it on your website. Follow these instructions:
42 | 
43 | 1. Copy the generated tracking code snippet.
44 | 2. Open the HTML source code of your website in your preferred code editor.
45 | 3. Paste the tracking code snippet just before the closing `</head>` tag of every page on your website.
46 | 4. Save the changes to your website's HTML files.
47 | 5. Publish the updated website.
48 | 
49 | ### 5. Troubleshooting
50 | If you encounter any issues during the tracking code implementation, please refer to the following troubleshooting tips:
51 | 
52 | - Double-check that the tracking code snippet is placed correctly before the closing `</head>` tag on every page.
53 | - Ensure that the tracking code snippet is not duplicated on any page.
54 | - Verify that your website is not blocking any JavaScript files, including the Awesome Web Analytics tracking code.
55 | - Clear your browser cache and try accessing your website again.
56 | - If the problem persists, please consult our comprehensive troubleshooting guide or contact our support team for further assistance.
57 | 
58 | Congratulations! You have successfully generated and implemented the tracking code for your website using Awesome Web Analytics. Now sit back, relax, and let Awesome Web Analytics gather valuable insights about your website visitors and their behavior.
59 | 


--------------------------------------------------------------------------------
/mktg-email-flow/docs/getting-started.md:
--------------------------------------------------------------------------------
 1 | # Getting Started
 2 | This page provides an overview of how to get started with Awesome Web Analytics. It covers the steps to sign up for an account, generate a tracking code, and add it to your website. Additionally, it explains how to access your analytics dashboard and navigate through the different features.
 3 | 
 4 | ### Signing Up for an Account
 5 | 
 6 | To begin using Awesome Web Analytics, you will first need to sign up for an account. Follow these steps to create your Awesome Web Analytics account:
 7 | 
 8 | 1. Visit our website at [www.awesomewebanalytics.com](https://www.awesomewebanalytics.com) and click on the "Sign Up" button.
 9 | 2. Fill out the registration form with your name, email address, and password.
10 | 3. Read and accept our terms of service and privacy policy.
11 | 4. Click on the "Create Account" button to complete the registration process.
12 | 
13 | Once you have successfully signed up, you will receive a confirmation email with further instructions.
14 | 
15 | ### Generating a Tracking Code
16 | 
17 | After signing up, you will need to generate a tracking code to collect data from your website. The tracking code allows Awesome Web Analytics to monitor visitor activity and provide you with valuable insights. Here's how you can generate a tracking code:
18 | 
19 | 1. Log in to your Awesome Web Analytics account using your email address and password.
20 | 2. From the dashboard, navigate to the "Settings" section.
21 | 3. In the settings menu, click on "Tracking Code".
22 | 4. Copy the provided tracking code snippet.
23 | 
24 | ### Adding the Tracking Code to Your Website
25 | 
26 | Now that you have your tracking code, you need to add it to your website to start collecting data. The process may vary depending on your website platform, but here are the general steps:
27 | 
28 | 1. Access the backend or admin panel of your website.
29 | 2. Locate the section where you can add or edit the HTML code of your website.
30 | 3. Paste the tracking code snippet generated from Awesome Web Analytics into the appropriate location.
31 | 4. Save the changes to update your website with the tracking code.
32 | 
33 | Once the tracking code is added, Awesome Web Analytics will begin collecting data from your website's visitors.
34 | 
35 | ### Accessing Your Analytics Dashboard
36 | 
37 | Once your tracking code is in place and your website starts receiving traffic, you can access your analytics dashboard to explore the wealth of information Awesome Web Analytics provides. To access your analytics dashboard:
38 | 
39 | 1. Log in to your Awesome Web Analytics account.
40 | 2. From the main menu, click on "Analytics" or "Dashboard".
41 | 3. You will be redirected to your analytics dashboard, where you can view various reports, metrics, and insights about your website's performance.
42 | 
43 | ### Navigating Through the Different Features
44 | 
45 | Awesome Web Analytics offers a range of features and tools to help you analyze your website's data effectively. Here are some key features you can explore:
46 | 
47 | 1. **Real-time Analytics**: Monitor live visitor activity on your website.
48 | 2. **Audience Insights**: Understand your audience demographics, interests, and behavior.
49 | 3. **Traffic Sources**: Identify where your website traffic is coming from.
50 | 4. **Conversion Tracking**: Set up goals and track conversions.
51 | 5. **E-commerce Analytics**: Analyze your online store's performance, sales, and customer behavior.
52 | 
53 | To navigate through these features and more, use the menu or sidebar provided in your analytics dashboard.
54 | 
55 | Congratulations! You are now ready to make the most of Awesome Web Analytics and gain valuable insights into your website's performance. If you have any further questions or need assistance, feel free to explore our detailed documentation or contact our support team.
56 | 


--------------------------------------------------------------------------------
/mktg-email-flow/docs/real-time-analytics.md:
--------------------------------------------------------------------------------
 1 | # Real-time Analytics
 2 | This documentation explains the real-time analytics feature of Awesome Web Analytics. It describes how to access real-time data in the dashboard, including the number of current visitors, their locations, and the pages they are viewing. It also highlights the benefits of real-time analytics and provides use cases for different types of websites.
 3 | 
 4 | ## Table of Contents
 5 | 1. Introduction
 6 | 2. Accessing Real-time Data
 7 | 3. Understanding Real-time Metrics
 8 | 4. Benefits of Real-time Analytics
 9 | 5. Use Cases
10 | 
11 | ## 1. Introduction
12 | Welcome to the documentation for the Real-time Analytics feature of Awesome Web Analytics. This guide will provide you with a comprehensive understanding of how to leverage real-time data to gain valuable insights into your website's performance. Real-time Analytics allows you to monitor the number of current visitors, their locations, and the pages they are viewing in the moment.
13 | 
14 | ## 2. Accessing Real-time Data
15 | To access real-time data in the Awesome Web Analytics dashboard, follow these steps:
16 | 
17 | 1. Log in to your Awesome Web Analytics account.
18 | 2. Navigate to the Analytics section.
19 | 3. Click on the Real-time Analytics tab.
20 | 
21 | Once you are in the Real-time Analytics section, you will have access to up-to-the-minute information about your website's visitors and their activities.
22 | 
23 | ## 3. Understanding Real-time Metrics
24 | Awesome Web Analytics provides several key metrics that can help you understand the current state of your website:
25 | 
26 | ### 3.1 Current Visitors
27 | The Current Visitors metric shows you the number of people currently active on your website. This metric gives you an instant overview of the level of engagement happening at any given time.
28 | 
29 | ### 3.2 Visitor Locations
30 | Awesome Web Analytics's Real-time Analytics feature also provides insights into the geographical locations of your current visitors. You can view a world map with real-time markers indicating the locations of your website visitors. This information can be useful for analyzing the global reach of your website and tailoring your content to specific regions.
31 | 
32 | ### 3.3 Pages Being Viewed
33 | Another essential metric in real-time analytics is the Pages Being Viewed. This metric shows you the specific pages that your current visitors are actively viewing. Understanding which pages are most popular in real-time can help you identify trends, optimize content, and improve user experience.
34 | 
35 | ## 4. Benefits of Real-time Analytics
36 | Real-time Analytics provides numerous benefits for website owners and marketers:
37 | 
38 | ### 4.1 Immediate Insights
39 | By accessing real-time data, you can gain immediate insights into how users are interacting with your website. This allows you to react quickly to trends or issues, making data-driven decisions in real-time.
40 | 
41 | ### 4.2 Enhanced User Experience
42 | Real-time Analytics empowers you to identify popular pages and monitor user behavior as it happens. With this information, you can optimize your website's navigation, content, and design to enhance the overall user experience.
43 | 
44 | ### 4.3 Real-time Campaign Performance
45 | If you are running marketing campaigns, real-time analytics can help you evaluate their effectiveness promptly. By monitoring the impact of your campaigns in real-time, you can make adjustments on the fly to maximize your return on investment.
46 | 
47 | ## 5. Use Cases
48 | Real-time Analytics can be valuable in various scenarios, including:
49 | 
50 | ### 5.1 E-commerce Websites
51 | For online stores, real-time analytics can provide insights into customer behavior, allowing you to optimize product placement, promotional offers, and checkout processes to increase conversions.
52 | 
53 | ### 5.2 News and Media Websites
54 | News and media websites can leverage real-time analytics to track the popularity of articles, identify trending topics, and tailor content distribution strategies accordingly.
55 | 
56 | ### 5.3 Event Websites
57 | Event websites can benefit from real-time analytics by monitoring registrations, tracking attendee engagement, and making real-time adjustments to event schedules or promotions.
58 | 
59 | ## Conclusion
60 | Congratulations! You now have a solid understanding of the Real-time Analytics feature in Awesome Web Analytics. By leveraging real-time data, you can gain valuable insights, improve user experience, and make informed decisions to drive the success of your website.
61 | 


--------------------------------------------------------------------------------
/mktg-email-flow/docs/user-segmentation.md:
--------------------------------------------------------------------------------
 1 | # User Segmentation
 2 | 
 3 | This page introduces the user segmentation feature of Awesome Web Analytics. It explains how to segment your website visitors based on various criteria such as demographics, behavior, or custom properties. It demonstrates how to create segments, apply them to your analytics reports, and gain insights into different user groups. It also provides examples of how user segmentation can be utilized for targeted marketing campaigns.
 4 | 
 5 | 
 6 | ### Introduction
 7 | 
 8 | User segmentation allows you to divide your website visitors into distinct groups based on specific characteristics. This feature enables you to analyze and understand different user behaviors, preferences, and demographics, empowering you to make data-driven decisions.
 9 | 
10 | ### Creating Segments
11 | 
12 | To create a segment in Awesome Web Analytics, follow these steps:
13 | 
14 | 1. Access the Awesome Web Analytics dashboard and navigate to the "User Segmentation" section.
15 | 2. Click on the "Create New Segment" button.
16 | 3. Define the criteria for your segment. You can choose from a wide range of options, including demographics (age, gender, location), behavior (pages visited, time spent on site), or even custom properties specific to your business.
17 | 4. Set the conditions for each criterion to precisely define your segment.
18 | 5. Save the segment and give it a meaningful name for easy reference.
19 | 
20 | ### Applying Segments to Analytics Reports
21 | 
22 | Once you have created your segments, you can apply them to your analytics reports to gain deeper insights into user behavior. Follow these steps to apply segments:
23 | 
24 | 1. Navigate to the "Analytics Reports" section in Awesome Web Analytics.
25 | 2. Select the desired report you wish to analyze.
26 | 3. Locate the "Segmentation" option and choose the segment you want to apply.
27 | 4. The report will now display data specifically for the selected segment, allowing you to observe trends and patterns unique to that group of users.
28 | 
29 | ### Leveraging User Segmentation for Targeted Marketing Campaigns
30 | 
31 | User segmentation is a powerful tool for optimizing your marketing campaigns. By tailoring your strategies to specific user groups, you can increase engagement and conversion rates. Here are some examples of how you can utilize user segmentation for targeted marketing campaigns:
32 | 
33 | 1. **Personalized Content:** Customize your website content to cater to the interests and preferences of different user segments. This level of personalization can significantly enhance user experience and drive higher engagement.
34 | 
35 | 2. **Email Marketing:** Use user segmentation to send targeted email campaigns based on user behavior or demographics. By delivering relevant content to specific segments, you can improve open rates, click-through rates, and overall campaign effectiveness.
36 | 
37 | 3. **Ad Campaign Optimization:** Segment your audience to optimize your ad campaigns. By targeting specific segments with tailored advertisements, you can improve ad relevancy, reduce ad spend wastage, and increase conversion rates.
38 | 
39 | 4. **Product Development:** Utilize user segmentation insights to inform your product development decisions. Understanding the needs and preferences of different user segments can guide you in enhancing existing features or developing new ones to better cater to their requirements.
40 | 
41 | By leveraging the user segmentation feature in Awesome Web Analytics, you can unlock a wealth of information about your website visitors and make data-driven decisions to drive your business forward.
42 | 
43 | For more detailed instructions and advanced user segmentation techniques, refer to the comprehensive user segmentation guide available in the Awesome Web Analytics Knowledge Base.
44 | 
45 | We hope this documentation helps you make the most of the User Segmentation feature in Awesome Web Analytics. Should you have any further questions or need assistance, please don't hesitate to reach out to our support team.
46 | 


--------------------------------------------------------------------------------
/nodejs-with-singlestore/.env.example:
--------------------------------------------------------------------------------
1 | DB_HOST=""
2 | DB_USER=""
3 | DB_PORT=3333
4 | DB_NAME=""
5 | DB_PASSWORD=""
6 | OPENAI_API_KEY=""
7 | 


--------------------------------------------------------------------------------
/nodejs-with-singlestore/.gitignore:
--------------------------------------------------------------------------------
 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
 2 | 
 3 | # dependencies
 4 | /node_modules
 5 | /.pnp
 6 | .pnp.js
 7 | .yarn/install-state.gz
 8 | package-lock.json
 9 | 
10 | # testing
11 | /coverage
12 | 
13 | # next.js
14 | /.next/
15 | /out/
16 | 
17 | # production
18 | /build
19 | 
20 | # misc
21 | .DS_Store
22 | *.pem
23 | 
24 | # debug
25 | npm-debug.log*
26 | yarn-debug.log*
27 | yarn-error.log*
28 | 
29 | # local env files
30 | .env
31 | .env*.local
32 | 
33 | # vercel
34 | .vercel
35 | 
36 | # typescript
37 | *.tsbuildinfo
38 | next-env.d.ts
39 | 


--------------------------------------------------------------------------------
/nodejs-with-singlestore/.prettierrc.js:
--------------------------------------------------------------------------------
 1 | // prettier.config.js, .prettierrc.js, prettier.config.mjs, or .prettierrc.mjs
 2 | 
 3 | /**
 4 |  * @see https://prettier.io/docs/en/configuration.html
 5 |  * @type {import("prettier").Config}
 6 |  */
 7 | module.exports = {
 8 |   arrowParens: "always",
 9 |   bracketSameLine: false,
10 |   bracketSpacing: true,
11 |   jsxSingleQuote: false,
12 |   printWidth: 128,
13 |   quoteProps: "consistent",
14 |   semi: true,
15 |   singleAttributePerLine: true,
16 |   singleQuote: false,
17 |   tabWidth: 2,
18 |   trailingComma: "all",
19 |   useTabs: false,
20 | };
21 | 


--------------------------------------------------------------------------------
/nodejs-with-singlestore/README.md:
--------------------------------------------------------------------------------
 1 | # NodeJS With SingleStore Expenses Demo App
 2 | 
 3 | ## Getting Started
 4 | 
 5 | 1. Download an SSL certificate using this [link](https://portal.singlestore.com/static/ca/singlestore_bundle.pem).
 6 | 2. Place the SSL certificate in the root directory of the project.
 7 | 3. Create a `.env` file based on the `.env.example` file.
 8 | 4. Install the dependencies by running `npm install`.
 9 | 5. Load data into a database by running `npm run load`.
10 | 6. Run the server by running `npm run start`.
11 | 
12 | ## API
13 | 
14 | #### Get All Expenses
15 | 
16 | ```
17 | GET http://localhost:3000/expenses
18 | ```
19 | 
20 | #### Get Filtered Expenses
21 | 
22 | ```
23 | GET http://localhost:3000/expenses?merchant=Airbnb&category=Travel
24 | ```
25 | 
26 | #### Get Expenses Record by ID
27 | 
28 | ```
29 | GET http://localhost:3000/expenses/:id
30 | ```
31 | 
32 | #### Update Expenses Record by ID
33 | 
34 | ```
35 | PUT http://localhost:3000/expenses/:id
36 | ```
37 | 
38 | #### Delete Expenses Record by ID
39 | 
40 | ```
41 | DELETE http://localhost:3000/expenses/:id
42 | ```
43 | 
44 | #### Search Expenses
45 | 
46 | ```
47 | POST http://localhost:3000/expenses/search
48 | 
49 | {
50 |     "query": "Travel with Airbnb"
51 | }
52 | ```
53 | 
54 | #### Ask Assistant
55 | 
56 | ```
57 | POST http://localhost:3000/expenses/ask
58 | 
59 | {
60 |     "query": "How much have I spent on Netfix?"
61 | }
62 | ```


--------------------------------------------------------------------------------
/nodejs-with-singlestore/app.js:
--------------------------------------------------------------------------------
  1 | const express = require("express");
  2 | const { ChatCompletionTool } = require("@singlestore/ai");
  3 | const z = require("zod");
  4 | const { ai, database } = require("./lib/db");
  5 | 
  6 | const app = express();
  7 | app
  8 |   .use(express.json())
  9 |   .use((error, _req, res, next) => {
 10 |     console.error(error.stack);
 11 |     res.status(500).send(error.message || "Internal Server Error");
 12 |   });
 13 | ;
 14 | 
 15 | const expensesTable = database.table.use("expenses");
 16 | 
 17 | app.get("/expenses", async (req, res, next) => {
 18 |   try {
 19 |     const { merchant, category } = req.query;
 20 | 
 21 |     const rows = await expensesTable.find({
 22 |       where: {
 23 |         merchant,
 24 |         category,
 25 |       },
 26 |     });
 27 | 
 28 |     res.status(200).json(rows);
 29 |   } catch (error) {
 30 |     next(error);
 31 |   }
 32 | });
 33 | 
 34 | app.get("/expenses/:id", async (req, res, next) => {
 35 |   try {
 36 |     const [row] = await expensesTable.find({ where: { id: req.params.id } });
 37 |     res.status(200).json(row);
 38 |   } catch (error) {
 39 |     next(error);
 40 |   }
 41 | });
 42 | 
 43 | app.put("/expenses/:id", async (req, res, next) => {
 44 |   try {
 45 |     const { id, ...values } = req.body;
 46 |     await expensesTable.update(values, { id: req.params.id });
 47 |     res.status(200).send();
 48 |   } catch (error) {
 49 |     next(error);
 50 |   }
 51 | });
 52 | 
 53 | app.delete("/expenses/:id", async (_req, res, next) => {
 54 |   try {
 55 |     await expensesTable.delete({ id: res.params.id });
 56 |     res.status(200).send();
 57 |   } catch (error) {
 58 |     next(error);
 59 |   }
 60 | });
 61 | 
 62 | app.post("/expenses/search", async (req, res, next) => {
 63 |   try {
 64 |     const { query } = req.body;
 65 | 
 66 |     const tableSchema = await expensesTable.showColumnsInfo(true);
 67 | 
 68 |     const completion = await ai.chatCompletions.create({
 69 |       model: "gpt-4o",
 70 |       prompt: `\
 71 |       User prompt: ${query}
 72 |       Table schema: ${JSON.stringify(tableSchema)}
 73 | 
 74 |       Based on the table schema, parse the user's prompt into parameters.
 75 |       Include only the JSON value without any formatting in your response to make it ready for use with the JS JSON.parse method.
 76 |       If there is an issue return an empty JSON value.
 77 |     `,
 78 |     });
 79 | 
 80 |     const { category, merchant, amount } = JSON.parse(completion.content);
 81 | 
 82 |     const rows = await expensesTable.find({
 83 |       where: {
 84 |         category,
 85 |         merchant,
 86 |         amount: amount ? Number(amount) : undefined,
 87 |       },
 88 |     });
 89 | 
 90 |     res.status(200).json(rows);
 91 |   } catch (error) {
 92 |     console.log(error);
 93 |     next(error);
 94 |   }
 95 | });
 96 | 
 97 | app.post("/expenses/ask", async (req, res, next) => {
 98 |   try {
 99 |     const { query } = req.body;
100 | 
101 |     const queryTableTool = new ChatCompletionTool({
102 |       name: "query_table",
103 |       description:
104 |         "Generates and executes a MySQL SELECT query based on a natural language prompt, adhering to the provided table schema.",
105 |       params: z.object({
106 |         prompt: z.string().describe("A natural language description of the data you wish to retrieve from the table."),
107 |       }),
108 |       call: async (params) => {
109 |         let value = "";
110 |         const schema = await expensesTable.showColumnsInfo();
111 | 
112 |         const gptQuery = await ai.chatCompletions.create({
113 |           stream: false,
114 |           model: "gpt-4o",
115 |           prompt: params.prompt,
116 |           systemRole: `\
117 |             You are a MySQL database expert.
118 |             Generate a valid MySQL SELECT query based on the following table schema: ${JSON.stringify(schema)}
119 | 
120 |             The query must adhere to these rules:
121 |             - Only SELECT operations are allowed.
122 | 
123 |             Respond with the MySQL query only, without any formatting.
124 |             For example: "SELECT * FROM expenses"
125 |           `,
126 |         });
127 | 
128 |         if (gptQuery && "content" in gptQuery && typeof gptQuery.content === "string") {
129 |           const [rows] = await database.query(gptQuery.content);
130 |           value = JSON.stringify(rows);
131 |         }
132 | 
133 |         return { name: "query_table", params, value };
134 |       },
135 |     });
136 | 
137 |     const completion = await ai.chatCompletions.create({
138 |       model: "gpt-4o",
139 |       prompt: query,
140 |       stream: false,
141 |       systemRole: `\
142 |         You are a knowledgeable assistant focused on helping the user with queries related to the "expenses" table.\
143 |         Provide accurate and relevant answers based on the structure and data in the "expenses" table, and assist with any related tasks or requests.
144 |       `,
145 |       tools: [queryTableTool],
146 |     });
147 | 
148 |     res.status(200).send(completion.content);
149 |   } catch (error) {
150 |     next(error);
151 |   }
152 | });
153 | 
154 | const PORT = process.env.PORT || 3000;
155 | app.listen(PORT, async () => {
156 |   console.log(`Server is running on port ${PORT}`);
157 | });
158 | 


--------------------------------------------------------------------------------
/nodejs-with-singlestore/lib/db.js:
--------------------------------------------------------------------------------
 1 | const { AI } = require("@singlestore/ai");
 2 | const { SingleStoreClient } = require("@singlestore/client");
 3 | const fs = require("fs");
 4 | const path = require('path');
 5 | require("dotenv").config();
 6 | 
 7 | const ai = new AI({ openAIApiKey: process.env.OPENAI_API_KEY });
 8 | const client = new SingleStoreClient({ ai });
 9 | 
10 | const certPath = path.join(__dirname, 'singlestore_bundle.pem');
11 | console.log('Looking for cert at:', certPath);
12 | 
13 | const connection = client.connect({
14 |   host: process.env.DB_HOST,
15 |   user: process.env.DB_USER,
16 |   port: Number(process.env.DB_PORT),
17 |   database: process.env.DB_NAME,
18 |   password: process.env.DB_PASSWORD,
19 |   ssl: {
20 |     ca: fs.readFileSync(certPath),
21 |   },
22 | });
23 | 
24 | const database = connection.database.use(process.env.DB_NAME);
25 | 
26 | module.exports = {
27 |   ai,
28 |   client,
29 |   connection,
30 |   database,
31 | };
32 | 


--------------------------------------------------------------------------------
/nodejs-with-singlestore/lib/load.js:
--------------------------------------------------------------------------------
 1 | const csv = require("csv-parser");
 2 | const path = require("path");
 3 | const fs = require("fs");
 4 | const { database } = require("./db");
 5 | 
 6 | (async () => {
 7 |   try {
 8 |     await database.table.drop("expenses");
 9 | 
10 |     const table = await database.table.create({
11 |       name: "expenses",
12 |       columns: {
13 |         id: { type: "BIGINT", autoIncrement: true, primaryKey: true },
14 |         createdAt: { type: "DATETIME(6)", default: "CURRENT_TIMESTAMP(6)" },
15 |         merchant: { type: "VARCHAR(128)" },
16 |         category: { type: "VARCHAR(64)" },
17 |         amount: { type: "DECIMAL(10, 2)" },
18 |       },
19 |     });
20 | 
21 |     const dataset = await (() => {
22 |       return new Promise((resolve, reject) => {
23 |         const result = [];
24 |         fs.createReadStream(path.resolve(process.cwd(), "dataset.csv"))
25 |           .pipe(csv())
26 |           .on("data", ({ createdAt, merchant, category, amount }) => {
27 |             result.push({
28 |               createdAt: new Date(createdAt),
29 |               merchant,
30 |               category,
31 |               amount: Number(amount),
32 |             });
33 |           })
34 |           .on("error", (error) => reject(error))
35 |           .on("end", () => resolve(result));
36 |       });
37 |     })();
38 | 
39 |     await table.insert(dataset);
40 | 
41 |     process.exit(0);
42 |   } catch (error) {
43 |     console.error(error);
44 |     process.exit(1);
45 |   }
46 | })();
47 | 


--------------------------------------------------------------------------------
/nodejs-with-singlestore/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "nodejs-with-singlestore-demo",
 3 |   "version": "1.0.0",
 4 |   "main": "app.js",
 5 |   "scripts": {
 6 |     "start": "node app.js",
 7 |     "dev": "nodemon app.js",
 8 |     "load": "node lib/load.js",
 9 |     "prettify": "prettier --write \"**/*.{js,jsx,ts,tsx,md}\""
10 |   },
11 |   "keywords": [],
12 |   "author": "",
13 |   "license": "ISC",
14 |   "description": "",
15 |   "dependencies": {
16 |     "@singlestore/ai": "^0.0.32",
17 |     "@singlestore/client": "^0.0.43",
18 |     "csv-parser": "^3.0.0",
19 |     "dotenv": "^16.4.5",
20 |     "express": "^4.19.2",
21 |     "zod": "^3.23.8"
22 |   },
23 |   "devDependencies": {
24 |     "nodemon": "^3.1.7",
25 |     "prettier": "^3.3.3"
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/nodejs-with-singlestore/vercel.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": 2,
 3 |     "builds": [
 4 |       {
 5 |         "src": "app.js",
 6 |         "use": "@vercel/node"
 7 |       }
 8 |     ],
 9 |     "routes": [
10 |       {
11 |         "src": "/(.*)",
12 |         "dest": "app.js"
13 |       }
14 |     ]
15 |   }
16 |   


--------------------------------------------------------------------------------
/openai-plugin-webinar/create_table_and_insert_from_csv.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"markdown","id":"97cf39b7-a787-44bc-9255-13a523307b16","metadata":{},"source":"## Install Libraries"},{"cell_type":"code","execution_count":182,"id":"96b0c76c-cc0f-4593-af04-94322fe3a19a","metadata":{"execution":{"iopub.execute_input":"2023-07-17T23:24:14.461738Z","iopub.status.busy":"2023-07-17T23:24:14.461396Z","iopub.status.idle":"2023-07-17T23:24:16.502095Z","shell.execute_reply":"2023-07-17T23:24:16.500841Z","shell.execute_reply.started":"2023-07-17T23:24:14.461714Z"},"tags":[],"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":"Note: you may need to restart the kernel to use updated packages.\n"}],"source":"%pip install pandas requests --quiet"},{"cell_type":"markdown","id":"594235eb-7e7a-4f81-a0d7-4209956860a6","metadata":{},"source":"## Connect to database"},{"cell_type":"code","execution_count":185,"id":"7ea1ff05-4282-48df-bd48-e9ce7927555a","metadata":{"execution":{"iopub.execute_input":"2023-07-17T23:24:29.763728Z","iopub.status.busy":"2023-07-17T23:24:29.763342Z","iopub.status.idle":"2023-07-17T23:24:30.014257Z","shell.execute_reply":"2023-07-17T23:24:30.013504Z","shell.execute_reply.started":"2023-07-17T23:24:29.763701Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/html":"\u003ctable\u003e\n    \u003cthead\u003e\n        \u003ctr\u003e\n            \u003cth\u003ecount(8)\u003c/th\u003e\n        \u003c/tr\u003e\n    \u003c/thead\u003e\n    \u003ctbody\u003e\n        \u003ctr\u003e\n            \u003ctd\u003e40\u003c/td\u003e\n        \u003c/tr\u003e\n    \u003c/tbody\u003e\n\u003c/table\u003e","text/plain":"+----------+\n| count(8) |\n+----------+\n|    40    |\n+----------+"},"execution_count":185,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\nUSE winter_wikipedia;\nselect count(8) from my_book;"},{"cell_type":"markdown","id":"aadbfd16-5274-45d5-a5a6-fea6614b76bb","metadata":{},"source":"## Read the file into Pandas frame"},{"cell_type":"code","execution_count":187,"id":"5b817e34-f8f5-4515-bbd2-6e875d6e2b8b","metadata":{"execution":{"iopub.execute_input":"2023-07-17T23:24:35.289256Z","iopub.status.busy":"2023-07-17T23:24:35.288984Z","iopub.status.idle":"2023-07-17T23:24:35.859898Z","shell.execute_reply":"2023-07-17T23:24:35.859086Z","shell.execute_reply.started":"2023-07-17T23:24:35.289236Z"},"tags":[],"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":"   userId  movieId  rating   timestamp\n0       1       31     2.5  1260759144\n1       1     1029     3.0  1260759179\n2       1     1061     3.0  1260759182\n3       1     1129     2.0  1260759185\n4       1     1172     4.0  1260759205\n"}],"source":"import pandas as pd\nimport requests\nfrom io import StringIO\n\n# Provide the URL of your CSV file\nurl = \"https://raw.githubusercontent.com/madhukarkumar/my_csvs/main/ratings_small.csv\"\n\n# Use requests to get the content of the CSV file\ncontent = requests.get(url).text\n\n# Create a pandas DataFrame\ndf = pd.read_csv(StringIO(content))\n\n# Show the first 5 rows of the dataframe\nprint(df.head())"},{"cell_type":"code","execution_count":null,"id":"3ef49732-e95b-49ce-ad40-99b49a68b5cb","metadata":{},"outputs":[],"source":""},{"cell_type":"code","execution_count":188,"id":"3c614831-6ddb-4baf-bb3a-450beb1e8220","metadata":{"execution":{"iopub.execute_input":"2023-07-17T23:24:40.515201Z","iopub.status.busy":"2023-07-17T23:24:40.514686Z","iopub.status.idle":"2023-07-17T23:24:40.519836Z","shell.execute_reply":"2023-07-17T23:24:40.519112Z","shell.execute_reply.started":"2023-07-17T23:24:40.515174Z"},"tags":[],"trusted":true},"outputs":[],"source":"from sqlalchemy import *\n\ndb_connection = create_engine(connection_url)"},{"cell_type":"markdown","id":"2c8dd006-7d23-47d5-8e54-9a82f135a711","metadata":{},"source":"## Create table based on the columns of the CSV file"},{"cell_type":"code","execution_count":191,"id":"456c3da6-ff41-4521-8a34-2b31c04698d5","metadata":{"execution":{"iopub.execute_input":"2023-07-17T23:25:04.723048Z","iopub.status.busy":"2023-07-17T23:25:04.722659Z","iopub.status.idle":"2023-07-17T23:25:06.025955Z","shell.execute_reply":"2023-07-17T23:25:06.025322Z","shell.execute_reply.started":"2023-07-17T23:25:04.723021Z"},"tags":[],"trusted":true},"outputs":[{"data":{"text/plain":"\u003csqlalchemy.engine.cursor.LegacyCursorResult at 0x7f6c966f2950\u003e"},"execution_count":191,"metadata":{},"output_type":"execute_result"}],"source":"\n# Generate a CREATE TABLE statement based on the DataFrame's dtypes\ncreate_table_statement = \"CREATE TABLE IF NOT EXISTS csv_table (\"\n\nfor col, dtype in df.dtypes.items():\n    if \"int\" in str(dtype):\n        sql_type = \"INT\"\n    elif \"float\" in str(dtype):\n        sql_type = \"FLOAT\"\n    elif \"object\" in str(dtype):\n        sql_type = \"TEXT\"\n    else:\n        sql_type = \"TEXT\"\n    create_table_statement += \"\\n{} {},\".format(col, sql_type)\n\ncreate_table_statement = create_table_statement.rstrip(\",\") + \"\\n);\"\n\n# Execute the CREATE TABLE statement in the database\ndb_connection.execute(create_table_statement)\n"},{"cell_type":"markdown","id":"824b815a-9ff3-4fd2-91e9-01512e8da4e1","metadata":{},"source":"## Load the data into the CSV file"},{"cell_type":"code","execution_count":null,"id":"bc9053af-e36b-4121-b912-33b6744f40bc","metadata":{"execution":{"iopub.execute_input":"2023-07-17T23:25:16.173994Z","iopub.status.busy":"2023-07-17T23:25:16.173451Z"},"tags":[],"trusted":true},"outputs":[],"source":"# Insert the data row by row\nfor i, row in df.iterrows():\n    placeholders = ', '.join(['%s'] * len(row))\n    columns = ', '.join(row.keys())\n    sql = \"INSERT INTO csv_table ( %s ) VALUES ( %s )\" % (columns, placeholders)\n    db_connection.execute(sql, tuple(row))\n"},{"cell_type":"code","execution_count":null,"id":"cfbecfb6-f91e-4d0c-8849-96399a554bfa","metadata":{},"outputs":[],"source":""}],"metadata":{"jupyterlab":{"notebooks":{"version_major":6,"version_minor":4}},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.9"},"singlestore_connection":{"connectionID":"eff5c501-ff0f-4a14-b3c0-c72858bb7a02","defaultDatabase":"winter_wikipedia"},"singlestore_row_limit":300},"nbformat":4,"nbformat_minor":5}


--------------------------------------------------------------------------------
/spark_webinar_3.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"attachments":{},"cell_type":"markdown","id":"b52452b1-90e7-4e77-a4aa-4109c5fb79c4","metadata":{"language":"python"},"source":"<img src = \"https://github.com/singlestore-labs/spaces-notebooks/blob/e551e274bb67bb1e5081131ee1150cdba713fc43/common/images/singlestore-jupyter.png?raw=true\">"},{"attachments":{},"cell_type":"markdown","id":"e83a3630-3c94-4bc6-ab80-18563b926388","metadata":{"language":"python"},"source":"<div id=\"singlestore-header\" style=\"display: flex; background-color: rgba(235, 249, 245, 0.25); padding: 5px;\">\n    <div id=\"icon-image\" style=\"width: 90px; height: 90px;\">\n        <img width=\"100%\" height=\"100%\" src=\"https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/header-icons/browser.png\" />\n    </div>\n    <div id=\"text\" style=\"padding: 5px; margin-left: 10px;\">\n        <div id=\"badge\" style=\"display: inline-block; background-color: rgba(0, 0, 0, 0.15); border-radius: 4px; padding: 4px 8px; align-items: center; margin-top: 6px; margin-bottom: -2px; font-size: 80%\">SingleStore Notebooks</div>\n        <h1 style=\"font-weight: 500; margin: 8px 0 0 4px;\">Apache Spark + OpenAI for Personalized Banking Services, Part 3</h1>\n    </div>\n</div>"},{"cell_type":"code","execution_count":4,"id":"31d548b8-4622-422a-9bbd-468477ee88fb","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:53:55.997919Z","iopub.status.busy":"2024-04-16T15:53:55.997653Z","iopub.status.idle":"2024-04-16T15:53:56.441762Z","shell.execute_reply":"2024-04-16T15:53:56.441128Z","shell.execute_reply.started":"2024-04-16T15:53:55.997898Z"},"language":"python","trusted":true},"outputs":[],"source":"!pip cache purge --quiet"},{"cell_type":"code","execution_count":5,"id":"4aed2148-2b94-4eb0-95e1-dd6e9a2b3dc9","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:53:56.443999Z","iopub.status.busy":"2024-04-16T15:53:56.442934Z","iopub.status.idle":"2024-04-16T15:54:35.202039Z","shell.execute_reply":"2024-04-16T15:54:35.201374Z","shell.execute_reply.started":"2024-04-16T15:53:56.443980Z"},"language":"python","trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":"Collecting package metadata (current_repodata.json): ...working... done\nSolving environment: ...working... done\n\n# All requested packages already installed.\n\n"}],"source":"!conda install -y --quiet -c conda-forge openjdk pyspark"},{"cell_type":"code","execution_count":6,"id":"7f8b6247-0e7e-4a95-832b-c116cc8192f9","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:54:35.203322Z","iopub.status.busy":"2024-04-16T15:54:35.203036Z","iopub.status.idle":"2024-04-16T15:54:40.620410Z","shell.execute_reply":"2024-04-16T15:54:40.619702Z","shell.execute_reply.started":"2024-04-16T15:54:35.203287Z"},"language":"python","trusted":true},"outputs":[],"source":"!pip uninstall langchain-openai -y --quiet\n!pip install openai==0.28 --quiet\n!pip install nltk --quiet"},{"cell_type":"code","execution_count":7,"id":"c3923018-1424-4586-ad14-242ad6eafa78","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:54:40.622642Z","iopub.status.busy":"2024-04-16T15:54:40.622411Z","iopub.status.idle":"2024-04-16T15:54:51.405623Z","shell.execute_reply":"2024-04-16T15:54:51.405116Z","shell.execute_reply.started":"2024-04-16T15:54:40.622619Z"},"language":"python","trusted":true},"outputs":[{"name":"stdin","output_type":"stream","text":"OpenAI API Key: ········\n"}],"source":"import getpass\nimport openai\n\nos.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"},{"cell_type":"code","execution_count":8,"id":"a3a23d21-013d-412e-a120-4c56d9111af8","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:54:51.407043Z","iopub.status.busy":"2024-04-16T15:54:51.406284Z","iopub.status.idle":"2024-04-16T15:54:51.410356Z","shell.execute_reply":"2024-04-16T15:54:51.409879Z","shell.execute_reply.started":"2024-04-16T15:54:51.407020Z"},"language":"python","trusted":true},"outputs":[],"source":"import os\n\nos.makedirs(\"jars\", exist_ok = True)\nos.makedirs(\"data\", exist_ok = True)"},{"cell_type":"code","execution_count":9,"id":"d935b474-40a7-46ec-9ded-51cc2a572ca3","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:54:51.411502Z","iopub.status.busy":"2024-04-16T15:54:51.411102Z","iopub.status.idle":"2024-04-16T15:54:51.692509Z","shell.execute_reply":"2024-04-16T15:54:51.691952Z","shell.execute_reply.started":"2024-04-16T15:54:51.411482Z"},"language":"python","trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":"JAR files downloaded successfully\n"}],"source":"import requests\n\ndef download_jar(url, destination):\n    response = requests.get(url)\n    with open(destination, \"wb\") as f:\n        f.write(response.content)\n\njar_urls = [\n    (\"https://repo1.maven.org/maven2/com/singlestore/singlestore-jdbc-client/1.2.1/singlestore-jdbc-client-1.2.1.jar\", \"jars/singlestore-jdbc-client-1.2.1.jar\"),\n    (\"https://repo1.maven.org/maven2/com/singlestore/singlestore-spark-connector_2.12/4.1.5-spark-3.5.0/singlestore-spark-connector_2.12-4.1.5-spark-3.5.0.jar\", \"jars/singlestore-spark-connector_2.12-4.1.5-spark-3.5.0.jar\"),\n    (\"https://repo1.maven.org/maven2/org/apache/commons/commons-dbcp2/2.12.0/commons-dbcp2-2.12.0.jar\", \"jars/commons-dbcp2-2.12.0.jar\"),\n    (\"https://repo1.maven.org/maven2/org/apache/commons/commons-pool2/2.12.0/commons-pool2-2.12.0.jar\", \"jars/commons-pool2-2.12.0.jar\"),\n    (\"https://repo1.maven.org/maven2/io/spray/spray-json_3/1.3.6/spray-json_3-1.3.6.jar\", \"jars/spray-json_3-1.3.6.jar\")\n]\n\nfor url, destination in jar_urls:\n    download_jar(url, destination)\n\nprint(\"JAR files downloaded successfully\")"},{"cell_type":"code","execution_count":10,"id":"de40d2d2-3558-4d47-bf86-a6d766b8d5d8","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:54:51.694548Z","iopub.status.busy":"2024-04-16T15:54:51.694340Z","iopub.status.idle":"2024-04-16T15:54:56.238967Z","shell.execute_reply":"2024-04-16T15:54:56.238280Z","shell.execute_reply.started":"2024-04-16T15:54:51.694534Z"},"language":"python","trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":"24/04/16 15:54:53 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\nSetting default log level to \"WARN\".\nTo adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n24/04/16 15:54:55 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.\n"}],"source":"from pyspark.sql import SparkSession\n\n# Create a Spark session\nspark = (SparkSession\n             .builder\n             .config(\"spark.jars\", \",\".join([destination for _, destination in jar_urls]))\n             .appName(\"Spark Webinar\")\n             .getOrCreate()\n        )\n\nspark.sparkContext.setLogLevel(\"ERROR\")"},{"cell_type":"code","execution_count":11,"id":"06a810e8-bc76-4f9e-95b9-51db46304828","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:54:56.239823Z","iopub.status.busy":"2024-04-16T15:54:56.239666Z","iopub.status.idle":"2024-04-16T15:54:56.860164Z","shell.execute_reply":"2024-04-16T15:54:56.859667Z","shell.execute_reply.started":"2024-04-16T15:54:56.239806Z"},"language":"python","trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":"[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...\n[nltk_data]   Package punkt is already up-to-date!\n[nltk_data] Downloading package averaged_perceptron_tagger to\n[nltk_data]     /home/jovyan/nltk_data...\n[nltk_data]   Package averaged_perceptron_tagger is already up-to-\n[nltk_data]       date!\n[nltk_data] Downloading package wordnet to /home/jovyan/nltk_data...\n[nltk_data]   Package wordnet is already up-to-date!\n[nltk_data] Downloading package omw to /home/jovyan/nltk_data...\n[nltk_data]   Package omw is already up-to-date!\n"},{"data":{"text/plain":"True"},"execution_count":11,"metadata":{},"output_type":"execute_result"}],"source":"import nltk\nimport random\nfrom nltk.corpus import wordnet as wn\nfrom nltk.tokenize import word_tokenize\n\n# Download NLTK\nnltk.download(\"punkt\")\nnltk.download(\"averaged_perceptron_tagger\")\nnltk.download(\"wordnet\")\nnltk.download(\"omw\")"},{"cell_type":"code","execution_count":12,"id":"cafc9dbb-0b3f-4054-a208-7d5a1d60e54f","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:54:56.861335Z","iopub.status.busy":"2024-04-16T15:54:56.860968Z","iopub.status.idle":"2024-04-16T15:55:04.503301Z","shell.execute_reply":"2024-04-16T15:55:04.502692Z","shell.execute_reply.started":"2024-04-16T15:54:56.861315Z"},"language":"python","trusted":true},"outputs":[],"source":"# Define the directory to save the files\noutput_dir = \"data\"\n\n# Generate meaningful sentences\ndef generate_meaningful_sentence():\n    # Choose a random set of synonyms from WordNet\n    synset = random.choice(list(wn.all_synsets()))\n\n    # Generate a sentence\n    definition = synset.definition()\n    tokens = word_tokenize(definition)\n\n    # Capitalise the first word and end with a period\n    tokens[0] = tokens[0].capitalize()\n    tokens[-1] = tokens[-1] + \".\"\n\n    return \" \".join(tokens)\n\n# Number of files to generate\nnum_files = 5\n\n# Number of sentences in each file\nnum_sentences_per_file = 1\n\n# Generate text files\nfor i in range(num_files):\n    file_path = os.path.join(output_dir, f\"file_{i+1}.txt\")\n    with open(file_path, \"w\") as file:\n        for _ in range(num_sentences_per_file):\n            # Generate a meaningful sentence\n            sentence = generate_meaningful_sentence()\n            file.write(sentence + \"\\n\")"},{"cell_type":"code","execution_count":13,"id":"db8ec8cb-d0e1-4bd9-bec6-fe63c9f09ba3","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:55:04.505911Z","iopub.status.busy":"2024-04-16T15:55:04.505402Z","iopub.status.idle":"2024-04-16T15:55:04.529341Z","shell.execute_reply":"2024-04-16T15:55:04.528819Z","shell.execute_reply.started":"2024-04-16T15:55:04.505891Z"},"language":"python","trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":"File: data/file_1.txt\nJump on skis.\n----------------------\nFile: data/file_2.txt\nHybrid willow usually not strongly weeping in habit.\n----------------------\nFile: data/file_3.txt\nWide-ranging light-brown frog of moist North American woodlands especially spruce.\n----------------------\nFile: data/file_4.txt\nA tear gas that is stronger than CN gas but wears off faster ; can be deployed by grenades or cluster bombs ; can cause skin burns and fatal pulmonary edema.\n----------------------\nFile: data/file_5.txt\nIndicating continuing action ; continuously or steadily.\n----------------------\n"}],"source":"%%bash\n\nfor file in data/*.txt; do\n    echo \"File: $file\"\n    cat \"$file\"\n    echo \"----------------------\"\ndone"},{"cell_type":"code","execution_count":14,"id":"159866a7-ddbb-4f71-9486-19e40d407c58","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:55:04.529944Z","iopub.status.busy":"2024-04-16T15:55:04.529810Z","iopub.status.idle":"2024-04-16T15:55:40.694406Z","shell.execute_reply":"2024-04-16T15:55:40.693868Z","shell.execute_reply.started":"2024-04-16T15:55:04.529933Z"},"language":"python","trusted":true},"outputs":[{"name":"stdin","output_type":"stream","text":"Password: ········\n"}],"source":"import getpass\n\nhost = \"<HOST>\"\nport = \"3306\"\ncluster = host + \":\" + port\n\npassword = getpass.getpass(\"Password:\")"},{"cell_type":"code","execution_count":15,"id":"25ab713c-e57a-428b-acff-8167d6a1f575","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:55:40.695617Z","iopub.status.busy":"2024-04-16T15:55:40.695237Z","iopub.status.idle":"2024-04-16T15:55:41.619617Z","shell.execute_reply":"2024-04-16T15:55:41.619041Z","shell.execute_reply.started":"2024-04-16T15:55:40.695597Z"},"language":"python","trusted":true},"outputs":[],"source":"spark.conf.set(\"spark.datasource.singlestore.ddlEndpoint\", cluster)\nspark.conf.set(\"spark.datasource.singlestore.user\", \"admin\")\nspark.conf.set(\"spark.datasource.singlestore.password\", password)\nspark.conf.set(\"spark.datasource.singlestore.disablePushdown\", \"false\")"},{"cell_type":"code","execution_count":19,"id":"65b6fc2b-427e-455f-af29-92180c12940e","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:56:15.143672Z","iopub.status.busy":"2024-04-16T15:56:15.143467Z","iopub.status.idle":"2024-04-16T15:56:15.610331Z","shell.execute_reply":"2024-04-16T15:56:15.609745Z","shell.execute_reply.started":"2024-04-16T15:56:15.143658Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n    <thead>\n        <tr>\n        </tr>\n    </thead>\n    <tbody>\n    </tbody>\n</table>","text/plain":"++\n||\n++\n++"},"execution_count":19,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\nUSE spark_demo;\n\nDROP TABLE IF EXISTS comments;\nCREATE TABLE IF NOT EXISTS comments (\n     value TEXT,\n     file_name TEXT,\n     embedding VECTOR(1536) NOT NULL\n);"},{"cell_type":"code","execution_count":20,"id":"4aa6262d-7007-44f8-a932-c5deb8d0cfc0","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:56:25.915870Z","iopub.status.busy":"2024-04-16T15:56:25.915575Z","iopub.status.idle":"2024-04-16T15:56:25.920937Z","shell.execute_reply":"2024-04-16T15:56:25.920400Z","shell.execute_reply.started":"2024-04-16T15:56:25.915854Z"},"language":"python","trusted":true},"outputs":[],"source":"from pyspark.sql.functions import input_file_name, udf\nfrom pyspark.sql.types import StringType\n\nopenai.api_key = os.environ.get(\"OPENAI_API_KEY\")\n\n# Generate embeddings for text\ndef generate_embeddings(text):\n    # Generate embeddings for text using OpenAI\n    return openai.Embedding.create(\n        input = text,\n        engine = \"text-embedding-3-small\"\n    ).data[0].embedding\n\n# Register the function as a UDF\ngenerate_embeddings_udf = udf(generate_embeddings, StringType())"},{"cell_type":"code","execution_count":21,"id":"b9b0bdff-b9a1-4992-9470-fbf4a6d7d9f0","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:56:33.759811Z","iopub.status.busy":"2024-04-16T15:56:33.759523Z","iopub.status.idle":"2024-04-16T15:56:45.576247Z","shell.execute_reply":"2024-04-16T15:56:45.575687Z","shell.execute_reply.started":"2024-04-16T15:56:33.759794Z"},"language":"python","trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":"                                                                                \r"}],"source":"import time\n\ninput_dir = output_dir\n\n# Read from the directory\ndf = (spark.readStream\n    .format(\"text\")\n    .option(\"path\", input_dir)\n    .load()\n    .withColumn(\"file_name\", input_file_name())\n)\n\n# Apply the function to the DataFrame to generate embeddings for each row\ndf_with_embeddings = df.withColumn(\"embedding\", generate_embeddings_udf(\"value\"))\n\n# Write each batch of data to SingleStore\ndef write_to_singlestore(df_with_embeddings, epoch_id):\n    (df_with_embeddings.write\n         .format(\"singlestore\")\n         .option(\"loadDataCompression\", \"LZ4\")\n         .mode(\"append\")\n         .save(\"spark_demo.comments\")\n    )\n\n# Write the streaming DataFrame to SingleStore using foreachBatch\nquery = (df_with_embeddings.writeStream\n    .foreachBatch(write_to_singlestore)\n    .start()\n)\n\n# Wait for the query to finish processing\nwhile query.isActive:\n    time.sleep(1)\n    if not query.status[\"isDataAvailable\"]:\n        query.stop()"},{"cell_type":"code","execution_count":22,"id":"a0c6b8a6-4164-44ff-9fd4-c12f14ef9219","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:56:56.086316Z","iopub.status.busy":"2024-04-16T15:56:56.086016Z","iopub.status.idle":"2024-04-16T15:56:56.399590Z","shell.execute_reply":"2024-04-16T15:56:56.398995Z","shell.execute_reply.started":"2024-04-16T15:56:56.086298Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n    <thead>\n        <tr>\n            <th>value</th>\n            <th>file_name</th>\n            <th>embedding</th>\n        </tr>\n    </thead>\n    <tbody>\n        <tr>\n            <td>Indicating continuing action ;</td>\n            <td>file_5.txt</td>\n            <td>[0.0237321071,0.0341514125,0.0182303879,0.03942219</td>\n        </tr>\n        <tr>\n            <td>Wide-ranging light-brown frog </td>\n            <td>file_3.txt</td>\n            <td>[-0.00194031477,-0.0134520773,0.0547336452,0.01292</td>\n        </tr>\n        <tr>\n            <td>Jump on skis.</td>\n            <td>file_1.txt</td>\n            <td>[0.030417813,0.0341594741,0.0108481226,-0.03507469</td>\n        </tr>\n        <tr>\n            <td>A tear gas that is stronger th</td>\n            <td>file_4.txt</td>\n            <td>[0.0302113201,0.0374153629,0.00788259227,0.0550957</td>\n        </tr>\n        <tr>\n            <td>Hybrid willow usually not stro</td>\n            <td>file_2.txt</td>\n            <td>[0.0352415368,0.0366364606,-0.00554679148,0.038584</td>\n        </tr>\n    </tbody>\n</table>","text/plain":"+--------------------------------+------------+----------------------------------------------------+\n|             value              | file_name  |                     embedding                      |\n+--------------------------------+------------+----------------------------------------------------+\n| Indicating continuing action ; | file_5.txt | [0.0237321071,0.0341514125,0.0182303879,0.03942219 |\n| Wide-ranging light-brown frog  | file_3.txt | [-0.00194031477,-0.0134520773,0.0547336452,0.01292 |\n|         Jump on skis.          | file_1.txt | [0.030417813,0.0341594741,0.0108481226,-0.03507469 |\n| A tear gas that is stronger th | file_4.txt | [0.0302113201,0.0374153629,0.00788259227,0.0550957 |\n| Hybrid willow usually not stro | file_2.txt | [0.0352415368,0.0366364606,-0.00554679148,0.038584 |\n+--------------------------------+------------+----------------------------------------------------+"},"execution_count":22,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\nUSE spark_demo;\n\nSELECT\n    SUBSTR(value, 1, 30) AS value,\n    SUBSTR(file_name, LENGTH(file_name) - 9) AS file_name,\n    SUBSTR(embedding, 1, 50) AS embedding\nFROM comments;"},{"cell_type":"code","execution_count":23,"id":"240a9c43-6419-4a5a-b5f8-73bc7d5afbca","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:57:20.189000Z","iopub.status.busy":"2024-04-16T15:57:20.188747Z","iopub.status.idle":"2024-04-16T15:57:20.376801Z","shell.execute_reply":"2024-04-16T15:57:20.376280Z","shell.execute_reply.started":"2024-04-16T15:57:20.188984Z"},"language":"python","trusted":true},"outputs":[],"source":"# Define the text for which you want an embedding\ntext = \"frog\"\n\n# Request the embedding\nresponse = openai.Embedding.create(\n    input = text,\n    engine= \"text-embedding-3-small\"\n)\n\n# Get the embedding from the response\nquery_string = str(response.data[0].embedding)"},{"cell_type":"code","execution_count":24,"id":"67896e42-4413-4816-9b46-51e5a81dfd4a","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:57:24.204523Z","iopub.status.busy":"2024-04-16T15:57:24.204274Z","iopub.status.idle":"2024-04-16T15:57:24.208625Z","shell.execute_reply":"2024-04-16T15:57:24.208086Z","shell.execute_reply.started":"2024-04-16T15:57:24.204507Z"},"language":"python","trusted":true},"outputs":[],"source":"%config SqlMagic.named_parameters = True"},{"cell_type":"code","execution_count":25,"id":"ef4584f1-c945-4c4f-8b62-7dccaa18e49e","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:57:28.889668Z","iopub.status.busy":"2024-04-16T15:57:28.889409Z","iopub.status.idle":"2024-04-16T15:57:29.263901Z","shell.execute_reply":"2024-04-16T15:57:29.263381Z","shell.execute_reply.started":"2024-04-16T15:57:28.889652Z"},"language":"sql","trusted":true},"outputs":[{"data":{"text/html":"<table>\n    <thead>\n        <tr>\n            <th>value</th>\n            <th>file_name</th>\n            <th>similarity</th>\n        </tr>\n    </thead>\n    <tbody>\n        <tr>\n            <td>Wide-ranging light-brown frog </td>\n            <td>file_3.txt</td>\n            <td>1.1131780208445137</td>\n        </tr>\n        <tr>\n            <td>Jump on skis.</td>\n            <td>file_1.txt</td>\n            <td>1.2943939467682306</td>\n        </tr>\n        <tr>\n            <td>Indicating continuing action ;</td>\n            <td>file_5.txt</td>\n            <td>1.3149931253318798</td>\n        </tr>\n        <tr>\n            <td>Hybrid willow usually not stro</td>\n            <td>file_2.txt</td>\n            <td>1.319062131851387</td>\n        </tr>\n        <tr>\n            <td>A tear gas that is stronger th</td>\n            <td>file_4.txt</td>\n            <td>1.3730401463567572</td>\n        </tr>\n    </tbody>\n</table>","text/plain":"+--------------------------------+------------+--------------------+\n|             value              | file_name  |     similarity     |\n+--------------------------------+------------+--------------------+\n| Wide-ranging light-brown frog  | file_3.txt | 1.1131780208445137 |\n|         Jump on skis.          | file_1.txt | 1.2943939467682306 |\n| Indicating continuing action ; | file_5.txt | 1.3149931253318798 |\n| Hybrid willow usually not stro | file_2.txt | 1.319062131851387  |\n| A tear gas that is stronger th | file_4.txt | 1.3730401463567572 |\n+--------------------------------+------------+--------------------+"},"execution_count":25,"metadata":{},"output_type":"execute_result"}],"source":"%%sql\nUSE spark_demo;\n\nSELECT\n    SUBSTR(value, 1, 30) AS value,\n    SUBSTR(file_name, LENGTH(file_name) - 9) AS file_name,\n    embedding <-> :query_string AS similarity\nFROM comments\nORDER BY similarity\nLIMIT 5;"},{"cell_type":"code","execution_count":26,"id":"efad259c-6c50-4cc9-aaaa-1ebd644c75e6","metadata":{"execution":{"iopub.execute_input":"2024-04-16T15:57:35.964045Z","iopub.status.busy":"2024-04-16T15:57:35.963799Z","iopub.status.idle":"2024-04-16T15:57:36.941044Z","shell.execute_reply":"2024-04-16T15:57:36.940365Z","shell.execute_reply.started":"2024-04-16T15:57:35.964029Z"},"language":"python","trusted":true},"outputs":[],"source":"spark.stop()"}],"metadata":{"jupyterlab":{"notebooks":{"version_major":6,"version_minor":4}},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.6"},"singlestore_cell_default_language":"python","singlestore_connection":{"connectionID":"a22b6f8b-b11b-4979-98da-98513e9876e6","defaultDatabase":""}},"nbformat":4,"nbformat_minor":5}


--------------------------------------------------------------------------------
/ucsb_datathon.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"code","execution_count":4,"id":"5a6706d4-d75f-4220-ad1d-b667419b9090","metadata":{"execution":{"iopub.execute_input":"2024-01-29T13:45:51.963497Z","iopub.status.busy":"2024-01-29T13:45:51.963258Z","iopub.status.idle":"2024-01-29T13:45:53.515450Z","shell.execute_reply":"2024-01-29T13:45:53.514873Z","shell.execute_reply.started":"2024-01-29T13:45:51.963480Z"},"language":"python","trusted":true},"outputs":[],"source":"!pip install scikit-learn --quiet"},{"cell_type":"code","execution_count":5,"id":"ad127df2-7ad6-4f0f-b203-e11366c2448c","metadata":{"execution":{"iopub.execute_input":"2024-01-29T13:45:55.640333Z","iopub.status.busy":"2024-01-29T13:45:55.640066Z","iopub.status.idle":"2024-01-29T13:45:55.643350Z","shell.execute_reply":"2024-01-29T13:45:55.642664Z","shell.execute_reply.started":"2024-01-29T13:45:55.640313Z"},"language":"python","trusted":true},"outputs":[],"source":"import pandas as pd"},{"attachments":{},"cell_type":"markdown","id":"ecb3250f-e775-4c2e-bdc2-2bdf8a6efd38","metadata":{"language":"python"},"source":"## Using GitHub"},{"cell_type":"code","execution_count":6,"id":"0883da68-f142-4c0c-ad98-88182cabbb46","metadata":{"execution":{"iopub.execute_input":"2024-01-29T13:46:03.911735Z","iopub.status.busy":"2024-01-29T13:46:03.911471Z","iopub.status.idle":"2024-01-29T13:46:04.043347Z","shell.execute_reply":"2024-01-29T13:46:04.042908Z","shell.execute_reply.started":"2024-01-29T13:46:03.911719Z"},"language":"python","trusted":true},"outputs":[{"data":{"text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>fixed acidity</th>\n      <th>volatile acidity</th>\n      <th>citric acid</th>\n      <th>residual sugar</th>\n      <th>chlorides</th>\n      <th>free sulfur dioxide</th>\n      <th>total sulfur dioxide</th>\n      <th>density</th>\n      <th>pH</th>\n      <th>sulphates</th>\n      <th>alcohol</th>\n      <th>quality</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>7.4</td>\n      <td>0.70</td>\n      <td>0.00</td>\n      <td>1.9</td>\n      <td>0.076</td>\n      <td>11.0</td>\n      <td>34.0</td>\n      <td>0.9978</td>\n      <td>3.51</td>\n      <td>0.56</td>\n      <td>9.4</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>7.8</td>\n      <td>0.88</td>\n      <td>0.00</td>\n      <td>2.6</td>\n      <td>0.098</td>\n      <td>25.0</td>\n      <td>67.0</td>\n      <td>0.9968</td>\n      <td>3.20</td>\n      <td>0.68</td>\n      <td>9.8</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>7.8</td>\n      <td>0.76</td>\n      <td>0.04</td>\n      <td>2.3</td>\n      <td>0.092</td>\n      <td>15.0</td>\n      <td>54.0</td>\n      <td>0.9970</td>\n      <td>3.26</td>\n      <td>0.65</td>\n      <td>9.8</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>11.2</td>\n      <td>0.28</td>\n      <td>0.56</td>\n      <td>1.9</td>\n      <td>0.075</td>\n      <td>17.0</td>\n      <td>60.0</td>\n      <td>0.9980</td>\n      <td>3.16</td>\n      <td>0.58</td>\n      <td>9.8</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>7.4</td>\n      <td>0.70</td>\n      <td>0.00</td>\n      <td>1.9</td>\n      <td>0.076</td>\n      <td>11.0</td>\n      <td>34.0</td>\n      <td>0.9978</td>\n      <td>3.51</td>\n      <td>0.56</td>\n      <td>9.4</td>\n      <td>5</td>\n    </tr>\n  </tbody>\n</table>\n</div>","text/plain":"   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \\\n0            7.4              0.70         0.00             1.9      0.076   \n1            7.8              0.88         0.00             2.6      0.098   \n2            7.8              0.76         0.04             2.3      0.092   \n3           11.2              0.28         0.56             1.9      0.075   \n4            7.4              0.70         0.00             1.9      0.076   \n\n   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \\\n0                 11.0                  34.0   0.9978  3.51       0.56   \n1                 25.0                  67.0   0.9968  3.20       0.68   \n2                 15.0                  54.0   0.9970  3.26       0.65   \n3                 17.0                  60.0   0.9980  3.16       0.58   \n4                 11.0                  34.0   0.9978  3.51       0.56   \n\n   alcohol  quality  \n0      9.4        5  \n1      9.8        5  \n2      9.8        5  \n3      9.8        6  \n4      9.4        5  "},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":"github_url = 'https://raw.githubusercontent.com/aniruddhachoudhury/Red-Wine-Quality/master/winequality-red.csv'\nwine_data = pd.read_csv(github_url)\nwine_data.head()"},{"attachments":{},"cell_type":"markdown","id":"46696c2d-42c0-41d2-8504-158f73f85f1e","metadata":{"language":"python"},"source":"## Using scikit-learn"},{"cell_type":"code","execution_count":8,"id":"e432fcb0-793f-43b2-a71b-7932844887ff","metadata":{"execution":{"iopub.execute_input":"2024-01-29T13:46:15.256410Z","iopub.status.busy":"2024-01-29T13:46:15.256172Z","iopub.status.idle":"2024-01-29T13:46:15.637916Z","shell.execute_reply":"2024-01-29T13:46:15.637125Z","shell.execute_reply.started":"2024-01-29T13:46:15.256395Z"},"language":"python","trusted":true},"outputs":[{"data":{"text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>fixed_acidity</th>\n      <th>volatile_acidity</th>\n      <th>citric_acid</th>\n      <th>residual_sugar</th>\n      <th>chlorides</th>\n      <th>free_sulfur_dioxide</th>\n      <th>total_sulfur_dioxide</th>\n      <th>density</th>\n      <th>pH</th>\n      <th>sulphates</th>\n      <th>alcohol</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>7.4</td>\n      <td>0.70</td>\n      <td>0.00</td>\n      <td>1.9</td>\n      <td>0.076</td>\n      <td>11.0</td>\n      <td>34.0</td>\n      <td>0.9978</td>\n      <td>3.51</td>\n      <td>0.56</td>\n      <td>9.4</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>7.8</td>\n      <td>0.88</td>\n      <td>0.00</td>\n      <td>2.6</td>\n      <td>0.098</td>\n      <td>25.0</td>\n      <td>67.0</td>\n      <td>0.9968</td>\n      <td>3.20</td>\n      <td>0.68</td>\n      <td>9.8</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>7.8</td>\n      <td>0.76</td>\n      <td>0.04</td>\n      <td>2.3</td>\n      <td>0.092</td>\n      <td>15.0</td>\n      <td>54.0</td>\n      <td>0.9970</td>\n      <td>3.26</td>\n      <td>0.65</td>\n      <td>9.8</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>11.2</td>\n      <td>0.28</td>\n      <td>0.56</td>\n      <td>1.9</td>\n      <td>0.075</td>\n      <td>17.0</td>\n      <td>60.0</td>\n      <td>0.9980</td>\n      <td>3.16</td>\n      <td>0.58</td>\n      <td>9.8</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>7.4</td>\n      <td>0.70</td>\n      <td>0.00</td>\n      <td>1.9</td>\n      <td>0.076</td>\n      <td>11.0</td>\n      <td>34.0</td>\n      <td>0.9978</td>\n      <td>3.51</td>\n      <td>0.56</td>\n      <td>9.4</td>\n    </tr>\n  </tbody>\n</table>\n</div>","text/plain":"   fixed_acidity  volatile_acidity  citric_acid  residual_sugar  chlorides  \\\n0            7.4              0.70         0.00             1.9      0.076   \n1            7.8              0.88         0.00             2.6      0.098   \n2            7.8              0.76         0.04             2.3      0.092   \n3           11.2              0.28         0.56             1.9      0.075   \n4            7.4              0.70         0.00             1.9      0.076   \n\n   free_sulfur_dioxide  total_sulfur_dioxide  density    pH  sulphates  \\\n0                 11.0                  34.0   0.9978  3.51       0.56   \n1                 25.0                  67.0   0.9968  3.20       0.68   \n2                 15.0                  54.0   0.9970  3.26       0.65   \n3                 17.0                  60.0   0.9980  3.16       0.58   \n4                 11.0                  34.0   0.9978  3.51       0.56   \n\n   alcohol  \n0      9.4  \n1      9.8  \n2      9.8  \n3      9.8  \n4      9.4  "},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":"import sklearn\nfrom sklearn.datasets import fetch_openml\n\ndata = fetch_openml(name='wine-quality-red', version=1)\nwine_data = data.data\nwine_data.head()"},{"attachments":{},"cell_type":"markdown","id":"b5098c2d-b213-4dbc-af7f-c57e921982ad","metadata":{"language":"python"},"source":"## Using Google Drive"},{"cell_type":"code","execution_count":10,"id":"b56dab8c-19f8-495e-83c3-d749684673f7","metadata":{"execution":{"iopub.execute_input":"2024-01-29T13:46:24.872892Z","iopub.status.busy":"2024-01-29T13:46:24.872621Z","iopub.status.idle":"2024-01-29T13:46:25.309002Z","shell.execute_reply":"2024-01-29T13:46:25.308457Z","shell.execute_reply.started":"2024-01-29T13:46:24.872874Z"},"language":"python","trusted":true},"outputs":[{"data":{"text/html":"<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>fixed acidity</th>\n      <th>volatile acidity</th>\n      <th>citric acid</th>\n      <th>residual sugar</th>\n      <th>chlorides</th>\n      <th>free sulfur dioxide</th>\n      <th>total sulfur dioxide</th>\n      <th>density</th>\n      <th>pH</th>\n      <th>sulphates</th>\n      <th>alcohol</th>\n      <th>quality</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>7.4</td>\n      <td>0.70</td>\n      <td>0.00</td>\n      <td>1.9</td>\n      <td>0.076</td>\n      <td>11.0</td>\n      <td>34.0</td>\n      <td>0.9978</td>\n      <td>3.51</td>\n      <td>0.56</td>\n      <td>9.4</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>7.8</td>\n      <td>0.88</td>\n      <td>0.00</td>\n      <td>2.6</td>\n      <td>0.098</td>\n      <td>25.0</td>\n      <td>67.0</td>\n      <td>0.9968</td>\n      <td>3.20</td>\n      <td>0.68</td>\n      <td>9.8</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>7.8</td>\n      <td>0.76</td>\n      <td>0.04</td>\n      <td>2.3</td>\n      <td>0.092</td>\n      <td>15.0</td>\n      <td>54.0</td>\n      <td>0.9970</td>\n      <td>3.26</td>\n      <td>0.65</td>\n      <td>9.8</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>11.2</td>\n      <td>0.28</td>\n      <td>0.56</td>\n      <td>1.9</td>\n      <td>0.075</td>\n      <td>17.0</td>\n      <td>60.0</td>\n      <td>0.9980</td>\n      <td>3.16</td>\n      <td>0.58</td>\n      <td>9.8</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>7.4</td>\n      <td>0.70</td>\n      <td>0.00</td>\n      <td>1.9</td>\n      <td>0.076</td>\n      <td>11.0</td>\n      <td>34.0</td>\n      <td>0.9978</td>\n      <td>3.51</td>\n      <td>0.56</td>\n      <td>9.4</td>\n      <td>5</td>\n    </tr>\n  </tbody>\n</table>\n</div>","text/plain":"   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \\\n0            7.4              0.70         0.00             1.9      0.076   \n1            7.8              0.88         0.00             2.6      0.098   \n2            7.8              0.76         0.04             2.3      0.092   \n3           11.2              0.28         0.56             1.9      0.075   \n4            7.4              0.70         0.00             1.9      0.076   \n\n   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \\\n0                 11.0                  34.0   0.9978  3.51       0.56   \n1                 25.0                  67.0   0.9968  3.20       0.68   \n2                 15.0                  54.0   0.9970  3.26       0.65   \n3                 17.0                  60.0   0.9980  3.16       0.58   \n4                 11.0                  34.0   0.9978  3.51       0.56   \n\n   alcohol  quality  \n0      9.4        5  \n1      9.8        5  \n2      9.8        5  \n3      9.8        6  \n4      9.4        5  "},"execution_count":10,"metadata":{},"output_type":"execute_result"}],"source":"import io\nimport requests\n\nfile_id = \"1wdaIQytMzC4HlZblMSIAuEYBh4c-loI2\"\nurl = f\"https://drive.google.com/uc?id={file_id}\"\n\ndata = requests.get(url).content\nwine_data = pd.read_csv(io.StringIO(data.decode(\"utf-8\")))\nwine_data.head()"},{"cell_type":"code","execution_count":null,"id":"4aec2c73-eb27-41a4-9604-0fc375544fdc","metadata":{"language":"python","trusted":true},"outputs":[],"source":"from sqlalchemy import *\n\ndb_connection = create_engine(connection_url)"},{"cell_type":"code","execution_count":null,"id":"b7a38b60-2d59-4f9f-a1e7-8285e2cf239b","metadata":{"language":"python","trusted":true},"outputs":[],"source":"wine_data.to_sql(\n    \"wine_data\",\n    con = db_connection,\n    if_exists = \"append\",\n    index = False,\n    chunksize = 1000\n)"},{"cell_type":"code","execution_count":null,"id":"31547381-19ee-4f4c-bd22-c7d1c834d688","metadata":{"language":"sql","trusted":true},"outputs":[],"source":"%%sql\nSELECT * FROM wine_data LIMIT 1;"}],"metadata":{"jupyterlab":{"notebooks":{"version_major":6,"version_minor":4}},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.6"},"singlestore_cell_default_language":"sql","singlestore_connection":{"connectionID":"ef511a14-4713-491d-890d-f7aff864ff0a","defaultDatabase":""}},"nbformat":4,"nbformat_minor":5}


--------------------------------------------------------------------------------