├── module-1
    ├── environment.yml
    └── call_transcript_analysis.ipynb
├── README.md
├── module-3
    ├── load_support_tickets.ipynb
    ├── support_ticket_response_app.py
    └── finetuning_mistral_7b.ipynb
├── module-2
    ├── call_transcripts_analytics_app.py
    ├── intro_to_LLM_functions.ipynb
    └── using_LLM_functions.ipynb
└── additional-demos
    └── medical_notes_extraction.ipynb


/module-1/environment.yml:
--------------------------------------------------------------------------------
1 | name: app_environment
2 | channels:
3 |   - snowflake
4 | dependencies:
5 |   - snowflake-ml-python=*
6 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # generativeai-with-snowflake
 2 | Intro to Generative AI with Snowflake
 3 | This repository contains all the code used in the Introduction to Generative AI with Snowflake course, available on Coursera and LinkedIn Learning.
 4 | 
 5 | Note (June 2025):
 6 | LLM Functions are now called Cortex AISQL. Your existing code, including the code we use in the course, that calls SNOWFLAKE.CORTEX.* functions can still be used. You’ll also see new functions prefixed with AI_ (e.g., AI_COMPLETE, AI_CLASSIFY), some of which have added capabilities. These new functions under the AI_ namespace are also available in Python via Snowpark.
 7 | 
 8 | Read more about Cortex AISQL:
 9 | https://docs.snowflake.com/en/user-guide/snowflake-cortex/aisql
10 | 
11 | To read more about using Cortex AISQL in Python:
12 | https://docs.snowflake.com/en/user-guide/snowflake-cortex/aisql#using-snowflake-cortex-aisql-with-python	
13 | 


--------------------------------------------------------------------------------
/module-3/load_support_tickets.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "metadata": {
 3 |   "kernelspec": {
 4 |    "display_name": "Streamlit Notebook",
 5 |    "name": "streamlit"
 6 |   }
 7 |  },
 8 |  "nbformat_minor": 5,
 9 |  "nbformat": 4,
10 |  "cells": [
11 |   {
12 |    "cell_type": "code",
13 |    "id": "3775908f-ca36-4846-8f38-5adca39217f2",
14 |    "metadata": {
15 |     "language": "python",
16 |     "name": "cell1",
17 |     "collapsed": false,
18 |     "resultHeight": 0
19 |    },
20 |    "source": "from snowflake.snowpark.context import get_active_session\nsession = get_active_session()",
21 |    "execution_count": null,
22 |    "outputs": []
23 |   },
24 |   {
25 |    "cell_type": "code",
26 |    "id": "39629c8d-0f3b-4c98-a126-8c9a9c6d3b3b",
27 |    "metadata": {
28 |     "language": "sql",
29 |     "name": "cell4",
30 |     "collapsed": false,
31 |     "resultHeight": 112
32 |    },
33 |    "outputs": [],
34 |    "source": "use database TELCO_SUPPORT_DB;\nuse schema SUPPORT_DATA;",
35 |    "execution_count": null
36 |   },
37 |   {
38 |    "cell_type": "code",
39 |    "id": "8d50cbf4-0c8d-4950-86cb-114990437ac9",
40 |    "metadata": {
41 |     "language": "sql",
42 |     "name": "cell2",
43 |     "collapsed": false,
44 |     "resultHeight": 112
45 |    },
46 |    "source": "CREATE or REPLACE file format csvformat\n  SKIP_HEADER = 1\n  FIELD_OPTIONALLY_ENCLOSED_BY = '\"'\n  type = 'CSV';\n\nCREATE or REPLACE stage support_tickets_data_stage\n  file_format = csvformat\n  url = 's3://sfquickstarts/finetuning_llm_using_snowflake_cortex_ai/';\n\nCREATE or REPLACE TABLE SUPPORT_TICKETS (\n  ticket_id VARCHAR(60),\n  customer_name VARCHAR(60),\n  customer_email VARCHAR(60),\n  service_type VARCHAR(60),\n  request VARCHAR,\n  contact_preference VARCHAR(60)\n)\nCOMMENT = '{\"origin\":\"sf_sit-is\", \"name\":\"aiml_notebooks_fine_tuning\", \"version\":{\"major\":1, \"minor\":0}, \"attributes\":{\"is_quickstart\":1, \"source\":\"sql\"}}';\n\nCOPY into SUPPORT_TICKETS\n  from @support_tickets_data_stage;",
47 |    "execution_count": null,
48 |    "outputs": []
49 |   }
50 |  ]
51 | }


--------------------------------------------------------------------------------
/module-3/support_ticket_response_app.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from snowflake.snowpark.context import get_active_session
 3 | import streamlit as st
 4 | import ast
 5 | session = get_active_session()
 6 | 
 7 | prompt = """You are a customer support representative at a telecommunications company. 
 8 | Suddenly there is a spike in customer support tickets. 
 9 | You need to understand and analyze the support requests from customers.
10 | Based on the root cause of the main issue in the support request, craft a response to resolve the customer issue.
11 | Write a text message under 25 words, if the contact_preference field is text message.
12 | Write an email in maximum of 100 words if the contact_preference field is email. 
13 | Focus on alleviating the customer issue and improving customer satisfaction in your response.
14 | Strictly follow the word count limit for the response. 
15 | Write only email or text message response based on the contact_preference for every customer. 
16 | Do not generate both email and text message response.
17 | """
18 | 
19 | ticket_categories = ['Roaming fees', 'Slow data speed', 'Lost phone', 'Add new line', 'Closing account']
20 | 
21 | st.subheader("Auto-generate custom emails or text messages")
22 | 
23 | with st.container():
24 |     with st.expander("Enter customer request and select LLM", expanded=True):
25 |         customer_request = st.text_area('Request',"""I traveled to Japan for two weeks and kept my data usage to a minimum. However, I was charged $90 in international fees. These charges were not communicated to me, and I request a detailed breakdown and a refund. Thank you for your prompt assistance.""")
26 |     
27 |         with st.container():
28 |             left_col, right_col = st.columns(2)
29 |             with left_col:
30 |                 selected_preference = st.selectbox('Select contact preference', ('Text message', 'Email'))
31 |             with right_col:
32 |                 selected_llm = st.selectbox('Select LLM',('llama3-8b', 'mistral-7b', 'mistral-large', 'SUPPORT_MESSAGES_FINETUNED_MISTRAL_7B',))
33 | 
34 | with st.container():
35 |     _,mid_col,_ = st.columns([.4,.3,.3])
36 |     with mid_col:
37 |         generate_template = st.button('Generate messages ⚡',type="primary")
38 | 
39 | with st.container():
40 |     if generate_template:
41 |         category_sql = f"""
42 |         select snowflake.cortex.classify_text('{customer_request}', {ticket_categories}) as ticket_category
43 |         """
44 |         df_category = session.sql(category_sql).to_pandas().iloc[0]['TICKET_CATEGORY']
45 |         df_category_dict = ast.literal_eval(df_category)
46 |         st.subheader("Ticket category")
47 |         st.write(df_category_dict['label'])
48 | 
49 |         message_sql = f"""
50 |         select snowflake.cortex.complete('{selected_llm}',concat('{prompt}', '{customer_request}', '{selected_preference}')) as custom_message
51 |         """
52 |         df_message = session.sql(message_sql).to_pandas().iloc[0]['CUSTOM_MESSAGE']
53 |         st.subheader(selected_preference)
54 |         st.write(df_message)


--------------------------------------------------------------------------------
/module-2/call_transcripts_analytics_app.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | from snowflake.snowpark.context import get_active_session
 3 | 
 4 | st.set_page_config(layout='wide')
 5 | session = get_active_session()
 6 | 
 7 | transcript_example = f"""Agent: Hello, how can I assist you today? 
 8 | Customer: Hi, I recently bought the XYZ-2000 vacuum cleaner and it's not working properly. 
 9 | Agent: I'm sorry to hear that. Could you please describe the issue? 
10 | Customer: Sure, when I turn it on, it makes a strange noise and doesn't suck up dirt like it should. 
11 | Agent: It sounds like a motor issue. Have you checked if there's any blockage in the vacuum? 
12 | Customer: Yes, I've checked and there's no blockage. 
13 | Agent: Alright, it seems like the motor might be defective. 
14 | I'll arrange for a replacement motor to be sent to you. 
15 | Customer: Thank you, that would be great. 
16 | Agent: You're welcome. It should arrive within the next few days. 
17 | If you have any other issues, feel free to contact us. 
18 | Customer: Okay, thanks for your help. 
19 | Agent: You're welcome. Have a great day!"""
20 | 
def summarize():
    """Render the "JSON Summary" page.

    Shows a text area for a call transcript. When text is present, asks
    snowflake.cortex.complete ('mistral-large') to summarize it and to put
    product name, defect and summary in JSON format, then writes the
    response to the page.
    """
    with st.container():
        st.header("JSON Summary")
        entered_text = st.text_area("Enter text",label_visibility="hidden",height=400,placeholder='Enter call transcript')
        if not entered_text:
            return

        prompt = f"Summarize this transcript in less than 200 words. Put the product name, defect if any, and summary in JSON format: {entered_text}"
        cortex_prompt = "[INST] " + prompt + " [/INST]"
        # The prompt is sent as a bind parameter, so no manual quote escaping
        # is needed and backslashes/apostrophes in the transcript cannot
        # break out of a SQL string literal.
        cortex_response = (
            session.sql(
                "select snowflake.cortex.complete('mistral-large', ?) as response",
                params=[cortex_prompt],
            )
            .to_pandas()
            .iloc[0]['RESPONSE']
        )
        st.write(cortex_response)
31 | 
def translate():
    """Render the "Translate" page.

    Lets the user pick a source and a target language and enter text, then
    calls snowflake.cortex.translate with the text and the two language
    codes and writes the response to the page.
    """
    # Display name -> language code passed to snowflake.cortex.translate.
    supported_languages = {'German':'de','French':'fr','Korean':'ko','Portuguese':'pt','English':'en','Italian':'it','Russian':'ru','Swedish':'sv','Spanish':'es','Japanese':'ja','Polish':'pl'}
    # Alphabetically ordered display names used as the select-box options.
    language_names = sorted(supported_languages)
    with st.container():
        st.header("Translate With Snowflake Cortex")
        col1,col2 = st.columns(2)
        with col1:
            from_language = st.selectbox('From',language_names)
        with col2:
            to_language = st.selectbox('To',language_names)
        entered_text = st.text_area("Enter text",label_visibility="hidden",height=300,placeholder='For example: call customer transcript')
        if not entered_text:
            return

        # All three arguments are bound (?) rather than interpolated, so
        # quotes or backslashes in the entered text cannot corrupt the SQL.
        cortex_response = (
            session.sql(
                "select snowflake.cortex.translate(?, ?, ?) as response",
                params=[
                    entered_text,
                    supported_languages[from_language],
                    supported_languages[to_language],
                ],
            )
            .to_pandas()
            .iloc[0]['RESPONSE']
        )
        st.write(cortex_response)
46 | 
def sentiment_analysis():
    """Render the "Sentiment Analysis" page.

    Shows a text area; when text is present, runs snowflake.cortex.sentiment
    on it and writes the resulting one-column ("sentiment") DataFrame to the
    page below an explanatory caption.
    """
    with st.container():
        st.header("Sentiment Analysis With Snowflake Cortex")
        entered_text = st.text_area("Enter text",label_visibility="hidden",height=400,placeholder='For example: customer call transcript')
        if not entered_text:
            return

        # Bind the user text instead of splicing it into the statement; this
        # replaces the previous quote-only escaping, which did not cover
        # backslashes.
        cortex_response = session.sql(
            "select snowflake.cortex.sentiment(?) as sentiment",
            params=[entered_text],
        ).to_pandas()
        st.caption("Score is between -1 and 1; -1 = Most negative, 1 = Positive, 0 = Neutral")
        st.write(cortex_response)
56 | 
# Sidebar navigation: map each page title to the function that renders it,
# let the user choose a title, then invoke the matching renderer.
page_names_to_funcs = dict(
    [
        ("JSON Summary", summarize),
        ("Translate", translate),
        ("Sentiment Analysis", sentiment_analysis),
    ]
)

page_titles = page_names_to_funcs.keys()
selected_page = st.sidebar.selectbox("Select", page_titles)
render_page = page_names_to_funcs[selected_page]
render_page()


--------------------------------------------------------------------------------
/additional-demos/medical_notes_extraction.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "metadata": {
 3 |   "kernelspec": {
 4 |    "display_name": "Streamlit Notebook",
 5 |    "name": "streamlit"
 6 |   }
 7 |  },
 8 |  "nbformat_minor": 5,
 9 |  "nbformat": 4,
10 |  "cells": [
11 |   {
12 |    "cell_type": "code",
13 |    "id": "3775908f-ca36-4846-8f38-5adca39217f2",
14 |    "metadata": {
15 |     "language": "python",
16 |     "name": "cell1",
17 |     "resultHeight": 0,
18 |     "collapsed": false
19 |    },
20 |    "source": "import json\n\nfrom snowflake.snowpark.context import get_active_session\nsession = get_active_session()",
21 |    "execution_count": null,
22 |    "outputs": []
23 |   },
24 |   {
25 |    "cell_type": "code",
26 |    "id": "3dfc1bf8-4dc9-40fd-a04e-aabb381ec224",
27 |    "metadata": {
28 |     "language": "python",
29 |     "name": "cell5",
30 |     "resultHeight": 0,
31 |     "codeCollapsed": false,
32 |     "collapsed": false
33 |    },
34 |    "outputs": [],
35 |    "source": "medical_notes = \"\"\"\nPatient Name: John Doe\nDate of Visit: November 19, 2024\nDOB: January 15, 1980\n\nChief Complaint:\nFollow-up for proteinuria.\n\nHistory of Present Illness:\n44-year-old male with proteinuria (1.2 g/day) confirmed three months ago. He denies hematuria or dysuria. On Losartan potassium 50 mg daily for 6 weeks with partial improvement. Blood pressure remains mildly elevated.\n\nObjective Data:\nVitals:\n\nBP: 142/88 mmHg\nHR: 78 bpm\n\nLabs:\nUrine protein\nratio: 1.1 (improved from 1.5)\nSerum creatinine: 1.1 mg/dL (stable)\nPotassium: 4.8 mmol/L\nAssessment:\nProteinuria: Likely hypertensive nephropathy, improving with Losartan.\nHypertension: Partially controlled.\nPlan:\nContinue Losartan Potassium 50 mg daily; consider dose increase if needed.\nInitiate Comparator: Placebo (Losartan) for monitoring as part of trial.\nEvaluate response to Amlodipine Besylate, Placebo (Amlodipine), and potential Enalapril Maleate if ARB response is insufficient.\nRecheck labs in 6 weeks, including proteinuria and kidney function.\nProvider's Name: Dr. Amanda Clarke\n\"\"\"",
36 |    "execution_count": null
37 |   },
38 |   {
39 |    "cell_type": "code",
40 |    "id": "0ab1a000-22d9-4dc9-8da4-e33987c2c508",
41 |    "metadata": {
42 |     "language": "python",
43 |     "name": "cell4",
44 |     "codeCollapsed": false,
45 |     "resultHeight": 0,
46 |     "collapsed": false
47 |    },
48 |    "outputs": [],
49 |    "source": "medical_request = [\n    {'role': 'system', 'content': \"Your goal is to extract structured information from the user's input that matches the form described below. When extracting information please make sure it matches the type information exactly, and only provide the extracted information. \\n Conditions: <complete conditions> \\n Interventions: {complete interventions}\" },\n    {'role': 'user', 'content': medical_notes}\n]",
50 |    "execution_count": null
51 |   },
52 |   {
53 |    "cell_type": "code",
54 |    "id": "d30b04ec-586d-488f-9274-1f81d69e7730",
55 |    "metadata": {
56 |     "language": "python",
57 |     "name": "cell2",
58 |     "resultHeight": 323,
59 |     "codeCollapsed": false,
60 |     "collapsed": false
61 |    },
62 |    "outputs": [],
63 |    "source": "from snowflake.cortex import Complete\n\nresponse = Complete(\"llama3.1-405b\", medical_request)\n\nresponse\n\n#json.loads(response)[\"choices\"][0][\"messages\"]",
64 |    "execution_count": null
65 |   },
66 |   {
67 |    "cell_type": "code",
68 |    "id": "0c399912-ef7e-491d-95f6-170e010a85f1",
69 |    "metadata": {
70 |     "language": "python",
71 |     "name": "cell3",
72 |     "codeCollapsed": false,
73 |     "resultHeight": 54,
74 |     "collapsed": false
75 |    },
76 |    "outputs": [],
77 |    "source": "from snowflake.cortex import Complete\n\nresponse = Complete(\"llama3.2-1b\", medical_request)\n\nresponse\n\n#json.loads(response)[\"choices\"][0][\"messages\"]",
78 |    "execution_count": null
79 |   },
80 |   {
81 |    "cell_type": "code",
82 |    "id": "c082e39b-b819-4962-b223-646b9069e67c",
83 |    "metadata": {
84 |     "language": "python",
85 |     "name": "cell6",
86 |     "codeCollapsed": false,
87 |     "resultHeight": 1084,
88 |     "collapsed": false
89 |    },
90 |    "outputs": [],
91 |    "source": "from snowflake.cortex import Complete\n\nresponse = Complete(\"llama3.2-3b\", medical_request)\n\nresponse\n\n#json.loads(response)[\"choices\"][0][\"messages\"]",
92 |    "execution_count": null
93 |   }
94 |  ]
95 | }


--------------------------------------------------------------------------------
/module-1/call_transcript_analysis.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "kernelspec": {
  4 |    "display_name": "Streamlit Notebook",
  5 |    "name": "streamlit"
  6 |   }
  7 |  },
  8 |  "nbformat_minor": 5,
  9 |  "nbformat": 4,
 10 |  "cells": [
 11 |   {
 12 |    "cell_type": "code",
 13 |    "id": "3775908f-ca36-4846-8f38-5adca39217f2",
 14 |    "metadata": {
 15 |     "language": "python",
 16 |     "name": "cell1",
 17 |     "collapsed": false,
 18 |     "resultHeight": 0
 19 |    },
 20 |    "source": "# Import python packages\nimport streamlit as st\nimport pandas as pd\n\n# We can also use Snowpark for our analyses!\nfrom snowflake.snowpark.context import get_active_session\nsession = get_active_session()",
 21 |    "execution_count": null,
 22 |    "outputs": []
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "id": "8d50cbf4-0c8d-4950-86cb-114990437ac9",
 27 |    "metadata": {
 28 |     "language": "sql",
 29 |     "name": "cell2",
 30 |     "collapsed": false,
 31 |     "resultHeight": 112
 32 |    },
 33 |    "source": "USE ROLE ACCOUNTADMIN;\n\nCREATE WAREHOUSE IF NOT EXISTS ski_gear_s WAREHOUSE_SIZE=SMALL;\nCREATE DATABASE IF NOT EXISTS ski_gear_support_db;\nCREATE SCHEMA IF NOT EXISTS ski_gear_support_schema;\n\nUSE ski_gear_support_db.ski_gear_support_schema;\nUSE WAREHOUSE ski_gear_s;",
 34 |    "execution_count": null,
 35 |    "outputs": []
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "id": "c695373e-ac74-4b62-a1f1-08206cbd5c81",
 40 |    "metadata": {
 41 |     "language": "sql",
 42 |     "name": "cell3",
 43 |     "collapsed": false,
 44 |     "resultHeight": 112
 45 |    },
 46 |    "source": "CREATE or REPLACE file format csvformat\n  SKIP_HEADER = 1\n  FIELD_OPTIONALLY_ENCLOSED_BY = '\"'\n  type = 'CSV';\n\nCREATE or REPLACE stage call_transcripts_data_stage\n  file_format = csvformat\n  url = 's3://sfquickstarts/misc/call_transcripts/';\n\nCREATE or REPLACE table CALL_TRANSCRIPTS ( \n  date_created date,\n  language varchar(60),\n  country varchar(60),\n  product varchar(60),\n  category varchar(60),\n  damage_type varchar(90),\n  transcript varchar\n);\n\nCOPY into CALL_TRANSCRIPTS\n  from @call_transcripts_data_stage;",
 47 |    "execution_count": null,
 48 |    "outputs": []
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "id": "24a8cace-5821-4473-a2cf-9f309951926c",
 53 |    "metadata": {
 54 |     "language": "python",
 55 |     "name": "cell_4",
 56 |     "collapsed": false,
 57 |     "resultHeight": 0
 58 |    },
 59 |    "outputs": [],
 60 |    "source": "transcript = \"\"\"\nCustomer: Hello\n\nAgent: Hi there, I hope you're having a great day! To better assist you, could you please provide your first and last name and the company you are calling from?\n\nCustomer: Sure, my name is Jessica Turner and I'm calling from Mountain Ski Adventures.\n\nAgent: Thanks, Jessica. What can I help you with today?\n\nCustomer: Well, we recently ordered a batch of XtremeX helmets, and upon inspection, we noticed that the buckles on several helmets are broken and won't secure the helmet properly.\n\nAgent: I apologize for the inconvenience this has caused you. To confirm, is your order number 68910?\n\nCustomer: Yes, that's correct.\n\nAgent: Thank you for confirming. I'm going to look into this issue and see what we can do to correct it. Would you prefer a refund or a replacement for the damaged helmets?\n\nCustomer: A replacement would be ideal, as we still need the helmets for our customers.\n\nAgent: I understand. I will start the process to send out replacements for the damaged helmets as soon as possible. Can you please specify the quantity of helmets with broken buckles?\n\nCustomer: There are ten helmets with broken buckles in total.\n\nAgent: Thank you for providing me with the quantity. We will expedite a new shipment of ten XtremeX helmets with functioning buckles to your location. You should expect them to arrive within 3-5 business days.\n\nCustomer: Thank you for your assistance, I appreciate it.\n\n\"\"\"",
 61 |    "execution_count": null
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "id": "0436c75c-051c-47bd-a578-5aafc9ca8526",
 66 |    "metadata": {
 67 |     "language": "python",
 68 |     "name": "cell_5",
 69 |     "collapsed": false,
 70 |     "resultHeight": 0
 71 |    },
 72 |    "outputs": [],
 73 |    "source": "from snowflake.cortex import Complete\nprompt = \"\"\"\n    Summarize this transcript in less than 200 words. \nPut the product name, defect and summary in JSON format. \n\"\"\"",
 74 |    "execution_count": null
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "id": "5b178388-02c4-457b-82d0-e732a792af31",
 79 |    "metadata": {
 80 |     "language": "python",
 81 |     "name": "cell_6",
 82 |     "collapsed": false,
 83 |     "resultHeight": 945
 84 |    },
 85 |    "outputs": [],
 86 |    "source": "print(Complete('llama3.2-1b', prompt + transcript))",
 87 |    "execution_count": null
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "id": "741afc26-fdb6-4a1f-a454-e652e01aa0c0",
 92 |    "metadata": {
 93 |     "language": "python",
 94 |     "name": "cell_7",
 95 |     "collapsed": false,
 96 |     "codeCollapsed": false,
 97 |     "resultHeight": 133
 98 |    },
 99 |    "outputs": [],
100 |    "source": "print(Complete(\"mistral-7b\", prompt + transcript))",
101 |    "execution_count": null
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "id": "5cdbf7ef-fb82-4a0b-b8dd-e4a8d2f0cfe0",
106 |    "metadata": {
107 |     "language": "python",
108 |     "name": "cell_8",
109 |     "collapsed": false,
110 |     "resultHeight": 561
111 |    },
112 |    "outputs": [],
113 |    "source": "from snowflake.cortex import Complete\ndef summarize():\n    with st.container():\n        st.header(\"JSON Summary\")\n        entered_text = st.text_area(\"Enter text\",label_visibility=\"hidden\",height=400,placeholder='Enter text. For example, a call transcript.')\n        btn_summarize = st.button(\"Summarize\",type=\"primary\")\n        if entered_text and btn_summarize:\n            entered_text = entered_text.replace(\"'\", \"\\\\'\")\n            prompt = f\"Summarize this transcript in less than 200 words. Put the product name, defect if any, and summary in JSON format: {entered_text}\"\n            cortex_prompt = \"'[INST] \" + prompt + \" [/INST]'\"\n            cortex_response = Complete(\"mistral-7b\", cortex_prompt + transcript)\n            st.json(cortex_response)\n\npage_names_to_funcs = {\n    \"JSON Summary\": summarize\n}\n\nselected_page = st.sidebar.selectbox(\"Select\", page_names_to_funcs.keys())\npage_names_to_funcs[selected_page]()",
114 |    "execution_count": null
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "id": "5ce18e02-96e9-4683-b29b-2ff72b56f9b6",
119 |    "metadata": {
120 |     "language": "python",
121 |     "name": "cell_9",
122 |     "collapsed": false,
123 |     "resultHeight": 561
124 |    },
125 |    "outputs": [],
126 |    "source": "def summarize():\n    with st.container():\n        st.header(\"JSON Summary\")\n        entered_text = st.text_area(\"Enter text\",label_visibility=\"hidden\",height=400,placeholder='Enter text. For example, a call transcript.')\n        btn_summarize = st.button(\"Summarize\",type=\"primary\")\n        if entered_text and btn_summarize:\n            entered_text = entered_text.replace(\"'\", \"\\\\'\")\n            prompt = f\"Summarize this transcript in less than 200 words. Put the product name, defect if any, and summary in JSON format: {entered_text}\"\n            cortex_prompt = \"'[INST] \" + prompt + \" [/INST]'\"\n            cortex_response = session.sql(f\"select snowflake.cortex.complete('mistral-7b', {cortex_prompt}) as response\").to_pandas().iloc[0]['RESPONSE']\n            st.json(cortex_response)\n\npage_names_to_funcs = {\n    \"JSON Summary\": summarize\n}\n\nselected_page = st.sidebar.selectbox(\"Select\", page_names_to_funcs.keys())\npage_names_to_funcs[selected_page]()",
127 |    "execution_count": null
128 |   }
129 |  ]
130 | }


--------------------------------------------------------------------------------
/module-2/intro_to_LLM_functions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "kernelspec": {
  4 |    "display_name": "Streamlit Notebook",
  5 |    "name": "streamlit"
  6 |   }
  7 |  },
  8 |  "nbformat_minor": 5,
  9 |  "nbformat": 4,
 10 |  "cells": [
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "c37f4892-bc2f-4250-98a2-d7e20f5edfe0",
 14 |    "metadata": {
 15 |     "collapsed": false,
 16 |     "name": "cell9",
 17 |     "resultHeight": 74
 18 |    },
 19 |    "source": [
 20 |     "# Setup"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": null,
 26 |    "id": "3775908f-ca36-4846-8f38-5adca39217f2",
 27 |    "metadata": {
 28 |     "codeCollapsed": false,
 29 |     "collapsed": false,
 30 |     "language": "python",
 31 |     "name": "cell1",
 32 |     "resultHeight": 0
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "# set up snowpark session\n",
 37 |     "from snowflake.snowpark.context import get_active_session\n",
 38 |     "session = get_active_session()"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": null,
 44 |    "id": "7767ef7e-1d4c-4f23-acd3-56ad3000175f",
 45 |    "metadata": {
 46 |     "vscode": {
 47 |      "languageId": "sql"
 48 |     },
 49 |     "name": "cell2",
 50 |     "language": "sql",
 51 |     "collapsed": false,
 52 |     "resultHeight": 0
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "use database SKI_GEAR_SUPPORT_DB;\n",
 57 |     "use schema SKI_GEAR_SUPPORT_SCHEMA;"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "id": "b6d74cd3-5c0b-4aed-8770-d60c3328e28d",
 63 |    "metadata": {
 64 |     "collapsed": false,
 65 |     "name": "cell21",
 66 |     "resultHeight": 74
 67 |    },
 68 |    "source": [
 69 |     "# Introduction to Complete"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": null,
 75 |    "id": "ee4d1a1b-fe0a-46b0-91cf-476492a48f22",
 76 |    "metadata": {
 77 |     "codeCollapsed": false,
 78 |     "collapsed": false,
 79 |     "language": "python",
 80 |     "name": "cell4",
 81 |     "resultHeight": 0
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "from snowflake.cortex import Complete\n",
 86 |     "\n",
 87 |     "Complete(\"llama3.1-405b\", \"how do snowflakes get their unique patterns?\")"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "id": "4a6a8838-3a3a-49a8-9da4-5f44394a163f",
 93 |    "metadata": {
 94 |     "collapsed": false,
 95 |     "name": "cell20",
 96 |     "resultHeight": 74
 97 |    },
 98 |    "source": [
 99 |     "# Introduction to Task-specific Functions"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "id": "a5b686ac-80ba-4aca-926f-d23f1e0b8f5a",
105 |    "metadata": {
106 |     "collapsed": false,
107 |     "name": "cell3",
108 |     "resultHeight": 74
109 |    },
110 |    "source": [
111 |     "# Translate"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": null,
117 |    "id": "aafda587-453f-4792-bbe6-a51ca52b2b6a",
118 |    "metadata": {
119 |     "codeCollapsed": false,
120 |     "collapsed": false,
121 |     "language": "python",
122 |     "name": "cell5",
123 |     "resultHeight": 60
124 |    },
125 |    "outputs": [],
126 |    "source": [
127 |     "from snowflake.cortex import Translate\n",
128 |     "\n",
129 |     "Translate(\"Snowflakes get their unique patterns through a combination of temperature, humidity, and air currents in the atmosphere.\", \"en\", \"fr\")"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "id": "86d00d4b-e79f-476e-8aa5-c74cbaa1f8d1",
136 |    "metadata": {
137 |     "collapsed": false,
138 |     "language": "python",
139 |     "name": "cell12",
140 |     "resultHeight": 54
141 |    },
142 |    "outputs": [],
143 |    "source": [
144 |     "from snowflake.cortex import Translate\n",
145 |     "\n",
146 |     "Translate(\"Les flocons de neige obtiennent leurs modèles uniques grâce à une combinaison de température, d’humidité et de courants atmosphériques.\", \"\", \"en\")"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": null,
152 |    "id": "5e907a7d-897c-4340-8fef-659837e2a13e",
153 |    "metadata": {
154 |     "collapsed": false,
155 |     "language": "sql",
156 |     "name": "cell6",
157 |     "resultHeight": 0
158 |    },
159 |    "outputs": [],
160 |    "source": [
161 |     "CREATE OR REPLACE TABLE call_transcripts AS\n",
162 |     "SELECT *, SNOWFLAKE.CORTEX.TRANSLATE(transcript, '', 'en') AS EN_TRANSCRIPT\n",
163 |     "FROM call_transcripts;\n",
164 |     "\n",
165 |     "SELECT * FROM call_transcripts LIMIT 10;"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "markdown",
170 |    "id": "165cb043-496c-49c1-bf73-18897689944f",
171 |    "metadata": {
172 |     "collapsed": false,
173 |     "name": "cell19",
174 |     "resultHeight": 74
175 |    },
176 |    "source": [
177 |     "# Sentiment"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": null,
183 |    "id": "0add9369-43f3-4f36-88f5-0c4c4c939e91",
184 |    "metadata": {
185 |     "codeCollapsed": false,
186 |     "collapsed": false,
187 |     "language": "sql",
188 |     "name": "cell16",
189 |     "resultHeight": 0
190 |    },
191 |    "outputs": [],
192 |    "source": [
193 |     "SELECT transcript, \n",
194 |     "    SNOWFLAKE.CORTEX.SENTIMENT(en_transcript) AS SENTIMENT\n",
195 |     "FROM call_transcripts\n",
196 |     "LIMIT 10;"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "markdown",
201 |    "id": "88aafa5d-e610-4ab8-a494-3aafd4d40030",
202 |    "metadata": {
203 |     "collapsed": false,
204 |     "name": "cell13",
205 |     "resultHeight": 74
206 |    },
207 |    "source": [
208 |     "# Summarize"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": null,
214 |    "id": "6c1ef790-572f-42b1-aa55-b33953c42bd3",
215 |    "metadata": {
216 |     "codeCollapsed": false,
217 |     "collapsed": false,
218 |     "language": "python",
219 |     "name": "cell15",
220 |     "resultHeight": 0
221 |    },
222 |    "outputs": [],
223 |    "source": [
224 |     "from snowflake.cortex import Summarize\n",
225 |     "\n",
226 |     "text = \"\"\"\n",
227 |     "A snowflake is a single ice crystal that has achieved a sufficient size, and may have amalgamated with others, which falls through the Earth's atmosphere as snow.\n",
228 |     "Each flake nucleates around a tiny particle in supersaturated air masses by attracting supercooled cloud water droplets, which freeze and accrete in crystal form. \n",
229 |     "Snow appears white in color despite being made of clear ice. \n",
230 |     "This is due to diffuse reflection of the whole spectrum of light by the small crystal facets of the snowflakes.\n",
231 |     "\"\"\"\n",
232 |     "\n",
233 |     "Summarize(text)\n"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "markdown",
238 |    "id": "7bd3ad0b-71db-45fb-b998-db88a498eca9",
239 |    "metadata": {
240 |     "collapsed": false,
241 |     "name": "cell22",
242 |     "resultHeight": 74
243 |    },
244 |    "source": [
245 |     "# Text Classification"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "code",
250 |    "execution_count": null,
251 |    "id": "7c0215de-1f2c-4167-9920-aff6615811e9",
252 |    "metadata": {
253 |     "codeCollapsed": false,
254 |     "collapsed": false,
255 |     "language": "sql",
256 |     "name": "cell17",
257 |     "resultHeight": 0
258 |    },
259 |    "outputs": [],
260 |    "source": [
261 |     "SELECT *,\n",
262 |     "SNOWFLAKE.CORTEX.CLASSIFY_TEXT(en_transcript, ['refund', 'replacement']) AS text_classify_output\n",
263 |     "FROM call_transcripts\n",
264 |     "LIMIT 50;"
265 |    ]
266 |   },
267 |   {
268 |    "cell_type": "code",
269 |    "execution_count": null,
270 |    "id": "8d2209ea-5679-489f-bfbd-a6226671ae44",
271 |    "metadata": {
272 |     "codeCollapsed": false,
273 |     "collapsed": false,
274 |     "language": "sql",
275 |     "name": "cell23",
276 |     "resultHeight": 0
277 |    },
278 |    "outputs": [],
279 |    "source": [
280 |     "SELECT *,\n",
281 |     "SNOWFLAKE.CORTEX.CLASSIFY_TEXT(\n",
282 |     "  en_transcript,\n",
283 |     "  [\n",
284 |     "    {\n",
285 |     "      'label': 'Refund',\n",
286 |     "      'description': 'questions or issues related to refunding the order amount',\n",
287 |     "      'examples': [\n",
288 |     "        'Hi, I noticed a defective product was delivered to me. I need a refund',\n",
289 |     "        'I received the wrong size of ski gear in my recent order. I would like to request a refund for this item.',\n",
290 |     "        'The ski gear delivered does not match the product description on your website. I need a refund.'\n",
291 |     "      ]\n",
292 |     "    },\n",
293 |     "    {\n",
294 |     "      'label': 'Replacement',\n",
295 |     "      'description': 'questions or issues related to replacing a product order',\n",
296 |     "      'examples': [\n",
297 |     "        'The snowboard I received was damaged during shipping. It has visible dents and scratches that affect its performance. I would appreciate it if you could send a replacement',\n",
298 |     "        'The ski goggles I received do not match the product description on your website. They lack the anti-fog coating that was advertised. Could you please provide a replacement that meets the described specifications?'\n",
299 |     "      ]\n",
300 |     "    }\n",
301 |     "  ],\n",
302 |     "  {'task_description': 'Return a classification of the type of issue described in the request'}\n",
303 |     ") AS text_classify_output\n",
304 |     "FROM call_transcripts;\n"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": null,
310 |    "id": "590311f3-82bc-4448-94c5-69f006273a4d",
311 |    "metadata": {
312 |     "collapsed": false,
313 |     "language": "python",
314 |     "name": "cell24",
315 |     "resultHeight": 0
316 |    },
317 |    "outputs": [],
318 |    "source": [
319 |     "from snowflake.cortex import ClassifyText\n",
320 |     "\n",
321 |     "ClassifyText(\"How can I buy a ticket for the Subway?\", [\"how to\", \"recommendations\"])"
322 |    ]
323 |   },
324 |   {
325 |    "cell_type": "code",
326 |    "execution_count": null,
327 |    "id": "a9d7eef1-27ce-4c50-b151-d406dcb3c9c0",
328 |    "metadata": {
329 |     "codeCollapsed": false,
330 |     "collapsed": false,
331 |     "language": "python",
332 |     "name": "cell25",
333 |     "resultHeight": 0
334 |    },
335 |    "outputs": [],
336 |    "source": [
337 |     "ClassifyText(\"What is the best broadway show to see right now?\", [\"how to\", \"recommendations\"])"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "markdown",
342 |    "id": "4d5ec720-f336-4b6f-a6f0-36ea0e31b811",
343 |    "metadata": {
344 |     "collapsed": false,
345 |     "name": "cell7",
346 |     "resultHeight": 74
347 |    },
348 |    "source": [
349 |     "# Intro to Helper Functions"
350 |    ]
351 |   },
352 |   {
353 |    "cell_type": "markdown",
354 |    "id": "e4a7afb6-664c-4de4-833d-22c2b1d783e1",
355 |    "metadata": {
356 |     "collapsed": false,
357 |     "name": "cell11",
358 |     "resultHeight": 74
359 |    },
360 |    "source": [
361 |     "# TRY_COMPLETE()"
362 |    ]
363 |   },
364 |   {
365 |    "cell_type": "code",
366 |    "execution_count": null,
367 |    "id": "65f44833-e63a-4862-b946-229e04b1e042",
368 |    "metadata": {
369 |     "collapsed": false,
370 |     "language": "sql",
371 |     "name": "cell10",
372 |     "resultHeight": 0
373 |    },
374 |    "outputs": [],
375 |    "source": [
376 |     "SELECT SNOWFLAKE.CORTEX.TRY_COMPLETE(\n",
377 |     "    'llama2-70b-chaty',\n",
378 |     "    [\n",
379 |     "        {\n",
380 |     "            'role': 'user',\n",
381 |     "            'content': 'how does a snowflake get its unique pattern?'\n",
382 |     "        }\n",
383 |     "    ],\n",
384 |     "    {\n",
385 |     "        'temperature': 0.7,\n",
386 |     "        'max_tokens': 10\n",
387 |     "    }\n",
388 |     ");"
389 |    ]
390 |   },
391 |   {
392 |    "cell_type": "markdown",
393 |    "id": "5605dc00-fa08-47c2-b7bf-56b8efc1f8e9",
394 |    "metadata": {
395 |     "collapsed": false,
396 |     "name": "cell14",
397 |     "resultHeight": 74
398 |    },
399 |    "source": [
400 |     "# COUNT_TOKENS()"
401 |    ]
402 |   },
403 |   {
404 |    "cell_type": "code",
405 |    "execution_count": null,
406 |    "id": "002f7517-adb1-475b-9d53-c96dffabaaf0",
407 |    "metadata": {
408 |     "collapsed": false,
409 |     "language": "sql",
410 |     "name": "cell18",
411 |     "resultHeight": 0
412 |    },
413 |    "outputs": [],
414 |    "source": [
415 |     "SELECT SNOWFLAKE.CORTEX.COUNT_TOKENS( 'llama3.1-70b', \n",
416 |     "    'To be Jedi is to face the truth, and choose. Give off light, or darkness, Padawan. Be a candle, or the night.') \n",
417 |     "    AS number_of_tokens;\n"
418 |    ]
419 |   }
420 |  ]
421 | }


--------------------------------------------------------------------------------
/module-2/using_LLM_functions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "kernelspec": {
  4 |    "display_name": "Streamlit Notebook",
  5 |    "name": "streamlit"
  6 |   }
  7 |  },
  8 |  "nbformat_minor": 5,
  9 |  "nbformat": 4,
 10 |  "cells": [
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "c37f4892-bc2f-4250-98a2-d7e20f5edfe0",
 14 |    "metadata": {
 15 |     "name": "cell9",
 16 |     "collapsed": false,
 17 |     "resultHeight": 74
 18 |    },
 19 |    "source": "# Setup"
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "id": "3775908f-ca36-4846-8f38-5adca39217f2",
 24 |    "metadata": {
 25 |     "language": "python",
 26 |     "name": "cell1",
 27 |     "collapsed": false,
 28 |     "resultHeight": 0,
 29 |     "codeCollapsed": false
 30 |    },
 31 |    "source": "import snowflake.snowpark.functions as F\n\n# set up snowpark session\nfrom snowflake.snowpark.context import get_active_session\nsession = get_active_session()",
 32 |    "execution_count": null,
 33 |    "outputs": []
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "id": "9d329b88-ecb3-4e69-976d-6d04959d295a",
 38 |    "metadata": {
 39 |     "language": "sql",
 40 |     "name": "cell40",
 41 |     "collapsed": false,
 42 |     "resultHeight": 0
 43 |    },
 44 |    "outputs": [],
 45 |    "source": "use database SKI_GEAR_SUPPORT_DB;\nuse schema SKI_GEAR_SUPPORT_SCHEMA;",
 46 |    "execution_count": null
 47 |   },
 48 |   {
 49 |    "cell_type": "markdown",
 50 |    "id": "456bf475-5bb7-454f-8d89-a942b1763613",
 51 |    "metadata": {
 52 |     "name": "cell26",
 53 |     "collapsed": false,
 54 |     "resultHeight": 74
 55 |    },
 56 |    "source": "# Using Complete"
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "id": "bbcb4a42-002a-4385-a619-1c677faca131",
 61 |    "metadata": {
 62 |     "language": "python",
 63 |     "name": "cell27",
 64 |     "resultHeight": 261,
 65 |     "collapsed": false
 66 |    },
 67 |    "outputs": [],
 68 |    "source": "from snowflake.cortex import Complete\n\nComplete(\"llama3.1-405b\", \"¿Cómo adquieren los copos de nieve su forma única?. Reply in english\")",
 69 |    "execution_count": null
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "id": "144d2ad8-5839-484c-97db-54788f3135cb",
 74 |    "metadata": {
 75 |     "name": "cell34",
 76 |     "collapsed": false,
 77 |     "resultHeight": 135
 78 |    },
 79 |    "source": "# System and User roles in Complete\n## Options: Cortex Guard"
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "id": "6fa32811-428b-4696-a0f1-fd3c106d1c9a",
 84 |    "metadata": {
 85 |     "language": "python",
 86 |     "name": "cell29",
 87 |     "resultHeight": 239,
 88 |     "codeCollapsed": false,
 89 |     "collapsed": false
 90 |    },
 91 |    "outputs": [],
 92 |    "source": "messages = [\n    {'role': 'system', 'content': 'You are a helpful assistant that answers programming questions in the style of a rancher from the western United States.' },\n    {'role': 'user', 'content': 'Are semicolons optional in JavaScript?'}\n]\n\noptions = {\n    'guardrails': True,\n}\n\nComplete(\"llama3.1-405b\", messages, options = options)",
 93 |    "execution_count": null
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "id": "7090ee9d-90cc-4255-82f8-52d27f5b2525",
 98 |    "metadata": {
 99 |     "language": "python",
100 |     "name": "cell2",
101 |     "collapsed": false,
102 |     "resultHeight": 60
103 |    },
104 |    "outputs": [],
105 |    "source": "messages = [\n    {'role': 'system', 'content': 'You are a helpful assistant that answers programming questions in the style of a rancher from the western United States.' },\n    {'role': 'user', 'content': 'How to rob a bank?'}\n]\n\noptions = {\n    'guardrails': True,\n}\n\nComplete(\"llama3.1-405b\", messages, options = options)",
106 |    "execution_count": null
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "id": "e8209113-4c20-4394-bb58-045fc19020cb",
111 |    "metadata": {
112 |     "name": "cell35",
113 |     "collapsed": false,
114 |     "resultHeight": 74
115 |    },
116 |    "source": "# Multi-turn chat"
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "id": "96d1ff80-9bc1-4819-a77c-3de71ff1ed21",
121 |    "metadata": {
122 |     "language": "python",
123 |     "name": "cell30",
124 |     "resultHeight": 306,
125 |     "codeCollapsed": false,
126 |     "collapsed": false
127 |    },
128 |    "outputs": [],
129 |    "source": "messages_with_history = [\n    {'role': 'system', 'content': 'You are a helpful assistant that answers programming questions in the style of a rancher from the western United States.' },\n    {'role': 'user', 'content': 'Are semicolons optional in JavaScript?'},\n    {'role': 'assistant', 'content': \"Well, howdy there, partner! I reckon you're askin' about semicolons in JavaScript, eh? …, and may your code always be tidy!\"},\n    {'role': 'user', 'content': 'So to be clear, my code will not break with out them?'}\n    ]\n\noptions = {\n    'guardrails': True\n}\n\nfrom snowflake.cortex import Complete\n\nresponse = Complete(\"llama3.1-405b\", messages_with_history, options = options)\n\nresponse\n",
130 |    "execution_count": null
131 |   },
132 |   {
133 |    "cell_type": "markdown",
134 |    "id": "f9c15527-c4e5-4b33-8c6e-ad9b7045b414",
135 |    "metadata": {
136 |     "name": "cell36",
137 |     "collapsed": false,
138 |     "resultHeight": 74
139 |    },
140 |    "source": "# Temperature"
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "id": "f710627a-4bc7-4af3-8dfa-fe2b599c7b61",
145 |    "metadata": {
146 |     "language": "python",
147 |     "name": "cell31",
148 |     "resultHeight": 351,
149 |     "codeCollapsed": false,
150 |     "collapsed": false
151 |    },
152 |    "outputs": [],
153 |    "source": "options = {\n    'guardrails': True,\n    'temperature': 0.9\n}\n\nresponse = Complete(\"llama3.1-405b\", messages_with_history, options = options)\n\nresponse\n",
154 |    "execution_count": null
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "id": "7b5ef04c-b137-4433-ac4c-42538081ecf1",
159 |    "metadata": {
160 |     "name": "cell37",
161 |     "collapsed": false,
162 |     "resultHeight": 74
163 |    },
164 |    "source": "# Top P"
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "id": "bbfdb7ff-33c7-4801-9c75-2d9b0a46b2ea",
169 |    "metadata": {
170 |     "language": "python",
171 |     "name": "cell32",
172 |     "resultHeight": 194,
173 |     "codeCollapsed": false,
174 |     "collapsed": false
175 |    },
176 |    "outputs": [],
177 |    "source": "options = {\n    'guardrails': True,\n    'top_p': 0.9\n}\n\nresponse = Complete(\"llama3.1-405b\", messages_with_history, options = options)\n\nresponse",
178 |    "execution_count": null
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "id": "835fcb50-321c-4e70-9255-4473d7ef092a",
183 |    "metadata": {
184 |     "name": "cell38",
185 |     "collapsed": false,
186 |     "resultHeight": 74
187 |    },
188 |    "source": "# Max Tokens"
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "id": "98c7729c-776c-42e9-bf56-ba32a05aea78",
193 |    "metadata": {
194 |     "language": "python",
195 |     "name": "cell39",
196 |     "resultHeight": 54,
197 |     "collapsed": false
198 |    },
199 |    "outputs": [],
200 |    "source": "messages = [\n    {'role': 'system', 'content': 'You are a helpful assistant that decides if a call transcript needs follow-up. You only respond by saying YES or NO.' },\n    {'role': 'user', 'content': \"Call Transcript: Your billing department is a disaster! I've been overcharged three times this month. Please refund me\"}\n]\n\noptions = {\"max_tokens\": 1}\n\nresponse = Complete(\"llama3.2-1b\", messages, options = options)\n\nresponse",
201 |    "execution_count": null
202 |   },
203 |   {
204 |    "cell_type": "markdown",
205 |    "id": "3f2e3429-eb44-4042-b7cf-4b26183fd628",
206 |    "metadata": {
207 |     "name": "cell3",
208 |     "collapsed": false,
209 |     "resultHeight": 74
210 |    },
211 |    "source": "# Review Output of Complete()"
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "id": "d128374c-135c-42b9-9d09-023b141c6c24",
216 |    "metadata": {
217 |     "language": "sql",
218 |     "name": "cell33",
219 |     "resultHeight": 427,
220 |     "codeCollapsed": false,
221 |     "collapsed": false
222 |    },
223 |    "outputs": [],
224 |    "source": "SELECT SNOWFLAKE.CORTEX.COMPLETE(\n    'llama2-70b-chat',\n    [\n        {\n            'role': 'system',\n            'content': 'You are a helpful assistant that decides if a call transcript needs follow-up. You only respond by saying YES or NO.'\n        },\n        {\n            'role': 'user',\n            'content': transcript\n        }\n    ],\n    {'max_tokens': 1}\n) AS followup_needed\nFROM CALL_TRANSCRIPTS\nLIMIT 10;\n",
225 |    "execution_count": null
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "id": "ace7d2a4-1ed2-4ceb-99d5-ad3dae0109fa",
230 |    "metadata": {
231 |     "name": "cell4",
232 |     "collapsed": false,
233 |     "resultHeight": 112
234 |    },
235 |    "source": "# Task-Specific Functions: Translate, Sentiment, Summarize and Classify text"
236 |   },
237 |   {
238 |    "cell_type": "markdown",
239 |    "id": "f04ebbe9-e661-4366-b299-f24f53a0e0fc",
240 |    "metadata": {
241 |     "name": "cell7",
242 |     "collapsed": false,
243 |     "resultHeight": 74
244 |    },
245 |    "source": "# Translate"
246 |   },
247 |   {
248 |    "cell_type": "code",
249 |    "id": "d873aaae-2908-4105-ab9a-15b78d88cd4c",
250 |    "metadata": {
251 |     "language": "python",
252 |     "name": "cell6",
253 |     "collapsed": false,
254 |     "codeCollapsed": false,
255 |     "resultHeight": 439
256 |    },
257 |    "outputs": [],
258 |    "source": "# Using Translate\nfrom snowflake.cortex import Translate\n\narticle_df = session.table(\"CALL_TRANSCRIPTS\")\narticle_df = article_df.filter(F.col('language')=='German')\narticle_df = article_df.withColumn(\n    \"DE_TRANSCRIPT\",\n    Translate(F.col('EN_TRANSCRIPT'), \"en\", \"de\")\n)\narticle_df",
259 |    "execution_count": null
260 |   },
261 |   {
262 |    "cell_type": "code",
263 |    "id": "22ff26cb-4fa7-4b28-bd2f-0ac797c582f5",
264 |    "metadata": {
265 |     "language": "sql",
266 |     "name": "cell5",
267 |     "collapsed": false,
268 |     "resultHeight": 427
269 |    },
270 |    "outputs": [],
271 |    "source": "SELECT TRANSCRIPT, \nSNOWFLAKE.CORTEX.TRANSLATE(TRANSCRIPT, '', 'en') AS ENGLISH_TRANSCRIPTS\nFROM CALL_TRANSCRIPTS \nLIMIT 10;",
272 |    "execution_count": null
273 |   },
274 |   {
275 |    "cell_type": "markdown",
276 |    "id": "5a1b0a0c-7244-4e85-a013-74285627ff9f",
277 |    "metadata": {
278 |     "name": "cell8",
279 |     "collapsed": false,
280 |     "resultHeight": 74
281 |    },
282 |    "source": "# Sentiment"
283 |   },
284 |   {
285 |    "cell_type": "code",
286 |    "id": "1dc2c29b-4376-422b-939d-bbcd712ae82b",
287 |    "metadata": {
288 |     "language": "python",
289 |     "name": "cell11",
290 |     "collapsed": false,
291 |     "resultHeight": 439
292 |    },
293 |    "outputs": [],
294 |    "source": "# Using Sentiment\nfrom snowflake.cortex import Sentiment\nfrom snowflake.snowpark.functions import col\n\narticle_df = session.table(\"CALL_TRANSCRIPTS\")\narticle_df = article_df.withColumn(\"sentiment_score_en\", Sentiment(col(\"EN_TRANSCRIPT\")))\n\narticle_df.collect()",
295 |    "execution_count": null
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "id": "97c6a7b0-dcb8-4d88-8f55-1e20e0218dda",
300 |    "metadata": {
301 |     "language": "sql",
302 |     "name": "cell10",
303 |     "collapsed": false,
304 |     "resultHeight": 427
305 |    },
306 |    "outputs": [],
307 |    "source": "SELECT\nSNOWFLAKE.CORTEX.SENTIMENT(EN_TRANSCRIPT), EN_TRANSCRIPT\nFROM CALL_TRANSCRIPTS \nLIMIT 10;",
308 |    "execution_count": null
309 |   },
310 |   {
311 |    "cell_type": "markdown",
312 |    "id": "0c279bf4-4ffe-4d8a-8a22-fcbc4b82ca64",
313 |    "metadata": {
314 |     "name": "cell13",
315 |     "collapsed": false,
316 |     "resultHeight": 74
317 |    },
318 |    "source": "# Summarize"
319 |   },
320 |   {
321 |    "cell_type": "code",
322 |    "id": "9dd35f88-83cc-4ad9-bda0-f42a2d893de1",
323 |    "metadata": {
324 |     "language": "python",
325 |     "name": "cell15",
326 |     "collapsed": false,
327 |     "resultHeight": 60
328 |    },
329 |    "outputs": [],
330 |    "source": "from snowflake.cortex import Summarize\n\ntext = \"\"\"\n    The Snowflake company was co-founded by Thierry Cruanes, Marcin Zukowski,\n    and Benoit Dageville in 2012 and is headquartered in Bozeman, Montana.\n\"\"\"\n\nSummarize(text)",
331 |    "execution_count": null
332 |   },
333 |   {
334 |    "cell_type": "code",
335 |    "id": "777e8916-852f-4bb3-b0c3-746ca5a7f84d",
336 |    "metadata": {
337 |     "language": "sql",
338 |     "name": "cell14",
339 |     "collapsed": false,
340 |     "resultHeight": 427
341 |    },
342 |    "outputs": [],
343 |    "source": "CREATE OR REPLACE TABLE call_transcripts\nAS \nSELECT *, SNOWFLAKE.CORTEX.SUMMARIZE(EN_TRANSCRIPT) as EN_CALL_SUMMARY\nFROM CALL_TRANSCRIPTS;\n\nSELECT * FROM call_transcripts LIMIT 10;",
344 |    "execution_count": null
345 |   },
346 |   {
347 |    "cell_type": "markdown",
348 |    "id": "818d3b83-dd6b-43a6-b870-9c947a96be57",
349 |    "metadata": {
350 |     "name": "cell12",
351 |     "collapsed": false,
352 |     "resultHeight": 74
353 |    },
354 |    "source": "# Classify text"
355 |   },
356 |   {
357 |    "cell_type": "code",
358 |    "id": "79493dad-79f2-4a17-86c9-312412a34efb",
359 |    "metadata": {
360 |     "language": "python",
361 |     "name": "cell16",
362 |     "collapsed": false,
363 |     "resultHeight": 99
364 |    },
365 |    "outputs": [],
366 |    "source": "from snowflake.cortex import ClassifyText\n\nClassifyText(\"What is the best restaurant in my city?\", [\"how to\", \"recommendations\"])",
367 |    "execution_count": null
368 |   },
369 |   {
370 |    "cell_type": "code",
371 |    "id": "124ad326-5d9c-4c7a-a9f9-56a4fd066fdf",
372 |    "metadata": {
373 |     "language": "sql",
374 |     "name": "cell17",
375 |     "collapsed": false,
376 |     "resultHeight": 439
377 |    },
378 |    "outputs": [],
379 |    "source": "SELECT EN_CALL_SUMMARY,\nSNOWFLAKE.CORTEX.CLASSIFY_TEXT(TRANSCRIPT, ['missing price tags','washed out color', 'Broken zipper']) AS damage_type_label\nFROM call_transcripts\nLimit 20;",
380 |    "execution_count": null
381 |   },
382 |   {
383 |    "cell_type": "markdown",
384 |    "id": "a0ac3158-c3aa-4796-a97a-39848b12ff35",
385 |    "metadata": {
386 |     "name": "cell18",
387 |     "collapsed": false,
388 |     "resultHeight": 74
389 |    },
390 |    "source": "# Using Helper Functions"
391 |   },
392 |   {
393 |    "cell_type": "markdown",
394 |    "id": "66e68570-42b5-4f4b-8a9d-449bdb8e1d95",
395 |    "metadata": {
396 |     "name": "cell22",
397 |     "collapsed": false,
398 |     "resultHeight": 74
399 |    },
400 |    "source": "# COUNT_TOKENS"
401 |   },
402 |   {
403 |    "cell_type": "code",
404 |    "id": "05167b47-5162-4fd0-bdbd-b16a264709f6",
405 |    "metadata": {
406 |     "language": "sql",
407 |     "name": "cell23",
408 |     "collapsed": false,
409 |     "resultHeight": 112
410 |    },
411 |    "outputs": [],
412 |    "source": "SELECT SNOWFLAKE.CORTEX.COUNT_TOKENS( 'llama3.1-70b', \n    'To be Jedi is to face the truth, and choose. Give off light, or darkness, Padawan. Be a candle, or the night.') \n    AS number_of_tokens;",
413 |    "execution_count": null
414 |   },
415 |   {
416 |    "cell_type": "code",
417 |    "id": "00394cb0-e1bb-49da-9390-9434b5848c37",
418 |    "metadata": {
419 |     "language": "sql",
420 |     "name": "cell24",
421 |     "collapsed": false,
422 |     "resultHeight": 112
423 |    },
424 |    "outputs": [],
425 |    "source": "SELECT SNOWFLAKE.CORTEX.COUNT_TOKENS( 'mistral-7b', \n    'To be Jedi is to face the truth, and choose. Give off light, or darkness, Padawan. Be a candle, or the night.') \n    AS number_of_tokens;",
426 |    "execution_count": null
427 |   },
428 |   {
429 |    "cell_type": "markdown",
430 |    "id": "57d39288-05f8-40f5-9bc3-5154c0eda072",
431 |    "metadata": {
432 |     "name": "cell19",
433 |     "collapsed": false,
434 |     "resultHeight": 74
435 |    },
436 |    "source": "# TRY_COMPLETE"
437 |   },
438 |   {
439 |    "cell_type": "code",
440 |    "id": "19aeb0ac-356a-4e4b-b9da-8da9c3bb512a",
441 |    "metadata": {
442 |     "language": "sql",
443 |     "name": "cell20",
444 |     "collapsed": false,
445 |     "resultHeight": 135
446 |    },
447 |    "outputs": [],
448 |    "source": "SELECT SNOWFLAKE.CORTEX.COMPLETE(\n    'llama2-70b-chaty',\n    [\n        {\n            'role': 'user',\n            'content': 'how does a snowflake get its unique pattern?'\n        }\n    ],\n    {\n        'temperature': 0.7,\n        'max_tokens': 10\n    }\n);",
449 |    "execution_count": null
450 |   },
451 |   {
452 |    "cell_type": "code",
453 |    "id": "f279961c-dad4-47d2-98cd-c7772b2fef34",
454 |    "metadata": {
455 |     "language": "sql",
456 |     "name": "cell21",
457 |     "collapsed": false,
458 |     "resultHeight": 112
459 |    },
460 |    "outputs": [],
461 |    "source": "SELECT SNOWFLAKE.CORTEX.TRY_COMPLETE(\n    'llama2-70b-chaty',\n    [\n        {\n            'role': 'user',\n            'content': 'how does a snowflake get its unique pattern?'\n        }\n    ],\n    {\n        'temperature': 0.7,\n        'max_tokens': 10\n    }\n);",
462 |    "execution_count": null
463 |   }
464 |  ]
465 | }


--------------------------------------------------------------------------------
/module-3/finetuning_mistral_7b.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "b5727103-e592-4879-bb1f-5f7384994e46",
  6 |    "metadata": {
  7 |     "collapsed": false,
  8 |     "name": "cell0",
  9 |     "resultHeight": 74
 10 |    },
 11 |    "source": [
 12 |     "# Preview Support Tickets"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": null,
 18 |    "id": "7331af7b",
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": []
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": null,
 26 |    "id": "cb7310e0-4362-4cd2-bad9-fd70854ef709",
 27 |    "metadata": {
 28 |     "codeCollapsed": false,
 29 |     "collapsed": false,
 30 |     "language": "python",
 31 |     "name": "cell1",
 32 |     "resultHeight": 0
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "from snowflake.snowpark.context import get_active_session\n",
 37 |     "import snowflake.snowpark.functions as F\n",
 38 |     "import streamlit as st\n",
 39 |     "import ast\n",
 40 |     "\n",
 41 |     "session = get_active_session()"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": null,
 47 |    "id": "4e462120-ae80-4ab6-bce3-1025164f3705",
 48 |    "metadata": {
 49 |     "collapsed": false,
 50 |     "language": "sql",
 51 |     "name": "cell21",
 52 |     "resultHeight": 112
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "use database TELCO_SUPPORT_DB;\n",
 57 |     "use schema SUPPORT_DATA;"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "id": "5cc43b0f-f99d-4790-b4f7-eb56c0dbfe2f",
 64 |    "metadata": {
 65 |     "collapsed": false,
 66 |     "language": "python",
 67 |     "name": "cell16",
 68 |     "resultHeight": 439
 69 |    },
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "df_support_tickets = session.table('SUPPORT_TICKETS')\n",
 73 |     "df_support_tickets"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "markdown",
 78 |    "id": "1a34347c-0a82-4cac-950a-1b9c848c6200",
 79 |    "metadata": {
 80 |     "collapsed": false,
 81 |     "name": "cell13",
 82 |     "resultHeight": 60
 83 |    },
 84 |    "source": [
 85 |     "## Custom email or text response generation for support tickets using LLMs"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "id": "61d5f948-7fe4-482c-b178-00e90a327e18",
 92 |    "metadata": {
 93 |     "collapsed": false,
 94 |     "language": "python",
 95 |     "name": "cell9",
 96 |     "resultHeight": 0
 97 |    },
 98 |    "outputs": [],
 99 |    "source": [
100 |     "prompt = \"\"\"You are a customer support representative at a telecommunications company. \n",
101 |     "Suddenly there is a spike in customer support tickets. \n",
102 |     "You need to understand and analyze the support requests from customers.\n",
103 |     "Based on the root cause of the main issue in the support request, craft a response to resolve the customer issue.\n",
104 |     "Write a text message under 25 words, if the contact_preference field is text message.\n",
105 |     "Write an email in maximum of 100 words if the contact_preference field is email. \n",
106 |     "Focus on alleviating the customer issue and improving customer satisfaction in your response.\n",
107 |     "Strictly follow the word count limit for the response. \n",
108 |     "Write only email or text message response based on the contact_preference for every customer. \n",
109 |     "Do not generate both email and text message response.\n",
110 |     "\"\"\"\n",
111 |     "\n",
112 |     "\n",
113 |     "prompt1 = \"\"\"\n",
114 |     "Please write an email or text promoting a new plan that will save customers total costs. \n",
115 |     "Also resolve the customer issue based on the ticket category. \n",
116 |     "If the contact_preference is text message, write text message response in less than 25 words. \n",
117 |     "If the contact_preference is email, write email response in maximum 100 words.\n",
118 |     "Write only email or text message response based on the contact_preference for every customer.\n",
119 |     "\"\"\""
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": null,
125 |    "id": "a894e3a1-ec1d-4933-ba66-956db6986d0e",
126 |    "metadata": {
127 |     "collapsed": false,
128 |     "language": "python",
129 |     "name": "cell24",
130 |     "resultHeight": 0
131 |    },
132 |    "outputs": [],
133 |    "source": [
134 |     "ticket_categories = ['Roaming fees', 'Slow data speed', 'Lost phone', 'Add new line', 'Closing account']"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "id": "5d1c1dab-ae52-4cb6-9dbc-5a91ca2fac10",
140 |    "metadata": {
141 |     "collapsed": false,
142 |     "name": "cell14",
143 |     "resultHeight": 60
144 |    },
145 |    "source": [
146 |     "## Mistral-7b response"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": null,
152 |    "id": "4954d50e-58db-4781-8d15-014a09b843ce",
153 |    "metadata": {
154 |     "collapsed": false,
155 |     "language": "python",
156 |     "name": "cell28",
157 |     "resultHeight": 439
158 |    },
159 |    "outputs": [],
160 |    "source": [
161 |     "df_mistral_7b_response_sql = f\"\"\" select ticket_id, \n",
162 |     "snowflake.cortex.classify_text(request, {ticket_categories}) as ticket_category,\n",
163 |     "contact_preference, \n",
164 |     "trim(snowflake.cortex.complete('mistral-7b',concat('{prompt}', request, ticket_category, contact_preference)),'\\n') \n",
165 |     "    as mistral_7b_response\n",
166 |     "from SUPPORT_TICKETS\"\"\"\n",
167 |     "\n",
168 |     "df_mistral_7b_response = session.sql(df_mistral_7b_response_sql)\n",
169 |     "df_mistral_7b_response"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "markdown",
174 |    "id": "3b38e140-412f-461a-a411-d65b8c446d81",
175 |    "metadata": {
176 |     "collapsed": false,
177 |     "name": "cell31",
178 |     "resultHeight": 60
179 |    },
180 |    "source": [
181 |     "## mistral-large response"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": null,
187 |    "id": "e38557d9-9e4b-4f1f-8ee5-28b47a97b5ad",
188 |    "metadata": {
189 |     "collapsed": false,
190 |     "language": "python",
191 |     "name": "cell23",
192 |     "resultHeight": 439
193 |    },
194 |    "outputs": [],
195 |    "source": [
196 |     "mistral_large_response_sql = f\"\"\" select ticket_id, \n",
197 |     "snowflake.cortex.classify_text(request, {ticket_categories}) as ticket_category,\n",
198 |     "contact_preference, \n",
199 |     "trim(snowflake.cortex.complete('mistral-large',concat('{prompt}', request, ticket_category, contact_preference)),'\\n') \n",
200 |     "    as mistral_large_response\n",
201 |     "from SUPPORT_TICKETS\"\"\"\n",
202 |     "\n",
203 |     "df_mistral_large_response = session.sql(mistral_large_response_sql)\n",
204 |     "df_mistral_large_response"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "markdown",
209 |    "id": "7f8cd51b-96d8-44d5-adc4-50a1b62fc914",
210 |    "metadata": {
211 |     "collapsed": false,
212 |     "name": "cell10",
213 |     "resultHeight": 74
214 |    },
215 |    "source": [
216 |     "# Generate dataset to fine-tune mistral-7b"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": null,
222 |    "id": "ffa07665-65a4-4456-8c43-85b5c7091b31",
223 |    "metadata": {
224 |     "collapsed": false,
225 |     "language": "python",
226 |     "name": "cell5",
227 |     "resultHeight": 439
228 |    },
229 |    "outputs": [],
230 |    "source": [
231 |     "# Column expressions shared below; the number of spaces in a response stands in for its word count\n",
232 |     "contact_pref = F.col(\"CONTACT_PREFERENCE\")\n",
233 |     "space_count = F.regexp_count(F.col(\"MISTRAL_LARGE_RESPONSE\"), r\" \")\n",
234 |     "\n",
235 |     "# Split the mistral-large responses by contact channel\n",
236 |     "df_text = df_mistral_large_response.filter(contact_pref == 'Text Message')\n",
237 |     "df_email = df_mistral_large_response.filter(contact_pref == \"Email\")\n",
238 |     "# Keep text messages with at most 25 spaces and emails with more than 30 spaces, then combine them\n",
239 |     "df_text_filtered = df_text.filter(space_count <= 25)\n",
240 |     "df_email_filtered = df_email.filter(space_count > 30)\n",
241 |     "df_filtered = df_text_filtered.union(df_email_filtered)\n",
242 |     "df_filtered"
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": null,
248 |    "id": "e5cac625-2b29-4dcc-a802-ddefb6c28a2d",
249 |    "metadata": {
250 |     "collapsed": false,
251 |     "language": "python",
252 |     "name": "cell30",
253 |     "resultHeight": 439
254 |    },
255 |    "outputs": [],
256 |    "source": [
257 |     "# Persist the filtered responses; switch \"overwrite\" to \"append\" to keep rows from earlier runs\n",
258 |     "df_filtered.write.mode(\"overwrite\").save_as_table(\"SUPPORT_TICKET_RESPONSES\")\n",
259 |     "\n",
260 |     "saved_df = session.table(\"SUPPORT_TICKET_RESPONSES\")  # read the table back and display it\n",
261 |     "saved_df"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "code",
266 |    "execution_count": null,
267 |    "id": "139c2111-f220-4be2-b907-4b2a140fdea4",
268 |    "metadata": {
269 |     "codeCollapsed": false,
270 |     "collapsed": false,
271 |     "language": "python",
272 |     "name": "cell2",
273 |     "resultHeight": 0
274 |    },
275 |    "outputs": [],
276 |    "source": [
277 |     "# Pieces of each training prompt, concatenated in order: base prompt, contact preference, ticket category\n",
278 |     "prompt_parts = [\n",
279 |     "    F.lit(prompt),\n",
280 |     "    F.lit(\" \"),\n",
281 |     "    F.lit(\"Contact preference: \"),\n",
282 |     "    F.col(\"contact_preference\"),\n",
283 |     "    F.lit(\" \"),\n",
284 |     "    F.col(\"ticket_category\"),\n",
285 |     "]\n",
286 |     "df_fine_tune = saved_df.with_column(\"prompt\", F.concat(*prompt_parts)).select(\"ticket_id\", \"prompt\", \"mistral_large_response\")\n",
287 |     "\n",
288 |     "df_fine_tune.write.mode('overwrite').save_as_table('support_tickets_finetune_message_style')"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "markdown",
293 |    "id": "5ce01346-2b85-425f-9675-d3a2626c27f6",
294 |    "metadata": {
295 |     "collapsed": false,
296 |     "name": "cell6",
297 |     "resultHeight": 135
298 |    },
299 |    "source": [
300 |     "# Fine-tune mistral-7b\n",
301 |     "\n",
302 |     "## Split data into training and evaluation"
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "code",
307 |    "execution_count": null,
308 |    "id": "07123242-032c-4c28-aa00-be737c45af80",
309 |    "metadata": {
310 |     "codeCollapsed": false,
311 |     "collapsed": false,
312 |     "language": "python",
313 |     "name": "cell4",
314 |     "resultHeight": 0
315 |    },
316 |    "outputs": [],
317 |    "source": [
318 |     "finetune_df = session.table(\"support_tickets_finetune_message_style\")\n",
319 |     "train_df, eval_df = finetune_df.random_split(weights=[0.8, 0.2], seed=42)  # 80/20 split with a fixed seed\n",
320 |     "train_df.write.mode('overwrite').save_as_table('support_tickets_finetune_message_style_train')\n",
321 |     "eval_df.write.mode('overwrite').save_as_table('support_tickets_finetune_message_style_eval')"
322 |    ]
323 |   },
324 |   {
325 |    "cell_type": "code",
326 |    "execution_count": null,
327 |    "id": "0c86dfda-75f7-451d-bf1b-2e19b0d938a5",
328 |    "metadata": {
329 |     "collapsed": false,
330 |     "language": "python",
331 |     "name": "cell17",
332 |     "resultHeight": 357
333 |    },
334 |    "outputs": [],
335 |    "source": [
336 |     "session.table('support_tickets_finetune_message_style_train').show(1)"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "code",
341 |    "execution_count": null,
342 |    "id": "dc07d19f-415e-4f5a-bc6d-c6fe3b046577",
343 |    "metadata": {
344 |     "collapsed": false,
345 |     "language": "python",
346 |     "name": "cell18",
347 |     "resultHeight": 357
348 |    },
349 |    "outputs": [],
350 |    "source": [
351 |     "session.table('support_tickets_finetune_message_style_eval').show(1)"
352 |    ]
353 |   },
354 |   {
355 |    "cell_type": "markdown",
356 |    "id": "c8b34618-1fce-4bd7-b715-ccccac5e1eae",
357 |    "metadata": {
358 |     "collapsed": false,
359 |     "name": "cell8",
360 |     "resultHeight": 60
361 |    },
362 |    "source": [
363 |     "## *Fine-tune mistral-7b model using Cortex*"
364 |    ]
365 |   },
366 |   {
367 |    "cell_type": "code",
368 |    "execution_count": null,
369 |    "id": "e00144d1-0e22-4e08-aeae-a46e47541703",
370 |    "metadata": {
371 |     "collapsed": false,
372 |     "language": "sql",
373 |     "name": "cell12",
374 |     "resultHeight": 112
375 |    },
376 |    "outputs": [],
377 |    "source": [
378 |     "select snowflake.cortex.finetune('CREATE', \n",
379 |     "'SUPPORT_MESSAGES_FINETUNED_MISTRAL_7B', \n",
380 |     "'mistral-7b', \n",
381 |     "'SELECT prompt, mistral_large_response as completion from support_tickets_finetune_message_style_train',\n",
382 |     "'SELECT prompt, mistral_large_response as completion from support_tickets_finetune_message_style_eval');"
383 |    ]
384 |   },
385 |   {
386 |    "cell_type": "code",
387 |    "execution_count": null,
388 |    "id": "97e22e9e-6201-4643-a036-acb29e04867d",
389 |    "metadata": {
390 |     "collapsed": false,
391 |     "language": "sql",
392 |     "name": "cell25",
393 |     "resultHeight": 112
394 |    },
395 |    "outputs": [],
396 |    "source": [
397 |     "select snowflake.cortex.finetune('SHOW');"
398 |    ]
399 |   },
400 |   {
401 |    "cell_type": "markdown",
402 |    "id": "1db774c3-b9cc-444e-84b5-7de8e61d680a",
403 |    "metadata": {
404 |     "collapsed": false,
405 |     "name": "cell20",
406 |     "resultHeight": 46
407 |    },
408 |    "source": [
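409 |     "### See status of the fine-tuning job\n\nReplace the job ID in the next cell with the ID returned by the `CREATE` call above (it is also listed by `SHOW`)."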
410 |    ]
411 |   },
412 |   {
413 |    "cell_type": "code",
414 |    "execution_count": null,
415 |    "id": "a21516fe-e328-4793-9953-7d5d6b7e616c",
416 |    "metadata": {
417 |     "collapsed": false,
418 |     "language": "sql",
419 |     "name": "cell19",
420 |     "resultHeight": 112
421 |    },
422 |    "outputs": [],
423 |    "source": [
424 |     "select snowflake.cortex.finetune('DESCRIBE', 'ft_2a4d9d51-5969-4d9b-bde5-93bdc5414bb9');"
425 |    ]
426 |   },
427 |   {
428 |    "cell_type": "markdown",
429 |    "id": "2c6c7a29-681b-43c1-b977-f4cacceed5bf",
430 |    "metadata": {
431 |     "collapsed": false,
432 |     "name": "cell11",
433 |     "resultHeight": 74
434 |    },
435 |    "source": [
436 |     "# Inference using fine-tuned model"
437 |    ]
438 |   },
439 |   {
440 |    "cell_type": "code",
441 |    "execution_count": null,
442 |    "id": "abb461ab-f107-4136-b000-e3a4792ac7f8",
443 |    "metadata": {
444 |     "collapsed": false,
445 |     "language": "python",
446 |     "name": "cell15",
447 |     "resultHeight": 439
448 |    },
449 |    "outputs": [],
450 |    "source": [
451 |     "fine_tuned_model_name = 'SUPPORT_MESSAGES_FINETUNED_MISTRAL_7B'\n",
452 |     "\n",
453 |     "sql = f\"\"\" select ticket_id, \n",
454 |     "snowflake.cortex.classify_text(request, {ticket_categories}) as ticket_category,\n",
455 |     "contact_preference, \n",
456 |     "trim(snowflake.cortex.complete('{fine_tuned_model_name}',concat('{prompt}', request, ticket_category, contact_preference)),'\\n') \n",
457 |     "    as fine_tuned_mistral_7b_model_response\n",
458 |     "from SUPPORT_TICKETS\"\"\"\n",
459 |     "\n",
460 |     "df_fine_tuned_mistral_7b_response = session.sql(sql)\n",
461 |     "df_fine_tuned_mistral_7b_response"
462 |    ]
463 |   },
464 |   {
465 |    "cell_type": "markdown",
466 |    "id": "5c07bb19-fd7e-47f8-b99d-43d6c65241d9",
467 |    "metadata": {
468 |     "collapsed": false,
469 |     "name": "cell7",
470 |     "resultHeight": 60
471 |    },
472 |    "source": [
473 |     "## Streamlit application to auto-generate custom emails and text messages"
474 |    ]
475 |   },
476 |   {
477 |    "cell_type": "code",
478 |    "execution_count": null,
479 |    "id": "cacc8136-a122-41c6-ac0b-86348b5fb75c",
480 |    "metadata": {
481 |     "collapsed": false,
482 |     "language": "python",
483 |     "name": "cell22",
484 |     "resultHeight": 570
485 |    },
486 |    "outputs": [],
487 |    "source": [
488 |     "st.subheader(\"Auto-generate custom emails or text messages\")\n",
489 |     "\n",
490 |     "with st.container():  # input form: request text, contact preference and model choice\n",
491 |     "    with st.expander(\"Enter customer request and select LLM\", expanded=True):\n",
492 |     "        customer_request = st.text_area('Request',\"\"\"I traveled to Japan for two weeks and kept my data usage to a minimum. However, I was charged $90 in international fees. These charges were not communicated to me, and I request a detailed breakdown and a refund. Thank you for your prompt assistance.\"\"\")\n",
493 |     "    \n",
494 |     "        with st.container():\n",
495 |     "            left_col, right_col = st.columns(2)\n",
496 |     "            with left_col:\n",
497 |     "                selected_preference = st.selectbox('Select contact preference', ('Text message', 'Email'))\n",
498 |     "            with right_col:\n",
499 |     "                selected_llm = st.selectbox('Select LLM',('llama3-8b', 'mistral-7b', 'mistral-large', 'SUPPORT_MESSAGES_FINETUNED_MISTRAL_7B',))\n",
500 |     "\n",
501 |     "with st.container():\n",
502 |     "    _,mid_col,_ = st.columns([.4,.3,.3])\n",
503 |     "    with mid_col:\n",
504 |     "        generate_template = st.button('Generate messages ⚡',type=\"primary\")\n",
505 |     "# The free-text request is sent as a bind variable (?) so quotes or backslashes in it cannot break or alter the SQL\n",
506 |     "with st.container():\n",
507 |     "    if generate_template:\n",
508 |     "        category_sql = f\"\"\"\n",
509 |     "        select snowflake.cortex.classify_text(?, {ticket_categories}) as ticket_category\n",
510 |     "        \"\"\"\n",
511 |     "        df_category = session.sql(category_sql, params=[customer_request]).to_pandas().iloc[0]['TICKET_CATEGORY']\n",
512 |     "        df_category_dict = ast.literal_eval(df_category)\n",
513 |     "        st.subheader(\"Ticket category\")\n",
514 |     "        st.write(df_category_dict['label'])\n",
515 |     "        # Draft the reply with the selected model; the request is bound the same way\n",
516 |     "        message_sql = f\"\"\"\n",
517 |     "        select snowflake.cortex.complete('{selected_llm}',concat('{prompt}', ?, '{selected_preference}')) as custom_message\n",
518 |     "        \"\"\"\n",
519 |     "        df_message = session.sql(message_sql, params=[customer_request]).to_pandas().iloc[0]['CUSTOM_MESSAGE']\n",
520 |     "        st.subheader(selected_preference)\n",
521 |     "        st.write(df_message)"
522 |    ]
523 |   }
524 |  ],
525 |  "metadata": {
526 |   "kernelspec": {
527 |    "display_name": "Streamlit Notebook",
528 |    "name": "streamlit"
529 |   }
530 |  },
531 |  "nbformat": 4,
532 |  "nbformat_minor": 5
533 | }
534 | 


--------------------------------------------------------------------------------