├── .github └── workflows │ └── python-app.yml ├── .gitignore ├── Access External Endpoints ├── Access External Endpoints.ipynb ├── environment.yml └── setup.sql ├── Anomaly Detection with Snowflake ML Functions └── Anomaly Detection with Snowflake ML Functions.ipynb ├── ArcGIS_Snowflake ├── ARCGIS_SERVICEAREA.ipynb └── environment.yml ├── Avalanche-Customer-Review-Analytics ├── Avalanche-Customer-Review-Analytics.ipynb ├── customer_reviews.csv ├── customer_reviews_docx.zip ├── environment.yml └── setup.sql ├── Bioinformatics_Solubility_Dashboard ├── Bioinformatics_Solubility_Dashboard.ipynb ├── delaney_solubility_with_descriptors.csv └── environment.yml ├── Build and Optimize Machine Learning Models with Streamlit ├── Build_and_Optimize_Machine_Learning_Models_with_Streamlit.ipynb └── environment.yml ├── Create and Manage Snowflake Objects like a Pro └── Create and Manage Snowflake Objects like a Pro.ipynb ├── Creating Snowflake Object using Python API ├── Creating Snowflake Object using Python API.ipynb └── environment.yml ├── Dashboard_with_Streamlit ├── Build_a_Dashboard_with_Streamlit_in_Snowflake_Notebooks.ipynb └── environment.yml ├── Data Engineering Pipelines with Snowpark Python └── Data Engineering Pipelines with Snowpark Python.ipynb ├── Data Pipeline Observability ├── finalizer_task_summary_to_html_email.ipynb ├── pipeline_alerts_level_1.ipynb ├── task_graph_run_demo.ipynb └── task_graphs_dmf_quality_checks.ipynb ├── Data_Analysis_with_LLM_RAG ├── Data_Analysis_with_LLM_RAG.ipynb └── environment.yml ├── End-to-End Machine Learning with Snowpark ML ├── 1_sf_nb_snowpark_ml_data_ingest.ipynb └── environment.yml ├── End-to-end ML with Feature Store and Model Registry └── End-to-end ML with Feature Store and Model Registry.ipynb ├── Feature Store API Overview └── Feature Store API Overview.ipynb ├── Feature Store Quickstart └── Feature Store Quickstart.ipynb ├── Fine tuning LLM using Snowflake Cortex AI ├── Fine tuning LLM using Snowflake Cortex AI.ipynb └── environment.yml ├── Getting Started With Snowflake Cortex AI in Snowflake Notebooks └── dash_snowflake_cortex_ai_101_notebook_app.ipynb ├── Getting Started with Container Runtimes ├── README.md ├── assets │ ├── diamonds_upload.png │ ├── eai.png │ ├── notebook_setup.png │ └── notebook_upload.png ├── diamonds.csv └── getting_started_with_container_runtimes.ipynb ├── Getting Started with Snowflake Cortex ML-Based Functions └── Getting Started with Snowflake Cortex ML-Based Functions.ipynb ├── Getting started with Snowpark using Snowflake Notebooks ├── Getting Started with Snowpark using Snowflake notebooks.ipynb └── environment.yml ├── Hyperparameter Tuning with sklearn ├── Hyperparameter Tuning with sklearn.ipynb └── environment.yml ├── Image_Classification_PyTorch └── image_classification_pytorch.ipynb ├── Image_Processing_Pipeline_Stream_Task_Cortex_Complete ├── Image_Processing_Pipeline.ipynb └── Image_Processing_Pipeline.pdf ├── Import Package from Stage ├── Import Package from Stage.ipynb ├── package_from_stage.png ├── simple.zip └── simple │ └── __init__.py ├── Ingest Public JSON └── Ingest Public JSON.ipynb ├── Intro to Snowpark pandas ├── Intro to Snowpark pandas.ipynb └── environment.yml ├── Java User-Defined Functions and Stored Procedures └── Java User-Defined Functions and Stored Procedures.ipynb ├── LICENSE ├── Load CSV from S3 └── Load CSV from S3.ipynb ├── MFA_Audit_of_Users ├── MFA_Audit_of_Users_with_Streamlit_in_Snowflake_Notebooks.ipynb ├── demo_data.csv └── environment.yml ├── ML Lineage Workflows └── ML 
Lineage Workflows.ipynb ├── Manage features in DBT with Feature Store └── Manage features in DBT with Feature Store.ipynb ├── Monitoring_Table_Size_with_Streamlit ├── Monitoring_Table_Size_with_Streamlit.ipynb └── environment.yml ├── My First Notebook Project ├── My First Notebook Project.ipynb └── environment.yml ├── Navigating and Browsing Files ├── Navigating and Browsing Files.ipynb ├── data.csv ├── data.json ├── display.py ├── img │ ├── browse_files.png │ ├── git_diff.png │ ├── git_files.png │ └── upload_files.png └── stats.py ├── Query_Caching_Effectiveness ├── Query_Caching_Effectiveness.ipynb └── environment.yml ├── Query_Cost_Monitoring ├── Query_Cost_Monitoring.ipynb └── environment.yml ├── Query_Performance_Insights ├── Automated_Query_Performance_Insights_in_Snowflake_Notebooks.ipynb └── environment.yml ├── Query_Performance_Insights_using_Streamlit ├── Build_an_Interactive_Query_Performance_App_with_Streamlit.ipynb └── environment.yml ├── RAG Chatbot for KubeCon Sessions └── RAG Chatbot for KubeCon Sessions.ipynb ├── README.md ├── Reference cells and variables └── Reference cells and variables.ipynb ├── Role_Based_Access_Auditing_with_Streamlit ├── Role_Based_Access_Auditing_with_Streamlit.ipynb └── environment.yml ├── Scheduled_Query_Execution_Report ├── Scheduled_Query_Execution_Report.ipynb └── environment.yml ├── Schema_Change_Tracker ├── Schema_Change_Tracker.ipynb └── environment.yml ├── Snowflake_Notebooks_Summit_2024_Demo └── aileen_summit_notebook.ipynb ├── Snowflake_Semantic_View ├── environment.yml └── getting-started-with-snowflake-semantic-view.ipynb ├── Snowflake_Trail_Alerts_Notifications ├── environment.yml ├── screenshot.png └── truck_sentiment_analysis_with_trail.ipynb ├── Streamlit_Zero_To_Hero_Machine_Learning_App ├── Streamlit_Machine_Learning_App.ipynb └── environment.yml ├── Telco Churn Data Analysis ├── Telco Churn Data Analysis.ipynb └── environment.yml ├── Visual Data Stories with Snowflake Notebooks ├── Visual Data Stories with Snowflake Notebooks.ipynb ├── environment.yml └── snowflake-logo.png ├── Warehouse_Utilization_with_Streamlit ├── Warehouse_Utilization_with_Streamlit.ipynb └── environment.yml ├── Working with Files └── Working with Files.ipynb ├── Working with Git ├── Working with Git.ipynb ├── environment.yml └── git_setup.sql └── config.toml /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | name: Python application 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Set up Snowflake connection by putting secrets into config file 17 | env: 18 | SNOWCLI_CONFIG: ${{secrets.SNOWCLI_CONFIG}} 19 | shell: bash 20 | run: | 21 | echo -e "$SNOWCLI_CONFIG" > config.toml 22 | # Snowflake CLI requires the config.toml file to limit its file permissions to read and write for the file owner only 23 | chown $USER config.toml 24 | chmod 0600 config.toml 25 | - name: Snowflake CLI installation 26 | uses: Snowflake-Labs/snowflake-cli-action@v1 27 | with: 28 | cli-version: "latest" 29 | default-config-file-path: "config.toml" 30 | - name: Fetch the latest update from Github 31 | run: | 32 | snow sql -q "ALTER GIT REPOSITORY SFLAB_DEMO_NB FETCH;" 33 | - name: Test Notebook - My First Notebook Project 34 | run: | 35 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_FIRST_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/My 
First Notebook Project/' MAIN_FILE = 'My First Notebook Project.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 36 | snow sql -q "ALTER NOTEBOOK GH_ACTION_FIRST_NB ADD LIVE VERSION FROM LAST;" 37 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_FIRST_NB();" 38 | - name: Test Notebook - Visual Data Stories 39 | run: | 40 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_VISUAL_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Visual Data Stories with Snowflake Notebooks/' MAIN_FILE = 'Visual Data Stories with Snowflake Notebooks.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 41 | snow sql -q "ALTER NOTEBOOK GH_ACTION_VISUAL_NB ADD LIVE VERSION FROM LAST;" 42 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_VISUAL_NB();" 43 | - name: Test Notebook - Ingest Public JSON 44 | run: | 45 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_PUBLIC_JSON_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Ingest Public JSON/' MAIN_FILE = 'Ingest Public JSON.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 46 | snow sql -q "ALTER NOTEBOOK GH_ACTION_PUBLIC_JSON_NB ADD LIVE VERSION FROM LAST;" 47 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_PUBLIC_JSON_NB();" 48 | - name: Test Notebook - Load CSV from S3 49 | run: | 50 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_CSV_S3_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Load CSV from S3/' MAIN_FILE = 'Load CSV from S3.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 51 | snow sql -q "ALTER NOTEBOOK GH_ACTION_CSV_S3_NB ADD LIVE VERSION FROM LAST;" 52 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_CSV_S3_NB();" 53 | - name: Test Notebook - Reference cells and variables 54 | run: | 55 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_CELLREF_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Reference cells and variables/' MAIN_FILE = 'Reference cells and variables.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 56 | snow sql -q "ALTER NOTEBOOK GH_ACTION_CELLREF_NB ADD LIVE VERSION FROM LAST;" 57 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_CELLREF_NB();" 58 | - name: Test Notebook - Working with Files 59 | run: | 60 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_FILES_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Working with Files/' MAIN_FILE = 'Working with Files.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 61 | snow sql -q "ALTER NOTEBOOK GH_ACTION_FILES_NB ADD LIVE VERSION FROM LAST;" 62 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_FILES_NB();" 63 | - name: Test Notebook - Navigating and Browsing Files 64 | run: | 65 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_MULTIFILE_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Navigating and Browsing Files/' MAIN_FILE = 'Navigating and Browsing Files.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 66 | snow sql -q "ALTER NOTEBOOK GH_ACTION_MULTIFILE_NB ADD LIVE VERSION FROM LAST;" 67 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_MULTIFILE_NB();" 68 | - name: Test Notebook - Access External Endpoints 69 | run: | 70 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_EAI_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Access External Endpoints/' MAIN_FILE = 'Access External Endpoints.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 71 | snow sql -q "ALTER NOTEBOOK GH_ACTION_EAI_NB ADD LIVE VERSION FROM LAST;" 72 | snow sql -q "EXECUTE IMMEDIATE FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Access External Endpoints/setup.sql';" 73 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_EAI_NB();" 74 | - name: Test Notebook - Hyperparameter Tuning with sklearn 75 | run: | 76 | snow sql -q 
"CREATE OR REPLACE NOTEBOOK GH_ACTION_SKLEARN_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Hyperparameter Tuning with sklearn/' MAIN_FILE = 'Hyperparameter Tuning with sklearn.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 77 | snow sql -q "ALTER NOTEBOOK GH_ACTION_SKLEARN_NB ADD LIVE VERSION FROM LAST;" 78 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_SKLEARN_NB();" 79 | - name: Test Notebook - Import from Stage 80 | run: | 81 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_STAGE_IMPORT_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Import Package from Stage/' MAIN_FILE = 'Import Package from Stage.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 82 | snow sql -q "ALTER NOTEBOOK GH_ACTION_STAGE_IMPORT_NB ADD LIVE VERSION FROM LAST;" 83 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_STAGE_IMPORT_NB();" 84 | # - name: Test Notebook - Working with Git 85 | # run: | 86 | # snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_GIT_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Working with Git/' MAIN_FILE = 'Working with Git.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 87 | # snow sql -q "ALTER NOTEBOOK GH_ACTION_GIT_NB ADD LIVE VERSION FROM LAST;" 88 | # snow sql -q "EXECUTE NOTEBOOK GH_ACTION_GIT_NB();" 89 | - name: Test Notebook - Create Objects with Python API 90 | run: | 91 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_PYTHONAPI_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Creating Snowflake Object using Python API/' MAIN_FILE = 'Creating Snowflake Object using Python API.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 92 | snow sql -q "ALTER NOTEBOOK GH_ACTION_PYTHONAPI_NB ADD LIVE VERSION FROM LAST;" 93 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_PYTHONAPI_NB();" 94 | - name: Test Notebook - Cortex ML Function 95 | run: | 96 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_CORTEX_MLFUNC_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Getting Started with Snowflake Cortex ML-Based Functions/' MAIN_FILE = 'Getting Started with Snowflake Cortex ML-Based Functions.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 97 | snow sql -q "ALTER NOTEBOOK GH_ACTION_CORTEX_MLFUNC_NB ADD LIVE VERSION FROM LAST;" 98 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_CORTEX_MLFUNC_NB();" 99 | - name: Test Notebook - End-to-End Machine Learning with Snowpark ML (1) 100 | run: | 101 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_SPML1_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/End-to-End Machine Learning with Snowpark ML/' MAIN_FILE = '1_sf_nb_snowpark_ml_data_ingest.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 102 | snow sql -q "ALTER NOTEBOOK GH_ACTION_SPML1_NB ADD LIVE VERSION FROM LAST;" 103 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_SPML1_NB();" 104 | - name: Test Notebook - End-to-End Machine Learning with Snowpark ML (2) 105 | run: | 106 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_SPML2_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/End-to-End Machine Learning with Snowpark ML/' MAIN_FILE = '2_sf_nb_snowpark_ml_feature_transformations.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 107 | snow sql -q "ALTER NOTEBOOK GH_ACTION_SPML2_NB ADD LIVE VERSION FROM LAST;" 108 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_SPML2_NB();" 109 | - name: Test Notebook - End-to-End Machine Learning with Snowpark ML (3) 110 | run: | 111 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_SPML3_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/End-to-End Machine Learning with Snowpark ML/' MAIN_FILE = 
'3_sf_nb_snowpark_ml_model_training_inference.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 112 | snow sql -q "ALTER NOTEBOOK GH_ACTION_SPML3_NB ADD LIVE VERSION FROM LAST;" 113 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_SPML3_NB();" 114 | - name: Test Notebook - Intro to Snowpark pandas 115 | run: | 116 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_PANDAS_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Intro to Snowpark pandas/' MAIN_FILE = 'Intro to Snowpark pandas.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 117 | snow sql -q "ALTER NOTEBOOK GH_ACTION_PANDAS_NB ADD LIVE VERSION FROM LAST;" 118 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_PANDAS_NB();" 119 | - name: Test Notebook - Data Engineering Pipelines with Snowpark Python 120 | run: | 121 | snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_DE_SNOWPARK_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Data Engineering Pipelines with Snowpark Python/' MAIN_FILE = 'Data Engineering Pipelines with Snowpark Python.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 122 | snow sql -q "ALTER NOTEBOOK GH_ACTION_DE_SNOWPARK_NB ADD LIVE VERSION FROM LAST;" 123 | snow sql -q "EXECUTE NOTEBOOK GH_ACTION_DE_SNOWPARK_NB();" 124 | # - name: Test Notebook - Create and Manage Snowflake Objects like a Pro 125 | # run: | 126 | # snow sql -q "CREATE OR REPLACE NOTEBOOK GH_ACTION_PRO_NB FROM '@"GH_ACTION"."PUBLIC"."SFLAB_DEMO_NB"/branches/main/Create and Manage Snowflake Objects like a Pro/' MAIN_FILE = 'Create and Manage Snowflake Objects like a Pro.ipynb' QUERY_WAREHOUSE = 'GH_ACTION_WH';" 127 | # snow sql -q "ALTER NOTEBOOK GH_ACTION_PRO_NB ADD LIVE VERSION FROM LAST;" 128 | # snow sql -q "EXECUTE NOTEBOOK GH_ACTION_PRO_NB();" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/#use-with-ide 111 | .pdm.toml 112 | 113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # PyCharm 157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 159 | # and can be added to the global gitignore or merged into this file. For a more nuclear 160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
161 | #.idea/ 162 | scripts/ 163 | -------------------------------------------------------------------------------- /Access External Endpoints/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - pytorch=2.2.0 6 | - transformers=4.37.2 -------------------------------------------------------------------------------- /Access External Endpoints/setup.sql: -------------------------------------------------------------------------------- 1 | -- Create the HuggingFace external access integration and the network rule it relies on. 2 | CREATE OR REPLACE NETWORK RULE hf_network_rule 3 | MODE = EGRESS 4 | TYPE = HOST_PORT 5 | VALUE_LIST = ('huggingface.co','cdn-lfs-us-1.huggingface.co'); 6 | 7 | CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION hf_access_integration 8 | ALLOWED_NETWORK_RULES = (hf_network_rule) 9 | ENABLED = true; 10 | 11 | -- Create the Github external access integration and the network rule it relies on. 12 | CREATE OR REPLACE NETWORK RULE gh_network_rule 13 | MODE = EGRESS 14 | TYPE = HOST_PORT 15 | VALUE_LIST = ('raw.githubusercontent.com', 'githubusercontent.com','github.com'); 16 | 17 | CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION gh_access_integration 18 | ALLOWED_NETWORK_RULES = (gh_network_rule) 19 | ENABLED = true; 20 | 21 | ALTER NOTEBOOK GH_ACTION_EAI_NB set EXTERNAL_ACCESS_INTEGRATIONS = (hf_access_integration, gh_access_integration); -------------------------------------------------------------------------------- /ArcGIS_Snowflake/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - pydeck=* -------------------------------------------------------------------------------- /Avalanche-Customer-Review-Analytics/Avalanche-Customer-Review-Analytics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | }, 7 | "lastEditStatus": { 8 | "notebookId": "2gfpag77rjklnaepw2qp", 9 | "authorId": "6841714608330", 10 | "authorName": "CHANINN", 11 | "authorEmail": "chanin.nantasenamat@snowflake.com", 12 | "sessionId": "fd937486-2fde-4160-99dc-ddfca8af4103", 13 | "lastEditTime": 1743707076161 14 | } 15 | }, 16 | "nbformat_minor": 5, 17 | "nbformat": 4, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "id": "3e3bdd35-2104-4280-a28f-e02cac177a85", 22 | "metadata": { 23 | "name": "md_title", 24 | "collapsed": false 25 | }, 26 | "source": "# Build a Customer Review Analytics Dashboard with Streamlit on Snowflake\n\nIn this notebook, we're performing data processing of the Avalanche customer review data. By the end of the tutorial, we'll have created a few data visualization to gain insights into the general sentiment of the products." 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "id": "3fc8fa46-8a26-43e3-a2a9-381c89eae2a7", 31 | "metadata": { 32 | "name": "md_about", 33 | "collapsed": false 34 | }, 35 | "source": "## Avalanche data\n\nThe Avalanche data set is based on a hypothetical company that sells winter sports gear. Holistically, this data set is comprised of the product catalog, customer review, shipping logistics and order history.\n\nIn this particular notebook, we'll use only the customer review data. We'll start by uploading customer review data in DOCX format. 
Next, we'll parse and reshape the data into a semi-structured form. Particularly, we'll apply LLMs for language translation and text summarization along with sentiment analysis." 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "03e5be91-6497-450d-97c0-ca70199b8eef", 40 | "metadata": { 41 | "name": "md_data", 42 | "collapsed": false 43 | }, 44 | "source": "## Retrieve customer review data\n\nFirst, we're starting by querying and parsing the content from DOCX files that are stored on the `@avalanche_db.avalanche_schema.customer-reviews` stage." 45 | }, 46 | { 47 | "cell_type": "code", 48 | "id": "b45557a0-01b9-4775-9b97-28da754ec326", 49 | "metadata": { 50 | "language": "sql", 51 | "name": "sql1", 52 | "collapsed": false, 53 | "codeCollapsed": false 54 | }, 55 | "outputs": [], 56 | "source": "-- Parse content from DOCX files\nWITH files AS (\n SELECT \n REPLACE(REGEXP_SUBSTR(file_url, '[^/]+$'), '%2e', '.') as filename\n FROM DIRECTORY('@avalanche_db.avalanche_schema.customer_reviews')\n WHERE filename LIKE '%.docx'\n)\nSELECT \n filename,\n SNOWFLAKE.CORTEX.PARSE_DOCUMENT(\n @avalanche_db.avalanche_schema.customer_reviews,\n filename,\n {'mode': 'layout'}\n ):content AS layout\nFROM files;", 57 | "execution_count": null 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "id": "796ba2b7-2d50-4d22-911d-db20912257f5", 62 | "metadata": { 63 | "name": "md_sql2", 64 | "collapsed": false 65 | }, 66 | "source": "## Data reshaping\n\nWe're reshaping the data to a more structured form by using regular expression to create additional columns from the customer review `LAYOUT` column." 67 | }, 68 | { 69 | "cell_type": "code", 70 | "id": "c6f47ba7-4c5a-46f1-a2eb-3533f4dcda05", 71 | "metadata": { 72 | "language": "sql", 73 | "name": "sql2", 74 | "codeCollapsed": false, 75 | "collapsed": false 76 | }, 77 | "outputs": [], 78 | "source": "-- Extract PRODUCT name, DATE, and CUSTOMER_REVIEW from the LAYOUT column\nSELECT \n filename,\n REGEXP_SUBSTR(layout, 'Product: (.*?) Date:', 1, 1, 'e') as product,\n REGEXP_SUBSTR(layout, 'Date: (202[0-9]-[0-9]{2}-[0-9]{2})', 1, 1, 'e') as date,\n REGEXP_SUBSTR(layout, '## Customer Review\\n([\\\\s\\\\S]*?)$', 1, 1, 'es') as customer_review\nFROM {{sql1}};", 79 | "execution_count": null 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "id": "99f6b075-3d7c-4615-8414-86568a80ee20", 84 | "metadata": { 85 | "name": "md_sql3", 86 | "collapsed": false 87 | }, 88 | "source": "## Apply Cortex LLM on customer review data\n\nHere, we'll apply the Cortex LLM to perform the following 3 tasks:\n- Text translation is performed on foreign language text where they are translated to English.\n- Text summarization is performed on the translated text to obtain a more concise summary.\n- Sentiment score is calculated to give insights on whether the sentiment was positive or negative." 
89 | }, 90 | { 91 | "cell_type": "code", 92 | "id": "74be7b08-6122-4a98-b113-99ff874375e3", 93 | "metadata": { 94 | "language": "sql", 95 | "name": "sql3", 96 | "collapsed": false, 97 | "codeCollapsed": false 98 | }, 99 | "outputs": [], 100 | "source": "-- Perform translation, summarization and sentiment analysis on customer review\nSELECT \n product,\n date,\n SNOWFLAKE.CORTEX.TRANSLATE(customer_review, '', 'en') as translated_review,\n SNOWFLAKE.CORTEX.SUMMARIZE(translated_review) as summary,\n SNOWFLAKE.CORTEX.SENTIMENT(translated_review) as sentiment_score\nFROM {{sql2}}\nORDER BY date;", 101 | "execution_count": null 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "id": "adaa0f32-5263-41ac-aa30-88cc75303d42", 106 | "metadata": { 107 | "name": "md_df", 108 | "collapsed": false 109 | }, 110 | "source": "## Convert SQL output to Pandas DataFrame\n\nHere, we'll convert the SQL output to a Pandas DataFrame by applying the `to_pandas()` method." 111 | }, 112 | { 113 | "cell_type": "code", 114 | "id": "b88d6ae3-0de9-42c1-b48a-f2ebc4d34255", 115 | "metadata": { 116 | "language": "python", 117 | "name": "df", 118 | "codeCollapsed": false, 119 | "collapsed": false 120 | }, 121 | "outputs": [], 122 | "source": "sql3.to_pandas()", 123 | "execution_count": null 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "id": "a3a0334d-29df-494f-982f-3e1fcd916066", 128 | "metadata": { 129 | "name": "md_bar", 130 | "collapsed": false 131 | }, 132 | "source": "## Bar charts\n\nHere, we're creating some bar charts for the sentiment scores.\n\n### Daily sentiment scores\n\nNote: Positive values are shown in green while negative values in red." 133 | }, 134 | { 135 | "cell_type": "code", 136 | "id": "4cd85ca2-f005-4285-a633-744b12de2109", 137 | "metadata": { 138 | "language": "python", 139 | "name": "py_bar", 140 | "codeCollapsed": false, 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": "import streamlit as st\nimport altair as alt\nimport pandas as pd\n\n# Ensure SENTIMENT_SCORE is numeric\ndf['SENTIMENT_SCORE'] = pd.to_numeric(df['SENTIMENT_SCORE'])\n\n# Create the base chart with bars\nchart = alt.Chart(df).mark_bar(size=15).encode(\n x=alt.X('DATE:T',\n axis=alt.Axis(\n format='%Y-%m-%d', # YYYY-MM-DD format\n labelAngle=90) # Rotate labels 90 degrees\n ),\n y=alt.Y('SENTIMENT_SCORE:Q'),\n color=alt.condition(\n alt.datum.SENTIMENT_SCORE >= 0,\n alt.value('#2ecc71'), # green for positive\n alt.value('#e74c3c') # red for negative\n ),\n tooltip=['PRODUCT:N', 'DATE:T'] # Add tooltip\n).properties(\n height=500\n)\n\n# Display the chart\nst.altair_chart(chart, use_container_width=True)", 145 | "execution_count": null 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "id": "32bcfa7b-c940-4615-94a2-373c199ede4f", 150 | "metadata": { 151 | "name": "md_bar_2", 152 | "collapsed": false 153 | }, 154 | "source": "### Product sentiment scores" 155 | }, 156 | { 157 | "cell_type": "code", 158 | "id": "74951343-25ef-41c7-825e-4d487dc676eb", 159 | "metadata": { 160 | "language": "python", 161 | "name": "py_product_sentiment", 162 | "codeCollapsed": false 163 | }, 164 | "outputs": [], 165 | "source": "import streamlit as st\nimport altair as alt\nimport pandas as pd\n\n# Create the base chart with aggregation by PRODUCT\nbars = alt.Chart(df).mark_bar(size=15).encode(\n y=alt.Y('PRODUCT:N', \n axis=alt.Axis(\n labelAngle=0, # Horizontal labels\n labelOverlap=False, # Prevent label overlap\n labelPadding=10 # Add some padding\n )\n ),\n x=alt.X('mean(SENTIMENT_SCORE):Q', # Aggregate mean 
sentiment score\n title='MEAN SENTIMENT_SCORE'),\n color=alt.condition(\n alt.datum.mean_SENTIMENT_SCORE >= 0,\n alt.value('#2ecc71'), # green for positive\n alt.value('#e74c3c') # red for negative\n ),\n tooltip=['PRODUCT:N', 'mean(SENTIMENT_SCORE):Q']\n).properties(\n height=400\n)\n\n# Display the chart\nst.altair_chart(bars, use_container_width=True)", 166 | "execution_count": null 167 | }, 168 | { 169 | "cell_type": "code", 170 | "id": "d430287f-867c-484a-8e09-d9d29ca9ef3f", 171 | "metadata": { 172 | "language": "python", 173 | "name": "py_download", 174 | "codeCollapsed": false 175 | }, 176 | "outputs": [], 177 | "source": "# Download button for the CSV file\nst.subheader('Processed Customer Reviews Data')\nst.download_button(\n label=\"Download CSV\",\n data=df[['PRODUCT', 'DATE', 'SUMMARY', 'SENTIMENT_SCORE']].to_csv(index=False).encode('utf-8'),\n mime=\"text/csv\"\n)", 178 | "execution_count": null 179 | }, 180 | { 181 | "cell_type": "code", 182 | "id": "597a05b3-0ead-4fb0-a821-d02ce6802b47", 183 | "metadata": { 184 | "language": "sql", 185 | "name": "cell1" 186 | }, 187 | "outputs": [], 188 | "source": "", 189 | "execution_count": null 190 | } 191 | ] 192 | } 193 | -------------------------------------------------------------------------------- /Avalanche-Customer-Review-Analytics/customer_reviews_docx.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Avalanche-Customer-Review-Analytics/customer_reviews_docx.zip -------------------------------------------------------------------------------- /Avalanche-Customer-Review-Analytics/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - snowflake.core=* 6 | -------------------------------------------------------------------------------- /Avalanche-Customer-Review-Analytics/setup.sql: -------------------------------------------------------------------------------- 1 | -- STEP 1 2 | -- Create the avalanche database and schema 3 | CREATE DATABASE IF NOT EXISTS avalanche_db; 4 | CREATE SCHEMA IF NOT EXISTS avalanche_schema; 5 | 6 | -- STEP 2 7 | -- Option 1: Manual upload to Stage 8 | -- Create the stage for storing our files 9 | -- Uncomment code block below for this option: 10 | -- 11 | CREATE STAGE IF NOT EXISTS avalanche_db.avalanche_schema.customer_reviews 12 | ENCRYPTION = (TYPE = 'SNOWFLAKE_SSE') 13 | DIRECTORY = (ENABLE = true); 14 | -- 15 | -- Now go and upload files to the stage. 
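-- For example (illustrative sketch only; the local path below is a placeholder), the DOCX
-- files can be uploaded with a PUT command from SnowSQL or the Snowflake CLI, since PUT
-- does not run inside a Snowsight worksheet; alternatively, drag and drop the files onto
-- the stage in Snowsight. AUTO_COMPRESS = FALSE keeps the files uncompressed on the stage,
-- which is what the parsing step later in this project expects.
-- PUT file:///path/to/customer_reviews/*.docx @avalanche_db.avalanche_schema.customer_reviews AUTO_COMPRESS = FALSE;
-- If the directory table does not list the newly uploaded files, refresh it manually:
-- ALTER STAGE avalanche_db.avalanche_schema.customer_reviews REFRESH;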
16 | -- Once you've done that proceed to the next step 17 | 18 | -- Option 2: Push files to Stage from S3 19 | -- Uncomment lines below to use: 20 | -- 21 | -- Create the stage for storing our files 22 | -- CREATE OR REPLACE STAGE customer_reviews 23 | -- URL = 's3://sfquickstarts/misc/customer_reviews/' 24 | -- DIRECTORY = (ENABLE = TRUE AUTO_REFRESH = TRUE); 25 | 26 | 27 | -- STEP 3 28 | -- List the contents of the newly created stage 29 | ls @avalanche_db.avalanche_schema.customer_reviews; 30 | 31 | 32 | -- STEP 4 33 | -- USAGE 34 | -- 35 | -- Read single file 36 | -- Uncomment lines below to use: 37 | -- 38 | -- SELECT 39 | -- SNOWFLAKE.CORTEX.PARSE_DOCUMENT( 40 | -- @avalanche_db.avalanche_schema.customer_reviews, 41 | -- 'review-01.docx', 42 | -- {'mode': 'layout'} 43 | -- ) AS layout; 44 | 45 | -- Read multiple files into a table 46 | -- Uncomment lines below to use: 47 | -- 48 | -- WITH files AS ( 49 | -- SELECT 50 | -- REPLACE(REGEXP_SUBSTR(file_url, '[^/]+$'), '%2e', '.') as filename 51 | -- FROM DIRECTORY('@avalanche_db.avalanche_schema.customer_reviews') 52 | -- WHERE filename LIKE '%.docx' 53 | -- ) 54 | -- SELECT 55 | -- filename, 56 | -- SNOWFLAKE.CORTEX.PARSE_DOCUMENT( 57 | -- @avalanche_db.avalanche_schema.customer_reviews, 58 | -- filename, 59 | -- {'mode': 'layout'} 60 | -- ):content AS layout 61 | -- FROM files; 62 | -------------------------------------------------------------------------------- /Bioinformatics_Solubility_Dashboard/Bioinformatics_Solubility_Dashboard.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | }, 7 | "lastEditStatus": { 8 | "notebookId": "7rpm6lxftnqo2r7bqwsp", 9 | "authorId": "6841714608330", 10 | "authorName": "CHANINN", 11 | "authorEmail": "chanin.nantasenamat@snowflake.com", 12 | "sessionId": "6c69bcea-e09a-4f87-a91d-99ff6aecc8bf", 13 | "lastEditTime": 1741649071648 14 | } 15 | }, 16 | "nbformat_minor": 5, 17 | "nbformat": 4, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "id": "407331eb-29af-42a3-976c-43e3652cd685", 22 | "metadata": { 23 | "name": "md_title", 24 | "collapsed": false 25 | }, 26 | "source": "# Build a Bioinformatics Solubility Dashboard in Snowflake\n\nIn this notebook, you'll build a **bioinformatics project** from scratch in Snowflake. \n\nBriefly, we're using the *Delaney* solubility data set. Solubility is an important property for successful drug discovery efforts and is amongst one of the key metrics used in defining drug-like molecules according to the Lipinski Rule of 5.\n\nIn a nutshell, here's what you're building:\n- Load data into Snowflake\n- Perform data preparation using Pandas\n- Build a simple dashboard with Streamlit\n" 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "id": "121d2db7-d366-4363-a464-fadf2ffbb1dc", 31 | "metadata": { 32 | "name": "md_solubility", 33 | "collapsed": false 34 | }, 35 | "source": "## About molecular solubility\n\nMolecular solubility is a crucial property in drug development that affects whether a drug can reach its target in the human body. Let me explain why it matters in simple terms.\n\n### Solubility\nSolubility is a molecule's ability to dissolve in a liquid, which literally means the ability to dissolve in human bloodstream and transport to its desired target in the human body. 
If it can dissolve, it can't work!\n\nPoorly soluble drugs might require higher doses or special formulations, leading to potential side effects or complicated treatment regimens. So we want drugs that are both effective and yet soluble so that fewer of it is required so as to minimize potential side effects.\n\n### Lipinski's Rule of 5\nDrug development often refer to a guidelines known as the Lipinski's Rule of 5 to predict whether a molecule will be soluble enough to make a good oral drug. This includes factors like:\n- Molecule's size\n- How water-loving or water-repelling it is\n- Number of hydrogen bond donors and acceptors\n\nUnderstanding and optimizing solubility helps pharmaceutical companies develop effective medicines that can be easily administered and work efficiently in the body." 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "3a2a4205-5392-4730-8495-93fea5c1602f", 40 | "metadata": { 41 | "name": "md_data", 42 | "collapsed": false 43 | }, 44 | "source": "## Load data\n\nHere, we're loading the Delaney data set ([reference](https://pubs.acs.org/doi/10.1021/ci034243x))." 45 | }, 46 | { 47 | "cell_type": "code", 48 | "id": "92528066-a158-4733-8747-a2915c832c58", 49 | "metadata": { 50 | "language": "sql", 51 | "name": "sql_data" 52 | }, 53 | "outputs": [], 54 | "source": "SELECT * FROM CHANINN_DEMO_DATA.PUBLIC.SOLUBILITY", 55 | "execution_count": null 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "id": "32b8bb10-45e2-4c81-8953-b4af097fe619", 60 | "metadata": { 61 | "name": "md_to_pandas", 62 | "collapsed": false 63 | }, 64 | "source": "## Convert SQL output to Pandas DataFrame\n\nWe're using `to_pandas()` method to convert our SQL output table to a Pandas DataFrame." 65 | }, 66 | { 67 | "cell_type": "code", 68 | "id": "24aef3fd-6815-4874-a712-d7ab940660f7", 69 | "metadata": { 70 | "language": "python", 71 | "name": "df", 72 | "codeCollapsed": false 73 | }, 74 | "outputs": [], 75 | "source": "sql_data.to_pandas()", 76 | "execution_count": null 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "id": "126ab616-c4bc-484a-9d44-833b0bf26143", 81 | "metadata": { 82 | "name": "md_class", 83 | "collapsed": false 84 | }, 85 | "source": "## Data Aggregation\n\nHere, we're aggregating the data (grouping it) by its molecular weight:\n- `small` if <300\n- `large` if >= 300" 86 | }, 87 | { 88 | "cell_type": "code", 89 | "id": "ab0fb5ec-3cf1-45d6-872c-d92691cb9d9d", 90 | "metadata": { 91 | "language": "python", 92 | "name": "py_class", 93 | "codeCollapsed": false 94 | }, 95 | "outputs": [], 96 | "source": "df['MOLWT_CLASS'] = pd.Series(['small' if x < 300 else 'large' for x in df['MOLWT']])\ndf_class = df.groupby('MOLWT_CLASS').mean().reset_index()\ndf_class", 97 | "execution_count": null 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "dd9543d3-31b7-4c54-9bde-530c42e36a90", 102 | "metadata": { 103 | "name": "md_app", 104 | "collapsed": false 105 | }, 106 | "source": "## Building the Solubility Dashboard" 107 | }, 108 | { 109 | "cell_type": "code", 110 | "id": "89a6c1ff-71e9-4c2f-be2b-6d14879ddd00", 111 | "metadata": { 112 | "language": "python", 113 | "name": "py_app", 114 | "codeCollapsed": false 115 | }, 116 | "outputs": [], 117 | "source": "import streamlit as st\n\nst.title('☘️ Solubility Dashboard')\n\n# Data Filtering\nmol_size = st.slider('Select a value', 100, 500, 300)\ndf['MOLWT_CLASS'] = pd.Series(['small' if x < mol_size else 'large' for x in df['MOLWT']])\ndf_class = df.groupby('MOLWT_CLASS').mean().reset_index()\n\nst.divider()\n\n# Calculate 
Metrics\nmolwt_large = round(df_class['MOLWT'][0], 2)\nmolwt_small = round(df_class['MOLWT'][1], 2)\nnumrotatablebonds_large = round(df_class['NUMROTATABLEBONDS'][0], 2)\nnumrotatablebonds_small = round(df_class['NUMROTATABLEBONDS'][1], 2)\nmollogp_large = round(df_class['MOLLOGP'][0], 2)\nmollogp_small = round(df_class['MOLLOGP'][1], 2)\naromaticproportion_large = round(df_class['AROMATICPROPORTION'][0], 2)\naromaticproportion_small = round(df_class['AROMATICPROPORTION'][1], 2)\n\n# Data metrics and visualizations\ncol = st.columns(2)\nwith col[0]:\n st.subheader('Molecular Weight')\n st.metric('Large', molwt_large)\n st.metric('Small', molwt_small)\n st.bar_chart(df_class, x='MOLWT_CLASS', y='MOLWT', color='MOLWT_CLASS')\n\n st.subheader('Number of Rotatable Bonds')\n st.metric('Large', numrotatablebonds_large)\n st.metric('Small', numrotatablebonds_small)\n st.bar_chart(df_class, x='MOLWT_CLASS', y='NUMROTATABLEBONDS', color='MOLWT_CLASS')\nwith col[1]:\n st.subheader('Molecular LogP')\n st.metric('Large', mollogp_large)\n st.metric('Small', mollogp_small)\n st.bar_chart(df_class, x='MOLWT_CLASS', y='MOLLOGP', color='MOLWT_CLASS')\n\n st.subheader('Aromatic Proportion')\n st.metric('Large', mollogp_large)\n st.metric('Small', mollogp_small)\n st.bar_chart(df_class, x='MOLWT_CLASS', y='AROMATICPROPORTION', color='MOLWT_CLASS')\n\nwith st.expander('Show Original DataFrame'):\n st.dataframe(df)\nwith st.expander('Show Aggregated DataFrame'):\n st.dataframe(df_class)", 118 | "execution_count": null 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "id": "81a409e7-7219-4c20-9276-f3b27e0b8ea4", 123 | "metadata": { 124 | "name": "md_reference", 125 | "collapsed": false 126 | }, 127 | "source": "## References\n\n- [ESOL:  Estimating Aqueous Solubility Directly from Molecular Structure](https://pubs.acs.org/doi/10.1021/ci034243x)\n- [st.bar_chart](https://docs.streamlit.io/develop/api-reference/charts/st.bar_chart)\n- [st.expander](https://docs.streamlit.io/develop/api-reference/layout/st.expander)\n- [st.slider](https://docs.streamlit.io/develop/api-reference/widgets/st.slider)" 128 | } 129 | ] 130 | } -------------------------------------------------------------------------------- /Bioinformatics_Solubility_Dashboard/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - pandas=* 6 | -------------------------------------------------------------------------------- /Build and Optimize Machine Learning Models with Streamlit/Build_and_Optimize_Machine_Learning_Models_with_Streamlit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | } 7 | }, 8 | "nbformat_minor": 5, 9 | "nbformat": 4, 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "id": "2ca12abe-9d90-46c7-a40b-3631fe7e7665", 14 | "metadata": { 15 | "name": "md_title", 16 | "collapsed": false 17 | }, 18 | "source": "# Build and Optimize a Machine Learning Models in Snowflake Notebooks with Streamlit\n\nIn this notebook, we'll build and optimize machine learning models. 
We'll also sprinkle in UI interactivity with Streamlit widgets to allow users to experiment and play with the parameters and settings.\n\n## Libraries used\n- `streamlit` - build the frontend UI\n- `pandas` - handle and wrangle data\n- `numpy` - numerical computing\n- `scikit-learn` - build machine learning models\n- `altair` - data visualization\n\n## Protocol\nHere's a breakdown of what we'll be doing:\n1. Load and prepare a dataset for modeling.\n2. Perform grid search hyperparameter optimization using the radial basis function (RBF) kernel with the support vector machine (SVM) algorithm.\n3. Visualize the hyperparameter optimization via a heatmap and line chart.\n" 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "cc43846f-0d71-40d4-9c6c-ebd7e81e4db4", 23 | "metadata": { 24 | "name": "cell1", 25 | "collapsed": false 26 | }, 27 | "source": "## Build the ML Hyperparameter Optimization App using Streamlit" 28 | }, 29 | { 30 | "cell_type": "code", 31 | "id": "59bf3b1e-92f9-4a24-919a-b7ea11f164b6", 32 | "metadata": { 33 | "language": "python", 34 | "name": "py_app", 35 | "codeCollapsed": false, 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": "import streamlit as st\nimport pandas as pd\nimport numpy as np\nimport altair as alt\nfrom sklearn.model_selection import train_test_split, GridSearchCV\nfrom sklearn.svm import SVC\nfrom sklearn.datasets import load_wine\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.preprocessing import StandardScaler\n\nst.title('ML Hyperparameter Optimization')\n\n# Load wine dataset\ndataset = load_wine()\nX = dataset.data\ny = dataset.target\nfeature_names = dataset.feature_names\n\n# Create DataFrame\ndf = pd.DataFrame(X, columns=feature_names)\ndf['target'] = y\n\n# Display dataset info using metrics\nst.header('📖 Dataset Information')\ncol1, col2, col3 = st.columns(3)\nwith col1:\n st.metric(\"Number of features\", len(feature_names))\nwith col2:\n st.metric(\"Number of classes\", len(dataset.target_names))\nwith col3:\n st.metric(\"Number of samples\", len(y))\n\n# Display class names\nformatted_classes = \", \".join([f\"`{i+1}`\" for i in range(len(dataset.target_names))])\nst.write(f\"Classes: {formatted_classes}\")\n\n# Display sample of the data\nwith st.expander(\"👀 See the dataset\"):\n st.write(df.head())\n\n# Model hyperparameters using powers of 2\nst.header('⚙️ Hyperparameters')\n\n# Parameter range selection\nst.subheader(\"Parameter Ranges (in powers of 2)\")\ncol1, col2 = st.columns(2)\n\n# Create list of powers of 2\npowers = list(range(-10, 11, 2))\n\nwith col1:\n C_power_range = st.select_slider(\n 'C (Regularization) range - powers of 2',\n options=powers,\n value=(-4, 4),\n help='C = 2^value'\n )\n st.info(f'''\n C range: $2^{{{C_power_range[0]}}}$ to $2^{{{C_power_range[1]}}}$\n \n {2**C_power_range[0]:.6f} to {2**C_power_range[1]:.6f}\n ''')\n\nwith col2:\n gamma_power_range = st.select_slider(\n 'γ range - powers of 2',\n options=powers,\n value=(-4, 4),\n help='gamma = 2^value'\n )\n st.info(f'''\n γ range: $2^{{{gamma_power_range[0]}}}$ to $2^{{{gamma_power_range[1]}}}$\n \n {2**gamma_power_range[0]:.6f} to {2**gamma_power_range[1]:.6f}\n ''')\n\n# Step size selection\nst.subheader(\"Step Size for Grid Search\")\ncol1, col2, col3 = st.columns(3)\n\nwith col1:\n C_step = st.slider('C step size', 0.1, 2.0, 0.5, 0.1)\nwith col2:\n gamma_step = st.slider('Gamma step size', 0.1, 2.0, 0.5, 0.1)\nwith col3:\n test_size = st.slider('Test size', 0.1, 0.5, 0.2)\n\nst.divider()\n\n# Split and scale 
data\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)\n\n# Scale the features\nscaler = StandardScaler()\nX_train_scaled = scaler.fit_transform(X_train)\nX_test_scaled = scaler.transform(X_test)\n\n# Create parameter grid using powers of 2 with specified step sizes\ndef create_param_range(start_power, end_power, step):\n powers = np.arange(start_power, end_power + step, step)\n return np.power(2, powers)\n\nC_range = create_param_range(C_power_range[0], C_power_range[1], C_step)\ngamma_range = create_param_range(gamma_power_range[0], gamma_power_range[1], gamma_step)\n\n# Train model with GridSearchCV\nparam_grid = {\n 'C': C_range,\n 'gamma': gamma_range\n}\n\nsvm = SVC(kernel='rbf', random_state=42)\ngrid = GridSearchCV(svm, param_grid, cv=5)\ngrid.fit(X_train_scaled, y_train)\n\n# Results\ny_pred = grid.predict(X_test_scaled)\naccuracy = accuracy_score(y_test, y_pred)\n\n# Display metrics in columns\nmetrics1, metrics2, metrics3 = st.columns(3)\nwith metrics1:\n st.header('Model Performance')\n st.metric(\"Accuracy\", f\"{accuracy:.2f}\")\nwith metrics2:\n best_C_power = np.log2(grid.best_params_['C'])\n st.header('Best Parameters')\n st.write(\"C\")\n st.write(f\"$2^{{{best_C_power:.1f}}}$ = {grid.best_params_['C']:.6f}\")\n st.write(f\"\")\nwith metrics3:\n best_gamma_power = np.log2(grid.best_params_['gamma'])\n st.header('󠀠󠀠‎')\n st.write(\"γ\")\n st.write(f\"$2^{{{best_gamma_power:.1f}}}$ = {grid.best_params_['gamma']:.6f}\")\n\n# Create visualization data with means and standard deviations\nresults = pd.DataFrame(grid.cv_results_)\nparam_results = pd.DataFrame({\n 'C': np.log2(results['param_C']),\n 'gamma': np.log2(results['param_gamma']),\n 'score': results['mean_test_score']\n})\n\n# Calculate means and standard errors for C\nC_stats = param_results.groupby('C').agg({\n 'score': ['mean', 'std', 'count']\n}).reset_index()\nC_stats.columns = ['C', 'mean_score', 'std_score', 'count']\nC_stats['stderr'] = C_stats['std_score'] / np.sqrt(C_stats['count'])\nC_stats['ci_upper'] = C_stats['mean_score'] + (2 * C_stats['stderr'])\nC_stats['ci_lower'] = C_stats['mean_score'] - (2 * C_stats['stderr'])\n\n# Calculate means and standard errors for gamma\ngamma_stats = param_results.groupby('gamma').agg({\n 'score': ['mean', 'std', 'count']\n}).reset_index()\ngamma_stats.columns = ['gamma', 'mean_score', 'std_score', 'count']\ngamma_stats['stderr'] = gamma_stats['std_score'] / np.sqrt(gamma_stats['count'])\ngamma_stats['ci_upper'] = gamma_stats['mean_score'] + (2 * gamma_stats['stderr'])\ngamma_stats['ci_lower'] = gamma_stats['mean_score'] - (2 * gamma_stats['stderr'])\n\n# Create heatmap\nst.header(\"Hyperparameter optimization\")\ncolor_schemes = ['yellowgreenblue', 'spectral', 'viridis', 'inferno', 'magma', 'plasma', 'turbo', 'greenblue', 'blues', 'reds', 'greens', 'purples', 'oranges']\nselected_color = st.selectbox('Select heatmap color scheme:', color_schemes)\n\n# Create heatmap with grid lines and selected color scheme\nheatmap = alt.Chart(param_results).mark_rect().encode(\n x=alt.X('C:Q', \n title='C parameter', \n scale=alt.Scale(domain=[C_power_range[0], C_power_range[1]]),\n axis=alt.Axis(grid=True, gridDash=[5,5])),\n y=alt.Y('gamma:Q', \n title='γ parameter', \n scale=alt.Scale(domain=[gamma_power_range[0], gamma_power_range[1]]),\n axis=alt.Axis(grid=True, gridDash=[5,5])),\n color=alt.Color('score:Q', \n title='Cross-validation Score',\n scale=alt.Scale(scheme=selected_color)),\n tooltip=['C', 'gamma', alt.Tooltip('score:Q', 
format='.3f')]\n).transform_window(\n row_number='row_number()'\n).transform_fold(['score']\n).properties(\n width=900,\n height=300,\n)\n\n# Add grid lines as a separate layer\ngrid = alt.Chart(param_results).mark_rule(color='darkgray', strokeOpacity=0.2).encode(\n x='C:Q'\n).properties(\n width=900,\n height=300\n) + alt.Chart(param_results).mark_rule(color='darkgray', strokeOpacity=0.2).encode(\n y='gamma:Q'\n).properties(\n width=900,\n height=300\n)\n\n# Combine heatmap and grid\nfinal_heatmap = (heatmap + grid)\nst.altair_chart(final_heatmap)\n\n# Define common Y axis title\ny_axis_title = 'Cross-validation Score'\n\n# Create C parameter plot with error bands\nc_line_base = alt.Chart(C_stats)\n\nc_line = c_line_base.mark_line().encode(\n x=alt.X('C:Q', title='C parameter', \n scale=alt.Scale(domain=[C_power_range[0], C_power_range[1]])),\n y=alt.Y('mean_score:Q', title=y_axis_title, scale=alt.Scale(zero=False))\n)\n\nc_points = c_line_base.mark_point(size=50).encode(\n x='C:Q',\n y=alt.Y('mean_score:Q', title=y_axis_title),\n tooltip=[\n alt.Tooltip('C:Q', title='C', format='.1f'),\n alt.Tooltip('mean_score:Q', title='Mean Score', format='.3f'),\n alt.Tooltip('std_score:Q', title='Std Dev', format='.3f')\n ]\n)\n\nc_errorbars = c_line_base.mark_errorbar().encode(\n x='C:Q',\n y=alt.Y('ci_lower:Q', title=y_axis_title),\n y2='ci_upper:Q'\n)\n\nc_band = c_line_base.mark_area(opacity=0.3).encode(\n x='C:Q',\n y=alt.Y('ci_lower:Q', title=y_axis_title),\n y2='ci_upper:Q'\n)\n\nc_plot = (c_band + c_line + c_errorbars + c_points).properties(\n width=400,\n height=300,\n)\n\n# Create gamma parameter plot with error bands\ngamma_line_base = alt.Chart(gamma_stats)\n\ngamma_line = gamma_line_base.mark_line().encode(\n x=alt.X('gamma:Q', title='γ parameter', \n scale=alt.Scale(domain=[gamma_power_range[0], gamma_power_range[1]])),\n y=alt.Y('mean_score:Q', title=y_axis_title, scale=alt.Scale(zero=False))\n)\n\ngamma_points = gamma_line_base.mark_point(size=50).encode(\n x='gamma:Q',\n y=alt.Y('mean_score:Q', title=y_axis_title),\n tooltip=[\n alt.Tooltip('gamma:Q', title='Gamma', format='.1f'),\n alt.Tooltip('mean_score:Q', title='Mean Score', format='.3f'),\n alt.Tooltip('std_score:Q', title='Std Dev', format='.3f')\n ]\n)\n\ngamma_errorbars = gamma_line_base.mark_errorbar().encode(\n x='gamma:Q',\n y=alt.Y('ci_lower:Q', title=y_axis_title),\n y2='ci_upper:Q'\n)\n\ngamma_band = gamma_line_base.mark_area(opacity=0.3).encode(\n x='gamma:Q',\n y=alt.Y('ci_lower:Q', title=y_axis_title),\n y2='ci_upper:Q'\n)\n\ngamma_plot = (gamma_band + gamma_line + gamma_errorbars + gamma_points).properties(\n width=400,\n height=300,\n)\n\ncol = st.columns(2)\nwith col[0]:\n st.altair_chart(c_plot)\nwith col[1]:\n st.altair_chart(gamma_plot)", 40 | "execution_count": null 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "6e59b550-b740-4c15-a23e-a510b85762ce", 45 | "metadata": { 46 | "name": "cell2", 47 | "collapsed": false 48 | }, 49 | "source": "## Resources\n\n- An overview of [Snowflake Notebooks](https://www.snowflake.com/en/data-cloud/notebooks/) and its capabilities.\n- About [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks) in the [Snowflake Documentation](https://docs.snowflake.com/).\n- Further information on the use of Streamlit can be found at the [Streamlit Docs](https://docs.streamlit.io/)." 
50 | } 51 | ] 52 | } -------------------------------------------------------------------------------- /Build and Optimize Machine Learning Models with Streamlit/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - altair=* 6 | - numpy=* 7 | - pandas=* 8 | - scikit-learn=* 9 | -------------------------------------------------------------------------------- /Creating Snowflake Object using Python API/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - snowflake=0.8.0 6 | -------------------------------------------------------------------------------- /Dashboard_with_Streamlit/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - numpy=* 6 | - pandas=* 7 | -------------------------------------------------------------------------------- /Data_Analysis_with_LLM_RAG/Data_Analysis_with_LLM_RAG.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | }, 7 | "lastEditStatus": { 8 | "notebookId": "7vfpxlcc5brsm6magpsd", 9 | "authorId": "6841714608330", 10 | "authorName": "CHANINN", 11 | "authorEmail": "chanin.nantasenamat@snowflake.com", 12 | "sessionId": "248cc86f-5bc6-4821-99fc-2eb76b036f89", 13 | "lastEditTime": 1739213397874 14 | } 15 | }, 16 | "nbformat_minor": 5, 17 | "nbformat": 4, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "id": "414e046d-9d1c-4919-9914-a9ca160084b3", 22 | "metadata": { 23 | "name": "md_title", 24 | "collapsed": false 25 | }, 26 | "source": "# Data Analysis with LLM RAG in Snowflake Notebooks\n\nA notebook that answer questions about data via the use of an LLM reasoning model namely the DeepSeek-R1.\n\nHere's what we're implementing to investigate the tables:\n1. Retrieve penguins data\n2. Convert table to a DataFrame\n3. Create a text box for accepting user input\n4. Generate LLM response to answer questions about the data" 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "id": "d069b3b5-7abe-4a46-a359-9b321ee539d8", 31 | "metadata": { 32 | "name": "md_retrieve_data", 33 | "collapsed": false 34 | }, 35 | "source": "## 1. Retrieve penguins data\n\nWe'll start by performing a simple SQL query to retrieve the penguins data." 36 | }, 37 | { 38 | "cell_type": "code", 39 | "id": "8d50cbf4-0c8d-4950-86cb-114990437ac9", 40 | "metadata": { 41 | "language": "sql", 42 | "name": "sql_output", 43 | "codeCollapsed": false, 44 | "collapsed": false 45 | }, 46 | "source": "SELECT * FROM CHANINN_DEMO_DATA.PUBLIC.PENGUINS", 47 | "execution_count": null, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "id": "40ea697a-bca6-400b-b1c4-0a1eb90948b6", 53 | "metadata": { 54 | "name": "md_dataframe", 55 | "collapsed": false 56 | }, 57 | "source": "## 2. Convert table to a DataFrame\n\nNext, we'll convert the table to a Pandas DataFrame." 
58 | }, 59 | { 60 | "cell_type": "code", 61 | "id": "115fa0b9-4adb-413f-ad7c-34037e9f341d", 62 | "metadata": { 63 | "language": "python", 64 | "name": "df", 65 | "collapsed": false 66 | }, 67 | "outputs": [], 68 | "source": "sql_output.to_pandas()", 69 | "execution_count": null 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "id": "1ef20081-c6f2-4e3e-8191-e9477e356a4c", 74 | "metadata": { 75 | "name": "md_helper", 76 | "collapsed": false 77 | }, 78 | "source": "## 3. Create helper functions\n\nHere, we'll create several helper functions that will be used in the forthcoming app that we're developing.\n1. `generate_deepseek_response()` - accepts user-provided `prompt` as input query model. Briefly, the input box allow users to ask questions about data and that will be assigned to the `prompt` variable." 79 | }, 80 | { 81 | "cell_type": "code", 82 | "id": "c695373e-ac74-4b62-a1f1-08206cbd5c81", 83 | "metadata": { 84 | "language": "python", 85 | "name": "py_helper", 86 | "codeCollapsed": false, 87 | "collapsed": false 88 | }, 89 | "source": "# Helper function\ndef generate_deepseek_response(prompt):\n cortex_prompt = f\"'[INST] {prompt} [/INST]'\"\n prompt_data = [{'role': 'user', 'content': cortex_prompt}]\n prompt_json = escape_sql_string(json.dumps(prompt_data))\n response = session.sql(\n \"select snowflake.cortex.complete(?, ?)\", \n params=['deepseek-r1', prompt_json]\n ).collect()[0][0]\n \n return response\n\ndef extract_think_content(response):\n think_pattern = r'(.*?)'\n think_match = re.search(think_pattern, response, re.DOTALL)\n \n if think_match:\n think_content = think_match.group(1).strip()\n main_response = re.sub(think_pattern, '', response, flags=re.DOTALL).strip()\n return think_content, main_response\n return None, response\n\ndef escape_sql_string(s):\n return s.replace(\"'\", \"''\")", 90 | "execution_count": null, 91 | "outputs": [] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "d2e6771a-80c6-474c-ac2d-46ada30dbb5d", 96 | "metadata": { 97 | "name": "md_app", 98 | "collapsed": false 99 | }, 100 | "source": "## Create the Asking about Penguins app\n\nNow that we have the data and helper functions ready, let's wrap up by creating the app.\n\n" 101 | }, 102 | { 103 | "cell_type": "code", 104 | "id": "8b8bcc88-fcb1-4abc-ad40-91a42fca5314", 105 | "metadata": { 106 | "language": "python", 107 | "name": "py_app", 108 | "collapsed": false, 109 | "codeCollapsed": false 110 | }, 111 | "outputs": [], 112 | "source": "import streamlit as st\nfrom snowflake.snowpark.context import get_active_session\nimport json\nimport pandas as pd\nimport re\n\n# Write directly to the app\nst.title(\"🐧 Ask about Penguins\")\n\n# Get the current credentials\nsession = get_active_session()\n\n# df = sql_output.to_pandas()\n\nuser_queries = [\"Which penguins has the longest bill length?\",\n \"Where do the heaviest penguins live?\",\n \"Which penguins has the shortest flippers?\"]\n\nquestion = st.selectbox(\"What would you like to know?\", user_queries)\n# question = st.text_input(\"Ask a question\", user_queries[0])\n\nprompt = [\n {\n 'role': 'system',\n 'content': 'You are a helpful assistant that uses provided data to answer natural language questions.'\n },\n {\n 'role': 'user',\n 'content': (\n f'The user has asked a question: {question}. 
'\n f'Please use this data to answer the question: {df.to_markdown(index=False)}'\n )\n },\n {\n 'temperature': 0.7,\n 'max_tokens': 1000,\n 'guardrails': True\n }\n]\n\ndf\n\nif st.button(\"Submit\"):\n status_container = st.status(\"Thinking ...\", expanded=True)\n with status_container:\n response = generate_deepseek_response(prompt)\n think_content, main_response = extract_think_content(response)\n if think_content:\n st.write(think_content)\n \n status_container.update(label=\"Thoughts\", state=\"complete\", expanded=False)\n st.markdown(main_response)", 113 | "execution_count": null 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "id": "c6e6119e-3a35-4c28-ac37-26f71d24e62b", 118 | "metadata": { 119 | "name": "md_resources", 120 | "collapsed": false 121 | }, 122 | "source": "## Want to learn more?\n\n- More about [palmerpenguins](https://allisonhorst.github.io/palmerpenguins/) data set.\n- More about [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake)\n- For more inspiration on how to use Streamlit widgets in Notebooks, check out [Streamlit Docs](https://docs.streamlit.io/) and this list of what is currently supported inside [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake#label-notebooks-streamlit-support)" 123 | } 124 | ] 125 | } 126 | -------------------------------------------------------------------------------- /Data_Analysis_with_LLM_RAG/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - tabulate=* 6 | -------------------------------------------------------------------------------- /End-to-End Machine Learning with Snowpark ML/1_sf_nb_snowpark_ml_data_ingest.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## This repo has been moved\n", 8 | "\n", 9 | "Visit [this Github repo](https://github.com/Snowflake-Labs/sfguide-intro-to-machine-learning-with-snowflake-ml-for-python) to see the full quickstart source code." 
10 | ] 11 | } 12 | ], 13 | "metadata": { 14 | "kernelspec": { 15 | "display_name": "Python 3 (ipykernel)", 16 | "language": "python", 17 | "name": "python3" 18 | }, 19 | "language_info": { 20 | "codemirror_mode": { 21 | "name": "ipython", 22 | "version": 3 23 | }, 24 | "file_extension": ".py", 25 | "mimetype": "text/x-python", 26 | "name": "python", 27 | "nbconvert_exporter": "python", 28 | "pygments_lexer": "ipython3", 29 | "version": "3.11.5" 30 | } 31 | }, 32 | "nbformat": 4, 33 | "nbformat_minor": 4 34 | } 35 | -------------------------------------------------------------------------------- /End-to-End Machine Learning with Snowpark ML/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - matplotlib=3.7.2 6 | - seaborn=0.12.2 7 | - snowflake-ml-python=1.3.1 8 | -------------------------------------------------------------------------------- /Feature Store Quickstart/Feature Store Quickstart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8cc93dd0", 6 | "metadata": {}, 7 | "source": [ 8 | "## This repo has been moved\n", 9 | "\n", 10 | "Visit [this Github repo](https://github.com/Snowflake-Labs/sfguide-intro-to-feature-store-using-snowflake-notebooks) to see the full quickstart source code." 11 | ] 12 | } 13 | ], 14 | "metadata": { 15 | "kernelspec": { 16 | "display_name": "Python 3 (ipykernel)", 17 | "language": "python", 18 | "name": "python3" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.8.19" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 5 35 | } 36 | -------------------------------------------------------------------------------- /Fine tuning LLM using Snowflake Cortex AI/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - snowflake=0.8.0 6 | - streamlit=1.26.0 7 | -------------------------------------------------------------------------------- /Getting Started with Container Runtimes/README.md: -------------------------------------------------------------------------------- 1 | # Getting Started with Notebooks on Container Runtimes 2 | 3 | This example notebooks demonstrates how to get started using Snowflake's Container Runtime for Notebooks. It includes the setup, configuration, and execution of a straightforward machine learning training job. 4 | 5 | For more info about the Notebooks Container runtime, check out: 6 | - The [public documentation](https://docs.snowflake.com/LIMITEDACCESS/snowsight-notebooks/ui-snowsight-notebooks-runtime) 7 | - An [overview presentation](https://docs.google.com/presentation/d/1pModfkpZuoAsKiYAYfpcO50PxaFB460VCO6qeM_S66s/edit#slide=id.g293d2d1b46a_1_87) of the notebooks container runtime 8 | 9 | ## Setup 10 | 11 | **Note: as of July 15, 2024, the Notebooks Container Runtime is in Private Preview.** Make sure that your account is enabled for this Private Preview if you intend to run this example. 
12 | 13 | ### Step 1 - SQL Setup Script 14 | 15 | Run the following SQL code in a Snowflake SQL Worksheet to create the database objects, roles, privileges, and compute pools needed to run this example notebook. 16 | 17 | The setup script is to be run by ACCOUNTADMIN. However, a different role needs to be used to create and author notebooks. This role cannot be ACCOUNTADMIN, SECURITYADMIN, or ORGADMIN. In the example, we’re granting privileges to SYSADMIN which will then be used to create notebooks. Please choose a role that has the privilege of creating a table in a schema. 18 | 19 | ```sql 20 | ------------------ 21 | -- DEMO STEP #1 -- 22 | ------------------ 23 | -- General setup 24 | use role accountadmin; 25 | create database public; 26 | create schema notebooks; 27 | 28 | grant usage on database public to role sysadmin; 29 | grant usage on schema public.notebooks to role sysadmin; 30 | grant create stage on schema public.notebooks to role sysadmin; 31 | grant create notebook on schema public.notebooks to role sysadmin; 32 | grant create service on schema public.notebooks to role sysadmin; 33 | grant usage on warehouse compute_wh to role sysadmin; 34 | 35 | -- Create and grant access to compute pools 36 | CREATE COMPUTE POOL CPU_XS_5_NODES 37 | MIN_NODES = 1 38 | MAX_NODES = 5 39 | INSTANCE_FAMILY = CPU_X64_XS; 40 | 41 | CREATE COMPUTE POOL GPU_S_5_NODES 42 | MIN_NODES = 1 43 | MAX_NODES = 5 44 | INSTANCE_FAMILY = GPU_NV_S; 45 | 46 | grant usage on compute pool CPU_XS_5_NODES to role sysadmin; 47 | grant usage on compute pool GPU_S_5_NODES to role sysadmin; 48 | 49 | -- Create and grant access to EAIs 50 | -- Substep #1: create network rules (these are schema-level objects; end users do not need direct access to the network rules) 51 | 52 | create network rule allow_all_rule 53 | TYPE = 'HOST_PORT' 54 | MODE= 'EGRESS' 55 | VALUE_LIST = ('0.0.0.0:443','0.0.0.0:80'); 56 | 57 | -- Substep #2: create external access integration (these are account-level objects; end users need access to this to access the public internet with endpoints defined in network rules) 58 | 59 | CREATE EXTERNAL ACCESS INTEGRATION allow_all_integration 60 | ALLOWED_NETWORK_RULES = (allow_all_rule) 61 | ENABLED = true; 62 | 63 | CREATE OR REPLACE NETWORK RULE pypi_network_rule 64 | MODE = EGRESS 65 | TYPE = HOST_PORT 66 | VALUE_LIST = ('pypi.org', 'pypi.python.org', 'pythonhosted.org', 'files.pythonhosted.org'); 67 | 68 | CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION pypi_access_integration 69 | ALLOWED_NETWORK_RULES = (pypi_network_rule) 70 | ENABLED = true; 71 | 72 | Grant USAGE ON INTEGRATION allow_all_integration to ROLE sysadmin; 73 | Grant USAGE ON INTEGRATION pypi_access_integration to ROLE sysadmin; 74 | ``` 75 | 76 | Additional information about compute pool configurations is available in [the documentation](https://docs.snowflake.com/developer-guide/snowpark-container-services/working-with-compute-pool). 77 | 78 | ### Step 2 - Upload `diamonds.csv` Data 79 | 80 | Next, we will upload the [diamonds.csv](https://github.com/Snowflake-Labs/snowflake-demo-notebooks/tree/main/Getting%20Started%20with%20Container%20Runtimes/diamonds.csv) dataset included in this git repo. 81 | 82 | In Snowsight, navigate to **Data >> Databases** and select the database.schema where the role has privileges to create a table. For example, we'll be using `SYSADMIN` to upload the dataset and create a table out of it in the schema `PUBLIC.NOTEBOOKS`. 
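If you prefer to script this step rather than click through the UI, a rough SQL sketch is shown below. Treat it as a sketch only: the stage name `DIAMONDS_STAGE` is made up for illustration, and the column list assumes the standard diamonds layout (carat, cut, color, clarity, depth, table, price, x, y, z), so adjust both to match your environment and the actual file.

```sql
-- Hypothetical scripted alternative to the Snowsight upload described in this step.
-- Assumes the SYSADMIN role and the PUBLIC.NOTEBOOKS schema created in Step 1.
USE ROLE SYSADMIN;
USE SCHEMA PUBLIC.NOTEBOOKS;

-- Stage name is illustrative only
CREATE STAGE IF NOT EXISTS DIAMONDS_STAGE;

-- Run the PUT from a local SnowSQL / Snowflake CLI session (PUT is client-side):
-- PUT file:///path/to/diamonds.csv @DIAMONDS_STAGE AUTO_COMPRESS=TRUE;

-- Column list assumes the standard diamonds dataset; verify against diamonds.csv
CREATE TABLE IF NOT EXISTS DIAMONDS (
    CARAT FLOAT, CUT STRING, COLOR STRING, CLARITY STRING,
    DEPTH FLOAT, "TABLE" FLOAT,   -- TABLE is a reserved word, hence the quotes
    PRICE NUMBER, X FLOAT, Y FLOAT, Z FLOAT
);

COPY INTO DIAMONDS
  FROM @DIAMONDS_STAGE/diamonds.csv.gz
  FILE_FORMAT = (TYPE = CSV SKIP_HEADER = 1 FIELD_OPTIONALLY_ENCLOSED_BY = '"');
```

Otherwise, continue with the Snowsight flow: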
Select **Create >> Table >> From File >> Standard** in the top right, and upload the `diamonds.csv` dataset. 83 | ![diamonds upload](./assets/diamonds_upload.png) 84 | 85 | ### Step 3 - Import the `getting_started_with_container_runtimes.ipynb` file and create a notebook 86 | 87 | Using the `SYSADMIN` role, navigate to the **Notebooks** page on Snowsight, and select the upload button to `Import .ipynb file`. 88 | ![notebook upload](./assets/notebook_upload.png) 89 | 90 | Fill out the creation dialog using the schema, warehouse, and compute pool set up in Step #1. 91 | ![notebook setup](./assets/notebook_setup.png) 92 | 93 | ### Step 4 - Attach External Access Integrations (EAIs) 94 | 95 | Navigate to the notebook settings via the three dots in the top right-hand corner, and select the External Accesses tab. Toggle on the `allow_all_integration` EAI. 96 | ![configure EAI](./assets/eai.png) 97 | 98 | ### Step 5 - Run the notebook! 99 | 100 | You're now ready to run the notebook! Check out the Notebook Markdown cells for an explanation of what is happening at each step along the way. 101 | 102 | ## Additional Resources 103 | - [Documentation](https://docs.snowflake.com/LIMITEDACCESS/snowsight-notebooks/ui-snowsight-notebooks-runtime) 104 | - [YouTube Tutorials](https://www.youtube.com/playlist?list=PLavJpcg8cl1Efw8x_fBKmfA2AMwjUaeBI) 105 | - [GitHub repo](https://github.com/Snowflake-Labs/snowflake-demo-notebooks) of more example notebooks -------------------------------------------------------------------------------- /Getting Started with Container Runtimes/assets/diamonds_upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Getting Started with Container Runtimes/assets/diamonds_upload.png -------------------------------------------------------------------------------- /Getting Started with Container Runtimes/assets/eai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Getting Started with Container Runtimes/assets/eai.png -------------------------------------------------------------------------------- /Getting Started with Container Runtimes/assets/notebook_setup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Getting Started with Container Runtimes/assets/notebook_setup.png -------------------------------------------------------------------------------- /Getting Started with Container Runtimes/assets/notebook_upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Getting Started with Container Runtimes/assets/notebook_upload.png -------------------------------------------------------------------------------- /Getting Started with Container Runtimes/getting_started_with_container_runtimes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "07e67d82-cb27-4518-b025-b74c117c5637", 6 | "metadata": { 7 | "collapsed": false, 8 | "name": "cell1" 9 | }, 10 | "source": [ 11 | "# Welcome to the 
Notebooks Container Runtime!\n", 12 | "\n", 13 | "Make sure you've completed all of the setup instructions outlined in the [README]() file prior to running this Notebook.\n", 14 | "\n", 15 | "- Have you uploaded the data?\n", 16 | "- Have you configured the EAI?\n", 17 | "\n", 18 | "If so, proceed!" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "2a609d6f-f3de-4b32-9731-1411db287f9f", 25 | "metadata": { 26 | "collapsed": false, 27 | "language": "python", 28 | "name": "cell2" 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "import warnings\n", 33 | "warnings.filterwarnings(\"ignore\")\n", 34 | "\n", 35 | "from snowflake.snowpark.context import get_active_session\n", 36 | "session = get_active_session()\n", 37 | "# Add a query tag to the session. This helps with troubleshooting and performance monitoring.\n", 38 | "session.query_tag = {\"origin\":\"sf_sit-is\", \n", 39 | " \"name\":\"aiml_notebooks_xgboost_on_gpu\", \n", 40 | " \"version\":{\"major\":1, \"minor\":0},\n", 41 | " \"attributes\":{\"is_quickstart\":1, \"source\":\"notebook\"}}" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "e6b51bc3-e121-4b6c-a84f-20f04eb1f28a", 48 | "metadata": { 49 | "collapsed": false, 50 | "language": "python", 51 | "name": "cell3" 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "!pip freeze" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "id": "507dda4f-a92a-4144-b715-3c9a5b994eb7", 61 | "metadata": { 62 | "collapsed": false, 63 | "name": "cell4" 64 | }, 65 | "source": [ 66 | "Notebooks Container Runtime, along with External Access Integrations give us the flexibility to `pip install` packages from anywhere, including popular package repositories such as pypi. You can install whatever packages you need by running `!pip install ` directly in the Notebook." 
67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "id": "01982269-5dac-46a6-8af6-2b495e65862f", 73 | "metadata": { 74 | "language": "python", 75 | "name": "cell5" 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "!pip install seaborn" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "id": "ce5d7e1e-2323-428b-ad5d-dbab1b0f34a8", 85 | "metadata": { 86 | "name": "cell6" 87 | }, 88 | "source": [ 89 | "Just like Notebooks on the Warehouse Runtime, we can intermingle both SQL and Python cells:" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "id": "78126cdd-9f6e-4524-ac92-b12d915255ae", 96 | "metadata": { 97 | "collapsed": false, 98 | "language": "sql", 99 | "name": "cell7" 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "show tables;" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "id": "b43cb438-746d-476d-8d00-a5fc4cd67648", 109 | "metadata": { 110 | "collapsed": false, 111 | "name": "cell8" 112 | }, 113 | "source": [ 114 | "Let's visualize some of our data using the `seaborn` package that we installed above:" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "id": "23f0f888-3d70-42c4-9071-bc366c861a52", 121 | "metadata": { 122 | "collapsed": false, 123 | "language": "python", 124 | "name": "cell9" 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "diamonds_df = session.table(\"DIAMONDS\")\n", 129 | "diamonds_df.show()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "id": "63e2849a-df59-45d2-81e1-14b7880601fc", 136 | "metadata": { 137 | "language": "python", 138 | "name": "cell10" 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "df = diamonds_df.to_pandas()\n", 143 | "\n", 144 | "import seaborn as sns\n", 145 | "\n", 146 | "# Create a visualization\n", 147 | "sns.histplot(\n", 148 | " data=df,\n", 149 | " x=\"PRICE\"\n", 150 | ")" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "id": "3020ac4d-058f-49aa-9686-ca0558d1a97b", 156 | "metadata": { 157 | "collapsed": false, 158 | "name": "cell11" 159 | }, 160 | "source": [ 161 | "Now, let's train a basic `XGBRegressor` machine learning model. The ML Container Runtime for Snowflake Notebooks includes pre-installed common packages for doing machine learning tasks, including SnowparkML and other OSS packages." 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "id": "53aad007-803a-4120-b227-596caa842cba", 168 | "metadata": { 169 | "language": "python", 170 | "name": "cell12" 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "import time\n", 175 | "from snowflake.ml.modeling.xgboost import XGBRegressor\n", 176 | "\n", 177 | "CATEGORICAL_COLUMNS = [\"CUT\", \"COLOR\", \"CLARITY\"]\n", 178 | "NUMERICAL_COLUMNS = [\"CARAT\", \"DEPTH\", \"X\", \"Y\", \"Z\"]\n", 179 | "LABEL_COLUMNS = ['PRICE']\n", 180 | "diamonds_df = session.table(\"diamonds\")\n", 181 | "\n", 182 | "model = XGBRegressor(max_depth=400, input_cols=NUMERICAL_COLUMNS, label_cols=LABEL_COLUMNS)\n", 183 | "\n", 184 | "t0 = time.time()\n", 185 | "model.fit(diamonds_df)\n", 186 | "\n", 187 | "t1 = time.time()\n", 188 | "\n", 189 | "print(f\"Fit in {t1-t0} seconds.\")" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "id": "159e14c5", 195 | "metadata": { 196 | "name": "cell13" 197 | }, 198 | "source": [ 199 | "SnowparkML on the container runtime automatically captures various logs and metrics associated with your training job. 
We can run some quick functions to fetch, print, or even visualize those metrics:" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "id": "63126a6a-3c5c-4877-8b69-3e31e65e6587", 206 | "metadata": { 207 | "language": "python", 208 | "name": "cell14" 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "# utils\n", 213 | "import requests\n", 214 | "\n", 215 | "### Get logs depending on type\n", 216 | "def fetch_log(log_type):\n", 217 | " file_path = f'/var/log/managedservices/{log_type}/mlrs/logs-mlrs.log'\n", 218 | " with open(file_path, 'r') as file:\n", 219 | " # Read the contents of the file\n", 220 | " file_contents = file.read()\n", 221 | " return file_contents\n", 222 | "\n", 223 | "### Get response text\n", 224 | "def fetch_metrics(port):\n", 225 | " metrics_url = f\"http://localhost:{port}/metrics\"\n", 226 | " response = requests.get(metrics_url)\n", 227 | " return response.text\n", 228 | "\n", 229 | "def list_mlrs_metrics():\n", 230 | " txt = fetch_metrics(11501)\n", 231 | " metrics_name_and_value = {}\n", 232 | " for line in txt.split(\"\\n\")[:-1]:\n", 233 | " if not line.startswith(\"#\"):\n", 234 | " tokens = line.split(\" \")\n", 235 | " name, value = tokens[0], tokens[1]\n", 236 | " metrics_name_and_value[name] = value\n", 237 | " elif line.startswith(\"# HELP\"):\n", 238 | " tokens = line.split(\" \")\n", 239 | " return metrics_name_and_value" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "id": "c655b9b5-f07f-4906-9530-761145ded013", 246 | "metadata": { 247 | "language": "python", 248 | "name": "cell15" 249 | }, 250 | "outputs": [], 251 | "source": [ 252 | "print(\"train attempt\", list_mlrs_metrics()['train_attempts_total'])" 253 | ] 254 | } 255 | ], 256 | "metadata": { 257 | "kernelspec": { 258 | "display_name": "Streamlit Notebook", 259 | "name": "streamlit" 260 | } 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 5 264 | } 265 | -------------------------------------------------------------------------------- /Getting started with Snowpark using Snowflake Notebooks/Getting Started with Snowpark using Snowflake notebooks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "e41f588b", 6 | "metadata": {}, 7 | "source": [ 8 | "## This repo has been moved\n", 9 | "\n", 10 | "Visit [this Github repo](https://github.com/Snowflake-Labs/sfguide-getting-started-with-snowpark-in-worksheets-notebooks) to see the full quickstart source code." 
11 | ] 12 | } 13 | ], 14 | "metadata": { 15 | "kernelspec": { 16 | "display_name": "Streamlit Notebook", 17 | "name": "streamlit" 18 | } 19 | }, 20 | "nbformat": 4, 21 | "nbformat_minor": 5 22 | } 23 | -------------------------------------------------------------------------------- /Getting started with Snowpark using Snowflake Notebooks/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - snowflake=0.8.0 6 | -------------------------------------------------------------------------------- /Hyperparameter Tuning with sklearn/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - scikit-learn=1.3.0 6 | - python=3.8.* 7 | - snowbooks=1.27.0 8 | - streamlit=1.26.0 9 | -------------------------------------------------------------------------------- /Image_Processing_Pipeline_Stream_Task_Cortex_Complete/Image_Processing_Pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | }, 7 | "lastEditStatus": { 8 | "notebookId": "n54d2mm74cvdxf25chvs", 9 | "authorId": "94022846931", 10 | "authorName": "DASH", 11 | "authorEmail": "dash.desai@snowflake.com", 12 | "sessionId": "f4f1ed7a-3ad8-43ab-9e3f-102f3f6fd367", 13 | "lastEditTime": 1744728063667 14 | } 15 | }, 16 | "nbformat_minor": 5, 17 | "nbformat": 4, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "id": "28916a15-ea2d-47ca-8d1f-75dc395fdcae", 22 | "metadata": { 23 | "name": "Overview", 24 | "collapsed": false 25 | }, 26 | "source": "# Image Processing Pipeline using Snowflake Cortex\n\nThis notebooks demonstrates the implementation of an image processing pipeline using [Streams](https://docs.snowflake.com/en/user-guide/streams-intro), [Tasks](https://docs.snowflake.com/en/user-guide/tasks-intro) and [SNOWFLAKE.CORTEX.COMPLETE multimodal](https://docs.snowflake.com/en/sql-reference/functions/complete-snowflake-cortex-multimodal) capability. (*Currently in Public Preview.*)" 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "id": "db0e5507-9aa1-4115-a642-65709994bad5", 31 | "metadata": { 32 | "name": "_Step1", 33 | "collapsed": false 34 | }, 35 | "source": "Step 1: Create Snowflake managed stage to store sample images." 36 | }, 37 | { 38 | "cell_type": "code", 39 | "id": "0eb15096-8d11-48b2-abc3-0250ed43c599", 40 | "metadata": { 41 | "language": "sql", 42 | "name": "Create_Stage" 43 | }, 44 | "outputs": [], 45 | "source": "CREATE stage GENAI_IMAGES encryption = (TYPE = 'SNOWFLAKE_SSE') directory = ( ENABLE = true );", 46 | "execution_count": null 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "id": "e5ebef76-111f-4652-b301-586a9fb1ea7b", 51 | "metadata": { 52 | "name": "_Step2", 53 | "collapsed": false 54 | }, 55 | "source": "Step 2: Download two sample images provided below and upload them on stage `GENAI_IMAGES`. 
[Learn how](https://docs.snowflake.com/en/user-guide/data-load-local-file-system-stage-ui?_fsi=oZm563yp&_fsi=oZm563yp#upload-files-onto-a-named-internal-stage)\n\nSample images:\n- https://sfquickstarts.s3.us-west-1.amazonaws.com/misc/images/other/sample-img-1.png\n- https://sfquickstarts.s3.us-west-1.amazonaws.com/misc/images/other/sample-img-2.jpg\n\n\n*Note: Sample images provided courtesy of [Dash](https://natureunraveled.com/).*" 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "id": "21d0374d-5467-4922-8fa5-e118ca0e5310", 60 | "metadata": { 61 | "name": "_Step3", 62 | "collapsed": false 63 | }, 64 | "source": "Step 3: Create Stream `images_stream` on stage `GENAI_IMAGES` to detect changes." 65 | }, 66 | { 67 | "cell_type": "code", 68 | "id": "7b1d037f-d0f4-44e1-8443-afd4da31face", 69 | "metadata": { 70 | "language": "sql", 71 | "name": "Create_Stream" 72 | }, 73 | "outputs": [], 74 | "source": "CREATE OR REPLACE STREAM images_stream ON STAGE GENAI_IMAGES;", 75 | "execution_count": null 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "id": "15a8d1c1-449e-4e26-8435-b2c19affe343", 80 | "metadata": { 81 | "name": "_Step4", 82 | "collapsed": false 83 | }, 84 | "source": "Step 4: Create target table `image_analysis` to store image analysis." 85 | }, 86 | { 87 | "cell_type": "code", 88 | "id": "917a7304-f0d1-4445-a91e-8b355c8b2db1", 89 | "metadata": { 90 | "language": "sql", 91 | "name": "Create_Target_Table" 92 | }, 93 | "outputs": [], 94 | "source": "CREATE OR REPLACE TABLE image_analysis \nas \nSELECT RELATIVE_PATH,SNOWFLAKE.CORTEX.COMPLETE('pixtral-large',\n 'Put image filename in an attribute called \"Image.\"\n Put a short title in title case in an attribute called \"Title\".\n Put a 200-word detailed summary summarizing the image in an attribute called \"Summary\"', \n TO_FILE('@GENAI_IMAGES', RELATIVE_PATH)) as image_classification \nfrom directory(@GENAI_IMAGES);", 95 | "execution_count": null 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "id": "53594c24-762c-48d1-8572-c3f17a98a1e2", 100 | "metadata": { 101 | "name": "_step5", 102 | "collapsed": false 103 | }, 104 | "source": "Step 5: Preview image analysis produced on the sample images" 105 | }, 106 | { 107 | "cell_type": "code", 108 | "id": "d11b5868-3892-447a-bd54-cd58932ead67", 109 | "metadata": { 110 | "language": "sql", 111 | "name": "Preview_Images" 112 | }, 113 | "outputs": [], 114 | "source": "select * from image_analysis;", 115 | "execution_count": null 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "id": "565ef0dd-9ed7-4deb-b2ea-1710a6449ca8", 120 | "metadata": { 121 | "name": "_Step6", 122 | "collapsed": false 123 | }, 124 | "source": "Step 6: Create Task `image_analysis_task` to process new images uploaded on stage `GENAI_IMAGES` using SNOWFLAKE.CORTEX.COMPLETE() multimodal capability." 
125 | }, 126 | { 127 | "cell_type": "code", 128 | "id": "d80b2f3e-c82e-4281-8ef0-4897bcae5d86", 129 | "metadata": { 130 | "language": "sql", 131 | "name": "Create_Task" 132 | }, 133 | "outputs": [], 134 | "source": "CREATE OR REPLACE TASK image_analysis_task\nSCHEDULE = '1 minute'\nWHEN\n SYSTEM$STREAM_HAS_DATA('images_stream')\nAS\n INSERT INTO image_analysis (RELATIVE_PATH, image_classification)\n SELECT RELATIVE_PATH,SNOWFLAKE.CORTEX.COMPLETE('pixtral-large',\n 'Put image filename in an attribute called \"Image.\"\n Put a short title in title case in an attribute called \"Title\".\n Put a 200-word detailed summary summarizing the image in an attribute called \"Summary\"', \n TO_FILE('@GENAI_IMAGES', RELATIVE_PATH)) as image_classification \n from images_stream;\n\n-- NOTE: Tasks are suspended by default so let's resume it.\nALTER TASK image_analysis_task RESUME;", 135 | "execution_count": null 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "id": "5fc732cd-b4d1-4487-a877-b7507519aa8a", 140 | "metadata": { 141 | "name": "_Step7", 142 | "collapsed": false 143 | }, 144 | "source": "Step 7: Confirm Task status " 145 | }, 146 | { 147 | "cell_type": "code", 148 | "id": "1b629f24-ab24-4ce8-bdd4-936d82d83b00", 149 | "metadata": { 150 | "language": "sql", 151 | "name": "Task_Status" 152 | }, 153 | "outputs": [], 154 | "source": "SHOW TASKS like 'image_analysis_task';", 155 | "execution_count": null 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "id": "2fb915bd-c5ed-4be8-8863-5a8d71e3e344", 160 | "metadata": { 161 | "name": "_Step8", 162 | "collapsed": false 163 | }, 164 | "source": "Step 8: Download new sample image provided below and upload it on stage `GENAI_IMAGES`. [Learn how](https://docs.snowflake.com/en/user-guide/data-load-local-file-system-stage-ui?_fsi=oZm563yp&_fsi=oZm563yp#upload-files-onto-a-named-internal-stage)\n\nSample image:\n- https://sfquickstarts.s3.us-west-1.amazonaws.com/misc/images/other/sample-img-3.jpg\n\n*Note: Sample image provided courtesy of [Dash](https://natureunraveled.com/).*" 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "id": "ae0b6047-de5a-43f4-bdb5-7b6dee3345ac", 169 | "metadata": { 170 | "name": "_Step9", 171 | "collapsed": false 172 | }, 173 | "source": "Step 9: Preview image analysis produced on the new sample image" 174 | }, 175 | { 176 | "cell_type": "code", 177 | "id": "e66b4b64-3987-4d54-af94-bbdb9eea3765", 178 | "metadata": { 179 | "language": "sql", 180 | "name": "Preview_New_Image" 181 | }, 182 | "outputs": [], 183 | "source": "select * from image_analysis;", 184 | "execution_count": null 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "id": "11acad0a-209b-4538-b447-ad57dd9c1d2e", 189 | "metadata": { 190 | "name": "_Step10", 191 | "collapsed": false 192 | }, 193 | "source": "Step 10: Suspend task" 194 | }, 195 | { 196 | "cell_type": "code", 197 | "id": "6e8ff070-38b7-4f60-88b6-b21e2113d8d4", 198 | "metadata": { 199 | "language": "sql", 200 | "name": "Suspend_Task" 201 | }, 202 | "outputs": [], 203 | "source": "ALTER TASK image_analysis_task SUSPEND;", 204 | "execution_count": null 205 | } 206 | ] 207 | } -------------------------------------------------------------------------------- /Image_Processing_Pipeline_Stream_Task_Cortex_Complete/Image_Processing_Pipeline.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Image_Processing_Pipeline_Stream_Task_Cortex_Complete/Image_Processing_Pipeline.pdf -------------------------------------------------------------------------------- /Import Package from Stage/Import Package from Stage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a78daa85-b3fa-4dd6-bde7-38371c64c08d", 6 | "metadata": { 7 | "collapsed": false, 8 | "name": "cell1" 9 | }, 10 | "source": [ 11 | "# Import custom package from stage into notebook\n", 12 | "\n", 13 | "If the Python package that you are looking to use is not available in Anaconda, then you can upload the package to a stage and import the package from stage. Here we show a simple example of importing a custom package into a notebook.\n", 14 | "\n", 15 | "| Feature | Availability |\n", 16 | "| -------------- | --------------|\n", 17 | "| Preview Feature — Private | Support for this feature is currently not in production and is available only to selected accounts. |" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "c8b3a287-6cb3-4525-b0ed-a2188d37993c", 23 | "metadata": { 24 | "collapsed": false, 25 | "name": "cell2" 26 | }, 27 | "source": [ 28 | "# Example Package\n", 29 | "\n", 30 | "Here is the Python package used in this example. It is a simple package with a single Python code file. You can download the `simple.zip` package [here](https://github.com/Snowflake-Labs/snowflake-demo-notebooks/tree/main/Import%20Package%20from%20Stage/simple.zip).\n", 31 | "\n", 32 | "## Create a test package\n", 33 | "```bash\n", 34 | "mkdir simple\n", 35 | "touch simple/__init__.py\n", 36 | "cat >> simple/__init__.py # Paste the source below.\n", 37 | "zip -r simple simple\n", 38 | "```\n", 39 | "\n", 40 | "Inside `simple/__init__.py`, we create a simple package that returns Hello world: \n", 41 | "\n", 42 | "```python\n", 43 | "import streamlit as st\n", 44 | "\n", 45 | "def greeting():\n", 46 | " return \"Hello world!\"\n", 47 | "\n", 48 | "def hi():\n", 49 | " st.write(greeting())\n", 50 | "```\n", 51 | "\n", 52 | "\n" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "id": "f36e4fd2-1c4b-4fec-8419-73036fd40d04", 58 | "metadata": { 59 | "collapsed": false, 60 | "name": "cell3" 61 | }, 62 | "source": [ 63 | "# Upload Package to Stage\n", 64 | "\n", 65 | "Next, we create a stage to upload the `simple.zip` package." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "ee92159e-eaa4-4eb2-a606-12003ae2ba43", 72 | "metadata": { 73 | "codeCollapsed": false, 74 | "collapsed": false, 75 | "language": "sql", 76 | "name": "cell4" 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "-- create a stage for the package.\n", 81 | "CREATE STAGE IF NOT EXISTS MY_PACKAGES;\n", 82 | "-- assign Query Tag to Session. This helps with performance monitoring and troubleshooting\n", 83 | "ALTER SESSION SET query_tag = '{\"origin\":\"sf_sit-is\",\"name\":\"notebook_demo_pack\",\"version\":{\"major\":1, \"minor\":0},\"attributes\":{\"is_quickstart\":0, \"source\":\"sql\", \"vignette\":\"import_package_stage\"}}';" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "35e0da06-7c20-410a-a66d-960cb0fa09a7", 89 | "metadata": { 90 | "collapsed": false, 91 | "name": "cell5" 92 | }, 93 | "source": [ 94 | "To upload the file to stage, you can run the following command. 
\n", 95 | "\n", 96 | "Using [snowcli](https://github.com/snowflakedb/snowflake-cli):\n", 97 | "\n", 98 | "```bash\n", 99 | "snow snowpark package upload --file simple.zip --stage MY_PACKAGES --overwrite\n", 100 | "```\n", 101 | "Alternatively, using [snowsql](https://docs.snowflake.com/en/user-guide/snowsql):\n", 102 | "\n", 103 | "```bash\n", 104 | "snowsql -q \"PUT file://simple.zip @MY_PACKAGES AUTO_COMPRESS=FALSE OVERWRITE=TRUE\"\n", 105 | "\n", 106 | "```\n" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "id": "16bb85c1-e3ac-45af-833e-51c84bb031c6", 113 | "metadata": { 114 | "codeCollapsed": false, 115 | "language": "sql", 116 | "name": "cell6" 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "LS @MY_PACKAGES;" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "id": "2ecef987-0162-407e-b739-3c38613253d7", 126 | "metadata": { 127 | "collapsed": false, 128 | "name": "cell7" 129 | }, 130 | "source": [ 131 | "## Upload the package using the Package Picker UI\n", 132 | "\n", 133 | "Now that the `simple.zip` package is on the stage, we can specify the path to this package in the Package Picker. \n", 134 | "\n", 135 | "- Click on the `Packages` dropdown \n", 136 | "- Navigate to `Stage Packages` tab\n", 137 | "- Enter the Stage Package Path as `@<database>.<schema>.my_packages/simple.zip` (all lowercase) where `<database>.<schema>` is the actual namespace of the stage " 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "id": "b38bb25b-5e17-4b70-bcd4-f602fe7554bd", 144 | "metadata": { 145 | "codeCollapsed": false, 146 | "collapsed": false, 147 | "language": "python", 148 | "name": "cell8" 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "import streamlit as st\n", 153 | "st.image(\"https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/main/Import%20Package%20from%20Stage/package_from_stage.png\")" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "id": "c0c1b8dd-b690-42a4-a330-0369e27f5d47", 159 | "metadata": { 160 | "name": "cell9" 161 | }, 162 | "source": [ 163 | "Now that this package is uploaded and you have restarted your notebook session, you can import the `simple` package." 
164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "id": "d576bf8f-92cd-4012-9aa3-af2ef5795c6c", 170 | "metadata": { 171 | "codeCollapsed": false, 172 | "collapsed": false, 173 | "language": "python", 174 | "name": "cell10" 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "import simple" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "id": "f2f327b4-b48a-4936-a671-87f81ac0748a", 185 | "metadata": { 186 | "codeCollapsed": false, 187 | "collapsed": false, 188 | "language": "python", 189 | "name": "cell11" 190 | }, 191 | "outputs": [], 192 | "source": [ 193 | "simple.hi()" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Streamlit Notebook", 200 | "name": "streamlit" 201 | } 202 | }, 203 | "nbformat": 4, 204 | "nbformat_minor": 5 205 | } 206 | -------------------------------------------------------------------------------- /Import Package from Stage/package_from_stage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Import Package from Stage/package_from_stage.png -------------------------------------------------------------------------------- /Import Package from Stage/simple.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Import Package from Stage/simple.zip -------------------------------------------------------------------------------- /Import Package from Stage/simple/__init__.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | def greeting(): 4 | return "Hello world!" 5 | 6 | def hi(): 7 | st.write(greeting()) 8 | 9 | -------------------------------------------------------------------------------- /Intro to Snowpark pandas/Intro to Snowpark pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "9997baf8", 6 | "metadata": {}, 7 | "source": [ 8 | "## This repo has been moved\n", 9 | "\n", 10 | "Visit [this Github repo](https://github.com/Snowflake-Labs/sfguide-getting-started-with-pandas-on-snowflake) to see the full quickstart source code." 
11 | ] 12 | } 13 | ], 14 | "metadata": { 15 | "kernelspec": { 16 | "display_name": "Python 3 (ipykernel)", 17 | "language": "python", 18 | "name": "python3" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.9.19" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 5 35 | } 36 | -------------------------------------------------------------------------------- /Intro to Snowpark pandas/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - modin=0.28.1 6 | - pandas=2.2.1 7 | -------------------------------------------------------------------------------- /Load CSV from S3/Load CSV from S3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "13f35857-7833-4c7a-820b-421f7156fc94", 6 | "metadata": { 7 | "collapsed": false, 8 | "name": "cell1" 9 | }, 10 | "source": [ 11 | "# How to load CSV files from stage to Snowflake Notebooks 📁\n", 12 | "\n", 13 | "In this example, we will show how you can load a CSV file from stage and create a table with Snowpark. \n", 14 | "\n", 15 | "First, let's use the `get_active_session` command to get the [session](https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session#snowflake.snowpark.Session) context variable to work with Snowpark as follows:" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "4babf2c9-2d53-48dc-9b2e-07cda9bcc03c", 22 | "metadata": { 23 | "codeCollapsed": false, 24 | "collapsed": false, 25 | "language": "python", 26 | "name": "cell2" 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "from snowflake.snowpark.context import get_active_session\n", 31 | "session = get_active_session()\n", 32 | "# Add a query tag to the session. This helps with troubleshooting and performance monitoring.\n", 33 | "session.query_tag = {\"origin\":\"sf_sit-is\", \n", 34 | " \"name\":\"notebook_demo_pack\", \n", 35 | " \"version\":{\"major\":1, \"minor\":0},\n", 36 | " \"attributes\":{\"is_quickstart\":1, \"source\":\"notebook\", \"vignette\":\"csv_from_s3\"}}\n", 37 | "print(session)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "id": "b8151396-3ae3-4991-8ef0-be82fc33f363", 43 | "metadata": { 44 | "collapsed": false, 45 | "name": "cell3" 46 | }, 47 | "source": [ 48 | "Next, we will create an [external stage](https://docs.snowflake.com/en/sql-reference/sql/create-stage) that references data files stored in a location outside of Snowflake, in this case, the data lives in a [S3 bucket](https://docs.snowflake.com/en/user-guide/data-load-s3-create-stage)." 
49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "id": "f7d7f866-a698-457f-8bd0-4deff26ba329", 55 | "metadata": { 56 | "codeCollapsed": false, 57 | "collapsed": false, 58 | "language": "sql", 59 | "name": "cell4" 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "CREATE STAGE IF NOT EXISTS TASTYBYTE_STAGE \n", 64 | "\tURL = 's3://sfquickstarts/frostbyte_tastybytes/';" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "614a9f59-b202-4102-81e8-192b66b656fd", 70 | "metadata": { 71 | "collapsed": false, 72 | "name": "cell5" 73 | }, 74 | "source": [ 75 | "Let's take a look at the files in the stage." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "id": "18fdb36a-f3f6-46b0-92db-e06a28b14867", 82 | "metadata": { 83 | "codeCollapsed": false, 84 | "collapsed": false, 85 | "language": "sql", 86 | "name": "cell6" 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "LS @TASTYBYTE_STAGE/app/app_orders;" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "9feb2dbb-8752-41c1-bd88-f2075e89f4ea", 96 | "metadata": { 97 | "collapsed": false, 98 | "name": "cell7" 99 | }, 100 | "source": [ 101 | "We can use [Snowpark DataFrameReader](https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/1.14.0/api/snowflake.snowpark.DataFrameReader) to read in the CSV file.\n", 102 | "\n", 103 | "By using the `infer_schema = True` option, Snowflake will automatically infer the schema based on data types present in CSV file, so that you don't need to specify the schema beforehand. " 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "id": "2bf5c75a-b4e8-4212-a645-b8d63102757d", 110 | "metadata": { 111 | "codeCollapsed": false, 112 | "language": "python", 113 | "name": "cell8" 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "# Create a DataFrame that is configured to load data from the CSV file.\n", 118 | "df = session.read.options({\"infer_schema\":True}).csv('@TASTYBYTE_STAGE/app/app_orders/app_order_detail.csv.gz')" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "id": "81196d0e-3979-46f1-b11d-871082171f61", 125 | "metadata": { 126 | "codeCollapsed": false, 127 | "language": "python", 128 | "name": "cell9" 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "df" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "id": "94b0bc16-c31c-4cf0-8bf0-f2fdcdbfac0f", 138 | "metadata": { 139 | "collapsed": false, 140 | "name": "cell10" 141 | }, 142 | "source": [ 143 | "Now that the data is loaded into a Snowpark DataFrame, we can work with the data using [Snowpark DataFrame API](https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.DataFrame). \n", 144 | "\n", 145 | "For example, I can compute descriptive statistics on the columns." 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "id": "bac152b7-8c98-4e0a-9ecc-42f2c104f49d", 152 | "metadata": { 153 | "codeCollapsed": false, 154 | "language": "python", 155 | "name": "cell11" 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "df.describe()" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "b5ff2c51-66d9-4ca4-a060-0b40286ae37c", 165 | "metadata": { 166 | "collapsed": false, 167 | "name": "cell12" 168 | }, 169 | "source": [ 170 | "We can write the dataframe into a table called `APP_ORDER` and query it with SQL. 
" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "id": "1f7b5940-47cb-438c-a666-817267b4bf39", 177 | "metadata": { 178 | "codeCollapsed": false, 179 | "collapsed": false, 180 | "language": "python", 181 | "name": "cell13" 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "df.write.mode(\"overwrite\").save_as_table(\"APP_ORDER\")" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "id": "90e335b9-f60a-4971-aec8-288f0470340b", 192 | "metadata": { 193 | "codeCollapsed": false, 194 | "collapsed": false, 195 | "language": "sql", 196 | "name": "cell14" 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "-- Preview the newly created APP_ORDER table\n", 201 | "SELECT * from APP_ORDER;" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "id": "966f07d5-d246-49da-b133-6ab39fb0578d", 207 | "metadata": { 208 | "collapsed": false, 209 | "name": "cell15" 210 | }, 211 | "source": [ 212 | "Finally, we show how you can read the table back to Snowpark via the `session.table` syntax." 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "id": "76dd9c74-019d-47ff-a462-10499503bace", 219 | "metadata": { 220 | "codeCollapsed": false, 221 | "collapsed": false, 222 | "language": "python", 223 | "name": "cell16" 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "df = session.table(\"APP_ORDER\")\n", 228 | "df" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "id": "ca22f85f-9073-44e6-a255-e34155b19bbb", 234 | "metadata": { 235 | "collapsed": false, 236 | "name": "cell17" 237 | }, 238 | "source": [ 239 | "From here, you can continue to query and process the data. " 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "id": "2ff779a9-c9ba-434d-b098-2564b9b6e337", 246 | "metadata": { 247 | "codeCollapsed": false, 248 | "language": "python", 249 | "name": "cell18" 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "df.groupBy('\"c4\"').count()" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "id": "792359f0-42fa-4639-b286-f8a8afeb1188", 260 | "metadata": { 261 | "codeCollapsed": false, 262 | "language": "sql", 263 | "name": "cell19" 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "-- Teardown table and stage created as part of this example\n", 268 | "DROP TABLE APP_ORDER;\n", 269 | "DROP STAGE TASTYBYTE_STAGE;" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "id": "d149c3c7-4a48-446e-a75f-beefc949790b", 275 | "metadata": { 276 | "collapsed": false, 277 | "name": "cell20" 278 | }, 279 | "source": [ 280 | "### Conclusion\n", 281 | "In this example, we took a look at how you can load a CSV file from an external stage to process and query the data in your notebook using Snowpark. You can learn more about how to work with your data using Snowpark Python [here](https://docs.snowflake.com/en/developer-guide/snowpark/python/index)." 
282 | ] 283 | } 284 | ], 285 | "metadata": { 286 | "kernelspec": { 287 | "display_name": "Streamlit Notebook", 288 | "name": "streamlit" 289 | } 290 | }, 291 | "nbformat": 4, 292 | "nbformat_minor": 5 293 | } 294 | -------------------------------------------------------------------------------- /MFA_Audit_of_Users/demo_data.csv: -------------------------------------------------------------------------------- 1 | USER_ID,NAME,CREATED_ON,DELETED_ON,LOGIN_NAME,DISPLAY_NAME,FIRST_NAME,LAST_NAME,EMAIL,MUST_CHANGE_PASSWORD,HAS_PASSWORD,COMMENT,DISABLED,SNOWFLAKE_LOCK,DEFAULT_WAREHOUSE,DEFAULT_NAMESPACE,DEFAULT_ROLE,EXT_AUTHN_DUO,EXT_AUTHN_UID,HAS_MFA,BYPASS_MFA_UNTIL,LAST_SUCCESS_LOGIN,EXPIRES_AT,LOCKED_UNTIL_TIME,HAS_RSA_PUBLIC_KEY,PASSWORD_LAST_SET_TIME,OWNER,DEFAULT_SECONDARY_ROLE,TYPE 2 | 42,John Doe,2023-01-15 09:00:00,,john_doe,John D.,John,Doe,john.doe@example.com,FALSE,TRUE,"Senior Developer",FALSE,FALSE,COMPUTE_WH,ANALYTICS,SYSADMIN,FALSE,,TRUE,,2024-09-27 08:30:00,,,TRUE,2024-03-15 10:00:00,ACCOUNTADMIN,DEVELOPER,INTERNAL 3 | 255,Jane Smith,2023-02-20 10:30:00,,jane_smith,Jane S.,Jane,Smith,jane.smith@example.com,FALSE,TRUE,"Database Administrator",FALSE,FALSE,DBA_WH,PUBLIC,SECURITYADMIN,TRUE,jsmith123,TRUE,,2024-09-26 17:45:00,,,FALSE,2024-02-01 14:30:00,ACCOUNTADMIN,SYSADMIN,INTERNAL 4 | 578,Robert Johnson,2023-03-10 11:45:00,,robert_johnson,Rob J.,Robert,Johnson,robert.johnson@example.com,TRUE,TRUE,"Sales",FALSE,FALSE,SALES_WH,SALES,SALES_ROLE,FALSE,,FALSE,,2024-09-25 09:15:00,,,FALSE,2024-09-25 09:00:00,USERADMIN,,INTERNAL 5 | 890,Emily Brown,2023-04-05 13:15:00,2024-08-01 16:00:00,emily_brown,Emily B.,Emily,Brown,emily.brown@example.com,FALSE,TRUE,"HR Manager",TRUE,FALSE,HR_WH,HR,HR_ADMIN,FALSE,,TRUE,,2024-07-31 11:30:00,,,FALSE,2024-01-10 08:45:00,ACCOUNTADMIN,,INTERNAL 6 | 952,Michael Lee,2023-05-12 14:30:00,,michael_lee,Mike L.,Michael,Lee,michael.lee@example.com,FALSE,TRUE,"CFO",FALSE,FALSE,FINANCE_WH,FINANCE,FINANCE_ADMIN,TRUE,mlee456,TRUE,,2024-09-27 10:00:00,,,TRUE,2024-06-20 16:15:00,ACCOUNTADMIN,AUDITOR,INTERNAL 7 | 1205,Sarah Wilson,2023-06-18 09:45:00,,sarah_wilson,Sarah W.,Sarah,Wilson,sarah.wilson@example.com,FALSE,TRUE,"Data Analyst",FALSE,FALSE,ANALYST_WH,MARKETING,ANALYST,FALSE,,FALSE,,2024-09-26 14:20:00,,,FALSE,2024-04-05 11:00:00,USERADMIN,,INTERNAL 8 | 2506,David Taylor,2023-07-22 11:00:00,,david_taylor,Dave T.,David,Taylor,david.taylor@example.com,FALSE,TRUE,"Software Engineer",FALSE,FALSE,DEV_WH,DEVELOPMENT,DEVELOPER,FALSE,,TRUE,,2024-09-25 16:40:00,,,FALSE,2024-05-12 09:30:00,SYSADMIN,,INTERNAL 9 | 3789,Lisa Anderson,2023-08-30 10:15:00,,lisa_anderson,Lisa A.,Lisa,Anderson,lisa.anderson@example.com,FALSE,TRUE,"BI Specialist",FALSE,FALSE,BI_WH,BUSINESS_INTEL,BI_ROLE,TRUE,landerson789,TRUE,,2024-09-27 11:10:00,,,FALSE,2024-07-01 13:45:00,ACCOUNTADMIN,,INTERNAL 10 | 5050,James Martinez,2023-09-14 15:30:00,,james_martinez,James M.,James,Martinez,james.martinez@example.com,FALSE,TRUE,"QA Engineer",FALSE,FALSE,QA_WH,TESTING,QA_ROLE,FALSE,,FALSE,,2024-09-26 09:50:00,,,TRUE,2024-08-05 10:20:00,SYSADMIN,DEVELOPER,INTERNAL 11 | 5555,Olivia Garcia,2023-10-05 12:45:00,,olivia_garcia,Olivia G.,Olivia,Garcia,olivia.garcia@example.com,FALSE,TRUE,"HR Specialist",FALSE,FALSE,HR_WH,HR,HR_ROLE,FALSE,,TRUE,,2024-09-25 13:30:00,2025-10-05 12:45:00,,FALSE,2024-09-01 15:00:00,USERADMIN,,INTERNAL 12 | -------------------------------------------------------------------------------- /MFA_Audit_of_Users/environment.yml: 
-------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - modin=* 6 | - pandas=* 7 | -------------------------------------------------------------------------------- /Monitoring_Table_Size_with_Streamlit/Monitoring_Table_Size_with_Streamlit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | } 7 | }, 8 | "nbformat_minor": 5, 9 | "nbformat": 4, 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "id": "cc4fb15e-f9db-44eb-9f60-1b9589b755cb", 14 | "metadata": { 15 | "name": "md_title", 16 | "collapsed": false 17 | }, 18 | "source": "# Monitoring the Table Size in Snowflake Notebooks with Streamlit\n\nA notebook that tracks the size of specific tables over time to help developers monitor storage growth trends. \n\nHere's what we're implementing to investigate the tables:\n1. Retrieve the Top 100 largest tables\n2. Analyze query patterns on the largest tables\n3. Identify which tables are users interacting with" 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "42a7b143-0779-4706-affc-c214213f55c5", 23 | "metadata": { 24 | "name": "md_section1", 25 | "collapsed": false 26 | }, 27 | "source": "## 1. Retrieve the Top 100 largest tables\n\nThis query shows the top 100 largest tables, sorted by row count, including their size in GB, owners and last modification details." 28 | }, 29 | { 30 | "cell_type": "code", 31 | "id": "e17f14a5-ea50-4a1d-bc15-c64a6447d0a8", 32 | "metadata": { 33 | "language": "sql", 34 | "name": "sql_top_tables", 35 | "codeCollapsed": false, 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": "-- Top 100 largest tables with metrics\nSELECT \n CONCAT(TABLE_CATALOG, '.', TABLE_SCHEMA, '.', TABLE_NAME) AS FULLY_RESOLVED_TABLE_NAME,\n TABLE_OWNER,\n LAST_DDL,\n LAST_DDL_BY,\n ROW_COUNT,\n ROUND(BYTES / 1024 / 1024 / 1024, 2) AS SIZE_GB,\n LAST_ALTERED,\n CASE \n WHEN LAST_DDL <= DATEADD(DAY, -90, CURRENT_DATE) THEN 'YES' \n ELSE 'NO' \n END AS LAST_ACCESSED_90DAYS\nFROM SNOWFLAKE.ACCOUNT_USAGE.TABLES\nWHERE DELETED IS NULL\n AND ROW_COUNT > 0\n AND LAST_ACCESSED_90DAYS = 'NO'\nORDER BY ROW_COUNT DESC\nLIMIT 100;\n", 40 | "execution_count": null 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "26cf2c60-f4a0-493d-bb62-fbde9e4226b9", 45 | "metadata": { 46 | "name": "md_variable_info", 47 | "collapsed": false 48 | }, 49 | "source": "You can now run this query in Python without any additional code -- simply use your cell name as a variable! We're going to convert our cell to a pandas DataFrame below to make it easier to work with " 50 | }, 51 | { 52 | "cell_type": "code", 53 | "id": "ac2608a7-5cd1-45fb-bb89-17f1bf010b5f", 54 | "metadata": { 55 | "language": "python", 56 | "name": "sql_top_tables_pd", 57 | "codeCollapsed": false, 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": "sql_top_tables.to_pandas()", 62 | "execution_count": null 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "id": "40d926ac-d441-4799-b56a-c200a13cbc09", 67 | "metadata": { 68 | "name": "md_section2", 69 | "collapsed": false 70 | }, 71 | "source": "## 2. Explore a specific table \n\nLet's explore one of these tables in greater detail to figure out the most common queries and who is using it most often. 
\n\n💡 **Pro tip:** You can interact with the below cell and select the fully resolved table name you want to explore more in your account!" 72 | }, 73 | { 74 | "cell_type": "code", 75 | "id": "50216adb-e5e2-4dd0-8b82-0e7dae07d27f", 76 | "metadata": { 77 | "language": "python", 78 | "name": "py_input", 79 | "collapsed": false, 80 | "codeCollapsed": false 81 | }, 82 | "outputs": [], 83 | "source": "import streamlit as st\n\nselection = st.text_input(label=\"Enter a fully resolved table path to explore\")", 84 | "execution_count": null 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "089287ef-efe4-423d-96ce-2ff4d53df21c", 89 | "metadata": { 90 | "name": "md_pass_variable", 91 | "collapsed": false 92 | }, 93 | "source": "Let's now pass that variable into a SQL query so we can grab query analytics on this table" 94 | }, 95 | { 96 | "cell_type": "code", 97 | "id": "7ad267bb-645d-4fa6-8e16-3666b2372fd8", 98 | "metadata": { 99 | "language": "sql", 100 | "name": "sql_most_expensive_queries_on_table", 101 | "collapsed": false, 102 | "codeCollapsed": false 103 | }, 104 | "outputs": [], 105 | "source": "-- Grab most expensive queries on this table \nSELECT \n '{{selection}}' as FULLY_RESOLVED_TABLE_NAME,\n q.QUERY_TEXT,\n q.QUERY_TYPE,\n SUM(CREDITS_USED_CLOUD_SERVICES) as CREDITS_USED,\n MAX(TOTAL_ELAPSED_TIME) as MAX_elapsed_time,\n AVG(TOTAL_ELAPSED_TIME)/1000 as AVG_EXECUTION_TIME_SEC\nFROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q\nWHERE START_TIME >= CURRENT_DATE - interval '90 days'\n AND query_text LIKE '%{{selection}}%'\nGROUP BY ALL\nORDER BY AVG_EXECUTION_TIME_SEC DESC\nLIMIT 10", 106 | "execution_count": null 107 | }, 108 | { 109 | "cell_type": "code", 110 | "id": "14945658-f869-4047-b486-0a5456287948", 111 | "metadata": { 112 | "language": "python", 113 | "name": "py_visualization", 114 | "codeCollapsed": false, 115 | "collapsed": false 116 | }, 117 | "outputs": [], 118 | "source": "df = sql_most_expensive_queries_on_table.to_pandas()\nst.dataframe(df,\n column_config={\n \"CREDITS_USED\": st.column_config.ProgressColumn(\n \"CREDITS_USED\",\n format=\"%.4f\",\n min_value=df.CREDITS_USED.min(),\n max_value=df.CREDITS_USED.max(),\n ),\n },)", 119 | "execution_count": null 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "id": "d80fe813-7fe3-48a7-a30b-eb0b3495d0f3", 124 | "metadata": { 125 | "name": "md_section3", 126 | "collapsed": false 127 | }, 128 | "source": "## 3. Find out which users most commonly query this table\n\nLet's say we want to take our top most expensive query and turn it into a materialization. Who will be the users who are most likely to be impacted by our activities? \n\nTo find out, we're going to grab the list of users who queried our table of interest in the last 90 days as well as the users who have executed the expensive query. We can then contact them when we make an update and tell them about improvements we made! 🎉 \n\n-----\n\nFirst, let's find out who has used our table in the last 90 days. 
We already have a variable `selection` we can use, so we're plugging it into the below query: " 129 | }, 130 | { 131 | "cell_type": "code", 132 | "id": "23866f56-0731-492e-8306-4f6fc28ddb6e", 133 | "metadata": { 134 | "language": "sql", 135 | "name": "py_user_queries", 136 | "codeCollapsed": false, 137 | "collapsed": true 138 | }, 139 | "outputs": [], 140 | "source": "-- Identify users who have queried selected table in last 90 days \nSELECT \n USER_NAME, \n COUNT(*) number_of_queries\nFROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q\nWHERE START_TIME >= CURRENT_DATE - interval '90 days'\n AND query_text LIKE '%{{selection}}%'\nGROUP BY ALL\nORDER BY number_of_queries DESC\n", 141 | "execution_count": null 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "id": "0aa5ad71-a360-4fbf-a9d3-868d1d7a329f", 146 | "metadata": { 147 | "name": "md_query_selection", 148 | "collapsed": false 149 | }, 150 | "source": "Now, let's say we want to materialize a specific long running query. Grab a query from the `py_visualization` cell from Section 2. \n\nWe can now plug it into the `QUERY_TEXT` value below to find out who else would benefit from materializing this pattern. \n\n💡 **Pro tip:** If the query is too long, try a unique subset of the query in the box below" 151 | }, 152 | { 153 | "cell_type": "code", 154 | "id": "a041825e-a1fa-4d80-9e2b-9426ee818023", 155 | "metadata": { 156 | "language": "python", 157 | "name": "py_query_selection", 158 | "collapsed": true, 159 | "codeCollapsed": false 160 | }, 161 | "outputs": [], 162 | "source": "query_selection = st.text_input(label=\"Enter the query text you want to look up\")\nst.write(\"**You Entered:** `\" + query_selection + \"`\")", 163 | "execution_count": null 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "id": "b2368c7e-7325-4752-a2fb-ff4d6601123b", 168 | "metadata": { 169 | "name": "md_user_list", 170 | "collapsed": false 171 | }, 172 | "source": "Sweet! Now we get a list of all the users who might have run this query, along with their total credit\nconsumption and query execution time over the last 90 days." 
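The Jinja-templated SQL cell that follows interpolates `query_selection` directly into the query text. An alternative is to run the same lookup from a Python cell with a bound parameter — a sketch only, assuming a `snowflake-snowpark-python` version that supports the `params` argument to `session.sql`:

```python
# Same per-user credit/timing lookup, with the search text bound as a parameter.
from snowflake.snowpark.context import get_active_session

session = get_active_session()
users_df = session.sql(
    """
    SELECT user_name,
           SUM(credits_used_cloud_services) AS total_credits,
           AVG(total_elapsed_time) / 1000   AS avg_execution_time_sec
    FROM snowflake.account_usage.query_history
    WHERE start_time >= DATEADD(day, -90, CURRENT_TIMESTAMP())
      AND query_text LIKE '%' || ? || '%'
    GROUP BY user_name
    ORDER BY total_credits DESC
    """,
    params=[query_selection],  # from the st.text_input cell above
).to_pandas()
users_df.head()
```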
173 | }, 174 | { 175 | "cell_type": "code", 176 | "id": "506d54d9-1a00-46df-9307-dcce94ce8fb9", 177 | "metadata": { 178 | "language": "sql", 179 | "name": "py_user_list", 180 | "collapsed": true, 181 | "codeCollapsed": false 182 | }, 183 | "outputs": [], 184 | "source": "SELECT \n USER_NAME, \n SUM(CREDITS_USED_CLOUD_SERVICES) as total_credits, \n MAX(TOTAL_ELAPSED_TIME) as MAX_elapsed_time,\n AVG(TOTAL_ELAPSED_TIME)/1000 as AVG_EXECUTION_TIME_SEC\nFROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY q\nWHERE START_TIME >= CURRENT_DATE - interval '90 days'\n AND query_text LIKE '%{{query_selection}}%'\nGROUP BY ALL\nORDER BY total_credits DESC", 185 | "execution_count": null 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "id": "f6e54924-57e2-4dfb-8bf1-bad9b7fb635d", 190 | "metadata": { 191 | "name": "md_resources", 192 | "collapsed": false 193 | }, 194 | "source": "## Want to learn more?\n\n- Snowflake Docs on [Account Usage](https://docs.snowflake.com/en/sql-reference/account-usage) and [QUERY_HISTORY view](https://docs.snowflake.com/en/sql-reference/account-usage/query_history)\n\n- More about [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake)\n\n- For more inspiration on how to use Streamlit widgets in Notebooks, check out [Streamlit Docs](https://docs.streamlit.io/) and this list of what is currently supported inside [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake#label-notebooks-streamlit-support)" 195 | } 196 | ] 197 | } 198 | -------------------------------------------------------------------------------- /Monitoring_Table_Size_with_Streamlit/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - pandas=* 6 | -------------------------------------------------------------------------------- /My First Notebook Project/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - matplotlib=3.7.2 6 | - scipy=1.10.1 7 | -------------------------------------------------------------------------------- /Navigating and Browsing Files/Navigating and Browsing Files.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | } 7 | }, 8 | "nbformat_minor": 5, 9 | "nbformat": 4, 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "id": "9e16b61a-d9a5-475e-9c72-1c5fb53540df", 14 | "metadata": { 15 | "name": "cell1", 16 | "collapsed": false 17 | }, 18 | "source": "# Navigating and Browsing Files in Snowflake Notebooks\n\nIn addition to files created within your notebook, you may need to interact with files from your local machine or a linked Git repository. These files can include code, data, media files, and more.\n\nYou'll find a **Files** tab located on the left-hand panel. This tab provides a list of files available for reference within the notebook. 
By default, every notebook is associated with two files: \n\n- Main Notebook File: Named \"notebook_app.ipynb\" by default, unless sourced from Git or uploaded from another .ipynb file with a different name.\n- `environment.yml`: An autogenerated file used for configuring the notebook environment, including required packages.\n\nTo inspect a file's contents, simply click on the file. A pop-up window will display a preview. Note that files previews are read-only. \n\n## Adding Files from Your Local Computer\n\nYou can upload files directly from your local machine to use within your Snowflake notebook. Simply click the `+` button in the Files pane and upload the selected files. Uploaded files will be stored in the Notebook's internal stage and persisted across sessions.\n\nYou can find the list of files that we are working with in this tutorial [here](https://github.com/Snowflake-Labs/snowflake-demo-notebooks/tree/main/Navigating%20and%20Browsing%20Files). Download all of the following files to your local machine.\n- `data.csv`\n- `data.json`\n- `display.py`\n- `stats.py`\n\nClick on `+` to upload files: \n\n![](https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/main/Navigating%20and%20Browsing%20Files/img/upload_files.png)\n\nOnce they are uploaded, you should see them on the File pane.\n\n![](https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/main/Navigating%20and%20Browsing%20Files/img/browse_files.png)\n\n\nNote: If your notebook session is active when you upload a file, you'll need to restart the session for the file to become accessible due to a known bug." 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "36eca65b-fa05-4ede-bc15-a3d9a93ed530", 23 | "metadata": { 24 | "name": "cell2", 25 | "collapsed": false 26 | }, 27 | "source": "## Example 1: Working with Data Files\n\nOnce uploaded, you can reference the file in your notebook as if it were colocated with your notebook. For instance, to load a CSV file named `data.csv` into a Pandas DataFrame:" 28 | }, 29 | { 30 | "cell_type": "code", 31 | "id": "2537412c-5ec5-4b1d-898f-29fd4ce7a14a", 32 | "metadata": { 33 | "language": "python", 34 | "name": "cell3", 35 | "codeCollapsed": false 36 | }, 37 | "outputs": [], 38 | "source": "import pandas as pd\ndf = pd.read_csv(\"data.csv\")\ndf", 39 | "execution_count": null 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "id": "b59c60b8-3ebb-4fe6-9663-52c410db4802", 44 | "metadata": { 45 | "name": "cell4", 46 | "collapsed": false 47 | }, 48 | "source": "You can do the same with loading `data.json`: " 49 | }, 50 | { 51 | "cell_type": "code", 52 | "id": "15974c58-8347-41cf-af87-689bd511e759", 53 | "metadata": { 54 | "language": "python", 55 | "name": "cell5", 56 | "codeCollapsed": false 57 | }, 58 | "outputs": [], 59 | "source": "df = pd.read_json(\"data.json\",lines=True)\ndf", 60 | "execution_count": null 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "id": "ce12b87d-1c18-4240-b744-bb4cbb4e3866", 65 | "metadata": { 66 | "name": "cell6", 67 | "collapsed": false 68 | }, 69 | "source": "## Example 2: Working with Code Files\n\nTo improve the readability of your notebook, you can organize your code into modules that you can import and use in your notebooks. This is often useful if you have helper functions that span many lines of code or files. 
Here are the two helper files that have been loaded: \n- `stats.py` : Helper functions for generating random numbers and computing statistics\n- `display.py`: Helper function for generating data report display using Streamlit\n\nYou can click on the two files from the **File** pane to browse the code.\n\nNow let's take a look at how we can import and use the functions in each module." 70 | }, 71 | { 72 | "cell_type": "code", 73 | "id": "f3d0b6e4-9b74-45a0-9138-50433d7c9b05", 74 | "metadata": { 75 | "language": "python", 76 | "name": "cell7", 77 | "codeCollapsed": false 78 | }, 79 | "outputs": [], 80 | "source": "# Import from stats.py\nfrom stats import generate_random_list, median_absolute_deviation", 81 | "execution_count": null 82 | }, 83 | { 84 | "cell_type": "code", 85 | "id": "46f0e49d-a8ed-4065-95c2-81a6284bb40e", 86 | "metadata": { 87 | "language": "python", 88 | "name": "cell8", 89 | "codeCollapsed": false 90 | }, 91 | "outputs": [], 92 | "source": "# Generate a list of 5 random numbers between 0 and 100\nrandom_numbers = generate_random_list(5, 0, 100)\nprint(random_numbers)", 93 | "execution_count": null 94 | }, 95 | { 96 | "cell_type": "code", 97 | "id": "d9ebfd72-17cc-401f-ab98-68be4fb6493c", 98 | "metadata": { 99 | "language": "python", 100 | "name": "cell9", 101 | "codeCollapsed": false 102 | }, 103 | "outputs": [], 104 | "source": "# Compute the median absolute deviation of the list\nmad_val = median_absolute_deviation(random_numbers)\nprint(mad_val)", 105 | "execution_count": null 106 | }, 107 | { 108 | "cell_type": "code", 109 | "id": "dcae54cf-78b7-4415-a934-c55ccc1570ac", 110 | "metadata": { 111 | "language": "python", 112 | "name": "cell10", 113 | "codeCollapsed": false 114 | }, 115 | "outputs": [], 116 | "source": "# Import from display.py\nfrom display import print_report", 117 | "execution_count": null 118 | }, 119 | { 120 | "cell_type": "code", 121 | "id": "59635d1d-0540-43e6-94ee-588fe88b5305", 122 | "metadata": { 123 | "language": "python", 124 | "name": "cell11", 125 | "codeCollapsed": false 126 | }, 127 | "outputs": [], 128 | "source": "# Generate data report with visualizations using Streamlit\nprint_report(\"My Data Report\", random_numbers, mad_val)", 129 | "execution_count": null 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "0e4c8b48-9933-4193-b97c-4acb107780a6", 134 | "metadata": { 135 | "name": "cell12", 136 | "collapsed": false 137 | }, 138 | "source": "## Referencing Files with Stage Path\nEach uploaded file has a full stage path associated with it, which you can find by clicking on the `...` button on each file and selecting `Copy path`.\n\nThe stage path represents the file's location within your notebook environment. For example:\n\n```\nsnow://notebook/../versions/live/data.csv\n```\n\nYou can run SQL using the full stage path." 
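The same listing can also be issued from a Python cell — a sketch equivalent to the SQL `LIST` cell that follows; the notebook object name (`GH_ACTION_MULTIFILE_NB`) is taken from that cell and must match your own notebook:

```python
# Python equivalent of the SQL LIST cell below.
from snowflake.snowpark.context import get_active_session

session = get_active_session()
database = session.get_current_database()
schema = session.get_current_schema()

stage_path = f'snow://notebook/{database}.{schema}."GH_ACTION_MULTIFILE_NB"/versions/live/data.csv'
for row in session.sql(f"LIST '{stage_path}'").collect():
    print(row)
```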
139 | }, 140 | { 141 | "cell_type": "code", 142 | "id": "14320dad-f9a7-4ff8-8248-98ed1083127a", 143 | "metadata": { 144 | "language": "python", 145 | "name": "cell13", 146 | "codeCollapsed": false 147 | }, 148 | "outputs": [], 149 | "source": "from snowflake.snowpark.context import get_active_session\nsession = get_active_session()\ndatabase = session.get_current_database()\nschema = session.get_current_schema()", 150 | "execution_count": null 151 | }, 152 | { 153 | "cell_type": "code", 154 | "id": "8e98d14b-2804-42e1-aab9-f54d9c0bb086", 155 | "metadata": { 156 | "language": "sql", 157 | "name": "cell14", 158 | "codeCollapsed": false 159 | }, 160 | "outputs": [], 161 | "source": "LIST 'snow://notebook/{{database}}.{{schema}}.\"GH_ACTION_MULTIFILE_NB\"/versions/live/data.csv'", 162 | "execution_count": null 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "id": "47581354-621c-4724-bb1c-797c22affac5", 167 | "metadata": { 168 | "name": "cell15", 169 | "collapsed": false 170 | }, 171 | "source": "## Working with Files from Git\n\nIf your Notebook is connected to Git, then all the files in the same folder as your notebook will be displayed on the Files Tab. \n\n\n![](https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/main/Navigating%20and%20Browsing%20Files/img/git_files.png)\n\n\nIn addition, any addition or removal of files associated with the notebook will be version controlled through Git. For example, if you removed `data.json` by clicking on `...`>`Remove`. You will see the files that are modified in the `Commit` dialog which shows the Git diff. \n\n![](https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/main/Navigating%20and%20Browsing%20Files/img/git_diff.png)\n\nYou can learn more about how you can set up Git integration with Notebooks [here](https://docs.snowflake.com/en/developer-guide/git/git-overview)." 
172 | } 173 | ] 174 | } -------------------------------------------------------------------------------- /Navigating and Browsing Files/data.csv: -------------------------------------------------------------------------------- 1 | fruit,size,weight 2 | apple,3.4,1.4 3 | orange,5.4,3.2 -------------------------------------------------------------------------------- /Navigating and Browsing Files/data.json: -------------------------------------------------------------------------------- 1 | {"fruit":"apple", "size":3.4, "weight":1.4},{"fruit":"orange", "size":5.4, "weight":3.2} -------------------------------------------------------------------------------- /Navigating and Browsing Files/display.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import altair as alt 3 | import streamlit as st 4 | 5 | def print_report(title, data, value): 6 | st.title(title) 7 | df = pd.DataFrame({'x': range(len(data)), 'y': data}) 8 | bars = alt.Chart(df).mark_bar().encode( 9 | x='x:O', 10 | y='y:Q' 11 | ) 12 | line = pd.DataFrame({'y': [value]}) 13 | median_line = alt.Chart(line).mark_rule(color='red', strokeDash=[3, 3], strokeWidth=3).encode( 14 | y='y:Q' 15 | ) 16 | chart = (bars + median_line).properties( 17 | width=400, 18 | height=400 19 | ) 20 | st.altair_chart(chart) -------------------------------------------------------------------------------- /Navigating and Browsing Files/img/browse_files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Navigating and Browsing Files/img/browse_files.png -------------------------------------------------------------------------------- /Navigating and Browsing Files/img/git_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Navigating and Browsing Files/img/git_diff.png -------------------------------------------------------------------------------- /Navigating and Browsing Files/img/git_files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Navigating and Browsing Files/img/git_files.png -------------------------------------------------------------------------------- /Navigating and Browsing Files/img/upload_files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Navigating and Browsing Files/img/upload_files.png -------------------------------------------------------------------------------- /Navigating and Browsing Files/stats.py: -------------------------------------------------------------------------------- 1 | def generate_random_list(length, min_val=0, max_val=100): 2 | """ 3 | Generates a random list of integers. 4 | 5 | Args: 6 | length (int): The length of the list. 7 | min_val (int): The minimum value of the integers (default is 0). 8 | max_val (int): The maximum value of the integers (default is 100). 9 | 10 | Returns: 11 | list: A list of random integers. 
12 | """ 13 | import random 14 | random_list = [random.randint(min_val, max_val) for _ in range(length)] 15 | return random_list 16 | 17 | def median_absolute_deviation(numbers): 18 | """ 19 | Calculates the median absolute deviation (MAD) of a list of numbers. 20 | 21 | Args: 22 | numbers (list): A list of numerical values. 23 | 24 | Returns: 25 | float: The median absolute deviation (MAD). 26 | """ 27 | if not numbers: 28 | return None 29 | 30 | # Calculate median 31 | sorted_numbers = sorted(numbers) 32 | n = len(sorted_numbers) 33 | if n % 2 == 0: 34 | median = (sorted_numbers[n//2 - 1] + sorted_numbers[n//2]) / 2 35 | else: 36 | median = sorted_numbers[n//2] 37 | 38 | # Calculate absolute deviations from median 39 | absolute_deviations = [abs(x - median) for x in numbers] 40 | 41 | # Calculate MAD 42 | mad = sorted(absolute_deviations)[len(absolute_deviations) // 2] 43 | 44 | return mad -------------------------------------------------------------------------------- /Query_Caching_Effectiveness/Query_Caching_Effectiveness.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | } 7 | }, 8 | "nbformat_minor": 5, 9 | "nbformat": 4, 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "id": "cc4fb15e-f9db-44eb-9f60-1b9589b755cb", 14 | "metadata": { 15 | "name": "md_title", 16 | "collapsed": false, 17 | "resultHeight": 311 18 | }, 19 | "source": "# Query Caching Effectiveness Report\n\nThis utility notebook analyzes the query cache hit rates. This is to ensure that caching is being used effectively and to reduce unnecessary compute costs.\n\nHere's our 4 step process:\n1. SQL query to retrieve data\n2. Convert SQL table to a Pandas DataFrame\n3. Data preparation and filtering (using user input from Streamlit widgets)\n4. Data visualization and exploration" 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "42a7b143-0779-4706-affc-c214213f55c5", 24 | "metadata": { 25 | "name": "md_retrieve_data", 26 | "collapsed": false, 27 | "resultHeight": 220 28 | }, 29 | "source": "## 1. Retrieve Data\n\nThe following query filters for queries that actually scanned data, groups results by `WAREHOUSE_NAME`, and orders them by *percentage of data scanned from cache* (`percent_scanned_from_cache`). 
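Note that the `daily_cache_hit_ratio` it computes is a bytes-weighted average rather than a simple mean of per-query percentages, so one large cold scan can outweigh many fully cached small ones. A quick numeric sketch (illustrative byte counts, treating the percentage column as a 0–1 fraction, which the `.2%` tooltip formatting later in this notebook also assumes):

```python
# Mirrors SUM(bytes_scanned * percentage_scanned_from_cache) / SUM(bytes_scanned)
scans = [
    (100 * 1024**3, 0.10),  # 100 GB scanned, 10% from cache (illustrative)
    (1 * 1024**3, 1.00),    #   1 GB scanned, fully from cache (illustrative)
]
ratio = sum(b * p for b, p in scans) / sum(b for b, _ in scans)
print(round(ratio, 3))  # ~0.109 -- far from the 0.55 a naive average would suggest
```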
\n\nThis helps to identify which warehouses are making the most effective use of caching.\n" 30 | }, 31 | { 32 | "cell_type": "code", 33 | "id": "d549f7ac-bbbd-41f4-9ee3-98284e587de1", 34 | "metadata": { 35 | "language": "sql", 36 | "name": "sql_query_caching", 37 | "resultHeight": 439, 38 | "codeCollapsed": false, 39 | "collapsed": false 40 | }, 41 | "outputs": [], 42 | "source": "SELECT \n warehouse_name,\n DATE_TRUNC('day', start_time) AS query_date,\n COUNT(DISTINCT query_parameterized_hash) AS query_parameterized_hash_count,\n COUNT(*) AS daily_executions,\n AVG(total_elapsed_time)/1000 AS avg_execution_time,\n SUM(total_elapsed_time)/1000 AS total_execution_time,\n SUM(CASE WHEN bytes_scanned > 0 THEN bytes_scanned ELSE 0 END) AS daily_bytes_scanned,\n SUM(bytes_scanned * percentage_scanned_from_cache) / NULLIF(SUM(CASE WHEN bytes_scanned > 0 THEN bytes_scanned ELSE 0 END), 0) AS daily_cache_hit_ratio,\n MAX_BY(query_text, start_time) AS latest_query_text,\n MAX_BY(user_name, start_time) AS latest_user_name\nFROM snowflake.account_usage.query_history qh\nWHERE start_time >= dateadd(day, -30, current_timestamp())\nGROUP BY 1, 2\nHAVING daily_bytes_scanned > 0\nORDER BY \n query_date DESC,\n daily_cache_hit_ratio DESC,\n daily_bytes_scanned DESC", 43 | "execution_count": null 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "870b69dd-aae0-4dd3-93f7-7adce1268159", 48 | "metadata": { 49 | "name": "md_dataframe", 50 | "collapsed": false, 51 | "resultHeight": 102 52 | }, 53 | "source": "## 2. Convert Table to a DataFrame\n\nNext, we'll convert the tables to a Pandas DataFrame.\n" 54 | }, 55 | { 56 | "cell_type": "code", 57 | "id": "4a5559a8-ef3a-40c3-a9d5-54602403adab", 58 | "metadata": { 59 | "language": "python", 60 | "name": "py_query_caching", 61 | "codeCollapsed": false, 62 | "resultHeight": 439, 63 | "collapsed": false 64 | }, 65 | "outputs": [], 66 | "source": "sql_query_caching.to_pandas()", 67 | "execution_count": null 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "id": "e618ffe5-481f-4105-bc3f-f5e903b45e34", 72 | "metadata": { 73 | "name": "md_data_preparation", 74 | "collapsed": false, 75 | "resultHeight": 102 76 | }, 77 | "source": "## Data Preparation\n\nHere, we'll do some data preparation prior to visualization." 78 | }, 79 | { 80 | "cell_type": "code", 81 | "id": "a3f93f11-dd74-42f2-bd05-410bb66931a2", 82 | "metadata": { 83 | "language": "python", 84 | "name": "py_data_preparation", 85 | "resultHeight": 439, 86 | "collapsed": false, 87 | "codeCollapsed": false 88 | }, 89 | "outputs": [], 90 | "source": "df = py_query_caching.copy()\n\n# Convert QUERY_DATE to datetime\ndf['QUERY_DATE'] = pd.to_datetime(df['QUERY_DATE'])\n\n# Create WEEK_NUMBER column\ndf['WEEK_NUMBER'] = df['QUERY_DATE'].dt.isocalendar().week\n\n# Create MONTH_YEAR column\ndf['MONTH_YEAR'] = df['QUERY_DATE'].dt.strftime('%b %Y')\n\n# Group by\ngrouped_df = df.groupby('WAREHOUSE_NAME').agg({\n 'QUERY_PARAMETERIZED_HASH_COUNT': 'count',\n 'DAILY_EXECUTIONS': 'sum',\n 'AVG_EXECUTION_TIME': 'mean',\n 'TOTAL_EXECUTION_TIME': 'sum',\n 'DAILY_BYTES_SCANNED': 'sum',\n 'DAILY_CACHE_HIT_RATIO': 'mean'\n}).reset_index()\n\ngrouped_df", 91 | "execution_count": null 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "59b04137-ca95-4fb8-b216-133272349a78", 96 | "metadata": { 97 | "name": "md_bar_chart", 98 | "collapsed": false, 99 | "resultHeight": 201 100 | }, 101 | "source": "## 3. 
Visualize Bar Chart\n\nHere, we'll visualize the data via a bar chart for the columns:\n- Query count\n- Bytes scanned\n- Percent of bytes scanned\n" 102 | }, 103 | { 104 | "cell_type": "code", 105 | "id": "3b382b54-fd8a-49f5-8bc9-72ca420608ff", 106 | "metadata": { 107 | "language": "python", 108 | "name": "py_bar_chart", 109 | "resultHeight": 623, 110 | "codeCollapsed": false 111 | }, 112 | "outputs": [], 113 | "source": "import altair as alt\nimport pandas as pd\n\n# Create bar chart\nchart = alt.Chart(grouped_df).mark_bar().encode(\n y=alt.Y('WAREHOUSE_NAME:N', \n title='',\n axis=alt.Axis(\n labels=True,\n labelLimit=250,\n tickMinStep=1,\n labelOverlap=False,\n labelPadding=10\n ),\n sort='-x'),\n x=alt.X('DAILY_CACHE_HIT_RATIO:Q', \n title='Cache Hit Ratio'),\n color=alt.Color('WAREHOUSE_NAME:N', legend=None),\n tooltip=[\n alt.Tooltip('WAREHOUSE_NAME', title='Warehouse'),\n alt.Tooltip('DAILY_CACHE_HIT_RATIO', title='Cache Hit Ratio'),\n alt.Tooltip('DAILY_EXECUTIONS', title='Daily Executions'),\n alt.Tooltip('AVG_EXECUTION_TIME', title='Avg Execution Time (ms)')\n ]\n).properties(\n width=400,\n height=600,\n title='Cache Hit Ratio by Warehouse'\n).configure_axis(\n labelFontSize=12,\n titleFontSize=14\n).configure_title(\n fontSize=16,\n anchor='middle'\n)\n\n# Display the chart\nst.altair_chart(chart, use_container_width=True)", 114 | "execution_count": null 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "id": "3c995961-473b-42be-b824-9c5dcb8ef041", 119 | "metadata": { 120 | "name": "md_heatmap", 121 | "collapsed": false, 122 | "resultHeight": 201 123 | }, 124 | "source": "## 4. Visualize as Heatmap\n\nHere, we'll visualize the data via a heatmap for the columns:\n- Query count\n- Bytes scanned\n- Percent of bytes scanned\n" 125 | }, 126 | { 127 | "cell_type": "code", 128 | "id": "02b09580-6a70-4769-a8b1-68fda0dc72bf", 129 | "metadata": { 130 | "language": "python", 131 | "name": "py_heatmap", 132 | "resultHeight": 623, 133 | "codeCollapsed": false, 134 | "collapsed": false 135 | }, 136 | "outputs": [], 137 | "source": "import pandas as pd\nimport altair as alt\n\n# Convert QUERY_DATE to datetime if it isn't already\ndf['QUERY_DATE'] = pd.to_datetime(df['QUERY_DATE'])\n\n# Format date as string for display\ndf['DATE'] = df['QUERY_DATE'].dt.strftime('%Y-%m-%d')\n\n# Aggregate data by date and warehouse\nagg_df = df.groupby(['DATE', 'WAREHOUSE_NAME'])['DAILY_CACHE_HIT_RATIO'].sum().reset_index()\n\n# Create the heatmap\nheatmap = alt.Chart(agg_df).mark_rect(stroke='black', strokeWidth=1).encode(\n x=alt.X('DATE:O',\n title='Date',\n axis=alt.Axis(\n labelAngle=90,\n labelOverlap=False,\n tickCount=10\n )),\n y=alt.Y('WAREHOUSE_NAME:N',\n title='',\n axis=alt.Axis(\n labels=True,\n labelLimit=250,\n tickMinStep=1,\n labelOverlap=False,\n labelPadding=10\n )),\n color=alt.Color('DAILY_CACHE_HIT_RATIO:Q',\n title='Cache Hit Ratio',\n scale=alt.Scale(scheme='blues')),\n tooltip=['DATE', 'WAREHOUSE_NAME', \n alt.Tooltip('DAILY_CACHE_HIT_RATIO:Q', format='.2%')]\n).properties(\n title=f'Daily Warehouse Cache Hit Ratio Heatmap',\n width=500,\n height=600\n)\n\n# Add configuration to make the chart more interactive\nheatmap = heatmap.configure_axis(\n grid=False\n).configure_view(\n strokeWidth=0\n)\n\n# Display or save the chart\nst.altair_chart(heatmap, use_container_width=True)", 138 | "execution_count": null 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "id": "b9e3e4da-4674-46aa-9e91-ed8697bfef5b", 143 | "metadata": { 144 | "name": "md_pro_tip", 145 | "collapsed": false, 
146 | "resultHeight": 134 147 | }, 148 | "source": "💡 Pro tip:\n\nWhen you see a low cache scan percentage for queries that repeatedly access the same data, you can significantly improve its performance by optimizing the cache usage. This is especially true for reports or dashboards that run similar queries throughout the day." 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "id": "eb3e9b67-6a6e-4218-b17a-3f8564a04d18", 153 | "metadata": { 154 | "name": "md_resources", 155 | "collapsed": false, 156 | "resultHeight": 268 157 | }, 158 | "source": "## Want to learn more?\n\n- Snowflake Docs on [Account Usage](https://docs.snowflake.com/en/sql-reference/account-usage) and [QUERY_HISTORY view](https://docs.snowflake.com/en/sql-reference/account-usage/query_history)\n- More about [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake)\n- For more inspiration on how to use Streamlit widgets in Notebooks, check out [Streamlit Docs](https://docs.streamlit.io/) and this list of what is currently supported inside [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake#label-notebooks-streamlit-support)\n- Check out the [Altair User Guide](https://altair-viz.github.io/user_guide/data.html) for further information on customizing Altair charts\n" 159 | } 160 | ] 161 | } -------------------------------------------------------------------------------- /Query_Caching_Effectiveness/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - altair=* 6 | - pandas=* 7 | -------------------------------------------------------------------------------- /Query_Cost_Monitoring/Query_Cost_Monitoring.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | } 7 | }, 8 | "nbformat_minor": 5, 9 | "nbformat": 4, 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "id": "cc4fb15e-f9db-44eb-9f60-1b9589b755cb", 14 | "metadata": { 15 | "name": "md_title", 16 | "collapsed": false, 17 | "resultHeight": 336 18 | }, 19 | "source": "# Query Cost Monitoring\n\nA notebook that breaks down compute costs by individual query, allowing teams to identify high-cost operations.\n\nHere's our 4 step process:\n1. SQL query to retrieve query cost data\n2. Convert SQL table to a Pandas DataFrame\n3. Data preparation and filtering (using user input from Streamlit widgets)\n4. Data visualization and exploration" 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "42a7b143-0779-4706-affc-c214213f55c5", 24 | "metadata": { 25 | "name": "md_retrieve_data", 26 | "collapsed": false, 27 | "resultHeight": 231 28 | }, 29 | "source": "## 1. 
Retrieve Data\n\nTo gain insights on query costs, we'll write a SQL query to retrieve the `credits_used` data from the `snowflake.account_usage.metering_history` table and merging this with associated user, database, schema and warehouse information from the `snowflake.account_usage.query_history` table.\n" 30 | }, 31 | { 32 | "cell_type": "code", 33 | "id": "d549f7ac-bbbd-41f4-9ee3-98284e587de1", 34 | "metadata": { 35 | "language": "sql", 36 | "name": "sql_data", 37 | "resultHeight": 511, 38 | "codeCollapsed": false, 39 | "collapsed": false 40 | }, 41 | "outputs": [], 42 | "source": "SELECT\n query_history.query_id,\n query_history.query_text,\n query_history.start_time,\n query_history.end_time,\n query_history.user_name,\n query_history.database_name,\n query_history.schema_name,\n query_history.warehouse_name,\n query_history.warehouse_size,\n metering_history.credits_used,\n execution_time/1000 as execution_time_s,\nFROM\n snowflake.account_usage.query_history\n JOIN snowflake.account_usage.metering_history ON query_history.start_time >= metering_history.start_time\n AND query_history.end_time <= metering_history.end_time\nWHERE\n query_history.start_time >= DATEADD (DAY, -7, CURRENT_TIMESTAMP())\nORDER BY\n query_history.query_id;", 43 | "execution_count": null 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "870b69dd-aae0-4dd3-93f7-7adce1268159", 48 | "metadata": { 49 | "name": "md_dataframe", 50 | "collapsed": false, 51 | "resultHeight": 102 52 | }, 53 | "source": "## 2. Convert Table to a DataFrame\n\nNext, we'll convert the table to a Pandas DataFrame.\n" 54 | }, 55 | { 56 | "cell_type": "code", 57 | "id": "4a5559a8-ef3a-40c3-a9d5-54602403adab", 58 | "metadata": { 59 | "language": "python", 60 | "name": "py_dataframe", 61 | "codeCollapsed": false, 62 | "resultHeight": 511, 63 | "collapsed": false 64 | }, 65 | "outputs": [], 66 | "source": "sql_data.to_pandas()", 67 | "execution_count": null 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "id": "59b04137-ca95-4fb8-b216-133272349a78", 72 | "metadata": { 73 | "name": "md_data_preparation", 74 | "collapsed": false, 75 | "resultHeight": 195 76 | }, 77 | "source": "## 3. Create an Interactive Slider Widget & Data Preparation\n\nHere, we'll create an interactive slider for dynamically selecting the number of days to analyze. 
This would then trigger the filtering of the DataFrame to the specified number of days.\n\nNext, we'll reshape the data by calculating the frequency count by hour and task name, which will subsequently be used for creating the heatmap in the next step.\n" 78 | }, 79 | { 80 | "cell_type": "code", 81 | "id": "aeff0dbb-5a3d-4c15-bcc6-f19e5f2398ac", 82 | "metadata": { 83 | "language": "python", 84 | "name": "cell9", 85 | "resultHeight": 1246, 86 | "codeCollapsed": false, 87 | "collapsed": false 88 | }, 89 | "outputs": [], 90 | "source": "import pandas as pd\nimport streamlit as st\nimport altair as alt\n\n# Get data\ndf = py_dataframe.copy()\n\n# Create date filter slider\nst.subheader(\"Select time duration\")\n\ncol = st.columns(3)\n\nwith col[0]:\n days = st.slider('Select number of days to analyze', \n min_value=1, \n max_value=7, \n value=7, \n step=1)\nwith col[1]:\n var = st.selectbox(\"Select a variable\", ['WAREHOUSE_NAME', 'USER_NAME', 'WAREHOUSE_SIZE'])\nwith col[2]:\n metric = st.selectbox(\"Select a metric\", [\"COUNT\", \"TOTAL_CREDITS_USED\"])\n\n# Filter data according to day duration\ndf['START_TIME'] = pd.to_datetime(df['START_TIME'])\nlatest_date = df['START_TIME'].max()\ncutoff_date = latest_date - pd.Timedelta(days=days)\nfiltered_df = df[df['START_TIME'] > cutoff_date].copy()\n \n# Prepare data for heatmap\nfiltered_df['HOUR_OF_DAY'] = filtered_df['START_TIME'].dt.hour\nfiltered_df['HOUR_DISPLAY'] = filtered_df['HOUR_OF_DAY'].apply(lambda x: f\"{x:02d}:00\")\n \n# Calculate frequency count by hour and query\n#agg_df = filtered_df.groupby(['QUERY_ID', 'HOUR_DISPLAY', var]).size().reset_index(name='COUNT')\n\n# Calculate frequency count and sum of credits by hour and query\nagg_df = (filtered_df.groupby(['QUERY_ID', 'HOUR_DISPLAY', var])\n .agg(\n COUNT=('QUERY_ID', 'size'),\n TOTAL_CREDITS_USED=('CREDITS_USED', 'sum')\n )\n .reset_index()\n)\n\nst.warning(f\"Analyzing {var} data for the last {days} days!\")\n\n\n\n## Initialize the button state in session state\nif 'expanded_btn' not in st.session_state:\n st.session_state.expanded_btn = False\n\n## Callback function to toggle the state\ndef toggle_expand():\n st.session_state.expanded_btn = not st.session_state.expanded_btn\n\n## Create button with callback\nst.button(\n '⊕ Expand DataFrames' if not st.session_state.expanded_btn else '⊖ Collapse DataFrames',\n on_click=toggle_expand,\n type='secondary' if st.session_state.expanded_btn else 'primary'\n)\n\n## State conditional\nif st.session_state.expanded_btn:\n expand_value = True\nelse:\n expand_value = False\n\nwith st.expander(\"See Filtered DataFrame\", expanded=expand_value):\n st.dataframe(filtered_df.head(100))\nwith st.expander(\"See Heatmap DataFrame\", expanded=expand_value):\n st.dataframe(agg_df)\n", 91 | "execution_count": null 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "35f31e4e-95d5-4ee5-a146-b9e93dd9d570", 96 | "metadata": { 97 | "name": "md_heatmap", 98 | "collapsed": false, 99 | "resultHeight": 102 100 | }, 101 | "source": "## 4. Create a Heatmap for Visualizing Query Cost\n\nFinally, a heatmap, and stacked bar chart, and bubble chart are generated that will allow us to gain insights on query cost and frequency." 
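As an optional aside before the charts: the aggregated `agg_df` produced above can also be written back to a table so a scheduled task or dashboard can reuse it — a sketch only, with a hypothetical target table name:

```python
# Persist the hourly aggregation for downstream reuse (table name is hypothetical).
from snowflake.snowpark.context import get_active_session

session = get_active_session()
session.create_dataframe(agg_df).write.mode("overwrite").save_as_table(
    "QUERY_COST_HOURLY_AGG"
)
```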
102 | }, 103 | { 104 | "cell_type": "code", 105 | "id": "414edc5e-3597-478e-aac7-f787f68bb3b1", 106 | "metadata": { 107 | "language": "python", 108 | "name": "py_heatmap", 109 | "collapsed": false, 110 | "resultHeight": 366, 111 | "codeCollapsed": false 112 | }, 113 | "outputs": [], 114 | "source": "## Heatmap\nheatmap = alt.Chart(agg_df).mark_rect(stroke='black',strokeWidth=1).encode(\n x='HOUR_DISPLAY:O',\n #y='WAREHOUSE_NAME:N',\n y=alt.Y(f'{var}:N', \n title='',\n axis=alt.Axis(\n labels=True,\n labelLimit=250,\n tickMinStep=1,\n labelOverlap=False,\n labelPadding=10\n )),\n color=f'{metric}:Q',\n tooltip=['HOUR_DISPLAY', var, metric]\n).properties(\n title=f'Query Activity Heatmap by Hour and {var}'\n)\n\nst.altair_chart(heatmap, use_container_width=True)", 115 | "execution_count": null 116 | }, 117 | { 118 | "cell_type": "code", 119 | "id": "84ed25f3-03ef-495a-a12d-247970a29f4a", 120 | "metadata": { 121 | "language": "python", 122 | "name": "py_stacked_bar_chart", 123 | "codeCollapsed": false, 124 | "collapsed": false, 125 | "resultHeight": 423 126 | }, 127 | "outputs": [], 128 | "source": "## Stacked bar chart with time series\nbar_time = alt.Chart(agg_df).mark_bar().encode(\n x='HOUR_DISPLAY:O',\n y=f'{metric}:Q',\n color=alt.Color(f'{var}:N', legend=alt.Legend(orient='bottom')),\n tooltip=['HOUR_DISPLAY', var, metric]\n).properties(\n title=f'Query Activity by Hour and {var}',\n height=400\n)\n\nst.altair_chart(bar_time, use_container_width=True)\n", 129 | "execution_count": null 130 | }, 131 | { 132 | "cell_type": "code", 133 | "id": "0774909e-3ab5-48e4-92ea-c433488e96b7", 134 | "metadata": { 135 | "language": "python", 136 | "name": "py_bubble_plot", 137 | "collapsed": false, 138 | "resultHeight": 573, 139 | "codeCollapsed": false 140 | }, 141 | "outputs": [], 142 | "source": "## Bubble plot with size representing the metric\nbubble = alt.Chart(agg_df).mark_circle().encode(\n x='HOUR_DISPLAY:O',\n y=alt.Y(f'{var}:N', title=''),\n size=alt.Size(f'{metric}:Q', legend=alt.Legend(title='Query Count')),\n color=alt.Color(f'{var}:N', legend=None),\n tooltip=['HOUR_DISPLAY', var, metric]\n).properties(\n title=f'Query Distribution by Hour and {var}',\n height=550\n)\n\nst.altair_chart(bubble, use_container_width=True)", 143 | "execution_count": null 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "id": "eb3e9b67-6a6e-4218-b17a-3f8564a04d18", 148 | "metadata": { 149 | "name": "md_resources", 150 | "collapsed": false, 151 | "resultHeight": 217 152 | }, 153 | "source": "## Want to learn more?\n\n- Snowflake Docs on [Account Usage](https://docs.snowflake.com/en/sql-reference/account-usage), [METERING_HISTORY view](https://docs.snowflake.com/en/sql-reference/account-usage/task_history) and [QUERY_HISTORY](https://docs.snowflake.com/en/sql-reference/account-usage/query_history)\n- More about [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake)\n- For more inspiration on how to use Streamlit widgets in Notebooks, check out [Streamlit Docs](https://docs.streamlit.io/) and this list of what is currently supported inside [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake#label-notebooks-streamlit-support)\n- Check out the [Altair User Guide](https://altair-viz.github.io/user_guide/data.html) for further information on customizing Altair charts\n" 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "id": "6c11317d-7fd7-412d-aeae-cd131dd1530d", 158 | "metadata": { 159 | "name": 
"cell1", 160 | "collapsed": false 161 | }, 162 | "source": "" 163 | } 164 | ] 165 | } -------------------------------------------------------------------------------- /Query_Cost_Monitoring/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - altair=* 6 | - pandas=* 7 | -------------------------------------------------------------------------------- /Query_Performance_Insights/Automated_Query_Performance_Insights_in_Snowflake_Notebooks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | } 7 | }, 8 | "nbformat_minor": 5, 9 | "nbformat": 4, 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "id": "d43a3edd-7c40-4a96-a4c6-c46e52b415ed", 14 | "metadata": { 15 | "name": "md_title", 16 | "collapsed": false 17 | }, 18 | "source": "# Automated Query Performance Insights in Snowflake Notebooks\n\nIn this notebook, we'll provide SQL queries that you can use to analyze query history and gain insights on performance and bottlenecks.\n\nThe following 6 queries against the `ACCOUNT_USAGE` schema provide insight into the past performance of queries (examples 1-4), warehouses (example 5), and tasks (example 6)." 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "201438af-5d95-44b5-9582-ac165686ea47", 23 | "metadata": { 24 | "name": "md_1", 25 | "collapsed": false 26 | }, 27 | "source": "## 1. Top n longest-running queries\n\nThis query provides a listing of the top n (50 in the example below) longest-running queries in the last day. You can adjust the `DATEADD` function to focus on a shorter or longer period of time. Replace `STREAMLIT_DEMO_APPS` with the name of a warehouse." 28 | }, 29 | { 30 | "cell_type": "code", 31 | "id": "c695373e-ac74-4b62-a1f1-08206cbd5c81", 32 | "metadata": { 33 | "language": "sql", 34 | "name": "sql_1", 35 | "codeCollapsed": false, 36 | "collapsed": false 37 | }, 38 | "source": "SELECT query_id,\n ROW_NUMBER() OVER(ORDER BY partitions_scanned DESC) AS query_id_int,\n query_text,\n total_elapsed_time/1000 AS query_execution_time_seconds,\n partitions_scanned,\n partitions_total,\nFROM snowflake.account_usage.query_history Q\nWHERE warehouse_name = 'STREAMLIT_DEMO_APPS' AND TO_DATE(Q.start_time) > DATEADD(day,-1,TO_DATE(CURRENT_TIMESTAMP()))\n AND total_elapsed_time > 0 --only get queries that actually used compute\n AND error_code IS NULL\n AND partitions_scanned IS NOT NULL\nORDER BY total_elapsed_time desc\nLIMIT 50;", 39 | "execution_count": null, 40 | "outputs": [] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "fbb8e757-c732-46d8-a929-e291f6b8fff7", 45 | "metadata": { 46 | "name": "md_2", 47 | "collapsed": false 48 | }, 49 | "source": "## 2. Queries organized by execution time over past month\n\nThis query groups queries for a given warehouse by buckets for execution time over the last month. These trends in query completion time can help inform decisions to resize warehouses or separate out some queries to another warehouse. Replace `STREAMLIT_DEMO_APPS` with the name of a warehouse." 
50 | }, 51 | { 52 | "cell_type": "code", 53 | "id": "07b6ef1f-36d3-4f94-a784-6a348f8214d6", 54 | "metadata": { 55 | "language": "sql", 56 | "name": "sql_2", 57 | "collapsed": false, 58 | "codeCollapsed": false 59 | }, 60 | "outputs": [], 61 | "source": "SELECT\n CASE\n WHEN Q.total_elapsed_time <= 1000 THEN 'Less than 1 second'\n WHEN Q.total_elapsed_time <= 60000 THEN '1 second to 1 minute'\n WHEN Q.total_elapsed_time <= 300000 THEN '1 minute to 5 minutes'\n ELSE 'more than 5 minutes'\n END AS BUCKETS,\n COUNT(query_id) AS number_of_queries\nFROM snowflake.account_usage.query_history Q\nWHERE TO_DATE(Q.START_TIME) > DATEADD(month,-1,TO_DATE(CURRENT_TIMESTAMP()))\n AND total_elapsed_time > 0\n AND warehouse_name = 'STREAMLIT_DEMO_APPS'\nGROUP BY 1;", 62 | "execution_count": null 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "id": "fe72eeaf-21ab-491c-bf7b-9de506419512", 67 | "metadata": { 68 | "name": "md_3", 69 | "collapsed": false 70 | }, 71 | "source": "## 3. Find long running repeated queries\n\nYou can use the query hash (the value of the query_hash column in the ACCOUNT_USAGE QUERY_HISTORY view) to find patterns in query performance that might not be obvious. For example, although a query might not be excessively expensive during any single execution, a frequently repeated query could lead to high costs, based on the number of times the query runs.\n\nYou can use the query hash to identify the queries that you should focus on optimizing first. For example, the following query uses the value in the query_hash column to identify the query IDs for the 100 longest-running queries:" 72 | }, 73 | { 74 | "cell_type": "code", 75 | "id": "b8fe9d0d-3c06-4288-958d-44376364a0ae", 76 | "metadata": { 77 | "language": "sql", 78 | "name": "sql_3", 79 | "collapsed": false, 80 | "codeCollapsed": false 81 | }, 82 | "outputs": [], 83 | "source": "SELECT\n query_hash,\n COUNT(*),\n SUM(total_elapsed_time),\n ANY_VALUE(query_id)\n FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY\n WHERE warehouse_name = 'STREAMLIT_DEMO_APPS'\n AND DATE_TRUNC('day', start_time) >= CURRENT_DATE() - 7\n GROUP BY query_hash\n ORDER BY SUM(total_elapsed_time) DESC\n LIMIT 100;", 84 | "execution_count": null 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "98d2b8b5-ab49-4a15-bac1-fa026d3206aa", 89 | "metadata": { 90 | "name": "md_4", 91 | "collapsed": false 92 | }, 93 | "source": "## 4. Track the average performance of a query over time\n\nThe following statement computes the daily average total elapsed time for all queries that have a specific parameterized query hash (7f5c370a5cddc67060f266b8673a347b)." 94 | }, 95 | { 96 | "cell_type": "code", 97 | "id": "a37b360e-7c7e-4ff8-a81d-93c223498f15", 98 | "metadata": { 99 | "language": "sql", 100 | "name": "sql_4", 101 | "codeCollapsed": false, 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": "SELECT\n DATE_TRUNC('day', start_time),\n SUM(total_elapsed_time),\n ANY_VALUE(query_id)\n FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY\n WHERE query_parameterized_hash = '7f5c370a5cddc67060f266b8673a347b'\n AND DATE_TRUNC('day', start_time) >= CURRENT_DATE() - 30\n GROUP BY DATE_TRUNC('day', start_time);", 106 | "execution_count": null 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "id": "8dce0934-ef0c-4bdb-a28a-25c1286f9789", 111 | "metadata": { 112 | "name": "md_5", 113 | "collapsed": false 114 | }, 115 | "source": "## 5. Total warehouse load\nThis query provides insight into the total load of a warehouse for executed and queued queries. 
These load values represent the ratio of the total execution time (in seconds) of all queries in a specific state in an interval by the total time (in seconds) for that interval.\n\nFor example, if 276 seconds was the total time for 4 queries in a 5 minute (300 second) interval, then the query load value is 276 / 300 = 0.92." 116 | }, 117 | { 118 | "cell_type": "code", 119 | "id": "24486435-31df-457e-9ce4-a55cce2824d1", 120 | "metadata": { 121 | "language": "sql", 122 | "name": "sql_5", 123 | "codeCollapsed": false, 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": "SELECT TO_DATE(start_time) AS date,\n warehouse_name,\n SUM(avg_running) AS sum_running,\n SUM(avg_queued_load) AS sum_queued\nFROM snowflake.account_usage.warehouse_load_history\nWHERE TO_DATE(start_time) >= DATEADD(month,-1,CURRENT_TIMESTAMP())\nGROUP BY 1,2\nHAVING SUM(avg_queued_load) >0;", 128 | "execution_count": null 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "id": "e654c671-c5f4-40e2-9cb4-301a028e4b83", 133 | "metadata": { 134 | "name": "md_6", 135 | "collapsed": false 136 | }, 137 | "source": "## 6. Longest running tasks\nThis query lists the longest running tasks in the last day, which can indicate an opportunity to optimize the SQL being executed by the task." 138 | }, 139 | { 140 | "cell_type": "code", 141 | "id": "ff6c5cf8-7a65-460f-b95c-48e2559692b0", 142 | "metadata": { 143 | "language": "sql", 144 | "name": "sql_6", 145 | "codeCollapsed": false, 146 | "collapsed": false 147 | }, 148 | "outputs": [], 149 | "source": "SELECT DATEDIFF(seconds, query_start_time,completed_time) AS duration_seconds,*\nFROM snowflake.account_usage.task_history\nWHERE state = 'SUCCEEDED'\n AND query_start_time >= DATEADD (week, -1, CURRENT_TIMESTAMP())\nORDER BY duration_seconds DESC;", 150 | "execution_count": null 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "id": "9989e783-5e01-4a59-aaee-cb71f05fd468", 155 | "metadata": { 156 | "name": "md_resources", 157 | "collapsed": false 158 | }, 159 | "source": "## Resources\n\nQueries used in this notebook is from the [Snowflake Docs](https://docs.snowflake.com/) on [Exploring execution times](https://docs.snowflake.com/en/user-guide/performance-query-exploring)" 160 | } 161 | ] 162 | } 163 | -------------------------------------------------------------------------------- /Query_Performance_Insights/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: [] 5 | -------------------------------------------------------------------------------- /Query_Performance_Insights_using_Streamlit/Build_an_Interactive_Query_Performance_App_with_Streamlit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | } 7 | }, 8 | "nbformat_minor": 5, 9 | "nbformat": 4, 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "id": "d43a3edd-7c40-4a96-a4c6-c46e52b415ed", 14 | "metadata": { 15 | "name": "md_title", 16 | "collapsed": false 17 | }, 18 | "source": "# Build an Interactive Query Performance App in Snowflake Notebooks using Streamlit\n\nIn this notebook, we'll create an interactive Streamlit app for analyzing query history to shed light on longest-running queries. These insights can help in further actions to optimize computation. 
\n" 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "201438af-5d95-44b5-9582-ac165686ea47", 23 | "metadata": { 24 | "name": "md_query", 25 | "collapsed": false 26 | }, 27 | "source": "## SQL Query: Top n longest-running queries\n\nThis query provides a listing of the top n (50 in the example below) longest-running queries in the last day. You can adjust the `DATEADD` function to focus on a shorter or longer period of time. Replace `STREAMLIT_DEMO_APPS` with the name of a warehouse." 28 | }, 29 | { 30 | "cell_type": "code", 31 | "id": "c695373e-ac74-4b62-a1f1-08206cbd5c81", 32 | "metadata": { 33 | "language": "sql", 34 | "name": "sql_query", 35 | "codeCollapsed": false, 36 | "collapsed": false 37 | }, 38 | "source": "SELECT query_id,\n ROW_NUMBER() OVER(ORDER BY partitions_scanned DESC) AS query_id_int,\n query_text,\n total_elapsed_time/1000 AS query_execution_time_seconds,\n partitions_scanned,\n partitions_total,\nFROM snowflake.account_usage.query_history Q\nWHERE warehouse_name = 'STREAMLIT_DEMO_APPS' AND TO_DATE(Q.start_time) > DATEADD(day,-1,TO_DATE(CURRENT_TIMESTAMP()))\n AND total_elapsed_time > 0 --only get queries that actually used compute\n AND error_code IS NULL\n AND partitions_scanned IS NOT NULL\nORDER BY total_elapsed_time desc\nLIMIT 50;", 39 | "execution_count": null, 40 | "outputs": [] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "51f7f20c-f6d7-4e44-b22d-5409560ef0a3", 45 | "metadata": { 46 | "name": "md_app", 47 | "collapsed": false 48 | }, 49 | "source": "## Implementing the Interactive Query Performance App\n\nThe workflow is implemented using 5 Python libraries:\n- **Snowflake Snowpark**: Database connectivity to Snowflake\n- **Pandas**: Data wrangling\n- **Streamlit**: Web application framework\n- **Altair**: Data visualization\n- **NumPy**: Numerical computing\n\nUsers can provide the following input parameters:\n- Timeframes (day, week, month,\n- Number of rows to display, \n- Bin sizes for histograms\n- SQL commands to analyze\n\nThese input are used to retrieve and process data resulting in the generation of various visualizations and data analysis as follows:\n- Histogram of query execution time\n- Box plot of query execution time\n- Summary statistics" 50 | }, 51 | { 52 | "cell_type": "code", 53 | "id": "2bdb7d5a-f4dc-4eed-99bc-8726adfa5f8c", 54 | "metadata": { 55 | "language": "python", 56 | "name": "py_app", 57 | "collapsed": false, 58 | "codeCollapsed": false 59 | }, 60 | "outputs": [], 61 | "source": "from snowflake.snowpark.context import get_active_session\nimport pandas as pd\nimport streamlit as st\nimport altair as alt\nimport numpy as np\n\nst.title('Top n longest-running queries')\n\n# Input widgets\ncol = st.columns(3)\n\nwith col[0]:\n timeframe_option = st.selectbox('Select a timeframe', ('day', 'week', 'month'))\n\nwith col[1]:\n limit_option = st.slider('Display n rows', 10, 200, 100)\n\nwith col[2]:\n bin_option = st.slider('Bin size', 1, 30, 10)\n\nsql_command_option = st.multiselect('Select a SQL command to analyze', \n ['describe', 'execute', 'show', 'PUT', 'SELECT'],\n ['describe', 'show'])\n\n# Data retrieval\nsession = get_active_session()\ndf = session.sql(\n f\"\"\"\n SELECT query_id,\n ROW_NUMBER() OVER(ORDER BY partitions_scanned DESC) AS query_id_int,\n query_text,\n total_elapsed_time/1000 AS query_execution_time_seconds,\n partitions_scanned,\n partitions_total,\n FROM snowflake.account_usage.query_history Q\n WHERE warehouse_name = 'STREAMLIT_DEMO_APPS' AND TO_DATE(Q.start_time) > 
DATEADD({timeframe_option},-1,TO_DATE(CURRENT_TIMESTAMP()))\n AND total_elapsed_time > 0 --only get queries that actually used compute\n AND error_code IS NULL\n AND partitions_scanned IS NOT NULL\n ORDER BY total_elapsed_time desc\n LIMIT {limit_option};\n \"\"\"\n ).to_pandas()\n\ndf = df[df['QUERY_TEXT'].str.lower().str.startswith(tuple(commands.lower() for commands in sql_command_option))]\n\nst.title('Histogram of Query Execution Times')\n\n# Create a DataFrame for the histogram data\nhist, bin_edges = np.histogram(df['QUERY_EXECUTION_TIME_SECONDS'], bins=bin_option)\n\nhistogram_df = pd.DataFrame({\n 'bin_start': bin_edges[:-1],\n 'bin_end': bin_edges[1:],\n 'count': hist\n})\nhistogram_df['bin_label'] = histogram_df.apply(lambda row: f\"{row['bin_start']:.2f} - {row['bin_end']:.2f}\", axis=1)\n\n# Create plots\nhistogram_plot = alt.Chart(histogram_df).mark_bar().encode(\n x=alt.X('bin_label:N', sort=histogram_df['bin_label'].tolist(),\n axis=alt.Axis(title='QUERY_EXECUTION_TIME_SECONDS', labelAngle=90)),\n y=alt.Y('count:Q', axis=alt.Axis(title='Count')),\n tooltip=['bin_label', 'count']\n)\n\nbox_plot = alt.Chart(df).mark_boxplot(\n extent=\"min-max\",\n color='yellow'\n).encode(\n alt.X(\"QUERY_EXECUTION_TIME_SECONDS:Q\", scale=alt.Scale(zero=False))\n).properties(\n height=200\n)\n\nst.altair_chart(histogram_plot, use_container_width=True)\nst.altair_chart(box_plot, use_container_width=True)\n\n\n# Data display\nwith st.expander('Show data'):\n st.dataframe(df)\nwith st.expander('Show summary statistics'):\n st.write(df['QUERY_EXECUTION_TIME_SECONDS'].describe())", 62 | "execution_count": null 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "id": "9989e783-5e01-4a59-aaee-cb71f05fd468", 67 | "metadata": { 68 | "name": "md_resources", 69 | "collapsed": false 70 | }, 71 | "source": "## Resources\n\nQueries used in this notebook is from the [Snowflake Docs](https://docs.snowflake.com/) on [Exploring execution times](https://docs.snowflake.com/en/user-guide/performance-query-exploring)\n\nFurther information on the use of Streamlit can be found at the [Streamlit Docs](https://docs.streamlit.io/)." 72 | } 73 | ] 74 | } 75 | -------------------------------------------------------------------------------- /Query_Performance_Insights_using_Streamlit/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - altair=* 6 | - numpy=* 7 | - pandas=* 8 | - snowflake-snowpark-python=* 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Snowflake Notebook Demos 2 | Snowflake Notebooks is your familiar, interactive development environment to perform Data Science, Data Engineering, and AI/ML workloads end-to-end in Snowflake. Write Python & SQL in the same interface. 3 | 4 | This repo contains a collection of Snowflake Notebook demos, tutorials, and examples. Browse each folder to access the notebook files associated with each demo. Here is a list of notebooks you can find in this repo. 5 | 6 | 7 | 10 | 18 | 19 | 20 | 21 | 24 | 35 | 36 | 37 | 38 | 41 | 50 | 51 | 52 | 53 | 56 | 64 | 65 | 66 | 69 | 81 | 82 | 83 | 86 | 95 | 96 |
*Demo notebooks by category: Getting Started · Data Administration · Data Science · Data Engineering · Machine Learning · Using Notebooks (category images and per-category notebook links are omitted in this extract).*
97 | 98 | 99 | ## Load demo notebooks to Snowflake 100 | 101 | The notebook files are available for download as `.ipynb` files. To load the demo notebooks into your Snowflake Notebook, follow these steps: 102 | 103 | 1. On Github, click into each folder containing the tutorial and the corresponding `.ipynb file`, such as [this](https://github.com/Snowflake-Labs/notebook-demo/blob/main/My%20First%20Notebook%20Project/My%20First%20Notebook%20Project.ipynb). Download the file by clicking on the `Download raw file` from the top right. 104 | 105 | 2. Go to the Snowflake web interface, [Snowsight](https://app.snowflake.com), on your browser. 106 | 107 | 3. Navigate to `Project` > `Notebooks` from the left menu bar. 108 | 109 | 3. Import the .ipynb file you've download into your Snowflake Notebook by using the `Import from .ipynb` button located on the top right of the Notebooks page. 110 | 111 | 4. Select the file from your local directory and press `Open`. 112 | 113 | 5. A `Create Notebook` dialog will show up. Select a database, schema, and warehouse for the Notebook and click `Create`. 114 | 115 | ## Resources 116 | 117 | Here are some resources to learn more about Snowflake Notebooks: 118 | 119 | * [Documentation](https://docs.snowflake.com/LIMITEDACCESS/snowsight-notebooks/ui-snowsight-notebooks-about) 120 | * [YouTube Playlist](https://www.youtube.com/playlist?list=PLavJpcg8cl1Efw8x_fBKmfA2AMwjUaeBI) 121 | * [Solution Center](https://developers.snowflake.com/solutions/?_sft_technology=notebooks) 122 | 123 | ## License 124 | 125 | All code and notebooks included in this repo is available with an Apache 2.0 license. 126 | 127 | ## Other links 128 | 129 | * Interested in developing and running interactive Streamlit apps in Snowflake? Check out the [Streamlit in Snowflake Demo Repo](https://github.com/Snowflake-Labs/snowflake-demo-streamlit/) to learn more! 130 | -------------------------------------------------------------------------------- /Reference cells and variables/Reference cells and variables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "d40f15d5-0f06-4c81-b4e6-a760771d44c2", 6 | "metadata": { 7 | "collapsed": false, 8 | "name": "cell1" 9 | }, 10 | "source": [ 11 | "# Reference cells and variables in Snowflake Notebooks" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "id": "884f6e12-725b-4ae2-b9c9-5eaa4f4f964f", 17 | "metadata": { 18 | "collapsed": false, 19 | "name": "cell2" 20 | }, 21 | "source": [ 22 | "You can reference the results of previous cells in a cell in your notebook. This allows you to seamless switch between working in Python and SQL and reuse the results and variables.\n", 23 | "\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "id": "1ad40569-c979-461e-a2a0-98449785ba2f", 29 | "metadata": { 30 | "collapsed": false, 31 | "name": "cell3" 32 | }, 33 | "source": [ 34 | "## Referencing SQL output in Python cells\n", 35 | "\n", 36 | "We can access the SQL results directly in Python and convert the results to a Snowpark or pandas dataframe.\n", 37 | "\n", 38 | "The cell reference is based on the cell name. 
Note that if you change the cell name, you will also need to update the subsequent cell reference accordingly.\n", 39 | "\n", 40 | "\n", 41 | "### Example 1: Access SQL results as Snowpark or Pandas Dataframes" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "3775908f-ca36-4846-8f38-5adca39217f2", 48 | "metadata": { 49 | "codeCollapsed": false, 50 | "language": "sql", 51 | "name": "cell4" 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "-- assign Query Tag to Session. This helps with performance monitoring and troubleshooting\n", 56 | "ALTER SESSION SET query_tag = '{\"origin\":\"sf_sit-is\",\"name\":\"notebook_demo_pack\",\"version\":{\"major\":1, \"minor\":0},\"attributes\":{\"is_quickstart\":0, \"source\":\"sql\", \"vignette\":\"reference_cells\"}}';\n", 57 | "\n", 58 | "SELECT 'FRIDAY' as SNOWDAY, 0.2 as CHANCE_OF_SNOW\n", 59 | "UNION ALL\n", 60 | "SELECT 'SATURDAY',0.5\n", 61 | "UNION ALL \n", 62 | "SELECT 'SUNDAY', 0.9;" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "id": "8d50cbf4-0c8d-4950-86cb-114990437ac9", 69 | "metadata": { 70 | "codeCollapsed": false, 71 | "language": "python", 72 | "name": "cell5" 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "snowpark_df = cell4.to_df()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "id": "c695373e-ac74-4b62-a1f1-08206cbd5c81", 83 | "metadata": { 84 | "codeCollapsed": false, 85 | "language": "python", 86 | "name": "cell6" 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "pandas_df = cell4.to_pandas()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "585a54f7-5dd4-412a-9c42-89d5c5d5978c", 96 | "metadata": { 97 | "collapsed": false, 98 | "name": "cell7" 99 | }, 100 | "source": [ 101 | "## Referencing variables in SQL code\n", 102 | "\n", 103 | "You can use the Jinja syntax `{{..}}` to reference Python variables within your SQL queries as follows.\n", 104 | "\n", 105 | "### Example 2: Using Python variable value in a SQL query\n" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "id": "e73b633a-57d4-436c-baae-960c92c9cef6", 112 | "metadata": { 113 | "codeCollapsed": false, 114 | "collapsed": false, 115 | "language": "sql", 116 | "name": "cell8" 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "-- Create a dataset of countries\n", 121 | "CREATE OR REPLACE TABLE countries (\n", 122 | " country_name VARCHAR(100)\n", 123 | ");\n", 124 | "\n", 125 | "INSERT INTO countries (country_name) VALUES\n", 126 | " ('USA'),('Canada'),('United Kingdom'),('Germany'),('France'),\n", 127 | " ('Australia'),('Japan'),('China'),('India'),('Brazil');" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "id": "e7a6f119-4f67-4ef5-a35f-117a7f502475", 134 | "metadata": { 135 | "codeCollapsed": false, 136 | "language": "python", 137 | "name": "cell9" 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "c = \"'USA'\"" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "id": "60a59077-a4b1-4699-81a5-645addd8ad6d", 148 | "metadata": { 149 | "codeCollapsed": false, 150 | "language": "sql", 151 | "name": "cell10" 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "-- Filter to record where country is USA\n", 156 | "SELECT * FROM countries WHERE COUNTRY_NAME = {{c}}" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "id": "decf8b5e-e804-439d-a186-3a329da12563", 162 | "metadata": { 163 | "name": "cell11" 164 | 
}, 165 | "source": [ 166 | "### Example 3: Using Python dataframe in a SQL query" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "id": "9b49d972-3966-4fa6-9457-f028b06484a3", 173 | "metadata": { 174 | "codeCollapsed": false, 175 | "language": "sql", 176 | "name": "cell12" 177 | }, 178 | "outputs": [], 179 | "source": [ 180 | "-- Create dataset with columns PRODUCT_ID, RATING, PRICE\n", 181 | "SELECT CONCAT('SNOW-',UNIFORM(1000,9999, RANDOM())) AS PRODUCT_ID, \n", 182 | " ABS(NORMAL(5, 3, RANDOM())) AS RATING, \n", 183 | " ABS(NORMAL(750, 200::FLOAT, RANDOM())) AS PRICE\n", 184 | "FROM TABLE(GENERATOR(ROWCOUNT => 100));" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "id": "b7040f85-0ab8-4bdb-a36e-33599b79ea54", 191 | "metadata": { 192 | "codeCollapsed": false, 193 | "language": "sql", 194 | "name": "cell13" 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "-- Filter to products where price is greater than 500\n", 199 | "SELECT * FROM {{cell12}} where PRICE > 500" 200 | ] 201 | } 202 | ], 203 | "metadata": { 204 | "kernelspec": { 205 | "display_name": "Streamlit Notebook", 206 | "name": "streamlit" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 5 211 | } 212 | -------------------------------------------------------------------------------- /Role_Based_Access_Auditing_with_Streamlit/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - altair=* 6 | - pandas=* 7 | -------------------------------------------------------------------------------- /Scheduled_Query_Execution_Report/Scheduled_Query_Execution_Report.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | } 7 | }, 8 | "nbformat_minor": 5, 9 | "nbformat": 4, 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "id": "cc4fb15e-f9db-44eb-9f60-1b9589b755cb", 14 | "metadata": { 15 | "name": "md_title", 16 | "collapsed": false, 17 | "resultHeight": 285 18 | }, 19 | "source": "# Scheduled Query Execution Report\n\nA notebook to report on failed or long-running scheduled queries, providing insights into reliability issues.\n\nHere's a breakdown of the steps:\n1. Retrieve Data\n2. Convert Table to a DataFrame\n3. Create an Interactive Slider Widget & Data Preparation\n4. Create a Heatmap for Visualizing Scheduled Query Execution" 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "42a7b143-0779-4706-affc-c214213f55c5", 24 | "metadata": { 25 | "name": "md_retrieve_data", 26 | "collapsed": false, 27 | "resultHeight": 170 28 | }, 29 | "source": "## 1. Retrieve Data\n\nFirstly, we'll write an SQL query to retrieve the execution history for scheduled queries, along with their status, timing metrics, and execution status. \n\nWe're obtaining this from the `snowflake.account_usage.task_history` table." 
30 | }, 31 | { 32 | "cell_type": "code", 33 | "id": "39f7713b-dd7a-41a2-872e-cc534c6dc4f6", 34 | "metadata": { 35 | "language": "sql", 36 | "name": "sql_data", 37 | "resultHeight": 439, 38 | "collapsed": false, 39 | "codeCollapsed": false 40 | }, 41 | "outputs": [], 42 | "source": "SELECT \n name,\n database_name,\n query_id,\n query_text,\n schema_name,\n scheduled_time,\n query_start_time,\n completed_time,\n DATEDIFF('second', query_start_time, completed_time) as execution_time_seconds,\n state,\n error_code,\n error_message,\nFROM snowflake.account_usage.task_history\nWHERE scheduled_time >= DATEADD(days, -1, CURRENT_TIMESTAMP())\nORDER BY scheduled_time DESC;", 43 | "execution_count": null 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "870b69dd-aae0-4dd3-93f7-7adce1268159", 48 | "metadata": { 49 | "name": "md_dataframe", 50 | "collapsed": false, 51 | "resultHeight": 102 52 | }, 53 | "source": "## 2. Convert Table to a DataFrame\n\nNext, we'll convert the table to a Pandas DataFrame." 54 | }, 55 | { 56 | "cell_type": "code", 57 | "id": "4a5559a8-ef3a-40c3-a9d5-54602403adab", 58 | "metadata": { 59 | "language": "python", 60 | "name": "py_dataframe", 61 | "codeCollapsed": false, 62 | "resultHeight": 439, 63 | "collapsed": false 64 | }, 65 | "outputs": [], 66 | "source": "sql_data.to_pandas()", 67 | "execution_count": null 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "id": "59b04137-ca95-4fb8-b216-133272349a78", 72 | "metadata": { 73 | "name": "md_data_preparation", 74 | "collapsed": false, 75 | "resultHeight": 195 76 | }, 77 | "source": "## 3. Create an Interactive Slider Widget & Data Preparation\n\nHere, we'll create an interactive slider for dynamically selecting the number of days to analyze. This would then trigger the filtering of the DataFrame to the specified number of days.\n\nNext, we'll reshape the data by calculating the frequency count by hour and task name, which will subsequently be used for creating the heatmap in the next step." 78 | }, 79 | { 80 | "cell_type": "code", 81 | "id": "ba8fa564-d7d5-4d1c-9f6b-400f9c05ecae", 82 | "metadata": { 83 | "language": "python", 84 | "name": "py_data_preparation", 85 | "codeCollapsed": false, 86 | "resultHeight": 216 87 | }, 88 | "outputs": [], 89 | "source": "import pandas as pd\nimport streamlit as st\nimport altair as alt\n\n# Create date filter slider\nst.subheader(\"Select time duration\")\ndays = st.slider('Select number of days to analyze', \n min_value=10, \n max_value=90, \n value=30, \n step=10)\n \n# Filter data according to day duration\nlatest_date = pd.to_datetime(df['SCHEDULED_TIME']).max()\ncutoff_date = latest_date - pd.Timedelta(days=days)\nfiltered_df = df[pd.to_datetime(df['SCHEDULED_TIME']) > cutoff_date].copy()\n \n# Prepare data for heatmap\nfiltered_df['HOUR_OF_DAY'] = pd.to_datetime(filtered_df['SCHEDULED_TIME']).dt.hour\nfiltered_df['HOUR_DISPLAY'] = filtered_df['HOUR_OF_DAY'].apply(lambda x: f\"{x:02d}:00\")\n \n# Calculate frequency count by hour and task name\nagg_df = filtered_df.groupby(['NAME', 'HOUR_DISPLAY', 'STATE']).size().reset_index(name='COUNT')\n\nst.warning(f\"Analyzing data for the last {days} days!\")", 90 | "execution_count": null 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "id": "35f31e4e-95d5-4ee5-a146-b9e93dd9d570", 95 | "metadata": { 96 | "name": "md_heatmap", 97 | "collapsed": false, 98 | "resultHeight": 128 99 | }, 100 | "source": "## 4. 
Create a Heatmap for Visualizing Scheduled Query Execution\n\nFinally, a heatmap and summary statistics table are generated that will allow us to gain insights on the task name and state (e.g. `SUCCEEDED`, `FAILED`, `SKIPPED`)." 101 | }, 102 | { 103 | "cell_type": "code", 104 | "id": "e3049001-f3ba-4b66-ba54-c9f02f551992", 105 | "metadata": { 106 | "language": "python", 107 | "name": "py_heatmap", 108 | "codeCollapsed": false, 109 | "resultHeight": 791 110 | }, 111 | "outputs": [], 112 | "source": "# Create heatmap\nchart = alt.Chart(agg_df).mark_rect(\n stroke='black',\n strokeWidth=1\n).encode(\n x=alt.X('HOUR_DISPLAY:O', \n title='Hour of Day',\n axis=alt.Axis(\n labels=True,\n tickMinStep=1,\n labelOverlap=False\n )),\n y=alt.Y('NAME:N', \n title='',\n axis=alt.Axis(\n labels=True,\n labelLimit=200,\n tickMinStep=1,\n labelOverlap=False,\n labelPadding=10\n )),\n color=alt.Color('COUNT:Q', \n title='Number of Executions'),\n row=alt.Row('STATE:N', \n title='Task State',\n header=alt.Header(labelAlign='left')),\n tooltip=[\n alt.Tooltip('NAME', title='Task Name'),\n alt.Tooltip('HOUR_DISPLAY', title='Hour'),\n alt.Tooltip('STATE', title='State'),\n alt.Tooltip('COUNT', title='Number of Executions')\n ]\n).properties(\n height=100,\n width=450\n).configure_view(\n stroke=None,\n continuousWidth=300\n).configure_axis(\n labelFontSize=10\n)\n\n# Display the chart\nst.subheader(f'Task Execution Frequency by State ({days} Days)')\nst.altair_chart(chart)\n\n# Optional: Display summary statistics\nst.subheader(\"Summary Statistics\")\nsummary_df = filtered_df.groupby('NAME').agg({\n 'STATE': lambda x: pd.Series(x).value_counts().to_dict()\n}).reset_index()\n\n# Format the state counts as separate columns\nstate_counts = pd.json_normalize(summary_df['STATE']).fillna(0).astype(int)\nsummary_df = pd.concat([summary_df['NAME'], state_counts], axis=1)\n\nst.dataframe(summary_df)", 113 | "execution_count": null 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "id": "eb3e9b67-6a6e-4218-b17a-3f8564a04d18", 118 | "metadata": { 119 | "name": "md_resources", 120 | "collapsed": false, 121 | "resultHeight": 217 122 | }, 123 | "source": "## Want to learn more?\n\n- Snowflake Docs on [Account Usage](https://docs.snowflake.com/en/sql-reference/account-usage) and [TASK_HISTORY view](https://docs.snowflake.com/en/sql-reference/account-usage/task_history)\n- More about [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake)\n- For more inspiration on how to use Streamlit widgets in Notebooks, check out [Streamlit Docs](https://docs.streamlit.io/) and this list of what is currently supported inside [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake#label-notebooks-streamlit-support)\n- Check out the [Altair User Guide](https://altair-viz.github.io/user_guide/data.html) for further information on customizing Altair charts" 124 | } 125 | ] 126 | } -------------------------------------------------------------------------------- /Scheduled_Query_Execution_Report/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - altair=* 6 | - pandas=* 7 | -------------------------------------------------------------------------------- /Schema_Change_Tracker/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - 
snowflake 4 | dependencies: 5 | - altair=* 6 | - pandas=* 7 | -------------------------------------------------------------------------------- /Snowflake_Notebooks_Summit_2024_Demo/aileen_summit_notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | } 7 | }, 8 | "nbformat_minor": 5, 9 | "nbformat": 4, 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "id": "30fcf7ae-e7f3-4a88-8afc-6568831d1c2a", 14 | "metadata": { 15 | "name": "Title", 16 | "collapsed": false, 17 | "resultHeight": 333 18 | }, 19 | "source": "# :date: Send :orange[Daily Digest] of Fresh Foods Customer Reviews to :orange[Slack] \n\n## Features\n:gray[In this demo, we'll cover the following features:]\n- :gray[Calling Snowflake Cortex functions]\n- :gray[Integrating with external endpoints, i.e. Slack APIs]\n- :gray[Scheduling the notebook to run daily]\n- :gray[Keeping version control with Git]\n- :green[**BONUS**] :gray[- Run one notebook from another :knot: :knot: :knot:]" 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "754480e1-8983-4b6c-8ba7-270e9dc5994f", 24 | "metadata": { 25 | "name": "Step_1_Get_data", 26 | "collapsed": false, 27 | "resultHeight": 60 28 | }, 29 | "source": "## Step :one: - Get the customer reviews data :speech_balloon:" 30 | }, 31 | { 32 | "cell_type": "code", 33 | "id": "465f4adb-3571-483b-90da-cd3e576b9435", 34 | "metadata": { 35 | "language": "sql", 36 | "name": "Get_data", 37 | "collapsed": false, 38 | "codeCollapsed": false 39 | }, 40 | "outputs": [], 41 | "source": "USE SCHEMA PUBLIC.PUBLIC;\nSELECT * FROM FRESH_FOODS_REVIEWS;", 42 | "execution_count": null 43 | }, 44 | { 45 | "cell_type": "code", 46 | "id": "89f98a73-ef13-4a4e-a8c6-7ed8bf620930", 47 | "metadata": { 48 | "language": "python", 49 | "name": "Set_review_date", 50 | "collapsed": false 51 | }, 52 | "outputs": [], 53 | "source": "from datetime import date\nimport streamlit as st\n\nreview_date = date(2024, 6, 4) # change to `date.today()` to always grab the current date \nst.write(review_date)", 54 | "execution_count": null 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "id": "d3530f1e-55dd-43d9-9e09-0c0797116102", 59 | "metadata": { 60 | "name": "Step_2_Cortex", 61 | "collapsed": false, 62 | "resultHeight": 377 63 | }, 64 | "source": "## Step :two: - Ask Snowflake Cortex to generate the daily digest :mega:\nSnowflake Cortex is a fully-managed service that enables access to industry-leading large language models (LLMs).\n- COMPLETE: Given a prompt, returns a response that completes the prompt. This function accepts either a single prompt or a conversation with multiple prompts and responses.\n\n- EMBED_TEXT_768: Given a piece of text, returns a vector embedding that represents that text.\n\n- EXTRACT_ANSWER: Given a question and unstructured data, returns the answer to the question if it can be found in the data.\n\n- SENTIMENT: Returns a sentiment score, from -1 to 1, representing the detected positive or negative sentiment of the given text.\n\n- SUMMARIZE: Returns a summary of the given text.\n\n- TRANSLATE: Translates given text from any supported language to any other." 
65 | }, 66 | { 67 | "cell_type": "code", 68 | "id": "58a6bf2f-34df-452d-946f-ba416b07118d", 69 | "metadata": { 70 | "language": "sql", 71 | "name": "Cortex_SUMMARIZE", 72 | "collapsed": false 73 | }, 74 | "outputs": [], 75 | "source": "WITH CUSTOMER_REVIEWS AS(\n SELECT LISTAGG(DISTINCT REVIEW) AS REVIEWS \n FROM {{Get_data}} \n WHERE to_date(DATE) = '{{review_date}}' )\n\nSELECT SNOWFLAKE.CORTEX.SUMMARIZE(REVIEWS) FROM CUSTOMER_REVIEWS;", 76 | "execution_count": null 77 | }, 78 | { 79 | "cell_type": "code", 80 | "id": "eea93bfd-ed59-4478-9931-b145261dab5b", 81 | "metadata": { 82 | "language": "python", 83 | "name": "Summary", 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": "summary_text = Cortex_SUMMARIZE.to_pandas().iloc[0]['SNOWFLAKE.CORTEX.SUMMARIZE(REVIEWS)']\nst.write(summary_text)", 88 | "execution_count": null 89 | }, 90 | { 91 | "cell_type": "code", 92 | "id": "4849cc86-d8b4-4b7c-a4b2-f73174798593", 93 | "metadata": { 94 | "language": "sql", 95 | "name": "Daily_avg_score", 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": "SELECT AVG(SNOWFLAKE.CORTEX.SENTIMENT(REVIEW)) AS AVERAGE_RATING FROM FRESH_FOODS_REVIEWS WHERE DATE = '{{review_date}}';", 100 | "execution_count": null 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "id": "c61883bc-ff05-4627-9558-681383d477f6", 105 | "metadata": { 106 | "name": "Step_3_Slack", 107 | "collapsed": false, 108 | "resultHeight": 60 109 | }, 110 | "source": "## Step :three: - Send the summary and sentiment to Slack :tada:\n" 111 | }, 112 | { 113 | "cell_type": "code", 114 | "id": "f69f5fcf-f470-48a6-a688-259440c95741", 115 | "metadata": { 116 | "language": "python", 117 | "name": "Send_to_Slack", 118 | "collapsed": false, 119 | "codeCollapsed": false 120 | }, 121 | "outputs": [], 122 | "source": "import requests\nimport numpy as np\n\n\nheaders = {\n 'Content-Type': 'Content-type: application/json',\n}\n\n# Extract Daily_avg_score contents\nsentiment_score = str(np.round(Daily_avg_score.to_pandas().values[0][0], 2))\n\n\ndata = {\n\t\"blocks\": [\n\t\t{\n\t\t\t\"type\": \"section\",\n\t\t\t\"text\": {\n\t\t\t\t\"type\": \"mrkdwn\",\n\t\t\t\t\"text\": f\":mega: *Daily summary | Sentiment score: {sentiment_score} | {review_date}*\"\n\t\t\t}\n\t\t},\n\t\t{\n\t\t\t\"type\": \"section\",\n\t\t\t\"text\": {\n\t\t\t\t\"type\": \"mrkdwn\",\n\t\t\t\t\"text\": summary_text\n\t\t\t}\n\t\t},\n\t\t{\n\t\t\t\"type\": \"divider\"\n\t\t},\n\t\t{\n\t\t\t\"type\": \"context\",\n\t\t\t\"elements\": [\n\t\t\t\t{\n\t\t\t\t\t\"type\": \"mrkdwn\",\n\t\t\t\t\t\"text\": \"\"\n\t\t\t\t}\n\t\t\t]\n\t\t}\n\t]\n}\n\nresponse = requests.post(\n 'https://hooks.slack.com/services/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', \n headers=headers, \n json=data)\n\nif response.status_code == 200:\n st.write('✅ Daily summary sent to Slack')", 123 | "execution_count": null 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "id": "89b1c2bd-043b-4313-a20c-91a927e4dbd6", 128 | "metadata": { 129 | "name": "Step_4_Schedule", 130 | "collapsed": false, 131 | "resultHeight": 60 132 | }, 133 | "source": "## Step :four: - Schedule the notebook to send daily updates automatically" 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "id": "8780c297-a747-44f9-8f94-ae9a3084814d", 138 | "metadata": { 139 | "name": "Git_integration", 140 | "collapsed": false, 141 | "resultHeight": 538 142 | }, 143 | "source": "## Let's keep track of code changes!\n- :rainbow[GitHub], :orange[GitLab], :blue[BitBucket], :violet[Azure 
DevOps]\n\n![](https://pngimg.com/uploads/github/github_PNG23.png)" 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "id": "a1089358-dc72-4c1b-bb20-29d86e6ecdd2", 148 | "metadata": { 149 | "name": "Bonus_Chain_notebooks", 150 | "collapsed": false, 151 | "resultHeight": 60 152 | }, 153 | "source": "## Bonus - :chains: Chain multiple notebooks together " 154 | }, 155 | { 156 | "cell_type": "code", 157 | "id": "440692da-0080-4352-87ee-37e94d24027f", 158 | "metadata": { 159 | "language": "sql", 160 | "name": "Run_2nd_notebook", 161 | "collapsed": false, 162 | "codeCollapsed": false 163 | }, 164 | "outputs": [], 165 | "source": "EXECUTE NOTEBOOK PUBLIC.PUBLIC.AILEEN_SUMMIT_DEEP_ANALYSIS_2()", 166 | "execution_count": null 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "id": "97229677-6288-414c-906f-9e74ee1d31de", 171 | "metadata": { 172 | "name": "cell1", 173 | "collapsed": false, 174 | "resultHeight": 176 175 | }, 176 | "source": "## You can also:\n- ### Wrap EXECUTE NOTEBOOK in business logic and call it from a Python cell :bulb:\n- ### Integrate with other orchestration tools :keyboard:" 177 | }, 178 | { 179 | "cell_type": "code", 180 | "id": "3157f79a-f841-4be8-9a50-de312a474723", 181 | "metadata": { 182 | "language": "python", 183 | "name": "Run_on_condition", 184 | "collapsed": false, 185 | "codeCollapsed": false 186 | }, 187 | "outputs": [], 188 | "source": "from snowflake.snowpark.context import get_active_session\nsession = get_active_session()\n\nsentiment_score_flt = np.round(Daily_avg_score.to_pandas().values[0][0], 2)\n \nif sentiment_score_flt < 0.9:\n st.markdown(\"\"\"\n :rotating_light: Sentiment is below threshold! \n \n Kick off the 2nd notebook Deep Analysis.\"\"\")\n session.sql(\"EXECUTE NOTEBOOK PUBLIC.PUBLIC.AILEEN_SUMMIT_DEEP_ANALYSIS_2()\").collect()\nelse:\n st.write(\":sunflower: Sentiment is good. 
Do nothing.\")", 189 | "execution_count": null 190 | } 191 | ] 192 | } 193 | -------------------------------------------------------------------------------- /Snowflake_Semantic_View/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: [] 5 | -------------------------------------------------------------------------------- /Snowflake_Trail_Alerts_Notifications/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - snowflake=* 6 | - snowflake-ml-python=* 7 | - snowflake-snowpark-python=* 8 | -------------------------------------------------------------------------------- /Snowflake_Trail_Alerts_Notifications/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Snowflake_Trail_Alerts_Notifications/screenshot.png -------------------------------------------------------------------------------- /Streamlit_Zero_To_Hero_Machine_Learning_App/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - streamlit=1.35.0 6 | - snowflake-snowpark-python 7 | - scikit-learn=1.3.0 8 | - pandas=2.0.3 9 | - numpy=1.24.3 10 | -------------------------------------------------------------------------------- /Telco Churn Data Analysis/Telco Churn Data Analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "de537cfd", 6 | "metadata": {}, 7 | "source": [ 8 | "## This repo has been moved\n", 9 | "\n", 10 | "Visit [this Github repo](https://github.com/Snowflake-Labs/sfguide-data-analysis-churn-prediction-in-snowflake-notebooks/) to see the full quickstart source code." 
11 | ] 12 | } 13 | ], 14 | "metadata": { 15 | "kernelspec": { 16 | "display_name": "Python 3 (ipykernel)", 17 | "language": "python", 18 | "name": "python3" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.11.5" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 5 35 | } 36 | -------------------------------------------------------------------------------- /Telco Churn Data Analysis/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - imbalanced-learn=0.11.0 6 | - snowflake-ml-python=1.3.1 7 | -------------------------------------------------------------------------------- /Visual Data Stories with Snowflake Notebooks/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - matplotlib=3.7.2 6 | - plotly=5.19.0 7 | -------------------------------------------------------------------------------- /Visual Data Stories with Snowflake Notebooks/snowflake-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Snowflake-Labs/snowflake-demo-notebooks/982169ee826e4eb851e964275f7afe6539727574/Visual Data Stories with Snowflake Notebooks/snowflake-logo.png -------------------------------------------------------------------------------- /Warehouse_Utilization_with_Streamlit/Warehouse_Utilization_with_Streamlit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Streamlit Notebook", 5 | "name": "streamlit" 6 | } 7 | }, 8 | "nbformat_minor": 5, 9 | "nbformat": 4, 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "id": "cc4fb15e-f9db-44eb-9f60-1b9589b755cb", 14 | "metadata": { 15 | "name": "md_title", 16 | "collapsed": false 17 | }, 18 | "source": "# Analyze Warehouse Utilization in Snowflake Notebooks with Streamlit\n\nA notebook that generates a heatmap of warehouse usage patterns to identify peak hours that can help with cost optimization.\n\nHere's what we're implementing to investigate the tables:\n1. Retrieve warehouse utilization data\n2. Convert table to a DataFrame\n3. Create an interactive slider widget\n4. Create a Heatmap for visualizing warehouse usage patterns" 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "42a7b143-0779-4706-affc-c214213f55c5", 23 | "metadata": { 24 | "name": "md_retrieve_data", 25 | "collapsed": false 26 | }, 27 | "source": "## 1. Retrieve warehouse utilization data\n\nFirstly, we'll write a SQL query to retrieve warehouse utilization data." 
28 | }, 29 | { 30 | "cell_type": "code", 31 | "id": "e17f14a5-ea50-4a1d-bc15-c64a6447d0a8", 32 | "metadata": { 33 | "language": "sql", 34 | "name": "sql_warehouse_data", 35 | "codeCollapsed": false, 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": "SELECT \n DATE(start_time) AS usage_date,\n HOUR(start_time) AS hour_of_day,\n warehouse_name,\n avg_running,\n avg_queued_load,\n start_time,\n end_time\nFROM snowflake.account_usage.warehouse_load_history\nWHERE start_time >= DATEADD(month, -1, CURRENT_TIMESTAMP())\nORDER BY warehouse_name, start_time;", 40 | "execution_count": null 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "b2ef4485-566e-4b11-bb5a-8085c9bc0c97", 45 | "metadata": { 46 | "name": "md_dataframe", 47 | "collapsed": false 48 | }, 49 | "source": "## 2. Convert table to a DataFrame\n\nNext, we'll convert the table to a Pandas DataFrame." 50 | }, 51 | { 52 | "cell_type": "code", 53 | "id": "014ceccb-9447-43c9-ad8f-a91a80722de1", 54 | "metadata": { 55 | "language": "python", 56 | "name": "py_dataframe", 57 | "collapsed": false, 58 | "codeCollapsed": false 59 | }, 60 | "outputs": [], 61 | "source": "sql_warehouse_data.to_pandas()", 62 | "execution_count": null 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "id": "d4027f90-ae2a-41e7-8a09-5c088b3ab3bf", 67 | "metadata": { 68 | "name": "md_", 69 | "collapsed": false 70 | }, 71 | "source": "## 3. Create an Interactive slider widget\n\nLet's create an interactive slider using Streamlit. This would allow users to select the number of days to analyze, which would filter the DataFrame. \n\nFinally, we'll calculate the total warehouse load (`TOTAL_LOAD`) and format the hour display (`HOUR_DISPLAY`) for each record." 72 | }, 73 | { 74 | "cell_type": "code", 75 | "id": "137f2fc5-c5df-4dd4-b223-0e0690b6f8a6", 76 | "metadata": { 77 | "language": "python", 78 | "name": "py_data_preparation", 79 | "codeCollapsed": false, 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": "import pandas as pd\nimport streamlit as st\n\n# Get data\ndf = py_dataframe.copy()\n\n# Create date filter slider\ndays = st.slider('Select number of days to analyze', \n min_value=10, \n max_value=90, \n value=30, \n step=10)\n\n# Filter data based on selected days and create a copy\nlatest_date = pd.to_datetime(df['USAGE_DATE']).max()\ncutoff_date = latest_date - pd.Timedelta(days=days)\nfiltered_df = df[pd.to_datetime(df['USAGE_DATE']) > cutoff_date].copy()\n\n# Prepare data and create heatmap\n#filtered_df.loc[:, 'TOTAL_LOAD'] = filtered_df['AVG_RUNNING'] + filtered_df['AVG_QUEUED_LOAD']\n#filtered_df.loc[:, 'HOUR_DISPLAY'] = filtered_df['HOUR_OF_DAY'].apply(lambda x: f\"{x:02d}:00\")\nfiltered_df['TOTAL_LOAD'] = filtered_df['AVG_RUNNING'] + filtered_df['AVG_QUEUED_LOAD']\nfiltered_df['HOUR_DISPLAY'] = filtered_df['HOUR_OF_DAY'].apply(lambda x: f\"{x:02d}:00\")\n\nst.warning(f\"You've selected {days} days to analyze!\")\nfiltered_df", 84 | "execution_count": null 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "84929a0b-de27-4655-93dc-fd15bac9f3e5", 89 | "metadata": { 90 | "name": "md_heatmap", 91 | "collapsed": false 92 | }, 93 | "source": "## 4. Create a Heatmap for visualizing warehouse usage patterns\n\nFinally, we're create a heatmap using Altair. The heatmap shows the warehouse usage pattern across different hours of the day. Color intensity represents the total load and interactive tooltips showing detailed metrics for each cell." 
94 | }, 95 | { 96 | "cell_type": "code", 97 | "id": "f84a45e7-288f-400c-8a99-badb37a13707", 98 | "metadata": { 99 | "language": "python", 100 | "name": "py_heatmap", 101 | "codeCollapsed": false, 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": "import altair as alt\nimport streamlit as st\n\nchart = alt.Chart(filtered_df).mark_rect(\n stroke='black',\n strokeWidth=1\n).encode(\n x=alt.X('HOUR_DISPLAY:O', \n title='Hour of Day',\n axis=alt.Axis(\n labels=True,\n tickMinStep=1,\n labelOverlap=False\n )),\n y=alt.Y('WAREHOUSE_NAME:N', \n title='Warehouse Name',\n axis=alt.Axis(\n labels=True,\n labelLimit=200,\n tickMinStep=1,\n labelOverlap=False,\n labelPadding=10\n )),\n color=alt.Color('TOTAL_LOAD:Q', title='Total Load'),\n tooltip=['WAREHOUSE_NAME', 'HOUR_DISPLAY', 'TOTAL_LOAD', \n 'AVG_RUNNING', 'AVG_QUEUED_LOAD']\n).properties(\n #width=700,\n #height=450,\n title=f'Warehouse Usage Patterns ({days} Days)'\n).configure_view(\n stroke=None,\n continuousHeight=400\n).configure_axis(\n labelFontSize=10\n)\n\n# Display the chart\nst.altair_chart(chart, use_container_width=True)", 106 | "execution_count": null 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "id": "f6e54924-57e2-4dfb-8bf1-bad9b7fb635d", 111 | "metadata": { 112 | "name": "md_resources", 113 | "collapsed": false 114 | }, 115 | "source": "## Want to learn more?\n\n- Snowflake Docs on [Account Usage](https://docs.snowflake.com/en/sql-reference/account-usage) and [WAREHOUSE_LOAD_HISTORY view](https://docs.snowflake.com/en/sql-reference/account-usage/warehouse_load_history)\n- More about [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake)\n- For more inspiration on how to use Streamlit widgets in Notebooks, check out [Streamlit Docs](https://docs.streamlit.io/) and this list of what is currently supported inside [Snowflake Notebooks](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks-use-with-snowflake#label-notebooks-streamlit-support)\n- Check out the [Altair User Guide](https://altair-viz.github.io/user_guide/data.html) for further information on customizing Altair charts" 116 | } 117 | ] 118 | } 119 | -------------------------------------------------------------------------------- /Warehouse_Utilization_with_Streamlit/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - altair=* 6 | - pandas=* 7 | -------------------------------------------------------------------------------- /Working with Git/Working with Git.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "38d31fbc-6666-4495-a2b1-d716ffe24329", 6 | "metadata": { 7 | "collapsed": false, 8 | "name": "cell1" 9 | }, 10 | "source": [ 11 | "In this example, we will demonstrate how you can easily go from prototyping for development purposes to production with Git integration.\n", 12 | "\n", 13 | "We will show an example of a simple data pipeline with one query. By changing the `MODE` variable to `DEV` or `PROD` with different warehouse and schema configurations.\n", 14 | "\n", 15 | "For `DEV`, we will be using an extra small warehouse on a sample of the TPCH data.\n", 16 | "For `PROD`, we will be using a large warehouse on a sample of the TPCH data that is 100X the size of the DEV sample." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "3775908f-ca36-4846-8f38-5adca39217f2", 23 | "metadata": { 24 | "codeCollapsed": false, 25 | "collapsed": false, 26 | "language": "python", 27 | "name": "cell2" 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "MODE = \"DEV\" # Parameter to control whether to run in DEV or PROD mode\n", 32 | "\n", 33 | "if MODE == \"DEV\":\n", 34 | " # For development, use XSMALL warehouse on TPCH data with scale factor of 1\n", 35 | " warehouse_name = \"GIT_EXAMPLE_DEV_WH\"\n", 36 | " schema_name = \"TPCH_SF1\"\n", 37 | " size = 'XSMALL'\n", 38 | "elif MODE == \"PROD\": \n", 39 | " # For production, use LARGE warehouse on TPCH data with scale factor of 100\n", 40 | " warehouse_name = \"GIT_EXAMPLE_PROD_WH\"\n", 41 | " schema_name = \"TPCH_SF100\"\n", 42 | " size = 'LARGE'" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "01bd1a4d-1715-4c10-8fdc-08be7b115be5", 48 | "metadata": { 49 | "name": "cell3" 50 | }, 51 | "source": [ 52 | "Let's create and use a warehouse with the specified name and size." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "55bb9c45-e1e4-49ba-a7db-e5eb671ad13a", 59 | "metadata": { 60 | "codeCollapsed": false, 61 | "collapsed": false, 62 | "language": "sql", 63 | "name": "cell4" 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "-- Create warehouse with specified name and size\n", 68 | "CREATE OR REPLACE WAREHOUSE {{warehouse_name}} WITH WAREHOUSE_SIZE= {{size}};" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "id": "2b1f4b91-7988-432b-afe1-cb599eea5cc6", 75 | "metadata": { 76 | "collapsed": false, 77 | "language": "sql", 78 | "name": "cell5" 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "-- Use specified warehouse for subsequent query\n", 83 | "USE WAREHOUSE {{warehouse_name}};" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "f330162f-b59e-467d-bc4e-5c297993c4ee", 89 | "metadata": { 90 | "collapsed": false, 91 | "name": "cell6" 92 | }, 93 | "source": [ 94 | "Use the TPC-H Sample dataset with differing scale factor. \n", 95 | "- Note: Sample data sets are provided in a database named SNOWFLAKE_SAMPLE_DATA that has been shared with your account from the Snowflake SFC_SAMPLES account. If you do not see the database, you can create it yourself. Refer to [Using the Sample Database](https://docs.snowflake.com/en/user-guide/sample-data-using)." 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "id": "edb15abf-6061-4e29-9d45-85b0cc806e71", 102 | "metadata": { 103 | "codeCollapsed": false, 104 | "collapsed": false, 105 | "language": "sql", 106 | "name": "cell7" 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "USE SCHEMA SNOWFLAKE_SAMPLE_DATA.{{schema_name}}; " 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "id": "024892ff-b2df-4a4d-9308-1760751b4dae", 116 | "metadata": { 117 | "collapsed": false, 118 | "name": "cell8" 119 | }, 120 | "source": [ 121 | "Check out the number of rows in the `LINEITEM` table." 
122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "id": "e73a5b30-fdcc-4dd6-9619-f19a5c31e769", 128 | "metadata": { 129 | "codeCollapsed": false, 130 | "collapsed": false, 131 | "language": "sql", 132 | "name": "cell9" 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "SELECT COUNT(*) FROM LINEITEM;" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "id": "115c9b33-f508-4385-806d-20bada66fe18", 142 | "metadata": { 143 | "collapsed": false, 144 | "name": "cell10" 145 | }, 146 | "source": [ 147 | "Now let's run a query on this dataset:\n", 148 | "- The query lists totals for extended price, discounted extended price, discounted extended price plus tax, average quantity, average extended price, and average discount. These aggregates are grouped by RETURNFLAG and LINESTATUS, and listed in ascending order of RETURNFLAG and LINESTATUS. A count of the number of line items in each group is included." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "id": "8d50cbf4-0c8d-4950-86cb-114990437ac9", 155 | "metadata": { 156 | "codeCollapsed": false, 157 | "collapsed": false, 158 | "language": "sql", 159 | "name": "cell11" 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "select\n", 164 | " l_returnflag,\n", 165 | " l_linestatus,\n", 166 | " sum(l_quantity) as sum_qty,\n", 167 | " sum(l_extendedprice) as sum_base_price,\n", 168 | " sum(l_extendedprice * (1-l_discount)) as sum_disc_price,\n", 169 | " sum(l_extendedprice * (1-l_discount) * (1+l_tax)) as sum_charge,\n", 170 | " avg(l_quantity) as avg_qty,\n", 171 | " avg(l_extendedprice) as avg_price,\n", 172 | " avg(l_discount) as avg_disc,\n", 173 | " count(*) as count_order\n", 174 | " from\n", 175 | " lineitem\n", 176 | " where\n", 177 | " l_shipdate <= dateadd(day, -90, to_date('1998-12-01'))\n", 178 | " group by\n", 179 | " l_returnflag,\n", 180 | " l_linestatus\n", 181 | " order by\n", 182 | " l_returnflag,\n", 183 | " l_linestatus;" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "id": "170637df-6e8b-498a-8f2a-fda1a41c21ca", 189 | "metadata": { 190 | "collapsed": false, 191 | "name": "cell12" 192 | }, 193 | "source": [ 194 | "Using the cell referencing, we get the query ID and history of the query we just ran." 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "id": "c49eb85b-6956-4da6-949f-1939c6a1dcc4", 201 | "metadata": { 202 | "codeCollapsed": false, 203 | "collapsed": false, 204 | "language": "python", 205 | "name": "cell13" 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "# Get query ID of the referenced cell\n", 210 | "query_id = cell11.result_scan_sql().split(\"'\")[1]" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "id": "dfd22f9f-44ef-4a3f-99e6-7c774b02eea7", 217 | "metadata": { 218 | "codeCollapsed": false, 219 | "collapsed": false, 220 | "language": "sql", 221 | "name": "cell14" 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "select * from table(information_schema.query_history_by_warehouse('{{warehouse_name}}')) \n", 226 | "where query_id = '{{query_id}}';" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "id": "ef4d7fcb-9729-4409-8bce-7a7081b98e87", 232 | "metadata": { 233 | "name": "cell15" 234 | }, 235 | "source": [ 236 | "Finally, we compile all of this information into a report to document the run information." 
237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "id": "9b718981-9577-4996-b212-0cf7ffb4f23b", 243 | "metadata": { 244 | "codeCollapsed": false, 245 | "collapsed": false, 246 | "language": "python", 247 | "name": "cell16" 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "import streamlit as st\n", 252 | "from datetime import datetime\n", 253 | "st.header(f\"[{MODE}] Run Report\")\n", 254 | "st.markdown(f\"Generated on: {datetime.now()}\")\n", 255 | "\n", 256 | "st.markdown(f\"### System Information\")\n", 257 | "# Print session information\n", 258 | "from snowflake.snowpark.context import get_active_session\n", 259 | "session = get_active_session()\n", 260 | "# Add a query tag to the session. This helps with troubleshooting and performance monitoring.\n", 261 | "session.query_tag = {\"origin\":\"sf_sit-is\", \n", 262 | " \"name\":\"notebook_demo_pack\", \n", 263 | " \"version\":{\"major\":1, \"minor\":0},\n", 264 | " \"attributes\":{\"is_quickstart\":1, \"source\":\"notebook\", \"vignette\":\"working_with_git\"}}\n", 265 | "st.markdown(f\"**Database:** {session.get_current_database()[1:-1]}\")\n", 266 | "st.markdown(f\"**Schema:** {session.get_current_schema()[1:-1]}\")\n", 267 | "st.markdown(f\"**Warehouse:** {session.get_current_warehouse()[1:-1]}\")\n", 268 | "\n", 269 | "st.markdown(f\"### Query Information\")\n", 270 | "# Print session information\n", 271 | "st.markdown(f\"**Query ID:** {query_id}\")\n", 272 | "result_info = cell14.to_pandas()\n", 273 | "st.markdown(\"**Query Text:**\")\n", 274 | "st.code(result_info[\"QUERY_TEXT\"].values[0],language='sql',line_numbers=True)\n", 275 | "st.markdown(\"**Runtime information:**\")\n", 276 | "st.dataframe(result_info[['START_TIME','END_TIME','TOTAL_ELAPSED_TIME']])" 277 | ] 278 | } 279 | ], 280 | "metadata": { 281 | "kernelspec": { 282 | "display_name": "Streamlit Notebook", 283 | "name": "streamlit" 284 | } 285 | }, 286 | "nbformat": 4, 287 | "nbformat_minor": 5 288 | } 289 | -------------------------------------------------------------------------------- /Working with Git/environment.yml: -------------------------------------------------------------------------------- 1 | name: app_environment 2 | channels: 3 | - snowflake 4 | dependencies: 5 | - snowflake=0.8.0 -------------------------------------------------------------------------------- /Working with Git/git_setup.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE SECRET git_secret_example 2 | TYPE = password 3 | USERNAME = '' 4 | PASSWORD = ''; 5 | 6 | CREATE OR REPLACE API INTEGRATION git_api_integration_example 7 | API_PROVIDER = git_https_api 8 | API_ALLOWED_PREFIXES = ('https://github.com/') 9 | ALLOWED_AUTHENTICATION_SECRETS = (git_secret_example) 10 | ENABLED = TRUE; 11 | 12 | DROP SECRET git_secret_example; 13 | DROP API INTEGRATION git_api_integration_example; -------------------------------------------------------------------------------- /config.toml: -------------------------------------------------------------------------------- 1 | default_connection_name = default 2 | --------------------------------------------------------------------------------
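For reference, below is a minimal sketch of what a fuller `config.toml` for a Snowflake CLI connection could look like. This is a hypothetical illustration, not a file from this repo: the connection name `default` matches the `default_connection_name` above, but the account, user, password, and warehouse values are placeholders you would replace with your own. Note that TOML string values, including `default_connection_name`, should be quoted.

# Hypothetical sketch only -- replace the placeholder values with your own connection details.
default_connection_name = "default"      # TOML strings must be quoted

[connections.default]
account = "<your_account_identifier>"    # placeholder
user = "<your_username>"                 # placeholder
password = "<your_password>"             # placeholder; prefer a secrets store over plain text
warehouse = "<your_warehouse>"           # optional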