├── .github
│   ├── labeler.yml
│   └── workflows
│       ├── labeler.yml
│       ├── stale.yml
│       └── notebooks.yaml
├── WISHLIST.md
├── CONTRIBUTING.md
├── LICENSE.txt
├── Gemma
│   ├── Guess_the_word.ipynb
│   ├── Run_with_Ollama.ipynb
│   ├── Gemma2_on_Groq.ipynb
│   ├── Integrate_with_Mesop.ipynb
│   ├── Keras_Gemma_2_Quickstart.ipynb
│   ├── gemma_inference_on_tpu.ipynb
│   ├── Prompt_chaining.ipynb
│   └── Gemma_RAG_LlamaIndex.ipynb
├── README.md
└── PaliGemma
    └── Zero_shot_object_detection_in_videos_using_PaliGemma.ipynb

/.github/labeler.yml:
--------------------------------------------------------------------------------
1 | 'status:awaiting review':
2 | - '**/*'
3 | 
4 | 'component:examples':
5 | - examples/**/*
6 | 
7 | 'component:quickstarts':
8 | - quickstarts/**/*
--------------------------------------------------------------------------------
/.github/workflows/labeler.yml:
--------------------------------------------------------------------------------
1 | name: "PR Labeler"
2 | 
3 | on:
4 |   pull_request_target:
5 |     types: ["opened", "reopened", "ready_for_review"]
6 | 
7 | jobs:
8 |   triage:
9 |     permissions:
10 |       contents: read
11 |       pull-requests: write
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - uses: actions/labeler@v4
15 |       if: ${{ github.event.pull_request.draft == false }}
--------------------------------------------------------------------------------
/WISHLIST.md:
--------------------------------------------------------------------------------
1 | A wish list of cookbooks showcasing:
2 | 
3 | * Inference
4 |   * Integration with [Google GenKit](https://firebase.google.com/products/genkit)
5 |   * Llamafile demo
6 |   * llama.cpp demo
7 |   * HF local-gemma demo
8 |   * Elasticsearch integration
9 |   * Gemma+Gemini with [RouteLLM](https://github.com/lm-sys/RouteLLM)
10 |   * [SGLang](https://github.com/sgl-project/sglang) integration
11 | 
12 | * Finetuning
13 |   * Finetuning CodeGemma (e.g., SQL generation)
14 |   * Finetuning Gemma for function calling
15 | 
16 | * Responsible AI
17 |   * Use [LLM Comparator](https://github.com/pair-code/llm-comparator) to compare Gemma with another LLM (e.g., Llama)
18 | 
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | # This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time.
2 | #
3 | # You can adjust the behavior by modifying this file.
4 | # For more information, see:
5 | # https://github.com/actions/stale
6 | name: Mark stale issues and pull requests
7 | 
8 | on:
9 |   schedule:
10 |     # Scheduled to run at 01:30 UTC every day
11 |     - cron: '30 1 * * *'
12 | 
13 | jobs:
14 |   stale:
15 | 
16 |     runs-on: ubuntu-latest
17 |     permissions:
18 |       issues: write
19 |       pull-requests: write
20 | 
21 |     steps:
22 |     - uses: actions/stale@v5
23 |       with:
24 |         repo-token: ${{ secrets.GITHUB_TOKEN }}
25 |         days-before-issue-stale: 14
26 |         days-before-issue-close: 14
27 |         stale-issue-label: "status:stale"
28 |         close-issue-reason: not_planned
29 |         any-of-labels: "status:awaiting user response,status:more data needed"
30 |         stale-issue-message: >
31 |           Marking this issue as stale since it has been open for 14 days with no activity.
32 |           This issue will be closed if no further activity occurs.
33 |         close-issue-message: >
34 |           This issue was closed because it has been inactive for 28 days.
35 |           Please post a new issue if you need further assistance. Thanks!
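        # PRs follow the same schedule as issues below: stale after 14 days of inactivity, closed after 14 more (28 days total).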
36 | days-before-pr-stale: 14 37 | days-before-pr-close: 14 38 | stale-pr-label: "status:stale" 39 | stale-pr-message: > 40 | Marking this pull request as stale since it has been open for 14 days with no activity. 41 | This PR will be closed if no further activity occurs. 42 | close-pr-message: > 43 | This pull request was closed because it has been inactive for 28 days. 44 | Please open a new pull request if you need further assistance. Thanks! 45 | # Label that can be assigned to issues to exclude them from being marked as stale 46 | exempt-issue-labels: 'override-stale' 47 | # Label that can be assigned to PRs to exclude them from being marked as stale 48 | exempt-pr-labels: "override-stale" 49 | -------------------------------------------------------------------------------- /.github/workflows/notebooks.yaml: -------------------------------------------------------------------------------- 1 | # Notebook-related checks 2 | 3 | name: Notebooks 4 | 5 | on: 6 | # Relevant PRs 7 | pull_request: 8 | paths: 9 | - "**.ipynb" 10 | # Allow manual runs 11 | workflow_dispatch: 12 | 13 | jobs: 14 | # Format all notebooks. 15 | nbfmt: 16 | name: Notebook format 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v3 20 | - uses: actions/setup-python@v4 21 | - name: Install tensorflow-docs 22 | run: python3 -m pip install -U git+https://github.com/tensorflow/docs 23 | - name: Fetch main branch 24 | run: git fetch -u origin main:main 25 | - name: Check notebook formatting 26 | run: | 27 | if [ "${{ github.event_name }}" == "pull_request" ]; then 28 | # Only check notebooks modified in this pull request 29 | readarray -t changed_notebooks < <(git diff --name-only main | grep '\.ipynb$' || true) 30 | else 31 | # Manual run, check everything 32 | readarray -t changed_notebooks < <(find -name '*.ipynb') 33 | fi 34 | if [[ ${#changed_notebooks[@]} == 0 ]]; then 35 | echo "No notebooks modified in this pull request." 36 | exit 0 37 | else 38 | echo "Check formatting with nbfmt:" 39 | python3 -m tensorflow_docs.tools.nbfmt --test "${changed_notebooks[@]}" 40 | fi 41 | 42 | nblint: 43 | name: Notebook lint 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v3 47 | - uses: actions/setup-python@v4 48 | - name: Install tensorflow-docs 49 | run: python3 -m pip install -U git+https://github.com/tensorflow/docs 50 | - name: Fetch main branch 51 | run: git fetch -u origin main:main 52 | 53 | # Lint for all notebooks 54 | - name: Lint notebooks 55 | run: | 56 | if [ "${{ github.event_name }}" == "pull_request" ]; then 57 | # Only check notebooks modified in this pull request 58 | readarray -t changed_notebooks < <(git diff --name-only main |grep '\.ipynb$' || true) 59 | else 60 | # Manual run, check everything 61 | readarray -t changed_notebooks < <(find . -name '*.ipynb') 62 | fi 63 | if [[ ${#changed_notebooks[@]} == 0 ]]; then 64 | echo "No website notebooks modified in this pull request." 
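              # Nothing to lint, so exit successfully and let the required check pass.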
65 |             exit 0
66 |           else
67 |             echo "WARNING: If the button_colab check fails for you, make sure you have the correct Colab URL in your notebook."
68 |             echo "Lint check with nblint:"
69 |             python3 -m tensorflow_docs.tools.nblint \
70 |               --styles=google,tensorflow \
71 |               --arg=repo:google-gemini/gemma-cookbook \
72 |               --arg=branch:main \
73 |               --exclude_lint=tensorflow::button_download \
74 |               --exclude_lint=tensorflow::button_website \
75 |               --arg=base_url:https://ai.google.dev/ \
76 |               --exclude_lint=tensorflow::button_github \
77 |               "${changed_notebooks[@]}"
78 |           fi
79 | 
80 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to the Gemma Cookbook
2 | 
3 | We would love to accept your patches and contributions to the Gemma Cookbook. We are excited that you are considering donating some of your time, and this guide will help us be respectful of that time.
4 | 
5 | # Before you send anything
6 | 
7 | ## Sign our contributor agreement
8 | 
9 | All contributions to this project must be accompanied by a [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project.
10 | 
11 | If you or your current employer have already signed the Google CLA (even if it was for a different project), you probably don't need to do it again.
12 | 
13 | Visit [https://cla.developers.google.com/](https://cla.developers.google.com/) to see your current agreements or to sign a new one.
14 | 
15 | ## Style guides
16 | 
17 | Before you start writing, take a look at the [technical writing style guide](https://developers.google.com/style). You don’t need to fully digest the whole document, but do read the [highlights](https://developers.google.com/style/highlights) so you can anticipate the most common feedback.
18 | 
19 | Also check out the relevant [style guide](https://google.github.io/styleguide/) for the language you will be using. These apply strictly to raw code files (e.g. *.py, *.js), though code fragments in documentation (such as markdown files or notebooks) tend to favor readability over strict adherence.
20 | 
21 | # PR checklist
22 | 
23 | 1. Commit your finished notebook with comments and clean output after completing the following:
24 |    * Make sure to include the setup steps at the top (you can copy from any existing notebook), including:
25 |      * the Colab self-link to your notebook
26 |      * how to select a GPU
27 |      * how to set up Kaggle/HF tokens
28 |    * Include a byline at the top of the notebook with your name, social handle, and/or GitHub username
29 |    * Run ‘pyink’ for formatting
30 |    * Name your notebook with words separated by underscores. For example, ‘Integrate_with_Mesop.ipynb’
31 | 2. Add the notebook name and a short description to the table of contents in README.md
32 | 3. (If applicable) remove the entry you have implemented in WISHLIST.md
33 | 4. Submit for review
34 | 5. In your PR comment, let us know if you would like your contribution to be highlighted on Google’s social handles (e.g., the [Google for Developers](https://x.com/googledevs) Twitter account)
35 | 
36 | # Making changes
37 | 
38 | ## Small fixes
39 | 
40 | Small fixes, such as typos or bug fixes, can be submitted directly via a pull request.
41 | 
42 | ## Large changes or a new notebook
43 | 
44 | Before you send a PR, or even write a single line, please file an [issue](https://github.com/google-gemini/gemma-cookbook/issues). There we can discuss the request and provide guidance about how to structure any content you write.
45 | 
46 | Adding a new guide often involves lots of detailed reviews, and we want to make sure that your idea is fully formed and has full support before you start writing anything. Please also check the table of contents first to avoid duplicating existing work. If you want to port an existing guide across (e.g. if you have a guide for Gemma on your own GitHub), feel free to link to it in the issue.
47 | 
48 | ## Things we consider
49 | 
50 | When accepting a new guide, we want to balance a few aspects:
51 | * Originality - e.g. Is there another guide that does the same thing?
52 | * Pedagogy - e.g. Does this guide teach something useful, specifically about a Gemma feature?
53 | * Quality - e.g. Does this guide contain clear, descriptive prose? Is the code easy to understand? Are there any errors?
54 | * Practicality - e.g. Is the technique used in the guide practical in the real world?
55 | 
56 | It is not crucial for a submission to be strong along all of these dimensions, but the stronger the better. Old submissions may be replaced in favor of newer submissions that are stronger along these dimensions.
57 | 
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |                                  Apache License
2 |                            Version 2.0, January 2004
3 |                         http://www.apache.org/licenses/
4 | 
5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 |    1. Definitions.
8 | 
9 |       "License" shall mean the terms and conditions for use, reproduction,
10 |       and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 |       "Licensor" shall mean the copyright owner or entity authorized by
13 |       the copyright owner that is granting the License.
14 | 
15 |       "Legal Entity" shall mean the union of the acting entity and all
16 |       other entities that control, are controlled by, or are under common
17 |       control with that entity. For the purposes of this definition,
18 |       "control" means (i) the power, direct or indirect, to cause the
19 |       direction or management of such entity, whether by contract or
20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 |       outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 |       "You" (or "Your") shall mean an individual or Legal Entity
24 |       exercising permissions granted by this License.
25 | 
26 |       "Source" form shall mean the preferred form for making modifications,
27 |       including but not limited to software source code, documentation
28 |       source, and configuration files.
29 | 
30 |       "Object" form shall mean any form resulting from mechanical
31 |       transformation or translation of a Source form, including but
32 |       not limited to compiled object code, generated documentation,
33 |       and conversions to other media types.
34 | 
35 |       "Work" shall mean the work of authorship, whether in Source or
36 |       Object form, made available under the License, as indicated by a
37 |       copyright notice that is included in or attached to the work
38 |       (an example is provided in the Appendix below).
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Gemma/Guess_the_word.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "nH85BOCo7YYk" 7 | }, 8 | "source": [ 9 | "##### Copyright 2024 Google LLC." 
10 |       ]
11 |     },
12 |     {
13 |       "cell_type": "code",
14 |       "execution_count": null,
15 |       "metadata": {
16 |         "cellView": "form",
17 |         "id": "9tQNAByc7U9g"
18 |       },
19 |       "outputs": [],
20 |       "source": [
21 |         "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
22 |         "# you may not use this file except in compliance with the License.\n",
23 |         "# You may obtain a copy of the License at\n",
24 |         "#\n",
25 |         "# https://www.apache.org/licenses/LICENSE-2.0\n",
26 |         "#\n",
27 |         "# Unless required by applicable law or agreed to in writing, software\n",
28 |         "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
29 |         "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
30 |         "# See the License for the specific language governing permissions and\n",
31 |         "# limitations under the License."
32 |       ]
33 |     },
34 |     {
35 |       "cell_type": "markdown",
36 |       "metadata": {
37 |         "id": "F7r2q0wS7bxf"
38 |       },
39 |       "source": [
40 |         "# Play with AI - Guess the word\n",
41 |         "\n",
42 |         "This cookbook illustrates how you can employ the instruction-tuned version of Gemma as a chatbot to play a \"Guess the word\" game.\n",
43 |         "\n",
44 |         "<table align=\"left\">\n",
45 |         "  <td>\n",
46 |         "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/google-gemini/gemma-cookbook/blob/main/Gemma/Guess_the_word.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
47 |         "  </td>\n",
48 |         "</table>
" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": { 54 | "id": "ZHrL4tqs7mYK" 55 | }, 56 | "source": [ 57 | "## Setup\n", 58 | "\n", 59 | "### Select the Colab runtime\n", 60 | "To complete this tutorial, you'll need to have a Colab runtime with sufficient resources to run the Gemma model. In this case, you can use a T4 GPU:\n", 61 | "\n", 62 | "1. In the upper-right of the Colab window, select **▾ (Additional connection options)**.\n", 63 | "2. Select **Change runtime type**.\n", 64 | "3. Under **Hardware accelerator**, select **T4 GPU**.\n", 65 | "\n", 66 | "\n", 67 | "### Gemma setup on Kaggle\n", 68 | "To complete this tutorial, you'll first need to complete the setup instructions at [Gemma setup](https://ai.google.dev/gemma/docs/setup). The Gemma setup instructions show you how to do the following:\n", 69 | "\n", 70 | "* Get access to Gemma on kaggle.com.\n", 71 | "* Select a Colab runtime with sufficient resources to run the Gemma 2B model.\n", 72 | "* Generate and configure a Kaggle username and API key.\n", 73 | "\n", 74 | "After you've completed the Gemma setup, move on to the next section, where you'll set environment variables for your Colab environment." 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": { 80 | "id": "pQEE8RoO75F-" 81 | }, 82 | "source": [ 83 | "### Set environment variables\n", 84 | "\n", 85 | "Set environment variables for `KAGGLE_USERNAME` and `KAGGLE_KEY`." 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 1, 91 | "metadata": { 92 | "id": "XsY2Ut7a76Wa" 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "import os\n", 97 | "from google.colab import userdata\n", 98 | "\n", 99 | "os.environ[\"KERAS_BACKEND\"] = \"jax\" # Or \"tensorflow\" or \"torch\".\n", 100 | "\n", 101 | "# Note: `userdata.get` is a Colab API. If you're not using Colab, set the env\n", 102 | "# vars as appropriate for your system.\n", 103 | "os.environ[\"KAGGLE_USERNAME\"] = userdata.get(\"KAGGLE_USERNAME\")\n", 104 | "os.environ[\"KAGGLE_KEY\"] = userdata.get(\"KAGGLE_KEY\")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": { 110 | "id": "Ea_56Zpa78Gu" 111 | }, 112 | "source": [ 113 | "### Install dependencies\n", 114 | "\n", 115 | "Install Keras and KerasNLP." 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "id": "AxPjbcnC79ck" 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "# Install Keras 3 last. 
See https://keras.io/getting_started/ for more details.\n",
127 |     "!pip install -q -U keras-nlp\n",
128 |     "!pip install -q -U keras"
129 |   ]
130 | },
131 | {
132 |   "cell_type": "markdown",
133 |   "metadata": {
134 |     "id": "a_QCPQLf8OU0"
135 |   },
136 |   "source": [
137 |     "### Create a chat helper to manage the conversation state"
138 |   ]
139 | },
140 | {
141 |   "cell_type": "code",
142 |   "execution_count": null,
143 |   "metadata": {
144 |     "id": "2BmB5Zua8Vs0"
145 |   },
146 |   "outputs": [],
147 |   "source": [
148 |     "import re\n",
149 |     "\n",
150 |     "import keras\n",
151 |     "import keras_nlp\n",
152 |     "\n",
153 |     "# Run at half precision to fit in memory\n",
154 |     "keras.config.set_floatx(\"bfloat16\")\n",
155 |     "\n",
156 |     "gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset(\"gemma2_instruct_2b_en\")\n",
157 |     "gemma_lm.compile(sampler=\"top_k\")\n",
158 |     "\n",
159 |     "\n",
160 |     "class ChatState():\n",
161 |     "  \"\"\"\n",
162 |     "  Manages the conversation history for a turn-based chatbot.\n",
163 |     "  Follows the turn-based conversation guidelines for the Gemma family of models\n",
164 |     "  documented at https://ai.google.dev/gemma/docs/formatting\n",
165 |     "  \"\"\"\n",
166 |     "\n",
167 |     "  __START_TURN_USER__ = \"<start_of_turn>user\\n\"\n",
168 |     "  __START_TURN_MODEL__ = \"<start_of_turn>model\\n\"\n",
169 |     "  __END_TURN__ = \"<end_of_turn>\\n\"\n",
170 |     "\n",
171 |     "  def __init__(self, model, system=\"\"):\n",
172 |     "    \"\"\"\n",
173 |     "    Initializes the chat state.\n",
174 |     "\n",
175 |     "    Args:\n",
176 |     "      model: The language model to use for generating responses.\n",
177 |     "      system: (Optional) System instructions or bot description.\n",
178 |     "    \"\"\"\n",
179 |     "    self.model = model\n",
180 |     "    self.system = system\n",
181 |     "    self.history = []\n",
182 |     "\n",
183 |     "  def add_to_history_as_user(self, message):\n",
184 |     "    \"\"\"\n",
185 |     "    Adds a user message to the history with start/end turn markers.\n",
186 |     "    \"\"\"\n",
187 |     "    self.history.append(self.__START_TURN_USER__ + message + self.__END_TURN__)\n",
188 |     "\n",
189 |     "  def add_to_history_as_model(self, message):\n",
190 |     "    \"\"\"\n",
191 |     "    Adds a model response to the history with start/end turn markers.\n",
192 |     "    \"\"\"\n",
193 |     "    self.history.append(self.__START_TURN_MODEL__ + message)\n",
194 |     "\n",
195 |     "  def get_history(self):\n",
196 |     "    \"\"\"\n",
197 |     "    Returns the entire chat history as a single string.\n",
198 |     "    \"\"\"\n",
199 |     "    return \"\".join([*self.history])\n",
200 |     "\n",
201 |     "  def get_full_prompt(self):\n",
202 |     "    \"\"\"\n",
203 |     "    Builds the prompt for the language model, including history and system description.\n",
204 |     "    \"\"\"\n",
205 |     "    prompt = self.get_history() + self.__START_TURN_MODEL__\n",
206 |     "    if len(self.system) > 0:\n",
207 |     "      prompt = self.system + \"\\n\" + prompt\n",
208 |     "    return prompt\n",
209 |     "\n",
210 |     "  def send_message(self, message):\n",
211 |     "    \"\"\"\n",
212 |     "    Handles sending a user message and getting a model response.\n",
213 |     "\n",
214 |     "    Args:\n",
215 |     "      message: The user's message.\n",
216 |     "\n",
217 |     "    Returns:\n",
218 |     "      The model's response.\n",
219 |     "    \"\"\"\n",
220 |     "    self.add_to_history_as_user(message)\n",
221 |     "    prompt = self.get_full_prompt()\n",
222 |     "    response = self.model.generate(prompt, max_length=4096)\n",
223 |     "    result = response.replace(prompt, \"\")  # Extract only the new response\n",
224 |     "    self.add_to_history_as_model(result)\n",
225 |     "    return result\n",
226 |     "\n",
227 |     "  def show_history(self):\n",
228 |     "    for h in self.history:\n",
229 |     "      print(h)\n",
230 |     "\n",
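    "# Tip: ChatState's optional `system` argument (see __init__ above) lets you seed\n",
    "# the bot with instructions, e.g. ChatState(gemma_lm, system=\"You are the host of a word-guessing game.\")\n",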
231 | "\n", 232 | "chat = ChatState(gemma_lm)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": { 238 | "id": "_1jyCoRd8EwX" 239 | }, 240 | "source": [ 241 | "## Play the game" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 4, 247 | "metadata": { 248 | "id": "zoWDt87V83rZ" 249 | }, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "Choose your theme: animal\n", 256 | "Guess what I'm thinking.\n", 257 | "Type \"quit\" if you want to quit.\n", 258 | "A playful, furry swimmer. Known for its playful antics and clever use of tools. A member of the weasel-like family with a distinctive, waterproof coat. \n", 259 | "\n", 260 | "\n", 261 | "> platypus\n", 262 | "A creature that spends its days by a river or lake, often seen splashing and diving with its distinctive, thick fur. It's known for its playful demeanor and ability to hold a surprisingly large amount of water in its paws. \n", 263 | "\n", 264 | "\n", 265 | "> beaver\n", 266 | "A small, aquatic mammal with a playful, curious nature, often spotted near water, known for its distinctive, waterproof fur and love for swimming. \n", 267 | "\n", 268 | "\n", 269 | "> otter\n", 270 | "Correct!\n" 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "theme = input(\"Choose your theme: \")\n", 276 | "setup_message = f\"Generate a random single word from {theme}.\"\n", 277 | "\n", 278 | "chat.history.clear()\n", 279 | "answer = chat.send_message(setup_message).split()[0]\n", 280 | "answer = re.sub(r\"\\W+\", \"\", answer) # excludes all numbers, letters and '_'\n", 281 | "chat.history.clear()\n", 282 | "cmd_exit = \"quit\"\n", 283 | "question = f'Describe the word \"{answer}\" without saying it.'\n", 284 | "\n", 285 | "resp = \"\"\n", 286 | "while resp.lower() != answer.lower() and resp != cmd_exit:\n", 287 | " text = chat.send_message(question)\n", 288 | " if resp == \"\":\n", 289 | " print(f'Guess what I\\'m thinking.\\nType \"{cmd_exit}\" if you want to quit.')\n", 290 | " remove_answer = re.compile(re.escape(answer), re.IGNORECASE)\n", 291 | " text = remove_answer.sub(\"XXXX\", text)\n", 292 | " print(text)\n", 293 | " resp = input(\"\\n> \")\n", 294 | "\n", 295 | "if resp == cmd_exit:\n", 296 | " print(f\"The answer was {answer}.\\n\")\n", 297 | "else:\n", 298 | " print(\"Correct!\")" 299 | ] 300 | } 301 | ], 302 | "metadata": { 303 | "accelerator": "GPU", 304 | "colab": { 305 | "name": "Guess_the_word.ipynb", 306 | "toc_visible": true 307 | }, 308 | "kernelspec": { 309 | "display_name": "Python 3", 310 | "name": "python3" 311 | } 312 | }, 313 | "nbformat": 4, 314 | "nbformat_minor": 0 315 | } 316 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Welcome to the Gemma Cookbook 2 | This is a collection of guides and examples for [Google Gemma](https://ai.google.dev/gemma/). Gemma is a family of lightweight, state-of-the art open models built from the same research and technology used to create the Gemini models. 3 | 4 | ## Get started with the Gemma models 5 | Gemma is a family of lightweight, state-of-the art open models built from the same research and technology used to create the Gemini models. 
The Gemma model family includes: 6 | * base Gemma 7 | * [Gemma](https://ai.google.dev/gemma/docs/model_card) 8 | * [Gemma 2](https://ai.google.dev/gemma/docs/model_card_2) 9 | * Gemma variants 10 | * [CodeGemma](https://ai.google.dev/gemma/docs/codegemma) 11 | * [PaliGemma](https://ai.google.dev/gemma/docs/paligemma) 12 | * [RecurrentGemma](https://ai.google.dev/gemma/docs/recurrentgemma) 13 | 14 | You can find the Gemma models on GitHub, Hugging Face models, Kaggle, Google Cloud Vertex AI Model Garden, and [ai.nvidia.com](ai.nvidia.com). 15 | 16 | ## Table of contents 17 | 18 | | Name | Description | 19 | | ------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 20 | | [Common_use_cases.ipynb](Common_use_cases.ipynb) | Illustrate some common use cases for Gemma, CodeGemma and PaliGemma. | 21 | | **Gemma** | 22 | | [Keras_Gemma_2_Quickstart.ipynb](Gemma/Keras_Gemma_2_Quickstart.ipynb) | Gemma 2 pre-trained 9B model quickstart tutorial with Keras. | 23 | | [Keras_Gemma_2_Quickstart_Chat.ipynb](Gemma/Keras_Gemma_2_Quickstart_Chat.ipynb) | Gemma 2 instruction-tuned 9B model quickstart tutorial with Keras. Referenced in this [blog](https://developers.googleblog.com/en/fine-tuning-gemma-2-with-keras-hugging-face-update/). | 24 | | [Chat_and_distributed_pirate_tuning.ipynb](Gemma/Chat_and_distributed_pirate_tuning.ipynb) | Chat with Gemma 7B and finetune it so that it generates responses in pirates' tone. | 25 | | [gemma_inference_on_tpu.ipynb](Gemma/gemma_inference_on_tpu.ipynb) | Basic inference of Gemma with JAX/Flax on TPU. | 26 | | [gemma_data_parallel_inference_in_jax_tpu.ipynb](Gemma/gemma_data_parallel_inference_in_jax_tpu.ipynb) | Parallel inference of Gemma with JAX/Flax on TPU. | 27 | | [Gemma_control_vectors.ipynb](Gemma/Gemma_control_vectors.ipynb) | Implement [control vectors](https://arxiv.org/abs/2310.01405) with Gemma in the I/O 2024 [Keras talk](https://www.youtube.com/watch?v=TV7qCk1dBWA). | 28 | | [Self_extend_Gemma.ipynb](Gemma/Self_extend_Gemma.ipynb) | Self-extend context window for Gemma in the I/O 2024 [Keras talk](https://www.youtube.com/watch?v=TV7qCk1dBWA). | 29 | | [Gemma_Basics_with_HF.ipynb](Gemma/Gemma_Basics_with_HF.ipynb) | Load, run, finetune and deploy Gemma using [Hugging Face](https://huggingface.co/). | 30 | | [Guess_the_word.ipynb](Gemma/Guess_the_word.ipynb) | Play a word guessing game with Gemma using Keras. | 31 | | [Game_Design_Brainstorming.ipynb](Gemma/Game_Design_Brainstorming.ipynb) | Use Gemma to brainstorm ideas during game design using Keras. | 32 | | [Translator_of_Old_Korean_Literature.ipynb](Gemma/Translator_of_Old_Korean_Literature.ipynb) | Use Gemma to translate old Korean literature using Keras. | 33 | | [Gemma2_on_Groq.ipynb](Gemma/Gemma2_on_Groq.ipynb) | Leverage the free Gemma 2 9B IT model hosted on [Groq](https://groq.com/) (super fast speed). | 34 | | [Prompt_chaining.ipynb](Gemma/Prompt_chaining.ipynb) | Illustrate prompt chaining and iterative generation with Gemma. | 35 | | [Advanced_Prompting_Techniques.ipynb](Gemma/Advanced_Prompting_Techniques.ipynb) | Illustrate advanced prompting techniques with Gemma. | 36 | | [Run_with_Ollama.ipynb](Gemma/Run_with_Ollama.ipynb) | Run Gemma models using [Ollama](https://www.ollama.com/). 
| 37 | | [Deploy_with_vLLM.ipynb](Gemma/Deploy_with_vLLM.ipynb) | Deploy a Gemma model using [vLLM](https://github.com/vllm-project/vllm). | 38 | | [Deploy_Gemma_in_Vertex_AI.ipynb](Gemma/Deploy_Gemma_in_Vertex_AI.ipynb) | Deploy a Gemma model using [Vertex AI](https://cloud.google.com/vertex-ai). | 39 | | [RAG_with_ChromaDB.ipynb](Gemma/RAG_with_ChromaDB.ipynb) | Build a Retrieval Augmented Generation (RAG) system with Gemma using [ChromaDB](https://www.trychroma.com/) and [Hugging Face](https://huggingface.co/). | 40 | | [Minimal_RAG.ipynb](Gemma/Minimal_RAG.ipynb) | Minimal example of building a RAG system with Gemma using [Google UniSim](https://github.com/google/unisim) and [Hugging Face](https://huggingface.co/). | 41 | | [Using_Gemma_with_LangChain.ipynb](Gemma/Using_Gemma_with_LangChain.ipynb) | Examples to demonstrate using Gemma with [LangChain](https://www.langchain.com/). | 42 | | [Gemma_RAG_LlamaIndex.ipynb](Gemma/Gemma_RAG_LlamaIndex.ipynb) | RAG example with [LlamaIndex](https://www.llamaindex.ai/) using Gemma. | 43 | | [Integrate_with_Mesop.ipynb](Gemma/Integrate_with_Mesop.ipynb) | Integrate Gemma with [Google Mesop](https://google.github.io/mesop/). | 44 | | [Integrate_with_OneTwo.ipynb](Gemma/Integrate_with_OneTwo.ipynb) | Integrate Gemma with [Google OneTwo](https://github.com/google-deepmind/onetwo). | 45 | | [Finetune_with_Axolotl.ipynb](Gemma/Finetune_with_Axolotl.ipynb) | Finetune Gemma using [Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl). | 46 | | [Finetune_with_XTuner.ipynb](Gemma/Finetune_with_XTuner.ipynb) | Finetune Gemma using [XTuner](https://github.com/InternLM/xtuner). | 47 | | [Finetune_with_LLaMA_Factory.ipynb](Gemma/Finetune_with_LLaMA_Factory.ipynb) | Finetune Gemma using [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory). | 48 | | **PaliGemma** | 49 | | [Image_captioning_using_PaliGemma.ipynb](PaliGemma/Image_captioning_using_PaliGemma.ipynb) | Use PaliGemma to generate image captions using Keras. | 50 | | [Image_captioning_using_finetuned_PaliGemma.ipynb](PaliGemma/Image_captioning_using_finetuned_PaliGemma.ipynb) | Compare the image captioning results using different PaliGemma versions with [Hugging Face](https://huggingface.co/). | 51 | | [Finetune_PaliGemma_for_image_description.ipynb](PaliGemma/Finetune_PaliGemma_for_image_description.ipynb) | Finetune PaliGemma for image description using [JAX](https://github.com/google/jax). | 52 | | [Integrate_PaliGemma_with_Mesop.ipynb](PaliGemma/Integrate_PaliGemma_with_Mesop.ipynb) | Integrate PaliGemma with [Google Mesop](https://google.github.io/mesop/). | 53 | | [Zero_shot_object_detection_in_images_using_PaliGemma.ipynb](PaliGemma/Zero_shot_object_detection_in_images_using_PaliGemma.ipynb) | Zero-shot Object Detection in images using PaliGemma. | 54 | | [Zero_shot_object_detection_in_videos_using_PaliGemma.ipynb](PaliGemma/Zero_shot_object_detection_in_videos_using_PaliGemma.ipynb) | Zero-shot Object Detection in videos using PaliGemma. | 55 | | [Referring_expression_segmentation_in_images_using_PaliGemma.ipynb](PaliGemma/Referring_expression_segmentation_in_images_using_PaliGemma.ipynb) | Referring Expression Segmentation in images using PaliGemma. | 56 | | [Referring_expression_segmentation_in_videos_using_PaliGemma.ipynb](PaliGemma/Referring_expression_segmentation_in_videos_using_PaliGemma.ipynb) | Referring Expression Segmentation in videos using PaliGemma. 
|
57 | 
58 | ## Get help
59 | Ask a Gemma cookbook-related question on the new [Build with Google AI Forum](https://discuss.ai.google.dev/), or open an [issue](https://github.com/google-gemini/gemma-cookbook/issues) on GitHub.
60 | 
61 | ## Wish list
62 | If you want to see additional cookbooks implemented for specific features/integrations, please send us a pull request by adding your feature request(s) in the [wish list](https://github.com/google-gemini/gemma-cookbook/blob/main/WISHLIST.md).
63 | 
64 | If you want to make contributions to the Gemma Cookbook project, you are welcome to pick any idea in the [wish list](https://github.com/google-gemini/gemma-cookbook/blob/main/WISHLIST.md) and implement it.
65 | 
66 | ## Contributing
67 | Contributions are always welcome. Please read [contributing](https://github.com/google-gemini/gemma-cookbook/blob/main/CONTRIBUTING.md) before implementation.
68 | 
69 | Thank you for developing with Gemma! We’re excited to see what you create.
70 | 
71 | ## Translation of this repository
72 | * [Traditional Chinese](https://github.com/doggy8088/gemma-cookbook?tab=readme-ov-file)
73 | 
--------------------------------------------------------------------------------
/Gemma/Run_with_Ollama.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |   "cells": [
3 |     {
4 |       "cell_type": "markdown",
5 |       "metadata": {
6 |         "id": "Tce3stUlHN0L"
7 |       },
8 |       "source": [
9 |         "##### Copyright 2024 Google LLC."
10 |       ]
11 |     },
12 |     {
13 |       "cell_type": "code",
14 |       "execution_count": null,
15 |       "metadata": {
16 |         "cellView": "form",
17 |         "id": "tuOe1ymfHZPu"
18 |       },
19 |       "outputs": [],
20 |       "source": [
21 |         "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
22 |         "# you may not use this file except in compliance with the License.\n",
23 |         "# You may obtain a copy of the License at\n",
24 |         "#\n",
25 |         "# https://www.apache.org/licenses/LICENSE-2.0\n",
26 |         "#\n",
27 |         "# Unless required by applicable law or agreed to in writing, software\n",
28 |         "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
29 |         "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
30 |         "# See the License for the specific language governing permissions and\n",
31 |         "# limitations under the License."
32 |       ]
33 |     },
34 |     {
35 |       "cell_type": "markdown",
36 |       "metadata": {
37 |         "id": "dfsDR_omdNea"
38 |       },
39 |       "source": [
40 |         "# Gemma - Run with Ollama\n",
41 |         "\n",
42 |         "This notebook demonstrates how you can run inference on a Gemma model using [Ollama](https://ollama.com/). Ollama is an easy-to-use solution for running LLMs locally and provides built-in support for Gemma.\n",
43 |         "\n",
44 |         "<table align=\"left\">\n",
45 |         "  <td>\n",
46 |         "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/google-gemini/gemma-cookbook/blob/main/Gemma/Run_with_Ollama.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
47 |         "  </td>\n",
48 |         "</table>
" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": { 54 | "id": "MwMiP7jDdAL1" 55 | }, 56 | "source": [ 57 | "## Setup\n", 58 | "\n", 59 | "### Select the Colab runtime\n", 60 | "To complete this tutorial, you'll need to have a Colab runtime with sufficient resources to run the Gemma model. In this case, you can use a T4 GPU:\n", 61 | "\n", 62 | "1. In the upper-right of the Colab window, select **▾ (Additional connection options)**.\n", 63 | "2. Select **Change runtime type**.\n", 64 | "3. Under **Hardware accelerator**, select **T4 GPU**." 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "id": "gJaQ-OVoPKCo" 71 | }, 72 | "source": [ 73 | "## Installation\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": { 79 | "id": "4VYdPAwS9sCr" 80 | }, 81 | "source": [ 82 | "Install Ollama through the offical installation script." 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": { 89 | "id": "DHrOMaOAPSAM" 90 | }, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | ">>> Downloading ollama...\n", 97 | "############################################################################################# 100.0%\n", 98 | ">>> Installing ollama to /usr/local/bin...\n", 99 | ">>> Adding ollama user to video group...\n", 100 | ">>> Adding current user to ollama group...\n", 101 | ">>> Creating ollama systemd service...\n", 102 | "WARNING: Unable to detect NVIDIA/AMD GPU. Install lspci or lshw to automatically detect and install GPU dependencies.\n", 103 | ">>> The Ollama API is now available at 127.0.0.1:11434.\n", 104 | ">>> Install complete. Run \"ollama\" from the command line.\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "!curl -fsSL https://ollama.com/install.sh | sh" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": { 115 | "id": "wn2lC5hVPUxy" 116 | }, 117 | "source": [ 118 | "## Start Ollama\n", 119 | "\n", 120 | "Start Ollama in background using nohup." 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": { 127 | "id": "q8o4A6QKPp3d" 128 | }, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "nohup: redirecting stderr to stdout\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "!nohup ollama serve > ollama.log &" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": { 145 | "id": "76mRtotdPu9N" 146 | }, 147 | "source": [ 148 | "## Inference\n", 149 | "\n", 150 | "Run inference using command line." 
151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": { 157 | "id": "k8y8SD1XPzAr" 158 | }, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "The capital of France is **Paris**.\n", 165 | "\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "!ollama run gemma:7b \"What is the capital of France?\" 2> ollama.log" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": { 176 | "id": "MMg2unvIOtH4" 177 | }, 178 | "source": [ 179 | "Generate a response via REST endpoint" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "id": "_JihVtFsOwsn" 187 | }, 188 | "outputs": [ 189 | { 190 | "name": "stdout", 191 | "output_type": "stream", 192 | "text": [ 193 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.14899689Z\",\"response\":\"The\",\"done\":false}\n", 194 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.178231303Z\",\"response\":\" capital\",\"done\":false}\n", 195 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.207532308Z\",\"response\":\" of\",\"done\":false}\n", 196 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.236605028Z\",\"response\":\" Portugal\",\"done\":false}\n", 197 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.265333563Z\",\"response\":\" is\",\"done\":false}\n", 198 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.294147887Z\",\"response\":\" **\",\"done\":false}\n", 199 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.323264861Z\",\"response\":\"Lis\",\"done\":false}\n", 200 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.35282411Z\",\"response\":\"bon\",\"done\":false}\n", 201 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.382855843Z\",\"response\":\"**.\",\"done\":false}\n", 202 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.413118162Z\",\"response\":\"\\n\\n\",\"done\":false}\n", 203 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.619448677Z\",\"response\":\"\",\"done\":true,\"done_reason\":\"stop\",\"context\":[968,2997,235298,559,235298,15508,235313,1645,108,1841,603,573,6037,576,21539,181537,615,235298,559,235298,15508,235313,108,235322,2997,235298,559,235298,15508,235313,2516,108,651,6037,576,21539,603,5231,49147,3839,168428,109,235322,615,235298,559,235298,15508,235313,108],\"total_duration\":518226208,\"load_duration\":1258094,\"prompt_eval_count\":18,\"prompt_eval_duration\":45308000,\"eval_count\":17,\"eval_duration\":470410000}\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "!curl http://localhost:11434/api/generate -d '{ \\\n", 209 | " \"model\": \"gemma:7b\", \\\n", 210 | " \"prompt\":\"What is the capital of Portugal?\" \\\n", 211 | "}'" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": { 217 | "id": "FlLNziE93xdP" 218 | }, 219 | "source": [ 220 | "Chat with Gemma via REST endpoint" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "id": "AW-ex-Fgs_La" 228 | }, 229 | "outputs": [ 230 | { 231 | "name": "stdout", 232 | "output_type": "stream", 233 | "text": [ 234 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.80317128Z\",\"message\":{\"role\":\"assistant\",\"content\":\"The\"},\"done\":false}\n", 235 | 
"{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.832244294Z\",\"message\":{\"role\":\"assistant\",\"content\":\" capital\"},\"done\":false}\n", 236 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.864473026Z\",\"message\":{\"role\":\"assistant\",\"content\":\" of\"},\"done\":false}\n", 237 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.894548916Z\",\"message\":{\"role\":\"assistant\",\"content\":\" Spain\"},\"done\":false}\n", 238 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.924834821Z\",\"message\":{\"role\":\"assistant\",\"content\":\" is\"},\"done\":false}\n", 239 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.954322472Z\",\"message\":{\"role\":\"assistant\",\"content\":\" **\"},\"done\":false}\n", 240 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:56.984517118Z\",\"message\":{\"role\":\"assistant\",\"content\":\"Madrid\"},\"done\":false}\n", 241 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.014076809Z\",\"message\":{\"role\":\"assistant\",\"content\":\"**.\"},\"done\":false}\n", 242 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.044390316Z\",\"message\":{\"role\":\"assistant\",\"content\":\"\\n\\n\"},\"done\":false}\n", 243 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.073914656Z\",\"message\":{\"role\":\"assistant\",\"content\":\"Madrid\"},\"done\":false}\n", 244 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.104846737Z\",\"message\":{\"role\":\"assistant\",\"content\":\" is\"},\"done\":false}\n", 245 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.134785148Z\",\"message\":{\"role\":\"assistant\",\"content\":\" a\"},\"done\":false}\n", 246 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.166072957Z\",\"message\":{\"role\":\"assistant\",\"content\":\" city\"},\"done\":false}\n", 247 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.195707466Z\",\"message\":{\"role\":\"assistant\",\"content\":\" in\"},\"done\":false}\n", 248 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.224466411Z\",\"message\":{\"role\":\"assistant\",\"content\":\" central\"},\"done\":false}\n", 249 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.252835434Z\",\"message\":{\"role\":\"assistant\",\"content\":\" Spain\"},\"done\":false}\n", 250 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.282191186Z\",\"message\":{\"role\":\"assistant\",\"content\":\" and\"},\"done\":false}\n", 251 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.31179531Z\",\"message\":{\"role\":\"assistant\",\"content\":\" has\"},\"done\":false}\n", 252 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.341159754Z\",\"message\":{\"role\":\"assistant\",\"content\":\" been\"},\"done\":false}\n", 253 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.370877089Z\",\"message\":{\"role\":\"assistant\",\"content\":\" the\"},\"done\":false}\n", 254 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.400127634Z\",\"message\":{\"role\":\"assistant\",\"content\":\" capital\"},\"done\":false}\n", 255 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.430518407Z\",\"message\":{\"role\":\"assistant\",\"content\":\" since\"},\"done\":false}\n", 256 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.459726063Z\",\"message\":{\"role\":\"assistant\",\"content\":\" the\"},\"done\":false}\n", 257 | 
"{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.489232296Z\",\"message\":{\"role\":\"assistant\",\"content\":\" Middle\"},\"done\":false}\n", 258 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.521220399Z\",\"message\":{\"role\":\"assistant\",\"content\":\" Ages\"},\"done\":false}\n", 259 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.550727167Z\",\"message\":{\"role\":\"assistant\",\"content\":\".\"},\"done\":false}\n", 260 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.579056428Z\",\"message\":{\"role\":\"assistant\",\"content\":\" It\"},\"done\":false}\n", 261 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.611030309Z\",\"message\":{\"role\":\"assistant\",\"content\":\" is\"},\"done\":false}\n", 262 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.648551096Z\",\"message\":{\"role\":\"assistant\",\"content\":\" known\"},\"done\":false}\n", 263 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.686423573Z\",\"message\":{\"role\":\"assistant\",\"content\":\" for\"},\"done\":false}\n", 264 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.71824871Z\",\"message\":{\"role\":\"assistant\",\"content\":\" its\"},\"done\":false}\n", 265 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.74884782Z\",\"message\":{\"role\":\"assistant\",\"content\":\" rich\"},\"done\":false}\n", 266 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.778573494Z\",\"message\":{\"role\":\"assistant\",\"content\":\" history\"},\"done\":false}\n", 267 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.808624406Z\",\"message\":{\"role\":\"assistant\",\"content\":\",\"},\"done\":false}\n", 268 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.838451581Z\",\"message\":{\"role\":\"assistant\",\"content\":\" culture\"},\"done\":false}\n", 269 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.867754055Z\",\"message\":{\"role\":\"assistant\",\"content\":\",\"},\"done\":false}\n", 270 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.896675551Z\",\"message\":{\"role\":\"assistant\",\"content\":\" and\"},\"done\":false}\n", 271 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.925461064Z\",\"message\":{\"role\":\"assistant\",\"content\":\" vibrant\"},\"done\":false}\n", 272 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.956364599Z\",\"message\":{\"role\":\"assistant\",\"content\":\" nightlife\"},\"done\":false}\n", 273 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:57.985969319Z\",\"message\":{\"role\":\"assistant\",\"content\":\".\"},\"done\":false}\n", 274 | "{\"model\":\"gemma:7b\",\"created_at\":\"2024-07-08T10:53:58.015675807Z\",\"message\":{\"role\":\"assistant\",\"content\":\"\"},\"done_reason\":\"stop\",\"done\":true,\"total_duration\":1298987591,\"load_duration\":1110482,\"prompt_eval_count\":23,\"prompt_eval_duration\":43108000,\"eval_count\":41,\"eval_duration\":1212429000}\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "!curl http://localhost:11434/api/chat -d '{ \\\n", 280 | " \"model\": \"gemma:7b\", \\\n", 281 | " \"messages\": [ \\\n", 282 | " { \"role\": \"user\", \"content\": \"what is the capital of Spain?\" } \\\n", 283 | " ] \\\n", 284 | "}'" 285 | ] 286 | } 287 | ], 288 | "metadata": { 289 | "accelerator": "GPU", 290 | "colab": { 291 | "name": "Run_with_Ollama.ipynb", 292 | "toc_visible": true 293 | }, 294 | "kernelspec": { 295 | "display_name": "Python 3", 
296 | "name": "python3" 297 | } 298 | }, 299 | "nbformat": 4, 300 | "nbformat_minor": 0 301 | } 302 | -------------------------------------------------------------------------------- /Gemma/Gemma2_on_Groq.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "nH85BOCo7YYk" 7 | }, 8 | "source": [ 9 | "##### Copyright 2024 Google LLC." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "cellView": "form", 17 | "id": "9tQNAByc7U9g" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n", 22 | "# you may not use this file except in compliance with the License.\n", 23 | "# You may obtain a copy of the License at\n", 24 | "#\n", 25 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 26 | "#\n", 27 | "# Unless required by applicable law or agreed to in writing, software\n", 28 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 29 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 30 | "# See the License for the specific language governing permissions and\n", 31 | "# limitations under the License." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": { 37 | "id": "F7r2q0wS7bxf" 38 | }, 39 | "source": [ 40 | "# Gemma 2 running on Groq\n", 41 | "\n", 42 | "This cookbook shows how to call the Gemma 2 9B IT model hosted on [Groq Cloud](https://console.groq.com/). Groq is an AI accelerater company that provides [free Gemma 2 9B access](https://console.groq.com/docs/models) with blazing fast inference speed.\n", 43 | "\n", 44 | "\n", 45 | " \n", 48 | "
\n", 46 | " Run in Google Colab\n", 47 | "
" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": { 54 | "id": "ZHrL4tqs7mYK" 55 | }, 56 | "source": [ 57 | "## Setup\n", 58 | "\n", 59 | "### Select the Colab runtime\n", 60 | "For this tutorial, you do not need a hardware accelerator since the model is hosted on Groq Cloud. A CPU instance will suffice.\n", 61 | "\n", 62 | "### Genearate a Groq API key\n", 63 | "Visit the [Groq Cloud console](https://console.groq.com/keys) and generate a key. Store the key as a Colab secret named `GROQ_API_KEY`.\n" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": { 69 | "id": "bF1csLohRrH6" 70 | }, 71 | "source": [ 72 | "# Calling Gemma 2 on Groq\n", 73 | "\n", 74 | "Performing a chat completion with Gemma 2 on Groq is very easy." 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 2, 80 | "metadata": { 81 | "id": "x3xUWUrFRpjK" 82 | }, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "Collecting groq\n", 89 | " Downloading groq-0.9.0-py3-none-any.whl (103 kB)\n", 90 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.5/103.5 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 91 | "\u001b[?25hRequirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from groq) (3.7.1)\n", 92 | "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from groq) (1.7.0)\n", 93 | "Collecting httpx<1,>=0.23.0 (from groq)\n", 94 | " Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n", 95 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 96 | "\u001b[?25hRequirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from groq) (2.8.0)\n", 97 | "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from groq) (1.3.1)\n", 98 | "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from groq) (4.12.2)\n", 99 | "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->groq) (3.7)\n", 100 | "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->groq) (1.2.1)\n", 101 | "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->groq) (2024.6.2)\n", 102 | "Collecting httpcore==1.* (from httpx<1,>=0.23.0->groq)\n", 103 | " Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n", 104 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 105 | "\u001b[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->groq)\n", 106 | " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", 107 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 108 | "\u001b[?25hRequirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->groq) (0.7.0)\n", 109 | "Requirement already satisfied: pydantic-core==2.20.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->groq) (2.20.0)\n", 110 | "Installing collected packages: h11, httpcore, httpx, groq\n", 
111 | "Successfully installed groq-0.9.0 h11-0.14.0 httpcore-1.0.5 httpx-0.27.0\n", 112 | "A Large Language Model (LLM) is a type of artificial intelligence that excels at understanding and generating human-like text. \n", 113 | "\n", 114 | "Here's a breakdown:\n", 115 | "\n", 116 | "**What makes them \"Large\"?**\n", 117 | "\n", 118 | "* **Massive Datasets:** LLMs are trained on enormous amounts of text data, often encompassing books, articles, websites, and code. This vast exposure to language allows them to learn complex patterns and relationships within it.\n", 119 | "* **Numerous Parameters:** They have billions, even trillions, of parameters—think of these as adjustable knobs that control the model's behavior. The more parameters, the greater the model's capacity to learn and generate nuanced text.\n", 120 | "\n", 121 | "**What can LLMs do?**\n", 122 | "\n", 123 | "* **Text Generation:**\n", 124 | "\n", 125 | "Write stories, poems, articles, summaries, and even code.\n", 126 | "* **Language Translation:**\n", 127 | "\n", 128 | "Convert text from one language to another with remarkable accuracy.\n", 129 | "* **Question Answering:**\n", 130 | "\n", 131 | "Provide informative answers to a wide range of questions based on the knowledge gained during training.\n", 132 | "* **Dialogue Systems:**\n", 133 | "\n", 134 | "Engage in natural-sounding conversations and provide human-like responses.\n", 135 | "* **Text Summarization:**\n", 136 | "\n", 137 | "Condense large amounts of text into concise summaries.\n", 138 | "\n", 139 | "**Examples of LLMs:**\n", 140 | "\n", 141 | "* **GPT-3 (Generative Pre-trained Transformer 3)** by OpenAI\n", 142 | "* **LaMDA (Language Model for Dialogue Applications)** by Google\n", 143 | "* **BERT (Bidirectional Encoder Representations from Transformers)** by Google\n", 144 | "* **BLOOM (BigScience Large Open-science Open-access Multilingual Language Model)** by BigScience\n", 145 | "\n", 146 | "**Important Considerations:**\n", 147 | "\n", 148 | "* **Bias:** LLMs can inherit biases present in the training data, leading to potentially unfair or prejudiced outputs.\n", 149 | "* **Factual Errors:** While impressive, LLMs can sometimes generate incorrect information. It's crucial to fact-check their outputs.\n", 150 | "* **Ethical Implications:** The power of LLMs raises ethical concerns regarding misuse, such as generating fake news or impersonating individuals.\n", 151 | "\n", 152 | "\n", 153 | "Let me know if you have any other questions about LLMs!\n", 154 | "\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "!pip install groq\n", 160 | "\n", 161 | "from groq import Groq\n", 162 | "from google.colab import userdata\n", 163 | "\n", 164 | "client = Groq(\n", 165 | " api_key=userdata.get(\"GROQ_API_KEY\"),\n", 166 | ")\n", 167 | "\n", 168 | "chat_completion = client.chat.completions.create(\n", 169 | " messages=[\n", 170 | " {\n", 171 | " \"role\": \"user\",\n", 172 | " \"content\": \"What is Large Language Model?\",\n", 173 | " }\n", 174 | " ],\n", 175 | " model=\"gemma2-9b-it\",\n", 176 | ")\n", 177 | "\n", 178 | "print(chat_completion.choices[0].message.content)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "id": "rvpqC645TXLN" 185 | }, 186 | "source": [ 187 | "Groq also offers OpenAI API compatibility." 
188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 3, 193 | "metadata": { 194 | "id": "puX0h_DmSZfO" 195 | }, 196 | "outputs": [ 197 | { 198 | "name": "stdout", 199 | "output_type": "stream", 200 | "text": [ 201 | "Collecting OpenAI\n", 202 | " Downloading openai-1.35.10-py3-none-any.whl (328 kB)\n", 203 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m328.3/328.3 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 204 | "\u001b[?25hRequirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from OpenAI) (3.7.1)\n", 205 | "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from OpenAI) (1.7.0)\n", 206 | "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from OpenAI) (0.27.0)\n", 207 | "Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from OpenAI) (2.8.0)\n", 208 | "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from OpenAI) (1.3.1)\n", 209 | "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from OpenAI) (4.66.4)\n", 210 | "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from OpenAI) (4.12.2)\n", 211 | "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->OpenAI) (3.7)\n", 212 | "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->OpenAI) (1.2.1)\n", 213 | "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->OpenAI) (2024.6.2)\n", 214 | "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->OpenAI) (1.0.5)\n", 215 | "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->OpenAI) (0.14.0)\n", 216 | "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->OpenAI) (0.7.0)\n", 217 | "Requirement already satisfied: pydantic-core==2.20.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->OpenAI) (2.20.0)\n", 218 | "Installing collected packages: OpenAI\n", 219 | "Successfully installed OpenAI-1.35.10\n", 220 | "Large language models (LLMs) are incredibly versatile tools with a wide range of applications. 
Here are some examples of what you can do with an LLM like me:\n", 221 | "\n", 222 | "**Communication and Language:**\n", 223 | "\n", 224 | "* **Text generation:** Write stories, poems, articles, emails, letters, and more.\n", 225 | "\n", 226 | "* **Translation:** Translate text from one language to another.\n", 227 | "* **Summarization:** Condense large amounts of text into shorter summaries.\n", 228 | "* **Conversation:** Engage in natural-sounding conversations with humans.\n", 229 | "* **Dialogue creation:** Write scripts for movies, plays, or video games.\n", 230 | "\n", 231 | "**Creativity and Entertainment:**\n", 232 | "\n", 233 | "* **Storytelling:** Generate creative narratives and ideas for stories.\n", 234 | "* **Poetry writing:** Compose poems in various styles and forms.\n", 235 | "* **Songwriting:** Assist in writing lyrics or composing melodies.\n", 236 | "* **Scriptwriting:** Create dialogue and plot for videos, games, or plays.\n", 237 | "\n", 238 | "**Education and Research:**\n", 239 | "\n", 240 | "* **Question answering:** Provide answers to questions based on given information.\n", 241 | "* **Tutoring:** Offer explanations and assistance with learning concepts.\n", 242 | "* **Research assistance:** Summarize research papers, identify relevant information, and generate hypotheses.\n", 243 | "* **Code generation:** Write code in multiple programming languages.\n", 244 | "\n", 245 | "**Productivity and Business:**\n", 246 | "\n", 247 | "* **Email drafting:** Assist in composing professional emails.\n", 248 | "* **Meeting summarization:** Create concise summaries of meeting discussions.\n", 249 | "* **Customer service:** Provide automated responses to common customer inquiries.\n", 250 | "* **Content creation:** Generate marketing materials, social media posts, and other content.\n", 251 | "\n", 252 | "**Other Applications:**\n", 253 | "\n", 254 | "* **Personalization:** Tailor experiences and recommendations based on user preferences.\n", 255 | "* **Accessibility:** Provide text-to-speech and speech-to-text capabilities.\n", 256 | "* **Data analysis:** Identify patterns and insights in large datasets.\n", 257 | "\n", 258 | "It's important to remember that LLMs are still under development and have limitations. 
They may sometimes generate incorrect or biased information, so it's crucial to critically evaluate their outputs.\n", 259 | "\n", 260 | "Overall, LLMs offer a powerful and versatile platform for a wide range of applications, transforming the way we interact with information and technology.\n", 261 | "\n", 262 | "\n", 263 | "\n", 264 | "\n" 265 | ] 266 | } 267 | ], 268 | "source": [ 269 | "!pip install OpenAI\n", 270 | "from openai import OpenAI\n", 271 | "\n", 272 | "groq = OpenAI(\n", 273 | " api_key=userdata.get(\"GROQ_API_KEY\"), base_url=\"https://api.groq.com/openai/v1\"\n", 274 | ")\n", 275 | "\n", 276 | "response = groq.chat.completions.create(\n", 277 | " model=\"gemma2-9b-it\",\n", 278 | " messages=[\n", 279 | " {\n", 280 | " \"role\": \"user\",\n", 281 | " \"content\": \"What can you do with a Large Language Model?\",\n", 282 | " }\n", 283 | " ],\n", 284 | ")\n", 285 | "print(response.choices[0].message.content)" 286 | ] 287 | } 288 | ], 289 | "metadata": { 290 | "colab": { 291 | "name": "Gemma2_on_Groq.ipynb", 292 | "toc_visible": true 293 | }, 294 | "kernelspec": { 295 | "display_name": "Python 3", 296 | "name": "python3" 297 | } 298 | }, 299 | "nbformat": 4, 300 | "nbformat_minor": 0 301 | } 302 | -------------------------------------------------------------------------------- /Gemma/Integrate_with_Mesop.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "Tce3stUlHN0L" 7 | }, 8 | "source": [ 9 | "##### Copyright 2024 Google LLC." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "cellView": "form", 17 | "id": "tuOe1ymfHZPu" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n", 22 | "# you may not use this file except in compliance with the License.\n", 23 | "# You may obtain a copy of the License at\n", 24 | "#\n", 25 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 26 | "#\n", 27 | "# Unless required by applicable law or agreed to in writing, software\n", 28 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 29 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 30 | "# See the License for the specific language governing permissions and\n", 31 | "# limitations under the License." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": { 37 | "id": "PXNm5_p_oxMF" 38 | }, 39 | "source": [ 40 | "# Gemma - Run with Mesop\n", 41 | "\n", 42 | "This notebook demonstrates how you can run a Gemma model with [Google Mesop](https://github.com/google/mesop). Mesop is a Python-based UI framework that allows you to rapidly build web apps like demos and internal apps.\n", 43 | "\n", 44 | "\n", 45 | " \n", 48 | "
\n", 46 | " Run in Google Colab\n", 47 | "
" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": { 54 | "id": "mERVCCsGUPIJ" 55 | }, 56 | "source": [ 57 | "## Setup\n", 58 | "\n", 59 | "### Select the Colab runtime\n", 60 | "To complete this tutorial, you'll need to have a Colab runtime with sufficient resources to run the Gemma model. In this case, you can use a T4 GPU:\n", 61 | "\n", 62 | "1. In the upper-right of the Colab window, select **▾ (Additional connection options)**.\n", 63 | "2. Select **Change runtime type**.\n", 64 | "3. Under **Hardware accelerator**, select **L4** or **A100 GPU**." 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "id": "QQ6W7NzRe1VM" 71 | }, 72 | "source": [ 73 | "### Gemma setup\n", 74 | "\n", 75 | "To complete this tutorial, you'll first need to complete the setup instructions at [Gemma setup](https://ai.google.dev/gemma/docs/setup). The Gemma setup instructions show you how to do the following:\n", 76 | "\n", 77 | "* Get access to Gemma on kaggle.com.\n", 78 | "* Select a Colab runtime with sufficient resources to run\n", 79 | " the Gemma 2B model.\n", 80 | "* Generate and configure a Kaggle username and API key.\n", 81 | "\n", 82 | "After you've completed the Gemma setup, move on to the next section, where you'll set environment variables for your Colab environment." 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": { 88 | "id": "_gN-IVRC3dQe" 89 | }, 90 | "source": [ 91 | "### Set environment variables\n", 92 | "\n", 93 | "Set environment variables for `KAGGLE_USERNAME` and `KAGGLE_KEY`." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "id": "DrBoa_Urw9Vx" 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "import os\n", 105 | "from google.colab import userdata\n", 106 | "\n", 107 | "# Note: `userdata.get` is a Colab API. If you're not using Colab, set the env\n", 108 | "# vars as appropriate for your system.\n", 109 | "os.environ[\"KAGGLE_USERNAME\"] = userdata.get(\"KAGGLE_USERNAME\")\n", 110 | "os.environ[\"KAGGLE_KEY\"] = userdata.get(\"KAGGLE_KEY\")" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": { 116 | "id": "z9oy3QUmXtSd" 117 | }, 118 | "source": [ 119 | "### Install dependencies\n", 120 | "\n", 121 | "You will run KerasNLP to run Gemma. So install Keras and KerasNLP." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "id": "UcGLzDeQ8NwN" 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "# Install Keras 3 last. See https://keras.io/getting_started/ for more details.\n", 133 | "!pip install -q -U keras-nlp\n", 134 | "!pip install -q -U keras" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": { 140 | "id": "Pm5cVOFt5YvZ" 141 | }, 142 | "source": [ 143 | "### Select a backend\n", 144 | "\n", 145 | "You will use the JAX backend for this tutorial." 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "id": "7rS7ryTs5wjf" 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "import os\n", 157 | "\n", 158 | "os.environ[\"KERAS_BACKEND\"] = \"jax\" # Or \"tensorflow\" or \"torch\".\n", 159 | "os.environ[\"XLA_PYTHON_CLIENT_MEM_FRACTION\"] = \"1.0\"" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": { 165 | "id": "599765c72722" 166 | }, 167 | "source": [ 168 | "### Import packages\n", 169 | "\n", 170 | "Import Keras and KerasNLP." 
171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": { 177 | "id": "f2fa267d75bc" 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "import keras\n", 182 | "import keras_nlp" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": { 188 | "id": "xfUlIT24giK8" 189 | }, 190 | "source": [ 191 | "Enable mixed precision on GPU." 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "id": "s79GrIXQf2HS" 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "# Run at half precision.\n", 203 | "keras.config.set_floatx(\"bfloat16\")\n", 204 | "keras.mixed_precision.set_global_policy(\"mixed_bfloat16\")" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "id": "ZsxDCbLN555T" 211 | }, 212 | "source": [ 213 | "## Create a model\n", 214 | "\n", 215 | "Create the Gemma model using the `from_preset` method." 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": { 222 | "id": "yygIK9DEIldp" 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset(\"gemma2_instruct_2b_en\")" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": { 232 | "id": "y5EMEQJgnfus" 233 | }, 234 | "source": [ 235 | "## Install and start Mesop" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "id": "KK-WfjB11DqO" 243 | }, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "Collecting mesop\n", 250 | " Downloading mesop-0.9.5-py3-none-any.whl (5.1 MB)\n", 251 | "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/5.1 MB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.5/5.1 MB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━\u001b[0m \u001b[32m3.3/5.1 MB\u001b[0m \u001b[31m51.3 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━\u001b[0m \u001b[32m3.9/5.1 MB\u001b[0m \u001b[31m37.3 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m \u001b[32m5.1/5.1 MB\u001b[0m \u001b[31m41.9 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.1/5.1 MB\u001b[0m \u001b[31m33.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 252 | "\u001b[?25hRequirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from mesop) (1.4.0)\n", 253 | "Collecting deepdiff==6.* (from mesop)\n", 254 | " Downloading deepdiff-6.7.1-py3-none-any.whl (76 kB)\n", 255 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.6/76.6 kB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 256 | "\u001b[?25hRequirement already satisfied: flask in /usr/local/lib/python3.10/dist-packages (from mesop) (2.2.5)\n", 257 | "Requirement already satisfied: markdown in /usr/local/lib/python3.10/dist-packages (from mesop) (3.6)\n", 258 | "Requirement already satisfied: msgpack in 
/usr/local/lib/python3.10/dist-packages (from mesop) (1.0.8)\n", 259 | "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from mesop) (3.20.3)\n", 260 | "Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from mesop) (2.8.2)\n", 261 | "Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from mesop) (2.16.1)\n", 262 | "Collecting python-dotenv (from mesop)\n", 263 | " Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", 264 | "Collecting watchdog (from mesop)\n", 265 | " Downloading watchdog-4.0.1-py3-none-manylinux2014_x86_64.whl (83 kB)\n", 266 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.0/83.0 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 267 | "\u001b[?25hCollecting ordered-set<4.2.0,>=4.0.2 (from deepdiff==6.*->mesop)\n", 268 | " Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)\n", 269 | "Requirement already satisfied: Werkzeug>=2.2.2 in /usr/local/lib/python3.10/dist-packages (from flask->mesop) (3.0.3)\n", 270 | "Requirement already satisfied: Jinja2>=3.0 in /usr/local/lib/python3.10/dist-packages (from flask->mesop) (3.1.4)\n", 271 | "Requirement already satisfied: itsdangerous>=2.0 in /usr/local/lib/python3.10/dist-packages (from flask->mesop) (2.2.0)\n", 272 | "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.10/dist-packages (from flask->mesop) (8.1.7)\n", 273 | "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic->mesop) (0.7.0)\n", 274 | "Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic->mesop) (2.20.1)\n", 275 | "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic->mesop) (4.12.2)\n", 276 | "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from Jinja2>=3.0->flask->mesop) (2.1.5)\n", 277 | "Installing collected packages: watchdog, python-dotenv, ordered-set, deepdiff, mesop\n", 278 | "Successfully installed deepdiff-6.7.1 mesop-0.9.5 ordered-set-4.1.0 python-dotenv-1.0.1 watchdog-4.0.1\n", 279 | "\n", 280 | "\u001b[32mRunning server on: http://localhost:32123\u001b[0m\n", 281 | " * Serving Flask app 'mesop.server.server'\n", 282 | " * Debug mode: off\n" 283 | ] 284 | } 285 | ], 286 | "source": [ 287 | "!pip install mesop\n", 288 | "import mesop as me\n", 289 | "import mesop.labs as mel\n", 290 | "\n", 291 | "me.colab_run()" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": { 297 | "id": "0C2CnGxlnlsK" 298 | }, 299 | "source": [ 300 | "Load the Mesop UI." 
301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "id": "D1hVvC5b1KI-" 308 | }, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "application/javascript": [ 313 | "(async (port, path, width, height, cache, element) => {\n", 314 | " if (!google.colab.kernel.accessAllowed && !cache) {\n", 315 | " return;\n", 316 | " }\n", 317 | " element.appendChild(document.createTextNode(''));\n", 318 | " const url = await google.colab.kernel.proxyPort(port, {cache});\n", 319 | " const iframe = document.createElement('iframe');\n", 320 | " iframe.src = new URL(path, url).toString();\n", 321 | " iframe.height = height;\n", 322 | " iframe.width = width;\n", 323 | " iframe.style.border = 0;\n", 324 | " iframe.allow = [\n", 325 | " 'accelerometer',\n", 326 | " 'autoplay',\n", 327 | " 'camera',\n", 328 | " 'clipboard-read',\n", 329 | " 'clipboard-write',\n", 330 | " 'gyroscope',\n", 331 | " 'magnetometer',\n", 332 | " 'microphone',\n", 333 | " 'serial',\n", 334 | " 'usb',\n", 335 | " 'xr-spatial-tracking',\n", 336 | " ].join('; ');\n", 337 | " element.appendChild(iframe);\n", 338 | " })(32123, \"/chat\", \"100%\", \"400\", false, window.element)" 339 | ], 340 | "text/plain": [ 341 | "" 342 | ] 343 | }, 344 | "metadata": {}, 345 | "output_type": "display_data" 346 | }, 347 | { 348 | "name": "stderr", 349 | "output_type": "stream", 350 | "text": [ 351 | "INFO:werkzeug:\u001b[31m\u001b[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.\u001b[0m\n", 352 | " * Running on all addresses (::)\n", 353 | " * Running on http://[::1]:32123\n", 354 | " * Running on http://[::1]:32123\n", 355 | "INFO:werkzeug:\u001b[33mPress CTRL+C to quit\u001b[0m\n" 356 | ] 357 | } 358 | ], 359 | "source": [ 360 | "@me.page(path=\"/chat\")\n", 361 | "def chat():\n", 362 | " mel.chat(transform)\n", 363 | "\n", 364 | "\n", 365 | "def transform(user_prompt: str, history: list[mel.ChatMessage]) -> str:\n", 366 | "\n", 367 | " # Assemble the prompt from the chat history using Gemma's turn format\n", 368 | " prompt = \"\"\n", 369 | " for h in history:\n", 370 | " prompt += \"<start_of_turn>{role}\\n{content}<end_of_turn>\\n\".format(\n", 371 | " role=h.role, content=h.content\n", 372 | " )\n", 373 | " prompt += \"<start_of_turn>model\\n\"\n", 374 | "\n", 375 | " result = gemma_lm.generate(prompt)\n", 376 | " return result[len(prompt) :]\n", 377 | "\n", 378 | "\n", 379 | "me.colab_show(path=\"/chat\")" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": { 385 | "id": "J7enAHFb1C4i" 386 | }, 387 | "source": [ 388 | "Now you can chat with the Gemma model in the Mesop UI. You can restart the conversation by running the cell above again. 
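For reference, the `transform` function above assembles the prompt in Gemma's instruction-tuned turn format, using the `<start_of_turn>`/`<end_of_turn>` control tokens. A two-turn conversation would serialize like this (a sketch; `user` and `model` are the role names used by Mesop's chat component, and the trailing `<start_of_turn>model` line invites the model to produce the next reply):

```
<start_of_turn>user
What is the capital of Spain?<end_of_turn>
<start_of_turn>model
The capital of Spain is Madrid.<end_of_turn>
<start_of_turn>user
And of France?<end_of_turn>
<start_of_turn>model
```

Because `transform` returns `result[len(prompt):]`, only the newly generated turn is sent back to the UI, not the accumulated history.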
389 | ] 390 | } 391 | ], 392 | "metadata": { 393 | "accelerator": "GPU", 394 | "colab": { 395 | "name": "Integrate_with_Mesop.ipynb", 396 | "toc_visible": true 397 | }, 398 | "google": { 399 | "image_path": "/site-assets/images/marketing/gemma.png", 400 | "keywords": [ 401 | "examples", 402 | "gemma", 403 | "python", 404 | "quickstart", 405 | "text" 406 | ] 407 | }, 408 | "kernelspec": { 409 | "display_name": "Python 3", 410 | "name": "python3" 411 | } 412 | }, 413 | "nbformat": 4, 414 | "nbformat_minor": 0 415 | } 416 | -------------------------------------------------------------------------------- /Gemma/Keras_Gemma_2_Quickstart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "Tce3stUlHN0L" 7 | }, 8 | "source": [ 9 | "##### Copyright 2024 Google LLC." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "cellView": "form", 17 | "id": "tuOe1ymfHZPu" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n", 22 | "# you may not use this file except in compliance with the License.\n", 23 | "# You may obtain a copy of the License at\n", 24 | "#\n", 25 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 26 | "#\n", 27 | "# Unless required by applicable law or agreed to in writing, software\n", 28 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 29 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 30 | "# See the License for the specific language governing permissions and\n", 31 | "# limitations under the License." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": { 37 | "id": "PXNm5_p_oxMF" 38 | }, 39 | "source": [ 40 | "This is a quick demo of Gemma running on KerasNLP.\n", 41 | "\n", 42 | "Note that you will need a large GPU (e.g. A100) to run this as well.\n", 43 | "\n", 44 | "General Keras reading:\n", 45 | "- [Getting started with Keras](https://keras.io/getting_started/)\n", 46 | "- [Getting started with KerasNLP](https://keras.io/guides/keras_nlp/getting_started/)\n", 47 | "\n", 48 | "\n", 49 | " \n", 52 | "
\n", 50 | " Run in Google Colab\n", 51 | "
" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": { 58 | "id": "VzFhJDayXroT" 59 | }, 60 | "source": [ 61 | "## Access" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 1, 67 | "metadata": { 68 | "id": "DrBoa_Urw9Vx" 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "import os\n", 73 | "from google.colab import userdata\n", 74 | "\n", 75 | "os.environ[\"KAGGLE_USERNAME\"] = userdata.get('KAGGLE_USERNAME')\n", 76 | "os.environ[\"KAGGLE_KEY\"] = userdata.get('KAGGLE_KEY')\n", 77 | "\n", 78 | "os.environ[\"KERAS_BACKEND\"] = \"jax\" # Or \"tensorflow\" or \"torch\"." 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": { 84 | "id": "z9oy3QUmXtSd" 85 | }, 86 | "source": [ 87 | "## Installation" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "id": "bMboT70Xop8G" 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "# Install all deps\n", 99 | "!pip install -U keras-nlp\n", 100 | "!pip install -U keras==3.3.3" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": { 106 | "id": "FX47AUYrXwLK" 107 | }, 108 | "source": [ 109 | "## Quickstart" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 3, 115 | "metadata": { 116 | "id": "ww83zI9ToPso" 117 | }, 118 | "outputs": [ 119 | { 120 | "name": "stderr", 121 | "output_type": "stream", 122 | "text": [ 123 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/model.safetensors...\n", 124 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/model.safetensors.index.json...\n", 125 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/metadata.json...\n", 126 | "100%|██████████| 143/143 [00:00<00:00, 153kB/s]\n", 127 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/task.json...\n", 128 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/config.json...\n", 129 | "100%|██████████| 780/780 [00:00<00:00, 884kB/s]\n", 130 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/model.safetensors...\n", 131 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/model.safetensors.index.json...\n", 132 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/model.weights.h5...\n", 133 | "100%|██████████| 17.2G/17.2G [04:22<00:00, 70.5MB/s]\n", 134 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/model.safetensors...\n", 135 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/model.safetensors.index.json...\n", 136 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/preprocessor.json...\n", 137 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/tokenizer.json...\n", 138 | "100%|██████████| 315/315 [00:00<00:00, 434kB/s]\n", 139 | "Downloading from https://www.kaggle.com/api/v1/models/keras/gemma2/keras/gemma2_9b_en/1/download/assets/tokenizer/vocabulary.spm...\n", 140 | "100%|██████████| 4.04M/4.04M [00:00<00:00, 14.6MB/s]\n" 141 | ] 142 | }, 143 | { 144 | "data": { 145 | "text/html": [ 146 | "
Preprocessor: \"gemma_causal_lm_preprocessor\"\n",
147 |               "
\n" 148 | ], 149 | "text/plain": [ 150 | "\u001b[1mPreprocessor: \"gemma_causal_lm_preprocessor\"\u001b[0m\n" 151 | ] 152 | }, 153 | "metadata": {}, 154 | "output_type": "display_data" 155 | }, 156 | { 157 | "data": { 158 | "text/html": [ 159 | "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
160 |               "┃ Tokenizer (type)                                                                                Vocab # ┃\n",
161 |               "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
162 |               "│ gemma_tokenizer (GemmaTokenizer)                   │                                             256,000 │\n",
163 |               "└────────────────────────────────────────────────────┴─────────────────────────────────────────────────────┘\n",
164 |               "
\n" 165 | ], 166 | "text/plain": [ 167 | "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", 168 | "┃\u001b[1m \u001b[0m\u001b[1mTokenizer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Vocab #\u001b[0m\u001b[1m \u001b[0m┃\n", 169 | "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", 170 | "│ gemma_tokenizer (\u001b[38;5;33mGemmaTokenizer\u001b[0m) │ \u001b[38;5;34m256,000\u001b[0m │\n", 171 | "└────────────────────────────────────────────────────┴─────────────────────────────────────────────────────┘\n" 172 | ] 173 | }, 174 | "metadata": {}, 175 | "output_type": "display_data" 176 | }, 177 | { 178 | "data": { 179 | "text/html": [ 180 | "
Model: \"gemma_causal_lm\"\n",
181 |               "
\n" 182 | ], 183 | "text/plain": [ 184 | "\u001b[1mModel: \"gemma_causal_lm\"\u001b[0m\n" 185 | ] 186 | }, 187 | "metadata": {}, 188 | "output_type": "display_data" 189 | }, 190 | { 191 | "data": { 192 | "text/html": [ 193 | "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
194 |               "┃ Layer (type)                   Output Shape                       Param #  Connected to               ┃\n",
195 |               "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
196 |               "│ padding_mask (InputLayer)     │ (None, None)              │               0 │ -                          │\n",
197 |               "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n",
198 |               "│ token_ids (InputLayer)        │ (None, None)              │               0 │ -                          │\n",
199 |               "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n",
200 |               "│ gemma_backbone                │ (None, None, 3584)        │   9,241,705,984 │ padding_mask[0][0],        │\n",
201 |               "│ (GemmaBackbone)               │                           │                 │ token_ids[0][0]            │\n",
202 |               "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n",
203 |               "│ token_embedding               │ (None, None, 256000)      │     917,504,000 │ gemma_backbone[0][0]       │\n",
204 |               "│ (ReversibleEmbedding)         │                           │                 │                            │\n",
205 |               "└───────────────────────────────┴───────────────────────────┴─────────────────┴────────────────────────────┘\n",
206 |               "
\n" 207 | ], 208 | "text/plain": [ 209 | "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", 210 | "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0m┃\n", 211 | "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", 212 | "│ padding_mask (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n", 213 | "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n", 214 | "│ token_ids (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │ - │\n", 215 | "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n", 216 | "│ gemma_backbone │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m3584\u001b[0m) │ \u001b[38;5;34m9,241,705,984\u001b[0m │ padding_mask[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], │\n", 217 | "│ (\u001b[38;5;33mGemmaBackbone\u001b[0m) │ │ │ token_ids[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n", 218 | "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n", 219 | "│ token_embedding │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256000\u001b[0m) │ \u001b[38;5;34m917,504,000\u001b[0m │ gemma_backbone[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n", 220 | "│ (\u001b[38;5;33mReversibleEmbedding\u001b[0m) │ │ │ │\n", 221 | "└───────────────────────────────┴───────────────────────────┴─────────────────┴────────────────────────────┘\n" 222 | ] 223 | }, 224 | "metadata": {}, 225 | "output_type": "display_data" 226 | }, 227 | { 228 | "data": { 229 | "text/html": [ 230 | "
 Total params: 9,241,705,984 (17.21 GB)\n",
231 |               "
\n" 232 | ], 233 | "text/plain": [ 234 | "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m9,241,705,984\u001b[0m (17.21 GB)\n" 235 | ] 236 | }, 237 | "metadata": {}, 238 | "output_type": "display_data" 239 | }, 240 | { 241 | "data": { 242 | "text/html": [ 243 | "
 Trainable params: 9,241,705,984 (17.21 GB)\n",
244 |               "
\n" 245 | ], 246 | "text/plain": [ 247 | "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m9,241,705,984\u001b[0m (17.21 GB)\n" 248 | ] 249 | }, 250 | "metadata": {}, 251 | "output_type": "display_data" 252 | }, 253 | { 254 | "data": { 255 | "text/html": [ 256 | "
 Non-trainable params: 0 (0.00 B)\n",
257 |               "
\n" 258 | ], 259 | "text/plain": [ 260 | "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" 261 | ] 262 | }, 263 | "metadata": {}, 264 | "output_type": "display_data" 265 | } 266 | ], 267 | "source": [ 268 | "import keras_nlp\n", 269 | "import keras\n", 270 | "\n", 271 | "# Run at half precision.\n", 272 | "keras.config.set_floatx(\"bfloat16\")\n", 273 | "\n", 274 | "# using 9B base model\n", 275 | "gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset(\"gemma2_9b_en\")\n", 276 | "gemma_lm.summary()" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 9, 282 | "metadata": { 283 | "id": "aae5GHrdpj2_" 284 | }, 285 | "outputs": [ 286 | { 287 | "name": "stdout", 288 | "output_type": "stream", 289 | "text": [ 290 | "It was a dark and stormy night.\n", 291 | "\n", 292 | "The wind was howling, the rain was pouring, and the thunder was rumbling.\n", 293 | "\n", 294 | "I was sitting in my living room, watching the storm rage outside.\n", 295 | "\n", 296 | "Suddenly, I heard a knock at the door.\n", 297 | "\n", 298 | "I got up and opened it, and there stood a man in a black cloak.\n", 299 | "\n", 300 | "He had a strange look in his eyes, and he was holding a lantern.\n", 301 | "\n", 302 | "\"Who are you?\" I asked.\n", 303 | "\n", 304 | "\"I am the storm,\" he replied.\n", 305 | "\n", 306 | "\"And I have come to take you away.\"\n", 307 | "\n", 308 | "I was terrified, but I couldn't move.\n", 309 | "\n", 310 | "The man in the black cloak grabbed my arm and pulled me out into the storm.\n", 311 | "\n", 312 | "We walked for what seemed like hours, until we came to a clearing in the woods.\n", 313 | "\n", 314 | "There, the man in the black cloak stopped and turned to me.\n", 315 | "\n", 316 | "\"You are mine now,\" he said.\n", 317 | "\n", 318 | "\"And I will take you to my castle.\"\n", 319 | "\n", 320 | "I tried to fight him off, but he was too strong.\n", 321 | "\n", 322 | "He dragged me into the castle, and I was never seen again.\n", 323 | "\n", 324 | "The end.\n" 325 | ] 326 | } 327 | ], 328 | "source": [ 329 | "result = gemma_lm.generate(\"It was a dark and stormy night.\", max_length=256)\n", 330 | "print(result)" 331 | ] 332 | } 333 | ], 334 | "metadata": { 335 | "accelerator": "GPU", 336 | "colab": { 337 | "name": "Keras_Gemma_2_Quickstart.ipynb", 338 | "toc_visible": true 339 | }, 340 | "kernelspec": { 341 | "display_name": "Python 3", 342 | "name": "python3" 343 | } 344 | }, 345 | "nbformat": 4, 346 | "nbformat_minor": 0 347 | } 348 | -------------------------------------------------------------------------------- /Gemma/gemma_inference_on_tpu.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "zRN0MvYFIVmT" 7 | }, 8 | "source": [ 9 | "##### Copyright 2024 Google LLC." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "cellView": "form", 17 | "id": "kNIU45vmIl80" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n", 22 | "# you may not use this file except in compliance with the License.\n", 23 | "# You may obtain a copy of the License at\n", 24 | "#\n", 25 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 26 | "#\n", 27 | "# Unless required by applicable law or agreed to in writing, software\n", 28 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 29 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 30 | "# See the License for the specific language governing permissions and\n", 31 | "# limitations under the License." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": { 37 | "id": "9PcbvP7Lz1Pn" 38 | }, 39 | "source": [ 40 | "# Gemma Inference on TPUs\n", 41 | "This notebook demonstrates how to leverage Google Colab's TPUs for inference with [Gemma](https://ai.google.dev/gemma), an open-weights Large Language Model (LLM), using the [Flax](https://github.com/google/flax) framework.\n", 42 | "\n", 43 | "\n", 44 | " 
\n", 45 | " Run in Google Colab\n", 46 | "
" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "id": "09OSQCd5ebzP" 54 | }, 55 | "source": [ 56 | "### Connect to a TPU\n", 57 | "- To connect to a TPU v2, click on the button Connect TPU in the top right-hand corner of the screen." 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "id": "6wOtGTbWfKX1" 64 | }, 65 | "source": [ 66 | "Now you can see the TPU devices you have available:\n" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "id": "TCXhNGCJexoK" 74 | }, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "[TpuDevice(id=0, process_index=0, coords=(0,0,0), core_on_chip=0),\n", 80 | " TpuDevice(id=1, process_index=0, coords=(0,0,0), core_on_chip=1),\n", 81 | " TpuDevice(id=2, process_index=0, coords=(1,0,0), core_on_chip=0),\n", 82 | " TpuDevice(id=3, process_index=0, coords=(1,0,0), core_on_chip=1),\n", 83 | " TpuDevice(id=4, process_index=0, coords=(0,1,0), core_on_chip=0),\n", 84 | " TpuDevice(id=5, process_index=0, coords=(0,1,0), core_on_chip=1),\n", 85 | " TpuDevice(id=6, process_index=0, coords=(1,1,0), core_on_chip=0),\n", 86 | " TpuDevice(id=7, process_index=0, coords=(1,1,0), core_on_chip=1)]" 87 | ] 88 | }, 89 | "execution_count": 1, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "import jax\n", 96 | "\n", 97 | "jax.devices()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": { 103 | "id": "LtzOe_3XY9R5" 104 | }, 105 | "source": [ 106 | "## Installation" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "id": "b_42SyQifbJ2" 113 | }, 114 | "source": [ 115 | "- To install Gemma you need to use Python 3.10 or higher.\n", 116 | "- Google Colab typically offers Python 3.6 or later versions as the default runtime environment." 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "id": "iq2ebV_6YNiU" 124 | }, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "Collecting git+https://github.com/google-deepmind/gemma.git\n", 131 | " Cloning https://github.com/google-deepmind/gemma.git to /tmp/pip-req-build-vdzv6aiz\n", 132 | " Running command git clone --filter=blob:none --quiet https://github.com/google-deepmind/gemma.git /tmp/pip-req-build-vdzv6aiz\n", 133 | " Resolved https://github.com/google-deepmind/gemma.git to commit a24194737dcb54b7392091e9ba772aea1cb68ffb\n", 134 | " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", 135 | " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", 136 | " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", 137 | "Requirement already satisfied: absl-py<3.0.0,>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from gemma==1.0.0) (2.1.0)\n", 138 | "Requirement already satisfied: flax>=0.8 in /usr/local/lib/python3.10/dist-packages (from gemma==1.0.0) (0.8.4)\n", 139 | "Requirement already satisfied: sentencepiece<0.2.0,>=0.1.99 in /usr/local/lib/python3.10/dist-packages (from gemma==1.0.0) (0.1.99)\n", 140 | "Requirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from flax>=0.8->gemma==1.0.0) (1.25.2)\n", 141 | "Requirement already satisfied: jax>=0.4.19 in /usr/local/lib/python3.10/dist-packages (from flax>=0.8->gemma==1.0.0) (0.4.26)\n", 142 | "Requirement already satisfied: msgpack in /usr/local/lib/python3.10/dist-packages (from flax>=0.8->gemma==1.0.0) (1.0.8)\n", 143 | "Requirement already satisfied: optax in /usr/local/lib/python3.10/dist-packages (from flax>=0.8->gemma==1.0.0) (0.1.9)\n", 144 | "Requirement already satisfied: orbax-checkpoint in /usr/local/lib/python3.10/dist-packages (from flax>=0.8->gemma==1.0.0) (0.4.4)\n", 145 | "Requirement already satisfied: tensorstore in /usr/local/lib/python3.10/dist-packages (from flax>=0.8->gemma==1.0.0) (0.1.45)\n", 146 | "Requirement already satisfied: rich>=11.1 in /usr/local/lib/python3.10/dist-packages (from flax>=0.8->gemma==1.0.0) (13.7.1)\n", 147 | "Requirement already satisfied: typing-extensions>=4.2 in /usr/local/lib/python3.10/dist-packages (from flax>=0.8->gemma==1.0.0) (4.12.2)\n", 148 | "Requirement already satisfied: PyYAML>=5.4.1 in /usr/local/lib/python3.10/dist-packages (from flax>=0.8->gemma==1.0.0) (6.0.1)\n", 149 | "Requirement already satisfied: ml-dtypes>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from jax>=0.4.19->flax>=0.8->gemma==1.0.0) (0.2.0)\n", 150 | "Requirement already satisfied: opt-einsum in /usr/local/lib/python3.10/dist-packages (from jax>=0.4.19->flax>=0.8->gemma==1.0.0) (3.3.0)\n", 151 | "Requirement already satisfied: scipy>=1.9 in /usr/local/lib/python3.10/dist-packages (from jax>=0.4.19->flax>=0.8->gemma==1.0.0) (1.11.4)\n", 152 | "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1->flax>=0.8->gemma==1.0.0) (3.0.0)\n", 153 | "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1->flax>=0.8->gemma==1.0.0) (2.18.0)\n", 154 | "Requirement already satisfied: chex>=0.1.7 in /usr/local/lib/python3.10/dist-packages (from optax->flax>=0.8->gemma==1.0.0) (0.1.86)\n", 155 | "Requirement already satisfied: jaxlib>=0.1.37 in /usr/local/lib/python3.10/dist-packages (from optax->flax>=0.8->gemma==1.0.0) (0.4.26)\n", 156 | "Requirement already satisfied: etils[epath,epy] in /usr/local/lib/python3.10/dist-packages (from orbax-checkpoint->flax>=0.8->gemma==1.0.0) (1.7.0)\n", 157 | "Requirement already satisfied: nest_asyncio in /usr/local/lib/python3.10/dist-packages (from orbax-checkpoint->flax>=0.8->gemma==1.0.0) (1.6.0)\n", 158 | "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from orbax-checkpoint->flax>=0.8->gemma==1.0.0) (3.20.3)\n", 159 | "Requirement already satisfied: toolz>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from chex>=0.1.7->optax->flax>=0.8->gemma==1.0.0) (0.12.1)\n", 160 | "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=11.1->flax>=0.8->gemma==1.0.0) (0.1.2)\n", 161 | "Requirement already 
satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from etils[epath,epy]->orbax-checkpoint->flax>=0.8->gemma==1.0.0) (2024.6.1)\n", 162 | "Requirement already satisfied: importlib_resources in /usr/local/lib/python3.10/dist-packages (from etils[epath,epy]->orbax-checkpoint->flax>=0.8->gemma==1.0.0) (6.4.0)\n", 163 | "Requirement already satisfied: zipp in /usr/local/lib/python3.10/dist-packages (from etils[epath,epy]->orbax-checkpoint->flax>=0.8->gemma==1.0.0) (3.19.2)\n", 164 | "Requirement already satisfied: kaggle in /usr/local/lib/python3.10/dist-packages (1.6.14)\n", 165 | "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.10/dist-packages (from kaggle) (1.16.0)\n", 166 | "Requirement already satisfied: certifi>=2023.7.22 in /usr/local/lib/python3.10/dist-packages (from kaggle) (2024.7.4)\n", 167 | "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.9.0.post0)\n", 168 | "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.31.0)\n", 169 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kaggle) (4.66.4)\n", 170 | "Requirement already satisfied: python-slugify in /usr/local/lib/python3.10/dist-packages (from kaggle) (8.0.4)\n", 171 | "Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.0.7)\n", 172 | "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from kaggle) (6.1.0)\n", 173 | "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->kaggle) (0.5.1)\n", 174 | "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify->kaggle) (1.3)\n", 175 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.3.2)\n", 176 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.7)\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "! pip install git+https://github.com/google-deepmind/gemma.git\n", 182 | "! pip install --user kaggle" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": { 188 | "id": "QOzN-gxIYSB4" 189 | }, 190 | "source": [ 191 | "## Downloading the Gemma Checkpoint\n", 192 | "\n", 193 | "Before using [Google Gemma](https://ai.google.dev/gemma) for the first time, you must request access to the model through Kaggle by following the setup instructions at [Gemma setup](https://ai.google.dev/gemma/docs/setup), or by completing the following steps:\n", 194 | "\n", 195 | "1. Log in to [Kaggle](https://www.kaggle.com) or create a new Kaggle account if you don't already have one.\n", 196 | "1. Go to the [Gemma model card](https://www.kaggle.com/models/google/gemma/), and click **Request Access**.\n", 197 | "1. Complete the consent form and accept the terms and conditions.\n", 198 | "\n", 199 | "To generate a Kaggle API key, open your [**Settings** page in Kaggle](https://www.kaggle.com/settings) and click **Create New Token**. This triggers the download of a `kaggle.json` file containing your API credentials.\n", 200 | "\n", 201 | "Then, in Colab, select **Secrets** (🔑) in the left pane and add your Kaggle username and Kaggle API key. 
Store your username under the name `KAGGLE_USERNAME` and your API key under the name `KAGGLE_KEY`.\n", 202 | "\n", 203 | "Set environment variables for Kaggle API credentials." 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "id": "likVQiEEYS5X" 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "import os\n", 215 | "from google.colab import userdata\n", 216 | "\n", 217 | "os.environ[\"KAGGLE_USERNAME\"] = userdata.get('KAGGLE_USERNAME')\n", 218 | "os.environ[\"KAGGLE_KEY\"] = userdata.get('KAGGLE_KEY')" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "id": "O-sxcasvESaM" 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "import kagglehub\n", 230 | "\n", 231 | "VARIANT = '2b-it' # @param ['2b', '2b-it', '7b', '7b-it'] {type:\"string\"}\n", 232 | "weights_dir = kagglehub.model_download(f'google/gemma/Flax/{VARIANT}')\n", 233 | "\n", 234 | "ckpt_path = os.path.join(weights_dir, VARIANT)\n", 235 | "vocab_path = os.path.join(weights_dir, 'tokenizer.model')" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "id": "-jpTUa1YESaM" 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "from gemma import params as params_lib\n", 247 | "from gemma import sampler as sampler_lib\n", 248 | "from gemma import transformer as transformer_lib\n", 249 | "import sentencepiece as spm" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": { 255 | "id": "4fDQsC87ESaN" 256 | }, 257 | "source": [ 258 | "## Start Generating with Your Model\n", 259 | "\n", 260 | "Load and prepare your LLM's checkpoint for use with Flax." 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "cellView": "form", 268 | "id": "57nMYQ4HESaN" 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "# Load parameters\n", 273 | "params = params_lib.load_and_format_params(ckpt_path)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": { 279 | "id": "NWJ3UvHXESaN" 280 | }, 281 | "source": [ 282 | "Load your tokenizer, which you'll construct using the [SentencePiece](https://github.com/google/sentencepiece) library." 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": { 289 | "cellView": "form", 290 | "id": "khXrjEF0ESaN" 291 | }, 292 | "outputs": [ 293 | { 294 | "data": { 295 | "text/plain": [ 296 | "True" 297 | ] 298 | }, 299 | "execution_count": 7, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "vocab = spm.SentencePieceProcessor()\n", 306 | "vocab.Load(vocab_path)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": { 312 | "id": "tCRtZMg0ESaN" 313 | }, 314 | "source": [ 315 | "Use the `transformer_lib.TransformerConfig.from_params` function to automatically load the correct configuration from a checkpoint. Note that the vocabulary size is smaller than the number of input embeddings due to unused tokens in this release. 
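Once the next cell has built the configuration, you can observe this mismatch directly. A quick sketch (an assumption to verify: `num_embed` is the embedding-table size field on the config, as in the google-deepmind/gemma source; run this only after `transformer_config` exists):

```python
# Sketch: compare the tokenizer's vocabulary with the checkpoint's embedding table.
# Run after the next cell has defined `transformer_config`.
print("tokenizer vocab size:", vocab.GetPieceSize())
print("embedding table size:", transformer_config.num_embed)
# The embedding table is slightly larger; the extra rows are unused in this release.
```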
316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": { 322 | "cellView": "form", 323 | "id": "7InOzQtcESaN" 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "transformer_config = transformer_lib.TransformerConfig.from_params(\n", 328 | " params,\n", 329 | " cache_size=1024 # Number of time steps in the transformer's cache\n", 330 | ")\n", 331 | "transformer = transformer_lib.Transformer(transformer_config)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": { 337 | "id": "KaU-X3_jESaN" 338 | }, 339 | "source": [ 340 | "Finally, build a sampler on top of your model and your tokenizer." 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "cellView": "form", 348 | "id": "bdstASGrESaN" 349 | }, 350 | "outputs": [], 351 | "source": [ 352 | "# Create a sampler with the right param shapes.\n", 353 | "sampler = sampler_lib.Sampler(\n", 354 | " transformer=transformer,\n", 355 | " vocab=vocab,\n", 356 | " params=params['transformer'],\n", 357 | ")" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": { 363 | "id": "C1fLns-_ESaN" 364 | }, 365 | "source": [ 366 | "You're ready to start sampling! This sampler uses just-in-time compilation, so changing the input shape triggers recompilation, which can slow things down. For the fastest and most efficient results, keep your batch size consistent." 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": { 373 | "cellView": "form", 374 | "id": "qA0BhNQvESaN" 375 | }, 376 | "outputs": [ 377 | { 378 | "name": "stdout", 379 | "output_type": "stream", 380 | "text": [ 381 | "Prompt:\n", 382 | "\n", 383 | " Explain the phenomenon of a solar eclipse.\n", 384 | " Answer:\n", 385 | "\n", 386 | "\n", 387 | "A solar eclipse occurs when the Moon passes between the Sun and Earth, casting a shadow on Earth. This phenomenon is caused by the relative positions of the Moon, Sun, and Earth.\n", 388 | "\n", 389 | "**Here's a step-by-step explanation of how a solar eclipse occurs:**\n", 390 | "\n", 391 | "1. **New Moon:** The Moon is positioned between the Sun and Earth, and the Sun's rays are not directly visible from Earth.\n", 392 | "2. **Waxing Crescent Phase:** As the Moon orbits the Sun, it gradually moves from the new moon phase to the waxing crescent phase. This means that the illuminated portion of the Moon is gradually increasing.\n", 393 | "3. **First Quarter Phase:** When the Moon is at the first quarter phase, half of its face is illuminated.\n", 394 | "4. **Waxing Gibbous Phase:** As the Moon continues to orbit the Sun, it moves further away from the Sun, and the illuminated portion of the Moon gradually increases to the waxing gibbous phase. This means that more and more of the Moon is illuminated.\n", 395 | "5. **Full Moon:** When the Moon is at the full moon phase, the entire face of the Moon is illuminated.\n", 396 | "6. **Waning Gibbous Phase:** As the Moon moves away from the Sun, it gradually moves back into the waning gibbous phase. This means that the illuminated portion of the Moon is gradually decreasing.\n", 397 | "7. 
398 | "\n"
399 | ]
400 | }
401 | ],
402 | "source": [
403 | "input_batch = [\n",
404 | " \"\\n Explain the phenomenon of a solar eclipse.\",\n",
405 | " ]\n",
406 | "\n",
407 | "out_data = sampler(\n",
408 | " input_strings=input_batch,\n",
409 | " total_generation_steps=300,\n",
410 | " )\n",
411 | "\n",
412 | "for input_string, out_string in zip(input_batch, out_data.text):\n",
413 | " print(f\"Prompt:\\n{input_string}\\n Answer:\\n{out_string}\")\n",
414 | " print()"
415 | ]
416 | }
417 | ],
418 | "metadata": {
419 | "accelerator": "TPU",
420 | "colab": {
421 | "name": "gemma_inference_on_tpu.ipynb",
422 | "toc_visible": true
423 | },
424 | "kernelspec": {
425 | "display_name": "Python 3",
426 | "name": "python3"
427 | }
428 | },
429 | "nbformat": 4,
430 | "nbformat_minor": 0
431 | }
432 | 
--------------------------------------------------------------------------------
/PaliGemma/Zero_shot_object_detection_in_videos_using_PaliGemma.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "Tce3stUlHN0L"
7 | },
8 | "source": [
9 | "##### Copyright 2024 Google LLC."
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {
16 | "cellView": "form",
17 | "id": "tuOe1ymfHZPu"
18 | },
19 | "outputs": [],
20 | "source": [
21 | "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
22 | "# you may not use this file except in compliance with the License.\n",
23 | "# You may obtain a copy of the License at\n",
24 | "#\n",
25 | "# https://www.apache.org/licenses/LICENSE-2.0\n",
26 | "#\n",
27 | "# Unless required by applicable law or agreed to in writing, software\n",
28 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
29 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
30 | "# See the License for the specific language governing permissions and\n",
31 | "# limitations under the License."
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {
37 | "id": "WYDXtkholMVS"
38 | },
39 | "source": [
40 | "#### This notebook was created by [Nitin Tiwari](https://linkedin.com/in/tiwari-nitin).\n",
41 | "\n",
42 | "#### **Social links:**\n",
43 | "* [LinkedIn](https://linkedin.com/in/tiwari-nitin)\n",
44 | "* [GitHub](https://github.com/NSTiwari)\n",
45 | "* [Twitter](https://x.com/NSTiwari21)"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {
51 | "id": "2--uLhHDlPPJ"
52 | },
53 | "source": [
54 | "# Zero-shot Object Detection in videos"
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "metadata": {
60 | "id": "VPLU1zrDlSDJ"
61 | },
62 | "source": [
63 | "This notebook shows you how to perform zero-shot object detection on videos using [PaliGemma](https://ai.google.dev/gemma/docs/paligemma) and how to draw the resulting detections using OpenCV and PIL.\n",
64 | "\n",
65 | "Run in Google Colab"
\n", 67 | " Run in Google Colab\n", 68 | "
" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": { 75 | "id": "cq2882eIlczp" 76 | }, 77 | "source": [ 78 | "### Get access to PaliGemma\n", 79 | "\n", 80 | "Before using PaliGemma for the first time, you must request access to the model through Hugging Face by completing the following steps:\n", 81 | "\n", 82 | "1. Log in to [Hugging Face](https://huggingface.co), or create a new Hugging Face account if you don't already have one.\n", 83 | "2. Go to the [PaliGemma model card](https://huggingface.co/google/paligemma-3b-mix-224) to get access to the model.\n", 84 | "3. Complete the consent form and accept the terms and conditions.\n", 85 | "\n", 86 | "To generate a Hugging Face token, open your [**Settings** page in Hugging Face](https://huggingface.co/settings), choose **Access Tokens** option in the left pane and click **New token**. In the next window that appears, give a name to your token and choose the type as **Write** to get the write access.\n", 87 | "\n", 88 | "Then, in Colab, select **Secrets** (🔑) in the left pane and add your Hugging Face token. Store your Hugging Face token under the name `HF_TOKEN`." 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "id": "qV1XyFxHlfGB" 95 | }, 96 | "source": [ 97 | "### Select the runtime\n", 98 | "\n", 99 | "To complete this tutorial, you'll need to have a Colab runtime with sufficient resources to run the PaliGemma model. In this case, you can use a T4 GPU:\n", 100 | "\n", 101 | "1. In the upper-right of the Colab window, click the **▾ (Additional connection options)** dropdown menu.\n", 102 | "1. Select **Change runtime type**.\n", 103 | "1. Under **Hardware accelerator**, select **T4 GPU**." 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": { 109 | "id": "4Y6WdnIIEpOh" 110 | }, 111 | "source": [ 112 | "### Step 1: Install libraries" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 1, 118 | "metadata": { 119 | "id": "l5so74dCEO5B" 120 | }, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m119.8/119.8 MB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 127 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.4/309.4 kB\u001b[0m \u001b[31m26.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 128 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.6/251.6 kB\u001b[0m \u001b[31m23.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 129 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m66.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 130 | "\u001b[?25h" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "!pip install bitsandbytes transformers accelerate peft -q" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": { 141 | "id": "y4zYJcmnlv3Z" 142 | }, 143 | "source": [ 144 | "### Step 2: Set environment variables for Hugging Face token" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 4, 150 | "metadata": { 151 | "id": "ggzRPV54lxnY" 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "import os\n", 156 | "from google.colab import userdata\n", 157 | "\n", 158 | "os.environ[\"HF_TOKEN\"] = userdata.get('HF_TOKEN')" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": { 164 | "id": 
"IN0xf7VyEy-I" 165 | }, 166 | "source": [ 167 | "### Step 3: Load pre-trained PaliGemma base model" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 5, 173 | "metadata": { 174 | "id": "62GzNxTdE1hL" 175 | }, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "application/vnd.jupyter.widget-view+json": { 180 | "model_id": "9915dd0a1d88428f84e7783a4127fc31", 181 | "version_major": 2, 182 | "version_minor": 0 183 | }, 184 | "text/plain": [ 185 | "config.json: 0%| | 0.00/1.03k [00:00= 2:\n", 530 | " coordinates_str = detection[0].replace(\",\", \"\")\n", 531 | " label = detection[1]\n", 532 | " if \"\")\n", 541 | " coordinates = coordinates[:4]\n", 542 | "\n", 543 | " if coordinates[-1] == '':\n", 544 | " coordinates = coordinates[:-1]\n", 545 | "\n", 546 | "\n", 547 | " coordinates = [int(coord)/1024 for coord in coordinates]\n", 548 | " parsed_coordinates.append(coordinates)\n", 549 | "\n", 550 | " width = img.size[0]\n", 551 | " height = img.size[1]\n", 552 | "\n", 553 | " # Draw bounding boxes on the frame\n", 554 | " image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)\n", 555 | " for coordinates, label in zip(parsed_coordinates, labels):\n", 556 | " output_frame = draw_bounding_box(frame, coordinates, label, width, height)\n", 557 | "\n", 558 | " # Write the frame to the output video\n", 559 | " out.write(output_frame)\n", 560 | "\n", 561 | " # Exit on pressing 'q'\n", 562 | " if cv2.waitKey(1) & 0xFF == ord('q'):\n", 563 | " break\n", 564 | "\n", 565 | "# Release the video capture, output video writer, and destroy the window\n", 566 | "cap.release()\n", 567 | "out.release()\n", 568 | "cv2.destroyAllWindows()\n", 569 | "print(\"Output video \" + output_file + \" saved to disk.\")" 570 | ] 571 | } 572 | ], 573 | "metadata": { 574 | "accelerator": "GPU", 575 | "colab": { 576 | "name": "Zero_shot_object_detection_in_videos_using_PaliGemma.ipynb", 577 | "toc_visible": true 578 | }, 579 | "kernelspec": { 580 | "display_name": "Python 3", 581 | "name": "python3" 582 | } 583 | }, 584 | "nbformat": 4, 585 | "nbformat_minor": 0 586 | } 587 | -------------------------------------------------------------------------------- /Gemma/Prompt_chaining.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "Tce3stUlHN0L" 7 | }, 8 | "source": [ 9 | "##### Copyright 2024 Google LLC." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "cellView": "form", 17 | "id": "tuOe1ymfHZPu" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n", 22 | "# you may not use this file except in compliance with the License.\n", 23 | "# You may obtain a copy of the License at\n", 24 | "#\n", 25 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 26 | "#\n", 27 | "# Unless required by applicable law or agreed to in writing, software\n", 28 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 29 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 30 | "# See the License for the specific language governing permissions and\n", 31 | "# limitations under the License." 
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {
37 | "id": "dfsDR_omdNea"
38 | },
39 | "source": [
40 | "# Gemma - Prompt Chaining and Iterative Generation\n",
41 | "This notebook demonstrates how to use prompt chaining and iterative generation with Gemma through a story writing example.\n",
42 | "\n",
43 | "Run in Google Colab"
\n", 44 | " Run in Google Colab\n", 45 | "
" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "id": "FaqZItBdeokU" 53 | }, 54 | "source": [ 55 | "## Setup\n", 56 | "\n", 57 | "### Select the Colab runtime\n", 58 | "To complete this tutorial, you'll need to have a Colab runtime with sufficient resources to run the Gemma model. In this case, you can use a T4 GPU:\n", 59 | "\n", 60 | "1. In the upper-right of the Colab window, select **▾ (Additional connection options)**.\n", 61 | "2. Select **Change runtime type**.\n", 62 | "3. Under **Hardware accelerator**, select **T4 GPU**.\n", 63 | "\n", 64 | "### Gemma setup\n", 65 | "\n", 66 | "To complete this tutorial, you'll first need to complete the setup instructions at [Gemma setup](https://ai.google.dev/gemma/docs/setup). The Gemma setup instructions show you how to do the following:\n", 67 | "\n", 68 | "* Get access to Gemma on kaggle.com.\n", 69 | "* Select a Colab runtime with sufficient resources to run\n", 70 | " the Gemma 2B model.\n", 71 | "* Generate and configure a Kaggle username and an API key as Colab secrets.\n", 72 | "\n", 73 | "After you've completed the Gemma setup, move on to the next section, where you'll set environment variables for your Colab environment.\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": { 79 | "id": "CY2kGtsyYpHF" 80 | }, 81 | "source": [ 82 | "### Configure your credentials\n", 83 | "\n", 84 | "Add your your Kaggle credentials to the Colab Secrets manager to securely store it.\n", 85 | "\n", 86 | "1. Open your Google Colab notebook and click on the 🔑 Secrets tab in the left panel. \"The\n", 87 | "2. Create new secrets: `KAGGLE_USERNAME` and `KAGGLE_KEY`\n", 88 | "3. Copy/paste your username into `KAGGLE_USERNAME`\n", 89 | "3. Copy/paste your key into `KAGGLE_KEY`\n", 90 | "4. Toggle the buttons on the left to allow notebook access to the secrets.\n" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "id": "A9sUQ4WrP-Yr" 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "import os\n", 102 | "from google.colab import userdata\n", 103 | "\n", 104 | "# Set the backbend before importing Keras\n", 105 | "os.environ[\"KERAS_BACKEND\"] = \"jax\"\n", 106 | "# Avoid memory fragmentation on JAX backend.\n", 107 | "os.environ[\"XLA_PYTHON_CLIENT_MEM_FRACTION\"] = \"1.00\"\n", 108 | "\n", 109 | "# Note: `userdata.get` is a Colab API. If you're not using Colab, set the env\n", 110 | "# vars as appropriate for your system.\n", 111 | "os.environ[\"KAGGLE_USERNAME\"] = userdata.get(\"KAGGLE_USERNAME\")\n", 112 | "os.environ[\"KAGGLE_KEY\"] = userdata.get(\"KAGGLE_KEY\")" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": { 118 | "id": "iwjo5_Uucxkw" 119 | }, 120 | "source": [ 121 | "### Install dependencies\n", 122 | "Run the cell below to install all the required dependencies." 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "id": "r_nXPEsF7UWQ" 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "!pip install -q -U tensorflow keras keras-nlp" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": { 139 | "id": "pOAEiJmnBE0D" 140 | }, 141 | "source": [ 142 | "## Prompt chaining\n", 143 | "\n", 144 | "Prompt chaining is a powerful technique for managing complex tasks that are difficult to accomplish in a single step.\n", 145 | "\n", 146 | "It entails breaking a large task into smaller, linked prompts, where each prompt's output feeds into the next. 
147 | "\n",
148 | "\n",
149 | "\n",
150 | "* Enhanced accuracy: Focused, smaller prompts produce better results from the language model.\n",
151 | "* Easier debugging: Pinpointing and fixing issues within the chain is straightforward, allowing for precise improvements.\n",
152 | "* Handling complexity: Dividing intricate problems into manageable steps enables the language model to address more complex tasks.\n"
153 | ]
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {
158 | "id": "MTDrbUbiyhHL"
159 | },
160 | "source": [
161 | "## Iterative Generation\n",
162 | "Iterative generation is the process of creating the desired output step by step. This method is particularly useful for writing stories that exceed the length limitations of a single generation window. The advantages of iterative generation include:\n",
163 | "\n",
164 | "* Extended outputs: It enables the production of longer and more detailed content, going beyond the constraints of a single generation window.\n",
165 | "* Enhanced flexibility: Adjustments and refinements can be made at each\n",
166 | "iteration, ensuring the story progresses as intended.\n",
167 | "* Human oversight: Feedback and guidance can be provided at each step, ensuring the story stays true to the creator's vision.\n",
168 | "\n",
169 | "\n",
170 | "By using **prompt chaining** and **iterative generation** together, you can create an interesting and well-structured story, adding to it piece by piece, while still having control over how it unfolds."
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {
176 | "id": "J3sX2mFH4GWk"
177 | },
178 | "source": [
179 | "### Gemma"
180 | ]
181 | },
182 | {
183 | "cell_type": "markdown",
184 | "metadata": {
185 | "id": "Fz47tAgSKMNH"
186 | },
187 | "source": [
188 | "**About Gemma**\n",
189 | "\n",
190 | "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.\n",
191 | "\n",
192 | "Here's the [official documentation](https://ai.google.dev/gemma/docs/formatting) regarding prompting instruction-tuned models."
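All of the prompts that follow use Gemma's instruction-tuned turn format, so it helps to see that format in isolation first. The helper below is an illustrative sketch (the function name is ours, not from the notebook), built around the control tokens described in the formatting documentation linked above:

```python
def format_gemma_prompt(user_message: str) -> str:
    """Wrap a user message in Gemma's instruction-tuned chat markers."""
    return (
        "<start_of_turn>user\n"
        f"{user_message}<end_of_turn>\n"
        "<start_of_turn>model\n"
    )

# The model's reply is generated after the final '<start_of_turn>model' marker.
print(format_gemma_prompt("Write a one-sentence story premise."))
```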
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": null,
198 | "metadata": {
199 | "id": "B3WckZv2hef3"
200 | },
201 | "outputs": [],
202 | "source": [
203 | "import keras\n",
204 | "import keras_nlp\n",
205 | "from IPython.display import display, Markdown, Latex"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": null,
211 | "metadata": {
212 | "id": "8dfseDZChhjl"
213 | },
214 | "outputs": [],
215 | "source": [
216 | "# Let's load Gemma using Keras\n",
217 | "gemma_model_id = \"gemma2_instruct_2b_en\"\n",
218 | "gemma = keras_nlp.models.GemmaCausalLM.from_preset(gemma_model_id)"
219 | ]
220 | },
221 | {
222 | "cell_type": "markdown",
223 | "metadata": {
224 | "id": "fEhPz8PZuRcm"
225 | },
226 | "source": [
227 | "## Story writing baseline: persona, premise, outline"
228 | ]
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {
233 | "id": "u97asfO3uU-x"
234 | },
235 | "source": [
236 | "As the first step, you'll create a persona for the LLM to take on while performing the task. In this example, you want it to act as a children's author who wants to write a new funny and educational story."
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": null,
242 | "metadata": {
243 | "id": "i4Qg0dYqu5UL"
244 | },
245 | "outputs": [],
246 | "source": [
247 | "persona = \"\"\"You are a children's author with a penchant for humorous, yet educational stories.\n",
248 | "Your ultimate goal is to write a new story to be published in a children's magazine.\"\"\""
249 | ]
250 | },
251 | {
252 | "cell_type": "markdown",
253 | "metadata": {
254 | "id": "ZhniNs61u-Ku"
255 | },
256 | "source": [
257 | "The next subtask for the LLM is to develop a premise for the story. To achieve this, you need a prompt, which you'll build from the persona description. In this example, you want the model to create a story about bunnies."
258 | ]
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": null,
263 | "metadata": {
264 | "id": "XHXTQdNvu7L0"
265 | },
266 | "outputs": [],
267 | "source": [
268 | "premise_prompt = f\"\"\"<start_of_turn>user\n",
269 | "{persona}\n",
270 | "\n",
271 | "Write a single-sentence premise for a children's story featuring bunnies.<end_of_turn>\n",
272 | "<start_of_turn>model\\n\"\"\""
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": null,
278 | "metadata": {
279 | "id": "DyOvDcBkv5s1"
280 | },
281 | "outputs": [
282 | {
283 | "data": {
284 | "text/markdown": "When a mischievous bunny accidentally swaps his carrot patch with a giant sunflower field, he must learn the importance of teamwork and communication to get his carrots back. \n",
\n", 285 | "text/plain": [ 286 | "" 287 | ] 288 | }, 289 | "metadata": {}, 290 | "output_type": "display_data" 291 | } 292 | ], 293 | "source": [ 294 | "premise_response = gemma.generate(premise_prompt, max_length=512)\n", 295 | "premise = premise_response[len(premise_prompt) :]\n", 296 | "display(Markdown(premise))" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": { 302 | "id": "IpJ55LJAvblz" 303 | }, 304 | "source": [ 305 | "You'll use the generated premise to create a prompt for the next step, which will produce the story outline.\n" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": { 312 | "id": "c4FHQw0avkQ0" 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "outline_prompt = f\"\"\"user\n", 317 | "{persona}\n", 318 | "\n", 319 | "You have a gripping premise in mind:\n", 320 | "\n", 321 | "{{premise}}\n", 322 | "\n", 323 | "Create an outline for your story's plot consisting of 5 key points.\n", 324 | "model\\n\"\"\"" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": { 331 | "id": "PkT-L8i-v9xJ" 332 | }, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "text/markdown": "Here's a plot outline for a children's story about a mischievous bunny and his carrot patch swap:\n\n**Title:** Benny's Big Sunflower Swap\n\n**Key Points:**\n\n1. **The Mishap:** Benny, a playful bunny with a penchant for mischief, accidentally trips over a root and sends his entire carrot patch tumbling into a giant sunflower field. He's horrified! He can't imagine a world without his delicious carrots.\n2. **The Sunflower Surprise:** Benny, bewildered and hungry, explores the sunflower field. He's amazed by the towering sunflowers, but they're not very tasty. He tries to eat the petals, but they're too tough. He even tries to climb the sunflowers, but they're too tall!\n3. **The Sunflower Friends:** Benny meets a friendly sunflower named Sunny who explains that the sunflowers are actually quite helpful. They provide shade for the bees, attract butterflies, and even help the birds find food. Benny learns that the sunflower field is a vital part of the ecosystem.\n4. **The Teamwork Challenge:** Benny realizes he needs to work with the sunflowers to get his carrots back. He learns that the sunflowers can't move the carrots, but they can help him find a way to get them back. He asks Sunny for help, and together they brainstorm ideas.\n5. **The Carrot Rescue:** Benny and Sunny come up with a plan to use the sunflowers' height to reach the carrots. They work together, with Benny using his agility to climb the sunflowers and Sunny guiding him. 
337 | "text/plain": [
338 | "<IPython.core.display.Markdown object>"
339 | ]
340 | },
341 | "metadata": {},
342 | "output_type": "display_data"
343 | }
344 | ],
345 | "source": [
346 | "full_outline_prompt = outline_prompt.format(premise=premise)\n",
347 | "outline_response = gemma.generate(full_outline_prompt, max_length=512)\n",
348 | "outline = outline_response[len(full_outline_prompt) :]\n",
349 | "display(Markdown(outline))"
350 | ]
351 | },
352 | {
353 | "cell_type": "markdown",
354 | "metadata": {
355 | "id": "9iYpDaIzv_hC"
356 | },
357 | "source": [
358 | "Once you have the plan, you'd like Gemma to begin writing the story. In the prompt, include all the necessary information you've gathered so far: the persona, premise, and outline.\n"
359 | ]
360 | },
361 | {
362 | "cell_type": "code",
363 | "execution_count": null,
364 | "metadata": {
365 | "id": "lWWLFF24pFOe"
366 | },
367 | "outputs": [],
368 | "source": [
369 | "starting_prompt = f\"\"\"<start_of_turn>user\n",
370 | "{persona}\n",
371 | "\n",
372 | "You have a gripping premise in mind:\n",
373 | "\n",
374 | "{{premise}}\n",
375 | "\n",
376 | "Your imagination has crafted a narrative outline:\n",
377 | "\n",
378 | "{{outline}}\n",
379 | "\n",
380 | "First, silently review the outline and the premise. Consider how to start the\n",
381 | "story.\n",
382 | "\n",
383 | "Your task is to write a part of the story that covers only the first point of the outline.\n",
384 | "You are not expected to finish the whole story now.\n",
385 | "Do not write about the next points, only the first plot point!!!\n",
386 | "\n",
387 | "Try to write 10 sentences.\n",
388 | "Remember, DO NOT WRITE A WHOLE STORY RIGHT NOW.<end_of_turn>\n",
389 | "<start_of_turn>model\\n\"\"\""
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": null,
395 | "metadata": {
396 | "id": "Fdu7kVqVwRXQ"
397 | },
398 | "outputs": [
399 | {
400 | "data": {
401 | "text/markdown": "Benny the bunny was a whirlwind of fluff and mischief. He loved nothing more than hopping through his carrot patch, nibbling on the plumpest, juiciest carrots. But today, Benny's playful antics took a turn for the disastrous. He tripped over a particularly stubborn root, sending a cascade of carrots tumbling into the air. Benny watched in horror as his entire patch, his precious treasure, disappeared into a sea of towering sunflowers. The sunflowers, with their bright yellow faces, seemed to mock him with their endless rows. Benny's heart sank. He couldn't imagine a world without his carrots! He hopped around, his ears drooping, his little nose twitching with despair. He needed his carrots, and he needed them now! But how could he possibly get them back? \n",
\n", 402 | "text/plain": [ 403 | "" 404 | ] 405 | }, 406 | "metadata": {}, 407 | "output_type": "display_data" 408 | } 409 | ], 410 | "source": [ 411 | "full_starting_prompt = starting_prompt.format(premise=premise, outline=outline)\n", 412 | "starting_response = gemma.generate(full_starting_prompt, max_length=1000)\n", 413 | "draft = starting_response[len(full_starting_prompt) :]\n", 414 | "display(Markdown(draft))" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": { 420 | "id": "MHnP5quYwYLu" 421 | }, 422 | "source": [ 423 | "If you're pleased with the start of your story, you can continue it by further prompting the model with the text written so far. You can also add guidelines to help the model write appropriately and avoid concluding the story too quickly." 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "id": "ZdFyNiFQ4UZW" 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "guidelines = \"\"\"Writing Guidelines\n", 435 | "\n", 436 | "Remember, your main goal is to write as much as you can. If you get through\n", 437 | "the story too fast, that is bad. Expand, never summarize. Don't repeat previous\n", 438 | "parts of the story, only expand.\"\"\"\n", 439 | "\n", 440 | "\n", 441 | "continuation_prompt = f\"\"\"user\n", 442 | "{persona}\n", 443 | "\n", 444 | "You have a following premise in mind:\n", 445 | "\n", 446 | "{{premise}}\n", 447 | "\n", 448 | "The outline of the story looks like this:\n", 449 | "\n", 450 | "{{outline}}\n", 451 | "\n", 452 | "\n", 453 | "Here's what you've written so far:\n", 454 | "\n", 455 | "{{story_text}}\n", 456 | "\n", 457 | "\n", 458 | "=====\n", 459 | "First, silently review the premise, the outline and the story you've written so far.\n", 460 | "\n", 461 | "Write the continuation - the next 5 sentences that cover the next outline point. Stick to the outline.\n", 462 | "\n", 463 | "However, once the story is COMPLETELY finished, write IAMDONE.\n", 464 | "\n", 465 | "{guidelines}\n", 466 | "model\\n\"\"\"" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": { 473 | "id": "M7VM7COm22Hz" 474 | }, 475 | "outputs": [ 476 | { 477 | "data": { 478 | "text/markdown": "Benny hopped from sunflower to sunflower, his nose twitching, trying to catch a whiff of his lost carrots. He sniffed the air, hoping to catch a hint of their sweet scent, but all he could smell was the earthy aroma of the soil and the sweet nectar of the sunflowers. He felt a pang of sadness, realizing that he couldn't just eat the sunflowers. They were beautiful, but they weren't carrots! Benny needed a plan, and he needed it fast. 
479 | "text/plain": [
480 | "<IPython.core.display.Markdown object>"
481 | ]
482 | },
483 | "metadata": {},
484 | "output_type": "display_data"
485 | }
486 | ],
487 | "source": [
488 | "full_continuation_prompt = continuation_prompt.format(\n",
489 | " premise=premise, outline=outline, story_text=draft\n",
490 | ")\n",
491 | "continuation_response = gemma.generate(full_continuation_prompt, max_length=1000)\n",
492 | "continuation = continuation_response[len(full_continuation_prompt) :]\n",
493 | "display(Markdown(continuation))"
494 | ]
495 | },
496 | {
497 | "cell_type": "markdown",
498 | "metadata": {
499 | "id": "yX8G_SlOxIB6"
500 | },
501 | "source": [
502 | "Add the continuation to the initial draft, and keep building the story iteratively until 'IAMDONE' appears in the model's output."
503 | ]
504 | },
505 | {
506 | "cell_type": "code",
507 | "execution_count": null,
508 | "metadata": {
509 | "id": "az0Zk9MA1XqU"
510 | },
511 | "outputs": [
512 | {
513 | "data": {
514 | "text/markdown": "Benny the bunny was a whirlwind of fluff and mischief. He loved nothing more than hopping through his carrot patch, nibbling on the plumpest, juiciest carrots. But today, Benny's playful antics took a turn for the disastrous. He tripped over a particularly stubborn root, sending a cascade of carrots tumbling into the air. Benny watched in horror as his entire patch, his precious treasure, disappeared into a sea of towering sunflowers. The sunflowers, with their bright yellow faces, seemed to mock him with their endless rows. Benny's heart sank. He couldn't imagine a world without his carrots! He hopped around, his ears drooping, his little nose twitching with despair. He needed his carrots, and he needed them now! But how could he possibly get them back? \n\n\nBenny hopped from sunflower to sunflower, his nose twitching, trying to catch a whiff of his lost carrots. He sniffed the air, hoping to catch a hint of their sweet scent, but all he could smell was the earthy aroma of the soil and the sweet nectar of the sunflowers. He felt a pang of sadness, realizing that he couldn't just eat the sunflowers. They were beautiful, but they weren't carrots! Benny needed a plan, and he needed it fast. \n\n \n=====\n \n \n",
515 | "text/plain": [
516 | "<IPython.core.display.Markdown object>"
517 | ]
518 | },
519 | "metadata": {},
520 | "output_type": "display_data"
521 | }
522 | ],
523 | "source": [
524 | "draft = draft + \"\\n\\n\" + continuation\n",
525 | "\n",
526 | "while \"IAMDONE\" not in continuation:\n",
527 | " full_continuation_prompt = continuation_prompt.format(\n",
528 | " premise=premise, outline=outline, story_text=draft\n",
529 | " )\n",
530 | " continuation_response = gemma.generate(full_continuation_prompt, max_length=5000)\n",
531 | " continuation = continuation_response[len(full_continuation_prompt) :]\n",
532 | " draft = draft + \"\\n\\n\" + continuation\n",
533 | "\n",
534 | "# Remove 'IAMDONE' and print the final story\n",
535 | "final = draft.replace(\"IAMDONE\", \"\").strip()\n",
536 | "display(Markdown(final))"
537 | ]
538 | }
539 | ],
540 | "metadata": {
541 | "accelerator": "GPU",
542 | "colab": {
543 | "name": "Prompt_chaining.ipynb",
544 | "toc_visible": true
545 | },
546 | "kernelspec": {
547 | "display_name": "Python 3",
548 | "name": "python3"
549 | }
550 | },
551 | "nbformat": 4,
552 | "nbformat_minor": 0
553 | }
554 | 
--------------------------------------------------------------------------------
/Gemma/Gemma_RAG_LlamaIndex.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "Tce3stUlHN0L"
7 | },
8 | "source": [
9 | "##### Copyright 2024 Google LLC."
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {
16 | "cellView": "form",
17 | "id": "tuOe1ymfHZPu"
18 | },
19 | "outputs": [],
20 | "source": [
21 | "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
22 | "# you may not use this file except in compliance with the License.\n",
23 | "# You may obtain a copy of the License at\n",
24 | "#\n",
25 | "# https://www.apache.org/licenses/LICENSE-2.0\n",
26 | "#\n",
27 | "# Unless required by applicable law or agreed to in writing, software\n",
28 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
29 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
30 | "# See the License for the specific language governing permissions and\n",
31 | "# limitations under the License."
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {
37 | "id": "dfsDR_omdNea"
38 | },
39 | "source": [
40 | "# RAG with Gemma and LlamaIndex\n",
41 | "\n",
42 | "This notebook demonstrates how to integrate the Gemma model with the [LlamaIndex](https://www.llamaindex.ai/) library to build a basic RAG application.\n",
43 | "\n",
44 | "Run in Google Colab"
\n", 45 | " Run in Google Colab\n", 46 | "
" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "id": "FaqZItBdeokU" 54 | }, 55 | "source": [ 56 | "## Setup\n", 57 | "\n", 58 | "### Select the Colab runtime\n", 59 | "To complete this tutorial, you'll need to have a Colab runtime with sufficient resources to run the Gemma model. In this case, you can use a T4 GPU:\n", 60 | "\n", 61 | "1. In the upper-right of the Colab window, select **▾ (Additional connection options)**.\n", 62 | "2. Select **Change runtime type**.\n", 63 | "3. Under **Hardware accelerator**, select **T4 GPU**.\n", 64 | "\n", 65 | "### Gemma setup\n", 66 | "\n", 67 | "To complete this tutorial, you'll first need to complete the setup instructions at [Gemma setup](https://ai.google.dev/gemma/docs/setup). The Gemma setup instructions show you how to do the following:\n", 68 | "\n", 69 | "* Get access to Gemma on kaggle.com.\n", 70 | "* Select a Colab runtime with sufficient resources to run\n", 71 | " the Gemma 2B model.\n", 72 | "* Generate and configure a Kaggle username and an API key as Colab secrets.\n", 73 | "\n", 74 | "After you've completed the Gemma setup, move on to the next section, where you'll set environment variables for your Colab environment.\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": { 80 | "id": "CY2kGtsyYpHF" 81 | }, 82 | "source": [ 83 | "### Configure your credentials\n", 84 | "\n", 85 | "Add your your Kaggle credentials to the Colab Secrets manager to securely store it.\n", 86 | "\n", 87 | "1. Open your Google Colab notebook and click on the 🔑 Secrets tab in the left panel. \"The\n", 88 | "2. Create new secrets: `KAGGLE_USERNAME` and `KAGGLE_KEY`\n", 89 | "3. Copy/paste your username into `KAGGLE_USERNAME`\n", 90 | "3. Copy/paste your key into `KAGGLE_KEY`\n", 91 | "4. Toggle the buttons on the left to allow notebook access to the secrets.\n" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "id": "A9sUQ4WrP-Yr" 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "import os\n", 103 | "from google.colab import userdata\n", 104 | "\n", 105 | "# Set the backbend before importing Keras\n", 106 | "os.environ[\"KERAS_BACKEND\"] = \"jax\"\n", 107 | "# Avoid memory fragmentation on JAX backend.\n", 108 | "os.environ[\"XLA_PYTHON_CLIENT_MEM_FRACTION\"] = \"1.00\"\n", 109 | "\n", 110 | "# Note: `userdata.get` is a Colab API. If you're not using Colab, set the env\n", 111 | "# vars as appropriate for your system.\n", 112 | "os.environ[\"KAGGLE_USERNAME\"] = userdata.get(\"KAGGLE_USERNAME\")\n", 113 | "os.environ[\"KAGGLE_KEY\"] = userdata.get(\"KAGGLE_KEY\")" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": { 119 | "id": "iwjo5_Uucxkw" 120 | }, 121 | "source": [ 122 | "### Install dependencies\n", 123 | "Run the cell below to install all the required dependencies." 
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {
130 | "id": "r_nXPEsF7UWQ"
131 | },
132 | "outputs": [],
133 | "source": [
134 | "!pip install -q -U tensorflow keras keras-nlp\n",
136 | "!pip install llama-index-embeddings-instructor\n",
137 | "!pip install sentence-transformers==2.2.2\n",
138 | "!pip install llama-index-readers-web llama-index-readers-file"
139 | ]
140 | },
141 | {
142 | "cell_type": "markdown",
143 | "metadata": {
144 | "id": "J3sX2mFH4GWk"
145 | },
146 | "source": [
147 | "### Gemma"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {
153 | "id": "IgJWrlCC7v27"
154 | },
155 | "source": [
156 | "**About Gemma**\n",
157 | "\n",
158 | "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone."
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "metadata": {
165 | "id": "B3WckZv2hef3"
166 | },
167 | "outputs": [],
168 | "source": [
169 | "import keras\n",
170 | "import keras_nlp\n",
171 | "\n",
172 | "os.environ[\"KAGGLE_USERNAME\"] = userdata.get(\"KAGGLE_USERNAME\")\n",
173 | "os.environ[\"KAGGLE_KEY\"] = userdata.get(\"KAGGLE_KEY\")"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": null,
179 | "metadata": {
180 | "id": "8dfseDZChhjl"
181 | },
182 | "outputs": [],
183 | "source": [
184 | "# Let's load Gemma using Keras\n",
185 | "gemma_model_id = \"gemma2_instruct_2b_en\"\n",
186 | "gemma = keras_nlp.models.GemmaCausalLM.from_preset(gemma_model_id)"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {
192 | "id": "6xKBqqTD3zyq"
193 | },
194 | "source": [
195 | "## LlamaIndex"
196 | ]
197 | },
198 | {
199 | "cell_type": "markdown",
200 | "metadata": {
201 | "id": "g_YESbFQ8DlU"
202 | },
203 | "source": [
204 | "LlamaIndex is a toolkit for developers to build applications that use large language models (LLMs) with specific data. This data can be private or related to a particular field. With LlamaIndex, developers can create various LLM applications, including question-answering chatbots, document analysis tools, and even autonomous agents. The toolkit offers functions to process data and design workflows that combine data retrieval with instructions for the LLM."
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {
210 | "id": "vO2BH5iX8Lot"
211 | },
212 | "source": [
213 | "Large language models (LLMs) are powerful but lack your specific data. Retrieval-Augmented Generation (RAG) bridges this gap by incorporating your data for improved performance. RAG works by indexing your data for efficient retrieval based on user queries. The most relevant information, along with the query itself, is then fed to the LLM to generate a response. Understanding RAG is essential for building LLM applications like chatbots and agents."
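Before wiring up LlamaIndex, it can help to see the retrieve-then-generate flow of RAG in miniature. The sketch below is a toy illustration, not LlamaIndex code: the word-overlap scorer stands in for the embedding-based vector search that LlamaIndex performs.

```python
def retrieve(query: str, documents: list[str], k: int = 2) -> list[str]:
    """Toy retriever: rank documents by word overlap with the query."""
    query_words = set(query.lower().split())
    scored = sorted(
        documents,
        key=lambda doc: len(query_words & set(doc.lower().split())),
        reverse=True,
    )
    return scored[:k]

def build_rag_prompt(query: str, documents: list[str]) -> str:
    """Stuff the top-k retrieved documents into the prompt as context."""
    context = "\n".join(retrieve(query, documents))
    return f"Context:\n{context}\n\nAccording to the text, answer: {query}"

docs = ["Gemma is an open model family.", "RAG retrieves context for LLMs."]
print(build_rag_prompt("What does RAG do?", docs))
```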
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {
219 | "id": "RZEp91Gb8pP9"
220 | },
221 | "source": [
222 | "### Setup"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": null,
228 | "metadata": {
229 | "id": "NJFTVCbP31P_"
230 | },
231 | "outputs": [],
232 | "source": [
233 | "from typing import Optional, List, Mapping, Any\n",
234 | "from llama_index.core.node_parser import SentenceSplitter\n",
235 | "from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader, PromptTemplate\n",
236 | "from llama_index.embeddings.instructor import InstructorEmbedding\n",
237 | "from llama_index.core.llms import (\n",
238 | " CustomLLM,\n",
239 | " CompletionResponse,\n",
240 | " CompletionResponseGen,\n",
241 | " LLMMetadata,\n",
242 | ")\n",
243 | "from llama_index.core.llms.callbacks import llm_completion_callback"
244 | ]
245 | },
246 | {
247 | "cell_type": "markdown",
248 | "metadata": {
249 | "id": "s8jsT8dr8rjW"
250 | },
251 | "source": [
252 | "To ensure compatibility between Gemma and the LlamaIndex library, you need to create a simple interface class. The provided code implements basic generation methods, allowing the library to interact with our model effectively."
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": null,
258 | "metadata": {
259 | "id": "njdY5e7f4IW-"
260 | },
261 | "outputs": [],
262 | "source": [
263 | "class GemmaLLMInterface(CustomLLM):\n",
264 | " model: keras_nlp.models.GemmaCausalLM = None\n",
265 | " context_window: int = 8192\n",
266 | " num_output: int = 2048\n",
267 | " model_name: str = \"gemma_2\"\n",
268 | "\n",
269 | " def _format_prompt(self, message: str) -> str:\n",
270 | " return (\n",
271 | " f\"<start_of_turn>user\\n{message}<end_of_turn>\\n\" f\"<start_of_turn>model\\n\"\n",
272 | " )\n",
273 | "\n",
274 | " @property\n",
275 | " def metadata(self) -> LLMMetadata:\n",
276 | " \"\"\"Get LLM metadata.\"\"\"\n",
277 | " return LLMMetadata(\n",
278 | " context_window=self.context_window,\n",
279 | " num_output=self.num_output,\n",
280 | " model_name=self.model_name,\n",
281 | " )\n",
282 | "\n",
283 | " @llm_completion_callback()\n",
284 | " def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:\n",
285 | " prompt = self._format_prompt(prompt)\n",
286 | " raw_response = self.model.generate(prompt, max_length=self.num_output)\n",
287 | " response = raw_response[len(prompt) :]\n",
288 | " return CompletionResponse(text=response)\n",
289 | "\n",
290 | " @llm_completion_callback()\n",
291 | " def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:\n",
292 | " response = \"\"\n",
293 | " for token in self.complete(prompt).text: # accumulate the text so far\n",
294 | " response += token\n",
295 | " yield CompletionResponse(text=response, delta=token)"
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": null,
301 | "metadata": {
302 | "id": "Y9sar4FazDI_"
303 | },
304 | "outputs": [
305 | {
306 | "data": {
307 | "application/vnd.jupyter.widget-view+json": {
308 | "model_id": "707339b0c08049cf81b15e0431bff054",
309 | "version_major": 2,
310 | "version_minor": 0
311 | },
312 | "text/plain": [
313 | ".gitattributes: 0%| | 0.00/1.48k [00:00\n",
707 | "\n",
708 | "\n",
709 | "According to the text answer the query: \n",
710 | "```\n",
711 | "\n",
712 | "By providing the large language model (LLM) with additional context, it can generate more accurate and informative responses to user queries.\n"
713 | ]
714 | },
715 | {
716 | "cell_type": "markdown",
717 | "metadata": {
718 | 
"id": "4iJxZAJi4qi_" 719 | }, 720 | "source": [ 721 | "### Test it yourself!" 722 | ] 723 | }, 724 | { 725 | "cell_type": "code", 726 | "execution_count": null, 727 | "metadata": { 728 | "id": "R64H_BSPNPW9" 729 | }, 730 | "outputs": [ 731 | { 732 | "name": "stdout", 733 | "output_type": "stream", 734 | "text": [ 735 | "ASP stands for **Application Service Provider**. \n", 736 | "\n" 737 | ] 738 | } 739 | ], 740 | "source": [ 741 | "response = query_engine.query(\"What does ASP stand for?\")\n", 742 | "print(response)" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": null, 748 | "metadata": { 749 | "id": "J_YjGXFXiao0" 750 | }, 751 | "outputs": [ 752 | { 753 | "name": "stdout", 754 | "output_type": "stream", 755 | "text": [ 756 | "The text mentions that Paul Graham applied to RISD (Rhode Island School of Design) and the Accademia. \n", 757 | "\n" 758 | ] 759 | } 760 | ], 761 | "source": [ 762 | "response = query_engine.query(\"What art schools did Paul Graham apply to?\")\n", 763 | "print(response)" 764 | ] 765 | } 766 | ], 767 | "metadata": { 768 | "accelerator": "GPU", 769 | "colab": { 770 | "name": "Gemma_RAG_LlamaIndex.ipynb", 771 | "toc_visible": true 772 | }, 773 | "kernelspec": { 774 | "display_name": "Python 3", 775 | "name": "python3" 776 | } 777 | }, 778 | "nbformat": 4, 779 | "nbformat_minor": 0 780 | } 781 | --------------------------------------------------------------------------------