├── .github
│   └── workflows
│       ├── cla.yml
│       ├── publish-to-test-pypi.yml
│       └── stale.yaml
├── README.md
├── cla.md
├── demo_helpers
│   ├── MANIFEST.in
│   ├── demo_helpers
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── args.py
│   │   ├── compute_performance.py
│   │   ├── dataset.py
│   │   ├── datasets
│   │   │   └── README.md
│   │   ├── misc.py
│   │   ├── model_download.py
│   │   ├── models.py
│   │   ├── pretrained_models
│   │   │   ├── m5.pt
│   │   │   └── pointnet.pth
│   │   └── validate.py
│   └── setup.py
├── docs
│   ├── img
│   │   └── groqflow.gif
│   ├── install.md
│   ├── readme.md
│   ├── release_notes.md
│   ├── user_guide.md
│   └── versioning.md
├── examples
│   ├── hummingbird
│   │   ├── randomforest.py
│   │   └── xgbclassifier.py
│   ├── keras
│   │   └── hello_world.py
│   ├── onnx
│   │   └── hello_world.py
│   ├── pytorch
│   │   ├── assembler_flags.py
│   │   ├── benchmark.py
│   │   ├── benchmark_abunch.py
│   │   ├── build_name.py
│   │   ├── cache_dir.py
│   │   ├── compiler_flags.py
│   │   ├── estimate_performance.py
│   │   ├── groqview.py
│   │   ├── hello_world.py
│   │   ├── no_monitor.py
│   │   ├── num_chips.py
│   │   ├── quantization.py
│   │   ├── rebuild_always.py
│   │   ├── rebuild_never.py
│   │   ├── run_abunch.py
│   │   └── sequence.py
│   └── readme.md
├── groqflow
│   ├── __init__.py
│   ├── common
│   │   ├── __init__.py
│   │   ├── build.py
│   │   ├── onnx_helpers.py
│   │   └── sdk_helpers.py
│   ├── groqmodel
│   │   ├── __init__.py
│   │   ├── execute.py
│   │   ├── groqmodel.py
│   │   └── remote.py
│   ├── justgroqit
│   │   ├── __init__.py
│   │   ├── assemble_multichip.py
│   │   ├── compile.py
│   │   ├── export.py
│   │   ├── groqit.py
│   │   └── ignition.py
│   └── version.py
├── license.md
├── proof_points
│   ├── README.md
│   ├── computer_vision
│   │   ├── deit
│   │   │   ├── README.md
│   │   │   ├── deit_tiny.py
│   │   │   └── requirements.txt
│   │   ├── googlenet
│   │   │   ├── README.md
│   │   │   ├── googlenet.py
│   │   │   └── requirements.txt
│   │   ├── mobilenetv2
│   │   │   ├── README.md
│   │   │   ├── mobilenetv2.py
│   │   │   └── requirements.txt
│   │   ├── resnet50
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   └── resnet50.py
│   │   ├── squeezenet
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   └── squeezenet.py
│   │   └── yolo
│   │       ├── README.md
│   │       ├── requirements.txt
│   │       └── yolov6_nano.py
│   ├── natural_language_processing
│   │   ├── bert
│   │   │   ├── README.md
│   │   │   ├── bert_base.py
│   │   │   ├── bert_quantize.py
│   │   │   ├── bert_tiny.py
│   │   │   └── requirements.txt
│   │   ├── distilbert
│   │   │   ├── README.md
│   │   │   ├── distilbert.py
│   │   │   └── requirements.txt
│   │   ├── electra
│   │   │   ├── README.md
│   │   │   ├── electra.py
│   │   │   └── requirements.txt
│   │   ├── minilm
│   │   │   ├── README.md
│   │   │   ├── minilmv2.py
│   │   │   └── requirements.txt
│   │   └── roberta
│   │       ├── README.md
│   │       ├── requirements.txt
│   │       └── roberta.py
│   └── speech
│       └── m5
│           ├── README.md
│           ├── m5.py
│           └── requirements.txt
├── pyproject.toml
└── setup.py
/.github/workflows/cla.yml: -------------------------------------------------------------------------------- 1 | name: "CLA Assistant" 2 | on: 3 | issue_comment: 4 | types: [created] 5 | pull_request_target: 6 | types: [opened, closed, synchronize] 7 | 8 | jobs: 9 | CLAAssistant: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: "CLA Assistant" 13 | if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I hereby sign the CLA') || github.event_name == 'pull_request_target' 14 | # Beta Release 15 | uses: contributor-assistant/github-action@v2.2.0 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | # the token below should have repo scope and must be manually added by you in the repository's secrets 19 | PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 20 | with: 21 | path-to-signatures: "groqflow/version1/cla.json" 22 | path-to-document: "https://github.com/groq/groqflow/cla.md" 23 | # branch should not be protected 24 | branch: "main" 25 |
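# accounts matching the patterns below (maintainers and bots) are exempt from signing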
allowlist: hozen-groq,MihailoMilenkovic,ataheridezfouli-groq,bot* 26 | remote-organization-name: groq 27 | remote-repository-name: cla 28 | 29 | # the following inputs are optional - if they are not given, default values are used 30 | #create-file-commit-message: 'For example: Creating file for storing CLA Signatures' 31 | #signed-commit-message: 'For example: $contributorName has signed the CLA in #$pullRequestNo' 32 | #custom-notsigned-prcomment: 'pull request comment with introductory message to ask new contributors to sign' 33 | #custom-pr-sign-comment: 'The signature to be committed in order to sign the CLA' 34 | #custom-allsigned-prcomment: 'pull request comment when all contributors have signed, defaults to **CLA Assistant Lite bot** All Contributors have signed the CLA.' 35 | #lock-pullrequest-aftermerge: false - if you don't want this bot to automatically lock the pull request after merging (default - true) 36 | #use-dco-flag: true - if you are using DCO instead of CLA 37 | -------------------------------------------------------------------------------- /.github/workflows/publish-to-test-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI 2 | 3 | on: push 4 | 5 | jobs: 6 | build-n-publish: 7 | name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@main 11 | - name: Set up Python 3.8 12 | uses: actions/setup-python@v3 13 | with: 14 | python-version: "3.8" 15 | - name: Install pypa/build 16 | run: >- 17 | python -m 18 | pip install 19 | build 20 | --user 21 | - name: Build a binary wheel and a source tarball 22 | run: >- 23 | python -m 24 | build 25 | --sdist 26 | --wheel 27 | --outdir dist/ 28 | . 29 | - name: Publish distribution 📦 to Test PyPI 30 | if: startsWith(github.ref, 'refs/tags') != true 31 | uses: pypa/gh-action-pypi-publish@release/v1 32 | with: 33 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 34 | repository_url: https://test.pypi.org/legacy/ 35 | - name: Publish distribution 📦 to PyPI 36 | if: startsWith(github.ref, 'refs/tags') 37 | uses: pypa/gh-action-pypi-publish@release/v1 38 | with: 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /.github/workflows/stale.yaml: -------------------------------------------------------------------------------- 1 | ##################################### 2 | # DO NOT EDIT DIRECTLY. # 3 | # This file is managed by Terraform # 4 | ##################################### 5 | 6 | name: "Close stale PRs" 7 | on: 8 | schedule: 9 | - cron: "30 1 * * *" 10 | 11 | jobs: 12 | stale: 13 | runs-on: ubuntu-latest 14 | # Read repo and write to PRs 15 | permissions: 16 | contents: read 17 | pull-requests: write 18 | issues: write 19 | steps: 20 | - uses: actions/stale@v9 21 | with: 22 | stale-pr-message: "This PR is stale because it has been open for 30 days with no activity. Remove the stale label or comment, or this PR will be closed in 7 days." 23 | close-pr-message: "This PR was closed because it has been stalled for 7 days with no activity."
24 | days-before-pr-stale: 30 25 | days-before-pr-close: 7 26 | exempt-pr-labels: "dependencies,security" 27 | operations-per-run: 60 # Default is 30 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GroqFlow 🚀 2 | 3 | GroqFlow™ is the easiest way to get started with Groq's technology. GroqFlow provides an automated workflow for compiling Machine Learning, Artificial Intelligence, and High-Performance Computing workloads into Groq programs and executing those programs on the Groq Language Processing Unit™ (LPU). 4 | 5 | --- 6 | 7 | ## System Requirements 8 | 9 | To begin, we recommend that your system meets the following software and hardware requirements: 10 | 11 | - Ubuntu 22.04 or Rocky 8.4 Linux distribution. 12 | - 32GB RAM (or more) to build models. 13 | - 8 LPUs (especially for larger models) to run models. 14 | - GroqWare Suite™ version >=0.9.2.1 installation*: 15 | - Groq Developer Tools Package (groq-devtools) for building and compiling models. 16 | - Groq Runtime Package (groq-runtime) for running compiled models on Groq hardware. 17 | 18 | *For information on how to install GroqWare Suite on your system, create an account on our [portal](https://support.groq.com/) and view the [GroqWare Quick Start Guide](https://support.groq.com/#/downloads/view/groqware-qsg) for installation instructions. 19 | 20 | --- 21 | 22 | ## Navigating GroqFlow 23 | 24 | * [Documentation](docs/): All GroqFlow documentation, including the installation guide, user guide, release notes (with known issues), and versioning. 25 | 26 | * [Examples](examples/): Includes various GroqFlow examples. 27 | 28 | * [GroqFlow](groqflow/): The source code for the `groqflow` package. 29 | 30 | * [Proof Points](proof_points/): Machine learning proof points using GroqFlow. 31 | 32 | * [README.md](README.md): This README. 33 | 34 | --- 35 | 36 | ## Contributors 37 | 38 | GroqFlow development is primarily conducted within Groq's internal repo and is periodically synced to GitHub. This approach means that developer contributions are not immediately obvious in the commit log. 39 | 40 | This project follows the [all-contributors](https://allcontributors.org) specification. 41 | Contributions of any kind are welcome! 42 | -------------------------------------------------------------------------------- /cla.md: -------------------------------------------------------------------------------- 1 | ## Individual Contributor License Agreement (CLA) 2 | 3 | **Thank you for submitting your contributions to this project.** 4 | 5 | By signing this CLA, you agree that the following terms apply to all of your past, present and future contributions 6 | to the project. 7 | 8 | ### License. 9 | 10 | You hereby represent that all present, past and future contributions are governed by the 11 | [MIT License](https://opensource.org/licenses/MIT) 12 | copyright statement. 13 | 14 | This entails that to the extent possible under law, you transfer all copyright and related or neighboring rights 15 | of the code or documents you contribute to the project itself or its maintainers. 16 | Furthermore, you also represent that you have the authority to perform the above waiver 17 | with respect to the entirety of your contributions. 18 | 19 | ### Moral Rights.
20 | 21 | To the fullest extent permitted under applicable law, you hereby waive, and agree not to 22 | assert, all of your “moral rights” in or relating to your contributions for the benefit of the project. 23 | 24 | ### Third Party Content. 25 | 26 | If your Contribution includes or is based on any source code, object code, bug fixes, configuration changes, tools, 27 | specifications, documentation, data, materials, feedback, information or other works of authorship that were not 28 | authored by you (“Third Party Content”) or if you are aware of any third party intellectual property or proprietary 29 | rights associated with your Contribution (“Third Party Rights”), 30 | then you agree to include with the submission of your Contribution full details respecting such Third Party 31 | Content and Third Party Rights, including, without limitation, identification of which aspects of your 32 | Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the 33 | Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable 34 | third party license terms or restrictions respecting the Third Party Content and Third Party Rights. For greater 35 | certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights 36 | do not apply to any portion of a Project that is incorporated into your Contribution to that same Project. 37 | 38 | ### Representations. 39 | 40 | You represent that, other than the Third Party Content and Third Party Rights identified by 41 | you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled 42 | to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were 43 | created in the course of your employment with your past or present employer(s), you represent that such 44 | employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer 45 | (s) has waived all of their right, title or interest in or to your Contributions. 46 | 47 | ### Disclaimer. 48 | 49 | To the fullest extent permitted under applicable law, your Contributions are provided on an "as is" 50 | basis, without any warranties or conditions, express or implied, including, without limitation, any implied 51 | warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not 52 | required to provide support for your Contributions, except to the extent you desire to provide support. 53 | 54 | ### No Obligation. 55 | 56 | You acknowledge that the maintainers of this project are under no obligation to use or incorporate your contributions 57 | into the project. The decision to use or incorporate your contributions into the project will be made at the 58 | sole discretion of the maintainers or their authorized delegates. 
59 | -------------------------------------------------------------------------------- /demo_helpers/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include demo_helpers/datasets/README.md 2 | include demo_helpers/pretrained_models/m5.pt 3 | include demo_helpers/pretrained_models/pointnet.pth 4 | -------------------------------------------------------------------------------- /demo_helpers/demo_helpers/.gitignore: -------------------------------------------------------------------------------- 1 | datasets 2 | -------------------------------------------------------------------------------- /demo_helpers/demo_helpers/__init__.py: -------------------------------------------------------------------------------- 1 | # Needed to make pip install work 2 | -------------------------------------------------------------------------------- /demo_helpers/demo_helpers/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def parse_args(): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument( 7 | "-b", 8 | "--build", 9 | action="store_true", 10 | dest="should_build", 11 | default=False, 12 | help="If specified, will build the model to be executed on GroqChip™ processor.", 13 | ) 14 | parser.add_argument( 15 | "-e", 16 | "--execute", 17 | action="store_true", 18 | dest="should_execute", 19 | default=False, 20 | help="If specified, will execute a pre-built model on GroqChip™ processor " 21 | "and print accuracy statistics.", 22 | ) 23 | args = parser.parse_args() 24 | 25 | should_build = args.should_build 26 | should_execute = args.should_execute 27 | 28 | # If neither set, perform both operations 29 | if not (should_build or should_execute): 30 | should_build = True 31 | should_execute = True 32 | 33 | return { 34 | "rebuild_policy": "if_needed" if should_build else "never", 35 | "should_execute": should_execute, 36 | } 37 | -------------------------------------------------------------------------------- /demo_helpers/demo_helpers/compute_performance.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from dataclasses import dataclass, field 3 | from typing import List, Optional, Tuple 4 | import timeit 5 | 6 | import numpy as np 7 | import onnxruntime 8 | from prettytable import PrettyTable 9 | from tqdm import tqdm 10 | import torch 11 | 12 | from demo_helpers.dataset import Dataset, create_dataset 13 | from demo_helpers.validate import formatted_score, resolve_score_label 14 | 15 | 16 | @dataclass 17 | class PerformanceResult: 18 | name: str 19 | batch_size: int 20 | total_number_of_samples: int 21 | predictions: List = field(repr=False) 22 | 23 | on_chip_latency_ms: float = 0 24 | end_to_end_latency_ms: Optional[float] = None 25 | 26 | @property 27 | def on_chip_latency_s(self) -> float: 28 | return self.on_chip_latency_ms / 1000.0 if self.on_chip_latency_ms else None 29 | 30 | @property 31 | def on_chip_ips(self) -> float: 32 | return ( 33 | 1000.0 / self.on_chip_latency_ms * self.batch_size 34 | if self.on_chip_latency_ms 35 | else None 36 | ) 37 | 38 | @property 39 | def end_to_end_latency_s(self) -> float: 40 | return ( 41 | self.end_to_end_latency_ms / 1000.0 if self.end_to_end_latency_ms else None 42 | ) 43 | 44 | @property 45 | def end_to_end_ips(self) -> float: 46 | return ( 47 | 1000.0 / self.end_to_end_latency_ms * self.batch_size 48 | if self.end_to_end_latency_ms 49 | else None 50 | ) 51 | 
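# Worked example of the conversions above (illustrative values): with
# batch_size=1 and on_chip_latency_ms=2.0, on_chip_latency_s is 0.002 and
# on_chip_ips is 1000.0 / 2.0 * 1 = 500 inferences per second; the
# end_to_end_* properties apply the same formulas to end_to_end_latency_ms.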
52 | 53 | def generate_result_comparison_table( 54 | performance_result: List[PerformanceResult], 55 | dataset: Dataset, 56 | task: str, 57 | ) -> List[Tuple]: 58 | pretty_table = PrettyTable() 59 | row_data = [] 60 | 61 | score_label = resolve_score_label(task) 62 | 63 | pretty_table.field_names = [ 64 | "Source", 65 | score_label, 66 | "end-to-end latency (ms)", 67 | "end-to-end IPS", 68 | "on-chip latency (ms)", 69 | "on-chip IPS", 70 | ] 71 | 72 | for performance in performance_result: 73 | if isinstance(performance.predictions[0], torch.Tensor): 74 | prediction = torch.stack(performance.predictions).numpy() 75 | else: 76 | prediction = np.concatenate(performance.predictions, axis=0) 77 | score = formatted_score(prediction, dataset, task=task) 78 | 79 | on_chip_latency_ms = ( 80 | f"{performance.on_chip_latency_ms:.2f}" 81 | if performance.on_chip_latency_ms 82 | else "--" 83 | ) 84 | on_chip_ips = ( 85 | f"{performance.on_chip_ips:.2f}" if performance.on_chip_ips else "--" 86 | ) 87 | 88 | row_data.append( 89 | ( 90 | performance.name, 91 | score, 92 | f"{performance.end_to_end_latency_ms:.2f}", 93 | f"{performance.end_to_end_ips:.2f}", 94 | on_chip_latency_ms, 95 | on_chip_ips, 96 | ) 97 | ) 98 | 99 | for row in row_data: 100 | pretty_table.add_row(row) 101 | 102 | print(pretty_table) 103 | 104 | return row_data 105 | 106 | 107 | def compute_performance( 108 | groq_model, 109 | pytorch_model, 110 | dataset, 111 | tokenizer=None, 112 | max_seq_length=None, 113 | feature_extractor=None, 114 | task=None, 115 | ): 116 | print("Preprocessing data.") 117 | input_names = list(groq_model.state.expected_input_shapes.keys()) 118 | dataset = create_dataset( 119 | dataset, 120 | tokenizer=tokenizer, 121 | max_seq_length=max_seq_length, 122 | feature_extractor=feature_extractor, 123 | input_names=input_names, 124 | ) 125 | 126 | groq_performance_result = timed_inference_end_to_end_latency( 127 | dataset, 128 | groq_model, 129 | chip_type="groq", 130 | task=task, 131 | ) 132 | 133 | host_performance_result = timed_inference_end_to_end_latency( 134 | dataset, 135 | pytorch_model, 136 | chip_type="cpu", 137 | ) 138 | 139 | result_table = generate_result_comparison_table( 140 | [host_performance_result, groq_performance_result], 141 | dataset, 142 | task, 143 | ) 144 | return result_table 145 | 146 | 147 | def groq_model_inference(dataset, model, task: Optional[str] = None): 148 | print("Running inference on GroqChip.") 149 | pred = model.run_abunch(dataset.x) 150 | if isinstance(pred, torch.Tensor): 151 | pred = [pred] 152 | 153 | if isinstance(pred[0], tuple): 154 | if task == "sentence_similarity": 155 | pred = [p[0] for p in pred] 156 | else: 157 | pred = list(map(torch.vstack, pred)) 158 | 159 | return dataset.postprocess(pred) 160 | 161 | 162 | def onnx_model_inference(dataset, model): 163 | print("Running inference on CPU (ONNX).") 164 | session = onnxruntime.InferenceSession(model) 165 | result = [] 166 | 167 | for inputs in tqdm(dataset.x): 168 | out = session.run(None, inputs) 169 | if len(out) == 1: 170 | result.append(torch.tensor(out[0])) 171 | else: 172 | result.append(tuple([torch.tensor(out[i]) for i in range(len(out))])) 173 | 174 | return dataset.postprocess(result) 175 | 176 | 177 | def pytorch_model_inference(dataset, model): 178 | with torch.no_grad(): 179 | print("Running inference using PyTorch model (CPU).") 180 | pred = [] 181 | for inputs in tqdm(dataset.x): 182 | out = model(**inputs) 183 | 184 | if not isinstance(out, torch.Tensor): 185 | if isinstance(out, tuple): 186 | 
if len(out) == 1: 187 | out = out[0] 188 | else: 189 | raise ValueError("Cannot handle tuple with len", len(out)) 190 | elif isinstance(out, dict): 191 | if "logits" in out: 192 | out = out.logits 193 | elif "start_logits" in out and "end_logits" in out: 194 | out = torch.vstack((out["start_logits"], out["end_logits"])) 195 | elif "last_hidden_state" in out: 196 | out = out.last_hidden_state 197 | else: 198 | raise ValueError( 199 | "Unknown output key. List of keys:", list(out.keys()) 200 | ) 201 | else: 202 | raise ValueError("Unknown output type", type(out)) 203 | pred.append(out) 204 | 205 | return dataset.postprocess(pred) 206 | 207 | 208 | def timed_inference_end_to_end_latency( 209 | dataset, 210 | model, 211 | chip_type: str, 212 | task: Optional[str] = None, 213 | ) -> PerformanceResult: 214 | result = [] 215 | if chip_type == "groq": 216 | t = timeit.Timer( 217 | lambda: result.append(groq_model_inference(dataset, model, task)) 218 | ) 219 | 220 | on_chip_latency_ms = model.estimate_performance().compute_latency * 1000 221 | production_system_end_to_end_s = model.benchmark().latency 222 | 223 | elif chip_type == "cpu": 224 | if isinstance(model, str): # ONNX 225 | t = timeit.Timer( 226 | lambda: result.append(onnx_model_inference(dataset, model)) 227 | ) 228 | else: 229 | t = timeit.Timer( 230 | lambda: result.append(pytorch_model_inference(dataset, model)) 231 | ) 232 | on_chip_latency_ms = None 233 | 234 | latency_s = t.timeit(number=1) / len(dataset.x) 235 | 236 | # for groq chip, use the expected production system latency. 237 | if chip_type == "groq": 238 | latency_s = production_system_end_to_end_s 239 | 240 | return PerformanceResult( 241 | name=chip_type, 242 | batch_size=1, 243 | total_number_of_samples=len(dataset.x), 244 | predictions=result[0], 245 | on_chip_latency_ms=on_chip_latency_ms, 246 | end_to_end_latency_ms=latency_s * 1000, 247 | ) 248 | -------------------------------------------------------------------------------- /demo_helpers/demo_helpers/datasets/README.md: -------------------------------------------------------------------------------- 1 | Place manually downloaded datasets here. 
2 | -------------------------------------------------------------------------------- /demo_helpers/demo_helpers/misc.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | import os 3 | import sys 4 | import subprocess 5 | 6 | import pkg_resources 7 | 8 | 9 | @contextmanager 10 | def suppress_stdout(): 11 | with open(os.devnull, "w", encoding="utf-8") as devnull: 12 | old_stdout = sys.stdout 13 | sys.stdout = devnull 14 | try: 15 | yield 16 | finally: 17 | sys.stdout = old_stdout 18 | 19 | 20 | def check_deps(script_filepath): 21 | dir_path = os.path.dirname(os.path.realpath(script_filepath)) 22 | reqs_filepath = os.path.join(dir_path, "requirements.txt") 23 | with open(reqs_filepath, "r", encoding="utf-8") as f: 24 | reqs = pkg_resources.parse_requirements(f) 25 | str_reqs = [str(req) for req in reqs] 26 | try: 27 | with suppress_stdout(): 28 | for req in str_reqs: 29 | pkg_resources.require(str(req)) 30 | except pkg_resources.DistributionNotFound as e: 31 | print("Some required packages below are missing:\n") 32 | # the requirement strings were already parsed above; list them for the user 33 | for req in str_reqs: 34 | print(str(req)) 35 | print() 36 | reply = None 37 | question = "Install missing packages (y/n): " 38 | while reply not in ["y", "n"]: 39 | reply = str(input(question)).lower().strip() 40 | if reply == "n": 41 | raise e 42 | subprocess.check_call(["pip", "install", "-r", reqs_filepath]) 43 | -------------------------------------------------------------------------------- /demo_helpers/demo_helpers/model_download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | from datasets.utils.file_utils import cached_path 5 | from groqflow.common.build import DEFAULT_CACHE_DIR 6 | 7 | 8 | YOLOV6N_MODEL = "yolov6n_model" 9 | YOLOV6N_SOURCE = "yolov6n_source" 10 | 11 | 12 | DATA_URLS = { 13 | YOLOV6N_MODEL: "https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6n.pt", 14 | YOLOV6N_SOURCE: "https://github.com/meituan/YOLOv6/archive/refs/tags/0.4.0.zip", 15 | } 16 | 17 | 18 | DST_PATHS = { 19 | YOLOV6N_MODEL: "pytorch_models/yolov6_nano/yolov6n.pt", 20 | YOLOV6N_SOURCE: "pytorch_models/yolov6_nano/YOLOv6", 21 | } 22 | 23 | 24 | def download_model(model): 25 | dst_path = os.path.join(DEFAULT_CACHE_DIR, DST_PATHS[model]) 26 | if os.path.exists(dst_path): 27 | return dst_path 28 | 29 | os.makedirs(os.path.dirname(dst_path), exist_ok=True) 30 | url = DATA_URLS[model] 31 | download_path = cached_path(url) 32 | os.symlink(download_path, dst_path) 33 | return dst_path 34 | 35 | 36 | def download_source(source): 37 | dst_path = os.path.join(DEFAULT_CACHE_DIR, DST_PATHS[source]) 38 | if os.path.exists(dst_path): 39 | return dst_path 40 | 41 | os.makedirs(os.path.dirname(dst_path), exist_ok=True) 42 | url = DATA_URLS[source] 43 | download_path = cached_path(url) 44 | with zipfile.ZipFile(download_path, "r") as zip_ref: 45 | extracted_dir = os.path.dirname(dst_path) 46 | zip_ref.extractall(extracted_dir) 47 | os.rename(os.path.join(extracted_dir, zip_ref.infolist()[0].filename), dst_path) 48 | return dst_path 49 | -------------------------------------------------------------------------------- /demo_helpers/demo_helpers/models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from
demo_helpers.model_download import ( 10 | YOLOV6N_MODEL, 11 | YOLOV6N_SOURCE, 12 | download_model, 13 | download_source, 14 | ) 15 | 16 | 17 | class M5(nn.Module): 18 | def __init__(self, n_input=1, n_output=35, stride=16, n_channel=32): 19 | super().__init__() 20 | self.conv1 = nn.Conv1d(n_input, n_channel, kernel_size=80, stride=stride) 21 | self.bn1 = nn.BatchNorm1d(n_channel) 22 | self.pool1 = nn.MaxPool1d(4) 23 | self.conv2 = nn.Conv1d(n_channel, n_channel, kernel_size=3) 24 | self.bn2 = nn.BatchNorm1d(n_channel) 25 | self.pool2 = nn.MaxPool1d(4) 26 | self.conv3 = nn.Conv1d(n_channel, 2 * n_channel, kernel_size=3) 27 | self.bn3 = nn.BatchNorm1d(2 * n_channel) 28 | self.pool3 = nn.MaxPool1d(4) 29 | self.conv4 = nn.Conv1d(2 * n_channel, 2 * n_channel, kernel_size=3) 30 | self.bn4 = nn.BatchNorm1d(2 * n_channel) 31 | self.pool4 = nn.MaxPool1d(4) 32 | self.avg_pool1 = nn.AvgPool1d(3) 33 | self.fc1 = nn.Linear(2 * n_channel, n_output) 34 | 35 | def forward(self, x): 36 | x = self.conv1(x) 37 | x = F.relu(self.bn1(x)) 38 | x = self.pool1(x) 39 | x = self.conv2(x) 40 | x = F.relu(self.bn2(x)) 41 | x = self.pool2(x) 42 | x = self.conv3(x) 43 | x = F.relu(self.bn3(x)) 44 | x = self.pool3(x) 45 | x = self.conv4(x) 46 | x = F.relu(self.bn4(x)) 47 | x = self.pool4(x) 48 | x = torch.mean(x, 2, keepdim=True) 49 | x = x.permute(0, 2, 1) 50 | x = self.fc1(x) 51 | return F.log_softmax(x, dim=2) 52 | 53 | 54 | class Tnet(nn.Module): 55 | def __init__(self, k=3): 56 | super().__init__() 57 | self.k = k 58 | self.conv1 = nn.Conv1d(k, 64, 1) 59 | self.conv2 = nn.Conv1d(64, 128, 1) 60 | self.conv3 = nn.Conv1d(128, 1024, 1) 61 | self.fc1 = nn.Linear(1024, 512) 62 | self.fc2 = nn.Linear(512, 256) 63 | self.fc3 = nn.Linear(256, k * k) 64 | 65 | self.bn1 = nn.BatchNorm1d(64) 66 | self.bn2 = nn.BatchNorm1d(128) 67 | self.bn3 = nn.BatchNorm1d(1024) 68 | self.bn4 = nn.BatchNorm1d(512) 69 | self.bn5 = nn.BatchNorm1d(256) 70 | 71 | def forward(self, input): 72 | # input.shape == (bs,n,3) 73 | bs = input.size(0) 74 | xb = F.relu(self.bn1(self.conv1(input))) 75 | xb = F.relu(self.bn2(self.conv2(xb))) 76 | xb = F.relu(self.bn3(self.conv3(xb))) 77 | pool_size = int(xb.size(-1)) 78 | pool = nn.MaxPool1d(pool_size)(xb) 79 | flat = nn.Flatten(1)(pool) 80 | xb = F.relu(self.bn4(self.fc1(flat))) 81 | xb = F.relu(self.bn5(self.fc2(xb))) 82 | 83 | # initialize as identity 84 | init = torch.eye(self.k, requires_grad=True).repeat(bs, 1, 1) 85 | if xb.is_cuda: 86 | init = init.cuda() 87 | matrix = self.fc3(xb).view(-1, self.k, self.k) + init 88 | return matrix 89 | 90 | 91 | class Transform(nn.Module): 92 | def __init__(self): 93 | super().__init__() 94 | self.input_transform = Tnet(k=3) 95 | self.feature_transform = Tnet(k=64) 96 | self.conv1 = nn.Conv1d(3, 64, 1) 97 | 98 | self.conv2 = nn.Conv1d(64, 128, 1) 99 | self.conv3 = nn.Conv1d(128, 1024, 1) 100 | 101 | self.bn1 = nn.BatchNorm1d(64) 102 | self.bn2 = nn.BatchNorm1d(128) 103 | self.bn3 = nn.BatchNorm1d(1024) 104 | 105 | def forward(self, input): 106 | matrix3x3 = self.input_transform(input) 107 | # batch matrix multiplication 108 | xb = torch.bmm(torch.transpose(input, 1, 2), matrix3x3).transpose(1, 2) 109 | 110 | xb = F.relu(self.bn1(self.conv1(xb))) 111 | 112 | matrix64x64 = self.feature_transform(xb) 113 | xb = torch.bmm(torch.transpose(xb, 1, 2), matrix64x64).transpose(1, 2) 114 | 115 | xb = F.relu(self.bn2(self.conv2(xb))) 116 | xb = self.bn3(self.conv3(xb)) 117 | xb = nn.MaxPool1d(int(xb.size(-1)))(xb) 118 | output = nn.Flatten(1)(xb) 119 | return output, matrix3x3, 
matrix64x64 120 | 121 | 122 | class PointNet(nn.Module): 123 | def __init__(self, classes=10): 124 | super().__init__() 125 | self.transform = Transform() 126 | self.fc1 = nn.Linear(1024, 512) 127 | self.fc2 = nn.Linear(512, 256) 128 | self.fc3 = nn.Linear(256, classes) 129 | 130 | self.bn1 = nn.BatchNorm1d(512) 131 | self.bn2 = nn.BatchNorm1d(256) 132 | self.dropout = nn.Dropout(p=0.3) 133 | self.logsoftmax = nn.LogSoftmax(dim=1) 134 | 135 | def forward(self, input): 136 | xb, _, _ = self.transform(input) 137 | xb = F.relu(self.bn1(self.fc1(xb))) 138 | xb = F.relu(self.bn2(self.dropout(self.fc2(xb)))) 139 | output = self.fc3(xb) 140 | return self.logsoftmax(output) 141 | 142 | 143 | def get_yolov6n_model(): 144 | weights = download_model(YOLOV6N_MODEL) 145 | source = download_source(YOLOV6N_SOURCE) 146 | export_script = os.path.join(source, "deploy/ONNX/export_onnx.py") 147 | 148 | cmd = [ 149 | sys.executable, 150 | export_script, 151 | "--weights", 152 | weights, 153 | "--img", 154 | "640", 155 | "--batch", 156 | "1", 157 | "--simplify", 158 | ] 159 | p = subprocess.Popen( 160 | cmd, cwd=source, stdout=subprocess.PIPE, stderr=subprocess.PIPE 161 | ) 162 | p.communicate() 163 | if p.returncode != 0: 164 | raise RuntimeError("Unable to get ONNX model") 165 | 166 | onnx_file = weights.replace(".pt", ".onnx") 167 | return onnx_file 168 | 169 | 170 | def load_pretrained(model_name): 171 | """Loads a pre-trained model 172 | 173 | :param model_name: The name of the model that needs to be loaded. 174 | :type model_name: `str` 175 | 176 | :return: The pre-trained torch model. 177 | :rtype: `torch.nn.Module` 178 | """ 179 | if model_name == "m5": 180 | # create model 181 | model = M5() 182 | 183 | # create absolute path to the pretrained weights 184 | model_filename = os.path.join( 185 | os.path.dirname(__file__), f"pretrained_models/{model_name}.pt" 186 | ) 187 | # load model's state dict. 188 | model.load_state_dict(torch.load(model_filename)) 189 | 190 | return model 191 | elif model_name == "pointnet": 192 | model = PointNet() 193 | model_filename = os.path.join( 194 | os.path.dirname(__file__), f"pretrained_models/{model_name}.pth" 195 | ) 196 | 197 | # load model's state dict.
198 | model.load_state_dict( 199 | torch.load(model_filename, map_location=torch.device("cpu")) 200 | ) 201 | 202 | return model 203 | else: 204 | raise ValueError("Unknown model: " + model_name) 205 | -------------------------------------------------------------------------------- /demo_helpers/demo_helpers/pretrained_models/m5.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/groqflow/32740b44aea43d4ecf5d2fa4a2ce3d0f040e8bf0/demo_helpers/demo_helpers/pretrained_models/m5.pt -------------------------------------------------------------------------------- /demo_helpers/demo_helpers/pretrained_models/pointnet.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/groqflow/32740b44aea43d4ecf5d2fa4a2ce3d0f040e8bf0/demo_helpers/demo_helpers/pretrained_models/pointnet.pth -------------------------------------------------------------------------------- /demo_helpers/demo_helpers/validate.py: -------------------------------------------------------------------------------- 1 | import re 2 | import string 3 | from collections import Counter 4 | from typing import List 5 | from datasets import load_metric 6 | import numpy as np 7 | import torch 8 | import torch.nn.functional as F 9 | from sklearn.metrics.pairwise import paired_cosine_distances 10 | from scipy.stats import spearmanr 11 | 12 | from demo_helpers.misc import suppress_stdout 13 | 14 | 15 | def formatted_score(pred, dataset, ids=None, tokenizer=None, task="classification"): 16 | sc = score(pred, dataset, ids=ids, tokenizer=tokenizer, task=task) 17 | if task in ["classification", "qa", "ner", "keyword_spotting"]: 18 | sc = f"{sc:.2%}" 19 | elif task in ["regression", "sentence_similarity", "coco_map"]: 20 | sc = f"{sc:.4f}" 21 | elif task == "semantic_segmentation": 22 | sc = sc["mean_iou"] 23 | sc = f"{sc:.4f}" 24 | else: 25 | raise Exception(f"unrecognized task: {task}") 26 | 27 | return sc 28 | 29 | 30 | def normalize_answer(s): 31 | """ 32 | Lower text and remove punctuation, articles and extra whitespace. 33 | From official SQuAD evaluation script. 
34 | """ 35 | 36 | def remove_articles(text): 37 | return re.sub(r"\b(a|an|the)\b", " ", text) 38 | 39 | def white_space_fix(text): 40 | return " ".join(text.split()) 41 | 42 | def remove_punc(text): 43 | exclude = set(string.punctuation) 44 | return "".join(ch for ch in text if ch not in exclude) 45 | 46 | def lower(text): 47 | return text.lower() 48 | 49 | return white_space_fix(remove_articles(remove_punc(lower(s)))) 50 | 51 | 52 | def f1_score(prediction, ground_truth): 53 | """From official SQuAD evaluation script.""" 54 | prediction_tokens = normalize_answer(prediction).split() 55 | ground_truth_tokens = normalize_answer(ground_truth).split() 56 | common = Counter(prediction_tokens) & Counter(ground_truth_tokens) 57 | num_same = sum(common.values()) 58 | if num_same == 0: 59 | return 0 60 | precision = 1.0 * num_same / len(prediction_tokens) 61 | recall = 1.0 * num_same / len(ground_truth_tokens) 62 | f1 = (2 * precision * recall) / (precision + recall) 63 | return f1 64 | 65 | 66 | def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): 67 | """From official SQuAD evaluation script.""" 68 | scores_for_ground_truths = [] 69 | for ground_truth in ground_truths: 70 | score = metric_fn(prediction, ground_truth) 71 | scores_for_ground_truths.append(score) 72 | return max(scores_for_ground_truths) 73 | 74 | 75 | def score(pred, dataset, ids=None, tokenizer=None, task="classification"): 76 | inputs, test = dataset.x, dataset.y 77 | if task == "classification": 78 | sc = np.mean(pred.argmax(axis=-1).reshape(test.shape) == test) 79 | elif task == "keyword_spotting": 80 | sc = np.equal(pred.argmax(axis=-1).ravel(), test).mean() 81 | elif task == "ner": 82 | # unroll gt labels across time steps 83 | flat_test = np.array(test).ravel() 84 | 85 | # get best label for each time step 86 | pred_labels = np.argmax(pred, -1) 87 | # unroll pred labels across time steps 88 | flat_preds = pred_labels.ravel() 89 | 90 | # all samples are padded to max_seq_len. 
reduce to valid 91 | # time steps only 92 | valid_indices = flat_test >= 0 93 | flat_test, flat_preds = flat_test[valid_indices], flat_preds[valid_indices] 94 | 95 | # calculate score 96 | sc = np.equal(flat_preds, flat_test).mean() 97 | elif task == "regression": 98 | sc = np.mean(np.square(test - pred)) 99 | elif task == "qa": 100 | pred = pred.argmax(axis=-1) 101 | 102 | def answers(y): 103 | return [ 104 | tokenizer.decode(id[start:end]) 105 | for (id, start, end) in zip(ids, y[:, 0], y[:, 1]) 106 | ] 107 | 108 | pred = answers(pred) 109 | 110 | sc = np.mean( 111 | [ 112 | metric_max_over_ground_truths(f1_score, p, t) 113 | for (p, t) in zip(pred, test) 114 | ] 115 | ) 116 | elif task == "semantic_segmentation": 117 | sc = calculate_miou_score(pred, test) 118 | elif task == "sentence_similarity": 119 | sc = calculate_spearman_correlation(pred, test, inputs) 120 | elif task == "coco_map": 121 | # pylint: disable=import-error 122 | from pycocotools.coco import COCO 123 | from pycocotools.cocoeval import COCOeval 124 | 125 | with suppress_stdout(): 126 | anno = COCO(dataset.anno_path) 127 | pred = anno.loadRes(pred) 128 | cocoEval = COCOeval(anno, pred, "bbox") 129 | cocoEval.evaluate() 130 | cocoEval.accumulate() 131 | cocoEval.summarize() 132 | sc = cocoEval.stats[0] 133 | else: 134 | raise Exception(f"Unrecognized task: {task}") 135 | return sc 136 | 137 | 138 | def resolve_score_label(task: str) -> str: 139 | if task in ["classification", "ner", "keyword_spotting"]: 140 | label = "Accuracy" 141 | elif task == "regression": 142 | label = "MSE" 143 | elif task == "qa": 144 | label = "F1 Score" 145 | elif task == "semantic_segmentation": 146 | label = "Mean IoU" 147 | elif task == "sentence_similarity": 148 | label = "Spearman Rank Correlation Coefficient" 149 | elif task == "coco_map": 150 | label = "mAP @ 0.5:0.95" 151 | else: 152 | raise Exception(f"Unrecognized task: {task}") 153 | return label 154 | 155 | 156 | def calculate_miou_score(pred: List, test: List): 157 | metric = load_metric("mean_iou") 158 | 159 | upsample_size = test[0].shape[-2:] 160 | num_labels = pred[0].shape[1] 161 | 162 | for p, t in zip(pred, test): 163 | p = _upsample_logits(torch.tensor(p), upsample_size).squeeze() 164 | t = t.squeeze() 165 | metric.add(prediction=p, reference=t) 166 | 167 | score = metric.compute( 168 | num_labels=num_labels, 169 | ignore_index=255, 170 | reduce_labels=False, 171 | ) 172 | return score 173 | 174 | 175 | def calculate_spearman_correlation(pred, test, encoded_input): 176 | sentence_1_embeddings = [] 177 | sentence_2_embeddings = [] 178 | for p, i in zip(pred, encoded_input): 179 | p = torch.tensor(p) 180 | 181 | sentence_embeddings = _mean_pooling(p, i["attention_mask"]) 182 | sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) 183 | sentence_1_embeddings.append(sentence_embeddings[0].reshape(1, -1)) 184 | sentence_2_embeddings.append(sentence_embeddings[1].reshape(1, -1)) 185 | 186 | cosine_scores = 1 - ( 187 | paired_cosine_distances( 188 | torch.stack(sentence_1_embeddings).squeeze(), 189 | torch.stack(sentence_2_embeddings).squeeze(), 190 | ) 191 | ) 192 | 193 | spearman_cosine, _ = spearmanr(test, cosine_scores) 194 | 195 | return spearman_cosine 196 | 197 | 198 | def _upsample_logits(logits, size): 199 | return F.interpolate( 200 | logits.double(), 201 | size=size, 202 | mode="bilinear", 203 | align_corners=False, 204 | ).argmax(dim=1) 205 | 206 | 207 | def _mean_pooling(model_output, attention_mask): 208 | input_mask_expanded = ( 209 | 
attention_mask.unsqueeze(-1).expand(model_output.shape).float() 210 | ) 211 | 212 | return torch.sum(model_output * input_mask_expanded, 1) / torch.clamp( 213 | input_mask_expanded.sum(1), min=1e-9 214 | ) 215 | 216 | 217 | def formatted_ips(ips): 218 | return f"{ips:.2f}" 219 | -------------------------------------------------------------------------------- /demo_helpers/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="groqflow_demo_helpers", 5 | version="0.2.0", 6 | description="Helper functions to run GroqFlow demos and proof points", 7 | author="Groq", 8 | author_email="sales@groq.com", 9 | license="groq-license", 10 | packages=find_packages( 11 | exclude=["*.__pycache__.*"], 12 | ), 13 | include_package_data=True, 14 | install_requires=[ 15 | "charset-normalizer==3.3.2", 16 | "transformers>=4.20.0", 17 | "datasets>=2.3.2", 18 | "prettytable>=3.3.0", 19 | "wget>=3.2", 20 | "setuptools==57.2.0", 21 | "torchvision==0.16.0", 22 | "torchaudio==2.1.0", 23 | "path>=16.4.0", 24 | ], 25 | classifiers=[], 26 | entry_points={}, 27 | ) 28 | -------------------------------------------------------------------------------- /docs/img/groqflow.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/groq/groqflow/32740b44aea43d4ecf5d2fa4a2ce3d0f040e8bf0/docs/img/groqflow.gif -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # GroqFlow™ Installation Guide 2 | 3 | The following describes how to install GroqFlow. These instructions enable users to build models for Groq hardware, as well as execute those builds in systems that have GroqCard™ accelerators physically installed. 4 | 5 | ## Prerequisites 6 | 7 | ### Check your versions 8 | 9 | - Ensure that you are using one of the following Linux distributions: Ubuntu 22.04 or Rocky 8.4. 10 | - Download and install the GroqWare™ Suite version >=0.9.2.1. 11 | - For more information, see the GroqWare Quick Start Guide at [support.groq.com](https://support.groq.com). 12 | - To compile your model for Groq hardware, GroqFlow requires the Groq Developer Tools Package (groq-devtools). To run your compiled model on hardware, GroqFlow requires the Groq Runtime Package (groq-runtime). 13 | 14 | Make sure that your combination of GroqWare™ Suite version, OS version, and Python version is compatible. Our supported matrix of versions is: 15 | 16 | | GroqWare | OS | Python Version | 17 | |-----------|--------------|----------------| 18 | | 0.9.2.1 | Ubuntu 22.04 | 3.10 | 19 | | 0.9.3 | Ubuntu 18.04 | 3.8 | 20 | | 0.9.3 | Ubuntu 22.04 | 3.8 | 21 | | 0.9.3 | Rocky 8.4 | 3.8 | 22 | | 0.10.0 | Ubuntu 22.04 | 3.10 | 23 | | 0.10.0 | Rocky 8.4 | 3.8 | 24 | 25 | ### Install GroqWare 26 | 27 | Download and install the GroqWare Suite version >=0.9.2.1. 28 | - For more information, see the GroqWare Quick Start Guide at [support.groq.com](https://support.groq.com). 29 | - To compile your model for Groq hardware, GroqFlow requires the Groq Developer Tools Package (groq-devtools). To run your compiled model on hardware, GroqFlow requires the Groq Runtime Package (groq-runtime).
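If you want to confirm that the GroqWare packages are present before proceeding, a quick query (a sketch assuming the package names above, `groq-devtools` and `groq-runtime`, are the installed package names; use `rpm -qa` instead of `dpkg -l` on Rocky Linux) is:

```
dpkg -l | grep -E "groq-(devtools|runtime)"
```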
30 | 31 | ## Trying out GroqFlow 32 | 33 | If you want to try out GroqFlow by running the [examples](https://github.com/groq/groqflow/tree/main/examples) and [proof points](https://github.com/groq/groqflow/tree/main/proof_points), we recommend that you take the following steps. If you want to use GroqFlow with your own environment and model, we suggest skipping ahead to [Developing with GroqFlow](#developing-with-groqflow). 34 | 35 | ### Step 1: Create and activate a virtual environment 36 | 37 | First, download and install Miniconda, then create and activate a virtual environment. 38 | 39 | ``` 40 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 41 | bash Miniconda3-latest-Linux-x86_64.sh 42 | conda create -n groqflow python=$GF_PYTHON_VERSION 43 | conda deactivate 44 | conda activate groqflow 45 | ``` 46 | 47 | Where `$GF_PYTHON_VERSION` is the version of Python corresponding to your OS and GroqWare version in the [compatibility chart](#check-your-versions) above. 48 | 49 | > _Note_: it is important to deactivate your base conda environment when first setting up a new groqflow environment. This helps to prevent conda from making unwanted changes in the PATHs of your environments. 50 | 51 | ### Step 2: Pip install GroqFlow 52 | 53 | Install the `groqflow` package into your virtual environment: 54 | 55 | ``` 56 | git clone https://github.com/groq/groqflow.git 57 | pip install --upgrade pip 58 | cd groqflow 59 | pip install . 60 | ``` 61 | 62 | where `groqflow` is the directory created when you cloned the GroqFlow repo in the previous command. 63 | 64 | _Optional_: if you want to use GroqFlow with TensorFlow, use this install command instead of `pip install .`: 65 | 66 | ``` 67 | pip install .[tensorflow] 68 | ``` 69 | 70 | ### Step 3: Add GroqWare Suite to Python Path 71 | 72 | This adds the Groq tools to your path: 73 | 74 | ``` 75 | conda env config vars set PYTHONPATH="/opt/groq/runtime/site-packages:$PYTHONPATH" 76 | ``` 77 | 78 | **Note:** you will need to reactivate your conda environment for this to take effect. 79 | 80 | **Note:** if you encounter errors later that say GroqFlow is unable to find a tool from the GroqWare suite (Groq API, Groq Runtime, Groq DevTools, Groq Compiler, etc.) it usually means either: 81 | - You forgot to complete this step. 82 | - Your GroqWare Suite installation failed and you should attempt to re-install the GroqWare Suite. 83 | 84 | ### Step 4: Rock-It with groqit() 85 | 86 | To confirm that you're set up correctly, run the `hello_world.py` example found in the `keras`, `onnx`, or `pytorch` subfolder of `groqflow/examples/`, depending on your preferred framework. For example, with PyTorch: 87 | 88 | ``` 89 | cd groqflow/examples/pytorch 90 | python hello_world.py 91 | ``` 92 | 93 | ### Step 5: Take-off with a Proof Point 94 | 95 | The `groqflow/proof_points` directory includes multiple examples of machine learning and linear algebra workloads. To run these proof points, the `groqflow/demo_helpers` package must be installed in your groqflow environment. 96 | 97 | ``` 98 | cd groqflow/demo_helpers/ 99 | pip install -e . 100 | ``` 101 | 102 | Then you can learn about how to run proof points [here](https://github.com/groq/groqflow/tree/main/proof_points). 103 | 104 | ## Developing with GroqFlow 105 | 106 | When you are ready to try out your own model with GroqFlow, we recommend taking the following steps: 107 | 108 | 1. Activate the conda virtual environment where you are able to run your model 109 | 1.
Install the GroqFlow package from PyPI: 110 | - If you are developing a PyTorch, ONNX, or Hummingbird model, use `pip install groqflow` 111 | - If you are developing a Keras model, use `pip install groqflow[tensorflow]` 112 | 1. Follow steps 3 and 4 in [Trying out GroqFlow](#trying-out-groqflow) to complete setup 113 | 1. Import `groqflow` into the script where you are running your model and call `groqit(model, inputs)` to build your model (see the [examples](https://github.com/groq/groqflow/tree/main/examples) to learn more about calling `groqit()`) 114 | 115 | **Note:** The supported Python/OS combinations in [Check your Versions](#check-your-versions) apply here as well. 116 | 117 | **Note:** We recommend using separate conda environments for PyTorch/ONNX/Hummingbird development vs. TensorFlow development. The reason we make TensorFlow support optional in GroqFlow is to help you avoid dependency conflicts between the TensorFlow package and the other Groq/GroqFlow dependencies. Do not `pip install groqflow[tensorflow]` into an environment where you already did `pip install groqflow`, as this will cause errors. 118 | -------------------------------------------------------------------------------- /docs/readme.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | The following are links to GroqFlow documentation: 4 | 5 | - [Install Guide](install.md): Instructions on how to install GroqFlow. 6 | 7 | - [User Guide](user_guide.md): Overview and examples for all of GroqFlow's methods, flags, and options. 8 | 9 | - [Release Notes](release_notes.md): Release notes for each GroqFlow version, including currently known issues. 10 | 11 | - [Versioning](versioning.md): Explanation of GroqFlow's versioning scheme. 12 | 13 | - [README.md](readme.md): This README. 14 | -------------------------------------------------------------------------------- /docs/release_notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes 2 | 3 | ## v4.3.1 4 | 5 | ### Changes 6 | 7 | * Support for SDK 0.11. 8 | * Add beta support for the groq-torch-importer front end. 9 | * Clean up package dependencies. 10 | * Various bug fixes. 11 | 12 | ### Known Issues 13 | 14 | * Yolo V6 proof point downloads the PyTorch weights and invokes the export script to get the ONNX file. 15 | * Pip install of GroqFlow may complain about an incompatible protobuf version. 16 | 17 | ## v4.2.1 18 | 19 | ### Known Issues 20 | 21 | * Runtime errors due to mismatches in tensor sizes may occur even though GroqFlow checks the data shape. (G14148) 22 | * Whacky terminal line wrapping when printing groqit error messages. (G13235) 23 | * GroqFlow requires both the runtime and developer package to be installed. (G18283, G18284) 24 | * GroqFlow BERT Quantization Proof Point fails to compile in SDK0.9.3 due to a scheduling error. (G16739) 25 | * Yolo v6 Proof Point fails to run the evaluation after compilation in SDK0.9.2.1. (G18209) 26 | -------------------------------------------------------------------------------- /docs/versioning.md: -------------------------------------------------------------------------------- 1 | # GroqFlow Versioning Policy 2 | 3 | The `groqflow` package applies semantic versioning for its 3-digit version number. The version number is stored in `groqflow/version.py`.
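For example, you can read the installed version at runtime (a minimal sketch, assuming `groqflow/version.py` exposes the conventional `__version__` attribute):

```
from groqflow import version

print(version.__version__)
```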
4 | 5 | The 3 digits correspond to MAJOR.MINOR.PATCH, which can be interpreted as follows: 6 | * MAJOR: changes indicate breaking API changes that may require the user to change their own code 7 | * MINOR: changes indicate that builds against a previous minor version may not be compatible, and the user may need to rebuild those models 8 | * PATCH: no user action required when the patch number changes 9 | -------------------------------------------------------------------------------- /examples/hummingbird/randomforest.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example trains a RandomForestClassifier against random data 3 | then compares the sklearn result to GroqChip executed via GroqFlow. 4 | """ 5 | 6 | import numpy as np 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.ensemble import RandomForestClassifier 9 | from sklearn.metrics import classification_report 10 | from groqflow import groqit 11 | 12 | batch_size = 320 13 | 14 | # Generate random points in a 10-dimensional space with binary labels 15 | np.random.seed(0) 16 | x = np.random.rand(1000, 10).astype(np.float32) 17 | y = np.random.randint(2, size=1000) 18 | 19 | # Perform a test/train split of the (random) dataset 20 | x_train, x_test, y_train, y_test = train_test_split( 21 | x, y, test_size=batch_size, random_state=0 22 | ) 23 | 24 | # Fit the model using standard sklearn patterns 25 | skl_model = RandomForestClassifier(n_estimators=10, max_depth=5, random_state=0) 26 | skl_model.fit(x_train, y_train) 27 | 28 | # Build the model 29 | groq_model = groqit(skl_model, {"input_0": x_test}) 30 | 31 | # Display a report of standard classifier statistics 32 | print("SKLearn classification report") 33 | print(classification_report(y_test, skl_model.predict(x_test))) 34 | print("Groq classification report") 35 | print(classification_report(y_test, groq_model.predict(x_test))) 36 | 37 | print("Example randomforest.py finished") 38 | -------------------------------------------------------------------------------- /examples/hummingbird/xgbclassifier.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example trains an XGBClassifier against random data 3 | then compares the xgboost result to GroqChip executed via GroqFlow. 
4 | """ 5 | 6 | import numpy as np 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import classification_report 9 | from xgboost import XGBClassifier # pylint: disable=import-error 10 | from groqflow import groqit 11 | 12 | batch_size = 320 13 | 14 | # Generate random points in a 10-dimensional space with binary labels 15 | np.random.seed(0) 16 | x = np.random.rand(1000, 10).astype(np.float32) 17 | y = np.random.randint(2, size=1000) 18 | 19 | # Perform a test/train split of the (random) dataset 20 | x_train, x_test, y_train, y_test = train_test_split( 21 | x, y, test_size=batch_size, random_state=0 22 | ) 23 | 24 | # Fit the model using standard sklearn patterns 25 | xgb_model = XGBClassifier( 26 | n_estimators=10, max_depth=5, random_state=0, objective="binary:logistic" 27 | ) 28 | xgb_model.fit(x_train, y_train) 29 | 30 | # Build the model 31 | groq_model = groqit(xgb_model, {"input_0": x_test}) 32 | 33 | # Display a report of standard classifier statistics 34 | print("XGBoost classification report") 35 | print(classification_report(y_test, xgb_model.predict(x_test))) 36 | print("Groq classification report") 37 | print(classification_report(y_test, groq_model.predict(x_test))) 38 | 39 | print("Example xgbclassifier.py finished") 40 | -------------------------------------------------------------------------------- /examples/keras/hello_world.py: -------------------------------------------------------------------------------- 1 | """ 2 | Hello ** Keras ** World! 3 | 4 | This example uses a small model to carry out a single vector matrix 5 | multiplication to demonstrate building and running a Keras model 6 | with GroqFlow. 7 | 8 | This example will help identify what you should expect from each groqit() 9 | Keras build. You can find the build results in the cache directory at 10 | ~/.cache/groqflow/hello_keras_world/ (unless otherwise specified). 11 | """ 12 | 13 | import tensorflow as tf 14 | from groqflow import groqit 15 | 16 | tf.random.set_seed(0) 17 | 18 | # Define model class 19 | class SmallKerasModel(tf.keras.Model): # pylint: disable=abstract-method 20 | def __init__(self, output_size): 21 | super(SmallKerasModel, self).__init__() 22 | self.dense = tf.keras.layers.Dense(output_size, activation="relu") 23 | 24 | def call(self, x): # pylint: disable=arguments-differ 25 | output = self.dense(x) 26 | return output 27 | 28 | 29 | # Instantiate model and generate inputs 30 | batch_size = 1 31 | input_size = 10 32 | output_size = 5 33 | keras_model = SmallKerasModel(output_size) 34 | keras_model.build(input_shape=(batch_size, input_size)) 35 | inputs = {"x": tf.random.uniform((batch_size, input_size), dtype=tf.float32)} 36 | 37 | # Build model 38 | groq_model = groqit(keras_model, inputs, build_name="hello_keras_world") 39 | 40 | # Compute Keras and Groq results 41 | keras_outputs = keras_model(**inputs) 42 | groq_outputs = groq_model(**inputs) 43 | 44 | # Print Keras and Groq results 45 | print(f"Keras_outputs: {keras_outputs}") 46 | print(f"Groq_outputs: {groq_outputs}") 47 | 48 | print("Example hello_world.py finished") 49 | -------------------------------------------------------------------------------- /examples/onnx/hello_world.py: -------------------------------------------------------------------------------- 1 | """ 2 | Hello ** ONNX ** World! 3 | 4 | This example uses a small model to carry out a single vector matrix 5 | multiplication to demonstrate building and running an ONNX model 6 | with GroqFlow. 
7 | 8 | This example will help identify what you should expect from each groqit() 9 | ONNX build. You can find the build results in the cache directory at 10 | ~/.cache/groqflow/hello_onnx_world/ (unless otherwise specified). 11 | """ 12 | 13 | import os 14 | import torch 15 | from groqflow import groqit 16 | import onnxruntime as ort 17 | 18 | torch.manual_seed(0) 19 | 20 | # Start from a PyTorch model so you can generate an ONNX 21 | # file to pass into groqit(). 22 | class SmallModel(torch.nn.Module): 23 | def __init__(self, input_size, output_size): 24 | super(SmallModel, self).__init__() 25 | self.fc = torch.nn.Linear(input_size, output_size) 26 | 27 | def forward(self, x): 28 | output = self.fc(x) 29 | return output 30 | 31 | 32 | # Instantiate PyTorch model and generate inputs 33 | input_size = 10 34 | output_size = 5 35 | pytorch_model = SmallModel(input_size, output_size) 36 | onnx_model = "small_onnx_model.onnx" 37 | input_tensor = torch.rand(input_size) 38 | inputs = {"input": input_tensor} 39 | 40 | # Export PyTorch Model to ONNX 41 | torch.onnx.export( 42 | pytorch_model, 43 | input_tensor, 44 | onnx_model, 45 | opset_version=14, 46 | input_names=["input"], 47 | output_names=["output"], 48 | ) 49 | 50 | # You can use numpy arrays as inputs to our ONNX model 51 | def to_numpy(tensor): 52 | return ( 53 | tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() 54 | ) 55 | 56 | 57 | # Setup OnnxRuntime session for ONNX model so that you can 58 | # present a CPU baseline for the ONNX model inference 59 | ort_sess = ort.InferenceSession(onnx_model) 60 | input_name = ort_sess.get_inputs()[0].name 61 | numpy_inputs = to_numpy(input_tensor) 62 | 63 | # Build ONNX model 64 | groq_model = groqit(onnx_model, inputs, build_name="hello_onnx_world") 65 | 66 | # Remove intermediate onnx file so that you don't pollute your disk 67 | if os.path.exists(onnx_model): 68 | os.remove(onnx_model) 69 | 70 | # Compute ONNX and Groq results 71 | onnx_outputs = ort_sess.run(None, {input_name: numpy_inputs}) 72 | groq_outputs = groq_model.run(inputs) 73 | 74 | # Print ONNX and Groq results 75 | print(f"Groq_outputs: {groq_outputs}") 76 | print(f"Onnx_outputs: {onnx_outputs}") 77 | 78 | print("Example hello_world.py finished") 79 | -------------------------------------------------------------------------------- /examples/pytorch/assembler_flags.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example shows how to build a small model with 3 | a list of assembler flags. Valid assembler flags can be found 4 | in the Compiler User Guide on the customer portal at 5 | support.groq.com 6 | 7 | If a list of assembler flags is provided to groqit(), then the 8 | default flags are not used. Any of the default flags needed 9 | should also be provided. 10 | 11 | To check the assembler flags used in a build, you can either print the 12 | value of the 'gmodel.state.info.assembler_command' or view the yaml file 13 | in the cache directory for your build. 
14 | """
15 | 
16 | import torch
17 | from groqflow import groqit
18 | 
19 | torch.manual_seed(0)
20 | 
21 | # Define model class
22 | class SmallModel(torch.nn.Module):
23 |     def __init__(self, input_size, output_size):
24 |         super(SmallModel, self).__init__()
25 |         self.fc = torch.nn.Linear(input_size, output_size)
26 | 
27 |     def forward(self, x):
28 |         output = self.fc(x)
29 |         return output
30 | 
31 | 
32 | # Instantiate model and generate inputs
33 | input_size = 10
34 | output_size = 5
35 | pytorch_model = SmallModel(input_size, output_size)
36 | inputs = {"x": torch.rand(input_size)}
37 | user_provided_assembler_flags = ["--ifetch-from-self", "--no-metrics"]
38 | 
39 | # Build model with user-provided assembler flags
40 | # Note that assembler_flags are only allowed when num_chips=1
41 | gmodel = groqit(
42 |     pytorch_model, inputs, assembler_flags=user_provided_assembler_flags, num_chips=1
43 | )
44 | 
45 | # Print the user-provided flags and the Groq Assembler command
46 | # to verify your flags were applied.
47 | print(f"\nUser-provided flags: {user_provided_assembler_flags}")
48 | print(f"Groq Assembler command: {gmodel.state.info.assembler_command}")
49 | 
50 | print("Example assembler_flags.py finished")
51 | 
--------------------------------------------------------------------------------
/examples/pytorch/benchmark.py:
--------------------------------------------------------------------------------
1 | """
2 | This example illustrates how to get benchmarked performance of your build on a GroqNode
3 | system using the method `GroqModel.benchmark()`. You can read the details of
4 | `benchmark()` in the Benchmark section in docs/user_guide.md.
5 | """
6 | 
7 | import torch
8 | from groqflow import groqit
9 | 
10 | torch.manual_seed(0)
11 | 
12 | # Define model class
13 | class SmallModel(torch.nn.Module):
14 |     def __init__(self, input_size, output_size):
15 |         super(SmallModel, self).__init__()
16 |         self.fc = torch.nn.Linear(input_size, output_size)
17 | 
18 |     def forward(self, x):
19 |         output = self.fc(x)
20 |         return output
21 | 
22 | 
23 | # Instantiate model and generate inputs
24 | input_size = 10
25 | output_size = 5
26 | pytorch_model = SmallModel(input_size, output_size)
27 | inputs = {"x": torch.rand(input_size)}
28 | 
29 | # Build model
30 | gmodel = groqit(pytorch_model, inputs, groqview=True)
31 | 
32 | # Get benchmarked performance in terms of latency and throughput
33 | performance = gmodel.benchmark()
34 | print("Your build's benchmarked performance is:")
35 | print(f"{performance.latency:.7f} {performance.latency_units}")
36 | print(f"{performance.throughput:.1f} {performance.throughput_units}")
37 | 
38 | print("Example benchmark.py finished")
39 | 
--------------------------------------------------------------------------------
/examples/pytorch/benchmark_abunch.py:
--------------------------------------------------------------------------------
1 | """
2 | This example illustrates how to get benchmarked performance of your build on a GroqNode
3 | system using the method `GroqModel.benchmark_abunch()`. You can read the details of
4 | `benchmark_abunch()` in the Benchmark section in docs/user_guide.md.
5 | """
6 | 
7 | import torch
8 | from groqflow import groqit
9 | 
10 | torch.manual_seed(0)
11 | 
12 | # Define model class
13 | class SmallModel(torch.nn.Module):
14 |     def __init__(self, input_size, output_size):
15 |         super(SmallModel, self).__init__()
16 |         self.fc = torch.nn.Linear(input_size, output_size)
17 | 
18 |     def forward(self, x):
19 |         output = self.fc(x)
20 |         return output
21 | 
22 | 
23 | # Instantiate model and generate inputs
24 | input_size = 10
25 | output_size = 5
26 | pytorch_model = SmallModel(input_size, output_size)
27 | inputs = {"x": torch.rand(input_size)}
28 | 
29 | # Compile model
30 | gmodel = groqit(pytorch_model, inputs)
31 | 
32 | # Create a bunch of inputs
33 | num_inputs = 10
34 | abunch_o_inputs = [{"x": torch.rand(input_size)} for _ in range(num_inputs)]
35 | 
36 | # Get benchmarked performance in terms of latency and throughput
37 | performance = gmodel.benchmark_abunch(input_collection=abunch_o_inputs)
38 | print("Your build's benchmarked performance is:")
39 | print(f"{performance.latency:.7f} {performance.latency_units}")
40 | print(f"{performance.throughput:.1f} {performance.throughput_units}")
41 | 
42 | print("Example benchmark_abunch.py finished")
43 | 
--------------------------------------------------------------------------------
/examples/pytorch/build_name.py:
--------------------------------------------------------------------------------
1 | """
2 | This example demonstrates changing the directory name within the cache directory
3 | (~/.cache/groqflow) where all the logs, artifacts, and the state file will be written.
4 | 
5 | To change the directory name, use the build_name argument with a unique name.
6 | 
7 | The directory for each build defaults to the name of the file it was built in;
8 | 'build_name' would be the default for this file.
9 | 
10 | Note: If a single script is used to build multiple models (or if a build_name
11 | matches a build directory within the cache already), then a unique build_name will
12 | need to be defined, or the subsequent build(s) will overwrite (or load) the
13 | previous build found in ~/.cache/groqflow/{non_unique_build_name}.
14 | See docs/user_guide.md for more information.
15 | """
16 | 
17 | import torch
18 | from groqflow import groqit
19 | 
20 | torch.manual_seed(0)
21 | 
22 | 
23 | # Define model class
24 | class SmallModel(torch.nn.Module):
25 |     def __init__(self, input_size, output_size):
26 |         super(SmallModel, self).__init__()
27 |         self.fc = torch.nn.Linear(input_size, output_size)
28 | 
29 |     def forward(self, x):
30 |         output = self.fc(x)
31 |         return output
32 | 
33 | 
34 | # Create two different model instances, each with a different output
35 | # size. You can check the build artifacts to verify that both models
36 | # are built and stored separately.
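# A minimal sketch of how a cached build can later be retrieved by name
# (assuming the "Thing_1" build below has already completed):
#
#     from groqflow import load_state
#     state = load_state(build_name="Thing_1")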
37 | input_size = 10
38 | output_size_1 = 5
39 | output_size_2 = 8
40 | 
41 | pytorch_model_1 = SmallModel(input_size, output_size_1)
42 | pytorch_model_2 = SmallModel(input_size, output_size_2)
43 | inputs = {"x": torch.rand(input_size)}
44 | 
45 | # Build pytorch_model_1 and write build files to ~/.cache/groqflow/Thing_1
46 | groq_model_1 = groqit(pytorch_model_1, inputs, build_name="Thing_1")
47 | 
48 | # Build pytorch_model_2 and write build files to ~/.cache/groqflow/Thing_2
49 | groq_model_2 = groqit(pytorch_model_2, inputs, build_name="Thing_2")
50 | 
51 | print("\nNote that each build is saved to its own build directory,")
52 | print("as indicated at the completion of each build above.")
53 | 
54 | print("Example build_name.py finished")
55 | 
--------------------------------------------------------------------------------
/examples/pytorch/cache_dir.py:
--------------------------------------------------------------------------------
1 | """
2 | This example demonstrates how to set the location of the GroqFlow build cache
3 | directory, using groqit()'s cache_dir argument. The default value for
4 | cache_dir is `~/.cache/groqflow`.
5 | 
6 | To specify a different cache directory than the default, set cache_dir to
7 | the location of your choice.
8 | 
9 | Note 1: To change the cache directory for every build, a global default can be
10 | set with the `GROQFLOW_CACHE_DIR` environment variable:
11 | export GROQFLOW_CACHE_DIR=/path_of_your_choosing
12 | 
13 | Note 2: Setting the cache_dir argument within groqit() will override the
14 | `GROQFLOW_CACHE_DIR` setting.
15 | """
16 | 
17 | import torch
18 | from groqflow import groqit
19 | 
20 | torch.manual_seed(0)
21 | 
22 | 
23 | # Define model class
24 | class SmallModel(torch.nn.Module):
25 |     def __init__(self, input_size, output_size):
26 |         super(SmallModel, self).__init__()
27 |         self.fc = torch.nn.Linear(input_size, output_size)
28 | 
29 |     def forward(self, x):
30 |         output = self.fc(x)
31 |         return output
32 | 
33 | 
34 | # Instantiate PyTorch model and generate inputs
35 | input_size = 10
36 | output_size = 5
37 | pytorch_model = SmallModel(input_size, output_size)
38 | inputs = {"x": torch.rand(input_size)}
39 | 
40 | # Build pytorch_model and set the cache_dir
41 | # We also set the build_name to make the build easy to identify
42 | my_local_cache = "local_cache"
43 | groqit(pytorch_model, inputs, cache_dir=my_local_cache, build_name="my_cache_dir_build")
44 | 
45 | print(
46 |     f"\nCheck out the cache created in the local directory by running 'ls {my_local_cache}'"
47 | )
48 | 
49 | print("Example cache_dir.py finished")
50 | 
--------------------------------------------------------------------------------
/examples/pytorch/compiler_flags.py:
--------------------------------------------------------------------------------
1 | """
2 | This example shows how to build a small model with
3 | a list of compiler flags. Valid compiler flags can be found
4 | in the Compiler User Guide on the customer portal at
5 | support.groq.com
6 | 
7 | If a list of compiler flags is provided to groqit(), then the
8 | default flags are not used. Any of the default flags needed
9 | should also be provided.
10 | 
11 | To check the compiler flags used in a build, you can either print the
12 | value of 'gmodel.state.info.compiler_command' or view the yaml file
13 | in the cache directory for your build.
14 | """
15 | 
16 | import torch
17 | from groqflow import groqit
18 | 
19 | torch.manual_seed(0)
20 | 
21 | # Define model class
22 | class SmallModel(torch.nn.Module):
23 |     def __init__(self, input_size, output_size):
24 |         super(SmallModel, self).__init__()
25 |         self.fc = torch.nn.Linear(input_size, output_size)
26 | 
27 |     def forward(self, x):
28 |         output = self.fc(x)
29 |         return output
30 | 
31 | 
32 | # Instantiate model and generate inputs
33 | input_size = 10
34 | output_size = 5
35 | pytorch_model = SmallModel(input_size, output_size)
36 | inputs = {"x": torch.rand(input_size)}
37 | user_provided_compiler_flags = ["--no-print-stats", "--disableAddressCompaction"]
38 | 
39 | # Build model with user-provided compiler flags
40 | gmodel = groqit(pytorch_model, inputs, compiler_flags=user_provided_compiler_flags)
41 | 
42 | # Print the user-provided flags and the Groq Compiler command
43 | # to verify your flags were applied.
44 | print(f"\nUser-provided flags: {user_provided_compiler_flags}")
45 | print(f"Groq Compiler command: {gmodel.state.info.compiler_command}")
46 | 
47 | print("Example compiler_flags.py finished")
48 | 
--------------------------------------------------------------------------------
/examples/pytorch/estimate_performance.py:
--------------------------------------------------------------------------------
1 | """
2 | This example illustrates how to get the estimated performance of your build using the
3 | method `GroqModel.estimate_performance()`. You can read the details of
4 | `estimate_performance()` in the Performance Estimation section in docs/user_guide.md.
5 | """
6 | 
7 | import torch
8 | from groqflow import groqit
9 | 
10 | torch.manual_seed(0)
11 | 
12 | # Define model class
13 | class SmallModel(torch.nn.Module):
14 |     def __init__(self, input_size, output_size):
15 |         super(SmallModel, self).__init__()
16 |         self.fc = torch.nn.Linear(input_size, output_size)
17 | 
18 |     def forward(self, x):
19 |         output = self.fc(x)
20 |         return output
21 | 
22 | 
23 | # Instantiate model and generate inputs
24 | input_size = 10
25 | output_size = 5
26 | pytorch_model = SmallModel(input_size, output_size)
27 | inputs = {"x": torch.rand(input_size)}
28 | 
29 | # Build model
30 | gmodel = groqit(pytorch_model, inputs, groqview=True)
31 | 
32 | # Get performance estimates in terms of latency and throughput
33 | estimate = gmodel.estimate_performance()
34 | print("Your build's estimated performance is:")
35 | print(f"{estimate.latency:.7f} {estimate.latency_units}")
36 | print(f"{estimate.throughput:.1f} {estimate.throughput_units}")
37 | 
38 | print("Example estimate_performance.py finished")
39 | 
--------------------------------------------------------------------------------
/examples/pytorch/groqview.py:
--------------------------------------------------------------------------------
1 | """
2 | This example shows how to build a small model and collect the data necessary
3 | to visualize and profile a model using GroqView. When you run the
4 | `GroqModel.groqview()` method, the visualizer is opened in a web browser.
5 | See the GroqView User Guide at support.groq.com to read all about it.
6 | """ 7 | 8 | import torch 9 | from groqflow import groqit 10 | 11 | torch.manual_seed(0) 12 | 13 | # Define model class 14 | class SmallModel(torch.nn.Module): 15 | def __init__(self, input_size, output_size): 16 | super(SmallModel, self).__init__() 17 | self.fc = torch.nn.Linear(input_size, output_size) 18 | 19 | def forward(self, x): 20 | output = self.fc(x) 21 | return output 22 | 23 | 24 | # Instantiate model and generate inputs 25 | input_size = 10 26 | output_size = 5 27 | pytorch_model = SmallModel(input_size, output_size) 28 | inputs = {"x": torch.rand(input_size)} 29 | 30 | # Build model 31 | gmodel = groqit(pytorch_model, inputs, groqview=True) 32 | 33 | # Open GroqView 34 | gmodel.groqview() 35 | 36 | print("Example groqview.py finished") 37 | -------------------------------------------------------------------------------- /examples/pytorch/hello_world.py: -------------------------------------------------------------------------------- 1 | """ 2 | Hello ** PyTorch ** World! 3 | 4 | This example uses a small model to carry out a single vector matrix 5 | multiplication to demonstrate building and running a PyTorch model 6 | with GroqFlow. 7 | 8 | This example will help identify what you should expect from each groqit() 9 | PyTorch build. You can find the build results in the cache directory at 10 | ~/.cache/groqflow/hello_pytorch_world/ (unless otherwise specified). 11 | """ 12 | 13 | import torch 14 | from groqflow import groqit 15 | 16 | torch.manual_seed(0) 17 | 18 | # Define model class 19 | class SmallModel(torch.nn.Module): 20 | def __init__(self, input_size, output_size): 21 | super(SmallModel, self).__init__() 22 | self.fc = torch.nn.Linear(input_size, output_size) 23 | 24 | def forward(self, x): 25 | output = self.fc(x) 26 | return output 27 | 28 | 29 | # Instantiate model and generate inputs 30 | input_size = 10 31 | output_size = 5 32 | pytorch_model = SmallModel(input_size, output_size) 33 | inputs = {"x": torch.rand(input_size)} 34 | 35 | # Build model 36 | groq_model = groqit(pytorch_model, inputs, build_name="hello_pytorch_world") 37 | 38 | # Compute Pytorch and Groq results 39 | pytorch_outputs = pytorch_model(**inputs) 40 | groq_outputs = groq_model(**inputs) 41 | 42 | # Print Pytorch and Groq results 43 | print(f"Pytorch_outputs: {pytorch_outputs}") 44 | print(f"Groq_outputs: {groq_outputs}") 45 | 46 | print("Example hello_world.py finished") 47 | -------------------------------------------------------------------------------- /examples/pytorch/no_monitor.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example demonstrates the difference between the groqit() argument, 3 | monitor, when set to "True" (its default value) and then "False". 
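The monitor is the build-progress display that groqit() prints to the terminal
while it works; disabling it is typically useful when redirecting output to a
log file or running in CI.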
4 | """ 5 | 6 | import torch 7 | from groqflow import groqit 8 | 9 | torch.manual_seed(0) 10 | 11 | # Define model class 12 | class SmallModel(torch.nn.Module): 13 | def __init__(self, input_size, output_size): 14 | super(SmallModel, self).__init__() 15 | self.fc = torch.nn.Linear(input_size, output_size) 16 | 17 | def forward(self, x): 18 | output = self.fc(x) 19 | return output 20 | 21 | 22 | # Instantiate model and generate inputs 23 | input_size = 10 24 | output_size = 5 25 | pytorch_model = SmallModel(input_size, output_size) 26 | inputs = {"x": torch.rand(input_size)} 27 | 28 | # Build pytorch_model with `monitor` explicitly set to True 29 | print("\ngroqit() will now build the model with the monitor enabled...") 30 | groq_model = groqit(pytorch_model, inputs, monitor=True, build_name="monitor_enabled") 31 | 32 | # Rebuild pytorch_model with the monitor disabled 33 | print("\ngroqit() will now build the model with the monitor disabled...") 34 | groq_model = groqit(pytorch_model, inputs, monitor=False, build_name="monitor_disabled") 35 | 36 | print("Example no_monitor.py finished") 37 | -------------------------------------------------------------------------------- /examples/pytorch/num_chips.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example shows how to specify the number of GroqChip processors 3 | used in your build. 4 | 5 | You will need to be able to put at least one layer on each chip. So, the 6 | small model here will have two layers. 7 | 8 | To check the number of chips used in a build, you can either print the 9 | value of the 'gmodel.state.num_chips_used' or view the yaml file 10 | in the cache directory for your build. 11 | 12 | You can read more about the `num_chips` argument and multi-chip builds 13 | in the Multi-Chip section in the docs/user_guide.md. 14 | """ 15 | 16 | import torch 17 | from groqflow import groqit 18 | 19 | torch.manual_seed(0) 20 | 21 | # Define model class 22 | class TwoLayerModel(torch.nn.Module): 23 | def __init__(self, input_size, output_size): 24 | super(TwoLayerModel, self).__init__() 25 | self.fc1 = torch.nn.Linear(input_size, output_size) 26 | self.fc2 = torch.nn.Linear(output_size, output_size) 27 | 28 | def forward(self, x): 29 | output = self.fc1(x) 30 | output = self.fc2(output) 31 | return output 32 | 33 | 34 | # Create model and inputs 35 | input_size = 10 36 | output_size = 5 37 | pytorch_model = TwoLayerModel(input_size, output_size) 38 | inputs = {"x": torch.rand(input_size)} 39 | 40 | # Build model for 2 chips 41 | gmodel = groqit(pytorch_model, inputs, num_chips=2) 42 | 43 | print( 44 | "\nThe number of GroqChip processors required to run the build is " 45 | f"{gmodel.state.num_chips_used}." 46 | ) 47 | 48 | print("Example num_chips.py finished") 49 | -------------------------------------------------------------------------------- /examples/pytorch/quantization.py: -------------------------------------------------------------------------------- 1 | """ 2 | This example shows how to specify the data samples to be used to 3 | perform post training quantization on the equivalent ONNX model 4 | before compiling and assembling the model into a GroqModel. 5 | 6 | You can read more about the `quantization_samples` argument 7 | in the corresponding section in the docs/user_guide.md. 
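Each quantization sample in this example is a float32 numpy array shaped like
the model's input; as noted in the code below, the datatypes of the model
inputs and the quantization samples must match.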
8 | """
9 | 
10 | import torch
11 | import numpy as np
12 | from groqflow import groqit
13 | 
14 | torch.manual_seed(0)
15 | 
16 | # Define model class
17 | class TwoLayerModel(torch.nn.Module):
18 |     def __init__(self, input_size, output_size):
19 |         super(TwoLayerModel, self).__init__()
20 |         self.fc1 = torch.nn.Linear(input_size, output_size)
21 |         self.fc2 = torch.nn.Linear(output_size, output_size)
22 | 
23 |     def forward(self, x):
24 |         output = self.fc1(x)
25 |         output = self.fc2(output)
26 |         return output
27 | 
28 | 
29 | if __name__ == "__main__":
30 | 
31 |     # Create model and inputs
32 |     input_size, output_size = 10, 5
33 |     pytorch_model = TwoLayerModel(input_size, output_size)
34 |     torch_tensor = torch.rand(input_size)
35 |     inputs = {"x": torch_tensor}
36 | 
37 |     # Prepare quantization data
38 |     # The datatype should be the same for the model inputs, the model's expected
39 |     # inputs, and the quantization samples
40 |     sample_size = 100
41 |     quantization_data = [
42 |         (np.array([np.random.rand(input_size)], dtype=np.float32))
43 |         for _ in range(sample_size)
44 |     ]
45 | 
46 |     # Convert the PyTorch model into ONNX, quantize the ONNX model, and
47 |     # convert the quantized ONNX model into a GroqModel
48 |     gmodel = groqit(
49 |         pytorch_model,
50 |         inputs,
51 |         rebuild="always",
52 |         quantization_samples=quantization_data,
53 |     )
54 | 
55 |     # Run inference on both the PyTorch model and the quantized GroqModel
56 |     simple_pytorch_dataset = [
57 |         inputs,
58 |         inputs,
59 |     ]
60 |     groq_outputs = gmodel.run_abunch(simple_pytorch_dataset)
61 |     with torch.no_grad():
62 |         torch_outputs = [pytorch_model(**example) for example in simple_pytorch_dataset]
63 | 
64 |     # See if inference results match
65 |     value_pass = all(
66 |         [
67 |             np.allclose(torch_outputs[i], groq_outputs[i], rtol=0.01, atol=0.001)
68 |             for i in range(len(simple_pytorch_dataset))
69 |         ]
70 |     )
71 |     match_str = "" if value_pass else "not "
72 |     print(
73 |         "Results of PyTorch model and quantized GroqModel do {}match.".format(match_str)
74 |     )
75 | 
76 |     print("Example quantization.py finished")
77 | 
--------------------------------------------------------------------------------
/examples/pytorch/rebuild_always.py:
--------------------------------------------------------------------------------
1 | """
2 | This example is built to demonstrate groqit()'s rebuild = "always" setting.
3 | 
4 | groqit() will always rebuild the model, even when a build of that model is
5 | found in the GroqFlow build cache, when the `rebuild` argument is set to
6 | "always".
7 | 
8 | You can demonstrate the functionality for rebuild="always" by running this
9 | script twice and seeing that the model still gets rebuilt even when the model
10 | is cached and there are no changes to the model.
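Note: a default rebuild policy can also be set globally with the
GROQIT_REBUILD_POLICY environment variable, which accepts the same
"if_needed", "always", and "never" values.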
11 | """
12 | 
13 | import torch
14 | from groqflow import groqit
15 | 
16 | torch.manual_seed(0)
17 | 
18 | # Define model class
19 | class SmallModel(torch.nn.Module):
20 |     def __init__(self, input_size, output_size):
21 |         super(SmallModel, self).__init__()
22 |         self.fc = torch.nn.Linear(input_size, output_size)
23 | 
24 |     def forward(self, x):
25 |         output = self.fc(x)
26 |         return output
27 | 
28 | 
29 | # Instantiate model and generate inputs
30 | input_size = 10
31 | output_size = 5
32 | pytorch_model = SmallModel(input_size, output_size)
33 | inputs = {"x": torch.rand(input_size)}
34 | 
35 | # Build/Rebuild model
36 | groq_model = groqit(pytorch_model, inputs, rebuild="always")
37 | 
38 | print("Example rebuild_always.py finished")
39 | 
--------------------------------------------------------------------------------
/examples/pytorch/rebuild_never.py:
--------------------------------------------------------------------------------
1 | """
2 | This example is built to demonstrate groqit()'s rebuild = "never" setting.
3 | 
4 | When rebuild is set to "never", groqit() will look within the cache
5 | for a build with a matching build_name and load it, if it exists.
6 | You will see a warning printed to stdout if the model has changed, but the
7 | existing build will be loaded regardless of functionality or correctness.
8 | 
9 | Try the following experiment.
10 | 1. Run this script to build and save the model in the cache.
11 | 2. Run the script again, and observe the warning printed when the
12 |    cached model is loaded even though there is a detected change.
13 | 
14 | Note: To make sure the model changes, the random seed is not set
15 | for this example.
16 | """
17 | 
18 | import torch
19 | from groqflow import groqit
20 | 
21 | # Define model class
22 | class SmallModel(torch.nn.Module):
23 |     def __init__(self, input_size, output_size):
24 |         super(SmallModel, self).__init__()
25 |         self.fc = torch.nn.Linear(input_size, output_size)
26 | 
27 |     def forward(self, x):
28 |         output = self.fc(x)
29 |         return output
30 | 
31 | 
32 | # Instantiate model and generate inputs
33 | input_size = 10
34 | output_size = 5
35 | pytorch_model = SmallModel(input_size, output_size)
36 | inputs = {"x": torch.rand(input_size)}
37 | 
38 | # Build or load the model with rebuild="never" applied
39 | groq_model = groqit(pytorch_model, inputs, rebuild="never")
40 | 
41 | print("Example rebuild_never.py finished")
42 | 
--------------------------------------------------------------------------------
/examples/pytorch/run_abunch.py:
--------------------------------------------------------------------------------
1 | """
2 | Hello World, again!
3 | 
4 | This example uses the same small model as the hello_world example,
5 | but this time we are going to run a bunch of inferences with the
6 | GroqModel.run_abunch() method.
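run_abunch() accepts an input_collection, a list of input dictionaries of the
same form passed to groqit(), and returns one output per input.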
7 | """ 8 | 9 | import torch 10 | from groqflow import groqit 11 | 12 | torch.manual_seed(0) 13 | 14 | # Define model class 15 | class SmallModel(torch.nn.Module): 16 | def __init__(self, input_size, output_size): 17 | super(SmallModel, self).__init__() 18 | self.fc = torch.nn.Linear(input_size, output_size) 19 | 20 | def forward(self, x): 21 | output = self.fc(x) 22 | return output 23 | 24 | 25 | # Instantiate model and generate inputs 26 | input_size = 10 27 | output_size = 5 28 | pytorch_model = SmallModel(input_size, output_size) 29 | inputs = {"x": torch.rand(input_size)} 30 | 31 | # Compile model 32 | groq_model = groqit(pytorch_model, inputs) 33 | 34 | # Create a bunch of inputs 35 | num_inputs = 10 36 | abunch_o_inputs = [{"x": torch.rand(input_size)} for _ in range(num_inputs)] 37 | 38 | print(f"Calculating the results of the {num_inputs} inputs!") 39 | 40 | # Run groq_model computations on abunch_o_inputs 41 | abunch_o_outputs = groq_model.run_abunch(input_collection=abunch_o_inputs) 42 | 43 | # Print abunch of outputs 44 | for count, output in enumerate(abunch_o_outputs): 45 | print(f"output {count}: {list(output.numpy())}") 46 | 47 | print("Example run_abunch.py finished") 48 | -------------------------------------------------------------------------------- /examples/pytorch/sequence.py: -------------------------------------------------------------------------------- 1 | """ This example uses GroqFlow features recommended for power users only. 2 | 3 | By default, GroqFlow completes the following steps: 4 | > Convert to ONNX 5 | > Optimize ONNX file 6 | > Check op support 7 | > Convert to FP16 8 | > Compile Model 9 | > Assemble Model 10 | 11 | This example illustrates how to alter the default sequence of steps. In this 12 | example, the conversion to FP16 is skipped. 
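With ConvertOnnxToFp16 removed from the sequence, the model is compiled
directly from the optimized FP32 ONNX file rather than being cast to FP16
first.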
13 | """
14 | 
15 | import torch
16 | from groqflow import groqit
17 | import onnxflow.justbuildit.export as of_export
18 | import onnxflow.justbuildit.stage as stage
19 | import groqflow.justgroqit.compile as compile
20 | import groqflow.justgroqit.export as gf_export
21 | 
22 | 
23 | torch.manual_seed(0)
24 | 
25 | # Define model class
26 | class SmallModel(torch.nn.Module):
27 |     def __init__(self, input_size, output_size):
28 |         super(SmallModel, self).__init__()
29 |         self.fc = torch.nn.Linear(input_size, output_size)
30 | 
31 |     def forward(self, x):
32 |         output = self.fc(x)
33 |         return output
34 | 
35 | 
36 | # Instantiate model and generate inputs
37 | input_size = 10
38 | output_size = 5
39 | 
40 | pytorch_model = SmallModel(input_size, output_size)
41 | inputs = {"x": torch.rand(input_size, dtype=torch.float32)}
42 | 
43 | onnx_sequence = stage.Sequence(
44 |     "onnx_sequence",
45 |     "Building ONNX Model without fp16 conversion",
46 |     [
47 |         of_export.ExportPytorchModel(),
48 |         of_export.OptimizeOnnxModel(),
49 |         gf_export.CheckOnnxCompatibility(),
50 |         # of_export.ConvertOnnxToFp16(), #<-- This is the step we want to skip
51 |         compile.CompileOnnx(),
52 |         compile.Assemble(),
53 |     ],
54 |     enable_model_validation=True,
55 | )
56 | 
57 | # Build model
58 | groq_model = groqit(pytorch_model, inputs, sequence=onnx_sequence)
59 | 
60 | # Compute Pytorch and Groq results
61 | pytorch_outputs = pytorch_model(**inputs)
62 | groq_outputs = groq_model(**inputs)
63 | 
64 | # Print Pytorch and Groq results
65 | print(f"Pytorch_outputs: {pytorch_outputs}")
66 | print(f"Groq_outputs: {groq_outputs}")
67 | 
--------------------------------------------------------------------------------
/examples/readme.md:
--------------------------------------------------------------------------------
1 | # GroqFlow™ Examples
2 | 
3 | This folder contains examples that demonstrate the use of `groqit()` arguments and `GroqModel` methods.
4 | 
5 | You can learn more about the concepts demonstrated in the examples by referencing the GroqFlow User Guide at `docs/user_guide.md`.
6 | 
7 | ## Table Of Contents
8 | 
9 | - [Groq Tool Requirements](#groq-tool-requirements)
10 | - [Understanding Examples](#understanding-examples)
11 | - [Running Examples](#running-examples)
12 | - [Hello Worlds](#hello-worlds)
13 | - [Hummingbird Examples](#hummingbird-examples)
14 | - [Additional PyTorch Examples](#additional-pytorch-examples)
15 | 
16 | ## Groq Tool Requirements
17 | 
18 | The Groq tools packages and the **Quick Start Guide** can be found at the [Groq Customer Portal](https://support.groq.com/).
19 | 
20 | - To build a `GroqModel`, the `groq-devtools` package should be installed.
21 | - To run a `GroqModel` on hardware, the `groq-runtime` package should be installed.
22 | - Both Groq packages should be installed to enable building and running on hardware
23 |   from the same script.
24 | 
25 | ## Understanding Examples
26 | 
27 | Here are some properties shared by all of the examples:
28 | 
29 | - Each example will create a build directory in the GroqFlow build cache, which is located at `~/.cache/groqflow` by default.
30 |   - **Note**: Most builds will load from this cache after the first time you run them, as opposed to rebuilding, unless otherwise specified in the example (check out the `rebuild` argument and its examples to change this behavior).
31 |   - **Note**: Most examples set `torch.manual_seed(0)` or `tf.random.set_seed(0)` so that the randomly generated weights in the example do not change between runs, unless otherwise specified in the example.
32 | - The build directory will be named after the example unless the example specifies a name change with the `build_name` argument (see the `build_name.py` example).
33 | - The model being built in each example is a small one- or two-layer fully-connected graph.
34 | 
35 | ## Running Examples
36 | 
37 | To run any of the examples, open a terminal and type the following command:
38 | 
39 | ```bash
40 | python /path/to/example/example_name.py
41 | ```
42 | 
43 | ## Hello Worlds
44 | 
45 | | **Example Name** | **Demonstrates** |
46 | |:--------|:-----------|
47 | | `pytorch/hello_world.py` | building and running a model defined in PyTorch |
48 | | `keras/hello_world.py` | building and running a model defined in Keras |
49 | | `onnx/hello_world.py` | building and running a model defined as an ONNX file |
50 | 
51 | ## Hummingbird Examples
52 | 
53 | | **Example Name** | **Demonstrates** |
54 | |:--------|:-----------|
55 | | `hummingbird/randomforest.py` | building and running a Hummingbird RandomForestClassifier against random data |
56 | | `hummingbird/xgbclassifier.py` | building and running a Hummingbird XGBClassifier against random data |
57 | 
58 | ## Additional PyTorch Examples
59 | 
60 | | **Example Name** | **Demonstrates** |
61 | |:--------|:-----------|
62 | | `pytorch/assembler_flags.py` | the `assembler_flags` argument to `groqit()` |
63 | | `pytorch/benchmark.py` | the `benchmark()` method of `GroqModel` |
64 | | `pytorch/benchmark_abunch.py` | the `benchmark_abunch()` method of `GroqModel` |
65 | | `pytorch/build_name.py` | the `build_name` argument to `groqit()` |
66 | | `pytorch/cache_dir.py` | the `cache_dir` argument to `groqit()` |
67 | | `pytorch/compiler_flags.py` | the `compiler_flags` argument to `groqit()` |
68 | | `pytorch/estimate_performance.py` | the performance estimation feature of GroqFlow |
69 | | `pytorch/groqview.py` | how to create and open a GroqView visualization using GroqFlow |
70 | | `pytorch/no_monitor.py` | the `monitor` argument to `groqit()` |
71 | | `pytorch/num_chips.py` | the `num_chips` argument to `groqit()` |
72 | | `pytorch/rebuild_always.py` | `groqit()`'s caching behavior when the `rebuild` argument is set to "always" |
73 | | `pytorch/rebuild_never.py` | `groqit()`'s caching behavior when the `rebuild` argument is set to "never" |
74 | | `pytorch/run_abunch.py` | running multiple inputs at a time with the `run_abunch()` method |
75 | | `pytorch/sequence.py` | the `sequence` argument for changing the default GroqFlow steps for porting your model |
76 | 
--------------------------------------------------------------------------------
/groqflow/__init__.py:
--------------------------------------------------------------------------------
1 | from groqflow.version import __version__
2 | 
3 | from groqflow.common.build import load_state
4 | 
5 | from groqflow.justgroqit.groqit import (
6 |     groqit,
7 | )
8 | 
--------------------------------------------------------------------------------
/groqflow/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/groq/groqflow/32740b44aea43d4ecf5d2fa4a2ce3d0f040e8bf0/groqflow/common/__init__.py
--------------------------------------------------------------------------------
/groqflow/common/build.py:
-------------------------------------------------------------------------------- 1 | import os 2 | import enum 3 | import math 4 | from typing import Optional, List, Dict 5 | import dataclasses 6 | import onnxflow.common.build as of_build 7 | from groqflow.version import __version__ as groqflow_version 8 | 9 | 10 | DEFAULT_ONNX_OPSET = 16 11 | MINIMUM_ONNX_OPSET = 13 12 | 13 | # Identifiers for specific GroqCard Accelerators 14 | GROQCARD_A14 = "A1.4" 15 | 16 | # Identifiers for specific chip topologies 17 | DRAGONFLY = "Dragonfly" 18 | ROTATIONAL = "Rotational" 19 | 20 | # WARNING: The "internal" env var may cause unexpected behavior if enabled 21 | # outside of the internal Groq dev environment. 22 | environment_variables = { 23 | "cache_dir": "GROQFLOW_CACHE_DIR", 24 | "rebuild": "GROQIT_REBUILD_POLICY", 25 | "dont_use_sdk": "GROQFLOW_BAKE_SDK", 26 | "debug": "GROQFLOW_DEBUG", 27 | "internal": "GROQFLOW_INTERNAL_FEATURES", 28 | "torch_importer": "GROQFLOW_USE_TORCH_IMPORTER", 29 | } 30 | 31 | # Allow an environment variable to override the default 32 | # location for the GroqFlow build cache 33 | if os.environ.get(environment_variables["cache_dir"]): 34 | DEFAULT_CACHE_DIR = os.environ.get(environment_variables["cache_dir"]) 35 | else: 36 | DEFAULT_CACHE_DIR = os.path.expanduser("~/.cache/groqflow") 37 | 38 | # Allow an environment variable to override the default 39 | # rebuild policy 40 | if os.environ.get(environment_variables["rebuild"]): 41 | DEFAULT_REBUILD_POLICY = os.environ.get(environment_variables["rebuild"]) 42 | rebuild_allowed_values = ["if_needed", "always", "never"] 43 | if DEFAULT_REBUILD_POLICY not in rebuild_allowed_values: 44 | raise ValueError( 45 | f'Environment variable set for {environment_variables["rebuild"]} has ' 46 | f"value {DEFAULT_REBUILD_POLICY}, which is not one of the following allowed " 47 | f"values: {rebuild_allowed_values} " 48 | ) 49 | else: 50 | DEFAULT_REBUILD_POLICY = "if_needed" 51 | 52 | # Allow an environment variable to tell groqit to build an SDK 53 | # with bake instead of using an installed copy of the SDK (only 54 | # useful for internal Groq developers) 55 | if os.environ.get(environment_variables["dont_use_sdk"]) == "True": 56 | USE_SDK = False 57 | else: 58 | USE_SDK = True 59 | 60 | # Direct builds to target the default GroqCard A1.4 accelerators. 61 | GROQCARD = GROQCARD_A14 62 | 63 | # By default, choose the dragonfly topology. Users can change this by passing in 64 | # the topology argument to groqit(). 65 | TOPOLOGY = DRAGONFLY 66 | 67 | # Allow users to use the Torch Importer and bypass ONNX. Only applicable for 68 | # Torch models, has no other effect on other model types. 69 | if os.environ.get(environment_variables["torch_importer"]): 70 | USE_TORCH_IMPORTER = True 71 | else: 72 | USE_TORCH_IMPORTER = False 73 | 74 | 75 | class Backend(enum.Enum): 76 | AUTO = "auto" 77 | LOCAL = "local" 78 | CLOUD = "cloud" 79 | REMOTE = "remote" 80 | 81 | 82 | def supported_topology(groqcard: str, topology: str) -> Dict[int, str]: 83 | """ 84 | Return a map of the number of chips to the topology string, given a groqcard 85 | and connection topology. Only groqcard value of GROQCARD_A14 and topologies 86 | of value DRAGONFLY, ROTATIONAL are currently supported. 
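For example (doctest-style illustration based on the tables below):

    >>> supported_topology(GROQCARD_A14, DRAGONFLY)[4]
    'DF_A14_4_CHIP'
    >>> max_chips(GROQCARD_A14, DRAGONFLY)
    64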
87 | """ 88 | 89 | topo_df_a14 = { 90 | 2: "DF_A14_2_CHIP", 91 | 4: "DF_A14_4_CHIP", 92 | 8: "DF_A14_8_CHIP", 93 | 16: "DF_A14_16_CHIP", 94 | 32: "DF_A14_32_CHIP", 95 | 64: "DF_A14_64_CHIP", 96 | } 97 | topo_rt_a14 = { 98 | 16: "RT09_A14_16_CHIP", 99 | 32: "RT09_A14_32_CHIP", 100 | 40: "RT09_A14_40_CHIP", 101 | 48: "RT09_A14_48_CHIP", 102 | 56: "RT09_A14_56_CHIP", 103 | 64: "RT09_A14_64_CHIP", 104 | 72: "RT09_A14_72_CHIP", 105 | } 106 | 107 | if groqcard != GROQCARD_A14: 108 | return {} 109 | 110 | if topology == DRAGONFLY: 111 | return topo_df_a14 112 | elif topology == ROTATIONAL: 113 | return topo_rt_a14 114 | else: 115 | return {} 116 | 117 | 118 | def max_chips(groqcard: str, topology: str): 119 | chips = list(supported_topology(groqcard, topology).keys()) 120 | if len(chips) == 0: 121 | raise ValueError( 122 | f"Could not find the number of chips for groqcard {groqcard}, " 123 | f"topology {topology}." 124 | ) 125 | return chips[-1] 126 | 127 | 128 | # Each chip can hold approximately 50M parameters 129 | # Number of chips need to be either 1, 2, 4, 8, 16, 32 or 64 130 | def calculate_num_chips(num_parameters, estimate=False): 131 | if num_parameters is not None: 132 | if num_parameters == 0: 133 | return 1 134 | else: 135 | x = math.ceil(num_parameters / 50000000) 136 | if estimate: 137 | return x 138 | else: 139 | return 2 ** (x - 1).bit_length() 140 | else: 141 | return None 142 | 143 | 144 | @dataclasses.dataclass(frozen=True) 145 | class GroqConfig(of_build.Config): 146 | """ 147 | User-provided build configuration. GroqFlow is not allowed 148 | to change instances of Config once they have been 149 | instantiated (frozen=True enforces this). 150 | 151 | Inherits `build_name`, `auto_name`, `onnx_opset`, and `sequence` from onnxflow. 152 | 153 | Note: modifying this struct can create a breaking change that 154 | requires users to rebuild their models. Increment the minor 155 | version number of the groqflow package if you do make a build- 156 | breaking change. 157 | """ 158 | 159 | compiler_flags: Optional[List[str]] = None 160 | assembler_flags: Optional[List[str]] = None 161 | groqview: bool = False 162 | groqcard: str = GROQCARD 163 | topology: str = TOPOLOGY 164 | num_chips: Optional[int] = None 165 | 166 | 167 | @dataclasses.dataclass 168 | class GroqInfo(of_build.Info): 169 | """ 170 | Information about a build that may be useful for analysis 171 | or debugging purposes. 172 | 173 | Note: GroqFlow does not guarantee that members of this class will 174 | have non-None values at the end of a build. GroqFlow code must 175 | not take a dependence on any member of this class. 
176 | """ 177 | 178 | num_parameters: Optional[int] = None 179 | opt_onnx_unsupported_ops: Optional[List[str]] = None 180 | opt_onnx_all_ops_supported: Optional[bool] = None 181 | torch_script_exported: Optional[bool] = None 182 | torch_importer_success: Optional[bool] = None 183 | torch_importer_command: Optional[str] = None 184 | compiler_success: Optional[bool] = None 185 | compiler_command: Optional[str] = None 186 | assembler_success: Optional[bool] = None 187 | assembler_command: Optional[str] = None 188 | measured_latency: Optional[float] = None 189 | measured_throughput: Optional[float] = None 190 | estimated_pcie_input_latency: Optional[float] = None 191 | deterministic_compute_latency: Optional[float] = None 192 | estimated_pcie_output_latency: Optional[float] = None 193 | estimated_throughput: Optional[float] = None 194 | estimated_latency: Optional[float] = None 195 | compiled_model_input_bytes: Optional[int] = None 196 | compiled_model_output_bytes: Optional[int] = None 197 | compiler_ram_bytes: Optional[float] = None 198 | 199 | 200 | @dataclasses.dataclass 201 | class GroqState(of_build.State): 202 | # User-provided args that influence the generated model 203 | config: GroqConfig = None 204 | 205 | # User-provided args that do not influence the generated model 206 | use_sdk: bool = False 207 | 208 | # Optional information about the build 209 | info: GroqInfo = GroqInfo() 210 | 211 | # All of the following are critical aspects of the build, 212 | # including properties of GroqFlow and choices made by GroqFlow 213 | # while building the model, which determine the outcome of the build. 214 | # NOTE: adding or changing a member name in this struct can create 215 | # a breaking change that requires users to rebuild their models. 216 | # Increment the minor version number of the groqflow package if you 217 | # do make a build-breaking change. 
218 | 219 | groqflow_version: str = groqflow_version 220 | num_chips_used: Optional[int] = None 221 | 222 | @property 223 | def original_inputs_file(self): 224 | return os.path.join( 225 | of_build.output_dir(self.cache_dir, self.config.build_name), 226 | "inputs_original.npy", 227 | ) 228 | 229 | @property 230 | def execution_inputs_file(self): 231 | return os.path.join( 232 | of_build.output_dir(self.cache_dir, self.config.build_name), "inputs.npy" 233 | ) 234 | 235 | @property 236 | def outputs_file(self): 237 | return os.path.join( 238 | of_build.output_dir(self.cache_dir, self.config.build_name), "outputs.npy" 239 | ) 240 | 241 | @property 242 | def latency_file(self): 243 | return os.path.join( 244 | of_build.output_dir(self.cache_dir, self.config.build_name), "latency.npy" 245 | ) 246 | 247 | @property 248 | def torch_script_dir(self): 249 | return os.path.join( 250 | of_build.output_dir(self.cache_dir, self.config.build_name), "torchscript" 251 | ) 252 | 253 | @property 254 | def torch_script_file(self): 255 | return os.path.join( 256 | self.torch_script_dir, 257 | f"{self.config.build_name}.pt", 258 | ) 259 | 260 | @property 261 | def compile_dir(self): 262 | return os.path.join( 263 | of_build.output_dir(self.cache_dir, self.config.build_name), "compile" 264 | ) 265 | 266 | @property 267 | def stats_file(self): 268 | return os.path.join(self.compile_dir, "stats.json") 269 | 270 | @property 271 | def groqview_file(self): 272 | return os.path.join(self.compile_dir, "output_bind") 273 | 274 | @property 275 | def topology(self): 276 | topology = supported_topology(self.config.groqcard, self.config.topology) 277 | if self.num_chips_used in topology.keys(): 278 | return topology[self.num_chips_used] 279 | else: 280 | return "Unknown" 281 | 282 | def prepare_file_system(self): 283 | super().prepare_file_system() 284 | os.makedirs(self.compile_dir, exist_ok=True) 285 | 286 | 287 | def load_state( 288 | cache_dir=DEFAULT_CACHE_DIR, build_name=None, state_path=None 289 | ) -> GroqState: 290 | 291 | return of_build.load_state( 292 | cache_dir=cache_dir, 293 | build_name=build_name, 294 | state_path=state_path, 295 | state_type=GroqState, 296 | ) 297 | -------------------------------------------------------------------------------- /groqflow/common/onnx_helpers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for dealing with ONNX files and ONNX models 3 | """ 4 | 5 | import subprocess 6 | import ast 7 | import onnxflow.common.printing as printing 8 | import groqflow.common.sdk_helpers as sdk 9 | 10 | 11 | def check_ops(input_onnx, use_sdk=False): 12 | 13 | print("Checking unsupported ops...") 14 | 15 | # Select either bake or SDK 16 | if use_sdk: 17 | cmd = sdk.find_tool("onnxmodelanalyzer") 18 | else: 19 | cmd = [ 20 | "bake", 21 | "r", 22 | "//Groq/Compiler:OnnxModelAnalyze", 23 | ] 24 | cmd = cmd + ["-u", "-i", input_onnx] 25 | 26 | # Run process and decode outputs 27 | p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 28 | out, _ = p.communicate() 29 | out = out.decode("utf-8").split("\n") 30 | all_ops = ast.literal_eval(out[-4]) 31 | unsupported_ops = ast.literal_eval(out[-2]) 32 | 33 | # print results accordingly 34 | num_ops = len(all_ops) 35 | num_unsupported = len(unsupported_ops) 36 | num_supported = num_ops - num_unsupported 37 | if num_unsupported == 0: 38 | printing.logn("\t\tDONE", printing.Colors.OKGREEN) 39 | printing.logn( 40 | "\t" + f"{num_supported}/{num_ops} ops supported", 
printing.Colors.OKGREEN 41 | ) 42 | else: 43 | printing.logn("\t\tDONE", printing.Colors.OKGREEN) 44 | printing.logn( 45 | "\t" + f"{num_supported}/{num_ops} ops supported", printing.Colors.WARNING 46 | ) 47 | printing.logn( 48 | "\tUnsupported ops: " + ", ".join(unsupported_ops), 49 | printing.Colors.WARNING, 50 | ) 51 | return all_ops, unsupported_ops 52 | -------------------------------------------------------------------------------- /groqflow/common/sdk_helpers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for interfacing with the GroqWare SDK 3 | """ 4 | 5 | import os 6 | import enum 7 | import subprocess 8 | import shutil 9 | from typing import Type, Union 10 | from pkg_resources import parse_version 11 | import onnxflow.common.exceptions as exp 12 | import groqflow.common.build as build 13 | 14 | 15 | MIN_RELEASE_VERSION = "0.9.2.1" 16 | 17 | 18 | class OS(enum.Enum): 19 | UBUNTU = "Ubuntu" 20 | ROCKY = "Rocky Linux" 21 | 22 | 23 | def get_num_chips_available(pci_devices=None): 24 | 25 | # The location of lspci may vary according to the OS used 26 | if shutil.which("lspci"): 27 | lspci = shutil.which("lspci") 28 | # This is important to ensure that CI works 29 | elif os.path.isfile("/usr/bin/lspci"): 30 | lspci = "/usr/bin/lspci" 31 | else: 32 | raise exp.EnvError("lspci not found") 33 | 34 | # Capture the list of pci devices on the system using the linux lspci utility 35 | if pci_devices is None: 36 | pci_devices = ( 37 | subprocess.check_output([lspci, "-n"], stderr=subprocess.DEVNULL) 38 | .decode("utf-8") 39 | .split("\n") 40 | ) 41 | 42 | # Unique registered vendor id: 1de0, and device id: "0000" 43 | groq_card_id = "1de0:0000" 44 | 45 | # number of chips per device: "1de0:0000":1 46 | chips_per_card = 1 47 | 48 | # Sum the number of GroqCards in the list of devices 49 | num_cards = 0 50 | for device in pci_devices: 51 | if groq_card_id in device: 52 | num_cards += 1 53 | 54 | # Calculate total number of chips 55 | num_chips_available = num_cards * chips_per_card 56 | 57 | return num_chips_available 58 | 59 | 60 | def find_tool(tool, soft_fail=False): 61 | if shutil.which(tool): 62 | return [tool] 63 | elif os.path.isfile(f"/usr/local/groq/bin/{tool}"): 64 | return [f"/usr/local/groq/bin/{tool}"] 65 | elif soft_fail: 66 | return False 67 | else: 68 | raise exp.StageError(f"{tool} not found") 69 | 70 | 71 | def _installed_package_version(package: str, os_version: OS) -> Union[bool, str]: 72 | """ 73 | This function is a simple wrapper around "apt-cache policy" that 74 | avoids a dependency on python-apt. It returns the installed version 75 | of the package when installed or "False" when not installed. 
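On Ubuntu it wraps `apt-cache policy <package>` and parses the "Installed:"
line; on Rocky Linux it wraps `dnf info <package>` and parses the "Version :"
line.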
76 |     """
77 |     if os_version == OS.UBUNTU:
78 |         # Get package info
79 |         try:
80 |             cmd = ["apt-cache", "policy", package]
81 |             package_info = (
82 |                 subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
83 |                 .decode("utf-8")
84 |                 .split("\n")
85 |             )
86 |         except (FileNotFoundError, subprocess.CalledProcessError) as e:
87 |             raise exp.Error("apt-cache policy command failed") from e
88 | 
89 |         # Return False if package was not found
90 |         if len(package_info) == 1:
91 |             return False
92 | 
93 |         # Return version number
94 |         # package_info[1] has the format "Installed: <version_number>"
95 |         return package_info[1].split(":")[1].replace(" ", "")
96 |     elif os_version == OS.ROCKY:
97 |         # Get package info
98 |         cmd = ["dnf", "info", package]
99 |         try:
100 |             package_info = (
101 |                 subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
102 |                 .decode("utf-8")
103 |                 .split("\n")
104 |             )
105 |         except FileNotFoundError as e:
106 |             raise exp.Error("dnf info command failed") from e
107 |         except subprocess.CalledProcessError as e:
108 |             # Return False if package was not found
109 |             return False
110 | 
111 |         # Return version number
112 |         # package_info[3] has the format "Version : <version_number>"
113 |         return package_info[3].split(":")[1].replace(" ", "")
114 |     else:
115 |         # The following exception will only be raised if a GroqFlow dev forgets to update
116 |         # _installed_package_version() when adding support for a new OS
117 |         raise exp.EnvError(
118 |             f"_installed_package_version not implemented for {os_version}"
119 |         )
120 | 
121 | 
122 | def version_a_less_than_b(version_a, version_b: str):
123 |     """
124 |     Return True if version_a < version_b, following the scheme:
125 |     major.minor.patch.patchpatch~release_candidate_number
126 | 
127 |     The release_candidate_number should be ignored.
128 |     """
129 | 
130 |     # Strip the release candidate number, if any
131 |     clean_version_a = version_a.split("~")[0]
132 |     clean_version_b = version_b.split("~")[0]
133 | 
134 |     return parse_version(clean_version_a) < parse_version(clean_version_b)
135 | 
136 | 
137 | def version_is_valid(
138 |     sdkv: Union[str, bool],
139 |     required: bool,
140 |     requirement_name: str,
141 |     exception_type: Type[Exception] = exp.EnvError,
142 |     hint: str = "",
143 | ):
144 |     """
145 |     Raise an exception if the required version number is not installed
146 |     """
147 | 
148 |     msg = (
149 |         f"{requirement_name}>={MIN_RELEASE_VERSION} is a required dependency "
150 |         "for this part of GroqFlow"
151 |     )
152 | 
153 |     # Package not found
154 |     if not sdkv and required:
155 |         msg = msg + f". However, {requirement_name} was not found. "
156 |         raise exception_type(msg + hint)
157 | 
158 |     # Package found, but version is not acceptable
159 |     elif version_a_less_than_b(sdkv, MIN_RELEASE_VERSION) and required:
160 |         msg = msg + f" ({sdkv} is installed). "
161 |         raise exception_type(msg + hint)
162 | 
163 | 
164 | def validate_os_version() -> OS:
165 | 
166 |     supported_os_names = [x.value for x in OS]
167 |     unsupported_os_msg = (
168 |         "Your OS must be one of the following Linux distributions: "
169 |         f"{', '.join(supported_os_names)}. Please refer to our installation "
170 |         "guide for more details on supported versions."
171 | ) 172 | 173 | # Check if this is a linux-based OS 174 | if not os.path.isfile("/etc/os-release"): 175 | raise exp.EnvError(unsupported_os_msg) 176 | 177 | # Parse OS-release data 178 | with open("/etc/os-release", encoding="utf-8") as f: 179 | os_release = {} 180 | for line in f: 181 | k, v = line.rstrip().split("=") 182 | os_release[k] = v.replace('"', "") 183 | 184 | # Check if OS is supported 185 | if os_release["NAME"] not in supported_os_names: 186 | raise exp.EnvError(unsupported_os_msg) 187 | 188 | return OS(os_release["NAME"]) 189 | 190 | 191 | def validate_devtools( 192 | os_version: OS, 193 | required=False, 194 | exception_type: Type[Exception] = exp.EnvError, 195 | ): 196 | version = _installed_package_version("groq-devtools", os_version) 197 | hint = "Please contact sales@groq.com to get access to groq-devtools." 198 | version_is_valid(version, required, "groq-devtools", exception_type, hint) 199 | 200 | 201 | def validate_runtime( 202 | os_version: OS, 203 | required=False, 204 | exception_type: Type[Exception] = exp.EnvError, 205 | ): 206 | version = _installed_package_version("groq-runtime", os_version) 207 | hint = "Please contact sales@groq.com to get access to groq-runtime." 208 | version_is_valid(version, required, "groq-runtime", exception_type, hint) 209 | 210 | 211 | # Returns the root directory of the current git repo and any associated 212 | # error from running the git command 213 | def get_repo_root(): 214 | p = subprocess.Popen( 215 | ["git", "rev-parse", "--show-toplevel"], 216 | stdout=subprocess.PIPE, 217 | stderr=subprocess.PIPE, 218 | ) 219 | out, err = p.communicate() 220 | repo = out.decode("utf-8") 221 | repo = repo.rstrip("\n") 222 | err = err.decode("utf-8") 223 | return repo, err 224 | 225 | 226 | def validate_bake(): 227 | if not shutil.which("bake"): 228 | raise exp.EnvError( 229 | ( 230 | "Bake must be available when the env var " 231 | f'{build.environment_variables["dont_use_sdk"]} is set to True' 232 | ) 233 | ) 234 | 235 | # bake commands require Groq to be current git repo 236 | repo, err = get_repo_root() 237 | groq_root = repo.split("/")[-1] == "Groq" 238 | 239 | if err: 240 | raise exp.EnvError( 241 | ( 242 | "You must be inside the Groq repo when the env var " 243 | f'{build.environment_variables["dont_use_sdk"]} is set to True. ' 244 | f"groqit() returned with error {err}" 245 | ) 246 | ) 247 | 248 | elif not groq_root: 249 | raise exp.EnvError( 250 | ( 251 | "You must be inside the Groq repo when the env var " 252 | f'{build.environment_variables["dont_use_sdk"]} is set to True. 
' 253 | f"groqit() detected you are inside repo {repo}" 254 | ) 255 | ) 256 | 257 | 258 | def check_dependencies( 259 | require_devtools: bool = False, 260 | require_runtime: bool = False, 261 | exception_type: Type[Exception] = exp.EnvError, 262 | ): 263 | 264 | # Skip dependency check if necessary 265 | if os.environ.get("GROQFLOW_SKIP_SDK_CHECK") == "True": 266 | return True 267 | 268 | # Check for bake if SDK is not being used 269 | if not build.USE_SDK: 270 | validate_bake() 271 | # Check for the different SDK components when using the SDK 272 | # Skip all checks if using CI 273 | else: 274 | os_version = validate_os_version() 275 | 276 | # Only check for the package that is required 277 | if require_devtools: 278 | validate_devtools( 279 | os_version=os_version, 280 | required=require_devtools, 281 | exception_type=exception_type, 282 | ) 283 | 284 | # Only check for the package that is required 285 | if require_runtime: 286 | validate_runtime( 287 | os_version=os_version, 288 | required=require_runtime, 289 | exception_type=exception_type, 290 | ) 291 | -------------------------------------------------------------------------------- /groqflow/groqmodel/__init__.py: -------------------------------------------------------------------------------- 1 | from .groqmodel import GroqModel 2 | from .groqmodel import load 3 | -------------------------------------------------------------------------------- /groqflow/groqmodel/execute.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following script is used to get the latency and outputs of a given run on the GroqChip. 3 | This script doesn't depend on GroqFlow to be executed. 4 | """ 5 | # pylint: disable = no-name-in-module 6 | # pylint: disable = import-error 7 | import argparse 8 | from timeit import Timer 9 | from typing import Tuple, List 10 | import numpy as np 11 | import groq.api as g 12 | import groq.runner.tsp as tsp 13 | 14 | 15 | def get_multi_tsp_runner( 16 | compile_dir: str, topology: str, bringup_topology: bool = False 17 | ) -> tsp.local_runner.MultichipTSPRunner: 18 | 19 | # FIXME: topo_config is defined in two files, both assembler_multichip.py 20 | # and execute.py. If you modify this code, make sure to modify it in 21 | # both places. We will remove this code replication when we are able to 22 | # import the groqit.misc package into execute.py. 
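# Note: only the 2-, 4-, and 8-chip Dragonfly topologies are mapped in
# topo_config below, so multi-chip execution through this script assumes
# one of those configurations.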
23 | 
24 |     # Declare different topologies
25 |     topo_config = {
26 |         "DF_A14_2_CHIP": g.TopologyConfig.DF_A14_2_CHIP,
27 |         "DF_A14_4_CHIP": g.TopologyConfig.DF_A14_4_CHIP,
28 |         "DF_A14_8_CHIP": g.TopologyConfig.DF_A14_8_CHIP,
29 |     }
30 | 
31 |     if bringup_topology:
32 |         print("Bringup C2C topology...")
33 |         tsp.bringup_topology(user_config=topo_config[topology])
34 | 
35 |     program_name = "output"
36 |     tsp_runner = tsp.create_multi_tsp_runner(
37 |         program_name,
38 |         compile_dir,
39 |         program_name,
40 |         user_config=topo_config[topology],
41 |     )
42 |     return tsp_runner
43 | 
44 | 
45 | def rtime(func, num_times: int, *args, **kwargs) -> Tuple[float, List]:
46 |     """
47 |     Time a given function over num_times executions and return the total
48 |     elapsed time in seconds, along with the function's output
49 |     """
50 |     output_container = []
51 | 
52 |     def wrapper():
53 |         output_container.append(func(*args, **kwargs))
54 | 
55 |     timer = Timer(wrapper)
56 |     delta = timer.timeit(num_times)
57 |     return delta, output_container.pop()
58 | 
59 | 
60 | def run(
61 |     input_batch: np.ndarray,
62 |     num_chips: int,
63 |     output_dir: str,
64 |     topology: str,
65 |     bringup_topology: bool,
66 |     repetitions=1,
67 | ) -> Tuple[float, List]:
68 | 
69 |     # Get tsp_runner
70 |     if num_chips == 1:
71 |         iop_file = f"{output_dir}/compile/output.iop"
72 |         tsp_runner = tsp.create_tsp_runner(iop_file)
73 |     else:
74 |         compile_dir = f"{output_dir}/compile"
75 |         tsp_runner = get_multi_tsp_runner(compile_dir, topology, bringup_topology)
76 | 
77 |     # The multi-TSP runner pipelines inputs through the entire topology of
78 |     # the program one chip at a time, so to get the actual output from the
79 |     # entire graph we need to invoke the runner `num_chips` times
80 |     def forward_multichip(example):
81 |         for _ in range(num_chips):
82 |             output = tsp_runner(**example)
83 |         return output
84 | 
85 |     # Forward function for models compiled for a single chip
86 |     def forward_singlechip(example):
87 |         return tsp_runner(**example)
88 | 
89 |     forward = forward_singlechip if num_chips == 1 else forward_multichip
90 |     batch_size = len(input_batch)
91 |     output_batch = []
92 |     total_latency = 0.0
93 |     for idx in range(batch_size):
94 |         example = input_batch[idx]
95 |         latency, output = rtime(forward, repetitions, example)
96 |         total_latency += latency
97 |         output_batch.append(output)
98 | 
99 |     total_latency = total_latency / repetitions / batch_size
100 | 
101 |     return total_latency, output_batch
102 | 
103 | 
104 | if __name__ == "__main__":
105 | 
106 |     # Disabling lint warning for using pickle
107 |     # pylint: disable = unexpected-keyword-arg
108 | 
109 |     # Terminology:
110 |     # This script receives a batch of inputs (input_batch)
111 |     # Each element of this batch is called an "example"
112 |     # Each example may contain one or more arguments
113 | 
114 |     # Parse Inputs
115 |     parser = argparse.ArgumentParser(description="Execute models built by GroqFlow")
116 |     parser.add_argument(
117 |         "num_chips",
118 |         type=int,
119 |         help="Number of chips used to build the model",
120 |     )
121 |     parser.add_argument("output_dir", help="Path where the build files are stored")
122 |     parser.add_argument("outputs_file", help="File in which the outputs will be saved")
123 |     parser.add_argument("latency_file", help="File in which the latency will be saved")
124 |     parser.set_defaults(bringup_topology=False)
125 |     parser.add_argument("topology", help="GroqChip topology used when building model")
126 |     parser.add_argument(
127 |         "repetitions",
128 |         type=int,
129 |         help="Number of times to execute the received inputs",
130 |     )
131 | 
parser.add_argument(
132 | "--bringup_topology",
133 | help="Describes whether or not the topology should be initialized",
134 | action="store_true",
135 | )
136 | args = vars(parser.parse_args())
137 | 
138 | # Read inputs
139 | input_file = f"{args['output_dir']}/inputs.npy"
140 | input_batch = np.load(input_file, allow_pickle=True)
141 | 
142 | # Get latency/output_data
143 | latency, output_data = run(
144 | input_batch,
145 | args["num_chips"],
146 | args["output_dir"],
147 | args["topology"],
148 | args["bringup_topology"],
149 | repetitions=args["repetitions"],
150 | )
151 | 
152 | # Save results to file
153 | with open(args["outputs_file"], "wb") as f:
154 | np.save(f, output_data)
155 | with open(args["latency_file"], "wb") as f:
156 | np.save(f, latency)
157 | 
-------------------------------------------------------------------------------- /groqflow/groqmodel/remote.py: --------------------------------------------------------------------------------
1 | import base64
2 | import hashlib
3 | import io
4 | import os
5 | from typing import Any, Collection, Dict, List, Tuple
6 | from dataclasses import dataclass
7 | import requests
8 | import numpy as np
9 | import groqflow.common.build as build
10 | 
11 | 
12 | @dataclass
13 | class UploadUrls:
14 | iops: List[str]
15 | inputs: List[Dict[str, str]]
16 | use_cached_iop: bool
17 | 
18 | 
19 | @dataclass
20 | class DownloadUrls:
21 | outputs: List[Dict[str, str]]
22 | 
23 | 
24 | # NOTE: frozen=True because mutations between upload and run wouldn't be caught
25 | # otherwise (objects are passed by reference)
26 | @dataclass(frozen=True)
27 | class RemoteGroqModel:
28 | user_name: str
29 | build_name: str
30 | iop_path: str
31 | num_chips: int
32 | num_iterations: int
33 | input_batch: List[Dict[str, np.ndarray]]
34 | input_names: List[str]
35 | post_check_remote_cache_endpoint: str
36 | post_upload_urls_endpoint: str
37 | post_execute_endpoint: str
38 | 
39 | def _serialize(self, data: np.ndarray) -> bytes:
40 | buffer = io.BytesIO()
41 | np.save(buffer, data, allow_pickle=False)
42 | buffer.seek(0)
43 | return buffer.read()
44 | 
45 | def _deserialize(self, data_raw: bytes) -> np.ndarray:
46 | buffer = io.BytesIO(data_raw)
47 | buffer.seek(0)
48 | return np.load(buffer, allow_pickle=False)
49 | 
50 | def _upload_helper(self, url: str, data_raw: bytes) -> None:
51 | # TODO:(epatrick): error handling
52 | _response = requests.put(
53 | url,
54 | headers={"Content-Type": "application/octet-stream"},
55 | data=data_raw,
56 | )
57 | 
58 | def _download_helper(self, url: str) -> bytes:
59 | response = requests.get(
60 | url,
61 | headers={"Content-Type": "application/octet-stream"},
62 | )
63 | return response.content
64 | 
65 | def _calc_file_md5(self, file: str) -> str:
66 | with open(file, "rb") as iop:
67 | data_bytes = iop.read()
68 | md5_bytes = hashlib.md5(data_bytes).digest()
69 | return base64.b64encode(md5_bytes).decode("utf-8")
70 | 
71 | def _calc_iop_md5s(self) -> List[str]:
72 | iop_files = (
73 | [f"{self.iop_path}/output.iop"]
74 | if self.num_chips == 1
75 | else [f"{self.iop_path}/output.{i}.iop" for i in range(self.num_chips)]
76 | )
77 | return list(map(self._calc_file_md5, iop_files))
78 | 
79 | def check_remote_cache(self) -> bool:
80 | iop_md5s = self._calc_iop_md5s()
81 | response = requests.post(
82 | self.post_check_remote_cache_endpoint,
83 | json={
84 | "user_name": self.user_name,
85 | "build_name": self.build_name,
86 | "num_chips": self.num_chips,
87 | "iop_md5s": iop_md5s,
88 | },
89 | )
90 | 91 | if not response.ok: 92 | # NOTE: we may choose to ignore the exception and pretend the cache 93 | # returned false but that should be done at the callsite 94 | raise Exception( 95 | f"error status code: {response.status_code}, message: {response.text}" 96 | ) 97 | 98 | body = response.json() 99 | cache_hit: bool = body["cache_hit"] 100 | return cache_hit 101 | 102 | def get_upload_urls(self, skip_iop_urls: bool = False) -> UploadUrls: 103 | response = requests.post( 104 | self.post_upload_urls_endpoint, 105 | json={ 106 | "user_name": self.user_name, 107 | "build_name": self.build_name, 108 | "num_chips": self.num_chips, 109 | "input_names": self.input_names, 110 | "batch_size": len(self.input_batch), 111 | "skip_iop_urls": skip_iop_urls, 112 | }, 113 | ) 114 | 115 | if not response.ok: 116 | raise Exception( 117 | f"error status code: {response.status_code}, message: {response.text}" 118 | ) 119 | 120 | body = response.json() 121 | 122 | input_urls: List[Dict[str, str]] = body["input_urls"] 123 | iop_urls: List[str] = [] if skip_iop_urls else body["iop_urls"] 124 | 125 | return UploadUrls( 126 | iops=iop_urls, inputs=input_urls, use_cached_iop=skip_iop_urls 127 | ) 128 | 129 | def _upload_batch( 130 | self, input_batch: Dict[str, np.ndarray], upload_urls_batch: Dict[str, str] 131 | ) -> None: 132 | for input_name, input_data in input_batch.items(): 133 | input_url = upload_urls_batch[input_name] 134 | input_raw = self._serialize(input_data) 135 | self._upload_helper(input_url, input_raw) 136 | 137 | def upload(self, upload_urls: UploadUrls) -> None: 138 | # TODO: error handling 139 | if not upload_urls.use_cached_iop: 140 | if self.num_chips == 1: 141 | iop_files = [f"{self.iop_path}/output.iop"] 142 | else: 143 | iop_files = [ 144 | f"{self.iop_path}/output.{i}.iop" for i in range(self.num_chips) 145 | ] 146 | 147 | for iop_file, iop_url in zip(iop_files, upload_urls.iops): 148 | with open(iop_file, "rb") as iop: 149 | self._upload_helper(iop_url, iop) 150 | 151 | for batch_index, input_batch in enumerate(self.input_batch): 152 | self._upload_batch(input_batch, upload_urls.inputs[batch_index]) 153 | 154 | def _execute(self) -> Tuple[DownloadUrls, Dict[str, Any]]: 155 | response = requests.post( 156 | self.post_execute_endpoint, 157 | json={ 158 | "user_name": self.user_name, 159 | "build_name": self.build_name, 160 | "num_chips": self.num_chips, 161 | "input_names": self.input_names, 162 | "batch_size": len(self.input_batch), 163 | "num_iterations": self.num_iterations, 164 | }, 165 | ) 166 | 167 | if not response.ok: 168 | raise Exception( 169 | f"error status code: {response.status_code}, message: {response.text}" 170 | ) 171 | 172 | body = response.json() 173 | output_urls = body["output_urls"] 174 | stats = body["stats"] 175 | 176 | return DownloadUrls(outputs=output_urls), stats 177 | 178 | def _download(self, download_urls: DownloadUrls) -> List[Dict[str, np.ndarray]]: 179 | outputs = [] 180 | for output_urls in download_urls.outputs: 181 | output = {} 182 | for output_name, output_url in output_urls.items(): 183 | output_raw = self._download_helper(output_url) 184 | output[output_name] = self._deserialize(output_raw) 185 | outputs.append(output) 186 | return outputs 187 | 188 | def run(self) -> Tuple[List[Dict[str, np.ndarray]], Dict[str, Any]]: 189 | """ 190 | Invokes this remote groq model. 
191 | 192 | Returns: (output_batch, stats) 193 | An output_batch where output_batch[i] corresponds to input_batch[i] 194 | A dictionary of stats for how the model ran on TSPs 195 | """ 196 | download_urls, stats = self._execute() 197 | output_batch = self._download(download_urls) 198 | return output_batch, stats 199 | 200 | 201 | class RemoteClient: 202 | """ 203 | A client for running TSP models using remote backend 204 | """ 205 | 206 | # Backend URL is the IP of where the remote server is hosted 207 | # TODO: Replace backend_url by a hostname 208 | def __init__(self, backend_url: str = "http://34.125.159.215"): 209 | self.post_check_remote_cache_endpoint = f"{backend_url}/storage/cache/check" 210 | self.post_upload_urls_endpoint = f"{backend_url}/storage/upload-urls" 211 | self.post_execute_endpoint = f"{backend_url}/execute" 212 | self.user_name = os.getlogin() 213 | 214 | def upload( 215 | self, 216 | user_name: str, 217 | build_name: str, 218 | compile_dir: str, 219 | num_chips: int, 220 | input_batch: Collection[Dict[str, np.ndarray]], 221 | num_iterations: int = 1, 222 | ) -> RemoteGroqModel: 223 | """ 224 | A lower level interface to upload a remote groq model ahead of time. You may 225 | invoke the remote groq model with the returned RemoteGroqModel object. 226 | 227 | You should also use this interface if you want to combine the functionality of 228 | benchmark and run_abunch. 229 | 230 | Args: 231 | user_name: Username of the caller 232 | build_name: Name of the build 233 | compile_dir: Full path to the directory containing the IOP file(s) 234 | num_chips: Number of chips for the remote groq model 235 | input_batch: Data used as input for the remote groq model. Execution 236 | will be done once per batch 237 | num_iterations: How many executions the statistics should be averaged over 238 | (default = 1) 239 | 240 | Returns: 241 | A RemoteGroqModel object that can be used to invoke the uploaded model 242 | """ 243 | 244 | input_names = [] if len(input_batch) == 0 else list(list(input_batch)[0].keys()) 245 | remote_gm = RemoteGroqModel( 246 | user_name, 247 | build_name, 248 | compile_dir, 249 | num_chips, 250 | num_iterations, 251 | input_batch, 252 | input_names, 253 | self.post_check_remote_cache_endpoint, 254 | self.post_upload_urls_endpoint, 255 | self.post_execute_endpoint, 256 | ) 257 | cache_hit = remote_gm.check_remote_cache() 258 | upload_urls = remote_gm.get_upload_urls(skip_iop_urls=cache_hit) 259 | remote_gm.upload(upload_urls) 260 | return remote_gm 261 | 262 | def execute( 263 | self, 264 | state: build.GroqState, 265 | repetitions: int, 266 | ): 267 | """ 268 | Executes a build on the given inputs and saves results to disk. 
269 | 
270 | Args:
271 | state: State of the build being executed
272 | repetitions: Number of times to execute a build
273 | """
274 | inputs_file = state.execution_inputs_file
275 | inputs_data = np.load(inputs_file, allow_pickle=True)
276 | latency_file = state.latency_file
277 | outputs_file = state.outputs_file
278 | remote_gm = self.upload(
279 | self.user_name,
280 | state.config.build_name,
281 | state.compile_dir,
282 | state.num_chips_used,
283 | inputs_data,
284 | repetitions,
285 | )
286 | output_batch, stats = remote_gm.run()
287 | latency_avg = stats["exec_time_seconds"]["mean"]
288 | np.save(latency_file, latency_avg)
289 | outputs_data = output_batch
290 | np.save(outputs_file, outputs_data, allow_pickle=True)
291 | 
-------------------------------------------------------------------------------- /groqflow/justgroqit/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/groq/groqflow/32740b44aea43d4ecf5d2fa4a2ce3d0f040e8bf0/groqflow/justgroqit/__init__.py
-------------------------------------------------------------------------------- /groqflow/justgroqit/assemble_multichip.py: --------------------------------------------------------------------------------
1 | import argparse
2 | import groq.api as g
3 | 
4 | 
5 | def assembler_multichip(topology, compile_dir, is_large_program=False):
6 | 
7 | # FIXME: topo_config is defined in two files, both assemble_multichip.py
8 | # and execute.py. If you modify this code, make sure to modify it in
9 | # both places. We will remove this code replication when we are able to
10 | # import groqflow packages into these files.
11 | 
12 | # Identify the topology. The topology specified with
13 | # groq-compiler should match the one configured here.
14 | topo_config = {
15 | "DF_A14_2_CHIP": g.TopologyConfig.DF_A14_2_CHIP,
16 | "DF_A14_4_CHIP": g.TopologyConfig.DF_A14_4_CHIP,
17 | "DF_A14_8_CHIP": g.TopologyConfig.DF_A14_8_CHIP,
18 | "DF_A14_16_CHIP": g.TopologyConfig.DF_A14_16_CHIP,
19 | "DF_A14_32_CHIP": g.TopologyConfig.DF_A14_32_CHIP,
20 | "DF_A14_64_CHIP": g.TopologyConfig.DF_A14_64_CHIP,
21 | "RT09_A14_16_CHIP": g.TopologyConfig.RT09_A14_16_CHIP,
22 | "RT09_A14_32_CHIP": g.TopologyConfig.RT09_A14_32_CHIP,
23 | "RT09_A14_40_CHIP": g.TopologyConfig.RT09_A14_40_CHIP,
24 | "RT09_A14_48_CHIP": g.TopologyConfig.RT09_A14_48_CHIP,
25 | "RT09_A14_56_CHIP": g.TopologyConfig.RT09_A14_56_CHIP,
26 | "RT09_A14_64_CHIP": g.TopologyConfig.RT09_A14_64_CHIP,
27 | "RT09_A14_72_CHIP": g.TopologyConfig.RT09_A14_72_CHIP,
28 | }
29 | 
30 | # Select topology
31 | topo = g.configure_topology(config=topo_config[topology])
32 | 
33 | # Initiate the program package object with package name and output directory
34 | md_pgm_pkg = g.ProgramPackage(name="output", output_dir=compile_dir)
35 | 
36 | # Assign the name and topology to create the program context
37 | pgm_ctx = md_pgm_pkg.create_program_context("output", topo)
38 | 
39 | # Add the .aa files created by groq-compiler to the program package
40 | md_pgm_pkg.add_precompiled_program(pgm_ctx, compile_dir, "output")
41 | 
42 | # If any extra instruction memory slices were defined
43 | # during groq-compiler, add them here.
44 | if is_large_program:
45 | extra_slices = [
46 | "West 18",
47 | "West 19",
48 | "East 17",
49 | "East 18",
50 | "East 19",
51 | "East 38",
52 | ]
53 | else:
54 | extra_slices = []
55 | 
56 | # The .assemble method takes all the files and topologies and
57 | # assembles the multi chip program package.
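    # Example invocation of this script (the build path is hypothetical; the
    # -t and -d flags are defined in the argparse section below):
    #
    #   python assemble_multichip.py -t DF_A14_4_CHIP \
    #       -d ~/.cache/groqflow/my_build/compile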
58 | print("Starting multi-chip assembling process", flush=True) 59 | md_pgm_pkg.assemble(extra_ifetch_slices=extra_slices, ifetch_from_self=True) 60 | 61 | 62 | if __name__ == "__main__": 63 | 64 | # Parse command line arguments 65 | parser = argparse.ArgumentParser() 66 | parser.add_argument( 67 | "-t", 68 | dest="topology", 69 | help="GroqCard topology for multi-chip assembly", 70 | required=True, 71 | ) 72 | parser.add_argument( 73 | "-d", 74 | dest="compile_dir", 75 | help="Directory for inputs and outputs", 76 | required=True, 77 | ) 78 | parser.add_argument( 79 | "-l", 80 | dest="is_large_program", 81 | help="If compiler uses --large-program the set to True", 82 | required=False, 83 | default=None, 84 | ) 85 | 86 | args = parser.parse_args() 87 | 88 | # Run script 89 | assembler_multichip(args.topology, args.compile_dir, args.is_large_program) 90 | -------------------------------------------------------------------------------- /groqflow/justgroqit/export.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | import sys 4 | import warnings 5 | import torch 6 | import onnxflow.justbuildit.stage as stage 7 | import onnxflow.common.exceptions as exp 8 | import onnxflow.common.tensor_helpers as tensor_helpers 9 | import groqflow.common.build as build 10 | import groqflow.common.onnx_helpers as onnx_helpers 11 | import groqflow.common.sdk_helpers as sdk 12 | 13 | 14 | def _warn_to_stdout(message, category, filename, line_number, _, line): 15 | sys.stdout.write( 16 | warnings.formatwarning(message, category, filename, line_number, line) 17 | ) 18 | 19 | 20 | class CheckOnnxCompatibility(stage.Stage): 21 | """ 22 | Stage that takes an ONNX file, checks whether it is compatible 23 | with Groq Compiler, and raises an exception if the ONNX file is 24 | not compatible. 25 | 26 | Expected inputs: 27 | - state.intermediate_results contains a single .onnx file 28 | 29 | Outputs: 30 | - The same ONNX file as the input 31 | """ 32 | 33 | def __init__(self): 34 | super().__init__( 35 | unique_name="check_compatibility", 36 | monitor_message="Checking for Op support", 37 | ) 38 | 39 | def fire(self, state: build.GroqState): 40 | 41 | sdk.check_dependencies(require_devtools=True, exception_type=exp.StageError) 42 | 43 | # TODO: validate this input 44 | # https://git.groq.io/code/Groq/-/issues/13947 45 | input_onnx = state.intermediate_results[0] 46 | 47 | ( 48 | state.info.opt_onnx_ops, 49 | state.info.opt_onnx_unsupported_ops, 50 | ) = onnx_helpers.check_ops(input_onnx, state.use_sdk) 51 | print(f"Model has {len(state.info.opt_onnx_unsupported_ops)} unsupported ops") 52 | 53 | state.info.opt_onnx_all_ops_supported = ( 54 | len(state.info.opt_onnx_unsupported_ops) == 0 55 | and len(state.info.opt_onnx_ops) != 0 56 | ) 57 | 58 | if not state.info.opt_onnx_all_ops_supported: 59 | ops = ", ".join(state.info.opt_onnx_unsupported_ops) 60 | msg = f""" 61 | You model contains ONNX operation(s) that are not supported by Groq Compiler: 62 | **{ops}** 63 | Please replace these operation(s) in your model or contact 64 | sales@groq.com to request improved operation support in Groq Compiler. 65 | """ 66 | raise exp.StageError(msg) 67 | 68 | return state 69 | 70 | 71 | class ExportPytorchToTorchScript(stage.Stage): 72 | """ 73 | Stage that takes a Pytorch module and exports it to TorchScript using 74 | torch.jit API. 
75 | 
76 | Expected inputs:
77 | - state.model is a torch.nn.Module or torch.jit.ScriptModule
78 | - state.inputs is a dict that represents valid kwargs to the forward
79 | function of state.model
80 | 
81 | Outputs:
82 | - A *.pt file that implements state.model given state.inputs
83 | """
84 | 
85 | def __init__(self):
86 | super().__init__(
87 | unique_name="export_pytorch_to_torch_script",
88 | monitor_message="Exporting PyTorch to TorchScript",
89 | )
90 | 
91 | @staticmethod
92 | def _check_model(torch_script_file, success_message, fail_message) -> bool:
93 | if os.path.isfile(torch_script_file):
94 | print(success_message)
95 | return True
96 | else:
97 | print(fail_message)
98 | return False
99 | 
100 | def fire(self, state: build.GroqState):
101 | if not isinstance(state.model, (torch.nn.Module, torch.jit.ScriptModule)):
102 | msg = f"""
103 | The current stage (ExportPytorchToTorchScript) is only compatible
104 | with models of type torch.nn.Module or torch.jit.ScriptModule,
105 | however the stage received a model of type {type(state.model)}.
106 | """
107 | raise exp.StageError(msg)
108 | 
109 | if isinstance(state.model, torch.nn.Module):
110 | # Validate user provided args
111 | all_args = list(inspect.signature(state.model.forward).parameters.keys())
112 | 
113 | for inp in list(state.inputs.keys()):
114 | if inp not in all_args:
115 | msg = f"""
116 | Input name {inp} not found in the model's forward method. Available
117 | input names are: {all_args}
118 | """
119 | raise ValueError(msg)
120 | 
121 | # Send torch export warnings to stdout (and therefore the log file)
122 | # so that they don't fill up the command line
123 | default_warnings = warnings.showwarning
124 | warnings.showwarning = _warn_to_stdout
125 | 
126 | # Export the model to TorchScript
127 | jit_module = torch.jit.trace(
128 | state.model,
129 | example_kwarg_inputs=state.inputs,
130 | )
131 | 
132 | # Save model to disk
133 | os.makedirs(state.torch_script_dir, exist_ok=True)
134 | jit_module.save(state.torch_script_file)
135 | 
136 | # Save output names to ensure we are preserving the order of the outputs.
137 | # We have to re-load the torchscript module because the output names
138 | # will change during serialization.
139 | loaded_jit_module = torch.jit.load(state.torch_script_file)
140 | state.expected_output_names = [
141 | output.debugName() for output in loaded_jit_module.graph.outputs()
142 | ]
143 | 
144 | # Restore default warnings behavior
145 | warnings.showwarning = default_warnings
146 | 
147 | tensor_helpers.save_inputs(
148 | [state.inputs], state.original_inputs_file, downcast=False
149 | )
150 | 
151 | # Check if the base model has been exported successfully
152 | success_msg = "\tSuccess exporting model to TorchScript"
153 | fail_msg = "\tFailed exporting model to TorchScript"
154 | state.info.torch_script_exported = self._check_model(
155 | state.torch_script_file, success_msg, fail_msg
156 | )
157 | 
158 | if state.info.torch_script_exported:
159 | state.intermediate_results = [state.torch_script_file]
160 | else:
161 | msg = f"""
162 | Unable to export model to TorchScript using Torch's jit exporter.
163 | We recommend that you modify your model until it is
164 | compatible with this third-party software, then re-run.
165 | More information may be available in the log file at **{self.logfile_path}** 166 | """ 167 | raise exp.StageError(msg) 168 | 169 | return state 170 | -------------------------------------------------------------------------------- /groqflow/justgroqit/groqit.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Dict, Any 2 | from collections.abc import Collection 3 | import onnxflow.common.printing as printing 4 | import onnxflow.justbuildit.stage as stage 5 | import onnxflow.common.build as of_build 6 | import groqflow.justgroqit.ignition as ignition 7 | import groqflow.groqmodel as groqmodel 8 | import groqflow.common.build as build 9 | 10 | 11 | def groqit( 12 | model: of_build.UnionValidModelInstanceTypes = None, 13 | inputs: Optional[Dict[str, Any]] = None, 14 | build_name: Optional[str] = None, 15 | cache_dir: Optional[str] = build.DEFAULT_CACHE_DIR, 16 | monitor: bool = True, 17 | rebuild: Optional[str] = None, 18 | compiler_flags: Optional[List[str]] = None, 19 | assembler_flags: Optional[List[str]] = None, 20 | num_chips: Optional[int] = None, 21 | topology: str = build.TOPOLOGY, 22 | groqview: bool = False, 23 | sequence: Optional[stage.Sequence] = None, 24 | quantization_samples: Collection = None, 25 | ) -> groqmodel.GroqModel: 26 | 27 | """Use GroqFlow to build a model instance into a GroqModel 28 | object that can be executed on GroqChip processors. 29 | 30 | Args: 31 | model: Model to be mapped to a GroqModel, which can be a PyTorch 32 | model instance, Keras model instance, a path to an ONNX file, or 33 | a path to a Python script that follows the GroqFlow model.py template. 34 | inputs: Example inputs to the user's model. The GroqModel will be 35 | compiled to handle inputs with the same static shape only. Argument 36 | is not required if the model input is a GroqFlow model.py file. 37 | build_name: Unique name for the model that will be 38 | used to store the GroqModel and build state on disk. Defaults to the 39 | name of the file that calls groqit(). 40 | cache_dir: Directory to use as the GroqFlow cache for this build. Output files 41 | from this build will be stored at cache_dir/build_name/ 42 | Defaults to ~/.cache/groqflow 43 | monitor: Display a monitor on the command line that 44 | tracks the progress of groqit as it builds the GroqModel. 45 | rebuild: determines whether to rebuild or load a cached build. Options: 46 | - "if_needed" (default): overwrite invalid cached builds with a rebuild 47 | - "always": overwrite valid cached builds with a rebuild 48 | - "never": load cached builds without checking validity, with no guarantee 49 | of functionality or correctness 50 | - None: Falls back to default 51 | compiler_flags: Override groqit's default compiler flags with a list 52 | of user-specified flags. 53 | assembler_flags: Override groqit's default assembler flags with a 54 | list of user-specified flags. 55 | num_chips: Override the default number of GroqChip processors to be 56 | used instead of letting groqit decide automatically. Power users 57 | only. 58 | topology: Override the default topology when num_chips > 1. Power users 59 | only. 60 | groqview: If set, creates a GroqView file for the model during the 61 | build process. Defaults to false because this option uses up 62 | significant time and compute/RAM resources. 63 | sequence: Override groqit's default sequence of build stages. Power 64 | users only. 
65 | quantization_samples: If set, performs post-training quantization
66 | on the ONNX model using the provided samples, then generates a
67 | GroqModel from the quantized model. If the previous build used samples
68 | that are different from the samples used in the current build, the "rebuild"
69 | argument needs to be manually set to "always" in the current build
70 | in order to create a new GroqModel.
71 | """
72 | 
73 | # Validate and lock in the groqit() config (user arguments that
74 | # configure the build) that will be used by the rest of groqit()
75 | config = ignition.lock_config(
76 | model=model,
77 | build_name=build_name,
78 | compiler_flags=compiler_flags,
79 | assembler_flags=assembler_flags,
80 | groqview=groqview,
81 | groqcard=build.GROQCARD,
82 | num_chips=num_chips,
83 | topology=topology,
84 | sequence=sequence,
85 | )
86 | 
87 | # Analyze the user's model argument and lock in the model, inputs,
88 | # and sequence that will be used by the rest of groqit()
89 | (model_locked, inputs_locked, sequence_locked, model_type) = ignition.model_intake(
90 | model,
91 | inputs,
92 | sequence,
93 | config,
94 | user_quantization_samples=quantization_samples,
95 | )
96 | 
97 | # Get the state of the model from the GroqFlow cache if a valid build is available
98 | state = ignition.load_or_make_state(
99 | config=config,
100 | cache_dir=cache_dir,
101 | rebuild=rebuild or build.DEFAULT_REBUILD_POLICY,
102 | model_type=model_type,
103 | monitor=monitor,
104 | use_sdk=build.USE_SDK,
105 | model=model_locked,
106 | inputs=inputs_locked,
107 | quantization_samples=quantization_samples,
108 | )
109 | 
110 | # Return a cached build if possible, otherwise prepare the model State for
111 | # a build
112 | if state.build_status == of_build.Status.SUCCESSFUL_BUILD:
113 | # Successful builds can be loaded from cache and returned with
114 | # no additional steps
115 | additional_msg = " (build_name auto-selected)" if config.auto_name else ""
116 | printing.log_success(
117 | f' Build "{config.build_name}"{additional_msg} found in cache. Loading it!',
118 | )
119 | 
120 | return groqmodel.load(config.build_name, state.cache_dir)
121 | 
122 | state.quantization_samples = quantization_samples
123 | 
124 | sequence_locked.show_monitor(config, state.monitor)
125 | state = sequence_locked.launch(state)
126 | 
127 | if state.build_status == of_build.Status.SUCCESSFUL_BUILD:
128 | printing.log_success(
129 | f"\n Saved to **{of_build.output_dir(state.cache_dir, config.build_name)}**"
130 | )
131 | 
132 | return groqmodel.load(config.build_name, state.cache_dir)
133 | 
134 | else:
135 | printing.log_success(
136 | f"Build Sequence {sequence_locked.unique_name} completed successfully"
137 | )
138 | msg = """
139 | groqit() only returns a GroqModel instance if the Sequence includes a Stage
140 | that sets state.build_status=onnxflow.build.Status.SUCCESSFUL_BUILD.
141 | """ 142 | printing.log_warning(msg) 143 | return None 144 | -------------------------------------------------------------------------------- /groqflow/justgroqit/ignition.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Tuple, Union, Dict, Any 2 | from collections.abc import Collection 3 | from typeguard import typechecked 4 | 5 | import onnxflow.common.build as of_build 6 | import onnxflow.common.exceptions as exp 7 | import onnxflow.justbuildit.export as of_export 8 | import onnxflow.justbuildit.hummingbird as hummingbird 9 | import onnxflow.justbuildit.stage as stage 10 | import onnxflow.justbuildit.ignition as of_ignition 11 | 12 | import groqflow.common.build as build 13 | import groqflow.justgroqit.compile as compile 14 | import groqflow.justgroqit.export as gf_export 15 | 16 | 17 | from groqflow.version import __version__ as groqflow_version 18 | 19 | default_pytorch_export_sequence = stage.Sequence( 20 | "default_pytorch_export_sequence", 21 | "Exporting PyTorch Model", 22 | [ 23 | of_export.ExportPytorchModel(), 24 | of_export.OptimizeOnnxModel(), 25 | gf_export.CheckOnnxCompatibility(), 26 | of_export.ConvertOnnxToFp16(), 27 | ], 28 | ) 29 | 30 | default_pytorch_sequence = stage.Sequence( 31 | "default_pytorch_sequence", 32 | "Building PyTorch Model", 33 | [ 34 | default_pytorch_export_sequence, 35 | compile.CompileOnnx(), 36 | compile.Assemble(), 37 | ], 38 | ) 39 | 40 | pytorch_export_sequence_with_quantization = stage.Sequence( 41 | "pytorch_export_sequence_with_quantization", 42 | "Exporting PyTorch Model and Quantizing Exported ONNX", 43 | [ 44 | of_export.ExportPytorchModel(), 45 | of_export.OptimizeOnnxModel(), 46 | gf_export.CheckOnnxCompatibility(), 47 | of_export.QuantizeONNXModel(), 48 | ], 49 | ) 50 | 51 | pytorch_sequence_with_quantization = stage.Sequence( 52 | "pytorch_sequence_with_quantization", 53 | "Building PyTorch Model", 54 | [ 55 | pytorch_export_sequence_with_quantization, 56 | compile.CompileOnnx(), 57 | compile.Assemble(), 58 | ], 59 | ) 60 | 61 | 62 | pytorch_export_torch_script_sequence = stage.Sequence( 63 | "pytorch_export_torch_script", 64 | "Export Pytorch Model to TorchScript", 65 | [ 66 | gf_export.ExportPytorchToTorchScript(), 67 | ], 68 | ) 69 | 70 | pytorch_torch_importer_sequence = stage.Sequence( 71 | "pytorch_torch_importer", 72 | "Build PyTorch Model using Torch Importer Front-end", 73 | [ 74 | pytorch_export_torch_script_sequence, 75 | compile.CompileTorchScript(), 76 | compile.Assemble(), 77 | ], 78 | ) 79 | 80 | default_keras_export_sequence = stage.Sequence( 81 | "default_keras_export_sequence", 82 | "Exporting Keras Model", 83 | [ 84 | of_export.ExportKerasModel(), 85 | of_export.OptimizeOnnxModel(), 86 | gf_export.CheckOnnxCompatibility(), 87 | of_export.ConvertOnnxToFp16(), 88 | ], 89 | ) 90 | 91 | default_keras_sequence = stage.Sequence( 92 | "default_keras_sequence", 93 | "Building Keras Model", 94 | [ 95 | default_keras_export_sequence, 96 | compile.CompileOnnx(), 97 | compile.Assemble(), 98 | ], 99 | ) 100 | 101 | 102 | default_onnx_sequence = stage.Sequence( 103 | "default_onnx_sequence", 104 | "Building ONNX Model", 105 | [ 106 | of_export.ReceiveOnnxModel(), 107 | of_export.OptimizeOnnxModel(), 108 | gf_export.CheckOnnxCompatibility(), 109 | of_export.ConvertOnnxToFp16(), 110 | compile.CompileOnnx(), 111 | compile.Assemble(), 112 | ], 113 | ) 114 | 115 | default_hummingbird_sequence = stage.Sequence( 116 | "default_hummingbird_sequence", 117 | 
"Building Hummingbird Model", 118 | [ 119 | hummingbird.ConvertHummingbirdModel(), 120 | of_export.OptimizeOnnxModel(), 121 | gf_export.CheckOnnxCompatibility(), 122 | compile.CompileOnnx(), 123 | compile.Assemble(), 124 | ], 125 | ) 126 | 127 | default_compiler_flags: List[str] = [] 128 | 129 | default_assembler_flags = [ 130 | "--ifetch-from-self", 131 | "--ifetch-slice-ordering=round-robin", 132 | ] 133 | 134 | 135 | @typechecked 136 | def _validate_args( # pylint: disable = unused-argument 137 | build_name: Optional[str] = None, 138 | compiler_flags: Optional[List[str]] = None, 139 | assembler_flags: Optional[List[str]] = None, 140 | groqview: bool = False, 141 | groqcard: Optional[str] = build.GROQCARD_A14, 142 | num_chips: Optional[int] = None, 143 | topology: Optional[str] = build.TOPOLOGY, 144 | ): 145 | 146 | if groqcard is not build.GROQCARD_A14: 147 | msg = f""" 148 | You set groqit()'s groqcard argument to {groqcard}, which is not a supported value. The 149 | currently supported value is: {build.GROQCARD_A14}. 150 | """ 151 | raise exp.ArgError(msg) 152 | 153 | if num_chips is not None and num_chips > 1: 154 | if topology is not build.DRAGONFLY and topology is not build.ROTATIONAL: 155 | msg = f""" 156 | You set groqit()'s topology argument to {topology} 157 | for build {build_name}, which is not a supported value. Choose from the 158 | currently supported values: {build.DRAGONFLY}, {build.ROTATIONAL}. 159 | """ 160 | raise exp.ArgError(msg) 161 | 162 | supported_topology = build.supported_topology(groqcard, topology) 163 | if num_chips not in supported_topology.keys(): 164 | msg = f""" 165 | You set groqit()'s num_chips argument to {num_chips} with topology {topology} 166 | for build {build_name}, which is not a supported value. Choose from the 167 | currently supported chip counts: {supported_topology.keys()}. 168 | """ 169 | raise exp.ArgError(msg) 170 | 171 | if compiler_flags: 172 | if "--auto-asm" in compiler_flags: 173 | if assembler_flags: 174 | msg = """ 175 | The --auto-asm compiler flag is mutually exclusive with the assembler_flags argument 176 | argument to groqit(). Either set assembler_flags=None or do not use --auto-asm. 177 | """ 178 | raise exp.ArgError(msg) 179 | 180 | # groqit() may automatically apply certain Groq Compiler flags to each build 181 | # This check makes sure the user isn't creating a collision by also applying 182 | # any of these flags 183 | disallowed_compiler_flags = [ 184 | "--multichip", 185 | "--groqview", 186 | "--save-stats", 187 | "-o", 188 | ] 189 | for user_flag in compiler_flags: 190 | for disallowed_flag in disallowed_compiler_flags: 191 | if user_flag.startswith(disallowed_flag): 192 | msg = f""" 193 | The following compiler flags are reserved by groqit() and cannot be used 194 | in the groqit(compiler_flags=...) argument: {disallowed_compiler_flags}. 195 | However, your compiler_flags argument includes {user_flag}. 196 | """ 197 | raise exp.ArgError(msg) 198 | 199 | if assembler_flags and num_chips != 1: 200 | msg = """ 201 | The assembler_flags argument is incompatible with multi-chip models. 202 | Either set num_chips=1 or do not use assembler_flags. 
203 | """ 204 | raise exp.ArgError(msg) 205 | 206 | 207 | def lock_config( 208 | model, 209 | build_name: Optional[str] = None, 210 | compiler_flags: Optional[List[str]] = None, 211 | assembler_flags: Optional[List[str]] = None, 212 | groqview: bool = False, 213 | groqcard: Optional[str] = build.GROQCARD_A14, 214 | num_chips: Optional[int] = None, 215 | topology: Optional[str] = build.TOPOLOGY, 216 | sequence: stage.Sequence = None, 217 | ) -> build.GroqConfig: 218 | 219 | """ 220 | Process the user's configuration arguments to groqit(): 221 | 1. Raise exceptions for illegal arguments 222 | 2. Replace unset arguments with default values 223 | 3. Lock the configuration into an immutable object 224 | """ 225 | 226 | _validate_args( 227 | build_name=build_name, 228 | compiler_flags=compiler_flags, 229 | assembler_flags=assembler_flags, 230 | groqview=groqview, 231 | groqcard=groqcard, 232 | num_chips=num_chips, 233 | topology=topology, 234 | ) 235 | 236 | # Override the onnxflow default opset with GroqFlow's default 237 | of_build.DEFAULT_ONNX_OPSET = build.DEFAULT_ONNX_OPSET 238 | 239 | of_config = of_ignition.lock_config( 240 | model=model, 241 | build_name=build_name, 242 | sequence=sequence, 243 | ) 244 | 245 | # Use default compiler flags if no flags were specified 246 | if compiler_flags is None: 247 | compiler_flags = default_compiler_flags 248 | 249 | # Use default assembler flags if no flags were specified 250 | if assembler_flags is None: 251 | assembler_flags = default_assembler_flags 252 | 253 | # Store the args that should be immutable 254 | config = build.GroqConfig( # pylint: disable=unexpected-keyword-arg 255 | build_name=of_config.build_name, 256 | auto_name=of_config.auto_name, 257 | compiler_flags=compiler_flags, 258 | assembler_flags=assembler_flags, 259 | groqview=groqview, 260 | groqcard=groqcard, 261 | topology=topology, 262 | num_chips=num_chips, 263 | sequence=of_config.sequence, 264 | onnx_opset=of_config.onnx_opset, 265 | ) 266 | 267 | return config 268 | 269 | 270 | def _validate_cached_model( 271 | config: build.GroqConfig, 272 | model_type: of_build.ModelType, 273 | state: build.GroqState, 274 | model: of_build.UnionValidModelInstanceTypes = None, 275 | inputs: Optional[Dict[str, Any]] = None, 276 | ) -> List[str]: 277 | """ 278 | Verify whether anything in the call to groqit changed 279 | We require the user to resolve the discrepancy when such a 280 | change occurs, so the purpose of this function is simply to 281 | detect these conditions and raise an appropriate error. 282 | If this function returns without raising an exception then 283 | the cached model is valid to use in the build. 284 | """ 285 | 286 | result = of_ignition.validate_cached_model( 287 | config=config, 288 | model_type=model_type, 289 | state=state, 290 | model=model, 291 | inputs=inputs, 292 | ) 293 | 294 | current_version_decoded = of_ignition.decode_version_number(groqflow_version) 295 | state_version_decoded = of_ignition.decode_version_number(state.groqflow_version) 296 | 297 | out_of_date: Union[str, bool] = False 298 | if current_version_decoded["major"] > state_version_decoded["major"]: 299 | out_of_date = "major" 300 | elif current_version_decoded["minor"] > state_version_decoded["minor"]: 301 | out_of_date = "minor" 302 | 303 | if out_of_date: 304 | msg = ( 305 | f"Your build {state.config.build_name} was previously built against " 306 | f"GroqFlow version {state.groqflow_version}, " 307 | f"however you are now using GroqFlow version {groqflow_version}. 
The previous build is " 308 | f"incompatible with this version of GroqFlow, as indicated by the {out_of_date} " 309 | "version number changing. See **docs/versioning.md** for details." 310 | ) 311 | result.append(msg) 312 | 313 | return result 314 | 315 | 316 | def load_or_make_state( 317 | config: build.GroqConfig, 318 | cache_dir: str, 319 | rebuild: str, 320 | model_type: of_build.ModelType, 321 | monitor: bool, 322 | use_sdk: bool, 323 | model: of_build.UnionValidModelInstanceTypes = None, 324 | inputs: Optional[Dict[str, Any]] = None, 325 | quantization_samples: Optional[Collection] = None, 326 | ) -> build.GroqState: 327 | """ 328 | Decide whether we can load the model from the GroqFlow model cache 329 | (return a valid State instance) or whether we need to rebuild it (return 330 | a new State instance). 331 | """ 332 | 333 | return of_ignition.load_or_make_state( 334 | config=config, 335 | cache_dir=cache_dir, 336 | rebuild=rebuild, 337 | model_type=model_type, 338 | monitor=monitor, 339 | model=model, 340 | inputs=inputs, 341 | quantization_samples=quantization_samples, 342 | state_type=build.GroqState, 343 | cache_validation_func=_validate_cached_model, 344 | extra_state_args={"use_sdk": use_sdk}, 345 | ) 346 | 347 | 348 | groq_model_type_to_sequence = { 349 | of_build.ModelType.PYTORCH: default_pytorch_sequence, 350 | of_build.ModelType.KERAS: default_keras_sequence, 351 | of_build.ModelType.ONNX_FILE: default_onnx_sequence, 352 | of_build.ModelType.HUMMINGBIRD: default_hummingbird_sequence, 353 | } 354 | 355 | groq_model_type_to_sequence_with_quantization = { 356 | of_build.ModelType.PYTORCH: pytorch_sequence_with_quantization, 357 | } 358 | 359 | groq_model_type_torch_importer_override_to_sequence = { 360 | of_build.ModelType.PYTORCH: pytorch_torch_importer_sequence, 361 | of_build.ModelType.KERAS: default_keras_sequence, 362 | of_build.ModelType.ONNX_FILE: default_onnx_sequence, 363 | of_build.ModelType.HUMMINGBIRD: default_hummingbird_sequence, 364 | } 365 | 366 | 367 | def model_intake( 368 | user_model, 369 | user_inputs, 370 | user_sequence: Optional[stage.Sequence], 371 | config: build.GroqConfig, 372 | user_quantization_samples: Optional[Collection] = None, 373 | ) -> Tuple[Any, Any, stage.Sequence, of_build.ModelType]: 374 | 375 | override_sequence_map = groq_model_type_to_sequence 376 | if build.USE_TORCH_IMPORTER: 377 | override_sequence_map = groq_model_type_torch_importer_override_to_sequence 378 | 379 | model, inputs, sequence, model_type = of_ignition.model_intake( 380 | user_model=user_model, 381 | user_inputs=user_inputs, 382 | user_sequence=user_sequence, 383 | user_quantization_samples=user_quantization_samples, 384 | override_quantization_sequence_map=groq_model_type_to_sequence_with_quantization, 385 | override_sequence_map=override_sequence_map, 386 | ) 387 | 388 | if "--auto-asm" in config.compiler_flags: 389 | sequence.stages = [ 390 | stage 391 | for stage in sequence.stages 392 | if not isinstance(stage, compile.Assemble) 393 | ] 394 | 395 | return (model, inputs, sequence, model_type) 396 | -------------------------------------------------------------------------------- /groqflow/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "4.3.1" 2 | -------------------------------------------------------------------------------- /license.md: -------------------------------------------------------------------------------- 1 | Copyright 2023 Groq Inc. 
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 
-------------------------------------------------------------------------------- /proof_points/README.md: --------------------------------------------------------------------------------
1 | # GroqFlow Proof Points
2 | 
3 | Proof points demonstrate how GroqFlow is able to successfully build and execute a model on Groq hardware, while maintaining model accuracy. The models are organized by category.
4 | 
5 | - Computer Vision (CV)
6 | - Natural Language Processing (NLP)
7 | - Speech Processing
8 | 
9 | ## Table of Contents
10 | 
11 | - [Prerequisites](#prerequisites)
12 | - [Support Matrix](#support-matrix)
13 | - [Computer Vision](#computer-vision)
14 | - [Natural Language Processing](#natural-language-processing)
15 | - [Speech Processing](#speech-processing)
16 | - [Running a Script](#running-a-script)
17 | - [Build and Evaluate on a Single Machine](#build-and-evaluate-on-a-single-machine)
18 | - [Build and Evaluate on Separate Machines](#build-and-evaluate-on-separate-machines)
19 | 
20 | ## Prerequisites
21 | 
22 | The following tasks are required to enable running proof point scripts:
23 | 
24 | - Download and install the GroqWare Suite packages from the [Groq Customer Portal](https://support.groq.com/):
25 | - `groq-devtools` package, for model development and builds
26 | - `groq-runtime` package, for running computations on hardware (Groq hardware must be present to install)
27 | - If building and executing a proof point on the same host machine, download and install both of the above packages.
28 | - Clone the [GroqFlow Repository](https://github.com/groq/groqflow)
29 | - Set up and activate a `groqflow` environment
30 | - Follow the [GroqFlow Installation Guide](https://github.com/groq/groqflow/blob/main/docs/install.md)
31 | - Pip install the helper files for the proof points
32 | - `pip install -e {path_to}/groqflow/demo_helpers`
33 | 
34 | ## Support Matrix
35 | 
36 | The following tables relate each proof point model to the versions of the GroqWare Suite (SDK) in which it is supported.
37 | 
38 | ### Computer Vision
39 | 
40 | | Proof Point Model | Supported SDK Version |
41 | |:------------------|:------------------------|
42 | | [DeiT-tiny](computer_vision/deit/) | >=0.9.2.1 |
43 | | [GoogLeNet](computer_vision/googlenet/) | >=0.9.2.1 |
44 | | [MobileNetV2](computer_vision/mobilenetv2/) | >=0.9.2.1 |
45 | | [ResNet50](computer_vision/resnet50/) | >=0.9.2.1 |
46 | | [SqueezeNet](computer_vision/squeezenet/) | >=0.9.2.1 |
47 | | [Yolo v6](computer_vision/yolo/) | >=0.11.0 |
48 | 
49 | ### Natural Language Processing
50 | 
51 | | Proof Point Model | Supported SDK Version(s) |
52 | |:------------------|:------------------------|
53 | | [Bert Tiny](natural_language_processing/bert/) | >=0.9.2.1 |
54 | | [Bert Base](natural_language_processing/bert/) | >=0.9.2.1 |
55 | | [Bert Quantized](natural_language_processing/bert/) | 0.9.2.1 |
56 | | [DistilBERT](natural_language_processing/distilbert/) | >=0.9.2.1 |
57 | | [ELECTRA](natural_language_processing/electra/) | >=0.9.2.1 |
58 | | [MiniLM v2](natural_language_processing/minilm/) | >=0.9.2.1 |
59 | | [RoBERTa](natural_language_processing/roberta/) | >=0.9.2.1 |
60 | 
61 | ### Speech Processing
62 | 
63 | | Proof Point Model | Supported SDK Version |
64 | |:------------------|:------------------------|
65 | | [M5](speech/m5/) | >=0.9.2.1 |
66 | 
67 | ## Running a Script
68 | 
69 | Each proof point will first build a GroqModel, and then evaluate the model on both a CPU and Groq hardware. If access to Groq hardware is available, the build and evaluation can be completed in a single run. However, a two-step process has also been provided in case resource management requires that the build and evaluation steps be carried out on separate machines. Provided here are the general steps to run a script; each proof point also has a README that describes any requirements and features specific to that model.
70 | 
71 | **Note**: Builds for large models can take several minutes. To avoid surprises, the expected build time is included in the README for each proof point.
72 | 
73 | ### Build and Evaluate on a Single Machine
74 | 
75 | Navigate to the folder containing the proof point and read the model's details in the `README`.
76 | 
77 | - Install the dependencies listed in the `requirements.txt` file.
78 | 
79 | ```bash
80 | pip install -r requirements.txt
81 | ```
82 | 
83 | - Build and evaluate the proof point:
84 | 
85 | ```bash
86 | python {proof_point_name}.py
87 | ```
88 | 
89 | ### Build and Evaluate on Separate Machines
90 | 
91 | Navigate to the folder containing the proof point and read the model's details in the `README`.
92 | 
93 | - Install the dependencies listed in the `requirements.txt` file.
94 | 
95 | ```bash
96 | pip install -r requirements.txt
97 | ```
98 | 
99 | - Build the model by running the command with the `--build` flag as shown below:
100 | 
101 | ```bash
102 | python {proof_point_name}.py --build
103 | ```
104 | 
105 | - If the model already exists in cache, it will not be rebuilt unless the model code or build changes.
106 | - Transfer the proof point script and the `.iop` files to the machine connected to Groq hardware.
107 | - The resulting build artifacts will be located in the GroqFlow cache directory for the proof point, `~/.cache/groqflow/{proof_point_name}`. These artifacts include log files, ONNX files, inputs, the YAML state file, and the compile folder.
108 | - The `.iop` files can be found within the compile folder in the cache directory. There will be a file for each card used to execute the model on Groq hardware.
Copy these files to the same location on the second machine:
109 | 
110 | `~/.cache/groqflow/{proof_point_name}/compile/*.iop`
111 | 
112 | - Once the `.iop` files are copied into the same cache directory on the second machine and the initial prerequisites are met, the script can be run a second time with the `--execute` flag to evaluate the model.
113 | 
114 | ```bash
115 | python {proof_point_name}.py --execute
116 | ```
117 | 
-------------------------------------------------------------------------------- /proof_points/computer_vision/deit/README.md: --------------------------------------------------------------------------------
1 | # DeiT
2 | 
3 | The [DeiT](https://arxiv.org/abs/2012.12877) (Data-efficient image Transformer) is a convolution-free transformer model designed for computer vision. DeiT models are efficiently trained for image classification using a novel token distillation process that can learn more from a convolutional teacher model than a transformer teacher. DeiT models also require less data to train than the original Vision Transformers [(ViT)](https://arxiv.org/abs/2010.11929v2).
4 | 
5 | This proof point obtains a [pre-trained DeiT-tiny](https://huggingface.co/facebook/deit-tiny-patch16-224) from Hugging Face for the task of image classification. The model implementations are evaluated using the 10-class [Imagenette dataset](https://github.com/fastai/imagenette), which is a sampling from the [ImageNet dataset](https://www.image-net.org/). The tiny version of the DeiT model illustrates the ability of GroqFlow™ and the GroqWare™ Suite to support all of the necessary operations used to build and run the DeiT models.
6 | 
7 | ## Prerequisites
8 | 
9 | - Ensure you've completed the install prerequisites:
10 | - Installed GroqWare™ Suite
11 | - Installed GroqFlow
12 | - Installed Groq Demo Helpers
13 | - For more information on these steps, see the [Proof Points README](../../README.md).
14 | - Install the python dependencies from the requirements.txt file included with this proof point using the following command:
15 | 
16 | ```bash
17 | pip install -r requirements.txt
18 | ```
19 | 
20 | ## Build and Evaluate
21 | 
22 | To build and evaluate DeiT-tiny:
23 | 
24 | ```bash
25 | python deit_tiny.py
26 | ```
27 | 
28 | **Note:** The Proof Points directory [readme.md](../../README.md) details how to build and execute on two machines.
29 | 
30 | ## Expected Results
31 | 
32 | It takes approximately 4 minutes for DeiT-tiny to build and about 2 minutes to evaluate the implementation accuracies. The script returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation on a GroqCard™ accelerator.
33 | 
-------------------------------------------------------------------------------- /proof_points/computer_vision/deit/deit_tiny.py: --------------------------------------------------------------------------------
1 | """
2 | The following example downloads a pre-trained DeiT tiny from Hugging
3 | Face (https://huggingface.co/facebook/deit-tiny-patch16-224) and
4 | executes against Imagenette, the 10-class, sampled ImageNet
5 | dataset (https://github.com/fastai/imagenette) on CPU and
6 | GroqChip™ processor by using the GroqFlow toolchain.
7 | """ 8 | import torch 9 | from transformers import ViTForImageClassification 10 | 11 | from groqflow import groqit 12 | from demo_helpers.compute_performance import compute_performance 13 | from demo_helpers.args import parse_args 14 | 15 | 16 | def evaluate_deit_tiny(rebuild_policy=None, should_execute=True): 17 | # load torch model 18 | model = ViTForImageClassification.from_pretrained( 19 | "facebook/deit-tiny-patch16-224", torchscript=True 20 | ) 21 | model.eval() 22 | 23 | # create dummy inputs to prime groq model 24 | dummy_inputs = {"pixel_values": torch.ones([1, 3, 224, 224])} 25 | 26 | # generate groq model 27 | groq_model = groqit(model, dummy_inputs, rebuild=rebuild_policy) 28 | 29 | # compute performance on CPU and GroqChip 30 | if should_execute: 31 | compute_performance( 32 | groq_model, 33 | model, 34 | dataset="sampled_imagenet", 35 | task="classification", 36 | ) 37 | print(f"Proof point {__file__} finished!") 38 | 39 | 40 | if __name__ == "__main__": 41 | evaluate_deit_tiny(**parse_args()) 42 | -------------------------------------------------------------------------------- /proof_points/computer_vision/deit/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.12.0 2 | transformers>=4.20.0 3 | -------------------------------------------------------------------------------- /proof_points/computer_vision/googlenet/README.md: -------------------------------------------------------------------------------- 1 | # GoogLeNet 2 | 3 | [GoogLeNet](https://arxiv.org/abs/1409.4842v1) is the convolutional neural network (CNN) based on the Inception architecture that received top marks in the ImageNet Large-Scale Visual Recognition Challenge 2014 ([ILSVRC 2014](https://www.image-net.org/challenges/LSVRC/2014/)). The stacked Inception modules applied multiple convolutional filter sizes (1x1, 3x3, & 5x5) before aggregating the results so that the next stage could simultaneously extract features of varying scale. The number of parameters and computational complexity were kept in check by using 1x1 convolution layers before the larger filters to reduce the layer dimensions before convolving over large patch sizes. 4 | 5 | In this proof point, GoogleNet is used for the task of image classification and evaluated using the Imagenette [dataset](https://github.com/fastai/imagenette), a 10 class, sampled version of the ImageNet [dataset](https://www.image-net.org/). The model weights are downloaded from the [PyTorch website](https://pytorch.org/hub/pytorch_vision_googlenet/). 6 | 7 | ## Prerequisites 8 | 9 | - Ensure you've completed the install prerequisites: 10 | - Installed GroqWare™ Suite 11 | - Installed GroqFlow 12 | - Installed Groq Demo Helpers 13 | - For more information on these steps, see the [Proof Points README](../../README.md). 14 | - Install the python dependencies using the requirements.txt file included with this proof point using the following command: 15 | 16 | ```bash 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## Build and Evaluate 21 | 22 | To build and evaluate GoogLeNet: 23 | 24 | ```bash 25 | python googlenet.py 26 | ``` 27 | 28 | **Note:** The Proof Points directory [readme.md](../../README.md) details how to build and execute on two machines. 29 | 30 | ## Expected Results 31 | 32 | It takes approximately 6 minutes for GoogLeNet to build and about 2 minutes to evaluate the implementation accuracies. 
The script returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation on a GroqCard™ accelerator.
33 | 
-------------------------------------------------------------------------------- /proof_points/computer_vision/googlenet/googlenet.py: --------------------------------------------------------------------------------
1 | """
2 | The following example takes a pre-trained GoogLeNet model
3 | (https://pytorch.org/hub/pytorch_vision_googlenet/) and
4 | executes against Imagenette, the 10-class, sampled ImageNet
5 | dataset (https://github.com/fastai/imagenette) on CPU and
6 | GroqChip™ processor by using the GroqFlow toolchain.
7 | """
8 | import torch
9 | 
10 | from demo_helpers.compute_performance import compute_performance
11 | from demo_helpers.args import parse_args
12 | from groqflow import groqit
13 | 
14 | 
15 | def evaluate_googlenet(rebuild_policy=None, should_execute=None):
16 | # set seed for consistency
17 | torch.manual_seed(0)
18 | 
19 | # load torch model
20 | torch_model = torch.hub.load(
21 | "pytorch/vision:v0.10.0", "googlenet", weights="GoogLeNet_Weights.DEFAULT"
22 | )
23 | torch_model.eval() # disable training specific layers
24 | 
25 | # create dummy input to prime groq model
26 | dummy_inputs = torch.randn((1, 3, 224, 224), dtype=torch.float32)
27 | 
28 | # generate groq model
29 | build_name = "googlenet"
30 | groq_model = groqit(
31 | torch_model,
32 | {"x": dummy_inputs},
33 | rebuild=rebuild_policy,
34 | build_name=build_name,
35 | )
36 | 
37 | # compute performance on CPU and GroqChip
38 | if should_execute:
39 | compute_performance(
40 | groq_model, torch_model, "sampled_imagenet", task="classification"
41 | )
42 | 
43 | print(f"Proof point {__file__} finished!")
44 | 
45 | 
46 | if __name__ == "__main__":
47 | evaluate_googlenet(**parse_args())
48 | 
-------------------------------------------------------------------------------- /proof_points/computer_vision/googlenet/requirements.txt: --------------------------------------------------------------------------------
1 | torch>=1.12.0
2 | 
-------------------------------------------------------------------------------- /proof_points/computer_vision/mobilenetv2/README.md: --------------------------------------------------------------------------------
1 | # MobileNetV2
2 | 
3 | [MobileNetV2](https://arxiv.org/abs/1801.04381) is a CNN model that was designed to perform well on mobile devices. The architecture makes use of an inverted residual structure where the residual connections are between the bottleneck layers. The intermediate expansion layer uses lightweight depthwise convolutions to filter features as a source of non-linearity and to reduce the memory footprint of the model.
4 | 
5 | This proof point uses a [MobileNet V2 model](https://pytorch.org/hub/pytorch_vision_mobilenet_v2/) pre-trained on the [ImageNet dataset](https://www.image-net.org/) and downloaded from PyTorch's model hub. The model is evaluated on the sampled, 10 class version of the ImageNet dataset, [Imagenette](https://github.com/fastai/imagenette).
6 | 
7 | ## Prerequisites
8 | 
9 | - Ensure you've completed the install prerequisites:
10 | - Installed the GroqWare™ Suite
11 | - Installed GroqFlow
12 | - Installed Groq Demo Helpers
13 | - For more information on these steps, see the [Proof Points README](../../README.md).
14 | - Install the python dependencies from the requirements.txt file included with this proof point using the following command:
15 | 
16 | ```bash
17 | pip install -r requirements.txt
18 | ```
19 | 
20 | ## Build and Evaluate
21 | 
22 | To build and evaluate MobileNetV2:
23 | 
24 | ```bash
25 | python mobilenetv2.py
26 | ```
27 | 
28 | **Note:** The Proof Points directory [readme.md](../../README.md) details how to build and execute on two machines.
29 | 
30 | ## Expected Results
31 | 
32 | It takes approximately 12 minutes for MobileNetV2 to build and about 2 minutes to evaluate the implementation accuracies. The script returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation using a single GroqCard™ accelerator.
33 | 
-------------------------------------------------------------------------------- /proof_points/computer_vision/mobilenetv2/mobilenetv2.py: --------------------------------------------------------------------------------
1 | """
2 | The following example takes a pre-trained MobileNetV2 model from
3 | https://pytorch.org/hub/pytorch_vision_mobilenet_v2/, and
4 | executes against the 10-class, sampled ImageNet dataset, Imagenette
5 | (https://github.com/fastai/imagenette) on CPU and GroqChip™
6 | processor by using the GroqFlow toolchain.
7 | """
8 | 
9 | import torch
10 | 
11 | from demo_helpers.compute_performance import compute_performance
12 | from demo_helpers.args import parse_args
13 | from groqflow import groqit
14 | 
15 | 
16 | def evaluate_mobilenetv2(rebuild_policy=None, should_execute=None):
17 | # set seed for consistency
18 | torch.manual_seed(0)
19 | 
20 | # load torch model
21 | torch_model = torch.hub.load(
22 | "pytorch/vision:v0.10.0",
23 | "mobilenet_v2",
24 | weights="MobileNet_V2_Weights.IMAGENET1K_V1",
25 | )
26 | torch_model.eval() # disable normalization and dropout layers
27 | 
28 | # create dummy input to prime groq model
29 | dummy_inputs = torch.randn((1, 3, 224, 224), dtype=torch.float32)
30 | 
31 | # generate groq model
32 | build_name = "mobilenetv2"
33 | groq_model = groqit(
34 | torch_model,
35 | {"x": dummy_inputs},
36 | rebuild=rebuild_policy,
37 | build_name=build_name,
38 | )
39 | 
40 | # compute performance on CPU and GroqChip
41 | if should_execute:
42 | compute_performance(
43 | groq_model, torch_model, "sampled_imagenet", task="classification"
44 | )
45 | 
46 | print(f"Proof point {__file__} finished!")
47 | 
48 | 
49 | if __name__ == "__main__":
50 | evaluate_mobilenetv2(**parse_args())
51 | 
-------------------------------------------------------------------------------- /proof_points/computer_vision/mobilenetv2/requirements.txt: --------------------------------------------------------------------------------
1 | torch>=1.12.0
2 | 
-------------------------------------------------------------------------------- /proof_points/computer_vision/resnet50/README.md: --------------------------------------------------------------------------------
1 | # ResNet50
2 | 
3 | ResNet50 is a Convolutional Neural Network (CNN) model used for image classification. Kaiming He, et al. first introduced ResNet models and the revolutionary residual connection (also known as a skip connection) in their 2015 paper, [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385). The residual connection enables easier optimization and better accuracy while training deep models.
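The idea is easy to see in code. Below is a minimal, illustrative sketch (not taken from this repository) of a residual block: the block's input is added back to the output of its convolution layers, so the layers only need to learn a residual correction on top of the identity.

```python
import torch
import torch.nn as nn


class ResidualBlock(nn.Module):
    """A simplified residual block: output = relu(F(x) + x)."""

    def __init__(self, channels: int):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        residual = x  # the skip connection carries the input forward
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        return self.relu(out + residual)  # add the input back in
```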
4 | 5 | This proof point uses a [ResNet50 model](https://pytorch.org/hub/pytorch_vision_resnet/) pre-trained on the [ImageNet dataset](https://www.image-net.org/) and downloaded from PyTorch's model hub. The model is evaluated on the sampled, 10-class version of the ImageNet dataset, [Imagenette](https://github.com/fastai/imagenette). 6 | 7 | ## Prerequisites 8 | 9 | - Ensure you've completed the install prerequisites: 10 | - Installed the GroqWare™ Suite 11 | - Installed GroqFlow 12 | - Installed Groq Demo Helpers 13 | - For more information on these steps, see the [Proof Points README](../../README.md). 14 | - Install the python dependencies for this proof point with the following: 15 | 16 | ```bash 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## Build and Evaluate 21 | 22 | To build and evaluate ResNet50: 23 | 24 | ```bash 25 | python resnet50.py 26 | ``` 27 | 28 | **Note:** The Proof Points directory [readme.md](../../README.md) details how to build and execute on two machines. 29 | 30 | ## Expected Results 31 | 32 | It takes approximately 18 minutes for ResNet50 to build and about 3 minutes to evaluate the implementation accuracies. The script returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation using a single GroqCard™ accelerator. 33 | -------------------------------------------------------------------------------- /proof_points/computer_vision/resnet50/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.12.0 2 | -------------------------------------------------------------------------------- /proof_points/computer_vision/resnet50/resnet50.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example takes a pre-trained ResNet-50 from torchvision and executes it against 3 | Imagenette, the sampled ImageNet dataset, on CPU and GroqChip1 through GroqFlow. 4 | """ 5 | 6 | from groqflow import groqit 7 | from demo_helpers.args import parse_args 8 | from demo_helpers.compute_performance import compute_performance 9 | 10 | import torch 11 | 12 | 13 | def get_model(): 14 | """PyTorch Model setup.""" 15 | pytorch_model = torch.hub.load( 16 | "pytorch/vision:v0.10.0", "resnet50", weights="ResNet50_Weights.IMAGENET1K_V1" 17 | ) 18 | return pytorch_model.eval() 19 | 20 | 21 | def evaluate_resnet50(rebuild_policy=None, should_execute=True): 22 | pytorch_model = get_model() 23 | dummy_inputs = {"x": torch.ones([1, 3, 224, 224])} 24 | 25 | # Get Groq Model using groqit 26 | groq_model = groqit(pytorch_model, dummy_inputs, rebuild=rebuild_policy) 27 | 28 | # Execute PyTorch model on CPU, Groq Model and print accuracy 29 | if should_execute: 30 | compute_performance( 31 | groq_model, pytorch_model, "sampled_imagenet", task="classification" 32 | ) 33 | 34 | print(f"Proof point {__file__} finished!") 35 | 36 | 37 | if __name__ == "__main__": 38 | evaluate_resnet50(**parse_args()) 39 | -------------------------------------------------------------------------------- /proof_points/computer_vision/squeezenet/README.md: -------------------------------------------------------------------------------- 1 | # SqueezeNet 2 | 3 | [SqueezeNet](https://arxiv.org/abs/1602.07360?context=cs) is advertised as a small convolutional neural network (CNN) that achieves "AlexNet level accuracy on ImageNet with 50x fewer parameters" as quoted in the linked paper. SqueezeNet models are highly efficient in terms of size and speed while providing relatively good accuracies.
This makes them ideal for platforms with strict constraints on size. 4 | 5 | In this proof point, SqueezeNet performs image classification. It is evaluated on the [Imagenette dataset](https://github.com/fastai/imagenette), a sampled, 10-class version of the [ImageNet dataset](https://www.image-net.org/). The model weights will be downloaded from the [PyTorch website](https://pytorch.org/hub/pytorch_vision_squeezenet/). 6 | 7 | ## Prerequisites 8 | 9 | - Ensure you've completed the install prerequisites: 10 | - Installed the GroqWare™ Suite 11 | - Installed GroqFlow 12 | - Installed Groq Demo Helpers 13 | - For more information on these steps, see the [Proof Points README](../../README.md). 14 | - Install the python dependencies for this proof point with the following: 15 | 16 | ```bash 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## Build and Evaluate 21 | 22 | To build and evaluate SqueezeNet: 23 | 24 | ```bash 25 | python squeezenet.py 26 | ``` 27 | 28 | **Note:** The Proof Points directory [readme.md](../../README.md) details how to build and execute on two machines. 29 | 30 | ## Expected Results 31 | 32 | It takes approximately 5 minutes for SqueezeNet to build and about 1 minute to evaluate the implementation accuracies. The script returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation using a single GroqCard™ accelerator. 33 | -------------------------------------------------------------------------------- /proof_points/computer_vision/squeezenet/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.12.1 2 | -------------------------------------------------------------------------------- /proof_points/computer_vision/squeezenet/squeezenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example takes a pre-trained SqueezeNet model and 3 | executes against the Imagenette dataset on a CPU and GroqChip™ processor 4 | by using the GroqFlow toolchain.
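
Note: groqit matches the keys of the input dictionary to the argument names of the model's forward() method, which is why the dummy input below is passed as {"x": dummy_inputs}; SqueezeNet's forward() takes a single tensor argument named x.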
5 | """ 6 | 7 | import torch 8 | 9 | from demo_helpers.compute_performance import compute_performance 10 | from demo_helpers.args import parse_args 11 | from groqflow import groqit 12 | 13 | 14 | def evaluate_squeezenet(rebuild_policy=None, should_execute=None): 15 | # set seed for consistency 16 | torch.manual_seed(0) 17 | 18 | # load torch model 19 | torch_model = torch.hub.load( 20 | "pytorch/vision:v0.10.0", 21 | "squeezenet1_0", 22 | weights="SqueezeNet1_0_Weights.DEFAULT", 23 | ).eval()  # disable dropout layers 24 | 25 | # create dummy inputs to prime groq model 26 | dummy_inputs = torch.randn((1, 3, 224, 224), dtype=torch.float32) 27 | 28 | # generate groq model 29 | build_name = "squeezenet" 30 | groq_model = groqit( 31 | torch_model, 32 | {"x": dummy_inputs}, 33 | rebuild=rebuild_policy, 34 | build_name=build_name, 35 | ) 36 | 37 | # compute performance on CPU and GroqChip 38 | if should_execute: 39 | compute_performance( 40 | groq_model, torch_model, "sampled_imagenet", task="classification" 41 | ) 42 | 43 | print(f"Proof point {__file__} finished!") 44 | 45 | 46 | if __name__ == "__main__": 47 | evaluate_squeezenet(**parse_args()) 48 | -------------------------------------------------------------------------------- /proof_points/computer_vision/yolo/README.md: -------------------------------------------------------------------------------- 1 | # YOLO v6 2 | 3 | YOLOv6 is a Convolutional Neural Network (CNN) model used for [Object Detection](https://en.wikipedia.org/wiki/Object_detection). It is an extension of the original YOLO model developed by [Joseph Redmon](https://pjreddie.com/), et al. in their 2015 paper, [You Only Look Once: Unified, Real-Time Object Detection](https://arxiv.org/abs/1506.02640). The key innovation of YOLO is its improved inference speed and computational efficiency compared to other object detection models. YOLO locates the objects in an image and classifies them in a single "look". Other state-of-the-art object detection models use a multi-module approach, which requires separate steps: one to identify possible objects and another to classify the located objects. Redmon argued that this required multiple "looks" at an image, and while it could achieve good results, the resulting models were larger, more computationally intensive, and therefore slower. 4 | 5 | This variation of YOLO was released by the Meituan Vision AI Department and [published on GitHub](https://github.com/meituan/YOLOv6) in different sizes ranging from YOLOv6-nano at 4.3M parameters to YOLOv6-large at 58.5M parameters. This proof point compiles the YOLOv6-nano model for an input size of 640 x 640 pixels. 6 | 7 | This proof point evaluates YOLOv6-nano on the [COCO dataset](https://cocodataset.org/). The success of the model is measured using the "mAP @ 0.5:0.95" metric, which computes an average mAP (Mean Average Precision) using different IoU (Intersection over Union) thresholds varying from 0.5 to 0.95. An explanation of this evaluation method can also be found at the COCO website under the [Evaluate tab](https://cocodataset.org/#detection-eval). 8 | 9 | ## Prerequisites 10 | 11 | - Ensure you've completed the install prerequisites: 12 | - Installed the GroqWare™ Suite 13 | - Installed GroqFlow 14 | - Installed Groq Demo Helpers 15 | - For more information on these steps, see the [Proof Points README](../../README.md).
16 | - Install the python dependencies for this proof point with the following: 17 | 18 | ```bash 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | ## Build and Evaluate 23 | 24 | To build and evaluate YOLOv6-nano: 25 | 26 | ```bash 27 | python yolov6_nano.py 28 | ``` 29 | 30 | **Note:** The Proof Points directory [readme.md](../../README.md) details how to build and execute on two machines. 31 | 32 | ## Expected Results 33 | 34 | It takes approximately 60 minutes for YOLOv6 to build and about 10 minutes to evaluate the implementation accuracies. The script returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation using a single GroqCard™ accelerator. 35 | -------------------------------------------------------------------------------- /proof_points/computer_vision/yolo/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python>=4.1.2 2 | pycocotools>=2.0 3 | -------------------------------------------------------------------------------- /proof_points/computer_vision/yolo/yolov6_nano.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example takes a pre-trained YOLOv6 model 3 | (https://github.com/meituan/YOLOv6) and executes against 4 | the COCO dataset (https://cocodataset.org/) on CPU and 5 | GroqChip™ processor using the GroqFlow toolchain. 6 | """ 7 | import torch 8 | 9 | from groqflow import groqit 10 | from demo_helpers.args import parse_args 11 | from demo_helpers.compute_performance import compute_performance 12 | from demo_helpers.models import get_yolov6n_model 13 | from demo_helpers.misc import check_deps 14 | 15 | 16 | def evaluate_yolov6n(rebuild_policy=None, should_execute=True): 17 | check_deps(__file__) 18 | model = get_yolov6n_model() 19 | dummy_inputs = {"images": torch.ones([1, 3, 640, 640])} 20 | 21 | # Get Groq Model using groqit 22 | groq_model = groqit( 23 | model, 24 | dummy_inputs, 25 | rebuild=rebuild_policy, 26 | compiler_flags=["--effort=high"], 27 | ) 28 | if should_execute: 29 | compute_performance(groq_model, model, "coco", task="coco_map") 30 | 31 | print(f"Proof point {__file__} finished!") 32 | 33 | 34 | if __name__ == "__main__": 35 | evaluate_yolov6n(**parse_args()) 36 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/bert/README.md: -------------------------------------------------------------------------------- 1 | # BERT 2 | 3 | This folder contains proof points that demonstrate two variants of the Natural Language Processing (NLP) model [BERT](https://arxiv.org/pdf/1810.04805.pdf): BERT-tiny and BERT-base. BERT is a bidirectional transformer architecture pretrained using Masked Language Modeling. The success of these proof points illustrates the ability of GroqFlow and the GroqWare™ Suite to support both the operations and size of the classic transformer architecture used by BERT models. 4 | 5 | BERT-tiny is a small (tiny, even!) variant of the BERT architecture. The paper, [Well-Read Students Learn Better: On the Importance of Pre-training Compact Models](https://arxiv.org/pdf/1908.08962.pdf), introduces BERT-tiny along with other BERT variants of reduced size: BERT-mini, BERT-small, and BERT-medium.
They are studied further in the paper [Generalization in NLI: Ways (Not) To Go Beyond Simple Heuristics](https://arxiv.org/pdf/2110.01518.pdf). 6 | 7 | The BERT-tiny proof point uses a model fine-tuned on the [Stanford Sentiment Treebank (SST) dataset](https://paperswithcode.com/dataset/sst), loaded from [Huggingface](https://huggingface.co/M-FAC/bert-tiny-finetuned-sst2), to perform [Sentiment Classification](https://paperswithcode.com/task/sentiment-analysis). 8 | 9 | The BERT-base proof point also uses a pre-trained model that is fine-tuned on the SST dataset for Sentiment Classification. [Huggingface](https://huggingface.co/howey/bert-base-uncased-sst2) provides the BERT-base model. 10 | 11 | The BERT-quantize proof point performs post-training quantization on the BERT-base model specified above. 1000 data samples from the SST Sentiment Classification dataset are chosen at random to determine the quantization parameters. 12 | 13 | ## Prerequisites 14 | 15 | - Ensure you've completed the install prerequisites: 16 | - Installed GroqWare™ Suite 17 | - Installed GroqFlow 18 | - Installed Groq Demo Helpers 19 | - For more information on these steps, see the [Proof Points README](../../README.md). 20 | - Install the python dependencies with the requirements.txt file included with this proof point, using the following command: 21 | 22 | ```bash 23 | pip install -r requirements.txt 24 | ``` 25 | 26 | ## Build and Evaluate 27 | 28 | To build and evaluate BERT-tiny: 29 | 30 | ```bash 31 | python bert_tiny.py 32 | ``` 33 | 34 | To build and evaluate BERT-base: 35 | 36 | ```bash 37 | python bert_base.py 38 | ``` 39 | 40 | **Note:** The Proof Points directory [readme.md](../../README.md) details how to build and execute on two machines. 41 | 42 | ## Expected Results 43 | 44 | Each script returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation. The table below details the approximate time to run each part of the script and the required number of GroqCard™ accelerators. 45 | 46 | | Proof Point Model | Approx Build Time | Approx Evaluation Time | Num of GroqCard™ Accelerators | 47 | |:-----------|:--------|:---------|:----------| 48 | | BERT-tiny | 1 min | 30 sec | 1 | 49 | | BERT-base | 15 min | 4 min | 4 | 50 | | BERT-quantize | 17 min | 4 min | 4 | 51 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/bert/bert_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example takes a pre-trained BERT model from Hugging Face 3 | (https://huggingface.co/howey/bert-base-uncased-sst2) and 4 | executes against the SST dataset (https://paperswithcode.com/dataset/sst) 5 | on CPU and GroqCard™ accelerators using the GroqFlow toolchain.
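
Note that groqit builds the model from fixed-shape dummy tensors (batch_size=1, max_seq_length=128), so real inputs must be padded or truncated to that same shape. A tokenizer call that produces matching tensors might look like this (an illustrative sketch, not a line from this script):

    inputs = tokenizer(
        "a movie review",
        padding="max_length",
        max_length=128,
        truncation=True,
        return_tensors="pt",
    )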
6 | """ 7 | import os 8 | import numpy as np 9 | import torch 10 | import transformers 11 | from groqflow import groqit 12 | 13 | from demo_helpers.compute_performance import compute_performance 14 | from demo_helpers.args import parse_args 15 | 16 | 17 | def get_model(): 18 | """PyTorch Model setup.""" 19 | pretrained_model_name = "howey/bert-base-uncased-sst2" 20 | 21 | tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained_model_name) 22 | pytorch_model = transformers.AutoModelForSequenceClassification.from_pretrained( 23 | pretrained_model_name, torchscript=True 24 | ) 25 | 26 | return pytorch_model.eval(), tokenizer 27 | 28 | 29 | def evaluate_bert(rebuild_policy=None, should_execute=True): 30 | # set seed for consistency 31 | np.random.seed(1) 32 | torch.manual_seed(0) 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | # load pre-trained torch model 36 | pytorch_model, tokenizer = get_model() 37 | 38 | # dummy inputs to generate the groq model 39 | batch_size = 1 40 | max_seq_length = 128 41 | dummy_inputs = { 42 | "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), 43 | "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.bool), 44 | } 45 | 46 | # generate groq model 47 | groq_model = groqit(pytorch_model, dummy_inputs, rebuild=rebuild_policy) 48 | 49 | # compute performance on CPU and GroqChip 50 | if should_execute: 51 | compute_performance( 52 | groq_model, 53 | pytorch_model, 54 | dataset="sst", 55 | tokenizer=tokenizer, 56 | max_seq_length=max_seq_length, 57 | task="classification", 58 | ) 59 | 60 | print(f"Proof point {__file__} finished!") 61 | 62 | 63 | if __name__ == "__main__": 64 | evaluate_bert(**parse_args()) 65 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/bert/bert_quantize.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example takes a pre-trained BERT model from Hugging Face 3 | (https://huggingface.co/howey/bert-base-uncased-sst2), performs 4 | post-training quantization on the exported ONNX model, and 5 | executes against the SST dataset (https://paperswithcode.com/dataset/sst) 6 | on CPU and GroqCard™ accelerator using the GroqFlow toolchain.
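
The calibration samples are drawn from the SST training data (see get_sst_quantization_samples below) and passed to groqit through its quantization_samples argument. Note that input_ids is created with dtype torch.int32 here, rather than the torch.long used by bert_base.py, and evaluation correspondingly uses the "sst-int32" variant of the dataset.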
7 | """ 8 | 9 | import os 10 | import numpy as np 11 | import torch 12 | import transformers 13 | from groqflow import groqit 14 | 15 | from demo_helpers.compute_performance import compute_performance 16 | from demo_helpers.args import parse_args 17 | from demo_helpers.dataset import get_sst_quantization_samples 18 | 19 | from datasets import logging 20 | 21 | logging.set_verbosity(logging.ERROR) 22 | 23 | 24 | def get_model(): 25 | """PyTorch Model setup.""" 26 | pretrained_model_name = "howey/bert-base-uncased-sst2" 27 | 28 | tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained_model_name) 29 | pytorch_model = transformers.AutoModelForSequenceClassification.from_pretrained( 30 | pretrained_model_name 31 | ) 32 | 33 | return pytorch_model.eval(), tokenizer 34 | 35 | 36 | def evaluate_bert(rebuild_policy=None, should_execute=True): 37 | # set seed for consistency 38 | np.random.seed(1) 39 | torch.manual_seed(0) 40 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 41 | 42 | # load pre-trained torch model 43 | pytorch_model, tokenizer = get_model() 44 | 45 | # dummy inputs to generate the groq model 46 | batch_size = 1 47 | max_seq_length = 128 48 | dummy_inputs = { 49 | "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.int32), 50 | "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.bool), 51 | } 52 | 53 | # process quantization sample data 54 | x_train = get_sst_quantization_samples() 55 | 56 | # generate groq model 57 | groq_model = groqit( 58 | pytorch_model, 59 | dummy_inputs, 60 | rebuild=rebuild_policy, 61 | quantization_samples=x_train, 62 | compiler_flags=["--large-program"], 63 | ) 64 | 65 | if should_execute: 66 | compute_performance( 67 | groq_model, 68 | pytorch_model, 69 | dataset="sst-int32", 70 | tokenizer=tokenizer, 71 | max_seq_length=max_seq_length, 72 | task="classification", 73 | ) 74 | 75 | print(f"Proof point {__file__} finished!") 76 | 77 | 78 | if __name__ == "__main__": 79 | evaluate_bert(**parse_args()) 80 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/bert/bert_tiny.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example takes a pre-trained BERT-tiny model from Hugging Face 3 | (https://huggingface.co/M-FAC/bert-tiny-finetuned-sst2) and 4 | executes against the SST dataset (https://paperswithcode.com/dataset/sst) 5 | on CPU and a GroqCard™ accelerator using the GroqFlow toolchain.
6 | """ 7 | import os 8 | import numpy as np 9 | import torch 10 | import transformers 11 | from groqflow import groqit 12 | 13 | from demo_helpers.compute_performance import compute_performance 14 | from demo_helpers.args import parse_args 15 | 16 | 17 | def get_model(): 18 | """PyTorch Model setup.""" 19 | pretrained_model_name = "M-FAC/bert-tiny-finetuned-sst2" 20 | 21 | tokenizer = transformers.AutoTokenizer.from_pretrained(pretrained_model_name) 22 | pytorch_model = transformers.AutoModelForSequenceClassification.from_pretrained( 23 | pretrained_model_name, torchscript=True 24 | ) 25 | 26 | return pytorch_model.eval(), tokenizer 27 | 28 | 29 | def evaluate_bert_tiny(rebuild_policy=None, should_execute=True): 30 | # set seed for consistency 31 | np.random.seed(1) 32 | torch.manual_seed(0) 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | # load pre-trained torch model 36 | pytorch_model, tokenizer = get_model() 37 | 38 | # dummy inputs to generate the groq model 39 | batch_size = 1 40 | max_seq_length = 128 41 | dummy_inputs = { 42 | "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), 43 | "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.bool), 44 | } 45 | 46 | # generate groq model 47 | groq_model = groqit(pytorch_model, dummy_inputs, rebuild=rebuild_policy) 48 | 49 | # compute performance on CPU and GroqChip 50 | if should_execute: 51 | compute_performance( 52 | groq_model, 53 | pytorch_model, 54 | dataset="sst", 55 | tokenizer=tokenizer, 56 | max_seq_length=max_seq_length, 57 | task="classification", 58 | ) 59 | 60 | print(f"Proof point {__file__} finished!") 61 | 62 | 63 | if __name__ == "__main__": 64 | evaluate_bert_tiny(**parse_args()) 65 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/bert/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.21.6 2 | torch>=1.12.1 3 | transformers>=4.20.0 4 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/distilbert/README.md: -------------------------------------------------------------------------------- 1 | # DistilBERT 2 | 3 | [DistilBERT](https://arxiv.org/pdf/1910.01108.pdf) is a [distilled model](https://arxiv.org/pdf/1503.02531.pdf) using the [BERT model](https://arxiv.org/abs/1810.04805) as the teacher. DistilBERT has the same general architecture as BERT, except that half the layers are removed, along with the pooler and the token-type embeddings. This reduction in size allows the model to train faster and requires much less memory and power to run. DistilBERT boasts that it retains 97% of the BERT model's scores with 40% fewer parameters. 4 | 5 | In this proof point, DistilBERT performs the task of [Sentiment Classification](https://paperswithcode.com/task/sentiment-analysis) and is evaluated using the Stanford Sentiment Treebank [(SST) dataset](https://paperswithcode.com/dataset/sst). The model weights are downloaded from the [Hugging Face website](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english). 6 | 7 | ## Prerequisites 8 | 9 | - Ensure you've completed the install prerequisites: 10 | - Installed GroqWare™ Suite 11 | - Installed GroqFlow 12 | - Installed Groq Demo Helpers 13 | - For more information on these steps, see the [Proof Points README](../../README.md).
14 | - Install the python dependencies with the requirements.txt file included with this proof point, using the following command: 15 | 16 | ```bash 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## Build and Evaluate 21 | 22 | To build and evaluate DistilBERT: 23 | 24 | ```bash 25 | python distilbert.py 26 | ``` 27 | 28 | **Note:** The Proof Points directory [readme.md](../../README.md) details how to build and execute on two machines. 29 | 30 | ## Expected Results 31 | 32 | It takes approximately 8 minutes for DistilBERT to build and about 2 minutes to evaluate the model's accuracy. The example returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation on 4 GroqCard™ accelerators within a GroqNode™ server. 33 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/distilbert/distilbert.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from transformers import AutoTokenizer, DistilBertForSequenceClassification 4 | import torch 5 | from demo_helpers.compute_performance import compute_performance 6 | from demo_helpers.args import parse_args 7 | 8 | from groqflow import groqit 9 | 10 | 11 | def evaluate_distilbert(rebuild_policy=None, should_execute=True): 12 | # set seed for consistency 13 | np.random.seed(1) 14 | torch.manual_seed(0) 15 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 16 | 17 | # load pre-trained torch model 18 | pretrained_model = "distilbert-base-uncased-finetuned-sst-2-english" 19 | tokenizer = AutoTokenizer.from_pretrained(pretrained_model) 20 | pytorch_model = DistilBertForSequenceClassification.from_pretrained( 21 | pretrained_model, torchscript=True 22 | ).eval()  # disable dropout layers 23 | 24 | # dummy inputs to generate the groq model 25 | batch_size = 1 26 | max_seq_length = 128 27 | 28 | dummy_inputs = { 29 | "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), 30 | "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.bool), 31 | } 32 | 33 | # generate groq model 34 | build_name = "distilbert" 35 | groq_model = groqit( 36 | pytorch_model, 37 | dummy_inputs, 38 | rebuild=rebuild_policy, 39 | build_name=build_name, 40 | num_chips=4, 41 | compiler_flags=["--partition-mode=group-fit"], 42 | ) 43 | 44 | # compute performance on CPU and GroqChip 45 | if should_execute: 46 | compute_performance( 47 | groq_model, 48 | pytorch_model, 49 | dataset="sst", 50 | tokenizer=tokenizer, 51 | max_seq_length=max_seq_length, 52 | task="classification", 53 | ) 54 | 55 | print(f"Proof point {__file__} finished!") 56 | 57 | 58 | if __name__ == "__main__": 59 | evaluate_distilbert(**parse_args()) 60 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/distilbert/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.12.0 2 | transformers>=4.20.0 3 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/electra/README.md: -------------------------------------------------------------------------------- 1 | # ELECTRA 2 | 3 | [ELECTRA](https://openreview.net/pdf?id=r1xMH1BtvB) uses a self-supervised pre-training method for language representation learning that is similar to a [Generative Adversarial Network (GAN)](https://en.wikipedia.org/wiki/Generative_adversarial_network), without the adversarial
part. During pre-training, instead of masking input tokens and learning to predict them, as many other NLP models do, a small generative network replaces a few input tokens with tokens of similar meaning. Then, the edited input is fed into a discriminator network that learns to differentiate between the original and replacement tokens. After training, the generator network is discarded and the discriminator network is used for inference. With this architecture and training method, ELECTRA boasts that it learns more efficiently and matches or outperforms, in terms of accuracy, models that learn only the masked tokens. 4 | 5 | In this proof point, an ELECTRA model fine-tuned on the [Stanford Sentiment Treebank (SST) dataset](https://paperswithcode.com/dataset/sst), loaded from [Huggingface](https://huggingface.co/howey/electra-base-sst2), performs the task of [Sentiment Classification](https://paperswithcode.com/task/sentiment-analysis). 6 | 7 | ## Prerequisites 8 | 9 | - Ensure you've completed the install prerequisites: 10 | - Installed the GroqWare™ Suite 11 | - Installed GroqFlow 12 | - Installed Groq Demo Helpers 13 | - For more information on these steps, see the [Proof Points README](../../README.md). 14 | - Install the python dependencies with the requirements.txt file included with this proof point, using the following command: 15 | 16 | ```bash 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## Build and Evaluate 21 | 22 | To build and evaluate ELECTRA: 23 | 24 | ```bash 25 | python electra.py 26 | ``` 27 | 28 | **Note:** The Proof Points directory [readme.md](../../README.md) details how to build and execute on two machines. 29 | 30 | ## Expected Results 31 | 32 | It takes approximately 15 minutes for ELECTRA to build and about 4 minutes to evaluate the implementation accuracies. The script returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation on 4 GroqCard™ accelerators within a GroqNode™ server. 33 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/electra/electra.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example takes a pre-trained ELECTRA base model from the 3 | huggingface models repository and executes against the SST dataset on CPU 4 | and GroqChip1 through GroqFlow.
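
The model is loaded with torchscript=True below so that it returns plain tuples instead of transformers' ModelOutput objects, which is friendlier to the tracing that groqit performs when exporting the model.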
5 | """ 6 | import os 7 | import transformers 8 | from groqflow import groqit 9 | import torch 10 | import numpy as np 11 | 12 | from demo_helpers.compute_performance import compute_performance 13 | from demo_helpers.args import parse_args 14 | 15 | 16 | def evaluate_electra(rebuild_policy=None, should_execute=True): 17 | # set seed for consistency 18 | np.random.seed(1) 19 | torch.manual_seed(0) 20 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 21 | 22 | # load pre-trained torch model 23 | pretrained_model_name = "howey/electra-base-sst2" 24 | 25 | tokenizer = transformers.ElectraTokenizerFast.from_pretrained(pretrained_model_name) 26 | pytorch_model = transformers.ElectraForSequenceClassification.from_pretrained( 27 | pretrained_model_name, torchscript=True 28 | ) 29 | pytorch_model.eval() 30 | 31 | # dummy inputs to generate the groq model 32 | batch_size = 1 33 | max_seq_length = 128 34 | dummy_inputs = { 35 | "input_ids": torch.ones(batch_size, max_seq_length, dtype=torch.long), 36 | "attention_mask": torch.ones(batch_size, max_seq_length, dtype=torch.bool), 37 | } 38 | 39 | # generate groq model 40 | groq_model = groqit(pytorch_model, dummy_inputs, rebuild=rebuild_policy) 41 | 42 | # compute performance on CPU and GroqChip 43 | if should_execute: 44 | compute_performance( 45 | groq_model, 46 | pytorch_model, 47 | dataset="sst", 48 | tokenizer=tokenizer, 49 | max_seq_length=max_seq_length, 50 | task="classification", 51 | ) 52 | 53 | print(f"Proof point {__file__} finished!") 54 | 55 | 56 | if __name__ == "__main__": 57 | evaluate_electra(**parse_args()) 58 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/electra/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.12.0 2 | transformers>=4.20.0 3 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/minilm/README.md: -------------------------------------------------------------------------------- 1 | # MiniLM v2 2 | 3 | [MiniLM v2](https://arxiv.org/abs/2012.15828) is a [distilled model](https://arxiv.org/pdf/1503.02531.pdf) that employs a generalization of the deep self-attention distillation method that the authors of the linked paper introduced in their first paper, [MiniLM](https://arxiv.org/abs/2002.10957). The distillation is generalized by employing multi-head self-attention distillation. 4 | 5 | In this proof point, MiniLM v2 is used for the task of [sentence similarity](https://huggingface.co/tasks/sentence-similarity) and evaluated using the [machine-translated multilingual](https://github.com/PhilipMay/stsb-multi-mt) version of the Semantic Textual Similarity [(STS) benchmark dataset](https://ixa2.si.ehu.eus/stswiki/index.php/STSbenchmark). Both the [model](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) and the [dataset](https://huggingface.co/datasets/stsb_multi_mt#citation-information) are downloaded from Hugging Face. 6 | 7 | ## Prerequisites 8 | 9 | - Ensure you've completed the install prerequisites: 10 | - Installed the GroqWare™ Suite 11 | - Installed GroqFlow 12 | - Installed Groq Demo Helpers 13 | - For more information on these steps, see the [Proof Points README](../../README.md).
14 | - Install the python dependencies with the requirements.txt file included with this proof point, using the following command: 15 | 16 | ```bash 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## Build and Evaluate 21 | 22 | To build and evaluate MiniLM v2: 23 | 24 | ```bash 25 | python minilmv2.py 26 | ``` 27 | 28 | **Note:** The Proof Points directory [readme.md](../../README.md) details how to build and execute on two machines. 29 | 30 | ## Expected Results 31 | 32 | It takes approximately 10 minutes for MiniLM v2 to build and about 1 minute to evaluate the [Spearman Rank Correlation Coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient) for both implementations. The script returns the Spearman Rank Correlation Coefficients for both the PyTorch implementation on a CPU and the Groq implementation using a single GroqCard™ accelerator. 33 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/minilm/minilmv2.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example takes a pre-trained MiniLM v2 model from the 3 | huggingface models repository and executes against the STS benchmark dataset 4 | on CPU and GroqChip1 through GroqFlow. 5 | """ 6 | import os 7 | from transformers import AutoTokenizer, AutoModel 8 | import torch 9 | from demo_helpers.compute_performance import compute_performance 10 | from demo_helpers.args import parse_args 11 | 12 | from groqflow import groqit 13 | 14 | 15 | def evaluate_minilm(rebuild_policy=None, should_execute=True): 16 | # set seed for consistency 17 | torch.manual_seed(0) 18 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 19 | 20 | # load pre-trained torch model 21 | tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") 22 | model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2").eval() 23 | 24 | # dummy inputs to generate the groq model 25 | max_seq_length = 128 26 | dummy_inputs = { 27 | "input_ids": torch.ones((2, max_seq_length), dtype=torch.long), 28 | "token_type_ids": torch.ones((2, max_seq_length), dtype=torch.long), 29 | "attention_mask": torch.ones((2, max_seq_length), dtype=torch.bool), 30 | } 31 | 32 | # generate groq model 33 | groq_model = groqit(model, dummy_inputs, rebuild=rebuild_policy) 34 | 35 | # compute performance on CPU and GroqChip 36 | if should_execute: 37 | compute_performance( 38 | groq_model, 39 | model, 40 | dataset="stsb_multi_mt", 41 | tokenizer=tokenizer, 42 | max_seq_length=max_seq_length, 43 | task="sentence_similarity", 44 | ) 45 | 46 | print(f"Proof point {__file__} finished!") 47 | 48 | 49 | if __name__ == "__main__": 50 | evaluate_minilm(**parse_args()) 51 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/minilm/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.12.0 2 | transformers>=4.20.0 3 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/roberta/README.md: -------------------------------------------------------------------------------- 1 | # RoBERTa 2 | 3 | [RoBERTa](https://arxiv.org/abs/1907.11692) is one of many derivatives of the [BERT model](https://arxiv.org/abs/1810.04805). Its name is an acronym created from the phrase, "Robustly optimized BERT approach".
RoBERTa improves on BERT by hyperparameter tuning and altering the training recipe. Optimizations employed by RoBERTa include longer training with larger batch sizes, more data, longer sequence lengths, and dynamically changing masking patterns. As with many of the other BERT model variations, RoBERTa also removes the next sentence prediction (NSP) loss from the loss function. 4 | 5 | In this proof point, RoBERTa is used for the task of [Named Entity Recognition](https://en.wikipedia.org/wiki/Named-entity_recognition) and evaluated using the [CoNLL-2003 dataset](https://paperswithcode.com/dataset/conll-2003). The model weights are downloaded from the [Hugging Face website](https://huggingface.co/dominiqueblok/roberta-base-finetuned-ner). 6 | 7 | ## Prerequisites 8 | 9 | - Ensure you've completed the install prerequisites: 10 | - Installed the GroqWare™ Suite 11 | - Installed GroqFlow 12 | - Installed Groq Demo Helpers 13 | - For more information on these steps, see the [Proof Points README](../../README.md). 14 | - Install the python dependencies with the requirements.txt file included with this proof point, using the following command: 15 | 16 | ```bash 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## Build and Evaluate 21 | 22 | To build and evaluate RoBERTa: 23 | 24 | ```bash 25 | python roberta.py 26 | ``` 27 | 28 | **Note:** The Proof Points directory [readme.md](../../README.md) details how to build and execute on two machines. 29 | 30 | ## Expected Results 31 | 32 | It takes approximately 15 minutes for RoBERTa to build and about 5 minutes to evaluate the implementation accuracies. The script returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation on 4 GroqCard™ accelerators within a GroqNode™ server. 33 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/roberta/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.12.0 2 | transformers>=4.20.0 3 | -------------------------------------------------------------------------------- /proof_points/natural_language_processing/roberta/roberta.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example takes a pre-trained RoBERTa model and executes it 3 | against the CoNLL-2003 dataset on CPU and GroqChip™ processor by using 4 | the GroqFlow toolchain.
The fine-tuned model can be downloaded 5 | here: https://huggingface.co/dominiqueblok/roberta-base-finetuned-ner 6 | """ 7 | 8 | import os 9 | 10 | import torch 11 | 12 | from demo_helpers.compute_performance import compute_performance 13 | from demo_helpers.args import parse_args 14 | from groqflow import groqit 15 | from transformers import RobertaForTokenClassification, RobertaTokenizerFast 16 | 17 | 18 | def evaluate_roberta(rebuild_policy=None, should_execute=None): 19 | # set seed for consistency 20 | torch.manual_seed(0) 21 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 22 | 23 | # load pre-trained torch model 24 | model_path = "dominiqueblok/roberta-base-finetuned-ner" 25 | tokenizer = RobertaTokenizerFast.from_pretrained(model_path) 26 | torch_model = RobertaForTokenClassification.from_pretrained( 27 | model_path, torchscript=True 28 | ).eval()  # disable dropout layers 29 | 30 | # dummy inputs to generate the groq model 31 | batch_size, max_seq_length = 1, 128 32 | dummy_inputs = { 33 | "input_ids": torch.ones((batch_size, max_seq_length), dtype=torch.long), 34 | "attention_mask": torch.ones((batch_size, max_seq_length), dtype=torch.float), 35 | } 36 | 37 | # generate groq model 38 | build_name = "roberta" 39 | groq_model = groqit( 40 | torch_model, 41 | dummy_inputs, 42 | compiler_flags=["--large-program"], 43 | rebuild=rebuild_policy, 44 | build_name=build_name, 45 | ) 46 | 47 | # compute performance on CPU and GroqChip 48 | if should_execute: 49 | compute_performance( 50 | groq_model, 51 | torch_model, 52 | dataset="conll2003", 53 | tokenizer=tokenizer, 54 | max_seq_length=max_seq_length, 55 | task="ner", 56 | ) 57 | 58 | print(f"Proof point {__file__} finished!") 59 | 60 | 61 | if __name__ == "__main__": 62 | evaluate_roberta(**parse_args()) 63 | -------------------------------------------------------------------------------- /proof_points/speech/m5/README.md: -------------------------------------------------------------------------------- 1 | # M5 2 | 3 | [M5](https://arxiv.org/abs/1610.00087) is a convolutional neural network (CNN) that works directly on the raw audio waveform. Since M5 accepts raw data, there is no need to generate frequency spectra, a required pre-processing step used by many audio/acoustic models. 4 | 5 | This proof point uses the M5 model on the task of [Keyword Spotting](https://en.wikipedia.org/wiki/Keyword_spotting). The M5 adaptation for this task replaces the global average pool in the original M5 model with a fully connected layer; the architecture definition can be viewed in the [demo_helpers folder](../../../demo_helpers/models.py). 6 | 7 | M5's Keyword Spotting accuracy is evaluated using the [SpeechCommands dataset](https://arxiv.org/abs/1804.03209) from PyTorch's `torchaudio.datasets` library. 8 | 9 | ## Prerequisites 10 | 11 | - Ensure you've completed the install prerequisites: 12 | - Installed the GroqWare™ Suite 13 | - Installed GroqFlow 14 | - Installed Groq Demo Helpers 15 | - For more information on these steps, see the [Proof Points README](../../README.md). 16 | - Install the python dependencies with the requirements.txt file included with this proof point, using the following command: 17 | 18 | ```bash 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | - Since this proof point uses audio files, the audio libraries often must be installed on the system:
23 | - For Ubuntu OS: 24 | 25 | ```bash 26 | sudo apt install libsox-dev 27 | ``` 28 | 29 | - For Rocky OS: 30 | 31 | ```bash 32 | sudo dnf install sox-devel 33 | ``` 34 | 35 | ## Build and Evaluate 36 | 37 | To build and evaluate M5: 38 | 39 | ```bash 40 | python m5.py 41 | ``` 42 | 43 | **Note:** The [Proof Points directory README](../../README.md) details how to build and execute on two machines. 44 | 45 | ## Expected Results 46 | 47 | It takes approximately 5 minutes for M5 to build and about 1 minute to evaluate the implementation accuracies. The script returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation using a single GroqCard™ accelerator. 48 | -------------------------------------------------------------------------------- /proof_points/speech/m5/m5.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following example takes a pre-trained M5 model and executes it against 3 | the SpeechCommands dataset on CPU and GroqChip™ processor using GroqFlow. 4 | """ 5 | 6 | import torch 7 | 8 | from demo_helpers.compute_performance import compute_performance 9 | from demo_helpers.models import load_pretrained 10 | from demo_helpers.args import parse_args 11 | from groqflow import groqit 12 | 13 | 14 | def evaluate_m5(rebuild_policy=None, should_execute=True): 15 | # set seed for consistency 16 | torch.manual_seed(0) 17 | 18 | # load pre-trained torch model 19 | torch_model = load_pretrained("m5") 20 | torch_model.eval() 21 | 22 | # dummy inputs to generate groq model 23 | dummy_input = torch.randn([1, 1, 16000]) 24 | 25 | # generate groq_model 26 | build_name = "m5" 27 | groq_model = groqit( 28 | torch_model, {"x": dummy_input}, rebuild=rebuild_policy, build_name=build_name 29 | ) 30 | 31 | # compute performance on CPU, GroqChip 32 | if should_execute: 33 | compute_performance( 34 | groq_model, 35 | torch_model, 36 | dataset="speechcommands", 37 | task="keyword_spotting", 38 | ) 39 | 40 | print(f"Proof point {__file__} finished!") 41 | 42 | 43 | if __name__ == "__main__": 44 | evaluate_m5(**parse_args()) 45 | -------------------------------------------------------------------------------- /proof_points/speech/m5/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.12.1 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("groqflow/version.py", encoding="utf-8") as fp: 4 | version = fp.read().split('"')[1]  # version.py holds a single double-quoted version string 5 | 6 | setup( 7 | name="groqflow", 8 | version=version, 9 | description="GroqFlow toolchain library", 10 | url="https://github.com/groq/groqflow", 11 | author="Groq", 12 | author_email="sales@groq.com", 13 | license="MIT", 14 | packages=find_packages( 15 | exclude=["*.__pycache__.*"], 16 | ), 17 | install_requires=[ 18 | "mlagility==3.3.1", 19 | "onnx==1.14.0", 20 | "onnxruntime==1.15.1", 21 | "protobuf==3.20.3", 22 | "scikit-learn==1.1.1", 23 | "torch==2.1.0", 24 | "typeguard==4.0.0", 25 | ], 26 | extras_require={ 27 | "tensorflow": ["tensorflow-cpu>=2.8.1", "tf2onnx>=1.12.0"], 28 | }, 29
| classifiers=[], 30 | python_requires=">=3.8, <3.11", 31 | long_description=open("README.md", "r", encoding="utf-8").read(), 32 | long_description_content_type="text/markdown", 33 | ) 34 | --------------------------------------------------------------------------------