├── .github
│   ├── dependabot.yml
│   └── workflows
│       └── deploy.yml
├── .gitignore
├── .gitlab-ci.yml
├── .pre-commit-config.yaml
├── README.md
├── _config.yml
├── _toc.yml
├── callbacks.py
├── environment.yml
├── images
│   ├── conifer_v1.png
│   ├── conv2d_animation.gif
│   ├── hls4ml_logo.svg
│   ├── part5_floorplan.png
│   ├── part7_block_design.png
│   ├── part7_floorplan.png
│   ├── reuse.png
│   └── test.png
├── nn_utils.py
├── part1_getting_started.ipynb
├── part2_advanced_config.ipynb
├── part3_compression.ipynb
├── part4.1_HG_quantization.ipynb
├── part4_quantization.ipynb
├── part5_bdt.ipynb
├── part6_cnns.ipynb
├── part7a_bitstream.ipynb
├── part7b_deployment.ipynb
├── part7c_validation.ipynb
├── part8_symbolic_regression.ipynb
├── plotting.py
├── pruned_cnn
│   ├── myproject_prj
│   │   └── solution1
│   │       └── syn
│   │           └── report
│   │               └── myproject_csynth.rpt
│   └── vivado_synth.rpt
├── quantized_pruned_cnn
│   ├── myproject_prj
│   │   └── solution1
│   │       └── syn
│   │           └── report
│   │               └── myproject_csynth.rpt
│   └── vivado_synth.rpt
└── sr
    └── example.pkl

/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   # Maintain dependencies for GitHub Actions
4 |   - package-ecosystem: "github-actions"
5 |     directory: "/"
6 |     schedule:
7 |       interval: "weekly"

--------------------------------------------------------------------------------
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
1 | name: deploy-book
2 | 
3 | # Only run this when the main branch changes
4 | on:
5 |   push:
6 |     branches:
7 |       - main
8 |   pull_request:
9 |     branches:
10 |       - main
11 | 
12 | # This job installs dependencies, builds the book, and pushes it to `gh-pages`
13 | jobs:
14 |   deploy-book:
15 |     runs-on: ubuntu-latest
16 |     steps:
17 |       - uses: actions/checkout@v4
18 | 
19 |       # Install dependencies
20 |       - name: Setup Miniconda
21 |         uses: conda-incubator/setup-miniconda@v3
22 |         with:
23 |           miniforge-version: latest
24 |           use-mamba: true
25 |           channels: conda-forge
26 |           activate-environment: hls4ml-tutorial
27 |           environment-file: environment.yml
28 |           python-version: 3.10.16
29 |           auto-activate-base: false
30 | 
31 |       # Check dependencies
32 |       - name: Check Miniconda
33 |         shell: bash -l {0}
34 |         run: |
35 |           conda info
36 |           conda list
37 |           conda config --show-sources
38 |           conda config --show
39 |           printenv | sort
40 | 
41 |       - name: Build the book
42 |         shell: bash -l {0}
43 |         run: |
44 |           jupyter contrib nbextension install --user
45 |           jupyter nbextension enable --py widgetsnbextension
46 |           jupyter-book build .
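# Publish the built HTML (_build/html) to the gh-pages branch; the
# `if` guard below skips publishing for pull requests, so PR builds
# only verify that the book compiles.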
47 | 48 | - name: GitHub Pages action 49 | uses: peaceiris/actions-gh-pages@v4.0.0 50 | if: ${{ github.event_name != 'pull_request' }} 51 | with: 52 | github_token: ${{ secrets.GITHUB_TOKEN }} 53 | publish_dir: _build/html 54 | force_orphan: true 55 | user_name: 'github-actions[bot]' 56 | user_email: 'github-actions[bot]@users.noreply.github.com' 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | *~ 4 | *.npy 5 | _build 6 | model_1 7 | model_2 8 | model_3 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | image: gcr.io/kaniko-project/executor:debug 2 | 3 | stages: 4 | - build-and-push 5 | 6 | build-and-push-job: 7 | stage: build-and-push 8 | script: 9 | - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json 10 | - /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/docker/Dockerfile --destination $CI_REGISTRY_IMAGE/hls4ml-0.8.0:${CI_COMMIT_SHA:0:8} --destination $CI_REGISTRY_IMAGE/hls4ml-0.8.0:latest 11 | 12 | build-and-push-vivado-job: 13 | stage: build-and-push 14 | script: 15 | - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json 16 | - /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/docker/Dockerfile.vivado --destination $CI_REGISTRY_IMAGE/hls4ml-0.8.0-vivado-2019.1:${CI_COMMIT_SHA:0:8} --destination $CI_REGISTRY_IMAGE/hls4ml-0.8.0-vivado-2019.1:latest 17 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: .*\.rpt$ 2 | 3 | repos: 4 | - repo: https://github.com/psf/black 5 | rev: 25.1.0 6 | hooks: 7 | - id: black-jupyter 8 | language_version: python3 9 | args: ['--line-length=125', 10 | '--skip-string-normalization'] 11 | 12 | - repo: https://github.com/pre-commit/pre-commit-hooks 13 | rev: v5.0.0 14 | hooks: 15 | - id: check-added-large-files 16 | - id: check-case-conflict 17 | - id: check-merge-conflict 18 | - id: check-symlinks 19 | - id: check-yaml 20 | - id: debug-statements 21 | - id: end-of-file-fixer 22 | - id: mixed-line-ending 23 | - id: requirements-txt-fixer 24 | - id: trailing-whitespace 25 | 26 | - repo: https://github.com/PyCQA/isort 27 | rev: 6.0.1 28 | hooks: 29 | - id: isort 30 | args: ["--profile", "black", --line-length=125] 31 | 32 | - repo: https://github.com/asottile/pyupgrade 33 | rev: v3.19.1 34 | hooks: 35 | - id: pyupgrade 36 | args: ["--py36-plus"] 37 | 38 | - repo: https://github.com/asottile/setup-cfg-fmt 39 | rev: v2.8.0 40 | hooks: 41 | - id: setup-cfg-fmt 42 | 43 | - repo: https://github.com/pycqa/flake8 44 | rev: 7.2.0 45 | hooks: 46 | - id: flake8 47 | exclude: docs/conf.py 48 | additional_dependencies: [flake8-bugbear, flake8-print] 49 | args: ['--max-line-length=125', # github viewer width 50 | '--extend-ignore=E203,T201'] # E203 is not PEP8 compliant 51 | 52 | - repo: https://github.com/mgedmin/check-manifest 53 | rev: "0.50" 54 | hooks: 55 | - id: check-manifest 56 | stages: [manual] 57 | 58 | - repo: https://github.com/jmduarte/p-clang-format 59 | rev: 
"v1.0.4" 60 | hooks: 61 | - id: p-clang-format 62 | types_or: [c++, c, cuda] 63 | ci: 64 | autofix_commit_msg: '[pre-commit.ci] auto fixes from pre-commit hooks' 65 | autofix_prs: true # default is true 66 | autoupdate_branch: 'main' 67 | autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' 68 | autoupdate_schedule: weekly 69 | skip: [] 70 | submodules: true 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hls4ml-tutorial: Tutorial notebooks for `hls4ml` 2 | 3 | 4 | [![Jupyter Book Badge](https://jupyterbook.org/badge.svg)](https://fastmachinelearning.org/hls4ml-tutorial) 5 | ![deploy-book](https://github.com/fastmachinelearning/hls4ml-tutorial/actions/workflows/deploy.yml/badge.svg) 6 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 7 | [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) 8 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/fastmachinelearning/hls4ml-tutorial) 9 | 10 | 11 | There are several ways to run the tutorial notebooks: 12 | ## Online 13 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/fastmachinelearning/hls4ml-tutorial/HEAD) 14 | 15 | ## Conda 16 | Running the tutorials requires AMD Vitis HLS to be installed, see [here](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vitis.html). 17 | After the installation, the necessary environmental variables can be set using 18 | ``` 19 | source /path/to/your/installtion/Xilinx/Vitis_HLS/202X.X/settings64.(c)sh 20 | ``` 21 | 22 | The Python environment used for the tutorials is specified in the `environment.yml` file. 23 | It can be setup like: 24 | ```bash 25 | conda env create -f environment.yml 26 | conda activate hls4ml-tutorial 27 | source /path/to/your/installtion/Xilinx/Vitis_HLS/202X.X/settings64.(c)sh 28 | ``` 29 | 30 | Note that part 7 of the tutorial makes use of the `VivadoAccelator` backend of hls4ml for which no Vitis equivalent is available yet. For this part of the tutorial it is therefore necesary to install and source Vivado HLS version 2019.2 or 2020.1, which can be obtained [here](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/archive.html). 31 | 32 | ## Companion material 33 | We have prepared a set of slides with some introduction and more details on each of the exercises. 34 | Please find them [here](https://docs.google.com/presentation/d/1c4LvEc6yMByx2HJs8zUP5oxLtY6ACSizQdKvw5cg5Ck/edit?usp=sharing). 35 | 36 | 37 | ## Notebooks 38 | ```{tableofcontents} 39 | ``` 40 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | # Book settings 2 | # Learn more at https://jupyterbook.org/customize/config.html 3 | 4 | title: hls4ml tutorial 5 | author: Fast ML team 6 | logo: images/hls4ml_logo.svg 7 | favicon: images/hls4ml_logo.svg 8 | 9 | # Force re-execution of notebooks on each build. 
10 | # See https://jupyterbook.org/content/execute.html
11 | execute:
12 |   execute_notebooks: force
13 |   timeout: -1
14 | 
15 | # Define the name of the latex output file for PDF builds
16 | latex:
17 |   latex_documents:
18 |     targetname: book.tex
19 | 
20 | # Information about where the book exists on the web
21 | repository:
22 |   url: https://github.com/fastmachinelearning/hls4ml-tutorial  # Online location of your book
23 |   path_to_book: ""  # Optional path to your book, relative to the repository root
24 |   branch: main  # Which branch of the repository should be used when creating links (optional)
25 | 
26 | # Add GitHub buttons to your book
27 | # See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository
28 | html:
29 |   use_issues_button: true
30 |   use_repository_button: true
31 |   baseurl: "https://fastmachinelearning.org/hls4ml-tutorial/"  # The base URL where your book will be hosted. Used for creating image previews and social links. e.g.: https://mypage.com/mybook/
32 | 
33 | launch_buttons:
34 |   binderhub_url: "https://mybinder.org"
35 |   colab_url: "https://colab.research.google.com"

--------------------------------------------------------------------------------
/_toc.yml:
--------------------------------------------------------------------------------
1 | format: jb-book
2 | root: README.md
3 | chapters:
4 |   - file: part1_getting_started.ipynb
5 |   - file: part2_advanced_config.ipynb
6 |   - file: part3_compression.ipynb
7 |   - file: part4_quantization.ipynb
8 |   - file: part5_bdt.ipynb
9 |   - file: part6_cnns.ipynb
10 |   - file: part7a_bitstream.ipynb
11 |   - file: part7b_deployment.ipynb
12 |   - file: part7c_validation.ipynb
13 |   - file: part8_symbolic_regression.ipynb

--------------------------------------------------------------------------------
/callbacks.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on 7 Apr 2017
3 | 
4 | @author: jkiesele
5 | '''
6 | 
7 | import json
8 | 
9 | # loss per epoch
10 | from time import time
11 | 
12 | from tensorflow.keras.callbacks import Callback, EarlyStopping, History, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
13 | 
14 | 
15 | class newline_callbacks_begin(Callback):
16 |     def __init__(self, outputDir):
17 |         self.outputDir = outputDir
18 |         self.loss = []
19 |         self.val_loss = []
20 |         self.full_logs = []
21 | 
22 |     def on_epoch_end(self, epoch, epoch_logs={}):  # noqa: B006
23 |         import os
24 | 
25 |         lossfile = os.path.join(self.outputDir, 'losses.log')
26 |         print('\n***callbacks***\nsaving losses to ' + lossfile)
27 |         self.loss.append(epoch_logs.get('loss'))
28 |         self.val_loss.append(epoch_logs.get('val_loss'))
29 |         with open(lossfile, 'w') as f:  # context manager ensures the file is closed
30 |             for i in range(len(self.loss)):
31 |                 f.write(str(self.loss[i]))
32 |                 f.write(" ")
33 |                 f.write(str(self.val_loss[i]))
34 |                 f.write("\n")
35 | 
36 |         normed = {}
37 |         for vv in epoch_logs:
38 |             normed[vv] = float(epoch_logs[vv])
39 |         self.full_logs.append(normed)
40 |         lossfile = os.path.join(self.outputDir, 'full_info.log')
41 |         with open(lossfile, 'w') as out:
42 |             out.write(json.dumps(self.full_logs))
43 | 
44 | 
45 | class newline_callbacks_end(Callback):
46 |     def on_epoch_end(self, epoch, epoch_logs={}):  # noqa: B006
47 |         print('\n***callbacks end***\n')
48 | 
49 | 
50 | class Losstimer(Callback):
51 |     def __init__(self, every=5):
52 |         self.points = []
53 |         self.every = every
54 | 
55 |     def on_train_begin(self, logs):
56 |         self.start = time()
57 | 
58 |     def on_batch_end(self, batch, logs):
59 |         if (batch % self.every) != 0:
60 | 
return 61 | elapsed = time() - self.start 62 | cop = {} 63 | for i, j in logs.items(): 64 | cop[i] = float(j) 65 | cop['elapsed'] = elapsed 66 | self.points.append(cop) 67 | 68 | 69 | class all_callbacks: 70 | def __init__( 71 | self, stop_patience=10, lr_factor=0.5, lr_patience=1, lr_epsilon=0.001, lr_cooldown=4, lr_minimum=1e-5, outputDir='' 72 | ): 73 | self.nl_begin = newline_callbacks_begin(outputDir) 74 | self.nl_end = newline_callbacks_end() 75 | 76 | self.stopping = EarlyStopping(monitor='val_loss', patience=stop_patience, verbose=1, mode='min') 77 | 78 | self.reduce_lr = ReduceLROnPlateau( 79 | monitor='val_loss', 80 | factor=lr_factor, 81 | patience=lr_patience, 82 | mode='min', 83 | verbose=1, 84 | epsilon=lr_epsilon, 85 | cooldown=lr_cooldown, 86 | min_lr=lr_minimum, 87 | ) 88 | 89 | self.modelbestcheck = ModelCheckpoint( 90 | outputDir + "/KERAS_check_best_model.h5", monitor='val_loss', verbose=1, save_best_only=True 91 | ) 92 | 93 | self.modelbestcheckweights = ModelCheckpoint( 94 | outputDir + "/KERAS_check_best_model_weights.h5", 95 | monitor='val_loss', 96 | verbose=1, 97 | save_best_only=True, 98 | save_weights_only=True, 99 | ) 100 | 101 | self.modelcheckperiod = ModelCheckpoint(outputDir + "/KERAS_check_model_epoch{epoch:02d}.h5", verbose=1, period=10) 102 | 103 | self.modelcheck = ModelCheckpoint(outputDir + "/KERAS_check_model_last.h5", verbose=1) 104 | 105 | self.modelcheckweights = ModelCheckpoint( 106 | outputDir + "/KERAS_check_model_last_weights.h5", verbose=1, save_weights_only=True 107 | ) 108 | 109 | self.tb = TensorBoard(log_dir=outputDir + '/logs') 110 | 111 | self.history = History() 112 | self.timer = Losstimer() 113 | 114 | self.callbacks = [ 115 | self.nl_begin, 116 | self.modelbestcheck, 117 | self.modelbestcheckweights, 118 | self.modelcheck, 119 | self.modelcheckweights, 120 | self.modelcheckperiod, 121 | self.reduce_lr, 122 | self.stopping, 123 | self.nl_end, 124 | self.tb, 125 | self.history, 126 | self.timer, 127 | ] 128 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: hls4ml-tutorial 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.10.16 6 | - jupyter_contrib_nbextensions 7 | - jupyterhub 8 | - jupyter-book 9 | - jsonschema-with-format-nongpl 10 | - pydot==1.4.2 11 | - graphviz==7.1.0 12 | - scikit-learn==1.2.2 13 | - tensorflow==2.14.0 14 | - tensorflow-datasets==4.8.3 15 | - webcolors 16 | - widgetsnbextension==3.6.0 17 | - pip==23.0.1 18 | - pip: 19 | - hls4ml[profiling,optimization,sr,HGQ]==1.1.0 20 | - conifer==1.5 21 | - pysr==0.16.3 22 | - xgboost==1.7.5 23 | - zstd 24 | -------------------------------------------------------------------------------- /images/conifer_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/conifer_v1.png -------------------------------------------------------------------------------- /images/conv2d_animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/conv2d_animation.gif -------------------------------------------------------------------------------- /images/hls4ml_logo.svg: 
--------------------------------------------------------------------------------
[SVG markup omitted: Inkscape vector source of the hls4ml logo (image/svg+xml).]

--------------------------------------------------------------------------------
/images/part5_floorplan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/part5_floorplan.png

--------------------------------------------------------------------------------
/images/part7_block_design.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/part7_block_design.png

--------------------------------------------------------------------------------
/images/part7_floorplan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/part7_floorplan.png

--------------------------------------------------------------------------------
/images/reuse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/reuse.png

--------------------------------------------------------------------------------
/images/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/test.png

--------------------------------------------------------------------------------
/nn_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import pickle as pkl
4 | import random
5 | from io import BytesIO
6 | from pathlib import Path
7 | from typing import Callable
8 | 
9 | import h5py as h5
10 | import numpy as np
11 | import tensorflow as tf
12 | import zstd
13 | from HGQ.bops import trace_minmax
14 | from keras.layers import Dense
15 | from keras.src.layers.convolutional.base_conv import Conv
16 | from keras.src.saving.legacy import hdf5_format
17 | from matplotlib import pyplot as plt
18 | from tensorflow import
keras 19 | from tqdm.auto import tqdm 20 | 21 | 22 | class NumpyFloatValuesEncoder(json.JSONEncoder): 23 | def default(self, obj): 24 | if isinstance(obj, np.float32): # type: ignore 25 | return float(obj) 26 | return json.JSONEncoder.default(self, obj) 27 | 28 | 29 | class SaveTopN(keras.callbacks.Callback): 30 | def __init__( 31 | self, 32 | metric_fn: Callable[[dict], float], 33 | n: int, 34 | path: str | Path, 35 | side: str = 'max', 36 | fname_format='epoch={epoch}-metric={metric:.4e}.h5', 37 | cond_fn: Callable[[dict], bool] = lambda x: True, 38 | ): 39 | self.n = n 40 | self.metric_fn = metric_fn 41 | self.path = Path(path) 42 | self.fname_format = fname_format 43 | os.makedirs(path, exist_ok=True) 44 | self.weight_paths = np.full(n, '/dev/null', dtype=object) 45 | if side == 'max': 46 | self.best = np.full(n, -np.inf) 47 | self.side = np.greater 48 | elif side == 'min': 49 | self.best = np.full(n, np.inf) 50 | self.side = np.less 51 | self.cond = cond_fn 52 | 53 | def on_epoch_end(self, epoch, logs=None): 54 | assert isinstance(logs, dict) 55 | assert isinstance(self.model, keras.models.Model) 56 | logs = logs.copy() 57 | logs['epoch'] = epoch 58 | if not self.cond(logs): 59 | return 60 | metric = self.metric_fn(logs) 61 | 62 | if self.side(metric, self.best[-1]): 63 | try: 64 | os.remove(self.weight_paths[-1]) 65 | except OSError: 66 | pass 67 | logs['metric'] = metric 68 | fname = self.path / self.fname_format.format(**logs) 69 | self.best[-1] = metric 70 | self.weight_paths[-1] = fname 71 | self.model.save_weights(fname) 72 | with h5.File(fname, 'r+') as f: 73 | log_str = json.dumps(logs, cls=NumpyFloatValuesEncoder) 74 | f.attrs['train_log'] = log_str 75 | idx = np.argsort(self.best) 76 | if self.side == np.greater: 77 | idx = idx[::-1] 78 | self.best = self.best[idx] 79 | self.weight_paths = self.weight_paths[idx] 80 | 81 | def rename_ckpts(self, dataset, bsz=65536): 82 | assert self.weight_paths[0] != '/dev/null', 'No checkpoints to rename' 83 | assert isinstance(self.model, keras.models.Model) 84 | 85 | weight_buf = BytesIO() 86 | with h5.File(weight_buf, 'w') as f: 87 | hdf5_format.save_weights_to_hdf5_group(f, self.model) 88 | weight_buf.seek(0) 89 | 90 | for i, path in enumerate(tqdm(self.weight_paths, desc='Renaming checkpoints')): 91 | if path == '/dev/null': 92 | continue 93 | self.model.load_weights(path) 94 | bops = trace_minmax(self.model, dataset, bsz=bsz, verbose=False) 95 | with h5.File(path, 'r+') as f: 96 | logs = json.loads(f.attrs['train_log']) # type: ignore 97 | logs['bops'] = bops 98 | metric = self.metric_fn(logs) 99 | logs['metric'] = metric 100 | f.attrs['train_log'] = json.dumps(logs, cls=NumpyFloatValuesEncoder) 101 | self.best[i] = metric 102 | new_fname = self.path / self.fname_format.format(**logs) 103 | os.rename(path, new_fname) 104 | self.weight_paths[i] = new_fname 105 | 106 | idx = np.argsort(self.best) 107 | self.best = self.best[idx] 108 | self.weight_paths = self.weight_paths[idx] 109 | with h5.File(weight_buf, 'r') as f: 110 | hdf5_format.load_weights_from_hdf5_group_by_name(f, self.model) 111 | 112 | 113 | class PBarCallback(tf.keras.callbacks.Callback): 114 | def __init__(self, metric='loss: {loss:.2f}/{val_loss:.2f}'): 115 | self.pbar = None 116 | self.template = metric 117 | 118 | def on_epoch_begin(self, epoch, logs=None): 119 | if self.pbar is None: 120 | self.pbar = tqdm(total=self.params['epochs'], unit='epoch') 121 | 122 | def on_epoch_end(self, epoch, logs=None): 123 | assert isinstance(self.pbar, tqdm) 124 | assert 
isinstance(logs, dict)
125 |         self.pbar.update(1)
126 |         string = self.template.format(**logs)
127 |         if 'bops' in logs:
128 |             string += f' - BOPs: {logs["bops"]:,.0f}'
129 |         self.pbar.set_description(string)
130 | 
131 |     def on_train_end(self, logs=None):
132 |         if self.pbar is not None:
133 |             self.pbar.close()
134 | 
135 | 
136 | def plot_history(history: dict, metrics=('loss', 'val_loss'), ylabel='Loss', logy=False):
137 |     fig, ax = plt.subplots()
138 |     for metric in metrics:
139 |         ax.plot(history[metric], label=metric)
140 |     ax.set_xlabel('Epoch')
141 |     ax.set_ylabel(ylabel)
142 |     if logy:
143 |         ax.set_yscale('log')
144 |     ax.legend()
145 |     return fig, ax
146 | 
147 | 
148 | def save_model(model: keras.models.Model, path: str):
149 |     _path = Path(path)
150 |     model.save(path)
151 |     if model.history is not None:
152 |         history = model.history.history
153 |     else:
154 |         history = {}
155 |     with open(_path.with_suffix('.history'), 'wb') as f:
156 |         f.write(zstd.compress(pkl.dumps(history)))
157 | 
158 | 
159 | def load_model(path: str, co=None):
160 |     _path = Path(path)
161 |     model: keras.Model = keras.models.load_model(path, custom_objects=co)  # type: ignore
162 |     with open(_path.with_suffix('.history'), 'rb') as f:
163 |         history: dict[str, list] = pkl.loads(zstd.decompress(f.read()))
164 |     return model, history
165 | 
166 | 
167 | def save_history(history, path):
168 |     with open(path, 'wb') as f:
169 |         f.write(zstd.compress(pkl.dumps(history)))
170 | 
171 | 
172 | def load_history(path):
173 |     with open(path, 'rb') as f:
174 |         history = pkl.loads(zstd.decompress(f.read()))
175 |     return history
176 | 
177 | 
178 | def absorb_batchNorm(model_target, model_original):
179 |     for layer in model_target.layers:
180 |         if layer.__class__.__name__ == 'Functional':
181 |             absorb_batchNorm(layer, model_original.get_layer(layer.name))
182 |             continue
183 |         if (
184 |             (isinstance(layer, Dense) or isinstance(layer, Conv))
185 |             and len(nodes := model_original.get_layer(layer.name)._outbound_nodes) > 0
186 |             and isinstance(nodes[0].outbound_layer, keras.layers.BatchNormalization)
187 |         ):
188 |             _gamma, _beta, _mu, _var = model_original.get_layer(layer.name)._outbound_nodes[0].outbound_layer.get_weights()
189 |             _ratio = _gamma / np.sqrt(0.001 + _var)  # BN scale: gamma / sqrt(var + eps)
190 |             _bias = -_gamma * _mu / np.sqrt(0.001 + _var) + _beta  # BN shift: beta - gamma * mu / sqrt(var + eps)
191 | 
192 |             k, *_b = model_original.get_layer(layer.name).get_weights()
193 |             if _b:
194 |                 b = _b[0]
195 |             else:
196 |                 b = np.zeros(layer.output_shape[-1])
197 |             nk = np.einsum('...c, c-> ...c', k, _ratio, optimize=True)
198 |             nb = np.einsum('...c, c-> ...c', b, _ratio, optimize=True) + _bias
199 |             extras = layer.get_weights()[2:]
200 |             layer.set_weights([nk, nb, *extras])
201 |         elif hasattr(layer, 'kernel'):
202 |             for w in layer.weights:
203 |                 if '_bw' not in w.name:
204 |                     break
205 |             else:
206 |                 continue
207 |             weights = layer.get_weights()
208 |             new_weights = model_original.get_layer(layer.name).get_weights()
209 |             l = len(new_weights)  # noqa: E741  # If l looks like 1 by any chance, change your font.
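# (The line below copies the original model's weights into this layer while
#  keeping any extra trailing weights -- e.g. learned HGQ bitwidths -- that
#  only the target layer carries.)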
210 | layer.set_weights([*new_weights, *weights[l:]][: len(weights)]) 211 | 212 | 213 | def set_seed(seed): 214 | np.random.seed(seed) 215 | tf.random.set_seed(seed) 216 | os.environ['PYTHONHASHSEED'] = str(seed) 217 | random.seed(seed) 218 | 219 | tf.config.experimental.enable_op_determinism() 220 | 221 | 222 | def get_best_ckpt(save_path: Path, take_min=False): 223 | ckpts = list(save_path.glob('*.h5')) 224 | 225 | def rank(ckpt: Path): 226 | with h5.File(ckpt, 'r') as f: 227 | log: dict = f.attrs['train_log'] # type: ignore 228 | log = json.loads(log) # type: ignore 229 | metric = log['metric'] # type: ignore 230 | return metric 231 | 232 | ckpts = sorted(ckpts, key=rank, reverse=not take_min) 233 | ckpt = ckpts[0] 234 | return ckpt 235 | 236 | 237 | class PeratoFront(keras.callbacks.Callback): 238 | def __init__( 239 | self, 240 | path: str | Path, 241 | fname_format: str, 242 | metrics_names: list[str], 243 | sides: list[int], 244 | cond_fn: Callable[[dict], bool] = lambda x: True, 245 | ): 246 | self.path = Path(path) 247 | self.fname_format = fname_format 248 | os.makedirs(path, exist_ok=True) 249 | self.paths = [] 250 | self.metrics = [] 251 | self.metric_names = metrics_names 252 | self.sides = np.array(sides) 253 | self.cond_fn = cond_fn 254 | 255 | def on_epoch_end(self, epoch, logs=None): 256 | assert isinstance(self.model, keras.models.Model) 257 | assert isinstance(logs, dict) 258 | 259 | logs = logs.copy() 260 | logs['epoch'] = epoch 261 | 262 | if not self.cond_fn(logs): 263 | return 264 | new_metrics = np.array([logs[metric_name] for metric_name in self.metric_names]) 265 | _rm_idx = [] 266 | for i, old_metrics in enumerate(self.metrics): 267 | _old_metrics = self.sides * old_metrics 268 | _new_metrics = self.sides * new_metrics 269 | if np.all(_new_metrics <= _old_metrics): 270 | return 271 | if np.all(_new_metrics >= _old_metrics): 272 | _rm_idx.append(i) 273 | for i in _rm_idx[::-1]: 274 | self.metrics.pop(i) 275 | p = self.paths.pop(i) 276 | os.remove(p) 277 | 278 | path = self.path / self.fname_format.format(**logs) 279 | self.metrics.append(new_metrics) 280 | self.paths.append(path) 281 | self.model.save_weights(self.paths[-1]) 282 | 283 | with h5.File(path, 'r+') as f: 284 | log_str = json.dumps(logs, cls=NumpyFloatValuesEncoder) 285 | f.attrs['train_log'] = log_str 286 | 287 | def rename_ckpts(self, dataset, bsz=65536): 288 | assert isinstance(self.model, keras.models.Model) 289 | 290 | weight_buf = BytesIO() 291 | with h5.File(weight_buf, 'w') as f: 292 | hdf5_format.save_weights_to_hdf5_group(f, self.model) 293 | weight_buf.seek(0) 294 | 295 | for i, path in enumerate(tqdm(self.paths, desc='Renaming checkpoints')): 296 | self.model.load_weights(path) 297 | bops = trace_minmax(self.model, dataset, bsz=bsz, verbose=False) 298 | with h5.File(path, 'r+') as f: 299 | logs = json.loads(f.attrs['train_log']) # type: ignore 300 | logs['bops'] = bops 301 | f.attrs['train_log'] = json.dumps(logs, cls=NumpyFloatValuesEncoder) 302 | metrics = np.array([logs[metric_name] for metric_name in self.metric_names]) 303 | self.metrics[i] = metrics 304 | new_fname = self.path / self.fname_format.format(**logs) 305 | os.rename(path, new_fname) 306 | self.paths[i] = new_fname 307 | 308 | with h5.File(weight_buf, 'r') as f: 309 | hdf5_format.load_weights_from_hdf5_group_by_name(f, self.model) 310 | 311 | 312 | class BetaScheduler(keras.callbacks.Callback): 313 | def __init__(self, beta_fn: Callable[[int], float]): 314 | self.beta_fn = beta_fn 315 | 316 | def on_epoch_begin(self, epoch, 
logs=None): 317 | assert isinstance(self.model, keras.models.Model) 318 | 319 | beta = self.beta_fn(epoch) 320 | for layer in self.model.layers: 321 | if hasattr(layer, 'beta'): 322 | layer.beta.assign(keras.backend.constant(beta, dtype=keras.backend.floatx())) 323 | 324 | def on_epoch_end(self, epoch, logs=None): 325 | assert isinstance(logs, dict) 326 | logs['beta'] = self.beta_fn(epoch) 327 | 328 | @classmethod 329 | def from_config(cls, config): 330 | return cls(get_schedule(config.beta, config.train.epochs)) 331 | 332 | 333 | def get_schedule(beta_conf, total_epochs): 334 | epochs = [] 335 | betas = [] 336 | interpolations = [] 337 | for block in beta_conf.intervals: 338 | epochs.append(block.epochs) 339 | betas.append(block.betas) 340 | interpolation = block.interpolation 341 | assert interpolation in ['linear', 'log'] 342 | interpolations.append(interpolation == 'log') 343 | epochs = np.array(epochs + [total_epochs]) 344 | assert np.all(np.diff(epochs) >= 0) 345 | betas = np.array(betas) 346 | interpolations = np.array(interpolations) 347 | 348 | def schedule(epoch): 349 | if epoch >= total_epochs: 350 | return betas[-1, -1] 351 | idx = np.searchsorted(epochs, epoch, side='right') - 1 352 | beta0, beta1 = betas[idx] 353 | epoch0, epoch1 = epochs[idx], epochs[idx + 1] 354 | if interpolations[idx]: 355 | beta = beta0 * (beta1 / beta0) ** ((epoch - epoch0) / (epoch1 - epoch0)) 356 | else: 357 | beta = beta0 + (beta1 - beta0) * (epoch - epoch0) / (epoch1 - epoch0) 358 | return float(beta) 359 | 360 | return schedule 361 | -------------------------------------------------------------------------------- /part1_getting_started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Part 1: Getting started" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from tensorflow.keras.utils import to_categorical\n", 17 | "from sklearn.datasets import fetch_openml\n", 18 | "from sklearn.model_selection import train_test_split\n", 19 | "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", 20 | "import numpy as np\n", 21 | "\n", 22 | "%matplotlib inline\n", 23 | "seed = 0\n", 24 | "np.random.seed(seed)\n", 25 | "import tensorflow as tf\n", 26 | "\n", 27 | "tf.random.set_seed(seed)\n", 28 | "import os\n", 29 | "\n", 30 | "os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Fetch the jet tagging dataset from Open ML" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "data = fetch_openml('hls4ml_lhc_jets_hlf')\n", 47 | "X, y = data['data'], data['target']" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "### Let's print some information about the dataset\n", 55 | "Print the feature names and the dataset shape" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "scrolled": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "print(data['feature_names'])\n", 67 | "print(X.shape, y.shape)\n", 68 | "print(X[:5])\n", 69 | "print(y[:5])" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "As you saw above, the `y` target 
is an array of strings, e.g. \\['g', 'w',...\\] etc.\n", 77 | "We need to make this a \"One Hot\" encoding for the training.\n", 78 | "Then, split the dataset into training and validation sets" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "le = LabelEncoder()\n", 88 | "y = le.fit_transform(y)\n", 89 | "y = to_categorical(y, 5)\n", 90 | "X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", 91 | "print(y[:5])" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "scaler = StandardScaler()\n", 101 | "X_train_val = scaler.fit_transform(X_train_val)\n", 102 | "X_test = scaler.transform(X_test)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "np.save('X_train_val.npy', X_train_val)\n", 112 | "np.save('X_test.npy', X_test)\n", 113 | "np.save('y_train_val.npy', y_train_val)\n", 114 | "np.save('y_test.npy', y_test)\n", 115 | "np.save('classes.npy', le.classes_)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "## Now construct a model\n", 123 | "We'll use 3 hidden layers with 64, then 32, then 32 neurons. Each layer will use `relu` activation.\n", 124 | "Add an output layer with 5 neurons (one for each class), then finish with Softmax activation." 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "from tensorflow.keras.models import Sequential\n", 134 | "from tensorflow.keras.layers import Dense, Activation, BatchNormalization\n", 135 | "from tensorflow.keras.optimizers import Adam\n", 136 | "from tensorflow.keras.regularizers import l1\n", 137 | "from callbacks import all_callbacks" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "model = Sequential()\n", 147 | "model.add(Dense(64, input_shape=(16,), name='fc1', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n", 148 | "model.add(Activation(activation='relu', name='relu1'))\n", 149 | "model.add(Dense(32, name='fc2', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n", 150 | "model.add(Activation(activation='relu', name='relu2'))\n", 151 | "model.add(Dense(32, name='fc3', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n", 152 | "model.add(Activation(activation='relu', name='relu3'))\n", 153 | "model.add(Dense(5, name='output', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n", 154 | "model.add(Activation(activation='softmax', name='softmax'))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "## Train the model\n", 162 | "We'll use Adam optimizer with categorical crossentropy loss.\n", 163 | "The callbacks will decay the learning rate and save the model into a directory 'model_1'\n", 164 | "The model isn't very complex, so this should just take a few minutes even on the CPU.\n", 165 | "If you've restarted the notebook kernel after training once, set `train = False` to load the trained model." 
166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "train = True\n", 175 | "if train:\n", 176 | " adam = Adam(lr=0.0001)\n", 177 | " model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])\n", 178 | " callbacks = all_callbacks(\n", 179 | " stop_patience=1000,\n", 180 | " lr_factor=0.5,\n", 181 | " lr_patience=10,\n", 182 | " lr_epsilon=0.000001,\n", 183 | " lr_cooldown=2,\n", 184 | " lr_minimum=0.0000001,\n", 185 | " outputDir='model_1',\n", 186 | " )\n", 187 | " model.fit(\n", 188 | " X_train_val,\n", 189 | " y_train_val,\n", 190 | " batch_size=1024,\n", 191 | " epochs=10,\n", 192 | " validation_split=0.25,\n", 193 | " shuffle=True,\n", 194 | " callbacks=callbacks.callbacks,\n", 195 | " )\n", 196 | "else:\n", 197 | " from tensorflow.keras.models import load_model\n", 198 | "\n", 199 | " model = load_model('model_1/KERAS_check_best_model.h5')" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "## Check performance\n", 207 | "Check the accuracy and make a ROC curve" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "import plotting\n", 217 | "import matplotlib.pyplot as plt\n", 218 | "from sklearn.metrics import accuracy_score\n", 219 | "\n", 220 | "y_keras = model.predict(X_test)\n", 221 | "print(\"Accuracy: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_keras, axis=1))))\n", 222 | "plt.figure(figsize=(9, 9))\n", 223 | "_ = plotting.makeRoc(y_test, y_keras, le.classes_)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "## Convert the model to FPGA firmware with hls4ml\n", 231 | "Now we will go through the steps to convert the model we trained to a low-latency optimized FPGA firmware with hls4ml.\n", 232 | "First, we will evaluate its classification performance to make sure we haven't lost accuracy using the fixed-point data types. \n", 233 | "Then we will synthesize the model with Vitis HLS and check the metrics of latency and FPGA resource usage.\n", 234 | "\n", 235 | "### Make an hls4ml config & model\n", 236 | "The hls4ml Neural Network inference library is controlled through a configuration dictionary.\n", 237 | "In this example we'll use the most simple variation, later exercises will look at more advanced configuration." 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "import hls4ml\n", 247 | "\n", 248 | "config = hls4ml.utils.config_from_keras_model(model, granularity='model', backend='Vitis')\n", 249 | "print(\"-----------------------------------\")\n", 250 | "print(\"Configuration\")\n", 251 | "plotting.print_dict(config)\n", 252 | "print(\"-----------------------------------\")\n", 253 | "hls_model = hls4ml.converters.convert_from_keras_model(\n", 254 | " model, hls_config=config, backend='Vitis', output_dir='model_1/hls4ml_prj', part='xcu250-figd2104-2L-e'\n", 255 | ")" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "Let's visualise what we created. 
The model architecture is shown, annotated with the shape and data types" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "hls4ml.utils.plot_model(hls_model, show_shapes=True, show_precision=True, to_file=None)" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "## Compile, predict\n", 279 | "Now we need to check that this model performance is still good. We compile the hls_model, and then use `hls_model.predict` to execute the FPGA firmware with bit-accurate emulation on the CPU." 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "hls_model.compile()\n", 289 | "X_test = np.ascontiguousarray(X_test)\n", 290 | "y_hls = hls_model.predict(X_test)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "## Compare\n", 298 | "That was easy! Now let's see how the performance compares to Keras:" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "print(\"Keras Accuracy: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_keras, axis=1))))\n", 308 | "print(\"hls4ml Accuracy: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_hls, axis=1))))\n", 309 | "\n", 310 | "fig, ax = plt.subplots(figsize=(9, 9))\n", 311 | "_ = plotting.makeRoc(y_test, y_keras, le.classes_)\n", 312 | "plt.gca().set_prop_cycle(None) # reset the colors\n", 313 | "_ = plotting.makeRoc(y_test, y_hls, le.classes_, linestyle='--')\n", 314 | "\n", 315 | "from matplotlib.lines import Line2D\n", 316 | "\n", 317 | "lines = [Line2D([0], [0], ls='-'), Line2D([0], [0], ls='--')]\n", 318 | "from matplotlib.legend import Legend\n", 319 | "\n", 320 | "leg = Legend(ax, lines, labels=['keras', 'hls4ml'], loc='lower right', frameon=False)\n", 321 | "ax.add_artist(leg)" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "## Synthesize\n", 329 | "Now we'll actually use Vitis HLS to synthesize the model. We can run the build using a method of our `hls_model` object.\n", 330 | "After running this step, we can integrate the generated IP into a workflow to compile for a specific FPGA board.\n", 331 | "In this case, we'll just review the reports that Vitis HLS generates, checking the latency and resource usage.\n", 332 | "\n", 333 | "**This can take several minutes.**\n", 334 | "\n", 335 | "While the C-Synthesis is running, we can monitor the progress looking at the log file by opening a terminal from the notebook home, and executing:\n", 336 | "\n", 337 | "`tail -f model_1/hls4ml_prj/vitis_hls.log`" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": { 344 | "scrolled": true 345 | }, 346 | "outputs": [], 347 | "source": [ 348 | "hls_model.build(csim=False)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "## Check the reports\n", 356 | "Print out the reports generated by Vitis HLS. 
Pay attention to the Latency and the 'Utilization Estimates' sections" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "hls4ml.report.read_vivado_report('model_1/hls4ml_prj/')" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "## Exercise\n", 373 | "Since `ReuseFactor = 1` we expect each multiplication used in the inference of our neural network to use 1 DSP. Is this what we see? (Note that the Softmax layer should use 5 DSPs, or 1 per class)\n", 374 | "Calculate how many multiplications are performed for the inference of this network...\n", 375 | "(We'll discuss the outcome)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [] 384 | } 385 | ], 386 | "metadata": { 387 | "kernelspec": { 388 | "display_name": "Python 3 (ipykernel)", 389 | "language": "python", 390 | "name": "python3" 391 | }, 392 | "language_info": { 393 | "codemirror_mode": { 394 | "name": "ipython", 395 | "version": 3 396 | }, 397 | "file_extension": ".py", 398 | "mimetype": "text/x-python", 399 | "name": "python", 400 | "nbconvert_exporter": "python", 401 | "pygments_lexer": "ipython3", 402 | "version": "3.10.16" 403 | } 404 | }, 405 | "nbformat": 4, 406 | "nbformat_minor": 4 407 | } 408 | -------------------------------------------------------------------------------- /part3_compression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Part 3: Compression" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from tensorflow.keras.utils import to_categorical\n", 17 | "from sklearn.datasets import fetch_openml\n", 18 | "from sklearn.model_selection import train_test_split\n", 19 | "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "\n", 23 | "%matplotlib inline\n", 24 | "seed = 0\n", 25 | "np.random.seed(seed)\n", 26 | "import tensorflow as tf\n", 27 | "\n", 28 | "tf.random.set_seed(seed)\n", 29 | "import os\n", 30 | "\n", 31 | "os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Fetch the jet tagging dataset from Open ML" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "X_train_val = np.load('X_train_val.npy')\n", 48 | "X_test = np.load('X_test.npy')\n", 49 | "y_train_val = np.load('y_train_val.npy')\n", 50 | "y_test = np.load('y_test.npy')\n", 51 | "classes = np.load('classes.npy', allow_pickle=True)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## Now construct a model\n", 59 | "We'll use the same architecture as in part 1: 3 hidden layers with 64, then 32, then 32 neurons. Each layer will use `relu` activation.\n", 60 | "Add an output layer with 5 neurons (one for each class), then finish with Softmax activation." 
61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "from tensorflow.keras.models import Sequential\n", 70 | "from tensorflow.keras.layers import Dense, Activation, BatchNormalization\n", 71 | "from tensorflow.keras.optimizers import Adam\n", 72 | "from tensorflow.keras.regularizers import l1\n", 73 | "from callbacks import all_callbacks" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "model = Sequential()\n", 83 | "model.add(Dense(64, input_shape=(16,), name='fc1', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n", 84 | "model.add(Activation(activation='relu', name='relu1'))\n", 85 | "model.add(Dense(32, name='fc2', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n", 86 | "model.add(Activation(activation='relu', name='relu2'))\n", 87 | "model.add(Dense(32, name='fc3', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n", 88 | "model.add(Activation(activation='relu', name='relu3'))\n", 89 | "model.add(Dense(5, name='output', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n", 90 | "model.add(Activation(activation='softmax', name='softmax'))" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "## Train sparse\n", 98 | "This time we'll use the Tensorflow model optimization sparsity to train a sparse model (forcing many weights to '0'). In this instance, the target sparsity is 75%" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks, pruning_schedule\n", 108 | "from tensorflow_model_optimization.sparsity.keras import strip_pruning\n", 109 | "\n", 110 | "pruning_params = {\"pruning_schedule\": pruning_schedule.ConstantSparsity(0.75, begin_step=2000, frequency=100)}\n", 111 | "model = prune.prune_low_magnitude(model, **pruning_params)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## Train the model\n", 119 | "We'll use the same settings as the model for part 1: Adam optimizer with categorical crossentropy loss.\n", 120 | "The callbacks will decay the learning rate and save the model into a directory 'model_2'\n", 121 | "The model isn't very complex, so this should just take a few minutes even on the CPU.\n", 122 | "If you've restarted the notebook kernel after training once, set `train = False` to load the trained model rather than training again." 
123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "train = True\n", 132 | "if train:\n", 133 | " adam = Adam(lr=0.0001)\n", 134 | " model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])\n", 135 | " callbacks = all_callbacks(\n", 136 | " stop_patience=1000,\n", 137 | " lr_factor=0.5,\n", 138 | " lr_patience=10,\n", 139 | " lr_epsilon=0.000001,\n", 140 | " lr_cooldown=2,\n", 141 | " lr_minimum=0.0000001,\n", 142 | " outputDir='model_2',\n", 143 | " )\n", 144 | " callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())\n", 145 | " model.fit(\n", 146 | " X_train_val,\n", 147 | " y_train_val,\n", 148 | " batch_size=1024,\n", 149 | " epochs=10,\n", 150 | " validation_split=0.25,\n", 151 | " shuffle=True,\n", 152 | " callbacks=callbacks.callbacks,\n", 153 | " )\n", 154 | " # Save the model again but with the pruning 'stripped' to use the regular layer types\n", 155 | " model = strip_pruning(model)\n", 156 | " model.save('model_2/KERAS_check_best_model.h5')\n", 157 | "else:\n", 158 | " from tensorflow.keras.models import load_model\n", 159 | "\n", 160 | " model = load_model('model_2/KERAS_check_best_model.h5')" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "## Check sparsity\n", 168 | "Make a quick check that the model was indeed trained sparse. We'll just make a histogram of the weights of the 1st layer, and hopefully observe a large peak in the bin containing '0'. Note logarithmic y axis." 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "w = model.layers[0].weights[0].numpy()\n", 178 | "h, b = np.histogram(w, bins=100)\n", 179 | "plt.figure(figsize=(7, 7))\n", 180 | "plt.bar(b[:-1], h, width=b[1] - b[0])\n", 181 | "plt.semilogy()\n", 182 | "print('% of zeros = {}'.format(np.sum(w == 0) / np.size(w)))" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "## Check performance\n", 190 | "How does this 75% sparse model compare against the unpruned model? Let's report the accuracy and make a ROC curve. 
The pruned model is shown with solid lines, the unpruned model from part 1 is shown with dashed lines.\n", 191 | "**Make sure you've trained the model from part 1**" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "import plotting\n", 201 | "import matplotlib.pyplot as plt\n", 202 | "from sklearn.metrics import accuracy_score\n", 203 | "from tensorflow.keras.models import load_model\n", 204 | "\n", 205 | "model_ref = load_model('model_1/KERAS_check_best_model.h5')\n", 206 | "\n", 207 | "y_ref = model_ref.predict(X_test)\n", 208 | "y_prune = model.predict(X_test)\n", 209 | "\n", 210 | "print(\"Accuracy unpruned: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_ref, axis=1))))\n", 211 | "print(\"Accuracy pruned: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_prune, axis=1))))\n", 212 | "\n", 213 | "fig, ax = plt.subplots(figsize=(9, 9))\n", 214 | "_ = plotting.makeRoc(y_test, y_ref, classes)\n", 215 | "plt.gca().set_prop_cycle(None) # reset the colors\n", 216 | "_ = plotting.makeRoc(y_test, y_prune, classes, linestyle='--')\n", 217 | "\n", 218 | "from matplotlib.lines import Line2D\n", 219 | "\n", 220 | "lines = [Line2D([0], [0], ls='-'), Line2D([0], [0], ls='--')]\n", 221 | "from matplotlib.legend import Legend\n", 222 | "\n", 223 | "leg = Legend(ax, lines, labels=['unpruned', 'pruned'], loc='lower right', frameon=False)\n", 224 | "ax.add_artist(leg)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "## Convert the model to FPGA firmware with hls4ml\n", 232 | "Let's use the default configuration: `ap_fixed<16,6>` precision everywhere and `ReuseFactor=1`, so we can compare with the part 1 model. We need to use `strip_pruning` to change the layer types back to their originals.\n", 233 | "\n", 234 | "**The synthesis will take a while**\n", 235 | "\n", 236 | "While the C-Synthesis is running, we can monitor the progress looking at the log file by opening a terminal from the notebook home, and executing:\n", 237 | "\n", 238 | "`tail -f model_2/hls4ml_prj/vitis_hls.log`" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "import hls4ml\n", 248 | "\n", 249 | "config = hls4ml.utils.config_from_keras_model(model, granularity='model', backend='Vitis')\n", 250 | "print(config)\n", 251 | "hls_model = hls4ml.converters.convert_from_keras_model(\n", 252 | " model, hls_config=config, backend='Vitis', output_dir='model_2/hls4ml_prj', part='xcu250-figd2104-2L-e'\n", 253 | ")\n", 254 | "hls_model.compile()\n", 255 | "hls_model.build(csim=False)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "## Check the reports\n", 263 | "Print out the reports generated by Vitis HLS. Pay attention to the Utilization Estimates' section in particular this time." 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "hls4ml.report.read_vivado_report('model_2/hls4ml_prj/')" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "Print the report for the model trained in part 1. Remember these models have the same architecture, but the model in this section was trained using the sparsity API from tensorflow_model_optimization. 
Notice how the resource usage has been dramatically reduced (particularly the DSPs). When Vitis HLS notices an operation like `y = 0 * x` it can avoid placing a DSP for that operation. The impact of this is biggest when `ReuseFactor = 1`, but still applies at higher reuse as well. **Note: you need to have trained and synthesized the model from part 1**"
280 |    ]
281 |   },
282 |   {
283 |    "cell_type": "code",
284 |    "execution_count": null,
285 |    "metadata": {},
286 |    "outputs": [],
287 |    "source": [
288 |     "hls4ml.report.read_vivado_report('model_1/hls4ml_prj')"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "code",
293 |    "execution_count": null,
294 |    "metadata": {},
295 |    "outputs": [],
296 |    "source": []
297 |   }
298 |  ],
299 |  "metadata": {
300 |   "kernelspec": {
301 |    "display_name": "Python 3 (ipykernel)",
302 |    "language": "python",
303 |    "name": "python3"
304 |   },
305 |   "language_info": {
306 |    "codemirror_mode": {
307 |     "name": "ipython",
308 |     "version": 3
309 |    },
310 |    "file_extension": ".py",
311 |    "mimetype": "text/x-python",
312 |    "name": "python",
313 |    "nbconvert_exporter": "python",
314 |    "pygments_lexer": "ipython3",
315 |    "version": "3.10.16"
316 |   }
317 |  },
318 |  "nbformat": 4,
319 |  "nbformat_minor": 4
320 | }

--------------------------------------------------------------------------------
/part4.1_HG_quantization.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Part 4: HG Quantization"
8 |    ]
9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": null,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "import os\n",
17 |     "import keras\n",
18 |     "from keras.utils import to_categorical\n",
19 |     "from sklearn.datasets import fetch_openml\n",
20 |     "from sklearn.model_selection import train_test_split\n",
21 |     "from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
22 |     "import numpy as np\n",
23 |     "import matplotlib.pyplot as plt\n",
24 |     "\n",
25 |     "%matplotlib inline\n",
26 |     "seed = 0\n",
27 |     "np.random.seed(seed)\n",
28 |     "import tensorflow as tf\n",
29 |     "\n",
30 |     "tf.random.set_seed(seed)\n",
31 |     "\n",
32 |     "os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "markdown",
37 |    "metadata": {},
38 |    "source": [
39 |     "## Fetch the jet tagging dataset from Open ML"
40 |    ]
41 |   },
42 |   {
43 |    "cell_type": "code",
44 |    "execution_count": null,
45 |    "metadata": {},
46 |    "outputs": [],
47 |    "source": [
48 |     "# If you haven't finished part 1 already, uncomment the following lines to download, process, and save the dataset\n",
49 |     "# data = fetch_openml('hls4ml_lhc_jets_hlf')\n# X, y = data['data'], data['target']\n\n",
50 |     "# le = LabelEncoder()\n",
51 |     "# y = le.fit_transform(y)\n",
52 |     "# y = to_categorical(y, 5)\n",
53 |     "# X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
54 |     "# # print(y[:5])\n",
55 |     "# scaler = StandardScaler()\n",
56 |     "# X_train_val = scaler.fit_transform(X_train_val)\n",
57 |     "# X_test = scaler.transform(X_test)\n",
58 |     "# np.save('X_train_val.npy', X_train_val)\n",
59 |     "# np.save('X_test.npy', X_test)\n",
60 |     "# np.save('y_train_val.npy', y_train_val)\n",
61 |     "# np.save('y_test.npy', y_test)\n",
62 |     "# np.save('classes.npy', le.classes_)"
63 |    ]
64 |   },
65 |   {
66 |    "cell_type": "code",
67 |    "execution_count": null,
68 |    "metadata": {},
69 |    "outputs": [],
70 |    "source": [
71 |     "X_train_val = np.load('X_train_val.npy')\n",
72 |     "X_test = np.load('X_test.npy')\n",
np.load('X_test.npy')\n", 73 | "y_train_val = np.load('y_train_val.npy')\n", 74 | "y_test = np.load('y_test.npy')\n", 75 | "classes = np.load('classes.npy', allow_pickle=True)\n", 76 | "\n", 77 | "# Convert everything to tf.Tensor to avoid casting\n", 78 | "with tf.device('/cpu:0'):  # type: ignore\n", 79 | " _X_train_val = tf.convert_to_tensor(X_train_val, dtype=tf.float32)\n", 80 | " # We don't make y an explicit categorical tensor:\n", 81 | " # Use SparseCategoricalCrossentropy loss instead.\n", 82 | " _y_train_val = tf.convert_to_tensor(np.argmax(y_train_val, axis=1), dtype=tf.int32)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "## Construct a model\n", 90 | "This time we're going to use HGQ layers.\n", 91 | "\n", 92 | "HGQ is \"High Granularity Quantization\" for heterogeneous quantization at arbitrary granularity, up to per-weight and per-activation level.\n", 93 | "\n", 94 | "https://github.com/calad0i/HGQ\n", 95 | "\n", 96 | "Depending on the specific task, HGQ can achieve more than 10x resource savings compared to QKeras. (For example, on this dataset, when requiring an accuracy of around 0.72~0.74.)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "from keras.models import Sequential\n", 106 | "from keras.optimizers import Adam\n", 107 | "from keras.losses import SparseCategoricalCrossentropy\n", 108 | "from HGQ.layers import HQuantize, HDense, HActivation" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "For any layer that needs to be quantized (i.e., layers that perform the actual computation), add an `H` in front of the layer name. For example, `HDense`, `HConv2D`, `HActivation`, etc.\n", 116 | "\n", 117 | "HGQ requires the model input to be quantized. To achieve this, you can simply add an `HQuantize` layer at the beginning of the model. You may refer to https://calad0i.github.io/HGQ/ for full documentation.\n", 118 | "\n", 119 | "As all quantization bitwidths are learnt, you don't need to specify them. Instead, for each `H-` layer, you need to specify the `beta` parameter that controls the trade-off between accuracy and resource savings. The higher the `beta`, the more aggressive the quantization will be." 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "beta = 3e-6\n", 129 | "# The bigger the beta, the smaller the model is, at the cost of accuracy.\n", 130 | "\n", 131 | "model = Sequential(\n", 132 | " [\n", 133 | " HQuantize(beta=beta),\n", 134 | " HDense(64, activation='relu', beta=beta),\n", 135 | " HDense(32, activation='relu', beta=beta),\n", 136 | " HDense(32, activation='relu', beta=beta),\n", 137 | " HDense(5, beta=beta),\n", 138 | " ]\n", 139 | ")" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "## Train sparse\n", 147 | "\n", 148 | "No need to do anything. Unstructured sparsity comes for free with HGQ." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# This is an empty code cell; you don't need to put anything here." 
158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "## Train the model\n", 165 | "We'll use the same settings as the model for part 1: Adam optimizer with categorical crossentropy loss.\n", 166 | "\n", 167 | "However, we can skip the softmax layer in the model by adding `from_logits=True` to the loss function. `Softmax` is expensive in hardware, so we want to avoid it if possible.\n", 168 | "\n", 169 | "For any HGQ model, it's essential to use the `ResetMinMax` callback to reset the quantization ranges after each epoch. This is because the ranges are calculated based on the data seen so far, and we want to make sure they are recalculated after each epoch.\n", 170 | "\n", 171 | "It is recommended to use the `FreeBOPs` callback to monitor the number of (effective) bit operations in the model. This is a good proxy for **post place&route** resource usage on the FPGA (BOPs ~ 55*DSPs + LUTs). Notice that CSynth tends to overestimate by at least a factor of 2." 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "from HGQ import ResetMinMax, FreeBOPs\n", 181 | "from keras.callbacks import LearningRateScheduler\n", 182 | "from keras.experimental import CosineDecay\n", 183 | "from nn_utils import PBarCallback\n", 184 | "\n", 185 | "_sched = CosineDecay(2e-2, 200)\n", 186 | "sched = LearningRateScheduler(_sched)\n", 187 | "pbar = PBarCallback(metric='loss: {loss:.3f}/{val_loss:.3f} - acc: {accuracy:.3f}/{val_accuracy:.3f}')\n", 188 | "\n", 189 | "callbacks = [ResetMinMax(), FreeBOPs(), pbar, sched]\n", 190 | "\n", 191 | "# ResetMinMax: necessary callback for all HGQ models\n", 192 | "# FreeBOPs: recommended callback\n", 193 | "# pbar: progress bar callback, useful when the number of epochs is high\n", 194 | "# sched: learning rate scheduler. Cosine decay in this case." 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "## Notice\n", 202 | "\n", 203 | "- Due to the stochasticity of the surrogate gradient on the individual bitwidths, it is recommended to train the model with a large batch size for more epochs.\n", 204 | "\n", 205 | "- Many parts of HGQ are jit-compiled, so the first epoch will take longer due to compilation.\n", 206 | "\n", 207 | "- We train for 200 epochs here, which takes ~1 min on a 3070-maxq GPU, similar to the time taken in part 4.\n", 208 | "\n", 209 | "- Parameters used in this tutorial are not optimized for the best performance. Please refer to [HGQ-demos](https://github.com/calad0i/HGQ-demos) for more advanced examples." 
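, "\n", "As a concrete reading of the BOPs-to-resources relation quoted above, here is a rough sketch (the relation is an empirical rule of thumb, not an exact model):\n", "\n", "```python\n", "# BOPs ~ 55*DSPs + LUTs (post place&route), as quoted above\n", "def bops_proxy(dsps: int, luts: int) -> int:\n", "    return 55 * dsps + luts\n", "\n", "# e.g. a model at ~6500 BOPs should land near 6500 LUTs if no DSPs are used\n", "print(bops_proxy(0, 6500))\n", "```"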
210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "train = True\n", 219 | "if train:\n", 220 | " opt = Adam(learning_rate=0)\n", 221 | " loss = SparseCategoricalCrossentropy(from_logits=True)\n", 222 | " model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])\n", 223 | "\n", 224 | " model.fit(\n", 225 | " _X_train_val,\n", 226 | " _y_train_val,\n", 227 | " batch_size=16384,\n", 228 | " epochs=200,\n", 229 | " validation_split=0.25,\n", 230 | " shuffle=True,\n", 231 | " callbacks=callbacks,\n", 232 | " verbose=0,  # type: ignore\n", 233 | " )\n", 234 | " model.save('model_3.1/model.h5')\n", 235 | "else:\n", 236 | " from keras.models import load_model\n", 237 | "\n", 238 | " # No need to use custom_objects as the custom layers are already registered\n", 239 | " model: keras.Model = load_model('model_3.1/model.h5')  # type: ignore" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "## Prepare for conversion\n", 247 | "\n", 248 | "An HGQ model cannot be converted to an hls4ml model directly; we need to convert it to a proxy model first. The proxy model also serves as a bit-accurate emulator of the hls4ml model that takes numerical overflow into account.\n", 249 | "\n", 250 | "To convert to a proxy model, we need to set appropriate ranges for the model's internal variables. This is done by using the `trace_minmax` function. You can apply a scale factor `cover_range` to the ranges to make the model more robust to numerical overflow. `trace_minmax` also returns the exact (effective) BOPs of the model (the number reported during training is an approximation).\n", 251 | "\n", 252 | "If you keep all parameters the same and everything goes correctly, the total BOPs of the model should be around 6500. This means that after place&route (or vsynth), DSPs*55 + LUTs used should be around 6500 (about 6500 LUTs if no DSPs are used)." 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "from HGQ import trace_minmax, to_proxy_model\n", 262 | "\n", 263 | "trace_minmax(model, X_train_val)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "Check that the model is indeed sparse without explicit pruning or `l1` regularization." 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "for layer in model.layers:\n", 280 | " if layer._has_kernel:\n", 281 | " k = layer.fused_qkernel.numpy()\n", 282 | " print(f'{layer.name}: {np.mean(k==0):.2%} sparsity')" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "Then, convert the model to a proxy model using the `to_proxy_model` function." 
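, "\n", "If you need extra headroom against overflow, the two steps can be combined as in the sketch below. The `cover_range` keyword name is taken from the prose above; double-check it against the HGQ documentation for your version:\n", "\n", "```python\n", "# Sketch (keyword name assumed from the text above; verify against the HGQ docs)\n", "trace_minmax(model, X_train_val, cover_range=2.0)  # widen traced ranges by 2x\n", "proxy = to_proxy_model(model)\n", "```"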
290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "proxy = to_proxy_model(model)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "import hls4ml\n", 308 | "import plotting\n", 309 | "\n", 310 | "hls_model = hls4ml.converters.convert_from_keras_model(\n", 311 | " proxy, output_dir='model_3.1/hls4ml_prj', part='xcu250-figd2104-2L-e', backend='Vitis'\n", 312 | ")\n", 313 | "hls_model.compile()\n", 314 | "\n", 315 | "X_test = np.ascontiguousarray(X_test)\n", 316 | "y_keras = model.predict(X_test, batch_size=16384, verbose=0)\n", 317 | "y_proxy = proxy.predict(X_test, batch_size=16384, verbose=0)\n", 318 | "y_hls = hls_model.predict(X_test)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "## Check bit-accuracy\n", 326 | "If you are unlucky, `y_keras` and `y_hls` will not fully match for a few entries, due to numerical overflow. However, `y_keras` and `y_proxy` should match perfectly. Occasional mismatches can still happen there, but only due to the machine precision limit.\n", 327 | "\n", 328 | "On newer NVIDIA GPUs, TF32 (fp32 with reduced mantissa bits) is enabled by default, which makes such precision-related mismatches more prevalent." 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [ 337 | "np.mean(y_keras == y_hls), np.mean(y_proxy == y_hls)" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | "# The plotting script assumes 0-1 range for the predictions.\n", 347 | "y_keras_softmax = tf.nn.softmax(y_keras).numpy()\n", 348 | "y_hls_softmax = tf.nn.softmax(y_hls).numpy()" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "%matplotlib inline\n", 358 | "from sklearn.metrics import accuracy_score\n", 359 | "from keras.models import load_model\n", 360 | "\n", 361 | "model_ref = load_model('model_1/KERAS_check_best_model.h5')\n", 362 | "y_ref = model_ref.predict(X_test, batch_size=1024, verbose=0)\n", 363 | "\n", 364 | "print(\"Accuracy baseline: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_ref, axis=1))))\n", 365 | "print(\"Accuracy pruned, quantized: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_keras, axis=1))))\n", 366 | "print(\"Accuracy hls4ml: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_hls, axis=1))))\n", 367 | "\n", 368 | "fig, ax = plt.subplots(figsize=(9, 9))\n", 369 | "_ = plotting.makeRoc(y_test, y_ref, classes)\n", 370 | "plt.gca().set_prop_cycle(None)  # reset the colors\n", 371 | "_ = plotting.makeRoc(y_test, y_keras_softmax, classes, linestyle='--')\n", 372 | "plt.gca().set_prop_cycle(None)  # reset the colors\n", 373 | "_ = plotting.makeRoc(y_test, y_hls_softmax, classes, linestyle=':')\n", 374 | "\n", 375 | "from matplotlib.lines import Line2D\n", 376 | "\n", 377 | "lines = [Line2D([0], [0], ls='-'), Line2D([0], [0], ls='--'), Line2D([0], [0], ls=':')]\n", 378 | "from matplotlib.legend import Legend\n", 379 | "\n", 380 | "leg = Legend(ax, lines, labels=['baseline', 'pruned, quantized', 'hls4ml'], loc='lower right', frameon=False)\n", 381 | "ax.add_artist(leg)" 382 | ] 383 | }, 384 | 
{ 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "## Synthesize\n", 389 | "Now let's synthesize this quantized, pruned model.\n", 390 | "\n", 391 | "**The synthesis will take a while**\n", 392 | "\n", 393 | "While the C-Synthesis is running, we can monitor the progress by looking at the log file: open a terminal from the notebook home and execute:\n", 394 | "\n", 395 | "`tail -f model_3.1/hls4ml_prj/vitis_hls.log`" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "hls_model.build(csim=False)" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "## Check the reports\n", 412 | "Print out the reports generated by Vitis HLS. Pay attention to the 'Utilization Estimates' section in particular this time.\n", 413 | "\n", 414 | "## Notice\n", 415 | "Compared to part 4, we strip away the softmax layer, which takes 3~5 cycles to compute. The overall latency could still be comparable." 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": {}, 422 | "outputs": [], 423 | "source": [ 424 | "hls4ml.report.read_vivado_report('model_3.1/hls4ml_prj')" 425 | ] 426 | }, 427 | { 428 | "cell_type": "markdown", 429 | "metadata": {}, 430 | "source": [ 431 | "Print the report for the model trained in part 4. You should notice that the resource usage of this model is significantly lower than that of the part 4 model.\n", 432 | "\n", 433 | "**Note you need to have trained and synthesized the model from part 4**" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [ 442 | "hls4ml.report.read_vivado_report('model_3/hls4ml_prj')" 443 | ] 444 | }, 445 | { 446 | "cell_type": "markdown", 447 | "metadata": {}, 448 | "source": [ 449 | "## NB\n", 450 | "Note as well that the Vitis HLS `csynth` resource estimates tend to _overestimate_ on-chip resource usage. Running the subsequent stages of FPGA compilation reveals more realistic resource usage. You can run the next step, 'logic synthesis', with `hls_model.build(synth=True, vsynth=True)`, but we skipped it in this tutorial in the interest of time." 
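, "\n", "For reference, a sketch of that optional step, combining the flags used earlier in this notebook:\n", "\n", "```python\n", "# Optional and slow: HLS C synthesis followed by Vivado logic synthesis\n", "hls_model.build(csim=False, synth=True, vsynth=True)\n", "```"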
451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [] 459 | } 460 | ], 461 | "metadata": { 462 | "kernelspec": { 463 | "display_name": "Python 3 (ipykernel)", 464 | "language": "python", 465 | "name": "python3" 466 | }, 467 | "language_info": { 468 | "codemirror_mode": { 469 | "name": "ipython", 470 | "version": 3 471 | }, 472 | "file_extension": ".py", 473 | "mimetype": "text/x-python", 474 | "name": "python", 475 | "nbconvert_exporter": "python", 476 | "pygments_lexer": "ipython3", 477 | "version": "3.10.16" 478 | } 479 | }, 480 | "nbformat": 4, 481 | "nbformat_minor": 4 482 | } 483 | -------------------------------------------------------------------------------- /part4_quantization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Part 4: Quantization" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from tensorflow.keras.utils import to_categorical\n", 17 | "from sklearn.datasets import fetch_openml\n", 18 | "from sklearn.model_selection import train_test_split\n", 19 | "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "\n", 23 | "%matplotlib inline\n", 24 | "seed = 0\n", 25 | "np.random.seed(seed)\n", 26 | "import tensorflow as tf\n", 27 | "\n", 28 | "tf.random.set_seed(seed)\n", 29 | "import os\n", 30 | "\n", 31 | "os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Fetch the jet tagging dataset from Open ML" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "X_train_val = np.load('X_train_val.npy')\n", 48 | "X_test = np.load('X_test.npy')\n", 49 | "y_train_val = np.load('y_train_val.npy')\n", 50 | "y_test = np.load('y_test.npy')\n", 51 | "classes = np.load('classes.npy', allow_pickle=True)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## Construct a model\n", 59 | "This time we're going to use QKeras layers.\n", 60 | "QKeras is \"Quantized Keras\" for deep heterogeneous quantization of ML models.\n", 61 | "\n", 62 | "https://github.com/google/qkeras\n", 63 | "\n", 64 | "It is maintained by Google and we recently added support for QKeras models to hls4ml." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "from tensorflow.keras.models import Sequential\n", 74 | "from tensorflow.keras.optimizers import Adam\n", 75 | "from tensorflow.keras.regularizers import l1\n", 76 | "from callbacks import all_callbacks\n", 77 | "from tensorflow.keras.layers import Activation\n", 78 | "from qkeras.qlayers import QDense, QActivation\n", 79 | "from qkeras.quantizers import quantized_bits, quantized_relu" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "We're using the `QDense` layer instead of `Dense`, and `QActivation` instead of `Activation`. We're also specifying `kernel_quantizer = quantized_bits(6,0,alpha=1)`. This will use 6 bits (of which 0 are integer) for the weights. 
We also use the same quantization for the biases, and `quantized_relu(6)` for 6-bit ReLU activations." 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "model = Sequential()\n", 96 | "model.add(\n", 97 | " QDense(\n", 98 | " 64,\n", 99 | " input_shape=(16,),\n", 100 | " name='fc1',\n", 101 | " kernel_quantizer=quantized_bits(6, 0, alpha=1),\n", 102 | " bias_quantizer=quantized_bits(6, 0, alpha=1),\n", 103 | " kernel_initializer='lecun_uniform',\n", 104 | " kernel_regularizer=l1(0.0001),\n", 105 | " )\n", 106 | ")\n", 107 | "model.add(QActivation(activation=quantized_relu(6), name='relu1'))\n", 108 | "model.add(\n", 109 | " QDense(\n", 110 | " 32,\n", 111 | " name='fc2',\n", 112 | " kernel_quantizer=quantized_bits(6, 0, alpha=1),\n", 113 | " bias_quantizer=quantized_bits(6, 0, alpha=1),\n", 114 | " kernel_initializer='lecun_uniform',\n", 115 | " kernel_regularizer=l1(0.0001),\n", 116 | " )\n", 117 | ")\n", 118 | "model.add(QActivation(activation=quantized_relu(6), name='relu2'))\n", 119 | "model.add(\n", 120 | " QDense(\n", 121 | " 32,\n", 122 | " name='fc3',\n", 123 | " kernel_quantizer=quantized_bits(6, 0, alpha=1),\n", 124 | " bias_quantizer=quantized_bits(6, 0, alpha=1),\n", 125 | " kernel_initializer='lecun_uniform',\n", 126 | " kernel_regularizer=l1(0.0001),\n", 127 | " )\n", 128 | ")\n", 129 | "model.add(QActivation(activation=quantized_relu(6), name='relu3'))\n", 130 | "model.add(\n", 131 | " QDense(\n", 132 | " 5,\n", 133 | " name='output',\n", 134 | " kernel_quantizer=quantized_bits(6, 0, alpha=1),\n", 135 | " bias_quantizer=quantized_bits(6, 0, alpha=1),\n", 136 | " kernel_initializer='lecun_uniform',\n", 137 | " kernel_regularizer=l1(0.0001),\n", 138 | " )\n", 139 | ")\n", 140 | "model.add(Activation(activation='softmax', name='softmax'))" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "## Train sparse\n", 148 | "Let's train with model sparsity again, since QKeras layers are prunable." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks, pruning_schedule\n", 158 | "from tensorflow_model_optimization.sparsity.keras import strip_pruning\n", 159 | "\n", 160 | "pruning_params = {\"pruning_schedule\": pruning_schedule.ConstantSparsity(0.75, begin_step=2000, frequency=100)}\n", 161 | "model = prune.prune_low_magnitude(model, **pruning_params)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "## Train the model\n", 169 | "We'll use the same settings as the model for part 1: Adam optimizer with categorical crossentropy loss.\n", 170 | "The callbacks will decay the learning rate and save the model into a directory 'model_3'.\n", 171 | "The model isn't very complex, so this should just take a few minutes even on the CPU.\n", 172 | "If you've restarted the notebook kernel after training once, set `train = False` to load the trained model rather than training again." 
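, "\n", "After training (and stripping the pruning wrappers), you can optionally verify the achieved sparsity with a quick loop; this is a minimal sketch, similar to the check used in the HGQ notebook (part 4.1):\n", "\n", "```python\n", "import numpy as np\n", "\n", "for layer in model.layers:\n", "    weights = layer.get_weights()\n", "    if weights:  # skip layers without parameters\n", "        kernel = weights[0]\n", "        print(f'{layer.name}: {np.mean(kernel == 0):.2%} sparsity')\n", "```"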
173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "train = True\n", 182 | "if train:\n", 183 | " adam = Adam(learning_rate=0.0001)\n", 184 | " model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])\n", 185 | " callbacks = all_callbacks(\n", 186 | " stop_patience=1000,\n", 187 | " lr_factor=0.5,\n", 188 | " lr_patience=10,\n", 189 | " lr_epsilon=0.000001,\n", 190 | " lr_cooldown=2,\n", 191 | " lr_minimum=0.0000001,\n", 192 | " outputDir='model_3',\n", 193 | " )\n", 194 | " callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())\n", 195 | " model.fit(\n", 196 | " X_train_val,\n", 197 | " y_train_val,\n", 198 | " batch_size=1024,\n", 199 | " epochs=30,\n", 200 | " validation_split=0.25,\n", 201 | " shuffle=True,\n", 202 | " callbacks=callbacks.callbacks,\n", 203 | " )\n", 204 | " # Save the model again but with the pruning 'stripped' to use the regular layer types\n", 205 | " model = strip_pruning(model)\n", 206 | " model.save('model_3/KERAS_check_best_model.h5')\n", 207 | "else:\n", 208 | " from tensorflow.keras.models import load_model\n", 209 | " from qkeras.utils import _add_supported_quantized_objects\n", 210 | "\n", 211 | " co = {}\n", 212 | " _add_supported_quantized_objects(co)\n", 213 | " model = load_model('model_3/KERAS_check_best_model.h5', custom_objects=co)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "## Check performance\n", 221 | "How does this model, trained with 6-bit precision and 75% sparsity, compare against the original model? Let's report the accuracy and make a ROC curve. The quantized, pruned model is shown with solid lines; the unpruned model from part 1 is shown with dashed lines.\n", 222 | "\n", 223 | "\n", 224 | "We should also check that hls4ml can respect the choice to use 6 bits throughout the model, and match the accuracy. We'll generate a configuration from this quantized model, and plot its performance as the dotted line.\n", 225 | "The generated configuration is printed out. You'll notice that it uses 7 bits for the type, even though we specified 6! That's because QKeras doesn't count the sign bit when we specify the number of bits, so the type that actually gets used needs one more.\n", 226 | "\n", 227 | "We also use the `OutputRoundingSaturationMode` optimizer pass of `hls4ml` to set the Activation layers to round, rather than truncate, the cast. This is important for getting good model accuracy when using small bit-precision activations. 
And we'll set a different data type for the tables used in the Softmax, just for a bit of extra performance.\n", 228 | "\n", 229 | "\n", 230 | "**Make sure you've trained the model from part 1**" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "import hls4ml\n", 240 | "import plotting\n", 241 | "\n", 242 | "config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend='Vitis')\n", 243 | "config['LayerName']['softmax']['exp_table_t'] = 'ap_fixed<18,8>'\n", 244 | "config['LayerName']['softmax']['inv_table_t'] = 'ap_fixed<18,4>'\n", 245 | "print(\"-----------------------------------\")\n", 246 | "plotting.print_dict(config)\n", 247 | "print(\"-----------------------------------\")\n", 248 | "hls_model = hls4ml.converters.convert_from_keras_model(\n", 249 | " model, hls_config=config, backend='Vitis', output_dir='model_3/hls4ml_prj', part='xcu250-figd2104-2L-e'\n", 250 | ")\n", 251 | "hls_model.compile()\n", 252 | "\n", 253 | "y_qkeras = model.predict(np.ascontiguousarray(X_test))\n", 254 | "y_hls = hls_model.predict(np.ascontiguousarray(X_test))\n", 255 | "np.save('model_3/y_qkeras.npy', y_qkeras)\n", 256 | "np.save('model_3/y_hls.npy', y_hls)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "%matplotlib inline\n", 266 | "from sklearn.metrics import accuracy_score\n", 267 | "from tensorflow.keras.models import load_model\n", 268 | "\n", 269 | "model_ref = load_model('model_1/KERAS_check_best_model.h5')\n", 270 | "y_ref = model_ref.predict(X_test)\n", 271 | "\n", 272 | "print(\"Accuracy baseline: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_ref, axis=1))))\n", 273 | "print(\"Accuracy pruned, quantized: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_qkeras, axis=1))))\n", 274 | "print(\"Accuracy hls4ml: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_hls, axis=1))))\n", 275 | "\n", 276 | "fig, ax = plt.subplots(figsize=(9, 9))\n", 277 | "_ = plotting.makeRoc(y_test, y_ref, classes)\n", 278 | "plt.gca().set_prop_cycle(None)  # reset the colors\n", 279 | "_ = plotting.makeRoc(y_test, y_qkeras, classes, linestyle='--')\n", 280 | "plt.gca().set_prop_cycle(None)  # reset the colors\n", 281 | "_ = plotting.makeRoc(y_test, y_hls, classes, linestyle=':')\n", 282 | "\n", 283 | "from matplotlib.lines import Line2D\n", 284 | "\n", 285 | "lines = [Line2D([0], [0], ls='-'), Line2D([0], [0], ls='--'), Line2D([0], [0], ls=':')]\n", 286 | "from matplotlib.legend import Legend\n", 287 | "\n", 288 | "leg = Legend(ax, lines, labels=['baseline', 'pruned, quantized', 'hls4ml'], loc='lower right', frameon=False)\n", 289 | "ax.add_artist(leg)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "## Synthesize\n", 297 | "Now let's synthesize this quantized, pruned model.\n", 298 | "\n", 299 | "**The synthesis will take a while**\n", 300 | "\n", 301 | "While the C-Synthesis is running, we can monitor the progress by looking at the log file: open a terminal from the notebook home and execute:\n", 302 | "\n", 303 | "`tail -f model_3/hls4ml_prj/vitis_hls.log`" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "hls_model.build(csim=False)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 
317 | "metadata": {}, 318 | "source": [ 319 | "## Check the reports\n", 320 | "Print out the reports generated by Vitis HLS. Pay attention to the Utilization Estimates' section in particular this time." 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "hls4ml.report.read_vivado_report('model_3/hls4ml_prj')" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": {}, 335 | "source": [ 336 | "Print the report for the model trained in part 1. Now, compared to the model from part 1, this model has been trained with low-precision quantization, and 75% pruning. You should be able to see that we have saved a lot of resource compared to where we started in part 1. At the same time, referring to the ROC curve above, the model performance is pretty much identical even with this drastic compression!\n", 337 | "\n", 338 | "**Note you need to have trained and synthesized the model from part 1**" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "hls4ml.report.read_vivado_report('model_1/hls4ml_prj')" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "Print the report for the model trained in part 3. Both these models were trained with 75% sparsity, but the new model uses 6-bit precision as well. You can see how Vitis HLS has moved multiplication operations from DSPs into LUTs, reducing the \"critical\" resource usage.\n", 355 | "\n", 356 | "**Note you need to have trained and synthesized the model from part 3**" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "hls4ml.report.read_vivado_report('model_2/hls4ml_prj')" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "## NB\n", 373 | "Note as well that the Vitis HLS resource estimates tend to _overestimate_ LUTs, while generally estimating the DSPs correctly. Running the subsequent stages of FPGA compilation reveals the more realistic resource usage, You can run the next step, 'logic synthesis' with `hls_model.build(synth=True, vsynth=True)`, but we skipped it in this tutorial in the interest of time." 
374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [] 382 | } 383 | ], 384 | "metadata": { 385 | "kernelspec": { 386 | "display_name": "Python 3 (ipykernel)", 387 | "language": "python", 388 | "name": "python3" 389 | }, 390 | "language_info": { 391 | "codemirror_mode": { 392 | "name": "ipython", 393 | "version": 3 394 | }, 395 | "file_extension": ".py", 396 | "mimetype": "text/x-python", 397 | "name": "python", 398 | "nbconvert_exporter": "python", 399 | "pygments_lexer": "ipython3", 400 | "version": "3.10.16" 401 | } 402 | }, 403 | "nbformat": 4, 404 | "nbformat_minor": 4 405 | } 406 | -------------------------------------------------------------------------------- /part5_bdt.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\"conifer\"\n", 8 | "\n", 9 | "In this notebook we will take the first steps with training a BDT with `xgboost`, then translate it to HLS code for FPGA with `conifer`.\n", 10 | "\n", 11 | "Key concepts:\n", 12 | "- model training\n", 13 | "- model evaluation\n", 14 | "- `conifer` configuration and conversion\n", 15 | "- model emulation\n", 16 | "- model synthesis\n", 17 | "- accelerator creation\n", 18 | "\n", 19 | "For some use cases, the Forest Processing Unit might be an easier entry point as no FPGA synthesis is required for supported boards. Read more about the FPU here: https://ssummers.web.cern.ch/conifer/fpu.html" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import xgboost as xgb\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "import plotting\n", 31 | "import numpy as np\n", 32 | "from scipy.special import softmax\n", 33 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", 34 | "import conifer\n", 35 | "import json\n", 36 | "import os\n", 37 | "import sys\n", 38 | "\n", 39 | "os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']\n", 40 | "\n", 41 | "# enable more output from conifer\n", 42 | "import logging\n", 43 | "\n", 44 | "logging.basicConfig(stream=sys.stdout, level=logging.WARNING)\n", 45 | "logger = logging.getLogger('conifer')\n", 46 | "logger.setLevel('DEBUG')\n", 47 | "\n", 48 | "# create a random seed that we use to make the results repeatable\n", 49 | "seed = int('hls4ml-tutorial'.encode('utf-8').hex(), 16) % 2**31\n", 50 | "\n", 51 | "print(f'Using conifer version {conifer.__version__}')" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "# Load dataset\n", 59 | "\n", 60 | "Load the jet tagging dataset.\n", 61 | "\n", 62 | "**Note**: you need to run part1 first." 
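, "\n", "If you haven't run part 1, a minimal sketch to regenerate these arrays (mirroring the preprocessing used elsewhere in this tutorial) is:\n", "\n", "```python\n", "# Sketch: download and preprocess the dataset as in part 1\n", "import numpy as np\n", "from sklearn.datasets import fetch_openml\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", "from tensorflow.keras.utils import to_categorical\n", "\n", "data = fetch_openml('hls4ml_lhc_jets_hlf')\n", "X, y = data['data'].to_numpy(), data['target'].to_numpy()\n", "le = LabelEncoder()\n", "y = to_categorical(le.fit_transform(y), 5)\n", "X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "scaler = StandardScaler()\n", "X_train_val = scaler.fit_transform(X_train_val)\n", "X_test = scaler.transform(X_test)\n", "for name, arr in [('X_train_val', X_train_val), ('X_test', X_test), ('y_train_val', y_train_val), ('y_test', y_test), ('classes', le.classes_)]:\n", "    np.save(f'{name}.npy', arr)\n", "```"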
63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "X_train_val = np.load('X_train_val.npy')\n", 72 | "X_test = np.load('X_test.npy')\n", 73 | "y_train_val_one_hot = np.load('y_train_val.npy')\n", 74 | "y_test_one_hot = np.load('y_test.npy')\n", 75 | "classes = np.load('classes.npy', allow_pickle=True)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "We need to transform the test labels from the one-hot encoded values to labels" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "le = LabelEncoder().fit(classes)\n", 92 | "ohe = OneHotEncoder().fit(le.transform(classes).reshape(-1, 1))\n", 93 | "y_train_val = ohe.inverse_transform(y_train_val_one_hot.astype(int))\n", 94 | "y_test = ohe.inverse_transform(y_test_one_hot)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "# Train a BDT\n", 102 | "We'll use `xgboost`'s `XGBClassifier` with:\n", 103 | "\n", 104 | "| Parameter | Explanation |\n", 105 | "| --- | --- |\n", 106 | "| `n_estimators=25` | 25 trees |\n", 107 | "| `max_depth=5` | maximum tree depth of 5 |" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "clf = xgb.XGBClassifier(n_estimators=25, max_depth=5, learning_rate=1.0, random_state=seed).fit(X_train_val, y_train_val)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "# Validate performance\n", 124 | "Now we check whether the trained model is any good. We'll plot the ROC curve." 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "from sklearn.metrics import accuracy_score\n", 134 | "from tensorflow.keras.models import load_model\n", 135 | "\n", 136 | "# load the KERAS model from part 1\n", 137 | "model_ref = load_model('model_1/KERAS_check_best_model.h5')\n", 138 | "y_ref = model_ref.predict(X_test)\n", 139 | "\n", 140 | "# compute predictions of the xgboost model\n", 141 | "y_xgb = clf.predict_proba(X_test)\n", 142 | "print(f'Accuracy baseline: {accuracy_score(np.argmax(y_test_one_hot, axis=1), np.argmax(y_ref, axis=1)):.5f}')\n", 143 | "print(f'Accuracy xgboost: {accuracy_score(np.argmax(y_test_one_hot, axis=1), np.argmax(y_xgb, axis=1)):.5f}')\n", 144 | "\n", 145 | "fig, ax = plt.subplots(figsize=(9, 9))\n", 146 | "_ = plotting.makeRoc(y_test_one_hot, y_ref, classes, linestyle='--')\n", 147 | "plt.gca().set_prop_cycle(None) # reset the colors\n", 148 | "_ = plotting.makeRoc(y_test_one_hot, y_xgb, classes, linestyle='-')\n", 149 | "\n", 150 | "# add a legend\n", 151 | "from matplotlib.lines import Line2D\n", 152 | "\n", 153 | "lines = [\n", 154 | " Line2D([0], [0], ls='--'),\n", 155 | " Line2D([0], [0], ls='-'),\n", 156 | "]\n", 157 | "from matplotlib.legend import Legend\n", 158 | "\n", 159 | "leg = Legend(ax, lines, labels=['part1 Keras', 'xgboost'], loc='lower right', frameon=False)\n", 160 | "ax.add_artist(leg)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "\"conifer\"\n", 168 | "\n", 169 | "Now we'll convert this model to FPGA firmware with `conifer`. We first need to create a configuration in the form of a dictionary. 
The quickest way to get started is to create a default configuration from the intended target backend (`xilinxhls` for us). Each backend may have different configuration options, so getting the configuration this way helps enumerate the possible options.\n", 170 | "\n", 171 | "We will print the configuration, modify it, and print it again. The modifications are:\n", 172 | "- set the `OutputDir` to something descriptive\n", 173 | "- set the `XilinxPart` to the part number of the FPGA on the Alveo U250" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "cfg = conifer.backends.xilinxhls.auto_config()\n", 183 | "\n", 184 | "# print the config\n", 185 | "print('Default Configuration\\n' + '-' * 50)\n", 186 | "plotting.print_dict(cfg)\n", 187 | "print('-' * 50)\n", 188 | "\n", 189 | "# modify the config\n", 190 | "cfg['OutputDir'] = 'model_5/'\n", 191 | "cfg['XilinxPart'] = 'xcu250-figd2104-2L-e'\n", 192 | "\n", 193 | "# print the config again\n", 194 | "print('Modified Configuration\\n' + '-' * 50)\n", 195 | "plotting.print_dict(cfg)\n", 196 | "print('-' * 50)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "## Convert and write\n", 204 | "Convert the `xgboost` model to a `conifer` one, and print the `help` to see what methods it implements.\n", 205 | "Then `write` the model, creating the specified output directory and writing all the HLS files to it. We also save the `xgboost` model itself.\n", 206 | "\n", 207 | "#### Other converters:\n", 208 | "`conifer` has converters for several popular BDT training libraries. Each one is used like: `conifer.converters.convert_from_(model, config)`\n", 209 | "The converters are:\n", 210 | "- `sklearn`\n", 211 | "- `xgboost`\n", 212 | "- `ydf`\n", 213 | "- `tmva`\n", 214 | "- `onnx` (exposing `catboost` and `lightGBM`)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "# convert the model to the conifer representation\n", 224 | "conifer_model = conifer.converters.convert_from_xgboost(clf, cfg)\n", 225 | "# print the help to see the API on the conifer_model\n", 226 | "help(conifer_model)\n", 227 | "# write the project (writing HLS project to disk)\n", 228 | "conifer_model.write()\n", 229 | "# save the xgboost model - we can load this again later\n", 230 | "clf.save_model('model_5/xgboost_model.json')" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## Explore\n", 238 | "Browse the files in the newly created project directory to take a look at the HLS code.\n", 239 | "\n", 240 | "The output of `!tree model_5` is:\n", 241 | "\n", 242 | "```\n", 243 | "model_5/\n", 244 | "├── bridge.cpp\n", 245 | "├── build_hls.tcl\n", 246 | "├── firmware\n", 247 | "│ ├── BDT.cpp\n", 248 | "│ ├── BDT.h\n", 249 | "│ ├── my_prj.cpp\n", 250 | "│ ├── my_prj.h\n", 251 | "│ └── parameters.h\n", 252 | "├── hls_parameters.tcl\n", 253 | "├── my_prj.json\n", 254 | "├── my_prj_test.cpp\n", 255 | "├── tb_data\n", 256 | "└── vivado_synth.tcl\n", 257 | "```\n", 258 | "\n", 259 | "- files under `firmware` are the HLS implementation of the model\n", 260 | "- `my_prj.json` is the saved converted `conifer` model that can be loaded again without the original `xgboost` model\n", 261 | "- `tcl` scripts are used for synthesizing the project" 262 | ] 263 | }, 264 | { 265 | "cell_type": 
"markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "## Emulate\n", 269 | "Before starting the lengthy FPGA build process, we should validate that our conversion was successful and that the choice of precision was suitable with a bit-accurate emulation. To do this we need to run the HLS C++ code on the CPU with some test data first. This is like the HLS C Simulation step, but rather than writing a C++ testbench and invoking `vitis_hls` to run `csim`, `conifer` implements Python bindings for the HLS, just like `hls4ml`.\n", 270 | "\n", 271 | "We first need to compile (which uses the C++ compiler), then we can make predictions" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "conifer_model.compile()" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "y_hls = conifer_model.decision_function(X_test)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "## Compare\n", 297 | "\n", 298 | "Now we check whether the emulated predictions are good. To do this we'll plot the ROC curve again with the HLS predictions overlaid." 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "y_hls_proba = softmax(y_hls) # compute class probabilities from the raw predictions\n", 308 | "\n", 309 | "print(f'Accuracy baseline: {accuracy_score(np.argmax(y_test_one_hot, axis=1), np.argmax(y_ref, axis=1)):.5f}')\n", 310 | "print(f'Accuracy xgboost: {accuracy_score(np.argmax(y_test_one_hot, axis=1), np.argmax(y_xgb, axis=1)):.5f}')\n", 311 | "print(f'Accuracy conifer: {accuracy_score(np.argmax(y_test_one_hot, axis=1), np.argmax(y_hls_proba, axis=1)):.5f}')\n", 312 | "\n", 313 | "\n", 314 | "fig, ax = plt.subplots(figsize=(9, 9))\n", 315 | "_ = plotting.makeRoc(y_test_one_hot, y_ref, classes, linestyle='--')\n", 316 | "plt.gca().set_prop_cycle(None) # reset the colors\n", 317 | "_ = plotting.makeRoc(y_test_one_hot, y_xgb, classes, linestyle=':')\n", 318 | "plt.gca().set_prop_cycle(None) # reset the colors\n", 319 | "_ = plotting.makeRoc(y_test_one_hot, y_hls_proba, classes, linestyle='-')\n", 320 | "\n", 321 | "# add a legend\n", 322 | "from matplotlib.lines import Line2D\n", 323 | "\n", 324 | "lines = [\n", 325 | " Line2D([0], [0], ls='--'),\n", 326 | " Line2D([0], [0], ls=':'),\n", 327 | " Line2D([0], [0], ls='-'),\n", 328 | "]\n", 329 | "from matplotlib.legend import Legend\n", 330 | "\n", 331 | "leg = Legend(ax, lines, labels=['part1 Keras', 'xgboost', 'conifer'], loc='lower right', frameon=False)\n", 332 | "ax.add_artist(leg)" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "## Build\n", 340 | "Now we'll run the Vitis HLS and Vivado synthesis. HLS C Synthesis compiles our C++ to RTL, performing scheduling and resource mapping. Vivado synthesis synthesizes the RTL from the previous step into a netlist, and produces a more realistic resource estimation. 
The latency can't change during Vivado synthesis; it's fixed in the RTL description.\n", 341 | "\n", 342 | "After the build completes we can also browse the new log files and reports that are generated.\n", 343 | "\n", 344 | "**Warning**: this step might take around 10 minutes" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "metadata": { 351 | "scrolled": true 352 | }, 353 | "outputs": [], 354 | "source": [ 355 | "conifer_model.build(synth=True, vsynth=True)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "## Report\n", 363 | "If the synthesis completed successfully, we can extract the key metrics from the reports and print them out.\n", 364 | "The section `\"vsynth\"` contains the report from the Vivado RTL synthesis, which is usually lower and more realistic than the HLS report." 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "report = conifer_model.read_report()\n", 374 | "plotting.print_dict(report)" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "## Deployment with `pynq`\n", 382 | "\n", 383 | "There are two main ways to deploy a BDT to an accelerator card with `conifer`:\n", 384 | "- build a static accelerator with Xilinx HLS backend\n", 385 | "- use the dynamic accelerator Forest Processing Unit (FPU)\n", 386 | "\n", 387 | "Getting started with the FPU is straightforward. For a supported board, you will need only the converted model JSON, and a bitfile that can be downloaded from the conifer website. Read more about the FPU here: https://ssummers.web.cern.ch/conifer/fpu.html\n", 388 | "\n", 389 | "However, without a physical device there's not much to show, so in this section we'll see how to deploy the model that we already trained as a static accelerator to a `pynq-z2` board.\n", 390 | "We'll use the `AcceleratorConfig` part of the configuration that we previously left undefined." 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "pynq_model_cfg = conifer.backends.xilinxhls.auto_config()\n", 400 | "pynq_model_cfg['OutputDir'] = 'model_5_pynq'  # choose a new project directory\n", 401 | "pynq_model_cfg['ProjectName'] = 'conifer_jettag'\n", 402 | "pynq_model_cfg['AcceleratorConfig'] = {\n", 403 | " 'Board': 'pynq-z2',  # choose a pynq-z2 board\n", 404 | " 'InterfaceType': 'float',  # floating point for the data I/O (this is default)\n", 405 | "}\n", 406 | "\n", 407 | "# print the config\n", 408 | "print('Modified Configuration\\n' + '-' * 50)\n", 409 | "print(json.dumps(pynq_model_cfg, indent=2))\n", 410 | "print('-' * 50)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": {}, 416 | "source": [ 417 | "## Supported boards\n", 418 | "\n", 419 | "Here we print the list of supported boards, so you can see what else works \"out of the box\". 
It's relatively easy to add other Zynq SoC or Alveo boards, for example to add an Alveo U50 card targeting the `xilinx_u50_gen3x16_xdma_5_202210_1` platform:\n", 420 | "\n", 421 | "```\n", 422 | "u50 = conifer.backends.boards.AlveoConfig.default_config()\n", 423 | "u50['xilinx_part'] = 'xcu50-fsvh2104-2-e'\n", 424 | "u50['platform'] = 'xilinx_u50_gen3x16_xdma_5_202210_1'\n", 425 | "u50['name'] = 'xilinx_u50_gen3x16_xdma_5_202210_1'\n", 426 | "u50 = conifer.backends.boards.AlveoConfig(u50)\n", 427 | "conifer.backends.boards.register_board_config(u50.name, u50)\n", 428 | "```" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "# This is the full list of supported boards:\n", 438 | "print(f'Supported boards: {conifer.backends.boards.get_available_boards()}')" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "### Load the model\n", 446 | "\n", 447 | "We load the JSON for the conifer model we previously used, applying the new configuration just defined. We'll see that the FPGA part specified by the board overrides the `XilinxPart` specified in the default." 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "pynq_model = conifer.model.load_model('model_5/my_prj.json', new_config=pynq_model_cfg)\n", 457 | "pynq_model.write()" 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "## Build the model\n", 465 | "\n", 466 | "Now we run `build` again, running HLS Synthesis, Logic Synthesis and Place & Route, finally producing a bitfile and an archive of files that we'll need to run inference on the pynq-z2 board. \n", 467 | "\n", 468 | "**Warning**: this step might take around 20 minutes to complete.\n", 469 | "\n", 470 | "The floorplan of the bitfile should look something like this, where the individual tree modules are highlighted in different colours:\n", 471 | "\n", 472 | "" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "pynq_model.build(synth=True, bitfile=True, package=True)" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": {}, 487 | "source": [ 488 | "## Inference on pynq-z2\n", 489 | "\n", 490 | "Running inference on the `pynq-z2` would look like this:\n", 491 | "- download the `model_5/conifer_jettag.zip` archive from this notebook\n", 492 | "- upload `conifer_jettag.zip` to the pynq-z2 device and unzip it\n", 493 | "- start a jupyter notebook on the `pynq-z2` and run the following code:\n", 494 | "\n", 495 | "```\n", 496 | "import conifer\n", 497 | "accelerator = conifer.backends.xilinxhls.runtime.ZynqDriver('conifer_jettag.bit', batch_size=1)\n", 498 | "X = ... 
# load some data \n", 499 | "y_pynq = accelerator.decision_function(X)\n", 500 | "```\n" 501 | ] 502 | } 503 | ], 504 | "metadata": { 505 | "kernelspec": { 506 | "display_name": "Python 3 (ipykernel)", 507 | "language": "python", 508 | "name": "python3" 509 | }, 510 | "language_info": { 511 | "codemirror_mode": { 512 | "name": "ipython", 513 | "version": 3 514 | }, 515 | "file_extension": ".py", 516 | "mimetype": "text/x-python", 517 | "name": "python", 518 | "nbconvert_exporter": "python", 519 | "pygments_lexer": "ipython3", 520 | "version": "3.10.10" 521 | } 522 | }, 523 | "nbformat": 4, 524 | "nbformat_minor": 4 525 | } 526 | -------------------------------------------------------------------------------- /part7b_deployment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "033cc4d9", 6 | "metadata": {}, 7 | "source": [ 8 | "# Part 7b: Deployment on PYNQ-Z2\n", 9 | "The following section is the code to run in the pynq-z2 jupyter notebook to perform NN inference.\n", 10 | "\n", 11 | "The following cells are intended to run on a pynq-z2; they will not run on the server used to train and synthesize models!\n", 12 | "\n", 13 | "First, import our driver `Overlay` class. We'll also load the test data." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "id": "89c67e4f", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "from axi_stream_driver import NeuralNetworkOverlay\n", 24 | "import numpy as np\n", 25 | "\n", 26 | "X_test = np.load('X_test.npy')\n", 27 | "y_test = np.load('y_test.npy')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "id": "551c5cd6", 33 | "metadata": {}, 34 | "source": [ 35 | "Create a `NeuralNetworkOverlay` object. This will download the `Overlay` (bitfile) onto the PL of the pynq-z2. We provide the `X_test.shape` and `y_test.shape` to allocate some buffers for the data transfer." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "id": "cfb786f3", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "nn = NeuralNetworkOverlay('hls4ml_nn.bit', X_test.shape, y_test.shape)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "id": "5fde9b2d", 51 | "metadata": {}, 52 | "source": [ 53 | "Now run the prediction! When we set `profile=True` the function times the inference, and prints out a summary as well as returning the profiling information. We also save the output to a file so we can do some validation." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "1fd6dee7", 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "y_hw, latency, throughput = nn.predict(X_test, profile=True)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "id": "1983e7d7", 69 | "metadata": {}, 70 | "source": [ 71 | "An example printout looks like:\n", 72 | "\n", 73 | "```\n", 74 | "Classified 166000 samples in 0.402568 seconds (412352.6956936468 inferences / s)\n", 75 | "```\n", 76 | "\n", 77 | "Now let's save the output and transfer this back to the host." 
78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "id": "981ffced", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "np.save('y_hw.npy', y_hw)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "id": "b9e92d1e", 93 | "metadata": {}, 94 | "source": [ 95 | "Now, go back to the host and follow `part7c_validation.ipynb`" 96 | ] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 3 (ipykernel)", 102 | "language": "python", 103 | "name": "python3" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 3 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython3", 115 | "version": "3.10.16" 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 5 120 | } 121 | -------------------------------------------------------------------------------- /part7c_validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "005ae126", 6 | "metadata": {}, 7 | "source": [ 8 | "# Part 7c: Validation\n", 9 | "We executed NN inference on the pynq-z2! Now we can copy the `y_hw.npy` back to the host we've been using for the training and synthesis, and make a final plot to check that the output we took on the board is as expected.\n", 10 | "\n", 11 | "The command to copy it back is\n", 12 | "\n", 13 | "```bash\n", 14 | "scp xilinx@192.168.2.99:~/jupyter_notebooks/y_hw.npy model_3/\n", 15 | "```" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "fee790be", 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import matplotlib.pyplot as plt\n", 26 | "import numpy as np\n", 27 | "import plotting\n", 28 | "\n", 29 | "%matplotlib inline\n", 30 | "from sklearn.metrics import accuracy_score\n", 31 | "\n", 32 | "y_hw = np.load('model_3/y_hw.npy')\n", 33 | "y_test = np.load('y_test.npy')\n", 34 | "classes = np.load('classes.npy', allow_pickle=True)\n", 35 | "y_hls = np.load('model_3/y_hls.npy')\n", 36 | "y_qkeras = np.load('model_3/y_qkeras.npy')\n", 37 | "\n", 38 | "print(\"Accuracy QKeras, CPU: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_qkeras, axis=1))))\n", 39 | "print(\"Accuracy hls4ml, CPU: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_hls, axis=1))))\n", 40 | "print(\"Accuracy hls4ml, pynq-z2: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_hw, axis=1))))\n", 41 | "\n", 42 | "fig, ax = plt.subplots(figsize=(9, 9))\n", 43 | "_ = plotting.makeRoc(y_test, y_qkeras, classes, linestyle='-')\n", 44 | "plt.gca().set_prop_cycle(None) # reset the colors\n", 45 | "_ = plotting.makeRoc(y_test, y_hls, classes, linestyle='--')\n", 46 | "plt.gca().set_prop_cycle(None) # reset the colors\n", 47 | "_ = plotting.makeRoc(y_test, y_hw, classes, linestyle='-.')\n", 48 | "\n", 49 | "from matplotlib.lines import Line2D\n", 50 | "\n", 51 | "lines = [Line2D([0], [0], ls='-'), Line2D([0], [0], ls='--'), Line2D([0], [0], ls='-.')]\n", 52 | "from matplotlib.legend import Legend\n", 53 | "\n", 54 | "leg = Legend(ax, lines, labels=['QKeras, CPU', 'hls4ml, CPU', 'hls4ml, pynq-z2'], loc='lower right', frameon=False)\n", 55 | "ax.add_artist(leg)" 56 | ] 57 | } 58 | ], 59 | "metadata": { 60 | "kernelspec": { 61 | "display_name": "Python 3 (ipykernel)", 62 | "language": "python", 63 | "name": 
"python3" 64 | }, 65 | "language_info": { 66 | "codemirror_mode": { 67 | "name": "ipython", 68 | "version": 3 69 | }, 70 | "file_extension": ".py", 71 | "mimetype": "text/x-python", 72 | "name": "python", 73 | "nbconvert_exporter": "python", 74 | "pygments_lexer": "ipython3", 75 | "version": "3.10.16" 76 | } 77 | }, 78 | "nbformat": 4, 79 | "nbformat_minor": 5 80 | } 81 | -------------------------------------------------------------------------------- /part8_symbolic_regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "79933ff7", 6 | "metadata": {}, 7 | "source": [ 8 | "# Part 8: Symbolic Regression" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "ede2226f", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import sympy\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import hls4ml\n", 22 | "from sklearn.model_selection import train_test_split\n", 23 | "from sklearn.preprocessing import StandardScaler, LabelEncoder\n", 24 | "from sklearn.metrics import roc_curve, auc, accuracy_score\n", 25 | "from tensorflow.keras.utils import to_categorical\n", 26 | "from sklearn.datasets import fetch_openml" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "id": "d9e2b159", 32 | "metadata": {}, 33 | "source": [ 34 | "## Load the LHC jet tagging dataset" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "ee6d96bd", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "data = fetch_openml('hls4ml_lhc_jets_hlf')\n", 45 | "X, Y = data['data'].to_numpy(), data['target'].to_numpy()\n", 46 | "print(data['feature_names'])\n", 47 | "print(X.shape, Y.shape)\n", 48 | "print(Y[:10])\n", 49 | "\n", 50 | "LE = LabelEncoder()\n", 51 | "Y = LE.fit_transform(Y)\n", 52 | "Y = to_categorical(Y, 5)\n", 53 | "\n", 54 | "Y = 2 * Y - 1\n", 55 | "print(Y[:10])" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "0502aea8", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.5, random_state=123)\n", 66 | "\n", 67 | "scaler = StandardScaler().fit(X_train)\n", 68 | "X_train = scaler.transform(X_train)\n", 69 | "X_test = scaler.transform(X_test)\n", 70 | "\n", 71 | "# PySR (or any genetic programming based SR) not happy with too many training data\n", 72 | "X_train = X_train[:8000]\n", 73 | "Y_train = Y_train[:8000]\n", 74 | "\n", 75 | "print('X_train.shape: ' + str(X_train.shape))\n", 76 | "print('Y_train.shape: ' + str(Y_train.shape))\n", 77 | "print('X_test.shape: ' + str(X_test.shape))\n", 78 | "print('Y_test.shape: ' + str(Y_test.shape))" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "id": "7ec86106", 84 | "metadata": {}, 85 | "source": [ 86 | "## Perform SR with PySR (if installed)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "id": "57e7896d", 92 | "metadata": {}, 93 | "source": [ 94 | "If you want to run `PySR` (a genetic programming-based symbolic regression software), please see https://github.com/MilesCranmer/PySR for installation and intructions.\n", 95 | "\n", 96 | "Below is an example configuration script to run training in `PySR`, where one can specify the allowed primitive functions `unary_operators` `binary_operators` (e.g. 
97 |     "\n",
98 |     "We also provide an already-trained PySR model, `sr/example.pkl`, which is used in the following sections to demonstrate the HLS implementation."
99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": null,
104 |    "id": "96a651dd",
105 |    "metadata": {},
106 |    "outputs": [],
107 |    "source": [
108 |     "import os\n",
109 |     "os.environ['JULIA_NUM_THREADS'] = '32'  # set before PySR launches Julia; a `!export` would only affect a transient subshell\n",
110 |     "\n",
111 |     "from pysr import PySRRegressor\n",
112 |     "model_pysr = PySRRegressor(\n",
113 |     "    model_selection='accuracy',\n",
114 |     "    niterations=40,\n",
115 |     "    timeout_in_seconds=60 * 60 * 1,\n",
116 |     "    maxsize=40,\n",
117 |     "    select_k_features=6,\n",
118 |     "    binary_operators=['+', '-', '*'],\n",
119 |     "    unary_operators=['sin', 'sc(x)=sin(x)*cos(x)'],\n",
120 |     "    complexity_of_operators={'+': 1, '-': 1, '*': 1, 'sin': 1, 'sc': 1},\n",
121 |     "    constraints={'sin': 20, 'sc': 20},\n",
122 |     "    nested_constraints={'sin': {'sin': 0, 'sc': 0}, 'sc': {'sin': 0, 'sc': 0}},\n",
123 |     "    extra_sympy_mappings={'sc': lambda x: sympy.sin(x) * sympy.cos(x)},\n",
124 |     "    loss='L2MarginLoss()',  # (1 - y*y')^2\n",
125 |     ")"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": null,
131 |    "id": "5f4d9501",
132 |    "metadata": {},
133 |    "outputs": [],
134 |    "source": [
135 |     "model_pysr.fit(X_train, Y_train)"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "markdown",
140 |    "id": "846e710b",
141 |    "metadata": {},
142 |    "source": [
143 |     "## Prepare symbolic expressions in strings first"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "markdown",
148 |    "id": "c7aaf105",
149 |    "metadata": {},
150 |    "source": [
151 |     "We provide a trained model for the HLS demonstration.\n",
152 |     "\n",
153 |     "**If you have `PySR` installed**, you can directly load the trained expressions from the output file `sr/example.pkl`.\n",
154 |     "`PySR` allows custom functions to be defined, such as sc(x) := sin(x)*cos(x) in this example; these need to be re-defined through `extra_sympy_mappings` and a new `sympy` class when retrieving the equations for evaluation."
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": null,
160 |    "id": "d3d5d2cd",
161 |    "metadata": {
162 |     "scrolled": true
163 |    },
164 |    "outputs": [],
165 |    "source": [
166 |     "from pysr import PySRRegressor\n",
167 |     "\n",
168 |     "model_pysr = PySRRegressor.from_file('sr/example.pkl')\n",
169 |     "with sympy.evaluate(True):\n",
170 |     "    for i in range(5):\n",
171 |     "        print('Tagger {} = '.format(i) + str(model_pysr.sympy()[i]) + '\\n------------------------------------------')\n",
172 |     "\n",
173 |     "# Re-write custom operator defined from PySR config: sc(x) = sin(x)*cos(x)\n",
174 |     "model_pysr.set_params(extra_sympy_mappings={\"sc\": lambda x: sympy.sin(x) * sympy.cos(x)})\n",
175 |     "model_pysr.refresh()\n",
176 |     "\n",
177 |     "\n",
178 |     "class sc(sympy.Function):\n",
179 |     "    pass"
180 |    ]
181 |   },
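182 |   {
183 |    "cell_type": "markdown",
184 |    "id": "c2d3e4f5",
185 |    "metadata": {},
186 |    "source": [
187 |     "Before going to HLS, it can be reassuring to evaluate a retrieved expression numerically. This is a minimal sketch with `sympy.lambdify`; it assumes PySR's default feature symbols `x0` ... `x15`."
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": null,
193 |    "id": "d3e4f5a6",
194 |    "metadata": {},
195 |    "outputs": [],
196 |    "source": [
197 |     "# Sketch: numerically evaluate the first retrieved expression on a random input\n",
198 |     "xs = sympy.symbols('x0:16')  # x0 ... x15\n",
199 |     "f0 = sympy.lambdify(xs, model_pysr.sympy()[0], modules='numpy')\n",
200 |     "print(f0(*np.random.rand(16)))"
201 |    ]
202 |   },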
203 |   {
204 |    "cell_type": "markdown",
205 |    "id": "699d2e05",
206 |    "metadata": {},
207 |    "source": [
208 |     "There are two options for evaluating math functions in `hls4ml`: one is the standard HLS math library (`func`); the other approximates each function with a user-defined lookup table (`func_lut`) to save resources. We will define the lookup tables (table range and size) for `func_lut` later.\n",
209 |     "\n",
210 |     "We have the equations in `sympy` format; now convert them into strings: `expr` for the standard functions and `expr_lut` for the lookup-table approximation. We will re-parse `expr` and `expr_lut` from strings in `sympy` format for the `hls4ml` converter."
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": null,
216 |    "id": "7219a874",
217 |    "metadata": {},
218 |    "outputs": [],
219 |    "source": [
220 |     "expr = []\n",
221 |     "expr_lut = []\n",
222 |     "for i in range(5):\n",
223 |     "    expr.append(str(model_pysr.sympy()[i]))\n",
224 |     "    expr_lut.append(expr[i].replace(\"sin\", \"sin_lut\").replace(\"cos\", \"cos_lut\"))"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "id": "0abcba26",
230 |    "metadata": {},
231 |    "source": [
232 |     "**If you don't have PySR installed**, you can also write your expressions directly as strings and parse them into `sympy` format, which can then be fed to the `hls4ml` converter. Here again, `expr` uses the standard math library and `expr_lut` uses the lookup-table approximation."
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "code",
237 |    "execution_count": null,
238 |    "id": "3356d1e6",
239 |    "metadata": {},
240 |    "outputs": [],
241 |    "source": [
242 |     "# Expressions from 'sr/example.pkl'\n",
243 |     "\n",
244 |     "# Expressions that will use Vivado math library\n",
245 |     "expr = [\n",
246 |     "    '-0.1630426*(sin(-0.75052315)*cos(-0.75052315) - 0.84283006)*sin(2*x14 - 1.03665108)*cos(2*x14 - 1.03665108) - sin(x14 - (0.9237657 - 0.11933863*x3)*(-x15 + 2*x2 - 0.3817056) + 1.761264957)',\n",
247 |     "    '(-(0.5822144*sin(0.83811*x14)*cos(0.83811*x14) - 0.5324657)*(sin(0.3923645*x2)*cos(0.3923645*x2) - 0.63548696) + sin(x14 - 0.3923645*x15 + x3 + 0.51168373)*cos(x14 - 0.3923645*x15 + x3 + 0.51168373))*(0.561041303633489*sin(x15) - 0.47277835) - 0.84055585',\n",
248 |     "    '0.49239117*(sin(x3)*cos(x3) + sin(x15 + 0.76784414*x3)*cos(x15 + 0.76784414*x3))*(sin(-0.13417026)*cos(-0.13417026) + sin(0.5180547)*cos(0.5180547) + sin(x2)*cos(x2)) - sin(x14 + 0.25715914*x15*x3 - x2 - x3 + 0.66443527)',\n",
249 |     "    '0.41071504*(0.9298677 - sin(0.59376544*x15))*(sin(x14)*cos(x14) + 5.2546763*sin(0.71913457 - x3)*cos(0.71913457 - x3))*(-sin(2*x3)*cos(2*x3) + sin(5.2546763*x14 + x3 + 0.77032656)*cos(5.2546763*x14 + x3 + 0.77032656) + 0.32492808) - 0.863786752431664',\n",
250 |     "    '(1.0745832 - sin(-x14 - 0.4094719)*cos(-x14 - 0.4094719))*(-0.15737492*x15 - sin(x14 - 4.2594776)*cos(x14 - 4.2594776) + sin(3*x14 - x3*(x14 - 4.1772995) - x3 + 3.087878)*cos(3*x14 - x3*(x14 - 4.1772995) - x3 + 3.087878) - 0.690204005690814)',\n",
251 |     "]\n",
252 |     "# Expressions that will use look-up table approximated math functions\n",
253 |     "expr_lut = []\n",
254 |     "for i in range(len(expr)):\n",
255 |     "    expr_lut.append(expr[i].replace(\"sin\", \"sin_lut\").replace(\"cos\", \"cos_lut\"))"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "markdown",
260 |    "id": "788ee608",
261 |    "metadata": {},
262 |    "source": [
263 |     "## Then parse the strings to sympy expressions"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "markdown",
268 |    "id": "03fc8284",
269 |    "metadata": {},
270 |    "source": [
271 |     "Define the lookup tables for approximating math functions. The table range and size can be customized for each function to be approximated; the choice depends on how much precision you can give up in exchange for saving more resources.\n",
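272 |     "\n",
273 |     "As a rough illustration (a plain numpy sketch, independent of how hls4ml implements its tables), the worst-case error of a 256-entry `sin` table over [-8, 8] can be estimated like this:\n",
274 |     "\n",
275 |     "```python\n",
276 |     "table = np.sin(np.linspace(-8, 8, 256))  # 256-entry lookup table\n",
277 |     "x = np.random.uniform(-8, 8, 100000)\n",
278 |     "idx = np.round((x + 8) / 16 * 255).astype(int)  # nearest table entry\n",
279 |     "print(np.abs(table[idx] - np.sin(x)).max())  # roughly half the table step\n",
280 |     "```"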
251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "id": "920e2326", 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "from hls4ml.utils.symbolic_utils import init_pysr_lut_functions\n", 261 | "\n", 262 | "# For functions approximated with look-up table, define the table range and size\n", 263 | "function_definitions = [\n", 264 | " 'sin_lut(x) = math_lut(sin, x, N=256, range_start=-8, range_end=8)',\n", 265 | " 'cos_lut(x) = math_lut(cos, x, N=256, range_start=-8, range_end=8)',\n", 266 | "]\n", 267 | "init_pysr_lut_functions(init_defaults=True, function_definitions=function_definitions)\n", 268 | "\n", 269 | "lut_functions = {\n", 270 | " 'sin_lut': {'math_func': 'sin', 'range_start': -8, 'range_end': 8, 'table_size': 256},\n", 271 | " 'cos_lut': {'math_func': 'cos', 'range_start': -8, 'range_end': 8, 'table_size': 256},\n", 272 | "}" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "id": "8be93891", 278 | "metadata": {}, 279 | "source": [ 280 | "Parse `expr` and `expr_lut` to sympy expressions." 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "id": "96f61066", 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "# Use sympy to parse strings into sympy expressions\n", 291 | "for i in range(len(expr)):\n", 292 | " print('expr =\\n' + expr[i])\n", 293 | " print(\"----------------------------------------\")\n", 294 | " print('expr_LUT =\\n' + expr_lut[i])\n", 295 | " print(\"========================================\")\n", 296 | " expr[i] = sympy.parsing.sympy_parser.parse_expr(expr[i])\n", 297 | " expr_lut[i] = sympy.parsing.sympy_parser.parse_expr(expr_lut[i])" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "id": "f7548c93", 303 | "metadata": {}, 304 | "source": [ 305 | "Use `hls4ml.converters.convert_from_symbolic_expression` to convert sympy expressions and compile." 
306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "id": "46ff4b5e", 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "# Use hls4ml to convert sympy expressions into HLS model\n", 316 | "hls_model = hls4ml.converters.convert_from_symbolic_expression(\n", 317 | " expr, n_symbols=16, output_dir='my-hls-test', precision='ap_fixed<16,6>', part='xcvu9p-flga2577-2-e'\n", 318 | ")\n", 319 | "hls_model.write()\n", 320 | "hls_model.compile()\n", 321 | "\n", 322 | "hls_model_lut = hls4ml.converters.convert_from_symbolic_expression(\n", 323 | " expr_lut,\n", 324 | " n_symbols=16,\n", 325 | " output_dir='my-hls-test-lut',\n", 326 | " precision='ap_fixed<16,6>',\n", 327 | " part='xcvu9p-flga2577-2-e',\n", 328 | " lut_functions=lut_functions,\n", 329 | ")\n", 330 | "hls_model_lut.write()\n", 331 | "hls_model_lut.compile()" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "id": "08682628", 337 | "metadata": {}, 338 | "source": [ 339 | "## Compare outputs: PySR vs HLS vs HLS(LUT)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "id": "39269441", 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "test_vector = np.random.rand(1, 16) * 4 - 2\n", 350 | "# print(model_pysr.predict(test_vector))\n", 351 | "print(hls_model.predict(test_vector))\n", 352 | "print(hls_model_lut.predict(test_vector))" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "id": "08795fca", 358 | "metadata": {}, 359 | "source": [ 360 | "## Compare performance on the dataset" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "id": "05894f0b", 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "# Y_pysr = model_pysr.predict(X_test)\n", 371 | "Y_hls = hls_model.predict(X_test)\n", 372 | "Y_hls_lut = hls_model_lut.predict(X_test)\n", 373 | "# auc_pysr=[]\n", 374 | "auc_hls = []\n", 375 | "auc_hls_lut = []\n", 376 | "for x, label in enumerate(LE.classes_):\n", 377 | " # fpr_pysr, tpr_pysr, _ = roc_curve(Y_test[:, x], Y_pysr[:, x])\n", 378 | " fpr_hls, tpr_hls, _ = roc_curve(Y_test[:, x], Y_hls[:, x])\n", 379 | " fpr_hls_lut, tpr_hls_lut, _ = roc_curve(Y_test[:, x], Y_hls_lut[:, x])\n", 380 | " # auc_pysr.append(auc(fpr_pysr, tpr_pysr))\n", 381 | " auc_hls.append(auc(fpr_hls, tpr_hls))\n", 382 | " auc_hls_lut.append(auc(fpr_hls_lut, tpr_hls_lut))\n", 383 | "\n", 384 | "# print('PySR acc = {0:.3f}'.format(accuracy_score(np.argmax(Y_test, axis=1), np.argmax(Y_pysr, axis=1))))\n", 385 | "# print('PySR auc = {0:.3f},{1:.3f},{2:.3f},{3:.3f},{4:.3f}'.format(auc_pysr[0],auc_pysr[1],auc_pysr[2],auc_pysr[3],auc_pysr[4]))\n", 386 | "print('HLS acc = {0:.3f}'.format(accuracy_score(np.argmax(Y_test, axis=1), np.argmax(Y_hls, axis=1))))\n", 387 | "print(\n", 388 | " 'HLS auc = {0:.3f},{1:.3f},{2:.3f},{3:.3f},{4:.3f}'.format(\n", 389 | " auc_hls[0], auc_hls[1], auc_hls[2], auc_hls[3], auc_hls[4]\n", 390 | " )\n", 391 | ")\n", 392 | "print('HLS_LUT acc = {0:.3f}'.format(accuracy_score(np.argmax(Y_test, axis=1), np.argmax(Y_hls_lut, axis=1))))\n", 393 | "print(\n", 394 | " 'HLS_LUT auc = {0:.3f},{1:.3f},{2:.3f},{3:.3f},{4:.3f}'.format(\n", 395 | " auc_hls_lut[0], auc_hls_lut[1], auc_hls_lut[2], auc_hls_lut[3], auc_hls_lut[4]\n", 396 | " )\n", 397 | ")" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "id": "002643a3", 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "def plot_roc(y_test, y_pred, labels, 
model):\n",
408 |     "    color = ['blue', 'orange', 'green', 'red', 'purple']\n",
409 |     "    for x, label in enumerate(labels):\n",
410 |     "        fpr, tpr, _ = roc_curve(y_test[:, x], y_pred[:, x])\n",
411 |     "        if model == 'pysr':\n",
412 |     "            plt.plot(\n",
413 |     "                tpr,\n",
414 |     "                fpr,\n",
415 |     "                label='{0}, PySR, AUC = {1:.1f}'.format(label, auc(fpr, tpr) * 100.0),\n",
416 |     "                linestyle='solid',\n",
417 |     "                color=color[x],\n",
418 |     "                lw=1.5,\n",
419 |     "            )\n",
420 |     "        if model == 'hls':\n",
421 |     "            plt.plot(\n",
422 |     "                tpr,\n",
423 |     "                fpr,\n",
424 |     "                label='{0}, HLS, AUC = {1:.1f}'.format(label, auc(fpr, tpr) * 100.0),\n",
425 |     "                linestyle='dotted',\n",
426 |     "                color=color[x],\n",
427 |     "                lw=1.5,\n",
428 |     "            )\n",
429 |     "        if model == 'hls_lut':\n",
430 |     "            plt.plot(\n",
431 |     "                tpr,\n",
432 |     "                fpr,\n",
433 |     "                label='{0}, HLS LUT, AUC = {1:.1f}'.format(label, auc(fpr, tpr) * 100.0),\n",
434 |     "                linestyle='None',\n",
435 |     "                color=color[x],\n",
436 |     "                lw=1,\n",
437 |     "                marker='o',\n",
438 |     "                ms=1,\n",
439 |     "            )\n",
440 |     "    plt.semilogy()\n",
441 |     "    plt.xlabel('True positive rate', size=15, loc='right')\n",
442 |     "    plt.ylabel('False positive rate', size=15, loc='top')\n",
443 |     "    plt.tick_params(axis='both', which='major', direction='in', length=6, width=1.2, labelsize=12, right=True, top=True)\n",
444 |     "    plt.tick_params(axis='both', which='minor', direction='in', length=2, width=1, labelsize=12, right=True, top=True)\n",
445 |     "    plt.xlim(0, 1)\n",
446 |     "    plt.ylim(0.001, 1)\n",
447 |     "    plt.grid(True)\n",
448 |     "    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=12)\n",
449 |     "\n",
450 |     "\n",
451 |     "plt.figure(figsize=(15, 15))\n",
452 |     "axes = plt.subplot(2, 2, 1)\n",
453 |     "# plot_roc(Y_test, Y_pysr, LE.classes_, 'pysr')\n",
454 |     "plot_roc(Y_test, Y_hls, LE.classes_, 'hls')\n",
455 |     "plot_roc(Y_test, Y_hls_lut, LE.classes_, 'hls_lut')"
456 |    ]
457 |   },
458 |   {
459 |    "cell_type": "markdown",
460 |    "id": "7beb92ea",
461 |    "metadata": {},
462 |    "source": [
463 |     "## Run synthesis from command line"
464 |    ]
465 |   },
466 |   {
467 |    "cell_type": "code",
468 |    "execution_count": null,
469 |    "id": "e4047f52",
470 |    "metadata": {},
471 |    "outputs": [],
472 |    "source": [
473 |     "# Chain the commands in one shell so the sourced environment actually reaches vitis_hls\n",
474 |     "!source ${XILINX_VITIS}/settings64.sh && vitis_hls -f build_prj.tcl \"reset=1 synth=1 csim=0 cosim=0 validation=0 export=0 vsynth=0\"\n",
475 |     "!cat my-hls-test/myproject_prj/solution1/syn/report/myproject_csynth.rpt"
476 |    ]
477 |   },
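478 |   {
479 |    "cell_type": "markdown",
480 |    "id": "e4f5a6b7",
481 |    "metadata": {},
482 |    "source": [
483 |     "Alternatively, the same synthesis can be launched from Python. This is a sketch (left commented out because synthesis can take a long time), assuming the backend's `build()` options and the `read_vivado_report` helper:"
484 |    ]
485 |   },
486 |   {
487 |    "cell_type": "code",
488 |    "execution_count": null,
489 |    "id": "f5a6b7c8",
490 |    "metadata": {},
491 |    "outputs": [],
492 |    "source": [
493 |     "# Sketch: drive synthesis through the hls4ml Python API instead of the shell\n",
494 |     "# hls_model.build(csim=False, synth=True, cosim=False, export=False)\n",
495 |     "# hls4ml.report.read_vivado_report('my-hls-test')"
496 |    ]
497 |   }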
498 |  ],
499 |  "metadata": {
500 |   "kernelspec": {
501 |    "display_name": "Python 3 (ipykernel)",
502 |    "language": "python",
503 |    "name": "python3"
504 |   },
505 |   "language_info": {
506 |    "codemirror_mode": {
507 |     "name": "ipython",
508 |     "version": 3
509 |    },
510 |    "file_extension": ".py",
511 |    "mimetype": "text/x-python",
512 |    "name": "python",
513 |    "nbconvert_exporter": "python",
514 |    "pygments_lexer": "ipython3",
515 |    "version": "3.10.16"
516 |   }
517 |  },
518 |  "nbformat": 4,
519 |  "nbformat_minor": 5
520 | }
521 | 
--------------------------------------------------------------------------------
/plotting.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | 
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import pandas as pd
6 | from sklearn.metrics import auc, roc_curve
7 | 
8 | 
9 | # confusion matrix code from Maurizio
10 | # /eos/user/m/mpierini/DeepLearning/ML4FPGA/jupyter/HbbTagger_Conv1D.ipynb
11 | def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
12 |     """
13 |     This function prints and plots the confusion matrix.
14 |     Normalization can be applied by setting `normalize=True`.
15 |     """
16 |     if normalize:
17 |         cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
18 | 
19 |     plt.imshow(cm, interpolation='nearest', cmap=cmap)
20 |     # plt.title(title)
21 |     cbar = plt.colorbar()
22 |     plt.clim(0, 1)
23 |     cbar.set_label(title)
24 |     tick_marks = np.arange(len(classes))
25 |     plt.xticks(tick_marks, classes, rotation=45)
26 |     plt.yticks(tick_marks, classes)
27 | 
28 |     fmt = '.2f' if normalize else 'd'
29 |     thresh = cm.max() / 2.0
30 |     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
31 |         plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")
32 | 
33 |     # plt.tight_layout()
34 |     plt.ylabel('True label')
35 |     plt.xlabel('Predicted label')
36 | 
37 | 
38 | def plotRoc(fpr, tpr, auc, labels, linestyle, legend=True):
39 |     for _i, label in enumerate(labels):
40 |         plt.plot(
41 |             tpr[label],
42 |             fpr[label],
43 |             label='{} tagger, AUC = {:.1f}%'.format(label.replace('j_', ''), auc[label] * 100.0),
44 |             linestyle=linestyle,
45 |         )
46 |     plt.semilogy()
47 |     plt.xlabel("Signal Efficiency")
48 |     plt.ylabel("Background Efficiency")
49 |     plt.ylim(0.001, 1)
50 |     plt.grid(True)
51 |     if legend:
52 |         plt.legend(loc='upper left')
53 |     plt.figtext(0.25, 0.90, 'hls4ml', fontweight='bold', wrap=True, horizontalalignment='right', fontsize=14)
54 | 
55 | 
56 | def rocData(y, predict_test, labels):
57 |     df = pd.DataFrame()
58 | 
59 |     fpr = {}
60 |     tpr = {}
61 |     auc1 = {}
62 | 
63 |     for i, label in enumerate(labels):
64 |         df[label] = y[:, i]
65 |         df[label + '_pred'] = predict_test[:, i]
66 | 
67 |         fpr[label], tpr[label], threshold = roc_curve(df[label], df[label + '_pred'])
68 | 
69 |         auc1[label] = auc(fpr[label], tpr[label])
70 |     return fpr, tpr, auc1
71 | 
72 | 
73 | def makeRoc(y, predict_test, labels, linestyle='-', legend=True):
74 |     if 'j_index' in labels:
75 |         labels.remove('j_index')
76 | 
77 |     fpr, tpr, auc1 = rocData(y, predict_test, labels)
78 |     plotRoc(fpr, tpr, auc1, labels, linestyle, legend=legend)
79 |     return predict_test
80 | 
81 | 
82 | def print_dict(d, indent=0):
83 |     for key, value in d.items():
84 |         print(' ' * indent + str(key), end='')
85 |         if isinstance(value, dict):
86 |             print()
87 |             print_dict(value, indent + 1)
88 |         else:
89 |             print(':' + ' ' * (20 - len(key) - 2 * indent) + str(value))
90 | 
--------------------------------------------------------------------------------
/pruned_cnn/vivado_synth.rpt:
--------------------------------------------------------------------------------
1 | Copyright 1986-2020 Xilinx, Inc. All Rights Reserved.
2 | --------------------------------------------------------------------------------------
3 | | Tool Version : Vivado v.2020.1 (lin64) Build 2902540 Wed May 27 19:54:35 MDT 2020
4 | | Date         : Mon Jun 28 13:59:34 2021
5 | | Host         : geonosis.cern.ch running 64-bit CentOS Linux release 7.9.2009 (Core)
6 | | Command      : report_utilization -file vivado_synth.rpt
7 | | Design       : myproject
8 | | Device       : xcu250figd2104-2L
9 | | Design State : Synthesized
10 | --------------------------------------------------------------------------------------
11 | 
12 | Utilization Design Information
13 | 
14 | Table of Contents
15 | -----------------
16 | 1. CLB Logic
17 | 1.1 Summary of Registers by Type
18 | 2. BLOCKRAM
19 | 3. ARITHMETIC
20 | 4. I/O
21 | 5. CLOCK
22 | 6. ADVANCED
23 | 7. CONFIGURATION
24 | 8. Primitives
25 | 9. Black Boxes
26 | 10. 
Instantiated Netlists 27 | 11. SLR Connectivity 28 | 12. SLR Connectivity Matrix 29 | 13. SLR CLB Logic and Dedicated Block Utilization 30 | 14. SLR IO Utilization 31 | 32 | 1. CLB Logic 33 | ------------ 34 | 35 | +----------------------------+--------+-------+-----------+-------+ 36 | | Site Type | Used | Fixed | Available | Util% | 37 | +----------------------------+--------+-------+-----------+-------+ 38 | | CLB LUTs* | 123948 | 0 | 1728000 | 7.17 | 39 | | LUT as Logic | 120268 | 0 | 1728000 | 6.96 | 40 | | LUT as Memory | 3680 | 0 | 791040 | 0.47 | 41 | | LUT as Distributed RAM | 0 | 0 | | | 42 | | LUT as Shift Register | 3680 | 0 | | | 43 | | CLB Registers | 43435 | 0 | 3456000 | 1.26 | 44 | | Register as Flip Flop | 43435 | 0 | 3456000 | 1.26 | 45 | | Register as Latch | 0 | 0 | 3456000 | 0.00 | 46 | | CARRY8 | 13270 | 0 | 216000 | 6.14 | 47 | | F7 Muxes | 256 | 0 | 864000 | 0.03 | 48 | | F8 Muxes | 0 | 0 | 432000 | 0.00 | 49 | | F9 Muxes | 0 | 0 | 216000 | 0.00 | 50 | +----------------------------+--------+-------+-----------+-------+ 51 | * Warning! The Final LUT count, after physical optimizations and full implementation, is typically lower. Run opt_design after synthesis, if not already completed, for a more realistic count. 52 | 53 | 54 | 1.1 Summary of Registers by Type 55 | -------------------------------- 56 | 57 | +-------+--------------+-------------+--------------+ 58 | | Total | Clock Enable | Synchronous | Asynchronous | 59 | +-------+--------------+-------------+--------------+ 60 | | 0 | _ | - | - | 61 | | 0 | _ | - | Set | 62 | | 0 | _ | - | Reset | 63 | | 0 | _ | Set | - | 64 | | 0 | _ | Reset | - | 65 | | 0 | Yes | - | - | 66 | | 0 | Yes | - | Set | 67 | | 0 | Yes | - | Reset | 68 | | 1069 | Yes | Set | - | 69 | | 42366 | Yes | Reset | - | 70 | +-------+--------------+-------------+--------------+ 71 | 72 | 73 | 2. BLOCKRAM 74 | ----------- 75 | 76 | +-------------------+------+-------+-----------+-------+ 77 | | Site Type | Used | Fixed | Available | Util% | 78 | +-------------------+------+-------+-----------+-------+ 79 | | Block RAM Tile | 42 | 0 | 2688 | 1.56 | 80 | | RAMB36/FIFO* | 0 | 0 | 2688 | 0.00 | 81 | | RAMB18 | 84 | 0 | 5376 | 1.56 | 82 | | RAMB18E2 only | 84 | | | | 83 | | URAM | 0 | 0 | 1280 | 0.00 | 84 | +-------------------+------+-------+-----------+-------+ 85 | * Note: Each Block RAM Tile only has one FIFO logic available and therefore can accommodate only one FIFO36E2 or one FIFO18E2. However, if a FIFO18E2 occupies a Block RAM Tile, that tile can still accommodate a RAMB18E2 86 | 87 | 88 | 3. ARITHMETIC 89 | ------------- 90 | 91 | +----------------+------+-------+-----------+-------+ 92 | | Site Type | Used | Fixed | Available | Util% | 93 | +----------------+------+-------+-----------+-------+ 94 | | DSPs | 5386 | 0 | 12288 | 43.83 | 95 | | DSP48E2 only | 5386 | | | | 96 | +----------------+------+-------+-----------+-------+ 97 | 98 | 99 | 4. I/O 100 | ------ 101 | 102 | +------------+------+-------+-----------+-------+ 103 | | Site Type | Used | Fixed | Available | Util% | 104 | +------------+------+-------+-----------+-------+ 105 | | Bonded IOB | 274 | 0 | 676 | 40.53 | 106 | +------------+------+-------+-----------+-------+ 107 | 108 | 109 | 5. 
CLOCK 110 | -------- 111 | 112 | +----------------------+------+-------+-----------+-------+ 113 | | Site Type | Used | Fixed | Available | Util% | 114 | +----------------------+------+-------+-----------+-------+ 115 | | GLOBAL CLOCK BUFFERs | 1 | 0 | 1344 | 0.07 | 116 | | BUFGCE | 1 | 0 | 384 | 0.26 | 117 | | BUFGCE_DIV | 0 | 0 | 64 | 0.00 | 118 | | BUFG_GT | 0 | 0 | 768 | 0.00 | 119 | | BUFGCTRL* | 0 | 0 | 128 | 0.00 | 120 | | PLL | 0 | 0 | 32 | 0.00 | 121 | | MMCM | 0 | 0 | 16 | 0.00 | 122 | +----------------------+------+-------+-----------+-------+ 123 | * Note: Each used BUFGCTRL counts as two GLOBAL CLOCK BUFFERs. This table does not include global clocking resources, only buffer cell usage. See the Clock Utilization Report (report_clock_utilization) for detailed accounting of global clocking resource availability. 124 | 125 | 126 | 6. ADVANCED 127 | ----------- 128 | 129 | +-----------------+------+-------+-----------+-------+ 130 | | Site Type | Used | Fixed | Available | Util% | 131 | +-----------------+------+-------+-----------+-------+ 132 | | CMACE4 | 0 | 0 | 12 | 0.00 | 133 | | GTYE4_CHANNEL | 0 | 0 | 24 | 0.00 | 134 | | GTYE4_COMMON | 0 | 0 | 6 | 0.00 | 135 | | ILKNE4 | 0 | 0 | 8 | 0.00 | 136 | | OBUFDS_GTE4 | 0 | 0 | 12 | 0.00 | 137 | | OBUFDS_GTE4_ADV | 0 | 0 | 12 | 0.00 | 138 | | PCIE40E4 | 0 | 0 | 4 | 0.00 | 139 | | SYSMONE4 | 0 | 0 | 4 | 0.00 | 140 | +-----------------+------+-------+-----------+-------+ 141 | 142 | 143 | 7. CONFIGURATION 144 | ---------------- 145 | 146 | +-------------+------+-------+-----------+-------+ 147 | | Site Type | Used | Fixed | Available | Util% | 148 | +-------------+------+-------+-----------+-------+ 149 | | BSCANE2 | 0 | 0 | 16 | 0.00 | 150 | | DNA_PORTE2 | 0 | 0 | 4 | 0.00 | 151 | | EFUSE_USR | 0 | 0 | 4 | 0.00 | 152 | | FRAME_ECCE4 | 0 | 0 | 4 | 0.00 | 153 | | ICAPE3 | 0 | 0 | 8 | 0.00 | 154 | | MASTER_JTAG | 0 | 0 | 4 | 0.00 | 155 | | STARTUPE3 | 0 | 0 | 4 | 0.00 | 156 | +-------------+------+-------+-----------+-------+ 157 | 158 | 159 | 8. Primitives 160 | ------------- 161 | 162 | +----------+-------+---------------------+ 163 | | Ref Name | Used | Functional Category | 164 | +----------+-------+---------------------+ 165 | | LUT2 | 52029 | CLB | 166 | | FDRE | 42366 | Register | 167 | | LUT3 | 41635 | CLB | 168 | | LUT4 | 40010 | CLB | 169 | | CARRY8 | 13270 | CLB | 170 | | LUT6 | 12631 | CLB | 171 | | LUT5 | 10697 | CLB | 172 | | DSP48E2 | 5386 | Arithmetic | 173 | | LUT1 | 4899 | CLB | 174 | | SRL16E | 2816 | CLB | 175 | | FDSE | 1069 | Register | 176 | | SRLC32E | 864 | CLB | 177 | | MUXF7 | 256 | CLB | 178 | | OBUF | 210 | I/O | 179 | | RAMB18E2 | 84 | Block Ram | 180 | | INBUF | 64 | I/O | 181 | | IBUFCTRL | 64 | Others | 182 | | BUFGCE | 1 | Clock | 183 | +----------+-------+---------------------+ 184 | 185 | 186 | 9. Black Boxes 187 | -------------- 188 | 189 | +----------+------+ 190 | | Ref Name | Used | 191 | +----------+------+ 192 | 193 | 194 | 10. Instantiated Netlists 195 | ------------------------- 196 | 197 | +----------+------+ 198 | | Ref Name | Used | 199 | +----------+------+ 200 | 201 | 202 | 11. 
SLR Connectivity 203 | -------------------- 204 | 205 | +----------------------------------+------+-------+-----------+-------+ 206 | | | Used | Fixed | Available | Util% | 207 | +----------------------------------+------+-------+-----------+-------+ 208 | | SLR3 <-> SLR2 | 0 | | 23040 | 0.00 | 209 | | SLR2 -> SLR3 | 0 | | | 0.00 | 210 | | Using TX_REG only | 0 | 0 | | | 211 | | Using RX_REG only | 0 | 0 | | | 212 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 213 | | SLR3 -> SLR2 | 0 | | | 0.00 | 214 | | Using TX_REG only | 0 | 0 | | | 215 | | Using RX_REG only | 0 | 0 | | | 216 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 217 | | SLR2 <-> SLR1 | 0 | | 23040 | 0.00 | 218 | | SLR1 -> SLR2 | 0 | | | 0.00 | 219 | | Using TX_REG only | 0 | 0 | | | 220 | | Using RX_REG only | 0 | 0 | | | 221 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 222 | | SLR2 -> SLR1 | 0 | | | 0.00 | 223 | | Using TX_REG only | 0 | 0 | | | 224 | | Using RX_REG only | 0 | 0 | | | 225 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 226 | | SLR1 <-> SLR0 | 0 | | 23040 | 0.00 | 227 | | SLR0 -> SLR1 | 0 | | | 0.00 | 228 | | Using TX_REG only | 0 | 0 | | | 229 | | Using RX_REG only | 0 | 0 | | | 230 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 231 | | SLR1 -> SLR0 | 0 | | | 0.00 | 232 | | Using TX_REG only | 0 | 0 | | | 233 | | Using RX_REG only | 0 | 0 | | | 234 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 235 | +----------------------------------+------+-------+-----------+-------+ 236 | | Total SLLs Used | 0 | | | | 237 | +----------------------------------+------+-------+-----------+-------+ 238 | 239 | 240 | 12. SLR Connectivity Matrix 241 | --------------------------- 242 | 243 | +-----------+------+------+------+------+ 244 | | FROM \ TO | SLR3 | SLR2 | SLR1 | SLR0 | 245 | +-----------+------+------+------+------+ 246 | | SLR3 | 0 | 0 | 0 | 0 | 247 | | SLR2 | 0 | 0 | 0 | 0 | 248 | | SLR1 | 0 | 0 | 0 | 0 | 249 | | SLR0 | 0 | 0 | 0 | 0 | 250 | +-----------+------+------+------+------+ 251 | 252 | 253 | 13. 
SLR CLB Logic and Dedicated Block Utilization 254 | ------------------------------------------------- 255 | 256 | +----------------------------+------+------+------+------+--------+--------+--------+--------+ 257 | | Site Type | SLR0 | SLR1 | SLR2 | SLR3 | SLR0 % | SLR1 % | SLR2 % | SLR3 % | 258 | +----------------------------+------+------+------+------+--------+--------+--------+--------+ 259 | | CLB | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 260 | | CLBL | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 261 | | CLBM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 262 | | CLB LUTs | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 263 | | LUT as Logic | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 264 | | LUT as Memory | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 265 | | LUT as Distributed RAM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 266 | | LUT as Shift Register | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 267 | | CLB Registers | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 268 | | CARRY8 | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 269 | | F7 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 270 | | F8 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 271 | | F9 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 272 | | Block RAM Tile | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 273 | | RAMB36/FIFO | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 274 | | RAMB18 | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 275 | | URAM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 276 | | DSPs | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 277 | | PLL | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 278 | | MMCM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 279 | | Unique Control Sets | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 280 | +----------------------------+------+------+------+------+--------+--------+--------+--------+ 281 | * Note: Available Control Sets based on CLB Registers / 8 282 | 283 | 284 | 14. SLR IO Utilization 285 | ---------------------- 286 | 287 | +-----------+-----------+---------+------------+----------+------------+----------+-----+ 288 | | SLR Index | Used IOBs | (%)IOBs | Used IPADs | (%)IPADs | Used OPADs | (%)OPADs | GTs | 289 | +-----------+-----------+---------+------------+----------+------------+----------+-----+ 290 | | SLR3 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 291 | | SLR2 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 292 | | SLR1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 293 | | SLR0 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 294 | +-----------+-----------+---------+------------+----------+------------+----------+-----+ 295 | | Total | 0 | | 0 | | 0 | | 0 | 296 | +-----------+-----------+---------+------------+----------+------------+----------+-----+ 297 | 298 | 299 | -------------------------------------------------------------------------------- /quantized_pruned_cnn/vivado_synth.rpt: -------------------------------------------------------------------------------- 1 | Copyright 1986-2020 Xilinx, Inc. All Rights Reserved. 
2 | -------------------------------------------------------------------------------------- 3 | | Tool Version : Vivado v.2020.1 (lin64) Build 2902540 Wed May 27 19:54:35 MDT 2020 4 | | Date : Mon Jun 28 13:06:58 2021 5 | | Host : geonosis.cern.ch running 64-bit CentOS Linux release 7.9.2009 (Core) 6 | | Command : report_utilization -file vivado_synth.rpt 7 | | Design : myproject 8 | | Device : xcu250figd2104-2L 9 | | Design State : Synthesized 10 | -------------------------------------------------------------------------------------- 11 | 12 | Utilization Design Information 13 | 14 | Table of Contents 15 | ----------------- 16 | 1. CLB Logic 17 | 1.1 Summary of Registers by Type 18 | 2. BLOCKRAM 19 | 3. ARITHMETIC 20 | 4. I/O 21 | 5. CLOCK 22 | 6. ADVANCED 23 | 7. CONFIGURATION 24 | 8. Primitives 25 | 9. Black Boxes 26 | 10. Instantiated Netlists 27 | 11. SLR Connectivity 28 | 12. SLR Connectivity Matrix 29 | 13. SLR CLB Logic and Dedicated Block Utilization 30 | 14. SLR IO Utilization 31 | 32 | 1. CLB Logic 33 | ------------ 34 | 35 | +----------------------------+--------+-------+-----------+-------+ 36 | | Site Type | Used | Fixed | Available | Util% | 37 | +----------------------------+--------+-------+-----------+-------+ 38 | | CLB LUTs* | 118931 | 0 | 1728000 | 6.88 | 39 | | LUT as Logic | 115875 | 0 | 1728000 | 6.71 | 40 | | LUT as Memory | 3056 | 0 | 791040 | 0.39 | 41 | | LUT as Distributed RAM | 0 | 0 | | | 42 | | LUT as Shift Register | 3056 | 0 | | | 43 | | CLB Registers | 30702 | 0 | 3456000 | 0.89 | 44 | | Register as Flip Flop | 30702 | 0 | 3456000 | 0.89 | 45 | | Register as Latch | 0 | 0 | 3456000 | 0.00 | 46 | | CARRY8 | 14273 | 0 | 216000 | 6.61 | 47 | | F7 Muxes | 578 | 0 | 864000 | 0.07 | 48 | | F8 Muxes | 80 | 0 | 432000 | 0.02 | 49 | | F9 Muxes | 0 | 0 | 216000 | 0.00 | 50 | +----------------------------+--------+-------+-----------+-------+ 51 | * Warning! The Final LUT count, after physical optimizations and full implementation, is typically lower. Run opt_design after synthesis, if not already completed, for a more realistic count. 52 | 53 | 54 | 1.1 Summary of Registers by Type 55 | -------------------------------- 56 | 57 | +-------+--------------+-------------+--------------+ 58 | | Total | Clock Enable | Synchronous | Asynchronous | 59 | +-------+--------------+-------------+--------------+ 60 | | 0 | _ | - | - | 61 | | 0 | _ | - | Set | 62 | | 0 | _ | - | Reset | 63 | | 0 | _ | Set | - | 64 | | 0 | _ | Reset | - | 65 | | 0 | Yes | - | - | 66 | | 0 | Yes | - | Set | 67 | | 0 | Yes | - | Reset | 68 | | 1413 | Yes | Set | - | 69 | | 29289 | Yes | Reset | - | 70 | +-------+--------------+-------------+--------------+ 71 | 72 | 73 | 2. BLOCKRAM 74 | ----------- 75 | 76 | +-------------------+------+-------+-----------+-------+ 77 | | Site Type | Used | Fixed | Available | Util% | 78 | +-------------------+------+-------+-----------+-------+ 79 | | Block RAM Tile | 34 | 0 | 2688 | 1.26 | 80 | | RAMB36/FIFO* | 0 | 0 | 2688 | 0.00 | 81 | | RAMB18 | 68 | 0 | 5376 | 1.26 | 82 | | RAMB18E2 only | 68 | | | | 83 | | URAM | 0 | 0 | 1280 | 0.00 | 84 | +-------------------+------+-------+-----------+-------+ 85 | * Note: Each Block RAM Tile only has one FIFO logic available and therefore can accommodate only one FIFO36E2 or one FIFO18E2. However, if a FIFO18E2 occupies a Block RAM Tile, that tile can still accommodate a RAMB18E2 86 | 87 | 88 | 3. 
ARITHMETIC 89 | ------------- 90 | 91 | +----------------+------+-------+-----------+-------+ 92 | | Site Type | Used | Fixed | Available | Util% | 93 | +----------------+------+-------+-----------+-------+ 94 | | DSPs | 353 | 0 | 12288 | 2.87 | 95 | | DSP48E2 only | 353 | | | | 96 | +----------------+------+-------+-----------+-------+ 97 | 98 | 99 | 4. I/O 100 | ------ 101 | 102 | +------------+------+-------+-----------+-------+ 103 | | Site Type | Used | Fixed | Available | Util% | 104 | +------------+------+-------+-----------+-------+ 105 | | Bonded IOB | 274 | 0 | 676 | 40.53 | 106 | +------------+------+-------+-----------+-------+ 107 | 108 | 109 | 5. CLOCK 110 | -------- 111 | 112 | +----------------------+------+-------+-----------+-------+ 113 | | Site Type | Used | Fixed | Available | Util% | 114 | +----------------------+------+-------+-----------+-------+ 115 | | GLOBAL CLOCK BUFFERs | 1 | 0 | 1344 | 0.07 | 116 | | BUFGCE | 1 | 0 | 384 | 0.26 | 117 | | BUFGCE_DIV | 0 | 0 | 64 | 0.00 | 118 | | BUFG_GT | 0 | 0 | 768 | 0.00 | 119 | | BUFGCTRL* | 0 | 0 | 128 | 0.00 | 120 | | PLL | 0 | 0 | 32 | 0.00 | 121 | | MMCM | 0 | 0 | 16 | 0.00 | 122 | +----------------------+------+-------+-----------+-------+ 123 | * Note: Each used BUFGCTRL counts as two GLOBAL CLOCK BUFFERs. This table does not include global clocking resources, only buffer cell usage. See the Clock Utilization Report (report_clock_utilization) for detailed accounting of global clocking resource availability. 124 | 125 | 126 | 6. ADVANCED 127 | ----------- 128 | 129 | +-----------------+------+-------+-----------+-------+ 130 | | Site Type | Used | Fixed | Available | Util% | 131 | +-----------------+------+-------+-----------+-------+ 132 | | CMACE4 | 0 | 0 | 12 | 0.00 | 133 | | GTYE4_CHANNEL | 0 | 0 | 24 | 0.00 | 134 | | GTYE4_COMMON | 0 | 0 | 6 | 0.00 | 135 | | ILKNE4 | 0 | 0 | 8 | 0.00 | 136 | | OBUFDS_GTE4 | 0 | 0 | 12 | 0.00 | 137 | | OBUFDS_GTE4_ADV | 0 | 0 | 12 | 0.00 | 138 | | PCIE40E4 | 0 | 0 | 4 | 0.00 | 139 | | SYSMONE4 | 0 | 0 | 4 | 0.00 | 140 | +-----------------+------+-------+-----------+-------+ 141 | 142 | 143 | 7. CONFIGURATION 144 | ---------------- 145 | 146 | +-------------+------+-------+-----------+-------+ 147 | | Site Type | Used | Fixed | Available | Util% | 148 | +-------------+------+-------+-----------+-------+ 149 | | BSCANE2 | 0 | 0 | 16 | 0.00 | 150 | | DNA_PORTE2 | 0 | 0 | 4 | 0.00 | 151 | | EFUSE_USR | 0 | 0 | 4 | 0.00 | 152 | | FRAME_ECCE4 | 0 | 0 | 4 | 0.00 | 153 | | ICAPE3 | 0 | 0 | 8 | 0.00 | 154 | | MASTER_JTAG | 0 | 0 | 4 | 0.00 | 155 | | STARTUPE3 | 0 | 0 | 4 | 0.00 | 156 | +-------------+------+-------+-----------+-------+ 157 | 158 | 159 | 8. Primitives 160 | ------------- 161 | 162 | +----------+-------+---------------------+ 163 | | Ref Name | Used | Functional Category | 164 | +----------+-------+---------------------+ 165 | | LUT2 | 53834 | CLB | 166 | | LUT3 | 29466 | CLB | 167 | | FDRE | 29289 | Register | 168 | | LUT4 | 28455 | CLB | 169 | | LUT6 | 17197 | CLB | 170 | | LUT5 | 16487 | CLB | 171 | | CARRY8 | 14273 | CLB | 172 | | LUT1 | 5418 | CLB | 173 | | SRL16E | 2032 | CLB | 174 | | FDSE | 1413 | Register | 175 | | SRLC32E | 1024 | CLB | 176 | | MUXF7 | 578 | CLB | 177 | | DSP48E2 | 353 | Arithmetic | 178 | | OBUF | 210 | I/O | 179 | | MUXF8 | 80 | CLB | 180 | | RAMB18E2 | 68 | Block Ram | 181 | | INBUF | 64 | I/O | 182 | | IBUFCTRL | 64 | Others | 183 | | BUFGCE | 1 | Clock | 184 | +----------+-------+---------------------+ 185 | 186 | 187 | 9. 
Black Boxes 188 | -------------- 189 | 190 | +----------+------+ 191 | | Ref Name | Used | 192 | +----------+------+ 193 | 194 | 195 | 10. Instantiated Netlists 196 | ------------------------- 197 | 198 | +----------+------+ 199 | | Ref Name | Used | 200 | +----------+------+ 201 | 202 | 203 | 11. SLR Connectivity 204 | -------------------- 205 | 206 | +----------------------------------+------+-------+-----------+-------+ 207 | | | Used | Fixed | Available | Util% | 208 | +----------------------------------+------+-------+-----------+-------+ 209 | | SLR3 <-> SLR2 | 0 | | 23040 | 0.00 | 210 | | SLR2 -> SLR3 | 0 | | | 0.00 | 211 | | Using TX_REG only | 0 | 0 | | | 212 | | Using RX_REG only | 0 | 0 | | | 213 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 214 | | SLR3 -> SLR2 | 0 | | | 0.00 | 215 | | Using TX_REG only | 0 | 0 | | | 216 | | Using RX_REG only | 0 | 0 | | | 217 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 218 | | SLR2 <-> SLR1 | 0 | | 23040 | 0.00 | 219 | | SLR1 -> SLR2 | 0 | | | 0.00 | 220 | | Using TX_REG only | 0 | 0 | | | 221 | | Using RX_REG only | 0 | 0 | | | 222 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 223 | | SLR2 -> SLR1 | 0 | | | 0.00 | 224 | | Using TX_REG only | 0 | 0 | | | 225 | | Using RX_REG only | 0 | 0 | | | 226 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 227 | | SLR1 <-> SLR0 | 0 | | 23040 | 0.00 | 228 | | SLR0 -> SLR1 | 0 | | | 0.00 | 229 | | Using TX_REG only | 0 | 0 | | | 230 | | Using RX_REG only | 0 | 0 | | | 231 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 232 | | SLR1 -> SLR0 | 0 | | | 0.00 | 233 | | Using TX_REG only | 0 | 0 | | | 234 | | Using RX_REG only | 0 | 0 | | | 235 | | Using Both TX_REG and RX_REG | 0 | 0 | | | 236 | +----------------------------------+------+-------+-----------+-------+ 237 | | Total SLLs Used | 0 | | | | 238 | +----------------------------------+------+-------+-----------+-------+ 239 | 240 | 241 | 12. SLR Connectivity Matrix 242 | --------------------------- 243 | 244 | +-----------+------+------+------+------+ 245 | | FROM \ TO | SLR3 | SLR2 | SLR1 | SLR0 | 246 | +-----------+------+------+------+------+ 247 | | SLR3 | 0 | 0 | 0 | 0 | 248 | | SLR2 | 0 | 0 | 0 | 0 | 249 | | SLR1 | 0 | 0 | 0 | 0 | 250 | | SLR0 | 0 | 0 | 0 | 0 | 251 | +-----------+------+------+------+------+ 252 | 253 | 254 | 13. 
SLR CLB Logic and Dedicated Block Utilization 255 | ------------------------------------------------- 256 | 257 | +----------------------------+------+------+------+------+--------+--------+--------+--------+ 258 | | Site Type | SLR0 | SLR1 | SLR2 | SLR3 | SLR0 % | SLR1 % | SLR2 % | SLR3 % | 259 | +----------------------------+------+------+------+------+--------+--------+--------+--------+ 260 | | CLB | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 261 | | CLBL | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 262 | | CLBM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 263 | | CLB LUTs | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 264 | | LUT as Logic | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 265 | | LUT as Memory | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 266 | | LUT as Distributed RAM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 267 | | LUT as Shift Register | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 268 | | CLB Registers | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 269 | | CARRY8 | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 270 | | F7 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 271 | | F8 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 272 | | F9 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 273 | | Block RAM Tile | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 274 | | RAMB36/FIFO | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 275 | | RAMB18 | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 276 | | URAM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 277 | | DSPs | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 278 | | PLL | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 279 | | MMCM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 280 | | Unique Control Sets | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 281 | +----------------------------+------+------+------+------+--------+--------+--------+--------+ 282 | * Note: Available Control Sets based on CLB Registers / 8 283 | 284 | 285 | 14. SLR IO Utilization 286 | ---------------------- 287 | 288 | +-----------+-----------+---------+------------+----------+------------+----------+-----+ 289 | | SLR Index | Used IOBs | (%)IOBs | Used IPADs | (%)IPADs | Used OPADs | (%)OPADs | GTs | 290 | +-----------+-----------+---------+------------+----------+------------+----------+-----+ 291 | | SLR3 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 292 | | SLR2 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 293 | | SLR1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 294 | | SLR0 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 | 295 | +-----------+-----------+---------+------------+----------+------------+----------+-----+ 296 | | Total | 0 | | 0 | | 0 | | 0 | 297 | +-----------+-----------+---------+------------+----------+------------+----------+-----+ 298 | 299 | 300 | -------------------------------------------------------------------------------- /sr/example.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/sr/example.pkl --------------------------------------------------------------------------------