├── .github
│   ├── dependabot.yml
│   └── workflows
│       └── deploy.yml
├── .gitignore
├── .gitlab-ci.yml
├── .pre-commit-config.yaml
├── README.md
├── _config.yml
├── _toc.yml
├── callbacks.py
├── environment.yml
├── images
│   ├── conifer_v1.png
│   ├── conv2d_animation.gif
│   ├── hls4ml_logo.svg
│   ├── part5_floorplan.png
│   ├── part7_block_design.png
│   ├── part7_floorplan.png
│   ├── reuse.png
│   └── test.png
├── nn_utils.py
├── part1_getting_started.ipynb
├── part2_advanced_config.ipynb
├── part3_compression.ipynb
├── part4.1_HG_quantization.ipynb
├── part4_quantization.ipynb
├── part5_bdt.ipynb
├── part6_cnns.ipynb
├── part7a_bitstream.ipynb
├── part7b_deployment.ipynb
├── part7c_validation.ipynb
├── part8_symbolic_regression.ipynb
├── plotting.py
├── pruned_cnn
│   ├── myproject_prj
│   │   └── solution1
│   │       └── syn
│   │           └── report
│   │               └── myproject_csynth.rpt
│   └── vivado_synth.rpt
├── quantized_pruned_cnn
│   ├── myproject_prj
│   │   └── solution1
│   │       └── syn
│   │           └── report
│   │               └── myproject_csynth.rpt
│   └── vivado_synth.rpt
└── sr
    └── example.pkl
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | # Maintain dependencies for GitHub Actions
4 | - package-ecosystem: "github-actions"
5 | directory: "/"
6 | schedule:
7 | interval: "weekly"
8 |
--------------------------------------------------------------------------------
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
1 | name: deploy-book
2 |
3 | # Run this workflow on pushes and pull requests to the main branch
4 | on:
5 | push:
6 | branches:
7 | - main
8 | pull_request:
9 | branches:
10 | - main
11 |
12 | # This job installs dependencies, builds the book, and pushes it to `gh-pages`
13 | jobs:
14 | deploy-book:
15 | runs-on: ubuntu-latest
16 | steps:
17 | - uses: actions/checkout@v4
18 |
19 | # Install dependencies
20 | - name: Setup Miniconda
21 | uses: conda-incubator/setup-miniconda@v3
22 | with:
23 | miniforge-version: latest
24 | use-mamba: true
25 | channels: conda-forge
26 | activate-environment: hls4ml-tutorial
27 | environment-file: environment.yml
28 | python-version: 3.10.16
29 | auto-activate-base: false
30 |
31 | # Check dependencies
32 | - name: Check Miniconda
33 | shell: bash -l {0}
34 | run: |
35 | conda info
36 | conda list
37 | conda config --show-sources
38 | conda config --show
39 | printenv | sort
40 |
41 | - name: Build the book
42 | shell: bash -l {0}
43 | run: |
44 | jupyter contrib nbextension install --user
45 | jupyter nbextension enable --py widgetsnbextension
46 | jupyter-book build .
47 |
48 | - name: GitHub Pages action
49 | uses: peaceiris/actions-gh-pages@v4.0.0
50 | if: ${{ github.event_name != 'pull_request' }}
51 | with:
52 | github_token: ${{ secrets.GITHUB_TOKEN }}
53 | publish_dir: _build/html
54 | force_orphan: true
55 | user_name: 'github-actions[bot]'
56 | user_email: 'github-actions[bot]@users.noreply.github.com'
57 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 | __pycache__
3 | *~
4 | *.npy
5 | _build
6 | model_1
7 | model_2
8 | model_3
9 | .DS_Store
10 |
--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
1 | image: gcr.io/kaniko-project/executor:debug
2 |
3 | stages:
4 | - build-and-push
5 |
6 | build-and-push-job:
7 | stage: build-and-push
8 | script:
9 | - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json
10 | - /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/docker/Dockerfile --destination $CI_REGISTRY_IMAGE/hls4ml-0.8.0:${CI_COMMIT_SHA:0:8} --destination $CI_REGISTRY_IMAGE/hls4ml-0.8.0:latest
11 |
12 | build-and-push-vivado-job:
13 | stage: build-and-push
14 | script:
15 | - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json
16 | - /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/docker/Dockerfile.vivado --destination $CI_REGISTRY_IMAGE/hls4ml-0.8.0-vivado-2019.1:${CI_COMMIT_SHA:0:8} --destination $CI_REGISTRY_IMAGE/hls4ml-0.8.0-vivado-2019.1:latest
17 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | exclude: .*\.rpt$
2 |
3 | repos:
4 | - repo: https://github.com/psf/black
5 | rev: 25.1.0
6 | hooks:
7 | - id: black-jupyter
8 | language_version: python3
9 | args: ['--line-length=125',
10 | '--skip-string-normalization']
11 |
12 | - repo: https://github.com/pre-commit/pre-commit-hooks
13 | rev: v5.0.0
14 | hooks:
15 | - id: check-added-large-files
16 | - id: check-case-conflict
17 | - id: check-merge-conflict
18 | - id: check-symlinks
19 | - id: check-yaml
20 | - id: debug-statements
21 | - id: end-of-file-fixer
22 | - id: mixed-line-ending
23 | - id: requirements-txt-fixer
24 | - id: trailing-whitespace
25 |
26 | - repo: https://github.com/PyCQA/isort
27 | rev: 6.0.1
28 | hooks:
29 | - id: isort
30 | args: ["--profile", "black", "--line-length=125"]
31 |
32 | - repo: https://github.com/asottile/pyupgrade
33 | rev: v3.19.1
34 | hooks:
35 | - id: pyupgrade
36 | args: ["--py36-plus"]
37 |
38 | - repo: https://github.com/asottile/setup-cfg-fmt
39 | rev: v2.8.0
40 | hooks:
41 | - id: setup-cfg-fmt
42 |
43 | - repo: https://github.com/pycqa/flake8
44 | rev: 7.2.0
45 | hooks:
46 | - id: flake8
47 | exclude: docs/conf.py
48 | additional_dependencies: [flake8-bugbear, flake8-print]
49 | args: ['--max-line-length=125', # github viewer width
50 | '--extend-ignore=E203,T201'] # E203 is not PEP8 compliant
51 |
52 | - repo: https://github.com/mgedmin/check-manifest
53 | rev: "0.50"
54 | hooks:
55 | - id: check-manifest
56 | stages: [manual]
57 |
58 | - repo: https://github.com/jmduarte/p-clang-format
59 | rev: "v1.0.4"
60 | hooks:
61 | - id: p-clang-format
62 | types_or: [c++, c, cuda]
63 | ci:
64 | autofix_commit_msg: '[pre-commit.ci] auto fixes from pre-commit hooks'
65 | autofix_prs: true # default is true
66 | autoupdate_branch: 'main'
67 | autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
68 | autoupdate_schedule: weekly
69 | skip: []
70 | submodules: true
71 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # hls4ml-tutorial: Tutorial notebooks for `hls4ml`
2 |
3 |
4 | [![Jupyter Book Badge](https://jupyterbook.org/badge.svg)](https://fastmachinelearning.org/hls4ml-tutorial)
5 | ![deploy-book](https://github.com/fastmachinelearning/hls4ml-tutorial/actions/workflows/deploy.yml/badge.svg)
6 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
7 | [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
8 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/fastmachinelearning/hls4ml-tutorial)
9 |
10 |
11 | There are several ways to run the tutorial notebooks:
12 | ## Online
13 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/fastmachinelearning/hls4ml-tutorial/HEAD)
14 |
15 | ## Conda
16 | Running the tutorials requires AMD Vitis HLS to be installed; see [here](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vitis.html).
17 | After the installation, the necessary environment variables can be set using
18 | ```
19 | source /path/to/your/installation/Xilinx/Vitis_HLS/202X.X/settings64.(c)sh
20 | ```
21 |
22 | The Python environment used for the tutorials is specified in the `environment.yml` file.
23 | It can be set up as follows:
24 | ```bash
25 | conda env create -f environment.yml
26 | conda activate hls4ml-tutorial
27 | source /path/to/your/installation/Xilinx/Vitis_HLS/202X.X/settings64.(c)sh
28 | ```
29 |
30 | Note that part 7 of the tutorial makes use of the `VivadoAccelerator` backend of hls4ml, for which no Vitis equivalent is available yet. For this part of the tutorial it is therefore necessary to install and source Vivado HLS version 2019.2 or 2020.1, which can be obtained [here](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/archive.html).
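
For example, assuming Vivado 2019.2 is installed under `/opt/Xilinx` (a placeholder; adjust the path to match your installation), the environment for part 7 can be set up with:

```bash
conda activate hls4ml-tutorial
source /opt/Xilinx/Vivado/2019.2/settings64.sh
```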
31 |
32 | ## Companion material
33 | We have prepared a set of slides with an introduction and more details on each of the exercises.
34 | Please find them [here](https://docs.google.com/presentation/d/1c4LvEc6yMByx2HJs8zUP5oxLtY6ACSizQdKvw5cg5Ck/edit?usp=sharing).
35 |
36 |
37 | ## Notebooks
38 | ```{tableofcontents}
39 | ```
40 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | # Book settings
2 | # Learn more at https://jupyterbook.org/customize/config.html
3 |
4 | title: hls4ml tutorial
5 | author: Fast ML team
6 | logo: images/hls4ml_logo.svg
7 | favicon: images/hls4ml_logo.svg
8 |
9 | # Force re-execution of notebooks on each build.
10 | # See https://jupyterbook.org/content/execute.html
11 | execute:
12 | execute_notebooks: force
13 | timeout: -1
14 |
15 | # Define the name of the latex output file for PDF builds
16 | latex:
17 | latex_documents:
18 | targetname: book.tex
19 |
20 | # Information about where the book exists on the web
21 | repository:
22 | url: https://github.com/fastmachinelearning/hls4ml-tutorial # Online location of your book
23 | path_to_book: "" # Optional path to your book, relative to the repository root
24 | branch: main # Which branch of the repository should be used when creating links (optional)
25 |
26 | # Add GitHub buttons to your book
27 | # See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository
28 | html:
29 | use_issues_button: true
30 | use_repository_button: true
31 | baseurl: "https://fastmachinelearning.org/hls4ml-tutorial/" # The base URL where your book will be hosted. Used for creating image previews and social links. e.g.: https://mypage.com/mybook/
32 |
33 | launch_buttons:
34 | binderhub_url: "https://mybinder.org"
35 | colab_url: "https://colab.research.google.com"
36 |
--------------------------------------------------------------------------------
/_toc.yml:
--------------------------------------------------------------------------------
1 | format: jb-book
2 | root: README.md
3 | chapters:
4 | - file: part1_getting_started.ipynb
5 | - file: part2_advanced_config.ipynb
6 | - file: part3_compression.ipynb
7 | - file: part4_quantization.ipynb
8 | - file: part5_bdt.ipynb
9 | - file: part6_cnns.ipynb
10 | - file: part7a_bitstream.ipynb
11 | - file: part7b_deployment.ipynb
12 | - file: part7c_validation.ipynb
13 | - file: part8_symbolic_regression.ipynb
14 |
--------------------------------------------------------------------------------
/callbacks.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on 7 Apr 2017
3 |
4 | @author: jkiesele
5 | '''
6 |
7 | import json
8 |
9 | # loss per epoch
10 | from time import time
11 |
12 | from tensorflow.keras.callbacks import Callback, EarlyStopping, History, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
13 |
14 |
15 | class newline_callbacks_begin(Callback):
16 | def __init__(self, outputDir):
17 | self.outputDir = outputDir
18 | self.loss = []
19 | self.val_loss = []
20 | self.full_logs = []
21 |
22 | def on_epoch_end(self, epoch, epoch_logs={}): # noqa: B006
23 | import os
24 |
25 | lossfile = os.path.join(self.outputDir, 'losses.log')
26 | print('\n***callbacks***\nsaving losses to ' + lossfile)
27 | self.loss.append(epoch_logs.get('loss'))
28 | self.val_loss.append(epoch_logs.get('val_loss'))
29 | with open(lossfile, 'w') as f:
30 |     # write one "loss val_loss" pair per line
31 |     for loss, val_loss in zip(self.loss, self.val_loss):
32 |         f.write(str(loss))
33 |         f.write(" ")
34 |         f.write(str(val_loss))
35 |         f.write("\n")
36 | normed = {}
37 | for vv in epoch_logs:
38 | normed[vv] = float(epoch_logs[vv])
39 | self.full_logs.append(normed)
40 | lossfile = os.path.join(self.outputDir, 'full_info.log')
41 | with open(lossfile, 'w') as out:
42 | out.write(json.dumps(self.full_logs))
43 |
44 |
45 | class newline_callbacks_end(Callback):
46 | def on_epoch_end(self, epoch, epoch_logs={}): # noqa: B006
47 | print('\n***callbacks end***\n')
48 |
49 |
50 | class Losstimer(Callback):
51 | def __init__(self, every=5):
52 | self.points = []
53 | self.every = every
54 |
55 | def on_train_begin(self, logs):
56 | self.start = time()
57 |
58 | def on_batch_end(self, batch, logs):
59 | if (batch % self.every) != 0:
60 | return
61 | elapsed = time() - self.start
62 | cop = {}
63 | for i, j in logs.items():
64 | cop[i] = float(j)
65 | cop['elapsed'] = elapsed
66 | self.points.append(cop)
67 |
68 |
69 | class all_callbacks:
70 | def __init__(
71 | self, stop_patience=10, lr_factor=0.5, lr_patience=1, lr_epsilon=0.001, lr_cooldown=4, lr_minimum=1e-5, outputDir=''
72 | ):
73 | self.nl_begin = newline_callbacks_begin(outputDir)
74 | self.nl_end = newline_callbacks_end()
75 |
76 | self.stopping = EarlyStopping(monitor='val_loss', patience=stop_patience, verbose=1, mode='min')
77 |
78 | self.reduce_lr = ReduceLROnPlateau(
79 | monitor='val_loss',
80 | factor=lr_factor,
81 | patience=lr_patience,
82 | mode='min',
83 | verbose=1,
84 | min_delta=lr_epsilon,  # renamed from the deprecated 'epsilon' argument
85 | cooldown=lr_cooldown,
86 | min_lr=lr_minimum,
87 | )
88 |
89 | self.modelbestcheck = ModelCheckpoint(
90 | outputDir + "/KERAS_check_best_model.h5", monitor='val_loss', verbose=1, save_best_only=True
91 | )
92 |
93 | self.modelbestcheckweights = ModelCheckpoint(
94 | outputDir + "/KERAS_check_best_model_weights.h5",
95 | monitor='val_loss',
96 | verbose=1,
97 | save_best_only=True,
98 | save_weights_only=True,
99 | )
100 |
101 | self.modelcheckperiod = ModelCheckpoint(outputDir + "/KERAS_check_model_epoch{epoch:02d}.h5", verbose=1, period=10)
102 |
103 | self.modelcheck = ModelCheckpoint(outputDir + "/KERAS_check_model_last.h5", verbose=1)
104 |
105 | self.modelcheckweights = ModelCheckpoint(
106 | outputDir + "/KERAS_check_model_last_weights.h5", verbose=1, save_weights_only=True
107 | )
108 |
109 | self.tb = TensorBoard(log_dir=outputDir + '/logs')
110 |
111 | self.history = History()
112 | self.timer = Losstimer()
113 |
114 | self.callbacks = [
115 | self.nl_begin,
116 | self.modelbestcheck,
117 | self.modelbestcheckweights,
118 | self.modelcheck,
119 | self.modelcheckweights,
120 | self.modelcheckperiod,
121 | self.reduce_lr,
122 | self.stopping,
123 | self.nl_end,
124 | self.tb,
125 | self.history,
126 | self.timer,
127 | ]
128 |
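# Minimal usage sketch (mirrors how the tutorial notebooks use this module;
# `model`, `X_train_val` and `y_train_val` are assumed to exist):
#
#   callbacks = all_callbacks(stop_patience=1000, lr_factor=0.5, lr_patience=10, outputDir='model_1')
#   model.fit(X_train_val, y_train_val, batch_size=1024, epochs=10,
#             validation_split=0.25, callbacks=callbacks.callbacks)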
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: hls4ml-tutorial
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python=3.10.16
6 | - jupyter_contrib_nbextensions
7 | - jupyterhub
8 | - jupyter-book
9 | - jsonschema-with-format-nongpl
10 | - pydot==1.4.2
11 | - graphviz==7.1.0
12 | - scikit-learn==1.2.2
13 | - tensorflow==2.14.0
14 | - tensorflow-datasets==4.8.3
15 | - webcolors
16 | - widgetsnbextension==3.6.0
17 | - pip==23.0.1
18 | - pip:
19 | - hls4ml[profiling,optimization,sr,HGQ]==1.1.0
20 | - conifer==1.5
21 | - pysr==0.16.3
22 | - xgboost==1.7.5
23 | - zstd
24 |
--------------------------------------------------------------------------------
/images/conifer_v1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/conifer_v1.png
--------------------------------------------------------------------------------
/images/conv2d_animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/conv2d_animation.gif
--------------------------------------------------------------------------------
/images/hls4ml_logo.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/hls4ml_logo.svg
--------------------------------------------------------------------------------
/images/part5_floorplan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/part5_floorplan.png
--------------------------------------------------------------------------------
/images/part7_block_design.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/part7_block_design.png
--------------------------------------------------------------------------------
/images/part7_floorplan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/part7_floorplan.png
--------------------------------------------------------------------------------
/images/reuse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/reuse.png
--------------------------------------------------------------------------------
/images/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/images/test.png
--------------------------------------------------------------------------------
/nn_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import pickle as pkl
4 | import random
5 | from io import BytesIO
6 | from pathlib import Path
7 | from typing import Callable
8 |
9 | import h5py as h5
10 | import numpy as np
11 | import tensorflow as tf
12 | import zstd
13 | from HGQ.bops import trace_minmax
14 | from keras.layers import Dense
15 | from keras.src.layers.convolutional.base_conv import Conv
16 | from keras.src.saving.legacy import hdf5_format
17 | from matplotlib import pyplot as plt
18 | from tensorflow import keras
19 | from tqdm.auto import tqdm
20 |
21 |
22 | class NumpyFloatValuesEncoder(json.JSONEncoder):
23 | def default(self, obj):
24 | if isinstance(obj, np.float32): # type: ignore
25 | return float(obj)
26 | return json.JSONEncoder.default(self, obj)
27 |
28 |
29 | class SaveTopN(keras.callbacks.Callback):
30 | def __init__(
31 | self,
32 | metric_fn: Callable[[dict], float],
33 | n: int,
34 | path: str | Path,
35 | side: str = 'max',
36 | fname_format='epoch={epoch}-metric={metric:.4e}.h5',
37 | cond_fn: Callable[[dict], bool] = lambda x: True,
38 | ):
39 | self.n = n
40 | self.metric_fn = metric_fn
41 | self.path = Path(path)
42 | self.fname_format = fname_format
43 | os.makedirs(path, exist_ok=True)
44 | self.weight_paths = np.full(n, '/dev/null', dtype=object)
45 | if side == 'max':
46 | self.best = np.full(n, -np.inf)
47 | self.side = np.greater
48 | elif side == 'min':
49 | self.best = np.full(n, np.inf)
50 | self.side = np.less
51 | self.cond = cond_fn
52 |
53 | def on_epoch_end(self, epoch, logs=None):
54 | assert isinstance(logs, dict)
55 | assert isinstance(self.model, keras.models.Model)
56 | logs = logs.copy()
57 | logs['epoch'] = epoch
58 | if not self.cond(logs):
59 | return
60 | metric = self.metric_fn(logs)
61 |
62 | if self.side(metric, self.best[-1]):
63 | try:
64 | os.remove(self.weight_paths[-1])
65 | except OSError:
66 | pass
67 | logs['metric'] = metric
68 | fname = self.path / self.fname_format.format(**logs)
69 | self.best[-1] = metric
70 | self.weight_paths[-1] = fname
71 | self.model.save_weights(fname)
72 | with h5.File(fname, 'r+') as f:
73 | log_str = json.dumps(logs, cls=NumpyFloatValuesEncoder)
74 | f.attrs['train_log'] = log_str
75 | idx = np.argsort(self.best)
76 | if self.side == np.greater:
77 | idx = idx[::-1]
78 | self.best = self.best[idx]
79 | self.weight_paths = self.weight_paths[idx]
80 |
81 | def rename_ckpts(self, dataset, bsz=65536):
82 | assert self.weight_paths[0] != '/dev/null', 'No checkpoints to rename'
83 | assert isinstance(self.model, keras.models.Model)
84 |
85 | weight_buf = BytesIO()
86 | with h5.File(weight_buf, 'w') as f:
87 | hdf5_format.save_weights_to_hdf5_group(f, self.model)
88 | weight_buf.seek(0)
89 |
90 | for i, path in enumerate(tqdm(self.weight_paths, desc='Renaming checkpoints')):
91 | if path == '/dev/null':
92 | continue
93 | self.model.load_weights(path)
94 | bops = trace_minmax(self.model, dataset, bsz=bsz, verbose=False)
95 | with h5.File(path, 'r+') as f:
96 | logs = json.loads(f.attrs['train_log']) # type: ignore
97 | logs['bops'] = bops
98 | metric = self.metric_fn(logs)
99 | logs['metric'] = metric
100 | f.attrs['train_log'] = json.dumps(logs, cls=NumpyFloatValuesEncoder)
101 | self.best[i] = metric
102 | new_fname = self.path / self.fname_format.format(**logs)
103 | os.rename(path, new_fname)
104 | self.weight_paths[i] = new_fname
105 |
106 | idx = np.argsort(self.best)
107 | self.best = self.best[idx]
108 | self.weight_paths = self.weight_paths[idx]
109 | with h5.File(weight_buf, 'r') as f:
110 | hdf5_format.load_weights_from_hdf5_group_by_name(f, self.model)
111 |
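# Usage sketch for SaveTopN (illustrative values, not from the tutorial): keep the
# 3 checkpoints with the highest validation accuracy, then fetch the best one.
#
#   cb = SaveTopN(metric_fn=lambda logs: logs['val_accuracy'], n=3, path='ckpts', side='max')
#   model.fit(X, y, epochs=30, callbacks=[cb])
#   best = get_best_ckpt(Path('ckpts'))  # defined below; returns the top-ranked .h5 file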
112 |
113 | class PBarCallback(tf.keras.callbacks.Callback):
114 | def __init__(self, metric='loss: {loss:.2f}/{val_loss:.2f}'):
115 | self.pbar = None
116 | self.template = metric
117 |
118 | def on_epoch_begin(self, epoch, logs=None):
119 | if self.pbar is None:
120 | self.pbar = tqdm(total=self.params['epochs'], unit='epoch')
121 |
122 | def on_epoch_end(self, epoch, logs=None):
123 | assert isinstance(self.pbar, tqdm)
124 | assert isinstance(logs, dict)
125 | self.pbar.update(1)
126 | string = self.template.format(**logs)
127 | if 'bops' in logs:
128 | string += f' - BOPs: {logs["bops"]:,.0f}'
129 | self.pbar.set_description(string)
130 |
131 | def on_train_end(self, logs=None):
132 | if self.pbar is not None:
133 | self.pbar.close()
134 |
135 |
136 | def plot_history(history: dict, metrics=('loss', 'val_loss'), ylabel='Loss', logy=False):
137 | fig, ax = plt.subplots()
138 | for metric in metrics:
139 | ax.plot(history[metric], label=metric)
140 | ax.set_xlabel('Epoch')
141 | ax.set_ylabel(ylabel)
142 | if logy:
143 | ax.set_yscale('log')
144 | ax.legend()
145 | return fig, ax
146 |
147 |
148 | def save_model(model: keras.models.Model, path: str):
149 | _path = Path(path)
150 | model.save(path)
151 | if model.history is not None:
152 | history = model.history.history
153 | else:
154 | history = {}
155 | with open(_path.with_suffix('.history'), 'wb') as f:
156 | f.write(zstd.compress(pkl.dumps(history)))
157 |
158 |
159 | def load_model(path: str, co=None):
160 | _path = Path(path)
161 | model: keras.Model = keras.models.load_model(path, custom_objects=co) # type: ignore
162 | with open(_path.with_suffix('.history'), 'rb') as f:
163 | history: dict[str, list] = pkl.loads(zstd.decompress(f.read()))
164 | return model, history
165 |
166 |
167 | def save_history(history, path):
168 | with open(path, 'wb') as f:
169 | f.write(zstd.compress(pkl.dumps(history)))
170 |
171 |
172 | def load_history(path):
173 | with open(path, 'rb') as f:
174 | history = pkl.loads(zstd.decompress(f.read()))
175 | return history
176 |
177 |
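# BatchNorm folding used by absorb_batchNorm below: a BatchNormalization layer computes
#   y = gamma * (z - mu) / sqrt(var + eps) + beta,  with z = W x + b.
# Folding it into the preceding Dense/Conv layer gives, per output channel,
#   W' = W * ratio,  b' = b * ratio + beta - gamma * mu / sqrt(var + eps),
# where ratio = gamma / sqrt(var + eps) (eps = 0.001, the Keras default used here).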
178 | def absorb_batchNorm(model_target, model_original):
179 | for layer in model_target.layers:
180 | if layer.__class__.__name__ == 'Functional':
181 | absorb_batchNorm(layer, model_original.get_layer(layer.name))
182 | continue
183 | if (
184 | (isinstance(layer, Dense) or isinstance(layer, Conv))
185 | and len(nodes := model_original.get_layer(layer.name)._outbound_nodes) > 0
186 | and isinstance(nodes[0].outbound_layer, keras.layers.BatchNormalization)
187 | ):
188 | _gamma, _beta, _mu, _var = model_original.get_layer(layer.name)._outbound_nodes[0].outbound_layer.get_weights()
189 | _ratio = _gamma / np.sqrt(0.001 + _var)
190 | _bias = -_gamma * _mu / np.sqrt(0.001 + _var) + _beta
191 |
192 | k, *_b = model_original.get_layer(layer.name).get_weights()
193 | if _b:
194 | b = _b[0]
195 | else:
196 | b = np.zeros(layer.output_shape[-1])
197 | nk = np.einsum('...c, c-> ...c', k, _ratio, optimize=True)
198 | nb = np.einsum('...c, c-> ...c', b, _ratio, optimize=True) + _bias
199 | extras = layer.get_weights()[2:]
200 | layer.set_weights([nk, nb, *extras])
201 | elif hasattr(layer, 'kernel'):
202 | for w in layer.weights:
203 | if '_bw' not in w.name:
204 | break
205 | else:
206 | continue
207 | weights = layer.get_weights()
208 | new_weights = model_original.get_layer(layer.name).get_weights()
209 | l = len(new_weights) # noqa: E741 # If l looks like 1 by any chance, change your font.
210 | layer.set_weights([*new_weights, *weights[l:]][: len(weights)])
211 |
212 |
213 | def set_seed(seed):
214 | np.random.seed(seed)
215 | tf.random.set_seed(seed)
216 | os.environ['PYTHONHASHSEED'] = str(seed)
217 | random.seed(seed)
218 |
219 | tf.config.experimental.enable_op_determinism()
220 |
221 |
222 | def get_best_ckpt(save_path: Path, take_min=False):
223 | ckpts = list(save_path.glob('*.h5'))
224 |
225 | def rank(ckpt: Path):
226 | with h5.File(ckpt, 'r') as f:
227 | log: dict = f.attrs['train_log'] # type: ignore
228 | log = json.loads(log) # type: ignore
229 | metric = log['metric'] # type: ignore
230 | return metric
231 |
232 | ckpts = sorted(ckpts, key=rank, reverse=not take_min)
233 | ckpt = ckpts[0]
234 | return ckpt
235 |
236 |
237 | class PeratoFront(keras.callbacks.Callback):
238 | def __init__(
239 | self,
240 | path: str | Path,
241 | fname_format: str,
242 | metrics_names: list[str],
243 | sides: list[int],
244 | cond_fn: Callable[[dict], bool] = lambda x: True,
245 | ):
246 | self.path = Path(path)
247 | self.fname_format = fname_format
248 | os.makedirs(path, exist_ok=True)
249 | self.paths = []
250 | self.metrics = []
251 | self.metric_names = metrics_names
252 | self.sides = np.array(sides)
253 | self.cond_fn = cond_fn
254 |
255 | def on_epoch_end(self, epoch, logs=None):
256 | assert isinstance(self.model, keras.models.Model)
257 | assert isinstance(logs, dict)
258 |
259 | logs = logs.copy()
260 | logs['epoch'] = epoch
261 |
262 | if not self.cond_fn(logs):
263 | return
264 | new_metrics = np.array([logs[metric_name] for metric_name in self.metric_names])
265 | _rm_idx = []
266 | for i, old_metrics in enumerate(self.metrics):
267 | _old_metrics = self.sides * old_metrics
268 | _new_metrics = self.sides * new_metrics
269 | if np.all(_new_metrics <= _old_metrics):
270 | return
271 | if np.all(_new_metrics >= _old_metrics):
272 | _rm_idx.append(i)
273 | for i in _rm_idx[::-1]:
274 | self.metrics.pop(i)
275 | p = self.paths.pop(i)
276 | os.remove(p)
277 |
278 | path = self.path / self.fname_format.format(**logs)
279 | self.metrics.append(new_metrics)
280 | self.paths.append(path)
281 | self.model.save_weights(self.paths[-1])
282 |
283 | with h5.File(path, 'r+') as f:
284 | log_str = json.dumps(logs, cls=NumpyFloatValuesEncoder)
285 | f.attrs['train_log'] = log_str
286 |
287 | def rename_ckpts(self, dataset, bsz=65536):
288 | assert isinstance(self.model, keras.models.Model)
289 |
290 | weight_buf = BytesIO()
291 | with h5.File(weight_buf, 'w') as f:
292 | hdf5_format.save_weights_to_hdf5_group(f, self.model)
293 | weight_buf.seek(0)
294 |
295 | for i, path in enumerate(tqdm(self.paths, desc='Renaming checkpoints')):
296 | self.model.load_weights(path)
297 | bops = trace_minmax(self.model, dataset, bsz=bsz, verbose=False)
298 | with h5.File(path, 'r+') as f:
299 | logs = json.loads(f.attrs['train_log']) # type: ignore
300 | logs['bops'] = bops
301 | f.attrs['train_log'] = json.dumps(logs, cls=NumpyFloatValuesEncoder)
302 | metrics = np.array([logs[metric_name] for metric_name in self.metric_names])
303 | self.metrics[i] = metrics
304 | new_fname = self.path / self.fname_format.format(**logs)
305 | os.rename(path, new_fname)
306 | self.paths[i] = new_fname
307 |
308 | with h5.File(weight_buf, 'r') as f:
309 | hdf5_format.load_weights_from_hdf5_group_by_name(f, self.model)
310 |
311 |
312 | class BetaScheduler(keras.callbacks.Callback):
313 | def __init__(self, beta_fn: Callable[[int], float]):
314 | self.beta_fn = beta_fn
315 |
316 | def on_epoch_begin(self, epoch, logs=None):
317 | assert isinstance(self.model, keras.models.Model)
318 |
319 | beta = self.beta_fn(epoch)
320 | for layer in self.model.layers:
321 | if hasattr(layer, 'beta'):
322 | layer.beta.assign(keras.backend.constant(beta, dtype=keras.backend.floatx()))
323 |
324 | def on_epoch_end(self, epoch, logs=None):
325 | assert isinstance(logs, dict)
326 | logs['beta'] = self.beta_fn(epoch)
327 |
328 | @classmethod
329 | def from_config(cls, config):
330 | return cls(get_schedule(config.beta, config.train.epochs))
331 |
332 |
333 | def get_schedule(beta_conf, total_epochs):
334 | epochs = []
335 | betas = []
336 | interpolations = []
337 | for block in beta_conf.intervals:
338 | epochs.append(block.epochs)
339 | betas.append(block.betas)
340 | interpolation = block.interpolation
341 | assert interpolation in ['linear', 'log']
342 | interpolations.append(interpolation == 'log')
343 | epochs = np.array(epochs + [total_epochs])
344 | assert np.all(np.diff(epochs) >= 0)
345 | betas = np.array(betas)
346 | interpolations = np.array(interpolations)
347 |
348 | def schedule(epoch):
349 | if epoch >= total_epochs:
350 | return betas[-1, -1]
351 | idx = np.searchsorted(epochs, epoch, side='right') - 1
352 | beta0, beta1 = betas[idx]
353 | epoch0, epoch1 = epochs[idx], epochs[idx + 1]
354 | if interpolations[idx]:
355 | beta = beta0 * (beta1 / beta0) ** ((epoch - epoch0) / (epoch1 - epoch0))
356 | else:
357 | beta = beta0 + (beta1 - beta0) * (epoch - epoch0) / (epoch1 - epoch0)
358 | return float(beta)
359 |
360 | return schedule
361 |
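# Sketch of the beta_conf object expected by get_schedule (layout inferred from the code
# above; values are illustrative). Each interval interpolates beta between two values,
# linearly or in log space, from its start epoch to the next interval's start (or
# total_epochs for the last interval):
#
#   intervals:
#     - epochs: 0               # start epoch of this interval
#       betas: [1e-7, 3e-6]     # interpolate beta from 1e-7 to 3e-6
#       interpolation: log      # 'linear' or 'log'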
--------------------------------------------------------------------------------
/part1_getting_started.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Part 1: Getting started"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "from tensorflow.keras.utils import to_categorical\n",
17 | "from sklearn.datasets import fetch_openml\n",
18 | "from sklearn.model_selection import train_test_split\n",
19 | "from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
20 | "import numpy as np\n",
21 | "\n",
22 | "%matplotlib inline\n",
23 | "seed = 0\n",
24 | "np.random.seed(seed)\n",
25 | "import tensorflow as tf\n",
26 | "\n",
27 | "tf.random.set_seed(seed)\n",
28 | "import os\n",
29 | "\n",
30 | "os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "## Fetch the jet tagging dataset from Open ML"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "data = fetch_openml('hls4ml_lhc_jets_hlf')\n",
47 | "X, y = data['data'], data['target']"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "### Let's print some information about the dataset\n",
55 | "Print the feature names and the dataset shape"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "metadata": {
62 | "scrolled": true
63 | },
64 | "outputs": [],
65 | "source": [
66 | "print(data['feature_names'])\n",
67 | "print(X.shape, y.shape)\n",
68 | "print(X[:5])\n",
69 | "print(y[:5])"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "As you saw above, the `y` target is an array of strings, e.g. \\['g', 'w',...\\] etc.\n",
77 | "We need to make this a \"One Hot\" encoding for the training.\n",
78 | "Then, split the dataset into training and validation sets"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "le = LabelEncoder()\n",
88 | "y = le.fit_transform(y)\n",
89 | "y = to_categorical(y, 5)\n",
90 | "X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
91 | "print(y[:5])"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": [
100 | "scaler = StandardScaler()\n",
101 | "X_train_val = scaler.fit_transform(X_train_val)\n",
102 | "X_test = scaler.transform(X_test)"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": null,
108 | "metadata": {},
109 | "outputs": [],
110 | "source": [
111 | "np.save('X_train_val.npy', X_train_val)\n",
112 | "np.save('X_test.npy', X_test)\n",
113 | "np.save('y_train_val.npy', y_train_val)\n",
114 | "np.save('y_test.npy', y_test)\n",
115 | "np.save('classes.npy', le.classes_)"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "## Now construct a model\n",
123 | "We'll use 3 hidden layers with 64, then 32, then 32 neurons. Each layer will use `relu` activation.\n",
124 | "Add an output layer with 5 neurons (one for each class), then finish with Softmax activation."
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": null,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "from tensorflow.keras.models import Sequential\n",
134 | "from tensorflow.keras.layers import Dense, Activation, BatchNormalization\n",
135 | "from tensorflow.keras.optimizers import Adam\n",
136 | "from tensorflow.keras.regularizers import l1\n",
137 | "from callbacks import all_callbacks"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "model = Sequential()\n",
147 | "model.add(Dense(64, input_shape=(16,), name='fc1', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n",
148 | "model.add(Activation(activation='relu', name='relu1'))\n",
149 | "model.add(Dense(32, name='fc2', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n",
150 | "model.add(Activation(activation='relu', name='relu2'))\n",
151 | "model.add(Dense(32, name='fc3', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n",
152 | "model.add(Activation(activation='relu', name='relu3'))\n",
153 | "model.add(Dense(5, name='output', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n",
154 | "model.add(Activation(activation='softmax', name='softmax'))"
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "## Train the model\n",
162 | "We'll use Adam optimizer with categorical crossentropy loss.\n",
163 | "The callbacks will decay the learning rate and save the model into a directory 'model_1'\n",
164 | "The model isn't very complex, so this should just take a few minutes even on the CPU.\n",
165 | "If you've restarted the notebook kernel after training once, set `train = False` to load the trained model."
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "train = True\n",
175 | "if train:\n",
176 | " adam = Adam(lr=0.0001)\n",
177 | " model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])\n",
178 | " callbacks = all_callbacks(\n",
179 | " stop_patience=1000,\n",
180 | " lr_factor=0.5,\n",
181 | " lr_patience=10,\n",
182 | " lr_epsilon=0.000001,\n",
183 | " lr_cooldown=2,\n",
184 | " lr_minimum=0.0000001,\n",
185 | " outputDir='model_1',\n",
186 | " )\n",
187 | " model.fit(\n",
188 | " X_train_val,\n",
189 | " y_train_val,\n",
190 | " batch_size=1024,\n",
191 | " epochs=10,\n",
192 | " validation_split=0.25,\n",
193 | " shuffle=True,\n",
194 | " callbacks=callbacks.callbacks,\n",
195 | " )\n",
196 | "else:\n",
197 | " from tensorflow.keras.models import load_model\n",
198 | "\n",
199 | " model = load_model('model_1/KERAS_check_best_model.h5')"
200 | ]
201 | },
202 | {
203 | "cell_type": "markdown",
204 | "metadata": {},
205 | "source": [
206 | "## Check performance\n",
207 | "Check the accuracy and make a ROC curve"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": null,
213 | "metadata": {},
214 | "outputs": [],
215 | "source": [
216 | "import plotting\n",
217 | "import matplotlib.pyplot as plt\n",
218 | "from sklearn.metrics import accuracy_score\n",
219 | "\n",
220 | "y_keras = model.predict(X_test)\n",
221 | "print(\"Accuracy: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_keras, axis=1))))\n",
222 | "plt.figure(figsize=(9, 9))\n",
223 | "_ = plotting.makeRoc(y_test, y_keras, le.classes_)"
224 | ]
225 | },
226 | {
227 | "cell_type": "markdown",
228 | "metadata": {},
229 | "source": [
230 | "## Convert the model to FPGA firmware with hls4ml\n",
231 | "Now we will go through the steps to convert the model we trained to a low-latency optimized FPGA firmware with hls4ml.\n",
232 | "First, we will evaluate its classification performance to make sure we haven't lost accuracy using the fixed-point data types. \n",
233 | "Then we will synthesize the model with Vitis HLS and check the metrics of latency and FPGA resource usage.\n",
234 | "\n",
235 | "### Make an hls4ml config & model\n",
236 | "The hls4ml Neural Network inference library is controlled through a configuration dictionary.\n",
237 | "In this example we'll use the most simple variation, later exercises will look at more advanced configuration."
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": null,
243 | "metadata": {},
244 | "outputs": [],
245 | "source": [
246 | "import hls4ml\n",
247 | "\n",
248 | "config = hls4ml.utils.config_from_keras_model(model, granularity='model', backend='Vitis')\n",
249 | "print(\"-----------------------------------\")\n",
250 | "print(\"Configuration\")\n",
251 | "plotting.print_dict(config)\n",
252 | "print(\"-----------------------------------\")\n",
253 | "hls_model = hls4ml.converters.convert_from_keras_model(\n",
254 | " model, hls_config=config, backend='Vitis', output_dir='model_1/hls4ml_prj', part='xcu250-figd2104-2L-e'\n",
255 | ")"
256 | ]
257 | },
258 | {
259 | "cell_type": "markdown",
260 | "metadata": {},
261 | "source": [
262 | "Let's visualise what we created. The model architecture is shown, annotated with the shape and data types"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": null,
268 | "metadata": {},
269 | "outputs": [],
270 | "source": [
271 | "hls4ml.utils.plot_model(hls_model, show_shapes=True, show_precision=True, to_file=None)"
272 | ]
273 | },
274 | {
275 | "cell_type": "markdown",
276 | "metadata": {},
277 | "source": [
278 | "## Compile, predict\n",
279 | "Now we need to check that this model performance is still good. We compile the hls_model, and then use `hls_model.predict` to execute the FPGA firmware with bit-accurate emulation on the CPU."
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": null,
285 | "metadata": {},
286 | "outputs": [],
287 | "source": [
288 | "hls_model.compile()\n",
289 | "X_test = np.ascontiguousarray(X_test)\n",
290 | "y_hls = hls_model.predict(X_test)"
291 | ]
292 | },
293 | {
294 | "cell_type": "markdown",
295 | "metadata": {},
296 | "source": [
297 | "## Compare\n",
298 | "That was easy! Now let's see how the performance compares to Keras:"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": null,
304 | "metadata": {},
305 | "outputs": [],
306 | "source": [
307 | "print(\"Keras Accuracy: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_keras, axis=1))))\n",
308 | "print(\"hls4ml Accuracy: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_hls, axis=1))))\n",
309 | "\n",
310 | "fig, ax = plt.subplots(figsize=(9, 9))\n",
311 | "_ = plotting.makeRoc(y_test, y_keras, le.classes_)\n",
312 | "plt.gca().set_prop_cycle(None) # reset the colors\n",
313 | "_ = plotting.makeRoc(y_test, y_hls, le.classes_, linestyle='--')\n",
314 | "\n",
315 | "from matplotlib.lines import Line2D\n",
316 | "\n",
317 | "lines = [Line2D([0], [0], ls='-'), Line2D([0], [0], ls='--')]\n",
318 | "from matplotlib.legend import Legend\n",
319 | "\n",
320 | "leg = Legend(ax, lines, labels=['keras', 'hls4ml'], loc='lower right', frameon=False)\n",
321 | "ax.add_artist(leg)"
322 | ]
323 | },
324 | {
325 | "cell_type": "markdown",
326 | "metadata": {},
327 | "source": [
328 | "## Synthesize\n",
329 | "Now we'll actually use Vitis HLS to synthesize the model. We can run the build using a method of our `hls_model` object.\n",
330 | "After running this step, we can integrate the generated IP into a workflow to compile for a specific FPGA board.\n",
331 | "In this case, we'll just review the reports that Vitis HLS generates, checking the latency and resource usage.\n",
332 | "\n",
333 | "**This can take several minutes.**\n",
334 | "\n",
335 | "While the C-Synthesis is running, we can monitor the progress looking at the log file by opening a terminal from the notebook home, and executing:\n",
336 | "\n",
337 | "`tail -f model_1/hls4ml_prj/vitis_hls.log`"
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": null,
343 | "metadata": {
344 | "scrolled": true
345 | },
346 | "outputs": [],
347 | "source": [
348 | "hls_model.build(csim=False)"
349 | ]
350 | },
351 | {
352 | "cell_type": "markdown",
353 | "metadata": {},
354 | "source": [
355 | "## Check the reports\n",
356 | "Print out the reports generated by Vitis HLS. Pay attention to the Latency and the 'Utilization Estimates' sections"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": null,
362 | "metadata": {},
363 | "outputs": [],
364 | "source": [
365 | "hls4ml.report.read_vivado_report('model_1/hls4ml_prj/')"
366 | ]
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "metadata": {},
371 | "source": [
372 | "## Exercise\n",
373 | "Since `ReuseFactor = 1` we expect each multiplication used in the inference of our neural network to use 1 DSP. Is this what we see? (Note that the Softmax layer should use 5 DSPs, or 1 per class)\n",
374 | "Calculate how many multiplications are performed for the inference of this network...\n",
375 | "(We'll discuss the outcome)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": null,
381 | "metadata": {},
382 | "outputs": [],
383 | "source": []
384 | }
385 | ],
386 | "metadata": {
387 | "kernelspec": {
388 | "display_name": "Python 3 (ipykernel)",
389 | "language": "python",
390 | "name": "python3"
391 | },
392 | "language_info": {
393 | "codemirror_mode": {
394 | "name": "ipython",
395 | "version": 3
396 | },
397 | "file_extension": ".py",
398 | "mimetype": "text/x-python",
399 | "name": "python",
400 | "nbconvert_exporter": "python",
401 | "pygments_lexer": "ipython3",
402 | "version": "3.10.16"
403 | }
404 | },
405 | "nbformat": 4,
406 | "nbformat_minor": 4
407 | }
408 |
--------------------------------------------------------------------------------
/part3_compression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Part 3: Compression"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "from tensorflow.keras.utils import to_categorical\n",
17 | "from sklearn.datasets import fetch_openml\n",
18 | "from sklearn.model_selection import train_test_split\n",
19 | "from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
20 | "import numpy as np\n",
21 | "import matplotlib.pyplot as plt\n",
22 | "\n",
23 | "%matplotlib inline\n",
24 | "seed = 0\n",
25 | "np.random.seed(seed)\n",
26 | "import tensorflow as tf\n",
27 | "\n",
28 | "tf.random.set_seed(seed)\n",
29 | "import os\n",
30 | "\n",
31 | "os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "## Fetch the jet tagging dataset from Open ML"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "X_train_val = np.load('X_train_val.npy')\n",
48 | "X_test = np.load('X_test.npy')\n",
49 | "y_train_val = np.load('y_train_val.npy')\n",
50 | "y_test = np.load('y_test.npy')\n",
51 | "classes = np.load('classes.npy', allow_pickle=True)"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "## Now construct a model\n",
59 | "We'll use the same architecture as in part 1: 3 hidden layers with 64, then 32, then 32 neurons. Each layer will use `relu` activation.\n",
60 | "Add an output layer with 5 neurons (one for each class), then finish with Softmax activation."
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "from tensorflow.keras.models import Sequential\n",
70 | "from tensorflow.keras.layers import Dense, Activation, BatchNormalization\n",
71 | "from tensorflow.keras.optimizers import Adam\n",
72 | "from tensorflow.keras.regularizers import l1\n",
73 | "from callbacks import all_callbacks"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "model = Sequential()\n",
83 | "model.add(Dense(64, input_shape=(16,), name='fc1', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n",
84 | "model.add(Activation(activation='relu', name='relu1'))\n",
85 | "model.add(Dense(32, name='fc2', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n",
86 | "model.add(Activation(activation='relu', name='relu2'))\n",
87 | "model.add(Dense(32, name='fc3', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n",
88 | "model.add(Activation(activation='relu', name='relu3'))\n",
89 | "model.add(Dense(5, name='output', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)))\n",
90 | "model.add(Activation(activation='softmax', name='softmax'))"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "## Train sparse\n",
98 | "This time we'll use the Tensorflow model optimization sparsity to train a sparse model (forcing many weights to '0'). In this instance, the target sparsity is 75%"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "metadata": {},
105 | "outputs": [],
106 | "source": [
107 | "from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks, pruning_schedule\n",
108 | "from tensorflow_model_optimization.sparsity.keras import strip_pruning\n",
109 | "\n",
110 | "pruning_params = {\"pruning_schedule\": pruning_schedule.ConstantSparsity(0.75, begin_step=2000, frequency=100)}\n",
111 | "model = prune.prune_low_magnitude(model, **pruning_params)"
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "## Train the model\n",
119 | "We'll use the same settings as the model for part 1: Adam optimizer with categorical crossentropy loss.\n",
120 | "The callbacks will decay the learning rate and save the model into a directory 'model_2'\n",
121 | "The model isn't very complex, so this should just take a few minutes even on the CPU.\n",
122 | "If you've restarted the notebook kernel after training once, set `train = False` to load the trained model rather than training again."
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "train = True\n",
132 | "if train:\n",
133 | " adam = Adam(lr=0.0001)\n",
134 | " model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])\n",
135 | " callbacks = all_callbacks(\n",
136 | " stop_patience=1000,\n",
137 | " lr_factor=0.5,\n",
138 | " lr_patience=10,\n",
139 | " lr_epsilon=0.000001,\n",
140 | " lr_cooldown=2,\n",
141 | " lr_minimum=0.0000001,\n",
142 | " outputDir='model_2',\n",
143 | " )\n",
144 | " callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())\n",
145 | " model.fit(\n",
146 | " X_train_val,\n",
147 | " y_train_val,\n",
148 | " batch_size=1024,\n",
149 | " epochs=10,\n",
150 | " validation_split=0.25,\n",
151 | " shuffle=True,\n",
152 | " callbacks=callbacks.callbacks,\n",
153 | " )\n",
154 | " # Save the model again but with the pruning 'stripped' to use the regular layer types\n",
155 | " model = strip_pruning(model)\n",
156 | " model.save('model_2/KERAS_check_best_model.h5')\n",
157 | "else:\n",
158 | " from tensorflow.keras.models import load_model\n",
159 | "\n",
160 | " model = load_model('model_2/KERAS_check_best_model.h5')"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "## Check sparsity\n",
168 | "Make a quick check that the model was indeed trained sparse. We'll just make a histogram of the weights of the 1st layer, and hopefully observe a large peak in the bin containing '0'. Note logarithmic y axis."
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": null,
174 | "metadata": {},
175 | "outputs": [],
176 | "source": [
177 | "w = model.layers[0].weights[0].numpy()\n",
178 | "h, b = np.histogram(w, bins=100)\n",
179 | "plt.figure(figsize=(7, 7))\n",
180 | "plt.bar(b[:-1], h, width=b[1] - b[0])\n",
181 | "plt.semilogy()\n",
182 | "print('% of zeros = {}'.format(np.sum(w == 0) / np.size(w)))"
183 | ]
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "## Check performance\n",
190 | "How does this 75% sparse model compare against the unpruned model? Let's report the accuracy and make a ROC curve. The pruned model is shown with solid lines, the unpruned model from part 1 is shown with dashed lines.\n",
191 | "**Make sure you've trained the model from part 1**"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": null,
197 | "metadata": {},
198 | "outputs": [],
199 | "source": [
200 | "import plotting\n",
201 | "import matplotlib.pyplot as plt\n",
202 | "from sklearn.metrics import accuracy_score\n",
203 | "from tensorflow.keras.models import load_model\n",
204 | "\n",
205 | "model_ref = load_model('model_1/KERAS_check_best_model.h5')\n",
206 | "\n",
207 | "y_ref = model_ref.predict(X_test)\n",
208 | "y_prune = model.predict(X_test)\n",
209 | "\n",
210 | "print(\"Accuracy unpruned: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_ref, axis=1))))\n",
211 | "print(\"Accuracy pruned: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_prune, axis=1))))\n",
212 | "\n",
213 | "fig, ax = plt.subplots(figsize=(9, 9))\n",
214 | "_ = plotting.makeRoc(y_test, y_ref, classes)\n",
215 | "plt.gca().set_prop_cycle(None) # reset the colors\n",
216 | "_ = plotting.makeRoc(y_test, y_prune, classes, linestyle='--')\n",
217 | "\n",
218 | "from matplotlib.lines import Line2D\n",
219 | "\n",
220 | "lines = [Line2D([0], [0], ls='-'), Line2D([0], [0], ls='--')]\n",
221 | "from matplotlib.legend import Legend\n",
222 | "\n",
223 | "leg = Legend(ax, lines, labels=['unpruned', 'pruned'], loc='lower right', frameon=False)\n",
224 | "ax.add_artist(leg)"
225 | ]
226 | },
227 | {
228 | "cell_type": "markdown",
229 | "metadata": {},
230 | "source": [
231 | "## Convert the model to FPGA firmware with hls4ml\n",
232 | "Let's use the default configuration: `ap_fixed<16,6>` precision everywhere and `ReuseFactor=1`, so we can compare with the part 1 model. We need to use `strip_pruning` to change the layer types back to their originals.\n",
233 | "\n",
234 | "**The synthesis will take a while**\n",
235 | "\n",
236 | "While the C-Synthesis is running, we can monitor the progress looking at the log file by opening a terminal from the notebook home, and executing:\n",
237 | "\n",
238 | "`tail -f model_2/hls4ml_prj/vitis_hls.log`"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": null,
244 | "metadata": {},
245 | "outputs": [],
246 | "source": [
247 | "import hls4ml\n",
248 | "\n",
249 | "config = hls4ml.utils.config_from_keras_model(model, granularity='model', backend='Vitis')\n",
250 | "print(config)\n",
251 | "hls_model = hls4ml.converters.convert_from_keras_model(\n",
252 | " model, hls_config=config, backend='Vitis', output_dir='model_2/hls4ml_prj', part='xcu250-figd2104-2L-e'\n",
253 | ")\n",
254 | "hls_model.compile()\n",
255 | "hls_model.build(csim=False)"
256 | ]
257 | },
258 | {
259 | "cell_type": "markdown",
260 | "metadata": {},
261 | "source": [
262 | "## Check the reports\n",
263 | "Print out the reports generated by Vitis HLS. Pay attention to the Utilization Estimates' section in particular this time."
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": null,
269 | "metadata": {},
270 | "outputs": [],
271 | "source": [
272 | "hls4ml.report.read_vivado_report('model_2/hls4ml_prj/')"
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "Print the report for the model trained in part 1. Remember these models have the same architecture, but the model in this section was trained using the sparsity API from tensorflow_model_optimization. Notice how the resource usage had dramatically reduced (particularly the DSPs). When Vitis HLS notices an operation like `y = 0 * x` it can avoid placing a DSP for that operation. The impact of this is biggest when `ReuseFactor = 1`, but still applies at higher reuse as well. **Note you need to have trained and synthesized the model from part 1**"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": null,
285 | "metadata": {},
286 | "outputs": [],
287 | "source": [
288 | "hls4ml.report.read_vivado_report('model_1/hls4ml_prj')"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": null,
294 | "metadata": {},
295 | "outputs": [],
296 | "source": []
297 | }
298 | ],
299 | "metadata": {
300 | "kernelspec": {
301 | "display_name": "Python 3 (ipykernel)",
302 | "language": "python",
303 | "name": "python3"
304 | },
305 | "language_info": {
306 | "codemirror_mode": {
307 | "name": "ipython",
308 | "version": 3
309 | },
310 | "file_extension": ".py",
311 | "mimetype": "text/x-python",
312 | "name": "python",
313 | "nbconvert_exporter": "python",
314 | "pygments_lexer": "ipython3",
315 | "version": "3.10.16"
316 | }
317 | },
318 | "nbformat": 4,
319 | "nbformat_minor": 4
320 | }
321 |
--------------------------------------------------------------------------------
/part4.1_HG_quantization.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Part 4: HG Quantization"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import os\n",
17 | "import keras\n",
18 | "from keras.utils import to_categorical\n",
19 | "from sklearn.datasets import fetch_openml\n",
20 | "from sklearn.model_selection import train_test_split\n",
21 | "from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
22 | "import numpy as np\n",
23 | "import matplotlib.pyplot as plt\n",
24 | "\n",
25 | "%matplotlib inline\n",
26 | "seed = 0\n",
27 | "np.random.seed(seed)\n",
28 | "import tensorflow as tf\n",
29 | "\n",
30 | "tf.random.set_seed(seed)\n",
31 | "\n",
32 | "os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "## Fetch the jet tagging dataset from Open ML"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "# If you haven't finished part 1 already, uncomment the following lines to download, process, and save the dataset\n",
49 | "\n",
50 | "# le = LabelEncoder()\n",
51 | "# y = le.fit_transform(y)\n",
52 | "# y = to_categorical(y, 5)\n",
53 | "# X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
54 | "# # print(y[:5])\n",
55 | "# scaler = StandardScaler()\n",
56 | "# X_train_val = scaler.fit_transform(X_train_val)\n",
57 | "# X_test = scaler.transform(X_test)\n",
58 | "# np.save('X_train_val.npy', X_train_val)\n",
59 | "# np.save('X_test.npy', X_test)\n",
60 | "# np.save('y_train_val.npy', y_train_val)\n",
61 | "# np.save('y_test.npy', y_test)\n",
62 | "# np.save('classes.npy', le.classes_)"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "X_train_val = np.load('X_train_val.npy')\n",
72 | "X_test = np.load('X_test.npy')\n",
73 | "y_train_val = np.load('y_train_val.npy')\n",
74 | "y_test = np.load('y_test.npy')\n",
75 | "classes = np.load('classes.npy', allow_pickle=True)\n",
76 | "\n",
77 | "# Convert everything to tf.Tensor to avoid casting\n",
78 | "with tf.device('/cpu:0'): # type: ignore\n",
79 | " _X_train_val = tf.convert_to_tensor(X_train_val, dtype=tf.float32)\n",
80 | " # We don't make explicit y categorical tensor:\n",
81 | " # Use SparseCategoricalCrossentropy loss instead.\n",
82 | " _y_train_val = tf.convert_to_tensor(np.argmax(y_train_val, axis=1), dtype=tf.int32)"
83 | ]
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {},
88 | "source": [
89 | "## Construct a model\n",
90 | "This time we're going to use HGQ layers.\n",
91 | "\n",
92 | "HGQ is \"High Granularity Quantization\" for heterogeneous quantization at arbitrary granularity, up to per-weight and per-activation level.\n",
93 | "\n",
94 | "https://github.com/calad0i/HGQ\n",
95 | "\n",
96 | "Depending on the specific task, HGQ can achieve more than 10x resource savings comparing to QKeras. (For example, on this dataset and requiring an accuracy of around 0.72~0.74)."
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": null,
102 | "metadata": {},
103 | "outputs": [],
104 | "source": [
105 | "from keras.models import Sequential\n",
106 | "from keras.optimizers import Adam\n",
107 | "from keras.losses import SparseCategoricalCrossentropy\n",
108 | "from HGQ.layers import HQuantize, HDense, HActivation"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "For any layer that needs to be quantized (i.e., layers that perform the actual computation), add a `H` in front of the layer name. For example, `HDense`, `HConv2D`, `HActivation`, etc.\n",
116 | "\n",
117 | "HGQ requires the input number to be quantized. To achieve it, you can simply add a `HQuantizer` layer at the beginning of the model. You may refer to https://calad0i.github.io/HGQ/ for full documentation.\n",
118 | "\n",
119 | "As all quantization bitwidths are learnt, you don't need to specify them. Instead, for each `H-` layer, you need to specify the `beta` parameter that controls the trade-off between accuracy and resource savings. The higher the `beta`, the more aggressive the quantization will be."
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "beta = 3e-6\n",
129 | "# The bigger the beta, the smaller the models is, at the cost of accuracy.\n",
130 | "\n",
131 | "model = Sequential(\n",
132 | " [\n",
133 | " HQuantize(beta=beta),\n",
134 | " HDense(64, activation='relu', beta=beta),\n",
135 | " HDense(32, activation='relu', beta=beta),\n",
136 | " HDense(32, activation='relu', beta=beta),\n",
137 | " HDense(5, beta=beta),\n",
138 | " ]\n",
139 | ")"
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "metadata": {},
145 | "source": [
146 | "## Train sparse\n",
147 | "\n",
148 | "No need to do anything. Unstructured sparsity comes for free with HGQ."
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": null,
154 | "metadata": {},
155 | "outputs": [],
156 | "source": [
157 | "# This is a empty code cell, you don't need to put anything here."
158 | ]
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {},
163 | "source": [
164 | "## Train the model\n",
165 | "We'll use the same settings as the model for part 1: Adam optimizer with categorical crossentropy loss.\n",
166 | "\n",
167 | "However, we can skip the softmax layer in the model by adding `from_logits=True` to the loss function. `Softmax` is expensive in hardware, so we want to avoid it if possible.\n",
168 | "\n",
169 | "For any HGQ model, it's essential to use `ResetMinMax` callback to reset the quantization ranges after each epoch. This is because the ranges are calculated based on the data seen so far, and we want to make sure they are recalculated after each epoch.\n",
170 | "\n",
171 | "It is recommended to use the `FreeBOPs` callback to monitor the number of (effective) bits operations in the model. This is a good proxy for ressource usage in FPGA (BOPs ~ 55*DSPs+LUTs) for **post place&route resource**. Notice that CSynth tends to overestimate at least by a factor of 2."
172 | ]
173 | },
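174 | {
175 | "cell_type": "markdown",
176 | "metadata": {},
177 | "source": [
178 | "As a rough illustration of that rule of thumb, here is the arithmetic on some hypothetical report numbers (the DSP/LUT counts below are made up for the example, not taken from a real report):"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": null,
184 | "metadata": {},
185 | "outputs": [],
186 | "source": [
187 | "# Rule-of-thumb sketch: BOPs ~ 55*DSPs + LUTs for post place&route resources.\n",
188 | "# The DSP/LUT counts here are hypothetical, purely to illustrate the arithmetic.\n",
189 | "dsps, luts = 10, 6000\n",
190 | "print(f'BOPs-equivalent resource cost: {55 * dsps + luts}')"
191 | ]
192 | },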
174 | {
175 | "cell_type": "code",
176 | "execution_count": null,
177 | "metadata": {},
178 | "outputs": [],
179 | "source": [
180 | "from HGQ import ResetMinMax, FreeBOPs\n",
181 | "from keras.callbacks import LearningRateScheduler\n",
182 | "from keras.experimental import CosineDecay\n",
183 | "from nn_utils import PBarCallback\n",
184 | "\n",
185 | "_sched = CosineDecay(2e-2, 200)\n",
186 | "sched = LearningRateScheduler(_sched)\n",
187 | "pbar = PBarCallback(metric='loss: {loss:.3f}/{val_loss:.3f} - acc: {accuracy:.3f}/{val_accuracy:.3f}')\n",
188 | "\n",
189 | "callbacks = [ResetMinMax(), FreeBOPs(), pbar, sched]\n",
190 | "\n",
191 | "# ResetMinMax: necessary callback for all HGQ models\n",
192 | "# FreeBOPs: recommended callback\n",
193 | "# pbar: progress bar callback, useful when the number of epochs is high\n",
194 | "# sched: learning rate scheduler. Cosine decay in this case."
195 | ]
196 | },
197 | {
198 | "cell_type": "markdown",
199 | "metadata": {},
200 | "source": [
201 | "## Notice\n",
202 | "\n",
203 | "- Due to the stochasticness of surrogate gradient on the individual bitwidth, it is recommended to train the model with a large batchsize over more epochs.\n",
204 | "\n",
205 | "- HGQ is jit-compiled for many parts. The first epoch will take longer to compile.\n",
206 | "\n",
207 | "- We train for 200 epochs here, which takes ~1min on a 3070-maxq GPU, similar to the time taken part 4.\n",
208 | "\n",
209 | "- Parameters used in this tutorial are not optimized for the best performance. Please refer to [HGQ-demos](https://github.com/calad0i/HGQ-demos) for more advanced examples."
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": null,
215 | "metadata": {},
216 | "outputs": [],
217 | "source": [
218 | "train = True\n",
219 | "if train:\n",
220 | " opt = Adam(learning_rate=0)\n",
221 | " loss = SparseCategoricalCrossentropy(from_logits=True)\n",
222 | " model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])\n",
223 | "\n",
224 | " model.fit(\n",
225 | " _X_train_val,\n",
226 | " _y_train_val,\n",
227 | " batch_size=16384,\n",
228 | " epochs=200,\n",
229 | " validation_split=0.25,\n",
230 | " shuffle=True,\n",
231 | " callbacks=callbacks,\n",
232 | " verbose=0, # type: ignore\n",
233 | " )\n",
234 | " model.save('model_3.1/model.h5')\n",
235 | "else:\n",
236 | " from keras.models import load_model\n",
237 | "\n",
238 | " # No need to use custom_objects as the custom layers are already registered\n",
239 | " model: keras.Model = load_model('model_3.1/model.h5') # type: ignore"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "## Prepare for conversion\n",
247 | "\n",
248 | "HGQ model cannot be converted to hls4ml model directly, and we need to convert it to a proxy model first. The proxy model also serves as a bit-accurate emulator of the hls4ml model that takes numerical overflow into account.\n",
249 | "\n",
250 | "To convert to a proxy model, we need to set appropriate ranges of the model internal variables. This is done by using the `trace_minmax` function. You can add a scaler factor `cover_range` to the ranges to make sure the model more robust to numerical overflow. `trace_minmax` also resturns the exact (effective) BOPs of the model (the number provided during training is approximated).\n",
251 | "\n",
252 | "If you keep all parameters the same and everything goes correctly, total BOPs of the model should be around 6500. This means, after running place&route (or vsynth), the model should take around 6500 LUTs, which means DSPs*55+LUTs used should be around 6500."
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": null,
258 | "metadata": {},
259 | "outputs": [],
260 | "source": [
261 | "from HGQ import trace_minmax, to_proxy_model\n",
262 | "\n",
263 | "trace_minmax(model, X_train_val)"
264 | ]
265 | },
266 | {
267 | "cell_type": "markdown",
268 | "metadata": {},
269 | "source": [
270 | "Check that the model is indeed sparse without explicit pruning or `l1` regularization."
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": null,
276 | "metadata": {},
277 | "outputs": [],
278 | "source": [
279 | "for layer in model.layers:\n",
280 | " if layer._has_kernel:\n",
281 | " k = layer.fused_qkernel.numpy()\n",
282 | " print(f'{layer.name}: {np.mean(k==0):.2%} sparsity')"
283 | ]
284 | },
285 | {
286 | "cell_type": "markdown",
287 | "metadata": {},
288 | "source": [
289 | "Then, convert the model to a proxy model using the `to_proxy_model` function."
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": null,
295 | "metadata": {},
296 | "outputs": [],
297 | "source": [
298 | "proxy = to_proxy_model(model)"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": null,
304 | "metadata": {},
305 | "outputs": [],
306 | "source": [
307 | "import hls4ml\n",
308 | "import plotting\n",
309 | "\n",
310 | "hls_model = hls4ml.converters.convert_from_keras_model(\n",
311 | " proxy, output_dir='model_3.1/hls4ml_prj', part='xcu250-figd2104-2L-e', backend='Vitis'\n",
312 | ")\n",
313 | "hls_model.compile()\n",
314 | "\n",
315 | "X_test = np.ascontiguousarray(X_test)\n",
316 | "y_keras = model.predict(X_test, batch_size=16384, verbose=0)\n",
317 | "y_proxy = proxy.predict(X_test, batch_size=16384, verbose=0)\n",
318 | "y_hls = hls_model.predict(X_test)"
319 | ]
320 | },
321 | {
322 | "cell_type": "markdown",
323 | "metadata": {},
324 | "source": [
325 | "# Check bit-accuracy\n",
326 | "If you are unlucky, `y_keras` and `y_hls` will not fully match due to numerical overflow (for a few entries). However, `y_keras` and `y_proxy` should match perfectly. (Sometime mismatch could also happen - only due to machine precision limit.\n",
327 | "\n",
328 | "For newer nvidia GPUs, TF32 is enabled by default (fp32 with reduced mantissa bits), which could cause this issue). This will make this issue more prevalent."
329 | ]
330 | },
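331 | {
332 | "cell_type": "markdown",
333 | "metadata": {},
334 | "source": [
335 | "If you want to rule out TF32 as a source of mismatch on such GPUs, TensorFlow provides a switch to disable it. This is an optional step (standard TensorFlow API, not HGQ-specific):"
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": null,
341 | "metadata": {},
342 | "outputs": [],
343 | "source": [
344 | "# Optional: force full-fp32 matmuls on Ampere+ NVIDIA GPUs, so Keras predictions\n",
345 | "# are not affected by TF32's reduced mantissa precision.\n",
346 | "tf.config.experimental.enable_tensor_float_32_execution(False)"
347 | ]
348 | },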
331 | {
332 | "cell_type": "code",
333 | "execution_count": null,
334 | "metadata": {},
335 | "outputs": [],
336 | "source": [
337 | "np.mean(y_keras == y_hls), np.mean(y_proxy == y_hls)"
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": null,
343 | "metadata": {},
344 | "outputs": [],
345 | "source": [
346 | "# The plotting script assumes 0-1 range for the predictions.\n",
347 | "y_keras_softmax = tf.nn.softmax(y_keras).numpy()\n",
348 | "y_hls_softmax = tf.nn.softmax(y_hls).numpy()"
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": null,
354 | "metadata": {},
355 | "outputs": [],
356 | "source": [
357 | "%matplotlib inline\n",
358 | "from sklearn.metrics import accuracy_score\n",
359 | "from keras.models import load_model\n",
360 | "\n",
361 | "model_ref = load_model('model_1/KERAS_check_best_model.h5')\n",
362 | "y_ref = model_ref.predict(X_test, batch_size=1024, verbose=0)\n",
363 | "\n",
364 | "print(\"Accuracy baseline: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_ref, axis=1))))\n",
365 | "print(\"Accuracy pruned, quantized: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_keras, axis=1))))\n",
366 | "print(\"Accuracy hls4ml: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_hls, axis=1))))\n",
367 | "\n",
368 | "fig, ax = plt.subplots(figsize=(9, 9))\n",
369 | "_ = plotting.makeRoc(y_test, y_ref, classes)\n",
370 | "plt.gca().set_prop_cycle(None) # reset the colors\n",
371 | "_ = plotting.makeRoc(y_test, y_keras_softmax, classes, linestyle='--')\n",
372 | "plt.gca().set_prop_cycle(None) # reset the colors\n",
373 | "_ = plotting.makeRoc(y_test, y_hls_softmax, classes, linestyle=':')\n",
374 | "\n",
375 | "from matplotlib.lines import Line2D\n",
376 | "\n",
377 | "lines = [Line2D([0], [0], ls='-'), Line2D([0], [0], ls='--'), Line2D([0], [0], ls=':')]\n",
378 | "from matplotlib.legend import Legend\n",
379 | "\n",
380 | "leg = Legend(ax, lines, labels=['baseline', 'pruned, quantized', 'hls4ml'], loc='lower right', frameon=False)\n",
381 | "ax.add_artist(leg)"
382 | ]
383 | },
384 | {
385 | "cell_type": "markdown",
386 | "metadata": {},
387 | "source": [
388 | "## Synthesize\n",
389 | "Now let's synthesize this quantized, pruned model.\n",
390 | "\n",
391 | "**The synthesis will take a while**\n",
392 | "\n",
393 | "While the C-Synthesis is running, we can monitor the progress looking at the log file by opening a terminal from the notebook home, and executing:\n",
394 | "\n",
395 | "`tail -f model_3.1/hls4ml_prj/vitis_hls.log`"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": null,
401 | "metadata": {},
402 | "outputs": [],
403 | "source": [
404 | "hls_model.build(csim=False)"
405 | ]
406 | },
407 | {
408 | "cell_type": "markdown",
409 | "metadata": {},
410 | "source": [
411 | "## Check the reports\n",
412 | "Print out the reports generated by Vitis HLS. Pay attention to the Utilization Estimates' section in particular this time.\n",
413 | "\n",
414 | "## Notice\n",
415 | "We strip away the softmax layer compare to part 4, which takes 3~5 cycles to compute. The overall latency could be comparable."
416 | ]
417 | },
418 | {
419 | "cell_type": "code",
420 | "execution_count": null,
421 | "metadata": {},
422 | "outputs": [],
423 | "source": [
424 | "hls4ml.report.read_vivado_report('model_3.1/hls4ml_prj')"
425 | ]
426 | },
427 | {
428 | "cell_type": "markdown",
429 | "metadata": {},
430 | "source": [
431 | "Print the report for the model trained in part 4. You should notice that the resource usage is significantly lower than the model trained in part 4.\n",
432 | "\n",
433 | "**Note you need to have trained and synthesized the model from part 4**"
434 | ]
435 | },
436 | {
437 | "cell_type": "code",
438 | "execution_count": null,
439 | "metadata": {},
440 | "outputs": [],
441 | "source": [
442 | "hls4ml.report.read_vivado_report('model_3/hls4ml_prj')"
443 | ]
444 | },
445 | {
446 | "cell_type": "markdown",
447 | "metadata": {},
448 | "source": [
449 | "## NB\n",
450 | "Note as well that the Vitis HLS `csynth` resource estimates tend to _overestimate_ on chip resource usage. Running the subsequent stages of FPGA compilation reveals the more realistic resource usage, You can run the next step, 'logic synthesis' with `hls_model.build(synth=True, vsynth=True)`, but we skipped it in this tutorial in the interest of time."
451 | ]
452 | },
453 | {
454 | "cell_type": "code",
455 | "execution_count": null,
456 | "metadata": {},
457 | "outputs": [],
458 | "source": []
459 | }
460 | ],
461 | "metadata": {
462 | "kernelspec": {
463 | "display_name": "Python 3 (ipykernel)",
464 | "language": "python",
465 | "name": "python3"
466 | },
467 | "language_info": {
468 | "codemirror_mode": {
469 | "name": "ipython",
470 | "version": 3
471 | },
472 | "file_extension": ".py",
473 | "mimetype": "text/x-python",
474 | "name": "python",
475 | "nbconvert_exporter": "python",
476 | "pygments_lexer": "ipython3",
477 | "version": "3.10.16"
478 | }
479 | },
480 | "nbformat": 4,
481 | "nbformat_minor": 4
482 | }
483 |
--------------------------------------------------------------------------------
/part4_quantization.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Part 4: Quantization"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "from tensorflow.keras.utils import to_categorical\n",
17 | "from sklearn.datasets import fetch_openml\n",
18 | "from sklearn.model_selection import train_test_split\n",
19 | "from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
20 | "import numpy as np\n",
21 | "import matplotlib.pyplot as plt\n",
22 | "\n",
23 | "%matplotlib inline\n",
24 | "seed = 0\n",
25 | "np.random.seed(seed)\n",
26 | "import tensorflow as tf\n",
27 | "\n",
28 | "tf.random.set_seed(seed)\n",
29 | "import os\n",
30 | "\n",
31 | "os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "## Fetch the jet tagging dataset from Open ML"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "X_train_val = np.load('X_train_val.npy')\n",
48 | "X_test = np.load('X_test.npy')\n",
49 | "y_train_val = np.load('y_train_val.npy')\n",
50 | "y_test = np.load('y_test.npy')\n",
51 | "classes = np.load('classes.npy', allow_pickle=True)"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "## Construct a model\n",
59 | "This time we're going to use QKeras layers.\n",
60 | "QKeras is \"Quantized Keras\" for deep heterogeneous quantization of ML models.\n",
61 | "\n",
62 | "https://github.com/google/qkeras\n",
63 | "\n",
64 | "It is maintained by Google and we recently added support for QKeras model to hls4ml."
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "from tensorflow.keras.models import Sequential\n",
74 | "from tensorflow.keras.optimizers import Adam\n",
75 | "from tensorflow.keras.regularizers import l1\n",
76 | "from callbacks import all_callbacks\n",
77 | "from tensorflow.keras.layers import Activation\n",
78 | "from qkeras.qlayers import QDense, QActivation\n",
79 | "from qkeras.quantizers import quantized_bits, quantized_relu"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "We're using `QDense` layer instead of `Dense`, and `QActivation` instead of `Activation`. We're also specifying `kernel_quantizer = quantized_bits(6,0,0)`. This will use 6-bits (of which 0 are integer) for the weights. We also use the same quantization for the biases, and `quantized_relu(6)` for 6-bit ReLU activations."
87 | ]
88 | },
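89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 | "QKeras quantizers are callable objects, so we can get a feel for what `quantized_bits(6, 0, alpha=1)` does by applying it to a few sample values. This is just an illustrative check:"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {},
100 | "outputs": [],
101 | "source": [
102 | "import numpy as np\n",
103 | "\n",
104 | "# quantized_bits(6, 0, alpha=1) keeps 1 sign bit, 0 integer bits and 5 fractional\n",
105 | "# bits, i.e. it snaps values to a grid of 2^-5 = 0.03125 within [-1, 1 - 2^-5].\n",
106 | "q = quantized_bits(6, 0, alpha=1)\n",
107 | "x = np.array([-1.0, -0.3, 0.01, 0.25, 0.9])\n",
108 | "print(np.array(q(x)))"
109 | ]
110 | },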
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "model = Sequential()\n",
96 | "model.add(\n",
97 | " QDense(\n",
98 | " 64,\n",
99 | " input_shape=(16,),\n",
100 | " name='fc1',\n",
101 | " kernel_quantizer=quantized_bits(6, 0, alpha=1),\n",
102 | " bias_quantizer=quantized_bits(6, 0, alpha=1),\n",
103 | " kernel_initializer='lecun_uniform',\n",
104 | " kernel_regularizer=l1(0.0001),\n",
105 | " )\n",
106 | ")\n",
107 | "model.add(QActivation(activation=quantized_relu(6), name='relu1'))\n",
108 | "model.add(\n",
109 | " QDense(\n",
110 | " 32,\n",
111 | " name='fc2',\n",
112 | " kernel_quantizer=quantized_bits(6, 0, alpha=1),\n",
113 | " bias_quantizer=quantized_bits(6, 0, alpha=1),\n",
114 | " kernel_initializer='lecun_uniform',\n",
115 | " kernel_regularizer=l1(0.0001),\n",
116 | " )\n",
117 | ")\n",
118 | "model.add(QActivation(activation=quantized_relu(6), name='relu2'))\n",
119 | "model.add(\n",
120 | " QDense(\n",
121 | " 32,\n",
122 | " name='fc3',\n",
123 | " kernel_quantizer=quantized_bits(6, 0, alpha=1),\n",
124 | " bias_quantizer=quantized_bits(6, 0, alpha=1),\n",
125 | " kernel_initializer='lecun_uniform',\n",
126 | " kernel_regularizer=l1(0.0001),\n",
127 | " )\n",
128 | ")\n",
129 | "model.add(QActivation(activation=quantized_relu(6), name='relu3'))\n",
130 | "model.add(\n",
131 | " QDense(\n",
132 | " 5,\n",
133 | " name='output',\n",
134 | " kernel_quantizer=quantized_bits(6, 0, alpha=1),\n",
135 | " bias_quantizer=quantized_bits(6, 0, alpha=1),\n",
136 | " kernel_initializer='lecun_uniform',\n",
137 | " kernel_regularizer=l1(0.0001),\n",
138 | " )\n",
139 | ")\n",
140 | "model.add(Activation(activation='softmax', name='softmax'))"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "## Train sparse\n",
148 | "Let's train with model sparsity again, since QKeras layers are prunable."
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": null,
154 | "metadata": {},
155 | "outputs": [],
156 | "source": [
157 | "from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks, pruning_schedule\n",
158 | "from tensorflow_model_optimization.sparsity.keras import strip_pruning\n",
159 | "\n",
160 | "pruning_params = {\"pruning_schedule\": pruning_schedule.ConstantSparsity(0.75, begin_step=2000, frequency=100)}\n",
161 | "model = prune.prune_low_magnitude(model, **pruning_params)"
162 | ]
163 | },
164 | {
165 | "cell_type": "markdown",
166 | "metadata": {},
167 | "source": [
168 | "## Train the model\n",
169 | "We'll use the same settings as the model for part 1: Adam optimizer with categorical crossentropy loss.\n",
170 | "The callbacks will decay the learning rate and save the model into a directory 'model_2'\n",
171 | "The model isn't very complex, so this should just take a few minutes even on the CPU.\n",
172 | "If you've restarted the notebook kernel after training once, set `train = False` to load the trained model rather than training again."
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": [
181 | "train = True\n",
182 | "if train:\n",
183 | " adam = Adam(lr=0.0001)\n",
184 | " model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])\n",
185 | " callbacks = all_callbacks(\n",
186 | " stop_patience=1000,\n",
187 | " lr_factor=0.5,\n",
188 | " lr_patience=10,\n",
189 | " lr_epsilon=0.000001,\n",
190 | " lr_cooldown=2,\n",
191 | " lr_minimum=0.0000001,\n",
192 | " outputDir='model_3',\n",
193 | " )\n",
194 | " callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())\n",
195 | " model.fit(\n",
196 | " X_train_val,\n",
197 | " y_train_val,\n",
198 | " batch_size=1024,\n",
199 | " epochs=30,\n",
200 | " validation_split=0.25,\n",
201 | " shuffle=True,\n",
202 | " callbacks=callbacks.callbacks,\n",
203 | " )\n",
204 | " # Save the model again but with the pruning 'stripped' to use the regular layer types\n",
205 | " model = strip_pruning(model)\n",
206 | " model.save('model_3/KERAS_check_best_model.h5')\n",
207 | "else:\n",
208 | " from tensorflow.keras.models import load_model\n",
209 | " from qkeras.utils import _add_supported_quantized_objects\n",
210 | "\n",
211 | " co = {}\n",
212 | " _add_supported_quantized_objects(co)\n",
213 | " model = load_model('model_3/KERAS_check_best_model.h5', custom_objects=co)"
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {},
219 | "source": [
220 | "## Check performance\n",
221 | "How does this model which was trained using 6-bits, and 75% sparsity model compare against the original model? Let's report the accuracy and make a ROC curve. The quantized, pruned model is shown with solid lines, the unpruned model from part 1 is shown with dashed lines.\n",
222 | "\n",
223 | "\n",
224 | "We should also check that hls4ml can respect the choice to use 6-bits throughout the model, and match the accuracy. We'll generate a configuration from this Quantized model, and plot its performance as the dotted line.\n",
225 | "The generated configuration is printed out. You'll notice that it uses 7 bits for the type, but we specified 6!? That's just because QKeras doesn't count the sign-bit when we specify the number of bits, so the type that actually gets used needs 1 more.\n",
226 | "\n",
227 | "We also use the `OutputRoundingSaturationMode` optimizer pass of `hls4ml` to set the Activation layers to round, rather than truncate, the cast. This is important for getting good model accuracy when using small bit precision activations. And we'll set a different data type for the tables used in the Softmax, just for a bit of extra performance.\n",
228 | "\n",
229 | "\n",
230 | "**Make sure you've trained the model from part 1**"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": null,
236 | "metadata": {},
237 | "outputs": [],
238 | "source": [
239 | "import hls4ml\n",
240 | "import plotting\n",
241 | "\n",
242 | "config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend='Vitis')\n",
243 | "config['LayerName']['softmax']['exp_table_t'] = 'ap_fixed<18,8>'\n",
244 | "config['LayerName']['softmax']['inv_table_t'] = 'ap_fixed<18,4>'\n",
245 | "print(\"-----------------------------------\")\n",
246 | "plotting.print_dict(config)\n",
247 | "print(\"-----------------------------------\")\n",
248 | "hls_model = hls4ml.converters.convert_from_keras_model(\n",
249 | " model, hls_config=config, backend='Vitis', output_dir='model_3/hls4ml_prj', part='xcu250-figd2104-2L-e'\n",
250 | ")\n",
251 | "hls_model.compile()\n",
252 | "\n",
253 | "y_qkeras = model.predict(np.ascontiguousarray(X_test))\n",
254 | "y_hls = hls_model.predict(np.ascontiguousarray(X_test))\n",
255 | "np.save('model_3/y_qkeras.npy', y_qkeras)\n",
256 | "np.save('model_3/y_hls.npy', y_hls)"
257 | ]
258 | },
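259 | {
260 | "cell_type": "markdown",
261 | "metadata": {},
262 | "source": [
263 | "As an aside, the generated config is a plain nested dictionary, so per-layer settings can be inspected programmatically (and edited before calling `convert_from_keras_model`, if you want to override them). For example, for the first layer `fc1` defined above:"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": null,
269 | "metadata": {},
270 | "outputs": [],
271 | "source": [
272 | "# Inspect the precision hls4ml assigned to the first QDense layer.\n",
273 | "# (Edits to the config only take effect if made before convert_from_keras_model.)\n",
274 | "print(config['LayerName']['fc1']['Precision'])"
275 | ]
276 | },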
259 | {
260 | "cell_type": "code",
261 | "execution_count": null,
262 | "metadata": {},
263 | "outputs": [],
264 | "source": [
265 | "%matplotlib inline\n",
266 | "from sklearn.metrics import accuracy_score\n",
267 | "from tensorflow.keras.models import load_model\n",
268 | "\n",
269 | "model_ref = load_model('model_1/KERAS_check_best_model.h5')\n",
270 | "y_ref = model_ref.predict(X_test)\n",
271 | "\n",
272 | "print(\"Accuracy baseline: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_ref, axis=1))))\n",
273 | "print(\"Accuracy pruned, quantized: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_qkeras, axis=1))))\n",
274 | "print(\"Accuracy hls4ml: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_hls, axis=1))))\n",
275 | "\n",
276 | "fig, ax = plt.subplots(figsize=(9, 9))\n",
277 | "_ = plotting.makeRoc(y_test, y_ref, classes)\n",
278 | "plt.gca().set_prop_cycle(None) # reset the colors\n",
279 | "_ = plotting.makeRoc(y_test, y_qkeras, classes, linestyle='--')\n",
280 | "plt.gca().set_prop_cycle(None) # reset the colors\n",
281 | "_ = plotting.makeRoc(y_test, y_hls, classes, linestyle=':')\n",
282 | "\n",
283 | "from matplotlib.lines import Line2D\n",
284 | "\n",
285 | "lines = [Line2D([0], [0], ls='-'), Line2D([0], [0], ls='--'), Line2D([0], [0], ls=':')]\n",
286 | "from matplotlib.legend import Legend\n",
287 | "\n",
288 | "leg = Legend(ax, lines, labels=['baseline', 'pruned, quantized', 'hls4ml'], loc='lower right', frameon=False)\n",
289 | "ax.add_artist(leg)"
290 | ]
291 | },
292 | {
293 | "cell_type": "markdown",
294 | "metadata": {},
295 | "source": [
296 | "## Synthesize\n",
297 | "Now let's synthesize this quantized, pruned model.\n",
298 | "\n",
299 | "**The synthesis will take a while**\n",
300 | "\n",
301 | "While the C-Synthesis is running, we can monitor the progress looking at the log file by opening a terminal from the notebook home, and executing:\n",
302 | "\n",
303 | "`tail -f model_3/hls4ml_prj/vitis_hls.log`"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": null,
309 | "metadata": {},
310 | "outputs": [],
311 | "source": [
312 | "hls_model.build(csim=False)"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "## Check the reports\n",
320 | "Print out the reports generated by Vitis HLS. Pay attention to the Utilization Estimates' section in particular this time."
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": null,
326 | "metadata": {},
327 | "outputs": [],
328 | "source": [
329 | "hls4ml.report.read_vivado_report('model_3/hls4ml_prj')"
330 | ]
331 | },
332 | {
333 | "cell_type": "markdown",
334 | "metadata": {},
335 | "source": [
336 | "Print the report for the model trained in part 1. Now, compared to the model from part 1, this model has been trained with low-precision quantization, and 75% pruning. You should be able to see that we have saved a lot of resource compared to where we started in part 1. At the same time, referring to the ROC curve above, the model performance is pretty much identical even with this drastic compression!\n",
337 | "\n",
338 | "**Note you need to have trained and synthesized the model from part 1**"
339 | ]
340 | },
341 | {
342 | "cell_type": "code",
343 | "execution_count": null,
344 | "metadata": {},
345 | "outputs": [],
346 | "source": [
347 | "hls4ml.report.read_vivado_report('model_1/hls4ml_prj')"
348 | ]
349 | },
350 | {
351 | "cell_type": "markdown",
352 | "metadata": {},
353 | "source": [
354 | "Print the report for the model trained in part 3. Both these models were trained with 75% sparsity, but the new model uses 6-bit precision as well. You can see how Vitis HLS has moved multiplication operations from DSPs into LUTs, reducing the \"critical\" resource usage.\n",
355 | "\n",
356 | "**Note you need to have trained and synthesized the model from part 3**"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": null,
362 | "metadata": {},
363 | "outputs": [],
364 | "source": [
365 | "hls4ml.report.read_vivado_report('model_2/hls4ml_prj')"
366 | ]
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "metadata": {},
371 | "source": [
372 | "## NB\n",
373 | "Note as well that the Vitis HLS resource estimates tend to _overestimate_ LUTs, while generally estimating the DSPs correctly. Running the subsequent stages of FPGA compilation reveals the more realistic resource usage, You can run the next step, 'logic synthesis' with `hls_model.build(synth=True, vsynth=True)`, but we skipped it in this tutorial in the interest of time."
374 | ]
375 | },
376 | {
377 | "cell_type": "code",
378 | "execution_count": null,
379 | "metadata": {},
380 | "outputs": [],
381 | "source": []
382 | }
383 | ],
384 | "metadata": {
385 | "kernelspec": {
386 | "display_name": "Python 3 (ipykernel)",
387 | "language": "python",
388 | "name": "python3"
389 | },
390 | "language_info": {
391 | "codemirror_mode": {
392 | "name": "ipython",
393 | "version": 3
394 | },
395 | "file_extension": ".py",
396 | "mimetype": "text/x-python",
397 | "name": "python",
398 | "nbconvert_exporter": "python",
399 | "pygments_lexer": "ipython3",
400 | "version": "3.10.16"
401 | }
402 | },
403 | "nbformat": 4,
404 | "nbformat_minor": 4
405 | }
406 |
--------------------------------------------------------------------------------
/part5_bdt.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "
\n",
8 | "\n",
9 | "In this notebook we will take the first steps with training a BDT with `xgboost`, then translating it to HLS code for FPGA with `conifer`\n",
10 | "\n",
11 | "Key concepts:\n",
12 | "- model training\n",
13 | "- model evaluation\n",
14 | "- `conifer` configuration and conversion\n",
15 | "- model emulation\n",
16 | "- model synthesis\n",
17 | "- accelerator creation\n",
18 | "\n",
19 | "For some use cases, the Forest Processing Unit might be an easier entry point as no FPGA synthesis is required for supported boards. Read more about the FPU here: https://ssummers.web.cern.ch/conifer/fpu.html"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "import xgboost as xgb\n",
29 | "import matplotlib.pyplot as plt\n",
30 | "import plotting\n",
31 | "import numpy as np\n",
32 | "from scipy.special import softmax\n",
33 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
34 | "import conifer\n",
35 | "import json\n",
36 | "import os\n",
37 | "import sys\n",
38 | "\n",
39 | "os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']\n",
40 | "\n",
41 | "# enable more output from conifer\n",
42 | "import logging\n",
43 | "\n",
44 | "logging.basicConfig(stream=sys.stdout, level=logging.WARNING)\n",
45 | "logger = logging.getLogger('conifer')\n",
46 | "logger.setLevel('DEBUG')\n",
47 | "\n",
48 | "# create a random seed at we use to make the results repeatable\n",
49 | "seed = int('hls4ml-tutorial'.encode('utf-8').hex(), 16) % 2**31\n",
50 | "\n",
51 | "print(f'Using conifer version {conifer.__version__}')"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "# Load dataset\n",
59 | "\n",
60 | "Load the jet tagging dataset.\n",
61 | "\n",
62 | "**Note**: you need to run part1 first."
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "X_train_val = np.load('X_train_val.npy')\n",
72 | "X_test = np.load('X_test.npy')\n",
73 | "y_train_val_one_hot = np.load('y_train_val.npy')\n",
74 | "y_test_one_hot = np.load('y_test.npy')\n",
75 | "classes = np.load('classes.npy', allow_pickle=True)"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {},
81 | "source": [
82 | "We need to transform the test labels from the one-hot encoded values to labels"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {},
89 | "outputs": [],
90 | "source": [
91 | "le = LabelEncoder().fit(classes)\n",
92 | "ohe = OneHotEncoder().fit(le.transform(classes).reshape(-1, 1))\n",
93 | "y_train_val = ohe.inverse_transform(y_train_val_one_hot.astype(int))\n",
94 | "y_test = ohe.inverse_transform(y_test_one_hot)"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "# Train a BDT\n",
102 | "We'll use `xgboost`'s `XGBClassifier` with:\n",
103 | "\n",
104 | "| Parameter | Explanation |\n",
105 | "| --- | --- |\n",
106 | "| `n_estimators=25` | 25 trees |\n",
107 | "| `max_depth=5` | maximum tree depth of 5 |"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": null,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "clf = xgb.XGBClassifier(n_estimators=25, max_depth=5, learning_rate=1.0, random_state=seed).fit(X_train_val, y_train_val)"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "# Validate performance\n",
124 | "Now we check whether the trained model is any good. We'll plot the ROC curve."
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": null,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "from sklearn.metrics import accuracy_score\n",
134 | "from tensorflow.keras.models import load_model\n",
135 | "\n",
136 | "# load the KERAS model from part 1\n",
137 | "model_ref = load_model('model_1/KERAS_check_best_model.h5')\n",
138 | "y_ref = model_ref.predict(X_test)\n",
139 | "\n",
140 | "# compute predictions of the xgboost model\n",
141 | "y_xgb = clf.predict_proba(X_test)\n",
142 | "print(f'Accuracy baseline: {accuracy_score(np.argmax(y_test_one_hot, axis=1), np.argmax(y_ref, axis=1)):.5f}')\n",
143 | "print(f'Accuracy xgboost: {accuracy_score(np.argmax(y_test_one_hot, axis=1), np.argmax(y_xgb, axis=1)):.5f}')\n",
144 | "\n",
145 | "fig, ax = plt.subplots(figsize=(9, 9))\n",
146 | "_ = plotting.makeRoc(y_test_one_hot, y_ref, classes, linestyle='--')\n",
147 | "plt.gca().set_prop_cycle(None) # reset the colors\n",
148 | "_ = plotting.makeRoc(y_test_one_hot, y_xgb, classes, linestyle='-')\n",
149 | "\n",
150 | "# add a legend\n",
151 | "from matplotlib.lines import Line2D\n",
152 | "\n",
153 | "lines = [\n",
154 | " Line2D([0], [0], ls='--'),\n",
155 | " Line2D([0], [0], ls='-'),\n",
156 | "]\n",
157 | "from matplotlib.legend import Legend\n",
158 | "\n",
159 | "leg = Legend(ax, lines, labels=['part1 Keras', 'xgboost'], loc='lower right', frameon=False)\n",
160 | "ax.add_artist(leg)"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "
\n",
168 | "\n",
169 | "Now we'll convert this model to FPGA firmware with `conifer`. We first need to create a configuration in the form of a dictionary. The quickest way to get started is to create a default configuration from the intended target backend (`xilinxhls` for us). Each backend may have different configuration options, so getting the configuration this way helps enumerate the possible options.\n",
170 | "\n",
171 | "We will print the configuration, modify it, and print it again. The modifications are:\n",
172 | "- set the `OutputDirectory` to something descriptive\n",
173 | "- set the `XilinxPart` to the part number of the FPGA on the Alveo U50"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": null,
179 | "metadata": {},
180 | "outputs": [],
181 | "source": [
182 | "cfg = conifer.backends.xilinxhls.auto_config()\n",
183 | "\n",
184 | "# print the config\n",
185 | "print('Default Configuration\\n' + '-' * 50)\n",
186 | "plotting.print_dict(cfg)\n",
187 | "print('-' * 50)\n",
188 | "\n",
189 | "# modify the config\n",
190 | "cfg['OutputDir'] = 'model_5/'\n",
191 | "cfg['XilinxPart'] = 'xcu250-figd2104-2L-e'\n",
192 | "\n",
193 | "# print the config again\n",
194 | "print('Modified Configuration\\n' + '-' * 50)\n",
195 | "plotting.print_dict(cfg)\n",
196 | "print('-' * 50)"
197 | ]
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "metadata": {},
202 | "source": [
203 | "## Convert and write\n",
204 | "Convert the `xgboost` model to a `conifer` one, and print the `help` to see what methods it implements.\n",
205 | "Then `write` the model, creating the specified output directory and writing all the HLS files to it. We also save the `xgboost` model itself.\n",
206 | "\n",
207 | "#### Other converters:\n",
208 | "`conifer` has converters for several popular BDT training libraries. Each one is used like: `conifer.converters.convert_from_(model, config)`\n",
209 | "The converters are:\n",
210 | "- `sklearn`\n",
211 | "- `xgboost`\n",
212 | "- `ydf`\n",
213 | "- `tmva`\n",
214 | "- `onnx` (exposing `catboost` and `lightGBM`)"
215 | ]
216 | },
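217 | {
218 | "cell_type": "markdown",
219 | "metadata": {},
220 | "source": [
221 | "As a sketch of that pattern for another library, here is what the `sklearn` converter call would look like. The small `GradientBoostingClassifier` below is illustrative only (trained on a subset, and not written to disk, so it doesn't touch the project directory):"
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "execution_count": null,
227 | "metadata": {},
228 | "outputs": [],
229 | "source": [
230 | "from sklearn.ensemble import GradientBoostingClassifier\n",
231 | "\n",
232 | "# Illustrative sketch: the same conversion pattern for a scikit-learn BDT\n",
233 | "clf_skl = GradientBoostingClassifier(n_estimators=5, max_depth=3, random_state=seed)\n",
234 | "clf_skl.fit(X_train_val[:5000], y_train_val[:5000].ravel())\n",
235 | "conifer_model_skl = conifer.converters.convert_from_sklearn(clf_skl, cfg)"
236 | ]
237 | },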
217 | {
218 | "cell_type": "code",
219 | "execution_count": null,
220 | "metadata": {},
221 | "outputs": [],
222 | "source": [
223 | "# convert the model to the conifer representation\n",
224 | "conifer_model = conifer.converters.convert_from_xgboost(clf, cfg)\n",
225 | "# print the help to see the API on the conifer_model\n",
226 | "help(conifer_model)\n",
227 | "# write the project (writing HLS project to disk)\n",
228 | "conifer_model.write()\n",
229 | "# save the conifer model - we can load this again later\n",
230 | "clf.save_model('model_5/xgboost_model.json')"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "## Explore\n",
238 | "Browse the files in the newly created project directory to take a look at the HLS code.\n",
239 | "\n",
240 | "The output of `!tree model_5` is:\n",
241 | "\n",
242 | "```\n",
243 | "model_5/\n",
244 | "├── bridge.cpp\n",
245 | "├── build_hls.tcl\n",
246 | "├── firmware\n",
247 | "│ ├── BDT.cpp\n",
248 | "│ ├── BDT.h\n",
249 | "│ ├── my_prj.cpp\n",
250 | "│ ├── my_prj.h\n",
251 | "│ └── parameters.h\n",
252 | "├── hls_parameters.tcl\n",
253 | "├── my_prj.json\n",
254 | "├── my_prj_test.cpp\n",
255 | "├── tb_data\n",
256 | "└── vivado_synth.tcl\n",
257 | "```\n",
258 | "\n",
259 | "- files under `firmware` are the HLS implementation of the model\n",
260 | "- `my_prj.json` is the saved converted `conifer` model that can be loaded again without the original `xgboost` model\n",
261 | "- `tcl` scripts are used for synthesizing the project"
262 | ]
263 | },
264 | {
265 | "cell_type": "markdown",
266 | "metadata": {},
267 | "source": [
268 | "## Emulate\n",
269 | "Before starting the lengthy FPGA build process, we should validate that our conversion was successful and that the choice of precision was suitable with a bit-accurate emulation. To do this we need to run the HLS C++ code on the CPU with some test data first. This is like the HLS C Simulation step, but rather than writing a C++ testbench and invoking `vitis_hls` to run `csim`, `conifer` implements Python bindings for the HLS, just like `hls4ml`.\n",
270 | "\n",
271 | "We first need to compile (which uses the C++ compiler), then we can make predictions"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {},
278 | "outputs": [],
279 | "source": [
280 | "conifer_model.compile()"
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": null,
286 | "metadata": {},
287 | "outputs": [],
288 | "source": [
289 | "y_hls = conifer_model.decision_function(X_test)"
290 | ]
291 | },
292 | {
293 | "cell_type": "markdown",
294 | "metadata": {},
295 | "source": [
296 | "## Compare\n",
297 | "\n",
298 | "Now we check whether the emulated predictions are good. To do this we'll plot the ROC curve again with the HLS predictions overlaid."
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": null,
304 | "metadata": {},
305 | "outputs": [],
306 | "source": [
307 | "y_hls_proba = softmax(y_hls) # compute class probabilities from the raw predictions\n",
308 | "\n",
309 | "print(f'Accuracy baseline: {accuracy_score(np.argmax(y_test_one_hot, axis=1), np.argmax(y_ref, axis=1)):.5f}')\n",
310 | "print(f'Accuracy xgboost: {accuracy_score(np.argmax(y_test_one_hot, axis=1), np.argmax(y_xgb, axis=1)):.5f}')\n",
311 | "print(f'Accuracy conifer: {accuracy_score(np.argmax(y_test_one_hot, axis=1), np.argmax(y_hls_proba, axis=1)):.5f}')\n",
312 | "\n",
313 | "\n",
314 | "fig, ax = plt.subplots(figsize=(9, 9))\n",
315 | "_ = plotting.makeRoc(y_test_one_hot, y_ref, classes, linestyle='--')\n",
316 | "plt.gca().set_prop_cycle(None) # reset the colors\n",
317 | "_ = plotting.makeRoc(y_test_one_hot, y_xgb, classes, linestyle=':')\n",
318 | "plt.gca().set_prop_cycle(None) # reset the colors\n",
319 | "_ = plotting.makeRoc(y_test_one_hot, y_hls_proba, classes, linestyle='-')\n",
320 | "\n",
321 | "# add a legend\n",
322 | "from matplotlib.lines import Line2D\n",
323 | "\n",
324 | "lines = [\n",
325 | " Line2D([0], [0], ls='--'),\n",
326 | " Line2D([0], [0], ls=':'),\n",
327 | " Line2D([0], [0], ls='-'),\n",
328 | "]\n",
329 | "from matplotlib.legend import Legend\n",
330 | "\n",
331 | "leg = Legend(ax, lines, labels=['part1 Keras', 'xgboost', 'conifer'], loc='lower right', frameon=False)\n",
332 | "ax.add_artist(leg)"
333 | ]
334 | },
335 | {
336 | "cell_type": "markdown",
337 | "metadata": {},
338 | "source": [
339 | "## Build\n",
340 | "Now we'll run the Vitis HLS and Vivado synthesis. HLS C Synthesis compiles our C++ to RTL, performing scheduling and resource mapping. Vivado synthesis synthesizes the RTL from the previous step into a netlist, and produces a more realistic resource estimation. The latency can't change during Vivado synthesis, it's fixed in the RTL description.\n",
341 | "\n",
342 | "After the build completes we can also browse the new log files and reports that are generated.\n",
343 | "\n",
344 | "**Warning**: this step might take around 10 minutes"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": null,
350 | "metadata": {
351 | "scrolled": true
352 | },
353 | "outputs": [],
354 | "source": [
355 | "conifer_model.build(synth=True, vsynth=True)"
356 | ]
357 | },
358 | {
359 | "cell_type": "markdown",
360 | "metadata": {},
361 | "source": [
362 | "## Report\n",
363 | "If the synthesis completed successfuly, we can extract the key metrics from the reports and print them out.\n",
364 | "The section `\"vsynth\"` contains the report from the Vivado RTL synthesis, which is usually lower, and more realistic than the HLS report."
365 | ]
366 | },
367 | {
368 | "cell_type": "code",
369 | "execution_count": null,
370 | "metadata": {},
371 | "outputs": [],
372 | "source": [
373 | "report = conifer_model.read_report()\n",
374 | "plotting.print_dict(report)"
375 | ]
376 | },
377 | {
378 | "cell_type": "markdown",
379 | "metadata": {},
380 | "source": [
381 | "## Deployment with `pynq`\n",
382 | "\n",
383 | "There are two main ways to deploy a BDT to an accelerator card with `conifer`:\n",
384 | "- build a static accelerator with Xilinx HLS backend\n",
385 | "- use the dynamic accelerator Forest Processing Unit (FPU)\n",
386 | "\n",
387 | "Getting started with the FPU is straightforward. For a supported board, you will need only the converted model JSON, and a bitfile that can be downloaded from the conifer website. Read more about the FPU here: https://ssummers.web.cern.ch/conifer/fpu.html\n",
388 | "\n",
389 | "However, without a physical device there's not much to show, so in this section we'll see how to deploy the model that we already trained as a static accelerator to a `pynq-z2` board.\n",
390 | "We'll use the `AcceleratorConfig` part of the configuration that we previously left undefined."
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": null,
396 | "metadata": {},
397 | "outputs": [],
398 | "source": [
399 | "pynq_model_cfg = conifer.backends.xilinxhls.auto_config()\n",
400 | "pynq_model_cfg['OutputDir'] = 'model_5_pynq' # choose a new project directory\n",
401 | "pynq_model_cfg['ProjectName'] = 'conifer_jettag'\n",
402 | "pynq_model_cfg['AcceleratorConfig'] = {\n",
403 | " 'Board': 'pynq-z2', # choose a pynq-z2 board\n",
404 | " 'InterfaceType': 'float', # floating point for the data I/O (this is default)\n",
405 | "}\n",
406 | "\n",
407 | "# print the config\n",
408 | "print('Modified Configuration\\n' + '-' * 50)\n",
409 | "print(json.dumps(pynq_model_cfg, indent=2))\n",
410 | "print('-' * 50)"
411 | ]
412 | },
413 | {
414 | "cell_type": "markdown",
415 | "metadata": {},
416 | "source": [
417 | "## Supported boards\n",
418 | "\n",
419 | "Here we print the list of supported boards, so you can see what else works \"out of the box\". It's relatively easy to add other Zynq SoC or Alveo boards, for example to add an Alveo U50 card targeting `xilinx_u50_gen3x16_xdma_5_202210_1` platform:\n",
420 | "\n",
421 | "```\n",
422 | "u50 = conifer.backends.boards.AlveoConfig.default_config()\n",
423 | "u50['xilinx_part'] = 'xcu50-fsvh2104-2-e'\n",
424 | "u50['platform'] = 'xilinx_u50_gen3x16_xdma_5_202210_1'\n",
425 | "u50['name'] = 'xilinx_u50_gen3x16_xdma_5_202210_1'\n",
426 | "u50 = conifer.backends.boards.AlveoConfig(u50)\n",
427 | "conifer.backends.boards.register_board_config(u50.name, u50)\n",
428 | "```"
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "execution_count": null,
434 | "metadata": {},
435 | "outputs": [],
436 | "source": [
437 | "# This is the full list of supported boards:\n",
438 | "print(f'Supported boards: {conifer.backends.boards.get_available_boards()}')"
439 | ]
440 | },
441 | {
442 | "cell_type": "markdown",
443 | "metadata": {},
444 | "source": [
445 | "### Load the model\n",
446 | "\n",
447 | "We load the JSON for the conifer model we previously used, applying the new configuration just defined. We'll see that the FPGA part specified by the board overrides the `XilinxPart` specified in the default."
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "execution_count": null,
453 | "metadata": {},
454 | "outputs": [],
455 | "source": [
456 | "pynq_model = conifer.model.load_model('model_5/my_prj.json', new_config=pynq_model_cfg)\n",
457 | "pynq_model.write()"
458 | ]
459 | },
460 | {
461 | "cell_type": "markdown",
462 | "metadata": {},
463 | "source": [
464 | "## Build the model\n",
465 | "\n",
466 | "Now we run `build` again, running HLS Synthesis, Logic Synthesis and Place & Route, finally producing a bitfile and an archive of files that we'll need to run inference on the pynq-z2 board. \n",
467 | "\n",
468 | "**Warning**: this step might take around 20 minutes to complete.\n",
469 | "\n",
470 | "The floorplan of the bitfile should like something like this, where the individual tree modules are highlighted in different colours:\n",
471 | "\n",
472 | "
"
473 | ]
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": null,
478 | "metadata": {},
479 | "outputs": [],
480 | "source": [
481 | "pynq_model.build(synth=True, bitfile=True, package=True)"
482 | ]
483 | },
484 | {
485 | "cell_type": "markdown",
486 | "metadata": {},
487 | "source": [
488 | "## Inference on pynq-z2\n",
489 | "\n",
490 | "Running inference on the `pynq-z2` would look like this:\n",
491 | "- download the `model_5/conifer_jettag.zip` archive from this notebook\n",
492 | "- upload `conifer_jettag.zip` to the pynq-z2 device and unzip it\n",
493 | "- start a jupyter notebook on the `pynq-z2` and run the following code:\n",
494 | "\n",
495 | "```\n",
496 | "import conifer\n",
497 | "accelerator = conifer.backends.xilinxhls.runtime.ZynqDriver('conifer_jettag.bit', batch_size=1)\n",
498 | "X = ... # load some data \n",
499 | "y_pynq = accelerator.decision_function(X)\n",
500 | "```\n"
501 | ]
502 | }
503 | ],
504 | "metadata": {
505 | "kernelspec": {
506 | "display_name": "Python 3 (ipykernel)",
507 | "language": "python",
508 | "name": "python3"
509 | },
510 | "language_info": {
511 | "codemirror_mode": {
512 | "name": "ipython",
513 | "version": 3
514 | },
515 | "file_extension": ".py",
516 | "mimetype": "text/x-python",
517 | "name": "python",
518 | "nbconvert_exporter": "python",
519 | "pygments_lexer": "ipython3",
520 | "version": "3.10.10"
521 | }
522 | },
523 | "nbformat": 4,
524 | "nbformat_minor": 4
525 | }
526 |
--------------------------------------------------------------------------------
/part7b_deployment.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "033cc4d9",
6 | "metadata": {},
7 | "source": [
8 | "# Part 7b: Deployment on PYNQ-Z2\n",
9 | "The following section is the code to execute in the pynq-z2 jupyter notebook to execute NN inference. \n",
10 | "\n",
11 | "The following cells are intended to run on a pynq-z2, they will not run on the server used to train and synthesize models!\n",
12 | "\n",
13 | "First, import our driver `Overlay` class. We'll also load the test data."
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": null,
19 | "id": "89c67e4f",
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "from axi_stream_driver import NeuralNetworkOverlay\n",
24 | "import numpy as np\n",
25 | "\n",
26 | "X_test = np.load('X_test.npy')\n",
27 | "y_test = np.load('y_test.npy')"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "id": "551c5cd6",
33 | "metadata": {},
34 | "source": [
35 | "Create a `NeuralNetworkOverlay` object. This will download the `Overlay` (bitfile) onto the PL of the pynq-z2. We provide the `X_test.shape` and `y_test.shape` to allocate some buffers for the data transfer."
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "id": "cfb786f3",
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
45 | "nn = NeuralNetworkOverlay('hls4ml_nn.bit', X_test.shape, y_test.shape)"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "id": "5fde9b2d",
51 | "metadata": {},
52 | "source": [
53 | "Now run the prediction! When we set `profile=True` the function times the inference, and prints out a summary as well as returning the profiling information. We also save the output to a file so we can do some validation."
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "id": "1fd6dee7",
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "y_hw, latency, throughput = nn.predict(X_test, profile=True)"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "id": "1983e7d7",
69 | "metadata": {},
70 | "source": [
71 | "An example print out looks like:\n",
72 | "\n",
73 | "```\n",
74 | "Classified 166000 samples in 0.402568 seconds (412352.6956936468 inferences / s)\n",
75 | "```\n",
76 | "\n",
77 | "Now let's save the output and transfer this back to the host."
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "id": "981ffced",
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "np.save('y_hw.npy', y_hw)"
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "id": "b9e92d1e",
93 | "metadata": {},
94 | "source": [
95 | "Now, go back to the host and follow `part7c_validation.ipynb`"
96 | ]
97 | }
98 | ],
99 | "metadata": {
100 | "kernelspec": {
101 | "display_name": "Python 3 (ipykernel)",
102 | "language": "python",
103 | "name": "python3"
104 | },
105 | "language_info": {
106 | "codemirror_mode": {
107 | "name": "ipython",
108 | "version": 3
109 | },
110 | "file_extension": ".py",
111 | "mimetype": "text/x-python",
112 | "name": "python",
113 | "nbconvert_exporter": "python",
114 | "pygments_lexer": "ipython3",
115 | "version": "3.10.16"
116 | }
117 | },
118 | "nbformat": 4,
119 | "nbformat_minor": 5
120 | }
121 |
--------------------------------------------------------------------------------
/part7c_validation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "005ae126",
6 | "metadata": {},
7 | "source": [
8 | "# Part 7c: Validation\n",
9 | "We executed NN inference on the pynq-z2! Now we can copy the `y_hw.npy` back to the host we've been using for the training and synthesis, and make a final plot to check that the output we took on the board is as expected.\n",
10 | "\n",
11 | "The command to copy it back is\n",
12 | "\n",
13 | "```bash\n",
14 | "scp xilinx@192.168.2.99:~/jupyter_notebooks/y_hw.npy model_3/\n",
15 | "```"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "id": "fee790be",
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "import matplotlib.pyplot as plt\n",
26 | "import numpy as np\n",
27 | "import plotting\n",
28 | "\n",
29 | "%matplotlib inline\n",
30 | "from sklearn.metrics import accuracy_score\n",
31 | "\n",
32 | "y_hw = np.load('model_3/y_hw.npy')\n",
33 | "y_test = np.load('y_test.npy')\n",
34 | "classes = np.load('classes.npy', allow_pickle=True)\n",
35 | "y_hls = np.load('model_3/y_hls.npy')\n",
36 | "y_qkeras = np.load('model_3/y_qkeras.npy')\n",
37 | "\n",
38 | "print(\"Accuracy QKeras, CPU: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_qkeras, axis=1))))\n",
39 | "print(\"Accuracy hls4ml, CPU: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_hls, axis=1))))\n",
40 | "print(\"Accuracy hls4ml, pynq-z2: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_hw, axis=1))))\n",
41 | "\n",
42 | "fig, ax = plt.subplots(figsize=(9, 9))\n",
43 | "_ = plotting.makeRoc(y_test, y_qkeras, classes, linestyle='-')\n",
44 | "plt.gca().set_prop_cycle(None) # reset the colors\n",
45 | "_ = plotting.makeRoc(y_test, y_hls, classes, linestyle='--')\n",
46 | "plt.gca().set_prop_cycle(None) # reset the colors\n",
47 | "_ = plotting.makeRoc(y_test, y_hw, classes, linestyle='-.')\n",
48 | "\n",
49 | "from matplotlib.lines import Line2D\n",
50 | "\n",
51 | "lines = [Line2D([0], [0], ls='-'), Line2D([0], [0], ls='--'), Line2D([0], [0], ls='-.')]\n",
52 | "from matplotlib.legend import Legend\n",
53 | "\n",
54 | "leg = Legend(ax, lines, labels=['QKeras, CPU', 'hls4ml, CPU', 'hls4ml, pynq-z2'], loc='lower right', frameon=False)\n",
55 | "ax.add_artist(leg)"
56 | ]
57 | }
58 | ],
59 | "metadata": {
60 | "kernelspec": {
61 | "display_name": "Python 3 (ipykernel)",
62 | "language": "python",
63 | "name": "python3"
64 | },
65 | "language_info": {
66 | "codemirror_mode": {
67 | "name": "ipython",
68 | "version": 3
69 | },
70 | "file_extension": ".py",
71 | "mimetype": "text/x-python",
72 | "name": "python",
73 | "nbconvert_exporter": "python",
74 | "pygments_lexer": "ipython3",
75 | "version": "3.10.16"
76 | }
77 | },
78 | "nbformat": 4,
79 | "nbformat_minor": 5
80 | }
81 |
--------------------------------------------------------------------------------
/part8_symbolic_regression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "79933ff7",
6 | "metadata": {},
7 | "source": [
8 | "# Part 8: Symbolic Regression"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "id": "ede2226f",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import numpy as np\n",
19 | "import sympy\n",
20 | "import matplotlib.pyplot as plt\n",
21 | "import hls4ml\n",
22 | "from sklearn.model_selection import train_test_split\n",
23 | "from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
24 | "from sklearn.metrics import roc_curve, auc, accuracy_score\n",
25 | "from tensorflow.keras.utils import to_categorical\n",
26 | "from sklearn.datasets import fetch_openml"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "id": "d9e2b159",
32 | "metadata": {},
33 | "source": [
34 | "## Load the LHC jet tagging dataset"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "id": "ee6d96bd",
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "data = fetch_openml('hls4ml_lhc_jets_hlf')\n",
45 | "X, Y = data['data'].to_numpy(), data['target'].to_numpy()\n",
46 | "print(data['feature_names'])\n",
47 | "print(X.shape, Y.shape)\n",
48 | "print(Y[:10])\n",
49 | "\n",
50 | "LE = LabelEncoder()\n",
51 | "Y = LE.fit_transform(Y)\n",
52 | "Y = to_categorical(Y, 5)\n",
53 | "\n",
54 | "Y = 2 * Y - 1\n",
55 | "print(Y[:10])"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "id": "0502aea8",
62 | "metadata": {},
63 | "outputs": [],
64 | "source": [
65 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.5, random_state=123)\n",
66 | "\n",
67 | "scaler = StandardScaler().fit(X_train)\n",
68 | "X_train = scaler.transform(X_train)\n",
69 | "X_test = scaler.transform(X_test)\n",
70 | "\n",
71 | "# PySR (or any genetic programming based SR) not happy with too many training data\n",
72 | "X_train = X_train[:8000]\n",
73 | "Y_train = Y_train[:8000]\n",
74 | "\n",
75 | "print('X_train.shape: ' + str(X_train.shape))\n",
76 | "print('Y_train.shape: ' + str(Y_train.shape))\n",
77 | "print('X_test.shape: ' + str(X_test.shape))\n",
78 | "print('Y_test.shape: ' + str(Y_test.shape))"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "id": "7ec86106",
84 | "metadata": {},
85 | "source": [
86 | "## Perform SR with PySR (if installed)"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "id": "57e7896d",
92 | "metadata": {},
93 | "source": [
94 | "If you want to run `PySR` (a genetic programming-based symbolic regression software), please see https://github.com/MilesCranmer/PySR for installation and intructions.\n",
95 | "\n",
96 | "Below is an example configuration script to run training in `PySR`, where one can specify the allowed primitive functions `unary_operators` `binary_operators` (e.g. `+`, `*`, `sin`) and constraints `complexity_of_operators` `constraints` `nested_constraints` in the equation seacrhing. The training results will be stored in a `.pkl` file that contains the final equations selected by the training strategy `model_selection`.\n",
97 | "\n",
98 | "We also provide an already trained PySR model `sr/example.pkl` in the following sections for demonstrating the HLS implementation."
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "id": "96a651dd",
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "from pysr import PySRRegressor\n",
109 | "\n",
110 | "!export JULIA_NUM_THREADS=32\n",
111 | "\n",
112 | "model_pysr = PySRRegressor(\n",
113 | " model_selection='accuracy',\n",
114 | " niterations=40,\n",
115 | " timeout_in_seconds=60 * 60 * 1,\n",
116 | " maxsize=40,\n",
117 | " select_k_features=6,\n",
118 | " binary_operators=['+', '-', '*'],\n",
119 | " unary_operators=['sin', 'sc(x)=sin(x)*cos(x)'],\n",
120 | " complexity_of_operators={'+': 1, '-': 1, '*': 1, 'sin': 1, 'sc': 1},\n",
121 | " constraints={'sin': 20, 'sc': 20},\n",
122 | " nested_constraints={'sin': {'sin': 0, 'sc': 0}, 'sc': {'sin': 0, 'sc': 0}},\n",
123 | " extra_sympy_mappings={'sc': lambda x: sympy.sin(x) * sympy.cos(x)},\n",
124 | " loss='L2MarginLoss()', # (1 - y*y')^2\n",
125 | ")"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "id": "5f4d9501",
132 | "metadata": {},
133 | "outputs": [],
134 | "source": [
135 | "model_pysr.fit(X_train, Y_train)"
136 | ]
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "id": "846e710b",
141 | "metadata": {},
142 | "source": [
143 | "## Prepare symbolic expressions in strings first"
144 | ]
145 | },
146 | {
147 | "cell_type": "markdown",
148 | "id": "c7aaf105",
149 | "metadata": {},
150 | "source": [
151 | "We provide a trained model for the HLS demonstration.\n",
152 | "\n",
153 | "**If you have `PySR` installed**, you can directly load the trained expressions from the output file `sr/example.pkl`.\n",
154 | "`PySR` allows custom functions to be defined, such as sc(x):=sin(x)*cos(x) in this example, they need to be re-defined through `extra_sympy_mappings` and a new `sympy` class when retrieving the equations for evaluation."
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "id": "d3d5d2cd",
161 | "metadata": {
162 | "scrolled": true
163 | },
164 | "outputs": [],
165 | "source": [
166 | "from pysr import PySRRegressor\n",
167 | "\n",
168 | "model_pysr = PySRRegressor.from_file('sr/example.pkl')\n",
169 | "with sympy.evaluate(True):\n",
170 | " for i in range(5):\n",
171 | " print('Tagger {} = '.format(i) + str(model_pysr.sympy()[i]) + '\\n------------------------------------------')\n",
172 | "\n",
173 | "# Re-write custom operator defined from PySR config: sc(x) = sin(x)*cos(x)\n",
174 | "model_pysr.set_params(extra_sympy_mappings={\"sc\": lambda x: sympy.sin(x) * sympy.cos(x)})\n",
175 | "model_pysr.refresh()\n",
176 | "\n",
177 | "\n",
178 | "class sc(sympy.Function):\n",
179 | " pass"
180 | ]
181 | },
182 | {
183 | "cell_type": "markdown",
184 | "id": "699d2e05",
185 | "metadata": {},
186 | "source": [
187 | "There are two options for evaluating math functions in `hls4ml`, one is using the standard HLS math library (`func`), another one is using approximation with user-defined lookup tables (`func_lut`) for resources saving. We will define the lookup tables (table range and size) for `func_lut` later.\n",
188 | "\n",
189 | "We have the equations in the `sympy` format, now convert them into strings: `expr` for using the standard functions and `expr_lut` for using the approximation with lookup tables. We will re-parse `expr` and `expr_lut` from strings in `sympy` format for the `hls4ml` converter."
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "id": "7219a874",
196 | "metadata": {},
197 | "outputs": [],
198 | "source": [
199 | "expr = []\n",
200 | "expr_lut = []\n",
201 | "for i in range(5):\n",
202 | " expr.append(str(model_pysr.sympy()[i]))\n",
203 | " expr_lut.append(expr[i].replace(\"sin\", \"sin_lut\").replace(\"cos\", \"cos_lut\"))"
204 | ]
205 | },
206 | {
207 | "cell_type": "markdown",
208 | "id": "0abcba26",
209 | "metadata": {},
210 | "source": [
211 | "**If you don't have PySR installed**, you can also write your expressions directly in strings and parse in `sympy` format, which can then be fed to `hls4ml` converter. Here again, `expr` for using standard math library, `expr_lut` for using approximation with lookup tables."
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": null,
217 | "id": "3356d1e6",
218 | "metadata": {},
219 | "outputs": [],
220 | "source": [
221 | "# Expressions from 'sr/example.pkl'\n",
222 | "\n",
223 | "# Expressions that will use Vivado math library\n",
224 | "expr = [\n",
225 | " '-0.1630426*(sin(-0.75052315)*cos(-0.75052315) - 0.84283006)*sin(2*x14 - 1.03665108)*cos(2*x14 - 1.03665108) - sin(x14 - (0.9237657 - 0.11933863*x3)*(-x15 + 2*x2 - 0.3817056) + 1.761264957)',\n",
226 | " '(-(0.5822144*sin(0.83811*x14)*cos(0.83811*x14) - 0.5324657)*(sin(0.3923645*x2)*cos(0.3923645*x2) - 0.63548696) + sin(x14 - 0.3923645*x15 + x3 + 0.51168373)*cos(x14 - 0.3923645*x15 + x3 + 0.51168373))*(0.561041303633489*sin(x15) - 0.47277835) - 0.84055585',\n",
227 | " '0.49239117*(sin(x3)*cos(x3) + sin(x15 + 0.76784414*x3)*cos(x15 + 0.76784414*x3))*(sin(-0.13417026)*cos(-0.13417026) + sin(0.5180547)*cos(0.5180547) + sin(x2)*cos(x2)) - sin(x14 + 0.25715914*x15*x3 - x2 - x3 + 0.66443527)',\n",
228 | " '0.41071504*(0.9298677 - sin(0.59376544*x15))*(sin(x14)*cos(x14) + 5.2546763*sin(0.71913457 - x3)*cos(0.71913457 - x3))*(-sin(2*x3)*cos(2*x3) + sin(5.2546763*x14 + x3 + 0.77032656)*cos(5.2546763*x14 + x3 + 0.77032656) + 0.32492808) - 0.863786752431664',\n",
229 | " '(1.0745832 - sin(-x14 - 0.4094719)*cos(-x14 - 0.4094719))*(-0.15737492*x15 - sin(x14 - 4.2594776)*cos(x14 - 4.2594776) + sin(3*x14 - x3*(x14 - 4.1772995) - x3 + 3.087878)*cos(3*x14 - x3*(x14 - 4.1772995) - x3 + 3.087878) - 0.690204005690814)',\n",
230 | "]\n",
231 | "# Expressions that will use look-up table approximated math functions\n",
232 | "expr_lut = []\n",
233 | "for i in range(len(expr)):\n",
234 | " expr_lut.append(expr[i].replace(\"sin\", \"sin_lut\").replace(\"cos\", \"cos_lut\"))"
235 | ]
236 | },
237 | {
238 | "cell_type": "markdown",
239 | "id": "788ee608",
240 | "metadata": {},
241 | "source": [
242 | "## Then parse the strings to sympy expressions"
243 | ]
244 | },
245 | {
246 | "cell_type": "markdown",
247 | "id": "03fc8284",
248 | "metadata": {},
249 | "source": [
250 | "Define the lookup tables for approximating math functions. The table range and size can be customized for each function to be approximated, they depend on how much precision can be compromised to save more resources."
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": null,
256 | "id": "920e2326",
257 | "metadata": {},
258 | "outputs": [],
259 | "source": [
260 | "from hls4ml.utils.symbolic_utils import init_pysr_lut_functions\n",
261 | "\n",
262 | "# For functions approximated with look-up table, define the table range and size\n",
263 | "function_definitions = [\n",
264 | " 'sin_lut(x) = math_lut(sin, x, N=256, range_start=-8, range_end=8)',\n",
265 | " 'cos_lut(x) = math_lut(cos, x, N=256, range_start=-8, range_end=8)',\n",
266 | "]\n",
267 | "init_pysr_lut_functions(init_defaults=True, function_definitions=function_definitions)\n",
268 | "\n",
269 | "lut_functions = {\n",
270 | " 'sin_lut': {'math_func': 'sin', 'range_start': -8, 'range_end': 8, 'table_size': 256},\n",
271 | " 'cos_lut': {'math_func': 'cos', 'range_start': -8, 'range_end': 8, 'table_size': 256},\n",
272 | "}"
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "id": "8be93891",
278 | "metadata": {},
279 | "source": [
280 | "Parse `expr` and `expr_lut` to sympy expressions."
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": null,
286 | "id": "96f61066",
287 | "metadata": {},
288 | "outputs": [],
289 | "source": [
290 | "# Use sympy to parse strings into sympy expressions\n",
291 | "for i in range(len(expr)):\n",
292 | " print('expr =\\n' + expr[i])\n",
293 | " print(\"----------------------------------------\")\n",
294 | " print('expr_LUT =\\n' + expr_lut[i])\n",
295 | " print(\"========================================\")\n",
296 | " expr[i] = sympy.parsing.sympy_parser.parse_expr(expr[i])\n",
297 | " expr_lut[i] = sympy.parsing.sympy_parser.parse_expr(expr_lut[i])"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "id": "f7548c93",
303 | "metadata": {},
304 | "source": [
305 | "Use `hls4ml.converters.convert_from_symbolic_expression` to convert sympy expressions and compile."
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": null,
311 | "id": "46ff4b5e",
312 | "metadata": {},
313 | "outputs": [],
314 | "source": [
315 | "# Use hls4ml to convert sympy expressions into HLS model\n",
316 | "hls_model = hls4ml.converters.convert_from_symbolic_expression(\n",
317 | " expr, n_symbols=16, output_dir='my-hls-test', precision='ap_fixed<16,6>', part='xcvu9p-flga2577-2-e'\n",
318 | ")\n",
319 | "hls_model.write()\n",
320 | "hls_model.compile()\n",
321 | "\n",
322 | "hls_model_lut = hls4ml.converters.convert_from_symbolic_expression(\n",
323 | " expr_lut,\n",
324 | " n_symbols=16,\n",
325 | " output_dir='my-hls-test-lut',\n",
326 | " precision='ap_fixed<16,6>',\n",
327 | " part='xcvu9p-flga2577-2-e',\n",
328 | " lut_functions=lut_functions,\n",
329 | ")\n",
330 | "hls_model_lut.write()\n",
331 | "hls_model_lut.compile()"
332 | ]
333 | },
334 | {
335 | "cell_type": "markdown",
336 | "id": "08682628",
337 | "metadata": {},
338 | "source": [
339 | "## Compare outputs: PySR vs HLS vs HLS(LUT)"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": null,
345 | "id": "39269441",
346 | "metadata": {},
347 | "outputs": [],
348 | "source": [
349 | "test_vector = np.random.rand(1, 16) * 4 - 2\n",
350 | "# print(model_pysr.predict(test_vector))\n",
351 | "print(hls_model.predict(test_vector))\n",
352 | "print(hls_model_lut.predict(test_vector))"
353 | ]
354 | },
355 | {
356 | "cell_type": "markdown",
357 | "id": "08795fca",
358 | "metadata": {},
359 | "source": [
360 | "## Compare performance on the dataset"
361 | ]
362 | },
363 | {
364 | "cell_type": "code",
365 | "execution_count": null,
366 | "id": "05894f0b",
367 | "metadata": {},
368 | "outputs": [],
369 | "source": [
370 | "# Y_pysr = model_pysr.predict(X_test)\n",
371 | "Y_hls = hls_model.predict(X_test)\n",
372 | "Y_hls_lut = hls_model_lut.predict(X_test)\n",
373 | "# auc_pysr=[]\n",
374 | "auc_hls = []\n",
375 | "auc_hls_lut = []\n",
376 | "for x, label in enumerate(LE.classes_):\n",
377 | " # fpr_pysr, tpr_pysr, _ = roc_curve(Y_test[:, x], Y_pysr[:, x])\n",
378 | " fpr_hls, tpr_hls, _ = roc_curve(Y_test[:, x], Y_hls[:, x])\n",
379 | " fpr_hls_lut, tpr_hls_lut, _ = roc_curve(Y_test[:, x], Y_hls_lut[:, x])\n",
380 | " # auc_pysr.append(auc(fpr_pysr, tpr_pysr))\n",
381 | " auc_hls.append(auc(fpr_hls, tpr_hls))\n",
382 | " auc_hls_lut.append(auc(fpr_hls_lut, tpr_hls_lut))\n",
383 | "\n",
384 | "# print('PySR acc = {0:.3f}'.format(accuracy_score(np.argmax(Y_test, axis=1), np.argmax(Y_pysr, axis=1))))\n",
385 | "# print('PySR auc = {0:.3f},{1:.3f},{2:.3f},{3:.3f},{4:.3f}'.format(auc_pysr[0],auc_pysr[1],auc_pysr[2],auc_pysr[3],auc_pysr[4]))\n",
386 | "print('HLS acc = {0:.3f}'.format(accuracy_score(np.argmax(Y_test, axis=1), np.argmax(Y_hls, axis=1))))\n",
387 | "print(\n",
388 | " 'HLS auc = {0:.3f},{1:.3f},{2:.3f},{3:.3f},{4:.3f}'.format(\n",
389 | " auc_hls[0], auc_hls[1], auc_hls[2], auc_hls[3], auc_hls[4]\n",
390 | " )\n",
391 | ")\n",
392 | "print('HLS_LUT acc = {0:.3f}'.format(accuracy_score(np.argmax(Y_test, axis=1), np.argmax(Y_hls_lut, axis=1))))\n",
393 | "print(\n",
394 | " 'HLS_LUT auc = {0:.3f},{1:.3f},{2:.3f},{3:.3f},{4:.3f}'.format(\n",
395 | " auc_hls_lut[0], auc_hls_lut[1], auc_hls_lut[2], auc_hls_lut[3], auc_hls_lut[4]\n",
396 | " )\n",
397 | ")"
398 | ]
399 | },
400 | {
401 | "cell_type": "code",
402 | "execution_count": null,
403 | "id": "002643a3",
404 | "metadata": {},
405 | "outputs": [],
406 | "source": [
407 | "def plot_roc(y_test, y_pred, labels, model):\n",
408 | " color = ['blue', 'orange', 'green', 'red', 'purple']\n",
409 | " for x, label in enumerate(labels):\n",
410 | " fpr, tpr, _ = roc_curve(y_test[:, x], y_pred[:, x])\n",
411 | " if model == 'pysr':\n",
412 | " plt.plot(\n",
413 | " tpr,\n",
414 | " fpr,\n",
415 | " label='{0}, PySR, AUC = {1:.1f}'.format(label, auc(fpr, tpr) * 100.0),\n",
416 | " linestyle='solid',\n",
417 | " color=color[x],\n",
418 | " lw=1.5,\n",
419 | " )\n",
420 | " if model == 'hls':\n",
421 | " plt.plot(\n",
422 | " tpr,\n",
423 | " fpr,\n",
424 | " label='{0}, HLS, AUC = {1:.1f}'.format(label, auc(fpr, tpr) * 100.0),\n",
425 | " linestyle='dotted',\n",
426 | " color=color[x],\n",
427 | " lw=1.5,\n",
428 | " )\n",
429 | " if model == 'hls_lut':\n",
430 | " plt.plot(\n",
431 | " tpr,\n",
432 | " fpr,\n",
433 | " label='{0}, HLS LUT, AUC = {1:.1f}'.format(label, auc(fpr, tpr) * 100.0),\n",
434 | " linestyle='None',\n",
435 | " color=color[x],\n",
436 | " lw=1,\n",
437 | " marker='o',\n",
438 | " ms=1,\n",
439 | " )\n",
440 | " plt.semilogy()\n",
441 | " plt.xlabel('True positive rate', size=15, loc='right')\n",
442 | " plt.ylabel('False positive rate', size=15, loc='top')\n",
443 | " plt.tick_params(axis='both', which='major', direction='in', length=6, width=1.2, labelsize=12, right=True, top=True)\n",
444 | " plt.tick_params(axis='both', which='minor', direction='in', length=2, width=1, labelsize=12, right=True, top=True)\n",
445 | " plt.xlim(0, 1)\n",
446 | " plt.ylim(0.001, 1)\n",
447 | " plt.grid(True)\n",
448 | " plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=12)\n",
449 | "\n",
450 | "\n",
451 | "plt.figure(figsize=(15, 15))\n",
452 | "axes = plt.subplot(2, 2, 1)\n",
453 | "# plot_roc(Y_test, Y_pysr, LE.classes_, 'pysr')\n",
454 | "plot_roc(Y_test, Y_hls, LE.classes_, 'hls')\n",
455 | "plot_roc(Y_test, Y_hls_lut, LE.classes_, 'hls_lut')"
456 | ]
457 | },
458 | {
459 | "cell_type": "markdown",
460 | "id": "7beb92ea",
461 | "metadata": {},
462 | "source": [
463 | "## Run synthesis from command line"
464 | ]
465 | },
466 | {
467 | "cell_type": "code",
468 | "execution_count": null,
469 | "id": "e4047f52",
470 | "metadata": {},
471 | "outputs": [],
472 | "source": [
473 | "!source ${XILINX_VITIS}/settings64.sh\n",
474 | "!vitis_hls -f build_prj.tcl \"reset=1 synth=1 csim=0 cosim=0 validation=0 export=0 vsynth=0\"\n",
475 | "!cat my-hls-test/myproject_prj/solution1/syn/report/myproject_csynth.rpt"
476 | ]
477 | }
478 | ],
479 | "metadata": {
480 | "kernelspec": {
481 | "display_name": "Python 3 (ipykernel)",
482 | "language": "python",
483 | "name": "python3"
484 | },
485 | "language_info": {
486 | "codemirror_mode": {
487 | "name": "ipython",
488 | "version": 3
489 | },
490 | "file_extension": ".py",
491 | "mimetype": "text/x-python",
492 | "name": "python",
493 | "nbconvert_exporter": "python",
494 | "pygments_lexer": "ipython3",
495 | "version": "3.10.16"
496 | }
497 | },
498 | "nbformat": 4,
499 | "nbformat_minor": 5
500 | }
501 |
--------------------------------------------------------------------------------
/plotting.py:
--------------------------------------------------------------------------------
1 | import itertools
2 |
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import pandas as pd
6 | from sklearn.metrics import auc, roc_curve
7 |
8 |
9 | # confusion matrix code from Maurizio
10 | # /eos/user/m/mpierini/DeepLearning/ML4FPGA/jupyter/HbbTagger_Conv1D.ipynb
11 | def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
12 | """
13 | This function prints and plots the confusion matrix.
14 | Normalization can be applied by setting `normalize=True`.
15 | """
16 | if normalize:
17 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
18 |
19 | plt.imshow(cm, interpolation='nearest', cmap=cmap)
20 | # plt.title(title)
21 | cbar = plt.colorbar()
22 | plt.clim(0, 1)
23 | cbar.set_label(title)
24 | tick_marks = np.arange(len(classes))
25 | plt.xticks(tick_marks, classes, rotation=45)
26 | plt.yticks(tick_marks, classes)
27 |
28 | fmt = '.2f' if normalize else 'd'
29 | thresh = cm.max() / 2.0
30 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
31 | plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")
32 |
33 | # plt.tight_layout()
34 | plt.ylabel('True label')
35 | plt.xlabel('Predicted label')
36 |
37 |
38 | def plotRoc(fpr, tpr, auc, labels, linestyle, legend=True):
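39 |     """Overlay one ROC curve per label: signal efficiency (TPR) vs background efficiency (FPR) on a log y-axis."""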
39 | for _i, label in enumerate(labels):
40 | plt.plot(
41 | tpr[label],
42 | fpr[label],
43 | label='{} tagger, AUC = {:.1f}%'.format(label.replace('j_', ''), auc[label] * 100.0),
44 | linestyle=linestyle,
45 | )
46 | plt.semilogy()
47 | plt.xlabel("Signal Efficiency")
48 | plt.ylabel("Background Efficiency")
49 | plt.ylim(0.001, 1)
50 | plt.grid(True)
51 | if legend:
52 | plt.legend(loc='upper left')
53 | plt.figtext(0.25, 0.90, 'hls4ml', fontweight='bold', wrap=True, horizontalalignment='right', fontsize=14)
54 |
55 |
56 | def rocData(y, predict_test, labels):
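57 |     """Compute one-vs-rest ROC curves (FPR, TPR) and AUC for each label, returned as dicts keyed by label."""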
57 | df = pd.DataFrame()
58 |
59 | fpr = {}
60 | tpr = {}
61 | auc1 = {}
62 |
63 | for i, label in enumerate(labels):
64 | df[label] = y[:, i]
65 | df[label + '_pred'] = predict_test[:, i]
66 |
67 |         fpr[label], tpr[label], _ = roc_curve(df[label], df[label + '_pred'])
68 |
69 | auc1[label] = auc(fpr[label], tpr[label])
70 | return fpr, tpr, auc1
71 |
72 |
73 | def makeRoc(y, predict_test, labels, linestyle='-', legend=True):
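74 |     """Compute and plot ROC curves for all labels; returns the predictions unchanged for chaining."""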
74 | if 'j_index' in labels:
75 | labels.remove('j_index')
76 |
77 | fpr, tpr, auc1 = rocData(y, predict_test, labels)
78 | plotRoc(fpr, tpr, auc1, labels, linestyle, legend=legend)
79 | return predict_test
80 |
81 |
82 | def print_dict(d, indent=0):
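83 |     """Recursively pretty-print a (possibly nested) dict with aligned values."""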
83 | for key, value in d.items():
84 | print(' ' * indent + str(key), end='')
85 | if isinstance(value, dict):
86 | print()
87 | print_dict(value, indent + 1)
88 | else:
89 | print(':' + ' ' * (20 - len(key) - 2 * indent) + str(value))
90 |
--------------------------------------------------------------------------------
/pruned_cnn/vivado_synth.rpt:
--------------------------------------------------------------------------------
1 | Copyright 1986-2020 Xilinx, Inc. All Rights Reserved.
2 | --------------------------------------------------------------------------------------
3 | | Tool Version : Vivado v.2020.1 (lin64) Build 2902540 Wed May 27 19:54:35 MDT 2020
4 | | Date : Mon Jun 28 13:59:34 2021
5 | | Host : geonosis.cern.ch running 64-bit CentOS Linux release 7.9.2009 (Core)
6 | | Command : report_utilization -file vivado_synth.rpt
7 | | Design : myproject
8 | | Device : xcu250figd2104-2L
9 | | Design State : Synthesized
10 | --------------------------------------------------------------------------------------
11 |
12 | Utilization Design Information
13 |
14 | Table of Contents
15 | -----------------
16 | 1. CLB Logic
17 | 1.1 Summary of Registers by Type
18 | 2. BLOCKRAM
19 | 3. ARITHMETIC
20 | 4. I/O
21 | 5. CLOCK
22 | 6. ADVANCED
23 | 7. CONFIGURATION
24 | 8. Primitives
25 | 9. Black Boxes
26 | 10. Instantiated Netlists
27 | 11. SLR Connectivity
28 | 12. SLR Connectivity Matrix
29 | 13. SLR CLB Logic and Dedicated Block Utilization
30 | 14. SLR IO Utilization
31 |
32 | 1. CLB Logic
33 | ------------
34 |
35 | +----------------------------+--------+-------+-----------+-------+
36 | | Site Type | Used | Fixed | Available | Util% |
37 | +----------------------------+--------+-------+-----------+-------+
38 | | CLB LUTs* | 123948 | 0 | 1728000 | 7.17 |
39 | | LUT as Logic | 120268 | 0 | 1728000 | 6.96 |
40 | | LUT as Memory | 3680 | 0 | 791040 | 0.47 |
41 | | LUT as Distributed RAM | 0 | 0 | | |
42 | | LUT as Shift Register | 3680 | 0 | | |
43 | | CLB Registers | 43435 | 0 | 3456000 | 1.26 |
44 | | Register as Flip Flop | 43435 | 0 | 3456000 | 1.26 |
45 | | Register as Latch | 0 | 0 | 3456000 | 0.00 |
46 | | CARRY8 | 13270 | 0 | 216000 | 6.14 |
47 | | F7 Muxes | 256 | 0 | 864000 | 0.03 |
48 | | F8 Muxes | 0 | 0 | 432000 | 0.00 |
49 | | F9 Muxes | 0 | 0 | 216000 | 0.00 |
50 | +----------------------------+--------+-------+-----------+-------+
51 | * Warning! The Final LUT count, after physical optimizations and full implementation, is typically lower. Run opt_design after synthesis, if not already completed, for a more realistic count.
52 |
53 |
54 | 1.1 Summary of Registers by Type
55 | --------------------------------
56 |
57 | +-------+--------------+-------------+--------------+
58 | | Total | Clock Enable | Synchronous | Asynchronous |
59 | +-------+--------------+-------------+--------------+
60 | | 0 | _ | - | - |
61 | | 0 | _ | - | Set |
62 | | 0 | _ | - | Reset |
63 | | 0 | _ | Set | - |
64 | | 0 | _ | Reset | - |
65 | | 0 | Yes | - | - |
66 | | 0 | Yes | - | Set |
67 | | 0 | Yes | - | Reset |
68 | | 1069 | Yes | Set | - |
69 | | 42366 | Yes | Reset | - |
70 | +-------+--------------+-------------+--------------+
71 |
72 |
73 | 2. BLOCKRAM
74 | -----------
75 |
76 | +-------------------+------+-------+-----------+-------+
77 | | Site Type | Used | Fixed | Available | Util% |
78 | +-------------------+------+-------+-----------+-------+
79 | | Block RAM Tile | 42 | 0 | 2688 | 1.56 |
80 | | RAMB36/FIFO* | 0 | 0 | 2688 | 0.00 |
81 | | RAMB18 | 84 | 0 | 5376 | 1.56 |
82 | | RAMB18E2 only | 84 | | | |
83 | | URAM | 0 | 0 | 1280 | 0.00 |
84 | +-------------------+------+-------+-----------+-------+
85 | * Note: Each Block RAM Tile only has one FIFO logic available and therefore can accommodate only one FIFO36E2 or one FIFO18E2. However, if a FIFO18E2 occupies a Block RAM Tile, that tile can still accommodate a RAMB18E2
86 |
87 |
88 | 3. ARITHMETIC
89 | -------------
90 |
91 | +----------------+------+-------+-----------+-------+
92 | | Site Type | Used | Fixed | Available | Util% |
93 | +----------------+------+-------+-----------+-------+
94 | | DSPs | 5386 | 0 | 12288 | 43.83 |
95 | | DSP48E2 only | 5386 | | | |
96 | +----------------+------+-------+-----------+-------+
97 |
98 |
99 | 4. I/O
100 | ------
101 |
102 | +------------+------+-------+-----------+-------+
103 | | Site Type | Used | Fixed | Available | Util% |
104 | +------------+------+-------+-----------+-------+
105 | | Bonded IOB | 274 | 0 | 676 | 40.53 |
106 | +------------+------+-------+-----------+-------+
107 |
108 |
109 | 5. CLOCK
110 | --------
111 |
112 | +----------------------+------+-------+-----------+-------+
113 | | Site Type | Used | Fixed | Available | Util% |
114 | +----------------------+------+-------+-----------+-------+
115 | | GLOBAL CLOCK BUFFERs | 1 | 0 | 1344 | 0.07 |
116 | | BUFGCE | 1 | 0 | 384 | 0.26 |
117 | | BUFGCE_DIV | 0 | 0 | 64 | 0.00 |
118 | | BUFG_GT | 0 | 0 | 768 | 0.00 |
119 | | BUFGCTRL* | 0 | 0 | 128 | 0.00 |
120 | | PLL | 0 | 0 | 32 | 0.00 |
121 | | MMCM | 0 | 0 | 16 | 0.00 |
122 | +----------------------+------+-------+-----------+-------+
123 | * Note: Each used BUFGCTRL counts as two GLOBAL CLOCK BUFFERs. This table does not include global clocking resources, only buffer cell usage. See the Clock Utilization Report (report_clock_utilization) for detailed accounting of global clocking resource availability.
124 |
125 |
126 | 6. ADVANCED
127 | -----------
128 |
129 | +-----------------+------+-------+-----------+-------+
130 | | Site Type | Used | Fixed | Available | Util% |
131 | +-----------------+------+-------+-----------+-------+
132 | | CMACE4 | 0 | 0 | 12 | 0.00 |
133 | | GTYE4_CHANNEL | 0 | 0 | 24 | 0.00 |
134 | | GTYE4_COMMON | 0 | 0 | 6 | 0.00 |
135 | | ILKNE4 | 0 | 0 | 8 | 0.00 |
136 | | OBUFDS_GTE4 | 0 | 0 | 12 | 0.00 |
137 | | OBUFDS_GTE4_ADV | 0 | 0 | 12 | 0.00 |
138 | | PCIE40E4 | 0 | 0 | 4 | 0.00 |
139 | | SYSMONE4 | 0 | 0 | 4 | 0.00 |
140 | +-----------------+------+-------+-----------+-------+
141 |
142 |
143 | 7. CONFIGURATION
144 | ----------------
145 |
146 | +-------------+------+-------+-----------+-------+
147 | | Site Type | Used | Fixed | Available | Util% |
148 | +-------------+------+-------+-----------+-------+
149 | | BSCANE2 | 0 | 0 | 16 | 0.00 |
150 | | DNA_PORTE2 | 0 | 0 | 4 | 0.00 |
151 | | EFUSE_USR | 0 | 0 | 4 | 0.00 |
152 | | FRAME_ECCE4 | 0 | 0 | 4 | 0.00 |
153 | | ICAPE3 | 0 | 0 | 8 | 0.00 |
154 | | MASTER_JTAG | 0 | 0 | 4 | 0.00 |
155 | | STARTUPE3 | 0 | 0 | 4 | 0.00 |
156 | +-------------+------+-------+-----------+-------+
157 |
158 |
159 | 8. Primitives
160 | -------------
161 |
162 | +----------+-------+---------------------+
163 | | Ref Name | Used | Functional Category |
164 | +----------+-------+---------------------+
165 | | LUT2 | 52029 | CLB |
166 | | FDRE | 42366 | Register |
167 | | LUT3 | 41635 | CLB |
168 | | LUT4 | 40010 | CLB |
169 | | CARRY8 | 13270 | CLB |
170 | | LUT6 | 12631 | CLB |
171 | | LUT5 | 10697 | CLB |
172 | | DSP48E2 | 5386 | Arithmetic |
173 | | LUT1 | 4899 | CLB |
174 | | SRL16E | 2816 | CLB |
175 | | FDSE | 1069 | Register |
176 | | SRLC32E | 864 | CLB |
177 | | MUXF7 | 256 | CLB |
178 | | OBUF | 210 | I/O |
179 | | RAMB18E2 | 84 | Block Ram |
180 | | INBUF | 64 | I/O |
181 | | IBUFCTRL | 64 | Others |
182 | | BUFGCE | 1 | Clock |
183 | +----------+-------+---------------------+
184 |
185 |
186 | 9. Black Boxes
187 | --------------
188 |
189 | +----------+------+
190 | | Ref Name | Used |
191 | +----------+------+
192 |
193 |
194 | 10. Instantiated Netlists
195 | -------------------------
196 |
197 | +----------+------+
198 | | Ref Name | Used |
199 | +----------+------+
200 |
201 |
202 | 11. SLR Connectivity
203 | --------------------
204 |
205 | +----------------------------------+------+-------+-----------+-------+
206 | | | Used | Fixed | Available | Util% |
207 | +----------------------------------+------+-------+-----------+-------+
208 | | SLR3 <-> SLR2 | 0 | | 23040 | 0.00 |
209 | | SLR2 -> SLR3 | 0 | | | 0.00 |
210 | | Using TX_REG only | 0 | 0 | | |
211 | | Using RX_REG only | 0 | 0 | | |
212 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
213 | | SLR3 -> SLR2 | 0 | | | 0.00 |
214 | | Using TX_REG only | 0 | 0 | | |
215 | | Using RX_REG only | 0 | 0 | | |
216 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
217 | | SLR2 <-> SLR1 | 0 | | 23040 | 0.00 |
218 | | SLR1 -> SLR2 | 0 | | | 0.00 |
219 | | Using TX_REG only | 0 | 0 | | |
220 | | Using RX_REG only | 0 | 0 | | |
221 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
222 | | SLR2 -> SLR1 | 0 | | | 0.00 |
223 | | Using TX_REG only | 0 | 0 | | |
224 | | Using RX_REG only | 0 | 0 | | |
225 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
226 | | SLR1 <-> SLR0 | 0 | | 23040 | 0.00 |
227 | | SLR0 -> SLR1 | 0 | | | 0.00 |
228 | | Using TX_REG only | 0 | 0 | | |
229 | | Using RX_REG only | 0 | 0 | | |
230 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
231 | | SLR1 -> SLR0 | 0 | | | 0.00 |
232 | | Using TX_REG only | 0 | 0 | | |
233 | | Using RX_REG only | 0 | 0 | | |
234 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
235 | +----------------------------------+------+-------+-----------+-------+
236 | | Total SLLs Used | 0 | | | |
237 | +----------------------------------+------+-------+-----------+-------+
238 |
239 |
240 | 12. SLR Connectivity Matrix
241 | ---------------------------
242 |
243 | +-----------+------+------+------+------+
244 | | FROM \ TO | SLR3 | SLR2 | SLR1 | SLR0 |
245 | +-----------+------+------+------+------+
246 | | SLR3 | 0 | 0 | 0 | 0 |
247 | | SLR2 | 0 | 0 | 0 | 0 |
248 | | SLR1 | 0 | 0 | 0 | 0 |
249 | | SLR0 | 0 | 0 | 0 | 0 |
250 | +-----------+------+------+------+------+
251 |
252 |
253 | 13. SLR CLB Logic and Dedicated Block Utilization
254 | -------------------------------------------------
255 |
256 | +----------------------------+------+------+------+------+--------+--------+--------+--------+
257 | | Site Type | SLR0 | SLR1 | SLR2 | SLR3 | SLR0 % | SLR1 % | SLR2 % | SLR3 % |
258 | +----------------------------+------+------+------+------+--------+--------+--------+--------+
259 | | CLB | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
260 | | CLBL | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
261 | | CLBM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
262 | | CLB LUTs | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
263 | | LUT as Logic | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
264 | | LUT as Memory | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
265 | | LUT as Distributed RAM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
266 | | LUT as Shift Register | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
267 | | CLB Registers | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
268 | | CARRY8 | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
269 | | F7 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
270 | | F8 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
271 | | F9 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
272 | | Block RAM Tile | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
273 | | RAMB36/FIFO | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
274 | | RAMB18 | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
275 | | URAM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
276 | | DSPs | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
277 | | PLL | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
278 | | MMCM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
279 | | Unique Control Sets | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
280 | +----------------------------+------+------+------+------+--------+--------+--------+--------+
281 | * Note: Available Control Sets based on CLB Registers / 8
282 |
283 |
284 | 14. SLR IO Utilization
285 | ----------------------
286 |
287 | +-----------+-----------+---------+------------+----------+------------+----------+-----+
288 | | SLR Index | Used IOBs | (%)IOBs | Used IPADs | (%)IPADs | Used OPADs | (%)OPADs | GTs |
289 | +-----------+-----------+---------+------------+----------+------------+----------+-----+
290 | | SLR3 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 |
291 | | SLR2 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 |
292 | | SLR1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 |
293 | | SLR0 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 |
294 | +-----------+-----------+---------+------------+----------+------------+----------+-----+
295 | | Total | 0 | | 0 | | 0 | | 0 |
296 | +-----------+-----------+---------+------------+----------+------------+----------+-----+
297 |
298 |
299 |
--------------------------------------------------------------------------------
/quantized_pruned_cnn/vivado_synth.rpt:
--------------------------------------------------------------------------------
1 | Copyright 1986-2020 Xilinx, Inc. All Rights Reserved.
2 | --------------------------------------------------------------------------------------
3 | | Tool Version : Vivado v.2020.1 (lin64) Build 2902540 Wed May 27 19:54:35 MDT 2020
4 | | Date : Mon Jun 28 13:06:58 2021
5 | | Host : geonosis.cern.ch running 64-bit CentOS Linux release 7.9.2009 (Core)
6 | | Command : report_utilization -file vivado_synth.rpt
7 | | Design : myproject
8 | | Device : xcu250figd2104-2L
9 | | Design State : Synthesized
10 | --------------------------------------------------------------------------------------
11 |
12 | Utilization Design Information
13 |
14 | Table of Contents
15 | -----------------
16 | 1. CLB Logic
17 | 1.1 Summary of Registers by Type
18 | 2. BLOCKRAM
19 | 3. ARITHMETIC
20 | 4. I/O
21 | 5. CLOCK
22 | 6. ADVANCED
23 | 7. CONFIGURATION
24 | 8. Primitives
25 | 9. Black Boxes
26 | 10. Instantiated Netlists
27 | 11. SLR Connectivity
28 | 12. SLR Connectivity Matrix
29 | 13. SLR CLB Logic and Dedicated Block Utilization
30 | 14. SLR IO Utilization
31 |
32 | 1. CLB Logic
33 | ------------
34 |
35 | +----------------------------+--------+-------+-----------+-------+
36 | | Site Type | Used | Fixed | Available | Util% |
37 | +----------------------------+--------+-------+-----------+-------+
38 | | CLB LUTs* | 118931 | 0 | 1728000 | 6.88 |
39 | | LUT as Logic | 115875 | 0 | 1728000 | 6.71 |
40 | | LUT as Memory | 3056 | 0 | 791040 | 0.39 |
41 | | LUT as Distributed RAM | 0 | 0 | | |
42 | | LUT as Shift Register | 3056 | 0 | | |
43 | | CLB Registers | 30702 | 0 | 3456000 | 0.89 |
44 | | Register as Flip Flop | 30702 | 0 | 3456000 | 0.89 |
45 | | Register as Latch | 0 | 0 | 3456000 | 0.00 |
46 | | CARRY8 | 14273 | 0 | 216000 | 6.61 |
47 | | F7 Muxes | 578 | 0 | 864000 | 0.07 |
48 | | F8 Muxes | 80 | 0 | 432000 | 0.02 |
49 | | F9 Muxes | 0 | 0 | 216000 | 0.00 |
50 | +----------------------------+--------+-------+-----------+-------+
51 | * Warning! The Final LUT count, after physical optimizations and full implementation, is typically lower. Run opt_design after synthesis, if not already completed, for a more realistic count.
52 |
53 |
54 | 1.1 Summary of Registers by Type
55 | --------------------------------
56 |
57 | +-------+--------------+-------------+--------------+
58 | | Total | Clock Enable | Synchronous | Asynchronous |
59 | +-------+--------------+-------------+--------------+
60 | | 0 | _ | - | - |
61 | | 0 | _ | - | Set |
62 | | 0 | _ | - | Reset |
63 | | 0 | _ | Set | - |
64 | | 0 | _ | Reset | - |
65 | | 0 | Yes | - | - |
66 | | 0 | Yes | - | Set |
67 | | 0 | Yes | - | Reset |
68 | | 1413 | Yes | Set | - |
69 | | 29289 | Yes | Reset | - |
70 | +-------+--------------+-------------+--------------+
71 |
72 |
73 | 2. BLOCKRAM
74 | -----------
75 |
76 | +-------------------+------+-------+-----------+-------+
77 | | Site Type | Used | Fixed | Available | Util% |
78 | +-------------------+------+-------+-----------+-------+
79 | | Block RAM Tile | 34 | 0 | 2688 | 1.26 |
80 | | RAMB36/FIFO* | 0 | 0 | 2688 | 0.00 |
81 | | RAMB18 | 68 | 0 | 5376 | 1.26 |
82 | | RAMB18E2 only | 68 | | | |
83 | | URAM | 0 | 0 | 1280 | 0.00 |
84 | +-------------------+------+-------+-----------+-------+
85 | * Note: Each Block RAM Tile only has one FIFO logic available and therefore can accommodate only one FIFO36E2 or one FIFO18E2. However, if a FIFO18E2 occupies a Block RAM Tile, that tile can still accommodate a RAMB18E2
86 |
87 |
88 | 3. ARITHMETIC
89 | -------------
90 |
91 | +----------------+------+-------+-----------+-------+
92 | | Site Type | Used | Fixed | Available | Util% |
93 | +----------------+------+-------+-----------+-------+
94 | | DSPs | 353 | 0 | 12288 | 2.87 |
95 | | DSP48E2 only | 353 | | | |
96 | +----------------+------+-------+-----------+-------+
97 |
98 |
99 | 4. I/O
100 | ------
101 |
102 | +------------+------+-------+-----------+-------+
103 | | Site Type | Used | Fixed | Available | Util% |
104 | +------------+------+-------+-----------+-------+
105 | | Bonded IOB | 274 | 0 | 676 | 40.53 |
106 | +------------+------+-------+-----------+-------+
107 |
108 |
109 | 5. CLOCK
110 | --------
111 |
112 | +----------------------+------+-------+-----------+-------+
113 | | Site Type | Used | Fixed | Available | Util% |
114 | +----------------------+------+-------+-----------+-------+
115 | | GLOBAL CLOCK BUFFERs | 1 | 0 | 1344 | 0.07 |
116 | | BUFGCE | 1 | 0 | 384 | 0.26 |
117 | | BUFGCE_DIV | 0 | 0 | 64 | 0.00 |
118 | | BUFG_GT | 0 | 0 | 768 | 0.00 |
119 | | BUFGCTRL* | 0 | 0 | 128 | 0.00 |
120 | | PLL | 0 | 0 | 32 | 0.00 |
121 | | MMCM | 0 | 0 | 16 | 0.00 |
122 | +----------------------+------+-------+-----------+-------+
123 | * Note: Each used BUFGCTRL counts as two GLOBAL CLOCK BUFFERs. This table does not include global clocking resources, only buffer cell usage. See the Clock Utilization Report (report_clock_utilization) for detailed accounting of global clocking resource availability.
124 |
125 |
126 | 6. ADVANCED
127 | -----------
128 |
129 | +-----------------+------+-------+-----------+-------+
130 | | Site Type | Used | Fixed | Available | Util% |
131 | +-----------------+------+-------+-----------+-------+
132 | | CMACE4 | 0 | 0 | 12 | 0.00 |
133 | | GTYE4_CHANNEL | 0 | 0 | 24 | 0.00 |
134 | | GTYE4_COMMON | 0 | 0 | 6 | 0.00 |
135 | | ILKNE4 | 0 | 0 | 8 | 0.00 |
136 | | OBUFDS_GTE4 | 0 | 0 | 12 | 0.00 |
137 | | OBUFDS_GTE4_ADV | 0 | 0 | 12 | 0.00 |
138 | | PCIE40E4 | 0 | 0 | 4 | 0.00 |
139 | | SYSMONE4 | 0 | 0 | 4 | 0.00 |
140 | +-----------------+------+-------+-----------+-------+
141 |
142 |
143 | 7. CONFIGURATION
144 | ----------------
145 |
146 | +-------------+------+-------+-----------+-------+
147 | | Site Type | Used | Fixed | Available | Util% |
148 | +-------------+------+-------+-----------+-------+
149 | | BSCANE2 | 0 | 0 | 16 | 0.00 |
150 | | DNA_PORTE2 | 0 | 0 | 4 | 0.00 |
151 | | EFUSE_USR | 0 | 0 | 4 | 0.00 |
152 | | FRAME_ECCE4 | 0 | 0 | 4 | 0.00 |
153 | | ICAPE3 | 0 | 0 | 8 | 0.00 |
154 | | MASTER_JTAG | 0 | 0 | 4 | 0.00 |
155 | | STARTUPE3 | 0 | 0 | 4 | 0.00 |
156 | +-------------+------+-------+-----------+-------+
157 |
158 |
159 | 8. Primitives
160 | -------------
161 |
162 | +----------+-------+---------------------+
163 | | Ref Name | Used | Functional Category |
164 | +----------+-------+---------------------+
165 | | LUT2 | 53834 | CLB |
166 | | LUT3 | 29466 | CLB |
167 | | FDRE | 29289 | Register |
168 | | LUT4 | 28455 | CLB |
169 | | LUT6 | 17197 | CLB |
170 | | LUT5 | 16487 | CLB |
171 | | CARRY8 | 14273 | CLB |
172 | | LUT1 | 5418 | CLB |
173 | | SRL16E | 2032 | CLB |
174 | | FDSE | 1413 | Register |
175 | | SRLC32E | 1024 | CLB |
176 | | MUXF7 | 578 | CLB |
177 | | DSP48E2 | 353 | Arithmetic |
178 | | OBUF | 210 | I/O |
179 | | MUXF8 | 80 | CLB |
180 | | RAMB18E2 | 68 | Block Ram |
181 | | INBUF | 64 | I/O |
182 | | IBUFCTRL | 64 | Others |
183 | | BUFGCE | 1 | Clock |
184 | +----------+-------+---------------------+
185 |
186 |
187 | 9. Black Boxes
188 | --------------
189 |
190 | +----------+------+
191 | | Ref Name | Used |
192 | +----------+------+
193 |
194 |
195 | 10. Instantiated Netlists
196 | -------------------------
197 |
198 | +----------+------+
199 | | Ref Name | Used |
200 | +----------+------+
201 |
202 |
203 | 11. SLR Connectivity
204 | --------------------
205 |
206 | +----------------------------------+------+-------+-----------+-------+
207 | | | Used | Fixed | Available | Util% |
208 | +----------------------------------+------+-------+-----------+-------+
209 | | SLR3 <-> SLR2 | 0 | | 23040 | 0.00 |
210 | | SLR2 -> SLR3 | 0 | | | 0.00 |
211 | | Using TX_REG only | 0 | 0 | | |
212 | | Using RX_REG only | 0 | 0 | | |
213 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
214 | | SLR3 -> SLR2 | 0 | | | 0.00 |
215 | | Using TX_REG only | 0 | 0 | | |
216 | | Using RX_REG only | 0 | 0 | | |
217 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
218 | | SLR2 <-> SLR1 | 0 | | 23040 | 0.00 |
219 | | SLR1 -> SLR2 | 0 | | | 0.00 |
220 | | Using TX_REG only | 0 | 0 | | |
221 | | Using RX_REG only | 0 | 0 | | |
222 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
223 | | SLR2 -> SLR1 | 0 | | | 0.00 |
224 | | Using TX_REG only | 0 | 0 | | |
225 | | Using RX_REG only | 0 | 0 | | |
226 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
227 | | SLR1 <-> SLR0 | 0 | | 23040 | 0.00 |
228 | | SLR0 -> SLR1 | 0 | | | 0.00 |
229 | | Using TX_REG only | 0 | 0 | | |
230 | | Using RX_REG only | 0 | 0 | | |
231 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
232 | | SLR1 -> SLR0 | 0 | | | 0.00 |
233 | | Using TX_REG only | 0 | 0 | | |
234 | | Using RX_REG only | 0 | 0 | | |
235 | | Using Both TX_REG and RX_REG | 0 | 0 | | |
236 | +----------------------------------+------+-------+-----------+-------+
237 | | Total SLLs Used | 0 | | | |
238 | +----------------------------------+------+-------+-----------+-------+
239 |
240 |
241 | 12. SLR Connectivity Matrix
242 | ---------------------------
243 |
244 | +-----------+------+------+------+------+
245 | | FROM \ TO | SLR3 | SLR2 | SLR1 | SLR0 |
246 | +-----------+------+------+------+------+
247 | | SLR3 | 0 | 0 | 0 | 0 |
248 | | SLR2 | 0 | 0 | 0 | 0 |
249 | | SLR1 | 0 | 0 | 0 | 0 |
250 | | SLR0 | 0 | 0 | 0 | 0 |
251 | +-----------+------+------+------+------+
252 |
253 |
254 | 13. SLR CLB Logic and Dedicated Block Utilization
255 | -------------------------------------------------
256 |
257 | +----------------------------+------+------+------+------+--------+--------+--------+--------+
258 | | Site Type | SLR0 | SLR1 | SLR2 | SLR3 | SLR0 % | SLR1 % | SLR2 % | SLR3 % |
259 | +----------------------------+------+------+------+------+--------+--------+--------+--------+
260 | | CLB | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
261 | | CLBL | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
262 | | CLBM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
263 | | CLB LUTs | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
264 | | LUT as Logic | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
265 | | LUT as Memory | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
266 | | LUT as Distributed RAM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
267 | | LUT as Shift Register | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
268 | | CLB Registers | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
269 | | CARRY8 | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
270 | | F7 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
271 | | F8 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
272 | | F9 Muxes | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
273 | | Block RAM Tile | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
274 | | RAMB36/FIFO | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
275 | | RAMB18 | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
276 | | URAM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
277 | | DSPs | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
278 | | PLL | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
279 | | MMCM | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
280 | | Unique Control Sets | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
281 | +----------------------------+------+------+------+------+--------+--------+--------+--------+
282 | * Note: Available Control Sets based on CLB Registers / 8
283 |
284 |
285 | 14. SLR IO Utilization
286 | ----------------------
287 |
288 | +-----------+-----------+---------+------------+----------+------------+----------+-----+
289 | | SLR Index | Used IOBs | (%)IOBs | Used IPADs | (%)IPADs | Used OPADs | (%)OPADs | GTs |
290 | +-----------+-----------+---------+------------+----------+------------+----------+-----+
291 | | SLR3 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 |
292 | | SLR2 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 |
293 | | SLR1 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 |
294 | | SLR0 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0 |
295 | +-----------+-----------+---------+------------+----------+------------+----------+-----+
296 | | Total | 0 | | 0 | | 0 | | 0 |
297 | +-----------+-----------+---------+------------+----------+------------+----------+-----+
298 |
299 |
300 |
--------------------------------------------------------------------------------
/sr/example.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastmachinelearning/hls4ml-tutorial/93d88af30bc86075ae158f256558ce43b455cba2/sr/example.pkl
--------------------------------------------------------------------------------