├── __init__.py ├── benchmark ├── __init__.py ├── bench_utils.py ├── ADBConnect.py ├── run_on_device.py └── tensorrt │ ├── calibrator.py │ └── onnx_trt_test.py ├── are_16_heads ├── __init__.py ├── itp │ ├── setup.sh │ ├── run_itp.sh │ └── submit.py ├── .amltignore ├── logger.py ├── deit_tiny_head_importance.txt ├── .amltconfig ├── heads_pruning.sh ├── heads_ablation.sh ├── requirements.txt ├── deit_small_head_importance.txt ├── deit_base_head_importance.txt ├── classifier_scoring.py ├── prepare_task.sh ├── fetch_results.py ├── evaluate_iterative_pruned_deit.py ├── finetune.py ├── util.py └── pruning.py ├── deit_pruning ├── src │ ├── inspector │ │ ├── __init__.py │ │ └── get_sparsity.py │ ├── preprocessing │ │ ├── __init__.py │ │ └── random_select.py │ ├── pytorch_prune │ │ ├── __init__.py │ │ ├── ln_smart.py │ │ ├── block.py │ │ └── pruner.py │ ├── deepspeed_config │ │ ├── deepspeed.json │ │ ├── deepspeed_deit_base.json │ │ ├── deepspeed_deit_small.json │ │ ├── deepspeed_deit_tiny.json │ │ ├── deepspeed_finetune_deit_tiny.json │ │ ├── deepspeed_finetune_deit_base.json │ │ └── deepspeed_finetune_deit_small.json │ ├── validate.py │ ├── analyse.py │ ├── onnx_inference.py │ ├── latency_model.py │ ├── onnx_export.py │ ├── layers │ │ └── super_bertlayers.py │ ├── model.py │ ├── supernet.py │ ├── get_latency.py │ ├── data.py │ └── trainer.py ├── vendor │ └── nn_pruning_v1 │ │ ├── MANIFEST.in │ │ ├── nn_pruning │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_quantization.py │ │ │ ├── test_patch2.py │ │ │ └── test_patch.py │ │ ├── __init__.py │ │ ├── modules │ │ │ ├── gelu2relu.py │ │ │ ├── AmpereRework.ipynb │ │ │ ├── quantization_config.py │ │ │ └── nonorm.py │ │ ├── model_patcher.py │ │ ├── training_patcher.py │ │ └── hp_naming.py │ │ ├── pyproject.toml │ │ ├── .isort.cfg │ │ ├── Makefile │ │ ├── .gitignore │ │ └── setup.py ├── config │ ├── topk-hybrid-struct-layerwise.json │ ├── topk-hybrid-struct-layerwise-base.json │ ├── topk-hybrid-struct-layerwise-small.json │ ├── topk-hybrid-struct-layerwise-tiny.json │ ├── topk-hybrid.json │ ├── topk-hybrid-block4x4.json │ ├── topk-hybrid-struct.json │ ├── magnitude-hybrid.json │ ├── topk-hybrid-block16x16.json │ ├── sigmoied_threshold-hybrid.json │ ├── topk-unstructured.json │ └── sigmoied_threshold-unstructured.json └── requirements.txt ├── requirements.txt ├── modeling ├── torch_layers │ ├── activation.py │ ├── residual.py │ ├── norm.py │ ├── ffn.py │ └── attention.py ├── layers │ ├── residual.py │ ├── ffn.py │ ├── norm.py │ ├── activation.py │ ├── embedding.py │ ├── attention.py │ ├── tf1_layers.py │ └── transformer_encoder.py ├── save_model.py └── models │ ├── squeezenet.py │ ├── cnn_zoo.py │ └── vit.py ├── .gitignore ├── experiments ├── D1130_vino_quant_cnn_test.py ├── D1130_tflite_gpu_r21_benchmark.py ├── D0104_tvm_fusion_test.py ├── D1207_tflite_quant_cnn_test.py └── D1207_vino_quant_cnn_test.py ├── run.sh └── draw.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /are_16_heads/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deit_pruning/src/inspector/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deit_pruning/src/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deit_pruning/src/pytorch_prune/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/MANIFEST.in: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/nn_pruning/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py35'] 4 | -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | multi_line_output=3 3 | include_trailing_comma=True 4 | 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow>=2.5.2 2 | onnx-tf==1.8.0 3 | timm==0.4.12 4 | torch==1.9.1 5 | torchvision==0.10.1 6 | onnx==1.10.1 7 | onnxruntime==1.9.0 -------------------------------------------------------------------------------- /are_16_heads/itp/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip install -r requirements.txt 4 | cd vendor/huggingface_transformers 5 | python setup.py install --user 6 | cd ../..
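7 | 8 | # Assumed usage (a sketch; the relative paths above imply the working directory): 9 | #     cd are_16_heads && bash itp/setup.sh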
-------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/nn_pruning/__init__.py: -------------------------------------------------------------------------------- 1 | def run1(path, output): 2 | return "run1_ok" 3 | 4 | 5 | def run2(path, output): 6 | return "run2_ok" 7 | -------------------------------------------------------------------------------- /are_16_heads/.amltignore: -------------------------------------------------------------------------------- 1 | vendor/huggingface_transformers/docker 2 | vendor/huggingface_transformers/docs 3 | vendor/huggingface_transformers/examples 4 | vendor/huggingface_transformers/tests 5 | amlt/ 6 | itp/.tmp 7 | -------------------------------------------------------------------------------- /modeling/torch_layers/activation.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | 4 | def gelu(x): 5 | cdf = 0.5 * (1.0 + torch.tanh( 6 | (math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))) 7 | return x * cdf -------------------------------------------------------------------------------- /are_16_heads/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s', 4 | datefmt='%H:%M:%S', 5 | level=logging.INFO) 6 | logger = logging.getLogger(__name__) 7 | -------------------------------------------------------------------------------- /modeling/layers/residual.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class Residual(tf.keras.Model): 4 | def __init__(self, fn): 5 | super().__init__() 6 | self.fn = fn 7 | 8 | def call(self, x): 9 | return self.fn(x) + x -------------------------------------------------------------------------------- /modeling/torch_layers/residual.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Residual(nn.Module): 5 | def __init__(self, sub_layer): 6 | super().__init__() 7 | self.sub_layer = sub_layer 8 | 9 | def forward(self, x): 10 | return x + self.sub_layer(x) -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: style test 2 | 3 | # Run code quality checks 4 | style: 5 | black . 6 | isort . 
7 | 8 | # Run tests for the library 9 | test: 10 | python -m pytest nn_pruning 11 | 12 | build_dist: 13 | rm -fr build 14 | rm -fr dist 15 | python -m build 16 | 17 | pypi_upload: build_dist 18 | python -m twine upload dist/* 19 | -------------------------------------------------------------------------------- /are_16_heads/deit_tiny_head_importance.txt: -------------------------------------------------------------------------------- 1 | 0.88811 0.35664 0.28993 2 | 0.34891 0.88253 0.31530 3 | 0.46338 0.44602 0.76573 4 | 0.64993 0.54293 0.53182 5 | 0.56222 0.61737 0.55024 6 | 0.50725 0.74968 0.42505 7 | 0.65688 0.56474 0.49958 8 | 0.56039 0.57933 0.59190 9 | 0.56777 0.53060 0.62936 10 | 0.57603 0.50893 0.63967 11 | 0.55203 0.46831 0.68989 12 | 0.30792 0.62804 0.71467 -------------------------------------------------------------------------------- /modeling/layers/ffn.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .activation import gelu 3 | 4 | 5 | class FeedForward(tf.keras.Model): 6 | def __init__(self, dim, hidden_dim): 7 | super().__init__() 8 | self.net = tf.keras.Sequential([tf.keras.layers.Dense(hidden_dim, activation=gelu), 9 | tf.keras.layers.Dense(dim)]) 10 | 11 | def call(self, x): 12 | return self.net(x) -------------------------------------------------------------------------------- /are_16_heads/.amltconfig: -------------------------------------------------------------------------------- 1 | {"project_name": "are16heads_deit", "storage_account_name": "hexnas", "container_name": "amulet", "blob_storage_account_name": "hexnas", "registry_name": "projects", "targets": {}, "local_path": "/data/data1/v-xudongwang/benchmark_tools/are_16_heads", "default_output_dir": "/data/data1/v-xudongwang/benchmark_tools/are_16_heads/amlt", "project_uuid": "7366271800.53065-27c70f65-df0a-43de-b226-b1e82b3d54a1", "version": "8.1.3"} -------------------------------------------------------------------------------- /are_16_heads/heads_pruning.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | TASK=$1 4 | OPTIONS="${@:2}" 5 | 6 | here="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 7 | source $here/prepare_task.sh $TASK 8 | 9 | echo $base_acc 10 | prune_options="--do_prune --eval_pruned --prune_percent `seq 5 5 100` $OPTIONS" 11 | run_eval "$prune_options" 12 | 13 | # prune cmd: bash experiments/BERT/heads_pruning.sh MNLI --normalize_pruning_by_layer -------------------------------------------------------------------------------- /deit_pruning/config/topk-hybrid-struct-layerwise.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "topK:1d_alt", 3 | "attention_pruning_method": "topK", 4 | "initial_threshold": 1.0, 5 | "initial_warmup": 1, 6 | "final_warmup": 3, 7 | "attention_block_rows": 64, 8 | "attention_block_cols": 256, 9 | "attention_output_with_dense": 0, 10 | "regularization_final_lambda": 20, 11 | "dense_lambda": 0.25, 12 | "regularization": "l1" 13 | } -------------------------------------------------------------------------------- /deit_pruning/config/topk-hybrid-struct-layerwise-base.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "topK:1d_alt", 3 | "attention_pruning_method": "topK", 4 | "initial_threshold": 1.0, 5 | "initial_warmup": 1, 6 | "final_warmup": 3, 7 | "attention_block_rows": 64, 
8 | "attention_block_cols": 768, 9 | "attention_output_with_dense": 0, 10 | "regularization_final_lambda": 20, 11 | "dense_lambda": 0.25, 12 | "regularization": "l1" 13 | } -------------------------------------------------------------------------------- /deit_pruning/config/topk-hybrid-struct-layerwise-small.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "topK:1d_alt", 3 | "attention_pruning_method": "topK", 4 | "initial_threshold": 1.0, 5 | "initial_warmup": 1, 6 | "final_warmup": 3, 7 | "attention_block_rows": 64, 8 | "attention_block_cols": 384, 9 | "attention_output_with_dense": 0, 10 | "regularization_final_lambda": 20, 11 | "dense_lambda": 0.25, 12 | "regularization": "l1" 13 | } -------------------------------------------------------------------------------- /deit_pruning/config/topk-hybrid-struct-layerwise-tiny.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "topK:1d_alt", 3 | "attention_pruning_method": "topK", 4 | "initial_threshold": 1.0, 5 | "initial_warmup": 1, 6 | "final_warmup": 3, 7 | "attention_block_rows": 64, 8 | "attention_block_cols": 192, 9 | "attention_output_with_dense": 0, 10 | "regularization_final_lambda": 20, 11 | "dense_lambda": 0.25, 12 | "regularization": "l1" 13 | } -------------------------------------------------------------------------------- /deit_pruning/config/topk-hybrid.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "topK:1d_alt", 3 | "attention_pruning_method": "topK", 4 | "initial_threshold": 1.0, 5 | "final_threshold": 0.5, 6 | "initial_warmup": 1, 7 | "final_warmup": 3, 8 | "attention_block_rows": 32, 9 | "attention_block_cols": 32, 10 | "attention_output_with_dense": 0, 11 | "regularization_final_lambda": 20, 12 | "dense_lambda": 0.25, 13 | "regularization": "l1" 14 | } -------------------------------------------------------------------------------- /modeling/layers/norm.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class LayerNorm(tf.keras.Model): 4 | def __init__(self, fn, pre=False): 5 | super().__init__() 6 | self.norm = tf.keras.layers.LayerNormalization(epsilon=1e-5) 7 | self.fn = fn 8 | self.pre = pre 9 | 10 | def call(self, x): 11 | if self.pre: 12 | return self.fn(self.norm(x)) 13 | else: 14 | return self.norm(self.fn(x)) -------------------------------------------------------------------------------- /deit_pruning/config/topk-hybrid-block4x4.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "topK:1d_alt", 3 | "attention_pruning_method": "topK", 4 | "initial_threshold": 1.0, 5 | "final_threshold": 0.5, 6 | "initial_warmup": 1, 7 | "final_warmup": 3, 8 | "attention_block_rows": 4, 9 | "attention_block_cols": 4, 10 | "attention_output_with_dense": 0, 11 | "regularization_final_lambda": 20, 12 | "dense_lambda": 0.25, 13 | "regularization": "l1" 14 | } -------------------------------------------------------------------------------- /deit_pruning/config/topk-hybrid-struct.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "topK:1d_alt", 3 | "attention_pruning_method": "topK", 4 | "initial_threshold": 1.0, 5 | "final_threshold": 0.5, 6 | "initial_warmup": 1, 7 | "final_warmup": 3, 8 | "attention_block_rows": 32, 9 
| "attention_block_cols": 32, 10 | "attention_output_with_dense": 0, 11 | "regularization_final_lambda": 20, 12 | "dense_lambda": 0.25, 13 | "regularization": "l1" 14 | } -------------------------------------------------------------------------------- /deit_pruning/config/magnitude-hybrid.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "magnitude:1d_alt", 3 | "attention_pruning_method": "magnitude", 4 | "initial_threshold": 1.0, 5 | "final_threshold": 0.5, 6 | "initial_warmup": 1, 7 | "final_warmup": 3, 8 | "attention_block_rows": 32, 9 | "attention_block_cols": 32, 10 | "attention_output_with_dense": 0, 11 | "regularization_final_lambda": 20, 12 | "dense_lambda": 0.25, 13 | "regularization": "l1" 14 | } -------------------------------------------------------------------------------- /deit_pruning/config/topk-hybrid-block16x16.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "topK:1d_alt", 3 | "attention_pruning_method": "topK", 4 | "initial_threshold": 1.0, 5 | "final_threshold": 0.5, 6 | "initial_warmup": 1, 7 | "final_warmup": 3, 8 | "attention_block_rows": 16, 9 | "attention_block_cols": 16, 10 | "attention_output_with_dense": 0, 11 | "regularization_final_lambda": 20, 12 | "dense_lambda": 0.25, 13 | "regularization": "l1" 14 | } -------------------------------------------------------------------------------- /deit_pruning/config/sigmoied_threshold-hybrid.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "sigmoied_threshold:1d_alt", 3 | "attention_pruning_method": "sigmoied_threshold", 4 | "initial_threshold": 0.0, 5 | "final_threshold": 0.1, 6 | "initial_warmup": 1, 7 | "final_warmup": 3, 8 | "attention_block_rows": 32, 9 | "attention_block_cols": 32, 10 | "attention_output_with_dense": 0, 11 | "regularization_final_lambda": 20, 12 | "dense_lambda": 0.25, 13 | "regularization": "l1" 14 | } -------------------------------------------------------------------------------- /modeling/layers/activation.py: -------------------------------------------------------------------------------- 1 | import math 2 | import tensorflow as tf 3 | 4 | def gelu(x): 5 | """Gaussian Error Linear Unit. 6 | This is a smoother version of the RELU. 7 | Original paper: https://arxiv.org/abs/1606.08415 8 | Args: 9 | x: float Tensor to perform activation. 10 | Returns: 11 | `x` with the GELU activation applied. 
12 | """ 13 | cdf = 0.5 * (1.0 + tf.tanh( 14 | (math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3))))) 15 | return x * cdf -------------------------------------------------------------------------------- /deit_pruning/config/topk-unstructured.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "topK", 3 | "attention_pruning_method": "topK", 4 | "initial_threshold": 1.0, 5 | "final_threshold": 0.5, 6 | "initial_warmup": 1, 7 | "final_warmup": 3, 8 | "attention_block_rows": 1, 9 | "attention_block_cols": 1, 10 | "dense_block_rows": 1, 11 | "dense_block_cols": 1, 12 | "attention_output_with_dense": 0, 13 | "regularization_final_lambda": 20, 14 | "dense_lambda": 0.25, 15 | "regularization": "l1" 16 | } -------------------------------------------------------------------------------- /modeling/torch_layers/norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class LayerNorm(nn.Module): 5 | def __init__(self, input_shape, sub_layer, is_pre=False) -> None: 6 | super().__init__() 7 | self.layer_norm = nn.LayerNorm(input_shape) 8 | self.sub_layer = sub_layer 9 | self.is_pre = is_pre 10 | 11 | def forward(self, x): 12 | if self.is_pre: 13 | return self.sub_layer(self.layer_norm(x)) 14 | else: 15 | return self.layer_norm(self.sub_layer(x)) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vit_model.py 2 | vit_huggingface.py 3 | play.py 4 | **/foo.sh 5 | **/__pycache__ 6 | are_16_heads/training_log/ 7 | are_16_heads/vendor 8 | are_16_heads/amlt 9 | are_16_heads/itp 10 | deit_pruning/itp 11 | deit_pruning/results 12 | deit_pruning/logs 13 | deit_pruning/vendor/nn_pruning 14 | deit_pruning/vendor/onnx_scripts 15 | deit_pruning/vendor/nn_pruning_v1/analysis 16 | deit_pruning/vendor/nn_pruning_v1/docs 17 | deit_pruning/vendor/nn_pruning_v1/examples 18 | deit_pruning/vendor/nn_pruning_v1/notebooks 19 | model_zoo 20 | tmp* 21 | -------------------------------------------------------------------------------- /deit_pruning/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2021.5.30 2 | chardet==4.0.0 3 | click==8.0.1 4 | deepspeed==0.4.1 5 | filelock==3.0.12 6 | huggingface-hub==0.0.8 7 | idna==2.10 8 | joblib==1.0.1 9 | numpy==1.20.3 10 | onnxruntime==1.6.0 11 | packaging==20.9 12 | pyparsing==2.4.7 13 | PyYAML==5.4.1 14 | regex==2021.4.4 15 | requests==2.25.1 16 | sacremoses==0.0.45 17 | six==1.16.0 18 | tokenizers==0.10.3 19 | torch==1.8.1 20 | tqdm==4.61.1 21 | transformers==4.7.0 22 | typing-extensions==3.10.0.0 23 | urllib3==1.26.5 24 | torchvision==0.9.1 25 | timm==0.4.12 -------------------------------------------------------------------------------- /deit_pruning/config/sigmoied_threshold-unstructured.json: -------------------------------------------------------------------------------- 1 | { 2 | "dense_pruning_method": "sigmoied_threshold", 3 | "attention_pruning_method": "sigmoied_threshold", 4 | "initial_threshold": 0.0, 5 | "final_threshold": 0.1, 6 | "initial_warmup": 1, 7 | "final_warmup": 3, 8 | "attention_block_rows": 1, 9 | "attention_block_cols": 1, 10 | "dense_block_rows": 1, 11 | "dense_block_cols": 1, 12 | "attention_output_with_dense": 0, 13 | "regularization_final_lambda": 20, 14 | "dense_lambda": 0.25, 15 | "regularization": "l1" 16 | } 
-------------------------------------------------------------------------------- /modeling/torch_layers/ffn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.modules import activation 4 | from .activation import gelu 5 | 6 | 7 | class FeedForward(nn.Module): 8 | def __init__(self, hidden_size, intermediate_size): 9 | super().__init__() 10 | self.linear1 = nn.Linear(hidden_size, intermediate_size) 11 | self.linear2 = nn.Linear(intermediate_size, hidden_size) 12 | 13 | def forward(self, x): 14 | x = self.linear1(x) 15 | x = gelu(x) 16 | x = self.linear2(x) 17 | return x -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled python modules. 2 | *.pyc 3 | *.pyo 4 | 5 | # Setuptools distribution folder. 6 | /dist/ 7 | /build/ 8 | 9 | # Python egg metadata, regenerated from source files by setuptools. 10 | /*.egg-info 11 | /*.egg 12 | 13 | # emacs Files 14 | *~ 15 | 16 | # Python cache files 17 | __pycache__/ 18 | 19 | # Jupyter Notebook 20 | .ipynb_checkpoints 21 | 22 | # wandb information directory 23 | wandb 24 | 25 | # backup directories 26 | back 27 | 28 | venv 29 | .vscode 30 | notebooks/models/ 31 | notebooks/checkpoints/ -------------------------------------------------------------------------------- /are_16_heads/heads_ablation.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | TASK=$1 4 | OPTIONS="${@:2}" 5 | 6 | here="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 7 | source $here/prepare_task.sh $TASK 8 | 9 | 10 | echo $base_acc 11 | echo $part 12 | for layer in `seq 1 12` 13 | do 14 | echo -n "$layer" 15 | for head in `seq 1 12` 16 | do 17 | mask_str="${layer}:${head}" 18 | acc=$(run_eval "--attention_mask_heads $mask_str $OPTIONS" | grep $metric | rev | cut -d" " -f1 | rev) 19 | printf "\t%.5f" $(echo "$acc - $base_acc" | bc ) 20 | done 21 | done 22 | 23 | -------------------------------------------------------------------------------- /are_16_heads/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.18.50 2 | botocore==1.21.50 3 | certifi==2021.5.30 4 | charset-normalizer==2.0.6 5 | click==8.0.1 6 | filelock==3.1.0 7 | huggingface-hub==0.0.17 8 | idna==3.2 9 | jmespath==0.10.0 10 | joblib==1.0.1 11 | numpy==1.21.2 12 | packaging==21.0 13 | Pillow==8.3.2 14 | pyparsing==2.4.7 15 | python-dateutil==2.8.2 16 | PyYAML==5.4.1 17 | regex==2021.9.24 18 | requests==2.26.0 19 | s3transfer==0.5.0 20 | sacremoses==0.0.46 21 | six==1.16.0 22 | timm==0.4.12 23 | tokenizers==0.10.3 24 | torch==1.9.1 25 | torchvision==0.10.1 26 | tqdm==4.62.3 27 | typing-extensions==3.10.0.2 28 | urllib3==1.26.7 29 | -------------------------------------------------------------------------------- /deit_pruning/src/deepspeed_config/deepspeed.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 256, 3 | "steps_per_print": 10, 4 | "optimizer": { 5 | "type": "Adam", 6 | "params": { 7 | "lr": 3e-5, 8 | "weight_decay": 0.01, 9 | "bias_correction": false 10 | } 11 | }, 12 | "scheduler": { 13 | "type": "WarmupDecayLR", 14 | "params": { 15 | "warmup_min_lr": 0, 16 | "warmup_max_lr": "auto", 17 | "warmup_num_steps": "auto", 18 | 
"total_num_steps": "auto" 19 | } 20 | }, 21 | "gradient_clipping": 1.0, 22 | "fp16": { 23 | "enabled": false 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /deit_pruning/src/deepspeed_config/deepspeed_deit_base.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 64, 3 | "steps_per_print": 10, 4 | "optimizer": { 5 | "type": "Adam", 6 | "params": { 7 | "lr": 0.000025, 8 | "weight_decay": 0.01, 9 | "bias_correction": false 10 | } 11 | }, 12 | "scheduler": { 13 | "type": "WarmupDecayLR", 14 | "params": { 15 | "warmup_min_lr": 0, 16 | "warmup_max_lr": "auto", 17 | "warmup_num_steps": "auto", 18 | "total_num_steps": "auto" 19 | } 20 | }, 21 | "gradient_clipping": 1.0, 22 | "fp16": { 23 | "enabled": false 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /deit_pruning/src/deepspeed_config/deepspeed_deit_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 128, 3 | "steps_per_print": 10, 4 | "optimizer": { 5 | "type": "Adam", 6 | "params": { 7 | "lr": 5e-5, 8 | "weight_decay": 0.01, 9 | "bias_correction": false 10 | } 11 | }, 12 | "scheduler": { 13 | "type": "WarmupDecayLR", 14 | "params": { 15 | "warmup_min_lr": 0, 16 | "warmup_max_lr": "auto", 17 | "warmup_num_steps": "auto", 18 | "total_num_steps": "auto" 19 | } 20 | }, 21 | "gradient_clipping": 1.0, 22 | "fp16": { 23 | "enabled": false 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /deit_pruning/src/deepspeed_config/deepspeed_deit_tiny.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 256, 3 | "steps_per_print": 10, 4 | "optimizer": { 5 | "type": "Adam", 6 | "params": { 7 | "lr": 1e-4, 8 | "weight_decay": 0.01, 9 | "bias_correction": false 10 | } 11 | }, 12 | "scheduler": { 13 | "type": "WarmupDecayLR", 14 | "params": { 15 | "warmup_min_lr": 0, 16 | "warmup_max_lr": "auto", 17 | "warmup_num_steps": "auto", 18 | "total_num_steps": "auto" 19 | } 20 | }, 21 | "gradient_clipping": 1.0, 22 | "fp16": { 23 | "enabled": false 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /deit_pruning/src/deepspeed_config/deepspeed_finetune_deit_tiny.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 256, 3 | "steps_per_print": 10, 4 | "optimizer": { 5 | "type": "Adam", 6 | "params": { 7 | "lr": 1e-5, 8 | "weight_decay": 0.01, 9 | "bias_correction": false 10 | } 11 | }, 12 | "scheduler": { 13 | "type": "WarmupDecayLR", 14 | "params": { 15 | "warmup_min_lr": 0, 16 | "warmup_max_lr": "auto", 17 | "warmup_num_steps": "auto", 18 | "total_num_steps": "auto" 19 | } 20 | }, 21 | "gradient_clipping": 1.0, 22 | "fp16": { 23 | "enabled": false 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /modeling/save_model.py: -------------------------------------------------------------------------------- 1 | from models.vit import ViT 2 | import tensorflow as tf 3 | 4 | 5 | if __name__ == '__main__': 6 | vit_config = { 7 | "image_size":224, 8 | "patch_size":16, 9 | "num_classes":1000, 10 | "dim":768, 11 | "depth":12, 12 | "heads":12, 13 | "mlp_dim":3072 14 | } 15 | 16 | vit = ViT(**vit_config) 17 | vit = 
tf.keras.Sequential([ 18 | tf.keras.layers.InputLayer(input_shape=(3, vit_config["image_size"], vit_config["image_size"]), batch_size=1), 19 | vit, 20 | ]) 21 | 22 | vit.save(f'/data/v-xudongwang/models/tf_model/vit_test_patch16_224.tf') -------------------------------------------------------------------------------- /are_16_heads/deit_small_head_importance.txt: -------------------------------------------------------------------------------- 1 | 0.18231 0.24750 0.70043 0.29003 0.19202 0.54214 2 | 0.38329 0.25754 0.29344 0.27641 0.44560 0.65245 3 | 0.33424 0.36127 0.50169 0.53243 0.30777 0.35759 4 | 0.43552 0.38621 0.45984 0.28844 0.43485 0.42121 5 | 0.40952 0.40148 0.41213 0.33956 0.40213 0.47356 6 | 0.49885 0.34838 0.41573 0.44428 0.35074 0.36952 7 | 0.36161 0.35020 0.48799 0.38296 0.46031 0.38720 8 | 0.41847 0.36983 0.40848 0.36265 0.50831 0.36244 9 | 0.33773 0.37743 0.40715 0.41210 0.52201 0.36797 10 | 0.36294 0.30509 0.55308 0.42384 0.42007 0.33644 11 | 0.34931 0.60585 0.26035 0.28327 0.31772 0.51182 12 | 0.22166 0.13127 0.15896 0.37551 0.73163 0.48175 -------------------------------------------------------------------------------- /deit_pruning/src/deepspeed_config/deepspeed_finetune_deit_base.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 64, 3 | "steps_per_print": 10, 4 | "optimizer": { 5 | "type": "Adam", 6 | "params": { 7 | "lr": 0.0000025, 8 | "weight_decay": 0.01, 9 | "bias_correction": false 10 | } 11 | }, 12 | "scheduler": { 13 | "type": "WarmupDecayLR", 14 | "params": { 15 | "warmup_min_lr": 0, 16 | "warmup_max_lr": "auto", 17 | "warmup_num_steps": "auto", 18 | "total_num_steps": "auto" 19 | } 20 | }, 21 | "gradient_clipping": 1.0, 22 | "fp16": { 23 | "enabled": false 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /deit_pruning/src/deepspeed_config/deepspeed_finetune_deit_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 128, 3 | "steps_per_print": 10, 4 | "optimizer": { 5 | "type": "Adam", 6 | "params": { 7 | "lr": 5e-6, 8 | "weight_decay": 0.01, 9 | "bias_correction": false 10 | } 11 | }, 12 | "scheduler": { 13 | "type": "WarmupDecayLR", 14 | "params": { 15 | "warmup_min_lr": 0, 16 | "warmup_max_lr": "auto", 17 | "warmup_num_steps": "auto", 18 | "total_num_steps": "auto" 19 | } 20 | }, 21 | "gradient_clipping": 1.0, 22 | "fp16": { 23 | "enabled": false 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /deit_pruning/src/validate.py: -------------------------------------------------------------------------------- 1 | from nn_pruning.inference_model_patcher import optimize_model as nn_optimize 2 | from model import SwiftBERT 3 | from transformers import AutoModelForImageClassification 4 | import sys 5 | model = AutoModelForImageClassification.from_pretrained(sys.argv[1]) 6 | # model = SwiftBERTOutput.from_pretrained('results/playground/swift_bert_final') 7 | original_params = model.num_parameters() 8 | print('=== model before optimize ===') 9 | print(model) 10 | model = nn_optimize(model, "dense") 11 | pruned_params = model.num_parameters() 12 | print("Original params:", original_params) 13 | print("After-pruned params:", pruned_params) 14 | print('=== model after optimize ===') 15 | print(model) 16 | -------------------------------------------------------------------------------- 
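Note: a minimal invocation sketch for src/validate.py above (run from deit_pruning/, assuming nn_pruning is installed and a pruned checkpoint directory exists; the path reuses the example from src/inspector/get_sparsity.py): python src/validate.py results/playground/final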
/modeling/layers/embedding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | def get_sinusoid_encoding(n_position, d_hid): 5 | ''' Sinusoid position encoding table ''' 6 | 7 | def get_position_angle_vec(position): 8 | return [position / np.power(10000, 2 * (hid_j // 2) / d_hid) for hid_j in range(d_hid)] 9 | 10 | sinusoid_table = np.array([get_position_angle_vec(pos_i) for pos_i in range(n_position)]) 11 | sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i 12 | sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1 13 | 14 | sinusoid_table = tf.convert_to_tensor(sinusoid_table, dtype=tf.float32) 15 | return tf.expand_dims(sinusoid_table, axis=0) 16 | -------------------------------------------------------------------------------- /deit_pruning/src/analyse.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from transformers import AutoModelForImageClassification 4 | import torch 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--model_path', type=Path, required=True, help='pretrained model path to analyse') 10 | args = parser.parse_args() 11 | 12 | model = AutoModelForImageClassification.from_pretrained(args.model_path) 13 | attention = model.vit.encoder.layer[0].attention.attention 14 | qkv_weight = [attention.query.weight, attention.key.weight, attention.value.weight] 15 | qkv_sparsity = [torch.sum(x == 0) / np.prod(x.shape) for x in qkv_weight] 16 | qkv_name = ['query', 'key', 'value'] 17 | 18 | print('Model Layer0 attention analyse summary') 19 | print('qkv sparsity', qkv_sparsity) 20 | 21 | for i in range(3): 22 | plt.imshow(qkv_weight[i] == 0) 23 | plt.savefig(args.model_path / f'{qkv_name[i]}_sparsity.png') -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/nn_pruning/tests/test_quantization.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from transformers import AutoModelForQuestionAnswering, AutoTokenizer 4 | 5 | from nn_pruning.modules.quantization import ( 6 | prepare_qat, 7 | prepare_static, 8 | quantize, 9 | ) 10 | 11 | 12 | class TestQuantization(unittest.TestCase): 13 | def _test_quantization(self, prepare_fn): 14 | model_name = "bert-base-uncased" 15 | tokenizer = AutoTokenizer.from_pretrained(model_name) 16 | model = AutoModelForQuestionAnswering.from_pretrained(model_name) 17 | prepared_model = prepare_fn( 18 | model, input_names=["input_ids", "attention_mask", "token_type_ids"], qconfig_name="default" 19 | ) 20 | prepared_model(**prepared_model.dummy_inputs) 21 | quantized = quantize(prepared_model) 22 | quantized(**prepared_model.dummy_inputs) 23 | 24 | def test_static_quantization(self): 25 | self._test_quantization(prepare_static) 26 | 27 | def test_qat(self): 28 | self._test_quantization(prepare_qat) 29 | -------------------------------------------------------------------------------- /are_16_heads/deit_base_head_importance.txt: -------------------------------------------------------------------------------- 1 | 0.10633 0.21009 0.17917 0.17396 0.23997 0.03947 0.66133 0.04475 0.20111 0.12547 0.55047 0.15649 2 | 0.85622 0.07048 0.14891 0.13867 0.10301 0.17172 0.11505 0.32608 0.11934 0.07479 0.10796 0.17137 3 | 0.13972 0.13900 0.27455 0.16737 0.13647 0.19076 
0.55112 0.62408 0.22676 0.18713 0.10770 0.10728 4 | 0.33292 0.37048 0.27849 0.43238 0.18705 0.15823 0.30169 0.20417 0.44913 0.18356 0.16674 0.17738 5 | 0.32738 0.28058 0.33533 0.16170 0.26594 0.15998 0.36160 0.35538 0.30818 0.26875 0.21111 0.33221 6 | 0.26823 0.40010 0.35554 0.25859 0.24402 0.26022 0.24198 0.35145 0.20782 0.26094 0.21693 0.32716 7 | 0.20791 0.18577 0.28252 0.25817 0.37873 0.42019 0.27734 0.32148 0.25068 0.30721 0.19980 0.28000 8 | 0.24535 0.35692 0.31715 0.21870 0.22684 0.40151 0.30993 0.28751 0.25410 0.27766 0.29290 0.21250 9 | 0.29598 0.26994 0.34892 0.23472 0.30831 0.25975 0.25006 0.29248 0.31943 0.36282 0.23939 0.24718 10 | 0.23726 0.30566 0.27336 0.22064 0.35475 0.22777 0.28260 0.33208 0.23842 0.41765 0.22885 0.27451 11 | 0.23946 0.23778 0.38542 0.20270 0.24301 0.20889 0.28000 0.24122 0.51742 0.21560 0.30406 0.22484 12 | 0.28876 0.22567 0.17927 0.18561 0.60431 0.09382 0.10180 0.10877 0.27325 0.30052 0.36722 0.32107 -------------------------------------------------------------------------------- /benchmark/bench_utils.py: -------------------------------------------------------------------------------- 1 | def fetech_tf_bench_results(result_str): 2 | if rfind_assign_int(result_str, 'count') >= 2: 3 | std_ms = rfind_assign_float(result_str, 'std') / 1e3 4 | avg_ms = rfind_assign_float(result_str, 'avg') / 1e3 5 | mem_mb = rfind_assign_float(result_str, 'overall') 6 | else: 7 | std_ms = 0 8 | avg_ms = rfind_assign_float(result_str, 'curr') / 1e3 9 | mem_mb = rfind_assign_float(result_str, 'overall') 10 | 11 | return std_ms, avg_ms, mem_mb 12 | 13 | def rfind_assign(s, mark): 14 | mark += "=" 15 | p = s.rfind(mark) 16 | assert p != -1 17 | l_idx = p + len(mark) 18 | r_idx = l_idx 19 | while s[r_idx] not in [' ', '\n']: 20 | r_idx += 1 21 | return s[l_idx: r_idx] 22 | 23 | 24 | def rfind_assign_float(s, mark): 25 | return float(rfind_assign(s, mark)) 26 | 27 | 28 | def rfind_assign_int(s, mark): 29 | return int(rfind_assign(s, mark)) 30 | 31 | 32 | def table_try_float(table): 33 | for i in range(len(table)): 34 | for j in range(len(table[i])): 35 | try: 36 | table[i][j] = float(table[i][j]) 37 | except: 38 | pass 39 | return table 40 | -------------------------------------------------------------------------------- /are_16_heads/itp/run_itp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TASK=$1 4 | OPTIONS="${@:2}" 5 | 6 | function distributed_launch() { 7 | python -m torch.distributed.launch --nproc_per_node 4 ./run_classifier.py \ 8 | --normalize_pruning_by_layer \ 9 | --do_prune \ 10 | --eval_pruned \ 11 | --actually_prune \ 12 | --data_dir /mnt/data/EdgeDL/imagenet2012 \ 13 | --eval_batch_size 500 \ 14 | --at_least_x_heads_per_layer 1 \ 15 | --num_workers 8 \ 16 | --use_huggingface_trainer \ 17 | $OPTIONS 18 | } 19 | 20 | function iterative_pruning_base() { 21 | ./itp/run_itp.sh distributed_launch \ 22 | --deit_type base \ 23 | --prune_number `seq 0 4 132` \ 24 | --exact_pruning \ 25 | --train_batch_size 64 \ 26 | --n_retrain_epochs_after_pruning 3 \ 27 | --retrain_learning_rate 0.000025 \ 28 | --output_dir /mnt/data/EdgeDL/are16heads_results/iterative/base 29 | } 30 | 31 | function finetune_many_base() { 32 | python -m torch.distributed.launch --nproc_per_node 4 finetune_many.py \ 33 | --data_dir /mnt/data/EdgeDL/imagenet2012 \ 34 | --model_path /mnt/data/EdgeDL/are16heads_results/iterative/base \ 35 | --output_dir /mnt/data/EdgeDL/are16heads_results/ \ 36 | --finetune_learning_rate 0.000025 \ 37 | 
--n_finetune_epochs_after_pruning 3 \ 38 | --finetune_batch_size 64 39 | } 40 | $1 "" -------------------------------------------------------------------------------- /modeling/layers/attention.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from einops.layers.tensorflow import Rearrange 3 | 4 | 5 | class Attention(tf.keras.Model): 6 | def __init__(self, dim, num_heads, h_k=None): 7 | if h_k is None: 8 | if dim % num_heads != 0: 9 | raise ValueError(f'hidden_size {dim} must be a multiple of num_heads {num_heads}.') 10 | self.h_k = dim // num_heads 11 | else: 12 | self.h_k = h_k 13 | super().__init__() 14 | self.num_heads = num_heads 15 | self.scale = self.h_k ** -0.5 16 | 17 | self.to_qkv = tf.keras.layers.Dense(self.num_heads * self.h_k * 3, use_bias=False) 18 | self.to_out = tf.keras.layers.Dense(dim) 19 | 20 | self.rearrange_qkv = Rearrange('b n (qkv h d) -> qkv b h n d', qkv = 3, h = self.num_heads) 21 | self.rearrange_out = Rearrange('b h n d -> b n (h d)') 22 | 23 | def call(self, x): 24 | qkv = self.to_qkv(x) 25 | qkv = self.rearrange_qkv(qkv) 26 | q = qkv[0] 27 | k = qkv[1] 28 | v = qkv[2] 29 | 30 | dots = tf.einsum('bhid,bhjd->bhij', q, k) * self.scale 31 | attn = tf.nn.softmax(dots, axis=-1) 32 | 33 | out = tf.einsum('bhij,bhjd->bhid', attn, v) 34 | out = self.rearrange_out(out) 35 | out = self.to_out(out) 36 | return out -------------------------------------------------------------------------------- /benchmark/ADBConnect.py: -------------------------------------------------------------------------------- 1 | import subprocess,re 2 | 3 | class ADBConnect: 4 | def __init__(self, serial=None): 5 | devices = subprocess.check_output(f'adb devices', shell=True).decode('utf-8') 6 | device_list = re.findall(r'([a-zA-Z0-9]+)[^\w]*([a-zA-Z0-9]+)', devices.split('List of devices attached')[-1]) 7 | if serial == None: 8 | if len(device_list) == 0: 9 | raise FileNotFoundError 10 | else: 11 | self.serial = device_list[0][0] 12 | print(f'Device {self.serial} selected.') 13 | else: 14 | for device in device_list: 15 | if serial == device[0]: 16 | self.serial = serial 17 | print(f'Device {self.serial} selected.') 18 | return 19 | raise FileNotFoundError 20 | 21 | def push_files(self, src, dst): 22 | subprocess.check_output(f'adb -s {self.serial} push {src} {dst}', shell=True) 23 | 24 | def pull_files(self, src, dst): 25 | subprocess.check_output(f'adb -s {self.serial} pull {src} {dst}', shell=True) 26 | 27 | def run_cmd(self, cmd, no_root=False): 28 | #print(self.serial) 29 | results = subprocess.check_output(f'adb -s {self.serial} shell {"su -c" if not no_root else ""} {cmd}', shell=True).decode('utf-8') 30 | #print(results) 31 | #latency=get_avg_latency(results) 32 | #print(latency) 33 | 34 | return results -------------------------------------------------------------------------------- /are_16_heads/classifier_scoring.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class BaseClassifierScorer(object): 5 | _name = "base" 6 | 7 | def __call__(self, predictions, labels): 8 | raise NotImplementedError() 9 | 10 | @property 11 | def name(cls): 12 | return cls._name 13 | 14 | 15 | class Accuracy(BaseClassifierScorer): 16 | _name = "Accuracy" 17 | 18 | def __call__(self, predictions, labels): 19 | return (predictions == labels).mean() 20 | 21 | 22 | class F1(BaseClassifierScorer): 23 | _name = "F-1 score" 24 | 25 | def __call__(self, predictions, labels): 26 | # 
True positives 27 | tp = np.logical_and(predictions == 1, labels == 1).sum() 28 | # Precision 29 | P = tp / (predictions == 1).sum() 30 | # Recall 31 | R = tp / (labels == 1).sum() 32 | # F-score 33 | return 2 * P * R / (P + R) 34 | 35 | 36 | class Matthews(BaseClassifierScorer): 37 | _name = "Matthew's correlation" 38 | 39 | def __call__(self, predictions, labels): 40 | # True/False positives/negatives 41 | tp = np.logical_and(predictions == 1, labels == 1).sum() 42 | fp = np.logical_and(predictions == 1, labels == 0).sum() 43 | tn = np.logical_and(predictions == 0, labels == 0).sum() 44 | fn = np.logical_and(predictions == 0, labels == 1).sum() 45 | # Correlation coefficient 46 | m = (tp * tn) - (fp * fn) 47 | m /= np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) + 1e-20 48 | 49 | return m 50 | -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | def readme(): 5 | with open("README.md") as f: 6 | return f.read() 7 | 8 | extras = { 9 | "tests": ["pytest"], 10 | "examples": ["numpy>=1.2.0", "datasets>=1.4.1", "ipywidgets>=7.6.3", "matplotlib>=3.3.4", "pandas>=1.2.3"], 11 | } 12 | 13 | def combine_requirements(base_keys): 14 | return list(set(k for v in base_keys for k in extras[v])) 15 | 16 | extras["dev"] = combine_requirements([k for k in extras if k != "examples"]) 17 | 18 | 19 | setup( 20 | name="nn_pruning", 21 | version="0.1.2", 22 | description="nn_pruning is a python package for pruning PyTorch models.", 23 | long_description="nn_pruning is a python package for pruning PyTorch models.", 24 | classifiers=[ 25 | "Development Status :: 3 - Alpha", 26 | "License :: OSI Approved :: MIT License", 27 | "Programming Language :: Python :: 3.0", 28 | "Topic :: Text Processing", 29 | ], 30 | keywords="", 31 | url="", 32 | author="", 33 | author_email="", 34 | license="MIT", 35 | packages=["nn_pruning", "nn_pruning.modules"], 36 | install_requires=["click", "transformers>=4.3.0", "torch>=1.6", "scikit-learn>=0.24"], 37 | extras_require=extras, 38 | test_suite="nose.collector", 39 | tests_require=["nose", "nose-cover3"], 40 | entry_points={ 41 | "console_scripts": ["nn_pruning_run_example=examples.command_line:main"], 42 | }, 43 | include_package_data=True, 44 | zip_safe=False, 45 | ) 46 | -------------------------------------------------------------------------------- /deit_pruning/src/preprocessing/random_select.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from ..utils import set_random 4 | 5 | # import numpy as np 6 | import random 7 | 8 | def output(train_filename, output_filename, idx): 9 | global_idx = 0 10 | idx_ptr = 0 11 | fout = open(output_filename, "w") 12 | with open(train_filename) as f: 13 | for line in f: 14 | if global_idx == idx[idx_ptr]: 15 | fout.write(line) 16 | idx_ptr += 1 17 | global_idx += 1 18 | fout.close() 19 | 20 | 21 | if __name__ == "__main__": 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument("--train_filename", required=True, type=Path) 24 | parser.add_argument("--output_filename", required=True, type=Path) 25 | parser.add_argument("--train_lcnt", required=True, type=int), 26 | parser.add_argument("--ratio", required=True, type=float) 27 | parser.add_argument("--seed", type=int, default=12345) 28 | args = parser.parse_args() 29 | # python -m 
src.preprocessing.random_select --train_filename ../../swiftBertData/data.tsv --output_filename data/train_subset_0.02.tsv --train_lcnt 1573820370 --ratio 0.02 30 | ## new dataset: 2000000007 31 | assert args.train_filename != args.output_filename 32 | set_random(args.seed) 33 | 34 | selected_lcnt = int(args.train_lcnt * args.ratio) 35 | print(f"Select {selected_lcnt} / {args.train_lcnt}") 36 | 37 | selected_idx = sorted(random.sample(range(args.train_lcnt), selected_lcnt)) 38 | selected_idx.append(-1) 39 | 40 | output(train_filename=args.train_filename, output_filename=args.output_filename, idx=selected_idx) 41 | -------------------------------------------------------------------------------- /modeling/models/squeezenet.py: -------------------------------------------------------------------------------- 1 | # from https://medium.com/@sumeetbadgujar/squeezenet-implementation-in-tensorflow-7949d795e84e 2 | 3 | import tensorflow as tf 4 | from tensorflow.keras.layers import Conv2D, ReLU, concatenate, Input, MaxPool2D, Dropout, AvgPool2D 5 | from tensorflow.keras import Model 6 | 7 | 8 | def fire_module(x, s1, e1, e3): 9 | s1x = Conv2D(s1, kernel_size=1, padding='same')(x) 10 | s1x = ReLU()(s1x) 11 | e1x = Conv2D(e1, kernel_size=1, padding='same')(s1x) 12 | e3x = Conv2D(e3, kernel_size=3, padding='same')(s1x) 13 | x = concatenate([e1x, e3x]) 14 | x = ReLU()(x) 15 | return x 16 | 17 | 18 | def SqueezeNet(image_size=[224, 224, 3], nclasses=1000, batch_size=1): 19 | input = Input(image_size, batch_size) 20 | x = Conv2D(96, kernel_size=(7, 7), strides=(2, 2), padding='same')(input) 21 | x = MaxPool2D(pool_size=(3, 3), strides=(2, 2))(x) 22 | x = fire_module(x, s1=16, e1=64, e3=64) # 2 23 | x = fire_module(x, s1=16, e1=64, e3=64) # 3 24 | x = fire_module(x, s1=32, e1=128, e3=128) # 4 25 | x = MaxPool2D(pool_size=(3, 3), strides=(2, 2))(x) 26 | x = fire_module(x, s1=32, e1=128, e3=128) # 5 27 | x = fire_module(x, s1=48, e1=192, e3=192) # 6 28 | x = fire_module(x, s1=48, e1=192, e3=192) # 7 29 | x = fire_module(x, s1=64, e1=256, e3=256) # 8 30 | x = MaxPool2D(pool_size=(3, 3), strides=(2, 2))(x) 31 | x = fire_module(x, s1=64, e1=256, e3=256) # 9 32 | x = Dropout(0.5)(x) 33 | x = Conv2D(nclasses, kernel_size=1)(x) 34 | output = AvgPool2D(pool_size=(13, 13))(x) 35 | model = Model(input, output) 36 | return model 37 | -------------------------------------------------------------------------------- /modeling/layers/tf1_layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def fc_layer(_input, out_units, opname='',use_bias=False, param_initializer=None): 4 | features_total = int(_input.get_shape()[-1]) 5 | if not param_initializer: 6 | param_initializer = {} 7 | with tf.compat.v1.variable_scope(opname+'.fc'): 8 | init_key = '%s/weight' % tf.get_variable_scope().name 9 | initializer = param_initializer.get(init_key, tf.contrib.layers.xavier_initializer()) 10 | weight = tf.compat.v1.get_variable(name='weight', shape=[features_total, out_units],initializer=initializer) 11 | output = tf.matmul(_input, weight) 12 | if use_bias: 13 | init_key = '%s/bias' % tf.get_variable_scope().name 14 | initializer = param_initializer.get(init_key, tf.constant_initializer([0.0] * out_units)) 15 | bias = tf.get_variable(name='bias', shape=[out_units],initializer=initializer) 16 | output = output + bias 17 | return output 18 | 19 | def gelu(_input, opname=''): 20 | import math 21 | with tf.compat.v1.variable_scope(opname + '.' 
+ 'gelu'): 22 | cdf = 0.5 * (1.0 + tf.tanh( 23 | (math.sqrt(2 / math.pi) * (_input + 0.044715 * tf.pow(_input, 3))))) 24 | return _input * cdf 25 | 26 | def ffn(_input, intermediate_size, opname=''): 27 | h = int(_input.get_shape()[-1]) 28 | with tf.compat.v1.variable_scope(opname + '.' + 'ffn'): 29 | x = fc_layer(_input, intermediate_size, use_bias=True, opname='dense1') 30 | x = gelu(x) 31 | x = fc_layer(x, h, use_bias=True, opname='dense2') 32 | return x 33 | -------------------------------------------------------------------------------- /deit_pruning/src/inspector/get_sparsity.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | from nn_pruning.inference_model_patcher import optimize_model 4 | 5 | def show(model, skip_embedding=False, skip_layernorm=False, skip_bias=False): 6 | print("Params:", model.num_parameters()) 7 | zero_param_cnt = 0 8 | param_numel = 0 9 | 10 | for k, v in model.named_parameters(): 11 | if skip_embedding and "embedding" in k: 12 | continue 13 | if skip_layernorm and "LayerNorm" in k: 14 | continue 15 | if skip_bias and "bias" in k: 16 | continue 17 | zero_mask = v == 0 18 | 19 | with torch.no_grad(): 20 | print(k, float(zero_mask.sum() / zero_mask.numel()), int(zero_mask.sum()), zero_mask.shape, sep='\t') 21 | zero_param_cnt += zero_mask.sum().item() 22 | param_numel += zero_mask.numel() 23 | 24 | print("Zero params:", zero_param_cnt) 25 | #print("Params (for):", param_numel) 26 | 27 | if __name__ == "__main__": 28 | from ..model import SwiftBERT 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument("--deit_model_name", type=str) 31 | parser.add_argument("--nn_pruning", action='store_true') 32 | parser.add_argument("--skip_embedding", action='store_true') 33 | parser.add_argument("--skip_layernorm", action='store_true') 34 | parser.add_argument("--skip_bias", action='store_true') 35 | 36 | args = parser.parse_args() 37 | # python -m src.inspector.get_sparsity --deit_model_name ./results/playground/final 38 | 39 | model = SwiftBERT.from_pretrained(args.deit_model_name) 40 | if args.nn_pruning: 41 | model = optimize_model(model, "dense") 42 | 43 | show(model, skip_embedding=args.skip_embedding, skip_layernorm=args.skip_layernorm, skip_bias=args.skip_bias) 44 | -------------------------------------------------------------------------------- /benchmark/run_on_device.py: -------------------------------------------------------------------------------- 1 | from .bench_utils import* 2 | import os 3 | 4 | def run_on_android(modelpath, adb, use_gpu=False, num_threads=1, num_runs=10, warmup_runs=10, skip_push=False, 5 | taskset_mask='70', benchmark_binary_dir='/data/local/tmp', bin_name='benchmark_model_plus_flex_r27', no_root=False, use_xnnpack=False, 6 | profiling_output_csv_file=None): 7 | if not skip_push: 8 | #=======Push to device=========== 9 | adb.push_files(modelpath, '/sdcard/') 10 | model_name=modelpath.split('/')[-1] 11 | if benchmark_binary_dir[-1] == '/': 12 | benchmark_binary_dir = benchmark_binary_dir[:-1] 13 | benchmark_binary_path = f'{benchmark_binary_dir}/{bin_name}' 14 | 15 | command = f'taskset {taskset_mask} {benchmark_binary_path} --num_threads={num_threads} {"--use_gpu=true" if use_gpu else ""} ' 16 | command += f'--num_runs={num_runs} --warmup_runs={warmup_runs} {"--use_xnnpack=true" if use_xnnpack else "--use_xnnpack=false"} --graph=/sdcard/{model_name} ' 17 | command += f'--enable_op_profiling=true 
--profiling_output_csv_file=/sdcard/{os.path.basename(profiling_output_csv_file)} ' if profiling_output_csv_file else '' 18 | print(command) 19 | 20 | bench_str = adb.run_cmd(command, no_root=no_root) 21 | std_ms, avg_ms, mem_mb = fetech_tf_bench_results(bench_str) 22 | 23 | if not skip_push: 24 | #=======Clear device files======= 25 | adb.run_cmd(f'rm -rf /sdcard/{model_name}', no_root=no_root) 26 | 27 | if profiling_output_csv_file: 28 | adb.pull_files(src=f'/sdcard/{os.path.basename(profiling_output_csv_file)}', dst=profiling_output_csv_file) 29 | print(f'Save profiling output csv file in {profiling_output_csv_file}') 30 | return std_ms, avg_ms, mem_mb -------------------------------------------------------------------------------- /are_16_heads/prepare_task.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | TASK=$1 4 | EVAL=${EVAL:-"1"} 5 | TRAIN_OPTIONS="${@:2}" 6 | FEATURE_MODE=${FEATURE_MODE:-0} 7 | NODROP_MODE=${NODROP_MODE:-0} 8 | 9 | 10 | prefix="$TASK" 11 | if [ "$FEATURE_MODE" -eq "1" ] 12 | then 13 | prefix="${TASK}-feature" 14 | TRAIN_OPTIONS="$TRAIN_OPTIONS --feature_mode" 15 | fi 16 | if [ "$NODROP_MODE" -eq "1" ] 17 | then 18 | prefix="${TASK}-nodrop" 19 | TRAIN_OPTIONS="$TRAIN_OPTIONS --attn_dropout 0.0" 20 | fi 21 | 22 | mkdir -p models 23 | model_dir=models/$prefix 24 | mkdir -p $model_dir 25 | 26 | function run_train () { 27 | python pytorch-pretrained-BERT/examples/run_classifier.py $TRAIN_OPTIONS \ 28 | --task_name $TASK \ 29 | --do_train \ 30 | --do_lower_case \ 31 | --data_dir glue_data/$TASK/ \ 32 | --bert_model bert-base-uncased \ 33 | --max_seq_length 128 \ 34 | --train_batch_size 32 \ 35 | --eval_batch_size 32 \ 36 | --learning_rate 2e-5 \ 37 | --num_train_epochs 3.0 \ 38 | --output_dir $model_dir 2>&1 39 | } 40 | 41 | function run_eval () { 42 | python pytorch-pretrained-BERT/examples/run_classifier.py \ 43 | --task_name $TASK \ 44 | --do_eval \ 45 | --do_lower_case \ 46 | $1 \ 47 | --data_dir glue_data/$TASK/ \ 48 | --bert_model bert-base-uncased \ 49 | --max_seq_length 128 \ 50 | --eval_batch_size 32 \ 51 | --output_dir $model_dir 2>&1 52 | } 53 | 54 | if [ ! 
-e $model_dir/pytorch_model.bin ] 55 | then 56 | run_train 57 | fi 58 | 59 | metric="eval_accuracy" 60 | if [ $TASK == "CoLA" ] 61 | then 62 | metric="Matthew" 63 | elif [ $TASK == "MRPC" ] 64 | then 65 | metric="F-1" 66 | fi 67 | 68 | if [ "$EVAL" = "1" ] 69 | then 70 | run_eval "" 71 | base_acc=$(run_eval "" | grep $metric | rev | cut -d" " -f1 | rev) 72 | fi 73 | -------------------------------------------------------------------------------- /modeling/torch_layers/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Attention(nn.Module): 5 | # reference huggingface ViTSelfAttention 6 | def __init__(self, hidden_size, num_heads, head_size=None): 7 | if head_size is None: 8 | if hidden_size % num_heads != 0: 9 | raise ValueError(f'hidden_size {hidden_size} must be a multiple of num_heads {num_heads}.') 10 | self.head_size = hidden_size // num_heads 11 | else: 12 | self.head_size = head_size 13 | 14 | super().__init__() 15 | self.num_heads = num_heads 16 | self.scale = self.head_size ** -0.5 17 | self.hidden_size = hidden_size 18 | 19 | self.to_query = nn.Linear(in_features=hidden_size, out_features=self.num_heads * self.head_size) 20 | self.to_key = nn.Linear(in_features=hidden_size, out_features=self.num_heads * self.head_size) 21 | self.to_value = nn.Linear(in_features=hidden_size, out_features=self.num_heads * self.head_size) 22 | self.to_out = nn.Linear(in_features=self.num_heads * self.head_size, out_features=hidden_size) 23 | 24 | def transpose_for_scores(self, x): 25 | new_shape = x.size()[:-1] + (self.num_heads, self.head_size) 26 | x = x.view(*new_shape) 27 | return x.permute(0, 2, 1, 3) 28 | 29 | def forward(self, x): 30 | mixed_query = self.to_query(x) 31 | 32 | key = self.transpose_for_scores(self.to_key(x)) 33 | value = self.transpose_for_scores(self.to_value(x)) 34 | query = self.transpose_for_scores(mixed_query) 35 | 36 | attention_scores = torch.matmul(query, key.transpose(-1, -2)) 37 | attention_scores = attention_scores * self.scale 38 | 39 | attention_probs = nn.Softmax(dim=-1)(attention_scores) 40 | 41 | context = torch.matmul(attention_probs, value) 42 | context = context.permute(0, 2, 1, 3).contiguous() 43 | 44 | next_shape = context.size()[:-2] + (self.num_heads * self.head_size,) 45 | context = context.view(*next_shape) 46 | context = self.to_out(context) 47 | 48 | return context -------------------------------------------------------------------------------- /deit_pruning/src/onnx_inference.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
--------------------------------------------------------------------------------
/deit_pruning/src/onnx_inference.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | import argparse
19 | import collections
20 | import logging
21 | import json
22 | import math
23 | import os
24 | import random
25 | import pickle
26 | 
27 | import time
28 | import numpy as np
29 | import torch
30 | 
31 | from pathlib import Path
32 | 
33 | def main():
34 |     parser = argparse.ArgumentParser()
35 |     parser.add_argument('--batch_size', type=int, default=1)
36 |     parser.add_argument('--model_file', type=Path)
37 |     parser.add_argument('--profile', required=False, action='store_true', help='Enable layer profiling (JSON output)')
38 |     parser.add_argument('--extra', required=False, type=str, default=None)
39 |     parser.add_argument('--seed', type=int, default=12345)
40 |     parser.add_argument('--seq_len', type=int, default=38)
41 |     # python src/onnx_inference.py --model_file ./results/dummy_mini/final/output.onnx
42 | 
43 |     args = parser.parse_args()
44 |     print(args)
45 | 
46 |     # random.seed(args.seed)
47 |     # np.random.seed(args.seed)
48 |     # torch.manual_seed(args.seed)
49 | 
50 |     profile_arg = "--profile" if args.profile else ""
51 |     extra_arg = args.extra if args.extra else ""
52 | 
53 |     perf_script = os.path.join("vendor/onnx_scripts", "bert_perf_test.py")
54 | 
55 |     os.system(f'python {perf_script} {profile_arg} --model "{args.model_file}" --batch_size {args.batch_size} --sequence_length {args.seq_len} --seed {args.seed} {extra_arg}')
56 | 
57 | 
58 | if __name__ == "__main__":
59 |     main()
60 | 
61 | 
--------------------------------------------------------------------------------
/deit_pruning/src/pytorch_prune/ln_smart.py:
--------------------------------------------------------------------------------
1 | from torch.nn.utils import prune
2 | from torch.nn.utils.prune import (
3 |     _validate_pruning_amount_init,
4 |     _validate_structured_pruning,
5 |     _compute_nparams_toprune,
6 |     _validate_pruning_amount
7 | )
8 | import torch
9 | 
10 | 
11 | class LnSmartStructured(prune.BasePruningMethod):
12 |     PRUNING_TYPE = "1d"
13 | 
14 |     def __init__(self, amount, n=1):
15 |         _validate_pruning_amount_init(amount)
16 |         self.amount = amount
17 |         self.ord = n
18 |         if n != 1:
19 |             print("WARN: LnSmartStructured is only verified in norm ord=1!")
20 | 
21 |     def make_mask(self, t, dim, indices):
22 |         # Modified from pytorch LnStructured.make_mask
23 |         # init mask to 1
24 |         mask = torch.ones_like(t)
25 |         # e.g.: slc = [None, None, None], if len(t.shape) = 3
26 |         slc = [slice(None)] * len(t.shape)
27 |         # replace a None at position=dim with indices
28 |         # e.g.: slc = [None, None, [0, 2, 3]] if dim=2 & indices=[0,2,3]
29 |         slc[dim] = indices
30 |         # use slc to slice mask and replace all its entries with 0s
31 |         # e.g.: mask[:, :, [0, 2, 3]] = 0
32 |         mask[slc] = 0
33 |         return mask
34 | 
35 |     def compute_mask(self, t, default_mask):
36 |         # _validate_structured_pruning(t)
37 |         assert len(t.shape) == 2
38 |         rows = t.shape[0]
39 |         cols = t.shape[1]
40 | 
41 |         # 1. Calculate whether to prune row (dim=0) or col (dim=1)
42 |         prune_row = False
43 |         test_nparams_toprune = _compute_nparams_toprune(self.amount, min(rows, cols))
44 |         _validate_pruning_amount(test_nparams_toprune, min(rows, cols))
45 |         row_norm_sum = torch.topk(torch.linalg.norm(t, dim=1, ord=self.ord), k=test_nparams_toprune, largest=False).values.sum() / (cols ** (1 / self.ord))  # Is it right to avoid bias between the two norm values?
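        # Note: the division by cols ** (1 / ord) here (and by rows ** (1 / ord) below)
        # rescales each norm by the number of entries it aggregates, so the row and
        # column candidate scores stay comparable when the matrix is not square.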
46 |         col_norm_sum = torch.topk(torch.linalg.norm(t, dim=0, ord=self.ord), k=test_nparams_toprune, largest=False).values.sum() / (rows ** (1 / self.ord))
47 |         # print(row_norm_sum, col_norm_sum)
48 |         if col_norm_sum >= row_norm_sum:
49 |             prune_row = True
50 | 
51 |         # 2. Prune (actually)
52 |         bcnt = rows if prune_row else cols
53 |         nparams_toprune = _compute_nparams_toprune(self.amount, bcnt)
54 |         _validate_pruning_amount(nparams_toprune, bcnt)
55 | 
56 |         mask = default_mask.clone()
57 |         if nparams_toprune != 0:
58 |             indices = torch.topk(torch.linalg.norm(t, dim=1 if prune_row else 0, ord=self.ord), k=nparams_toprune, largest=False).indices  # ord=self.ord keeps this consistent with the norm used in step 1
59 |             mask[self.make_mask(t, 0 if prune_row else 1, indices).to(dtype=mask.dtype) == 0] = 0
60 | 
61 |         return mask
62 | 
63 | 
64 | def ln_smart_structured(module, name, amount, n=1):
65 |     LnSmartStructured.apply(module, name, amount=amount, n=n)
66 |     return module
67 | 
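
# Minimal usage sketch (illustrative addition; layer sizes are arbitrary):
if __name__ == "__main__":
    import torch.nn as nn
    layer = nn.Linear(64, 64)
    ln_smart_structured(layer, "weight", amount=0.3)
    # ~30% of the rows (or columns, whichever side scores lower) are now zeroed
    print((layer.weight == 0).float().mean().item())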
--------------------------------------------------------------------------------
/are_16_heads/fetch_results.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | from pathlib import Path
4 | import os
5 | 
6 | def fetch_accuracy(parser: argparse.ArgumentParser):
7 |     parser.add_argument('--file', '-f', type=str, help='input file to fetch accuracy')
8 |     parser.add_argument('--begin_line', type=int, default=0, help='begin line number')
9 |     parser.add_argument('--end_line', type=int, default=None, help='end line number plus one')
10 |     args = parser.parse_args()
11 | 
12 |     f = open(args.file)
13 |     if args.end_line:
14 |         lines = f.readlines()[args.begin_line: args.end_line]
15 |     else:
16 |         lines = f.readlines()[args.begin_line:]
17 | 
18 | 
19 |     acc_list = []
20 |     for i in range(len(lines)):
21 |         line = lines[i]
22 |         if 'Pruning eval results' in line:
23 |             tokens = lines[i + 1].split()
24 |             acc = round(float(tokens[2]) * 100, 2)
25 |             acc_list.append(acc)
26 |         # if 'Finetuning eval results' in line:
27 |         #     tokens = lines[i + 1].split()
28 |         #     acc = round(float(tokens[3]) * 100, 2)
29 |         #     acc_list.append(acc)
30 | 
31 | 
32 |     print(acc_list)
33 |     return acc_list
34 | 
35 | 
36 | def fetch_accuracy_from_path(parser: argparse.ArgumentParser):
37 |     parser.add_argument('--path', type=Path, help='pruned model directory to fetch accuracy')
38 |     parser.add_argument('--finetuned', action='store_true', help='sub_dir is "final_finetuned" instead of "final"')
39 |     args = parser.parse_args()
40 | 
41 |     final_str = 'final' if not args.finetuned else 'final_finetuned'
42 | 
43 |     model_list = sorted(os.listdir(args.path), key=lambda x: int(x[x.find('prune') + len('prune'): ]))
44 |     acc_list = []
45 |     for model_name in model_list:
46 |         final_dir = args.path / model_name / final_str
47 |         acc_file_name = sorted(os.listdir(final_dir))[0]
48 |         if 'accuracy' not in acc_file_name:
49 |             print(f'Please check the contents of this dir {final_dir}: {os.listdir(final_dir)}')
50 |             exit()
51 | 
52 |         acc = round(int(acc_file_name[len('accuracy'): len('accuracy') + 4]) / 100, 2)
53 |         acc_list.append(acc)
54 | 
55 |     print(acc_list)
56 | 
57 | function_dict = dict(
58 |     fetch_accuracy = fetch_accuracy,
59 |     fetch_accuracy_from_path = fetch_accuracy_from_path,
60 | )
61 | 
62 | 
63 | if __name__ == '__main__':
64 |     parser = argparse.ArgumentParser()
65 |     parser.add_argument('func', help='Specify the function to do.')
66 | 
67 |     assert(len(sys.argv) > 1)
68 |     func = sys.argv[1]
69 | 
70 |     if func not in function_dict.keys():
71 |         print('Supported functions: ', list(function_dict.keys()))
72 |         exit()
73 | 
74 |     function_dict[func](parser)
75 | 
--------------------------------------------------------------------------------
/deit_pruning/src/pytorch_prune/block.py:
--------------------------------------------------------------------------------
1 | from torch.nn.utils import prune
2 | from torch.nn.utils.prune import (
3 |     _validate_pruning_amount_init,
4 |     _validate_structured_pruning,
5 |     _compute_nparams_toprune,
6 |     _validate_pruning_amount
7 | )
8 | import torch
9 | 
10 | 
11 | class BlockPruningMethod(prune.BasePruningMethod):
12 |     # Well, pytorch thinks that structured pruning shall have a 'dim' attr
13 |     # and unstructured pruning shall accept 1-d tensors.
14 |     # Block pruning satisfies neither of these conditions, so I set PRUNING_TYPE to this value to avoid misuse.
15 |     PRUNING_TYPE = "block"
16 | 
17 |     def __init__(self, amount, block_row, block_col, n='fro'):
18 |         _validate_pruning_amount_init(amount)
19 |         self.amount = amount
20 |         self.block_row = block_row
21 |         self.block_col = block_col
22 |         self.ord = n
23 | 
24 |     def get_block_view(self, matrix):
25 |         rows = matrix.shape[0]
26 |         cols = matrix.shape[1]
27 | 
28 |         assert rows % self.block_row == 0
29 |         assert cols % self.block_col == 0
30 | 
31 |         brows = rows // self.block_row
32 |         bcols = cols // self.block_col
33 |         bcnt = brows * bcols
34 | 
35 |         def subview(idx):
36 |             rstart = idx // bcols * self.block_row
37 |             rend = (idx // bcols + 1) * self.block_row
38 |             cstart = idx % bcols * self.block_col
39 |             cend = (idx % bcols + 1) * self.block_col
40 | 
41 |             return matrix[rstart:rend, cstart:cend]
42 | 
43 |         blocks = [subview(idx) for idx in range(bcnt)]
44 | 
45 |         return torch.stack(blocks)
46 | 
47 |     def compute_mask(self, t, default_mask):
48 |         # _validate_structured_pruning(t)
49 |         assert len(t.shape) == 2
50 |         rows = t.shape[0]
51 |         cols = t.shape[1]
52 |         assert rows % self.block_row == 0
53 |         assert cols % self.block_col == 0
54 |         brows = rows // self.block_row
55 |         bcols = cols // self.block_col
56 |         bcnt = brows * bcols
57 | 
58 |         nparams_toprune = _compute_nparams_toprune(self.amount, bcnt)
59 |         _validate_pruning_amount(nparams_toprune, bcnt)
60 | 
61 |         mask = default_mask.clone()
62 |         if nparams_toprune != 0:
63 |             block_view = self.get_block_view(t)
64 |             norms = torch.linalg.norm(block_view, ord=self.ord, dim=(1, 2))
65 |             indices = torch.topk(norms, k=nparams_toprune, largest=False).indices
66 |             this_mask = torch.ones((brows, bcols), device=t.device)  # create on t's device so boolean indexing works for GPU tensors
67 |             this_mask.view(-1)[indices] = 0
68 |             this_mask = torch.repeat_interleave(this_mask, self.block_row, dim=0)
69 |             this_mask = torch.repeat_interleave(this_mask, self.block_col, dim=1)
70 |             mask[this_mask == 0] = 0
71 |         return mask
72 | 
73 | 
74 | def block_pruning(module, name, amount, block_row, block_col, n='fro'):
75 |     BlockPruningMethod.apply(module, name, amount=amount, block_row=block_row, block_col=block_col, n=n)
76 |     return module
77 | 
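
# Minimal usage sketch (illustrative addition; sizes are arbitrary): remove the 50%
# of 4x4 blocks with the smallest Frobenius norm from one linear layer.
if __name__ == "__main__":
    import torch.nn as nn
    layer = nn.Linear(64, 64)
    block_pruning(layer, "weight", amount=0.5, block_row=4, block_col=4)
    print((layer.weight == 0).float().mean().item())  # -> 0.5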
sh does not have "source", so use ". ./xxx.sh" here. 20 | setup: 21 | - . ./itp/setup.sh 22 | 23 | code: 24 | # upload the code 25 | local_dir: $CONFIG_DIR/../../ 26 | 27 | storage: 28 | teamdrive: 29 | storage_account_name: hexnas 30 | container_name: teamdrive 31 | mount_dir: /mnt/data 32 | local_dir: $CONFIG_DIR/../../../faketeamdrive/ 33 | 34 | 35 | jobs: 36 | {jobs} 37 | """ 38 | 39 | job_template = \ 40 | """- name: {job_name} 41 | sku: G{gpu_cnt} 42 | command: 43 | - ./itp/run_itp.sh {function} 44 | """ 45 | 46 | func_to_job_name_dict = { 47 | 'iterative_pruning_base': 'D1009_are16heads_iterative_pruning_deit_base', 48 | 'finetune_many_base': 'D1013_are16heads_finetune_pruned_deit_base' 49 | } 50 | def main(mode): 51 | function = sys.argv[2] 52 | assert function in func_to_job_name_dict.keys() 53 | 54 | job_name = func_to_job_name_dict[function] # !! Edit this 55 | jobs = "" 56 | jobs += job_template.format( 57 | job_name=job_name, gpu_cnt=4, function=function 58 | ) 59 | description = f"EdgeDL Are16heads exp ({job_name})" 60 | 61 | # ====================================================================================================== 62 | # Don't need to modify following code 63 | result = template.format( 64 | job_name=job_name, 65 | jobs=jobs, 66 | ) 67 | print(result) 68 | 69 | tmp_name = ''.join(random.choices(string.ascii_lowercase, k=6)) + job_name 70 | tmp_name = os.path.join("./.tmp", tmp_name) 71 | with open(tmp_name, "w") as fout: 72 | fout.write(result) 73 | if mode == 0: 74 | subprocess.run(["amlt", "run", "-t", "local", "--use-sudo", tmp_name, "--devices", "all"]) 75 | input() 76 | elif mode == 1: 77 | subprocess.run(["amlt", "run", "-d", description, tmp_name, job_name]) 78 | 79 | 80 | if __name__ == "__main__": 81 | # example: python xx.py submit tiny 50 82 | # tiny (sys.argv[2]) is deit_type 83 | # 50 (sys.argv[2]) is sparsity 84 | if len(sys.argv) == 2 and sys.argv[1] in ('--help', '-h'): 85 | print('Example cmd: python submit iterative_pruning_base') 86 | exit() 87 | mode = 2 88 | if len(sys.argv) == 3 and sys.argv[1] == 'submit': 89 | print("Submit (pt run)") 90 | mode = 1 91 | elif len(sys.argv) == 3 and sys.argv[1] == 'debug': 92 | print("Debug dry run (pt run -t local)") 93 | mode = 0 94 | else: 95 | print("Print only") 96 | 97 | main(mode) 98 | -------------------------------------------------------------------------------- /experiments/D1130_vino_quant_cnn_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import subprocess 5 | 6 | sys.path.insert(0, f'{os.path.dirname(sys.argv[0])}/..') 7 | from benchmark.openvino.vino_cli import openvino_benchmark 8 | 9 | MODEL_LIST = [ 10 | 'mobilenet-v1-1.0-224-tf', 11 | 'mobilenet-v2-1.0-224', 12 | 'shufflenet-v2-x1.0', 13 | 'inception-resnet-v2-tf', 14 | 'efficientnet-b0', 15 | 'resnet-34-pytorch', 16 | 'resnet-50-tf', 17 | ] 18 | 19 | 20 | class ModelExporter: 21 | def __init__(self, model_list, download_model_dir, ir_model_dir): 22 | self.model_list = model_list 23 | self.download_model_dir = download_model_dir 24 | self.ir_model_dir = ir_model_dir 25 | pass 26 | 27 | def download(self): 28 | subprocess.run( 29 | f'python $VINO_DOWNLOADER --name={",".join(self.model_list)} --precisions=FP32 --output_dir={self.download_model_dir}', shell=True) 30 | 31 | def convert(self): 32 | subprocess.run( 33 | f'python $VINO_CONVERTER --name={",".join(self.model_list)} --precisions=FP32 --download_dir={self.download_model_dir} 
34 | 
35 |     def quantize(self):
36 |         subprocess.run(
37 |             f'python $VINO_QUANTER --name={",".join(self.model_list)} --model_dir={self.ir_model_dir} --dataset_dir=datasets/imagenet2012 --output_dir={self.ir_model_dir} --precisions=FP32-INT8', shell=True)
38 | 
39 |     def benchmark(self):
40 |         print('========== Benchmarking model performance on CPU with 1 thread')
41 |         latency_list_fp32 = []
42 |         latency_list_int8 = []
43 | 
44 |         for name in self.model_list:
45 |             print(f'===== Testing {name} =====')
46 |             model_path_fp32 = os.path.join(self.ir_model_dir, 'public', name, 'FP32', f'{name}.xml')
47 |             model_path_int8 = os.path.join(self.ir_model_dir, 'public', name, 'FP32-INT8', f'{name}.xml')
48 | 
49 |             latency_fp32 = openvino_benchmark('$VINO_BENCHMARK_APP', model_path_fp32, niter=30, num_threads=1, batch_size=1)
50 |             latency_int8 = openvino_benchmark('$VINO_BENCHMARK_APP', model_path_int8, niter=30, num_threads=1, batch_size=1)
51 | 
52 |             latency_list_fp32.append(latency_fp32)
53 |             latency_list_int8.append(latency_int8)
54 | 
55 |         print('== SUMMARY ==')
56 |         print(self.model_list)
57 |         print('FP32 Latency:')
58 |         print([round(v, 2) for v in latency_list_fp32])
59 |         print('INT8 Latency:')
60 |         print([round(v, 2) for v in latency_list_int8])
61 | 
62 | 
63 | if __name__ == '__main__':
64 |     model_exporter = ModelExporter(
65 |         MODEL_LIST, 'models/vino_model/download', 'models/vino_model/ir')
66 |     model_exporter.download()
67 |     model_exporter.convert()
68 |     model_exporter.quantize()
69 |     model_exporter.benchmark()
--------------------------------------------------------------------------------
/benchmark/tensorrt/calibrator.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | 
17 | import tensorrt as trt
18 | import os
19 | 
20 | import pycuda.driver as cuda
21 | import pycuda.autoinit
22 | from PIL import Image
23 | import numpy as np
24 | 
25 | class DummyCalibrator(trt.IInt8EntropyCalibrator2):
26 |     def __init__(self, training_data, cache_file='tmp.cache', batch_size=64):
27 |         # Whenever you specify a custom constructor for a TensorRT class,
28 |         # you MUST call the constructor of the parent explicitly.
29 |         trt.IInt8EntropyCalibrator2.__init__(self)
30 | 
31 |         self.cache_file = cache_file
32 | 
33 |         # Every time get_batch is called, the next batch of size batch_size will be copied to the device and returned.
34 |         self.data = training_data
35 |         self.batch_size = batch_size
36 |         self.current_index = 0
37 | 
38 |         # Allocate enough memory for a whole batch.
39 |         self.device_input = cuda.mem_alloc(self.data[0].nbytes * self.batch_size)
40 | 
41 |     def get_batch_size(self):
42 |         return self.batch_size
43 | 
44 |     # TensorRT passes along the names of the engine bindings to the get_batch function.
45 |     # You don't necessarily have to use them, but they can be useful to understand the order of
46 |     # the inputs. The bindings list is expected to have the same ordering as 'names'.
47 |     def get_batch(self, names):
48 |         if self.current_index + self.batch_size > self.data.shape[0]:
49 |             return None
50 | 
51 |         current_batch = int(self.current_index / self.batch_size)
52 |         # if current_batch % 10 == 0:
53 |         #     print("Calibrating batch {:}, containing {:} images".format(current_batch, self.batch_size))
54 | 
55 |         batch = self.data[self.current_index:self.current_index + self.batch_size].ravel()
56 |         cuda.memcpy_htod(self.device_input, batch)
57 |         self.current_index += self.batch_size
58 |         return [self.device_input]
59 | 
60 | 
61 |     def read_calibration_cache(self):
62 |         pass
63 |         # # If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None.
64 |         # if os.path.exists(self.cache_file):
65 |         #     with open(self.cache_file, "rb") as f:
66 |         #         return f.read()
67 | 
68 |     def write_calibration_cache(self, cache):
69 |         # with open(self.cache_file, "wb") as f:
70 |         #     f.write(cache)
71 |         pass
72 | 
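# Wiring sketch (illustrative, not part of this file): attaching the calibrator to a
# TensorRT builder config for INT8 calibration. `builder` is assumed to be an existing
# trt.Builder and `data` a float32 numpy array of preprocessed calibration samples.
#
#     config = builder.create_builder_config()
#     config.set_flag(trt.BuilderFlag.INT8)
#     config.int8_calibrator = DummyCalibrator(data, batch_size=64)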
--------------------------------------------------------------------------------
/deit_pruning/vendor/nn_pruning_v1/nn_pruning/modules/gelu2relu.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from nn_pruning.model_patcher import ModelPatcher
5 | 
6 | 
7 | class GeLU2ReLU(nn.Module):
8 |     # There are two ways to specify how the module will move progressively from a GeLU to a ReLU.
9 |     # If you give a non-None schedule_callback, steps won't be used.
10 |     # It must be a function that returns a dictionary containing at least the key:
11 |     # - mix : moving from 1.0 to 0.0, it is the lerp factor between GeLU and ReLU
12 |     # If you don't specify a schedule_callback, each call to forward will count as a step, and in 'steps' steps
13 |     # it will move from a GeLU to a ReLU.
14 |     def __init__(self,
15 |                  steps = 5000,
16 |                  schedule_callback = None):
17 |         super().__init__()
18 |         self.schedule_callback = schedule_callback
19 | 
20 |         if self.schedule_callback is None:
21 |             self.steps = steps
22 |             self.mix_step = 1 / self.steps
23 |             self.mix = 1.0
24 |         else:
25 |             self.steps = None
26 |             self.mix_step = None
27 |             self.mix = None
28 | 
29 |     def forward(self, batch):
30 |         if self.schedule_callback is not None:
31 |             d = self.schedule_callback()
32 |             mix = d["mix"]
33 |         else:
34 |             if self.training:
35 |                 mix = self.mix
36 |             else:
37 |                 mix = 0
38 | 
39 |         if mix == 0:
40 |             ret = F.relu(batch)
41 |         else:
42 |             g = F.gelu(batch)
43 |             r = F.relu(batch)
44 |             ret = torch.lerp(r, g, mix)
45 | 
46 |         if self.training:
47 |             if self.schedule_callback is None:
48 |                 self.mix = max(0.0, self.mix - self.mix_step)
49 | 
50 |         return ret
51 | 
52 | class GeLU2ReLUModelPatcher(ModelPatcher):
53 |     def __init__(self,
54 |                  steps = 5000,
55 |                  schedule_callback = None):
56 |         super().__init__(all_match=True)
57 |         self.steps = steps
58 |         self.schedule_callback = schedule_callback
59 |         self.names = ["intermediate_act_fn", "transform_act_fn", "activation_fn"]
60 | 
61 |     def is_patchable(self, module_name, module, raiseError):
62 |         for name in self.names:
63 |             if hasattr(module, name):
64 |                 return True
65 |         return False
66 | 
67 |     def new_child_module(self, child_module_name, child_module, patch_info):
68 |         fn = GeLU2ReLU(steps=self.steps,
69 |                        schedule_callback=self.schedule_callback)
70 |         patched = False  # only flipped to True once an activation attribute is actually replaced, so the assert below is meaningful
71 |         for name in 
self.names: 72 | if hasattr(child_module, name): 73 | setattr(child_module, name, fn) 74 | patched = True 75 | 76 | assert(patched) 77 | # We don't return a new child module, we change it in place, we return the module itself to let the patcher update stats 78 | return child_module 79 | 80 | 81 | -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/nn_pruning/modules/AmpereRework.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 79, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "tensor([[0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0.],\n", 13 | " [0., 1., 1., 0., 0., 0., 1., 1., 1., 0., 0., 1.],\n", 14 | " [1., 1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 1.],\n", 15 | " [1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0.],\n", 16 | " [0., 0., 1., 1., 0., 1., 1., 0., 1., 0., 0., 1.],\n", 17 | " [0., 1., 1., 0., 1., 0., 1., 0., 0., 1., 1., 0.],\n", 18 | " [0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0.],\n", 19 | " [1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 0., 1.],\n", 20 | " [1., 1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1.],\n", 21 | " [0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 0.],\n", 22 | " [1., 0., 0., 1., 0., 1., 1., 0., 1., 0., 1., 0.]])\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "import torch\n", 28 | "\n", 29 | "from nn_pruning.modules.binarizer import MagnitudeBinarizer, ThresholdBinarizer, TopKBinarizer\n", 30 | "\n", 31 | "def ampere_mask_threshold(mask_scores, threshold, sigmoid, train):\n", 32 | " assert((mask_scores.shape[1] % 4) == 0)\n", 33 | " \n", 34 | " mask_scores_4 = mask_scores.reshape(mask_scores.shape[0], mask_scores.shape[1] // 4, 4)\n", 35 | " top = torch.topk(mask_scores_4, 2, dim=2, largest=True) \n", 36 | " top_mask = torch.zeros_like(mask_scores_4, device=mask_scores.device) \n", 37 | " top_mask = top_mask.scatter(2, top.indices, True) \n", 38 | " top_mask = top_mask.reshape_as(mask_scores)\n", 39 | " \n", 40 | " if train:\n", 41 | " mask = ThresholdBinarizer.apply(mask_scores, threshold, sigmoid) \n", 42 | " ret = torch.max(mask, top_mask)\n", 43 | " else:\n", 44 | " ret = top_mask\n", 45 | " \n", 46 | " return ret\n", 47 | " \n", 48 | " \n", 49 | "scores = torch.randn(11,12)\n", 50 | "\n", 51 | "mask = ampere_mask_threshold(scores, 0.9, True, False)\n", 52 | "\n", 53 | "assert(mask.shape == scores.shape)\n", 54 | "\n", 55 | "print(mask)\n" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [] 64 | } 65 | ], 66 | "metadata": { 67 | "kernelspec": { 68 | "display_name": "Python 3", 69 | "language": "python", 70 | "name": "python3" 71 | }, 72 | "language_info": { 73 | "codemirror_mode": { 74 | "name": "ipython", 75 | "version": 3 76 | }, 77 | "file_extension": ".py", 78 | "mimetype": "text/x-python", 79 | "name": "python", 80 | "nbconvert_exporter": "python", 81 | "pygments_lexer": "ipython3", 82 | "version": "3.8.5" 83 | } 84 | }, 85 | "nbformat": 4, 86 | "nbformat_minor": 4 87 | } 88 | -------------------------------------------------------------------------------- /deit_pruning/src/latency_model.py: -------------------------------------------------------------------------------- 1 | import json 2 | from sklearn.metrics import mean_squared_error 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.ensemble import 
RandomForestRegressor
5 | import numpy as np
6 | import pickle
7 | 
8 | 
9 | def get_accuracy(y_pred, y_true, threshold=0.01):
10 |     a = (y_true - y_pred) / y_true
11 | 
12 | 
13 |     b = np.where(abs(a) <= threshold)
14 |     return len(b[0]) / len(y_true)
15 | 
16 | 
17 | 
18 | def lat_metrics(y_pred, y_true):
19 |     rmspe = (np.sqrt(np.mean(np.square((y_true - y_pred) / y_true)))) * 100
20 |     rmse = np.sqrt(mean_squared_error(y_pred, y_true))
21 |     acc5 = get_accuracy(y_pred, y_true, threshold=0.05)
22 |     acc10 = get_accuracy(y_pred, y_true, threshold=0.10)
23 |     acc15 = get_accuracy(y_pred, y_true, threshold=0.15)
24 | 
25 | 
26 |     return rmse, rmspe, rmse / np.mean(y_true), acc5, acc10, acc15
27 | 
28 | def get_feature(fe):
29 |     layers = fe.split('-')
30 |     X = []
31 |     for layer in layers:
32 |         items = layer.split('_')
33 |         h = float(items[1])
34 |         d = float(items[-1])
35 |         X.append(h)
36 |         X.append(d)
37 |     return X
38 | 
39 | 
40 | def get_latency(filename):
41 |     X = []
42 |     Y = []
43 |     f1 = open("latency.csv", 'w')
44 |     with open(filename, 'r') as fw:
45 |         dicts = json.load(fw)
46 |     for mid in dicts:
47 |         # print(mid, dicts[mid])
48 |         fe = mid.split('\\')[-1].replace(".onnx", "")
49 |         data = dicts[mid]
50 |         items = data.split('\r\n')
51 |         # print(items)
52 |         avg = float(items[-3].split(': ')[-1].replace(" us", ""))
53 |         x = get_feature(fe)
54 |         print(fe, avg)
55 |         X.append(x)
56 |         Y.append(avg)
57 |         f1.write(fe + ',' + str(avg) + '\n')
58 |     return X, Y
59 | 
60 | def get_model(filename):
61 |     X, Y = get_latency(filename)
62 |     print(len(X))
63 |     trainx, testx, trainy, testy = train_test_split(
64 |         X, Y, test_size=0.2, random_state=10
65 |     )
66 | 
67 |     print(min(Y), max(Y), np.average(Y))
68 | 
69 | 
70 |     model = RandomForestRegressor(max_depth=70, n_estimators=320, min_samples_leaf=1, min_samples_split=2,
71 |                                   max_features=8, oob_score=True, random_state=10)
72 | 
73 |     model.fit(trainx, trainy)
74 |     predicts = model.predict(testx)
75 |     rmse, rmspe, error, acc5, acc10, _ = lat_metrics(predicts, testy)
76 |     print(rmse, rmspe, error, acc5, acc10)
77 |     for i in range(len(testy)):
78 |         print(testy[i], predicts[i], (testy[i] - predicts[i]) / testy[i])
79 | 
80 |     model.fit(X, Y)
81 |     with open("latency/latency_model.pkl", "wb") as f:
82 |         pickle.dump(model, f)
83 | 
84 | def predict(feature):
85 |     with open('latency/latency_model.pkl', "rb") as f:
86 |         model = pickle.load(f)
87 |     return model.predict(feature)[0]
88 | 
89 | # get_model("latency/latency_bench_newt13.json")
90 | if __name__ == '__main__':
91 |     fe = get_feature('h_4_d_0.4-h_4_d_0.4-h_4_d_0.4-h_4_d_0.4')
92 |     print(fe)
93 |     latency = predict([fe])
94 |     print(latency)
--------------------------------------------------------------------------------
/deit_pruning/vendor/nn_pruning_v1/nn_pruning/model_patcher.py:
--------------------------------------------------------------------------------
1 | import re
2 | 
3 | class ModelPatcher:
4 |     def __init__(self, all_match=False):
5 |         self.patterns = []
6 |         self.stats = {"patched": 0, "patched_names": set()}
7 |         self.all_match = all_match
8 | 
9 |     def is_patchable(self, child_module_name, child_module, raiseError):
10 |         # Implement in subclass if needed
11 |         return True
12 | 
13 |     def new_child_module(self, child_module_name, child_module, patch_info):
14 |         # Just return None if the module does not need to be patched, or a new module.
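        # A minimal (hypothetical) override could look like:
        #     class Gelu2IdentityPatcher(ModelPatcher):
        #         def new_child_module(self, child_module_name, child_module, patch_info):
        #             import torch.nn as nn
        #             return nn.Identity() if isinstance(child_module, nn.GELU) else None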
15 |         raise NotImplementedError("Implement this in subclasses")
16 | 
17 |     def get_patchable_layers(self, model, number_rewrite=True):
18 |         # Layer names (displayed as regexps)
19 |         ret = {}
20 |         for k, v in model.named_modules():
21 |             if self.is_patchable(k, v, raiseError=False):
22 |                 r = re.escape(k)
23 |                 r = re.sub(r"[0-9]+", "[0-9]+", r)
24 |                 if r not in ret:
25 |                     ret[r] = []
26 |                 ret[r].append(v)
27 | 
28 |         return ret
29 | 
30 |     def add_pattern(self, pattern, patch_info):
31 |         self.patterns.append(dict(pattern=pattern, patch_info=patch_info))
32 | 
33 |     def pattern_match(self, module_name):
34 |         if self.all_match:
35 |             return True, -1
36 |         for pattern_def in self.patterns:
37 |             if re.match(pattern_def["pattern"], module_name):
38 |                 return True, pattern_def["patch_info"]
39 |         return False, -1
40 | 
41 | 
42 | 
43 |     def replace_module(self, father, child_module_name, child_name, child_module, patch_info):
44 |         new_child_module = self.new_child_module(child_module_name, child_module, patch_info)
45 |         if new_child_module is not None:
46 |             self.stats["patched"] += 1
47 |             self.stats["patched_names"].add(child_module_name)
48 |             setattr(father, child_name, new_child_module)
49 | 
50 |     def needs_patch(self, model):
51 |         for k, v in model.named_modules():
52 |             if self.is_patchable(k, v, raiseError=True):
53 |                 return True
54 | 
55 |         return False
56 | 
57 |     def patch(self, model):
58 |         modules = {}
59 |         modified = False
60 |         if self.all_match and len(self.patterns) != 0:
61 |             print("Warning: all_match is true, but there are some defined patterns, those will be ignored")
62 |         for k, v in model.named_modules():
63 |             modules[k] = v
64 |             match, patch_info = self.pattern_match(k)
65 |             if match and self.is_patchable(k, v, raiseError=True):
66 |                 parts = k.split(".")
67 |                 father_module_name = ".".join(parts[:-1])
68 |                 child_name = parts[-1]
69 |                 father = modules[father_module_name]
70 |                 self.replace_module(father, k, child_name, v, patch_info)
71 |                 modified = True
72 | 
73 |         if not modified:
74 |             print(
75 |                 "Warning: the patcher did not patch anything!"
76 |                 " Check patchable layers with `mp.get_patchable_layers(model)`"
77 |             )
78 | 
79 | 
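# Usage sketch (illustrative, building on the hypothetical Gelu2IdentityPatcher from
# the comment in new_child_module above; `model` is any torch model):
#
#     mp = Gelu2IdentityPatcher()
#     mp.add_pattern(r".*encoder\.layer\.[0-9]+.*", "gelu")
#     mp.patch(model)
#     print(mp.stats["patched"], "modules replaced")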
76 | " Check patchable layers with `mp.get_patchable_layers(model)`" 77 | ) 78 | 79 | -------------------------------------------------------------------------------- /experiments/D1130_tflite_gpu_r21_benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | import os 4 | import re 5 | class ADB: 6 | def __init__(self, serino): 7 | self.serino = serino 8 | 9 | def push(self, src, dst): 10 | subprocess.check_output(f'adb -s {self.serino} push {src} {dst}', shell=True) 11 | 12 | def pull(self, src, dst): 13 | subprocess.check_output(f'adb -s {self.serino} pull {src} {dst}', shell=True) 14 | 15 | def remove(self, dst): 16 | subprocess.check_output(f'adb -s {self.serino} shell rm {dst}', shell=True) 17 | 18 | def run_cmd(self, cmd): 19 | result = subprocess.check_output(f'adb -s {self.serino} shell {cmd}', shell=True).decode('utf-8') 20 | return result 21 | 22 | def fetch_number(text: str, marker: str): 23 | result = re.findall(f'{marker}\d+\.\d+|{marker}\d+', text)[0] 24 | return float(result[len(marker):]) 25 | 26 | def main(): 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('--model_dir', type=str, required=True, help='tflite model dir to test') 29 | parser.add_argument('--serino', default='98281FFAZ009SV', type=str, help='phone serial number to test') 30 | parser.add_argument('--precision', default=3, type=int, help='precision to print latency') 31 | parser.add_argument('--dump_csv', action='store_true', dest='dump_csv', help='dump result to csv file') 32 | args = parser.parse_args() 33 | 34 | adb = ADB(args.serino) 35 | name_list = [] 36 | latency_list_f32 = [] 37 | latency_list_f16 = [] 38 | 39 | if args.dump_csv: 40 | with open('result_gpu_fp32.csv', 'a') as f: 41 | f.write('model_name, fp32_ms\n') 42 | with open('result_gpu_fp16.csv', 'a') as f: 43 | f.write('model_name fp16_ms\n') 44 | 45 | for name in sorted(os.listdir(args.model_dir)): 46 | f32_ms = 0 47 | f16_ms = 0 48 | try: 49 | name_list.append(os.path.splitext(os.path.basename(name))[0]) 50 | model_path = os.path.join(args.model_dir, name) 51 | dst_path = f'/sdcard/{name}' 52 | adb.push(model_path, dst_path) 53 | result_f32 = adb.run_cmd(f'"cd /data/local/tmp && ./benchmark_model_fixed_group_size --graph={dst_path} --use_gpu=true --precision=F32"') 54 | result_f16 = adb.run_cmd(f'"cd /data/local/tmp && ./benchmark_model_fixed_group_size --graph={dst_path} --use_gpu=true --precision=F16"') 55 | adb.remove(dst_path) 56 | f32_ms = fetch_number(result_f32, 'comp_avg_ms=') 57 | f16_ms = fetch_number(result_f16, 'comp_avg_ms=') 58 | except: 59 | pass 60 | latency_list_f32.append(round(f32_ms, args.precision)) 61 | latency_list_f16.append(round(f16_ms, args.precision)) 62 | 63 | print(name_list[-1], f32_ms, f16_ms) 64 | if args.dump_csv: 65 | with open('result_gpu_fp32.csv', 'a') as f: 66 | f.write(f'{name_list[-1]}, {latency_list_f32[-1]}\n') 67 | with open('result_gpu_fp16.csv', 'a') as f: 68 | f.write(f'{name_list[-1]}, {latency_list_f16[-1]}\n') 69 | 70 | print('==== LATENCY SUMMARY ====') 71 | print(name_list) 72 | print('[F32 Latency]') 73 | print(latency_list_f32) 74 | print('[F16 Latency]') 75 | print(latency_list_f16) 76 | 77 | if __name__ == '__main__': 78 | main() -------------------------------------------------------------------------------- /are_16_heads/evaluate_iterative_pruned_deit.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import 
3 | import torch.distributed as dist
4 | from classifier_eval import evaluate
5 | from util import build_dataset
6 | from classifier_scoring import Accuracy
7 | from pathlib import Path
8 | from transformers import AutoModelForImageClassification
9 | import os
10 | from logger import logger
11 | 
12 | def evaluate_one_model(model_path, dataset, batch_size, local_rank, num_workers):
13 |     is_main = local_rank == -1 or local_rank == 0
14 |     if is_main:
15 |         logger.info(f'*** Evaluating {model_path} ***')
16 | 
17 |     model = AutoModelForImageClassification.from_pretrained(model_path)
18 |     if local_rank == -1:
19 |         device = torch.device('cuda')
20 |     else:
21 |         device = torch.device('cuda', local_rank)
22 |     model.to(device)
23 | 
24 |     scorer = Accuracy()
25 |     accuracy = evaluate(
26 |         dataset,
27 |         model,
28 |         batch_size,
29 |         save_attention_probs=False,
30 |         print_head_entropy=False,
31 |         verbose=False,
32 |         scorer=scorer,
33 |         distributed=local_rank != -1,
34 |         num_workers=num_workers
35 |     )[scorer.name]
36 | 
37 |     if is_main:
38 |         logger.info("***** Pruning eval results *****")
39 |         logger.info(f"Accuracy\t{accuracy}")
40 |         accuracy_file_name = f'accuracy{int(accuracy * 10000)}.txt'
41 |         os.system(f'touch {os.path.join(model_path, accuracy_file_name)}')
42 | 
43 | 
44 | def main():
45 |     parser = argparse.ArgumentParser()
46 |     parser.add_argument('--local_rank', type=int, default=-1, help='local rank for distributed evaluation')
47 |     parser.add_argument('--data_path', type=Path, default='/data/data1/v-xudongwang/imagenet', help='imagenet2012 dataset path')
48 |     parser.add_argument('--model_path', type=Path, required=True, help='directory of models or a model to evaluate')
49 |     parser.add_argument('--batch_size', type=int, default=500, help='evaluate batch size per gpu')
50 |     parser.add_argument('--eval_dir_of_models', action='store_true', help='evaluate all models in model_path')
51 |     parser.add_argument('--num_workers', default=8, type=int, help='dataloader number of workers')
52 |     args = parser.parse_args()
53 | 
54 |     dataset, _ = build_dataset(args.data_path, is_train=False, shuffle=False, return_dict=False)
55 | 
56 |     if args.local_rank != -1:
57 |         dist.init_process_group("nccl", rank=args.local_rank, world_size=torch.cuda.device_count())
58 | 
59 |     if not args.eval_dir_of_models:
60 |         evaluate_one_model(model_path=args.model_path / 'final', dataset=dataset, batch_size=args.batch_size, local_rank=args.local_rank, num_workers=args.num_workers)
61 |     else:
62 |         for model_name in sorted(os.listdir(args.model_path)):
63 |             model_path = args.model_path / model_name
64 |             if 'final' in os.listdir(model_path):
65 |                 model_path = model_path / 'final'
66 |             if len(os.listdir(model_path)) < 3:
67 |                 evaluate_one_model(model_path=model_path, dataset=dataset, batch_size=args.batch_size, local_rank=args.local_rank, num_workers=args.num_workers)
68 |             else:
69 |                 logger.info(os.listdir(model_path))
70 |                 logger.info(f"{model_name} already evaluated. 
Skip.") 71 | 72 | if __name__ == '__main__': 73 | main() -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/nn_pruning/tests/test_patch2.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import TestCase 3 | from nn_pruning.patch_coordinator import SparseTrainingArguments, ModelPatchingCoordinator 4 | from nn_pruning.modules.masked_nn import ( 5 | GenericLinearPruningContextModule, 6 | BlockLinearPruningContextModule, 7 | SingleDimensionLinearPruningContextModule, 8 | ) 9 | from transformers import AutoConfig, AutoModelForQuestionAnswering 10 | 11 | import copy 12 | 13 | class TestFun(TestCase): 14 | def helper(self, sparse_args, model_name_or_path): 15 | config = AutoConfig.from_pretrained(model_name_or_path) 16 | model = AutoModelForQuestionAnswering.from_pretrained(model_name_or_path) 17 | 18 | device = "cuda" 19 | cache_dir = None 20 | logit_names = ["start_logits", "end_logits"] 21 | teacher_constructor = AutoModelForQuestionAnswering 22 | 23 | coordinator = ModelPatchingCoordinator(sparse_args, device, cache_dir, model_name_or_path, logit_names, teacher_constructor) 24 | 25 | return config, model, coordinator 26 | 27 | def test_base(self): 28 | sparse_args = SparseTrainingArguments.hybrid(20.0) 29 | sparse_args.layer_norm_patch = True 30 | sparse_args.gelu_patch = True 31 | 32 | ref_stats = { 33 | "bert-base-uncased": {"main": {"patched": 72}, "layer_norm": {"patched": 25}, "gelu": {"patched": 12}}, 34 | "bert-large-uncased": {"main": {"patched": 144}, "layer_norm": {"patched": 49}, "gelu": {"patched": 24}}, 35 | "facebook/bart-base": {"main": {"patched": 96}, "layer_norm": {"patched": 32}, "gelu": {"patched": 12}} 36 | } 37 | 38 | for model_name_or_path in ref_stats.keys(): 39 | config, model, coordinator = self.helper(sparse_args, model_name_or_path) 40 | 41 | coordinator.patch_model(model) 42 | 43 | stats = copy.deepcopy(coordinator.stats) 44 | 45 | print(stats["main"]) 46 | for k in stats: 47 | del stats[k]["patched_names"] 48 | 49 | self.assertEqual(stats, ref_stats[model_name_or_path]) 50 | 51 | def test_context_module(self): 52 | sparse_args = SparseTrainingArguments.hybrid(20.0) 53 | sparse_args.layer_norm_patch = True 54 | sparse_args.gelu_patch = True 55 | 56 | ref_context_module = { 57 | "bert-base-uncased": {"generic": 60, "block": 48, "single": 12}, 58 | "bert-large-uncased": {"generic": 120, "block": 96, "single": 24}, 59 | "facebook/bart-base": {"generic": 84, "block": 72, "single": 12}, 60 | } 61 | 62 | for model_name_or_path in ref_context_module.keys(): 63 | config, model, coordinator = self.helper(sparse_args, model_name_or_path) 64 | 65 | coordinator.patch_model(model) 66 | 67 | context_module = {"generic": 0, "block": 0, "single": 0} 68 | 69 | for name, module in model.named_modules(): 70 | if isinstance(module, GenericLinearPruningContextModule): 71 | context_module["generic"] += 1 72 | if isinstance(module, BlockLinearPruningContextModule): 73 | context_module["block"] += 1 74 | elif isinstance(module, SingleDimensionLinearPruningContextModule): 75 | context_module["single"] += 1 76 | 77 | self.assertEqual(context_module, ref_context_module[model_name_or_path]) 78 | 79 | 80 | if __name__ == "__main__": 81 | unittest.main() 82 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TASK=$1 
4 | OPTIONS="${@:2}" 5 | function mobile_benchmark_vivo() { 6 | for model in `ls ${MODEL_DIR}` 7 | do 8 | echo "********************" && echo $model && 9 | python tools.py mobile_benchmark \ 10 | --model "${MODEL_DIR}/$model" \ 11 | --num_runs=30 \ 12 | --warmup_runs=30 \ 13 | --num_threads=1 \ 14 | --taskset_mask=c0 \ 15 | --serial_number=0000028e2c780e4e \ 16 | --benchmark_binary_dir="/data/local/tmp" \ 17 | --no_root \ 18 | $OPTIONS # --no_xnnpack 19 | done 20 | } 21 | 22 | function mobile_benchmark_pixel1() { 23 | for model in `ls ${MODEL_DIR}` 24 | do 25 | echo "********************" && echo $model && 26 | python tools.py mobile_benchmark \ 27 | --model "${MODEL_DIR}/$model" \ 28 | --num_runs=20 \ 29 | --warmup_runs=10 \ 30 | --num_threads=1 \ 31 | --taskset_mask=c \ 32 | --serial_number=FA6A70311471 \ 33 | --benchmark_binary_dir="/data/local/tmp" \ 34 | --no_root \ 35 | $OPTIONS 36 | done 37 | } 38 | 39 | function mobile_benchmark_mi() { 40 | for model in `ls ${MODEL_DIR}` 41 | do 42 | echo "********************" && echo $model && 43 | python tools.py mobile_benchmark \ 44 | --model "${MODEL_DIR}/$model" \ 45 | --num_runs=30 \ 46 | --warmup_runs=30 \ 47 | --num_threads=1 \ 48 | --taskset_mask=70 \ 49 | --serial_number=2458c476 \ 50 | --benchmark_binary_dir="/data/local/tmp" \ 51 | --no_root \ 52 | $OPTIONS # --no_xnnpack 53 | done 54 | } 55 | 56 | function mobile_benchmark_pixel4() { 57 | for model in `ls ${MODEL_DIR}` 58 | do 59 | echo "********************" && echo $model && 60 | python tools.py mobile_benchmark \ 61 | --model "${MODEL_DIR}/$model" \ 62 | --num_runs=20 \ 63 | --warmup_runs=15 \ 64 | --num_threads=1 \ 65 | --taskset_mask=70 \ 66 | $OPTIONS 67 | done 68 | } 69 | 70 | function mobile_benchmark_pixel4_thread4() { 71 | for model in `ls ${MODEL_DIR}` 72 | do 73 | echo "********************" && echo $model && 74 | python tools.py mobile_benchmark \ 75 | --model "${MODEL_DIR}/$model" \ 76 | --num_runs=30 \ 77 | --warmup_runs=30 \ 78 | --num_threads=4 \ 79 | --taskset_mask=f0 80 | done 81 | } 82 | 83 | function mobile_benchmark_pixel4_thread8() { 84 | for model in `ls ${MODEL_DIR}` 85 | do 86 | echo "********************" && echo $model && 87 | python tools.py mobile_benchmark \ 88 | --model "${MODEL_DIR}/$model" \ 89 | --num_runs=30 \ 90 | --warmup_runs=30 \ 91 | --num_threads=8 \ 92 | --taskset_mask=ff 93 | done 94 | } 95 | 96 | function onnx_benchmark() { 97 | for model in `ls ${MODEL_DIR}` 98 | do 99 | python tools.py server_benchmark \ 100 | --model "${MODEL_DIR}/$model" \ 101 | --num_runs=200 \ 102 | --top=30 \ 103 | --warmup_runs=30 \ 104 | --precision=3 \ 105 | $OPTIONS 106 | done 107 | } 108 | 109 | 110 | function trt_benchmark() { 111 | for model in `ls ${MODEL_DIR}` 112 | do 113 | python benchmark/tensorrt/onnx_trt_test.py \ 114 | --model "${MODEL_DIR}/$model" \ 115 | --num_runs=100 \ 116 | --warmup_runs=30 \ 117 | --top=20 \ 118 | $OPTIONS 119 | done 120 | } 121 | $TASK "" 122 | -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/nn_pruning/modules/quantization_config.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import torch 4 | from torch.quantization import ( 5 | FakeQuantize, 6 | MinMaxObserver, 7 | MovingAverageMinMaxObserver, 8 | QConfig, 9 | float_qparams_weight_only_qconfig, 10 | get_default_qat_qconfig, 11 | get_default_qconfig, 12 | ) 13 | 14 | # TensorFlow Lite Quantization Specs 15 | # 
https://www.tensorflow.org/lite/performance/quantization_spec?hl=en
16 | # For activations: int8 asymmetric per-tensor [-128, 127] range
17 | # For weights: int8 symmetric per-tensor [-127, 127] range
18 | _TFLITE_QCONFIG = QConfig(
19 |     activation=MovingAverageMinMaxObserver.with_args(
20 |         dtype=torch.qint8,
21 |         quant_min=-128,
22 |         quant_max=127,
23 |         qscheme=torch.per_tensor_affine,
24 |     ),
25 |     weight=MinMaxObserver.with_args(
26 |         dtype=torch.qint8, quant_min=-127, quant_max=127, qscheme=torch.per_tensor_symmetric
27 |     ),
28 | )
29 | _TFLITE_QAT_QCONFIG = QConfig(
30 |     activation=FakeQuantize.with_args(
31 |         observer=MovingAverageMinMaxObserver,
32 |         dtype=torch.qint8,
33 |         quant_min=-128,
34 |         quant_max=127,
35 |         qscheme=torch.per_tensor_affine,
36 |     ),
37 |     weight=FakeQuantize.with_args(
38 |         observer=MinMaxObserver, dtype=torch.qint8, quant_min=-127, quant_max=127, qscheme=torch.per_tensor_symmetric
39 |     ),
40 | )
41 | _ONNX_QCONFIG = QConfig(
42 |     activation=MinMaxObserver.with_args(
43 |         quant_min=0,
44 |         quant_max=255,
45 |         reduce_range=True,
46 |     ),
47 |     weight=MinMaxObserver.with_args(
48 |         quant_min=-128, quant_max=127, dtype=torch.qint8, reduce_range=False, qscheme=torch.per_tensor_symmetric
49 |     ),
50 | )
51 | _ONNX_QAT_QCONFIG = QConfig(
52 |     activation=FakeQuantize.with_args(
53 |         observer=MinMaxObserver,
54 |         quant_min=0,
55 |         quant_max=255,
56 |         reduce_range=True,
57 |     ),
58 |     weight=FakeQuantize.with_args(
59 |         observer=MinMaxObserver,
60 |         quant_min=-128,
61 |         quant_max=127,
62 |         dtype=torch.qint8,
63 |         reduce_range=False,
64 |         qscheme=torch.per_tensor_symmetric,
65 |     ),
66 | )
67 | 
68 | _QCONFIG_DICT = {"object_type": [(torch.nn.Embedding, float_qparams_weight_only_qconfig)]}
69 | 
70 | _QAT_QCONFIG_DICT = {"object_type": [(torch.nn.Embedding, float_qparams_weight_only_qconfig)]}
71 | 
72 | _QCONFIG_MAPPING = {
73 |     "default": "fbgemm",
74 |     "mobile": "qnnpack",
75 |     "fbgemm": "fbgemm",
76 |     "qnnpack": "qnnpack",
77 |     "tflite": _TFLITE_QCONFIG,
78 |     "onnx": _ONNX_QCONFIG,
79 | }
80 | 
81 | _QAT_QCONFIG_MAPPING = {
82 |     "default": "fbgemm",
83 |     "mobile": "qnnpack",
84 |     "fbgemm": "fbgemm",
85 |     "qnnpack": "qnnpack",
86 |     "tflite": _TFLITE_QAT_QCONFIG,
87 |     "onnx": _ONNX_QAT_QCONFIG,
88 | }
89 | 
90 | 
91 | def create_qconfig(qconfig_name, mode):
92 |     mode = mode.lower()
93 |     if mode not in ["static", "qat"]:
94 |         raise ValueError(f"mode must either be static or qat, here: {mode}")
95 | 
96 |     mapping = _QCONFIG_MAPPING if mode == "static" else _QAT_QCONFIG_MAPPING
97 |     qconfig = mapping.get(qconfig_name, None)
98 |     if isinstance(qconfig, str):
99 |         qconfig_function = get_default_qconfig if mode == "static" else get_default_qat_qconfig
100 |         qconfig = qconfig_function(qconfig)
101 |     if qconfig is None:
102 |         raise ValueError(f"qconfig name must be in {mapping.keys()}, but {qconfig_name} was provided")
103 |     qconfig_dict = _QCONFIG_DICT if mode == "static" else _QAT_QCONFIG_DICT
104 |     qconfig_dict = copy.deepcopy(qconfig_dict)
105 |     qconfig_dict[""] = qconfig
106 |     return qconfig_dict
107 | 
--------------------------------------------------------------------------------
/modeling/models/cnn_zoo.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow_hub as hub
3 | from .squeezenet import SqueezeNet
4 | from . import shufflenet
5 | from . 
import shufflenetv2 6 | from .proxylessnas import get_proxylessnas 7 | from .mnasnet import mnasnet_a1 8 | import os 9 | 10 | 11 | def add_keras_input_layer(model, input_shape, batch_size=1): 12 | import tensorflow as tf 13 | return tf.keras.Sequential([ 14 | tf.keras.layers.InputLayer(input_shape, batch_size=batch_size), 15 | model 16 | ]) 17 | 18 | def get_mobilenetv1(): 19 | model = tf.keras.applications.MobileNet() 20 | return add_keras_input_layer(model, [224, 224, 3], 1) 21 | 22 | def get_mobilenetv2(): 23 | model = tf.keras.applications.MobileNetV2() 24 | return add_keras_input_layer(model, [224, 224, 3], 1) 25 | 26 | def get_mobilenetv3small(): 27 | model = tf.keras.applications.MobileNetV3Small() 28 | return add_keras_input_layer(model, [224, 224, 3], 1) 29 | 30 | def get_mobilenetv3large(): 31 | model = tf.keras.applications.MobileNetV3Large() 32 | return add_keras_input_layer(model, [224, 224, 3], 1) 33 | 34 | def get_squeezenet(): 35 | model = SqueezeNet(image_size=[224, 224, 3], batch_size=1) 36 | return model 37 | 38 | def get_inception_resnetv2(): 39 | model = tf.keras.applications.InceptionResNetV2() # input_shape=[299, 299, 3] 40 | return add_keras_input_layer(model, [299, 299, 3], 1) 41 | 42 | def get_inceptionv3(): 43 | model = tf.keras.applications.InceptionV3() # input_shape=[299, 299, 3] 44 | return add_keras_input_layer(model, [299, 299, 3], 1) 45 | 46 | def get_efficientnetb0(): 47 | model = tf.keras.applications.EfficientNetB0() 48 | return add_keras_input_layer(model, [224, 224, 3], 1) 49 | 50 | def get_efficientnetb0_lite(): 51 | model = hub.KerasLayer("https://tfhub.dev/tensorflow/efficientnet/lite0/classification/2") 52 | return add_keras_input_layer(model, [224, 224, 3], 1) 53 | 54 | def get_resnet50(): 55 | model = tf.keras.applications.ResNet50() 56 | return add_keras_input_layer(model, [224, 224, 3], 1) 57 | 58 | def get_resnet50v2(): 59 | model = tf.keras.applications.ResNet50V2() 60 | return add_keras_input_layer(model, [224, 224, 3], 1) 61 | 62 | def get_shufflenet(): 63 | model = shufflenet.shufflenet_g1_w1() 64 | return add_keras_input_layer(model, [224, 224, 3], 1) 65 | 66 | def get_shufflenetv2(): 67 | model = shufflenetv2.shufflenetv2_w1() 68 | return add_keras_input_layer(model, [224, 224, 3], 1) 69 | 70 | def get_proxyless_mobile(): 71 | model = get_proxylessnas('mobile') 72 | return add_keras_input_layer(model, [224, 224, 3], 1) 73 | 74 | def get_mnasneta1(): 75 | model = mnasnet_a1() 76 | return add_keras_input_layer(model, [224, 224, 3], 1) 77 | 78 | cnn_zoo_dict = { 79 | 'mobilenetv1': get_mobilenetv1, 80 | 'mobilenetv2': get_mobilenetv2, 81 | 'mobilenetv3small': get_mobilenetv3small, 82 | 'mobilenetv3large': get_mobilenetv3large, 83 | 'squeezenet': get_squeezenet, 84 | 'inception_resnetv2': get_inception_resnetv2, 85 | 'inceptionv3': get_inceptionv3, 86 | 'efficientnetb0': get_efficientnetb0, 87 | 'efficientnetb0_lite': get_efficientnetb0_lite, 88 | 'resnet50': get_resnet50, 89 | 'resnet50v2': get_resnet50v2, 90 | 'shufflenet': get_shufflenet, 91 | 'shufflenetv2': get_shufflenetv2, 92 | 'proxyless_mobile': get_proxyless_mobile, 93 | 'mnasneta1': get_mnasneta1 94 | } 95 | 96 | def get_model(model_name): 97 | if model_name not in cnn_zoo_dict.keys(): 98 | raise ValueError(f'{model_name} not supported') 99 | return cnn_zoo_dict[model_name]() 100 | 101 | def save_all(output_dir): 102 | for model_name in cnn_zoo_dict.keys(): 103 | model = cnn_zoo_dict[model_name]() 104 | model.save(os.path.join(output_dir, model_name + '.tf')) 
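
# Usage sketch (illustrative addition; the output directory is an assumed example path):
if __name__ == '__main__':
    model = get_model('mobilenetv2')  # build a single zoo model
    save_all('models/tf_model')       # or export every zoo model as a SavedModel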
-------------------------------------------------------------------------------- /are_16_heads/finetune.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | import torch 3 | from torch.nn.parallel import DistributedDataParallel as DDP 4 | from logger import logger 5 | import os 6 | import random 7 | import numpy as np 8 | 9 | import classifier_args 10 | import classifier_training as training 11 | from classifier_scoring import Accuracy 12 | from classifier_eval import evaluate 13 | from util import build_dataset 14 | 15 | 16 | 17 | def main(): 18 | parser = classifier_args.get_base_parser() 19 | classifier_args.training_args(parser) 20 | classifier_args.eval_args(parser) 21 | classifier_args.finetune_args(parser) 22 | 23 | args = parser.parse_args() 24 | 25 | # ==== SETUP DEVICE ==== 26 | # This code only support distributed data parallel & gpu training 27 | torch.cuda.set_device(args.local_rank) 28 | device = torch.device("cuda", args.local_rank) 29 | n_gpu = 1 30 | logger.info( 31 | f"device: {device} n_gpu: {n_gpu}, " 32 | f"distributed training: {bool(args.local_rank != -1)}, " 33 | ) 34 | 35 | # ==== SETUP EXPERIMENT ==== 36 | 37 | def set_seeds(seed, n_gpu): 38 | random.seed(seed) 39 | np.random.seed(seed) 40 | torch.manual_seed(seed) 41 | if n_gpu > 0: 42 | torch.cuda.manual_seed_all(seed) 43 | 44 | set_seeds(args.seed, n_gpu) 45 | 46 | os.makedirs(args.output_dir, exist_ok=True) 47 | 48 | # ==== PREPARE DATA ==== 49 | train_dataset, _ = build_dataset(args.data_dir, is_train=True, shuffle=True, return_dict=True) 50 | eval_dataset, _ = build_dataset(args.data_dir, is_train=False, shuffle=False, return_dict=False) 51 | 52 | # ==== PREPARE TRAINING ==== 53 | training_args = training.get_training_args( 54 | learning_rate=args.finetune_learning_rate, 55 | micro_batch_size=args.train_batch_size, 56 | n_steps=args.n_finetune_steps_after_pruning, 57 | n_epochs=args.n_finetune_epochs_after_pruning, 58 | local_rank=args.local_rank, 59 | num_workers=args.num_workers, 60 | output_dir=args.output_dir, 61 | ) 62 | is_main = args.local_rank == -1 or args.local_rank == 0 63 | 64 | # ==== PREPARE MODEL ==== 65 | def get_deit_model(model_path): 66 | from transformers import ViTForImageClassification 67 | model = ViTForImageClassification.from_pretrained(model_path) 68 | model.to(device) 69 | return model 70 | 71 | model = get_deit_model(args.finetune_model_path) 72 | 73 | # ==== START TRAINING ==== 74 | training.huggingface_trainer_train( 75 | train_dataset=train_dataset, 76 | model=model, 77 | args=training_args 78 | ) 79 | 80 | if is_main: 81 | model.save_pretrained(os.path.join(args.output_dir, 'final_finetuned')) 82 | 83 | # ==== EVALUATE ==== 84 | if args.eval_finetuned: 85 | # Print the pruning descriptor 86 | if is_main: 87 | logger.info("*** Evaluating ***") 88 | # Eval accuracy 89 | scorer = Accuracy() 90 | accuracy = evaluate( 91 | eval_dataset, 92 | model, 93 | args.eval_batch_size, 94 | save_attention_probs=args.save_attention_probs, 95 | print_head_entropy=False, 96 | device=device, 97 | verbose=False, 98 | disable_progress_bar=args.no_progress_bars, 99 | scorer=scorer, 100 | distributed=args.local_rank != -1, 101 | num_workers=args.num_workers 102 | )[scorer.name] 103 | 104 | if is_main: 105 | logger.info("***** Pruning eval results *****") 106 | logger.info(f"Accuracy\t{accuracy}") 107 | accuracy_file_name = f'accuracy{int(accuracy * 10000)}.txt' 108 | os.system(f'touch {os.path.join(args.output_dir, 
"final_finetuned", accuracy_file_name)}') 109 | 110 | 111 | if __name__ == '__main__': 112 | main() -------------------------------------------------------------------------------- /deit_pruning/src/onnx_export.py: -------------------------------------------------------------------------------- 1 | # from transformers.convert_graph_to_onnx import convert 2 | from pathlib import Path 3 | 4 | # missing tokenizer, so just cannot use convert directly 5 | # convert(framework="pt", model="results/playground/final/", output=Path("results/playground/final/output.onnx"), opset=13) 6 | 7 | import torch 8 | import argparse 9 | from model import SwiftBERTOutput 10 | 11 | from nn_pruning.inference_model_patcher import optimize_model as nn_optimize 12 | 13 | from onnxruntime.transformers.optimizer import optimize_model 14 | from onnxruntime.transformers.onnx_model_bert import BertOptimizationOptions 15 | from onnxruntime.quantization import QuantizationMode, quantize, quantize_dynamic, QuantType 16 | 17 | def quant(use_original=False): 18 | if use_original: 19 | input_model = f'{output_name}.onnx' 20 | output_model = f'{output_name}-quant.onnx' 21 | else: 22 | input_model = f'{output_name}-opt.onnx' 23 | output_model = f'{output_name}-opt-quant.onnx' 24 | quantize_dynamic(str(args.model_dir / input_model), 25 | str(args.model_dir / output_model), 26 | weight_type=QuantType.QUInt8, 27 | # optimize_model=False, # onnxruntime 1.8.x requires this arg 28 | ) 29 | 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument("--model_dir", type=Path, default='./results/playground/final') 32 | parser.add_argument("--nn_pruning", action='store_true') 33 | parser.add_argument("--no_opt", action='store_true') 34 | parser.add_argument("--force_opt", action='store_true') 35 | parser.add_argument("--max_ad_length", type=int, default=38) 36 | parser.add_argument("--output_name", type=str, default="output") 37 | parser.add_argument("--opset_version", type=int, default=13) 38 | 39 | args = parser.parse_args() 40 | assert not (args.no_opt and args.force_opt), "no_opt and force_opt cannot be set together." 41 | # python src/onnx_export.py --model_dir ./results/dummy_mini/final/ 42 | 43 | model = SwiftBERTOutput.from_pretrained(args.model_dir) 44 | original_params = model.num_parameters() 45 | model = nn_optimize(model, "dense") 46 | pruned_params = model.num_parameters() 47 | print("Original params:", original_params) 48 | print("After-pruned params:", pruned_params) 49 | print(model) 50 | 51 | bert_config = model.config 52 | 53 | max_ad_length = args.max_ad_length 54 | 55 | print("==== export ====") 56 | output_name = args.output_name 57 | if args.nn_pruning: 58 | model = nn_optimize(model, "dense") 59 | print(model) 60 | output_name += "_removepruned" 61 | torch.onnx.export( 62 | model, 63 | (torch.tensor([1] * (max_ad_length)).view(-1, max_ad_length), 64 | torch.tensor([1] * (max_ad_length)).view(-1, max_ad_length), 65 | torch.tensor([1] * (max_ad_length)).view(-1, max_ad_length)), 66 | args.model_dir / f'{output_name}.onnx', 67 | input_names=['input_ids', 'attention_mask', 'token_type_ids'], 68 | output_names=['score'], 69 | verbose=False, 70 | export_params=True, 71 | opset_version=args.opset_version, 72 | do_constant_folding=True 73 | ) 74 | 75 | print("==== optimization ====") 76 | if args.nn_pruning or args.no_opt: 77 | # TODO: how to fix that? 78 | if not args.force_opt: 79 | print("No optimization (nn_pruning or set no_opt). 
Doing quanting only") 80 | quant(use_original=True) 81 | exit(0) 82 | if args.nn_pruning: 83 | print("Warn: num_heads & hidden_size may be changed during nn_pruning! The optimized result can be incorrect.") 84 | 85 | optimization_options = BertOptimizationOptions('bert') 86 | optimization_options.embed_layer_norm = True 87 | optimization_options.enable_layer_norm = True 88 | optimization_options.enable_skip_layer_norm = True 89 | optimization_options.enable_bias_gelu = True 90 | optimization_options.enable_attention = True 91 | 92 | optimized_model = optimize_model( 93 | str(args.model_dir / f'{output_name}.onnx'), 94 | model_type='bert', 95 | num_heads=bert_config.num_attention_heads, 96 | hidden_size=bert_config.hidden_size, 97 | optimization_options=optimization_options) 98 | optimized_model.save_model_to_file(str(args.model_dir / f'{output_name}-opt.onnx')) 99 | 100 | print("==== quantize ====") 101 | quant(use_original=True) 102 | quant(use_original=False) 103 | -------------------------------------------------------------------------------- /experiments/D0104_tvm_fusion_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from pathlib import Path 4 | import re 5 | import subprocess 6 | 7 | 8 | OUTPUT_CSV_PATH = 'D0105_tvm_fusion_test.csv' 9 | 10 | 11 | class TvmFusionTester: 12 | def __init__(self, tflite_dir, tvm_dir, tracker_host, tracker_port, rpc_key, cross_compiler_path): 13 | self.tflite_dir = tflite_dir 14 | self.tvm_dir=tvm_dir 15 | self.tracker_host = tracker_host 16 | self.tracker_port = tracker_port 17 | self.rpc_key = rpc_key 18 | self.cross_compiler_path = cross_compiler_path 19 | 20 | def _tune_single(self, input_path, output_path): 21 | pass # TODO 22 | 23 | def _compile_single(self, input_path, output_path): 24 | Path(os.path.dirname(output_path)).mkdir(parents=True, exist_ok=True) 25 | cmd = f'tvmc compile {input_path} -o {output_path} --target "llvm -mtriple=arm64-linux-android -mattr=+neon" --cross-compiler {self.cross_compiler_path}' 26 | result = subprocess.check_output(cmd, shell=True).decode('utf-8') 27 | print(result) 28 | 29 | # unzip 30 | tar_output_dir = output_path.replace('.tar', '') 31 | Path(tar_output_dir).mkdir(parents=True, exist_ok=True) 32 | subprocess.check_output(f'tar -xvf {output_path} -C {tar_output_dir}', shell=True) 33 | 34 | def _benchmark_single(self, model_path): 35 | print(os.path.basename(model_path)) 36 | result = subprocess.run( 37 | f'tvmc run {model_path} --rpc-key {self.rpc_key} --rpc-tracker {self.tracker_host}:{self.tracker_port} --print-time --device=cpu --repeat 100', 38 | shell=True, capture_output=True).stdout.decode('utf-8') 39 | print(result) 40 | numbers = re.findall(r'\d*\.?\d+', result) 41 | return [float(x) for x in numbers] 42 | 43 | def _compile(self): 44 | print('==== Compiling ====') 45 | for root, dirs, files in os.walk(self.tflite_dir): 46 | for file in sorted(files): 47 | if file.endswith('.tflite'): 48 | input_path = os.path.join(root, file) 49 | output_path = os.path.join(self.tvm_dir, input_path.replace(self.tflite_dir + '/', '').replace('.tflite', '.tar')) 50 | self._compile_single(input_path, output_path) 51 | 52 | def _tune(self): 53 | print('==== Tuning ====') 54 | for root, dirs, files in os.walk(self.tflite_dir): 55 | for file in sorted(files): 56 | if file.endswith('.tflite'): 57 | input_path = os.path.join(root, file) 58 | output_path = os.path.join(self.tvm_dir, input_path.replace(self.tflite_dir + '/', '').replace('.tflite', 
'_autotuner_records.json')) 59 | self._tune_single(input_path, output_path) 60 | 61 | def _benchmark(self): 62 | with open(OUTPUT_CSV_PATH, 'a') as f: 63 | f.write('model_name,mean,median,max,min,std\n') 64 | 65 | print('==== benchmarking ====') 66 | for root, dirs, files in os.walk(self.tvm_dir): 67 | for file in sorted(files): 68 | if file.endswith('.tar'): 69 | model_path = os.path.join(root, file) 70 | mean_median_max_min_std_ms = self._benchmark_single(model_path) 71 | with open(OUTPUT_CSV_PATH, 'a') as f: 72 | f.write(f'{os.path.basename(model_path)},{",".join([str(round(ms, 2)) for ms in mean_median_max_min_std_ms])}\n') 73 | 74 | def run(self): 75 | self._compile() 76 | self._benchmark() 77 | 78 | 79 | def main(): 80 | parser = argparse.ArgumentParser() 81 | parser.add_argument('--tflite_dir', default='models/tflite_model/fusion', type=str) 82 | parser.add_argument('--tvm_dir', default='models/tvm_model/fusion_test', type=str) 83 | parser.add_argument('--tracker_host', default='127.0.0.1', type=str) 84 | parser.add_argument('--tracker_port', default=9090, type=int) 85 | parser.add_argument('--rpc_key', default='android', type=str) 86 | parser.add_argument('--cross_compiler_path', default=os.environ['TVM_NDK_CC'], type=str) 87 | args = parser.parse_args() 88 | 89 | tester = TvmFusionTester(args.tflite_dir, args.tvm_dir, args.tracker_host, args.tracker_port, args.rpc_key, args.cross_compiler_path) 90 | tester.run() 91 | 92 | if __name__ == '__main__': 93 | main() 94 | -------------------------------------------------------------------------------- /are_16_heads/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import datasets 3 | from torchvision.datasets.folder import ImageFolder 4 | import torch.distributed as dist 5 | from typing import Dict 6 | import numpy as np 7 | import torch 8 | 9 | def head_entropy(p): 10 | plogp = p * torch.log(p) 11 | plogp[p == 0] = 0 12 | return - plogp.sum(dim=-1) 13 | 14 | 15 | def head_pairwise_kl(p): 16 | # p has shape bsz x nheads x L x L and is normalized in the last 17 | # dim 18 | logp = torch.log(p) 19 | logp[p == 0] = -40 20 | H_pq = -torch.einsum("bilk,bjlk->bijl", [p, logp]) 21 | H_p = head_entropy(p).unsqueeze(-2) 22 | KL = H_pq - H_p 23 | KL.masked_fill_(p.sum(-1).eq(0).unsqueeze(1), 0.0) 24 | KL.masked_fill_(p.sum(-1).eq(0).unsqueeze(2), 0.0) 25 | return KL 26 | 27 | 28 | def attn_disagreement(p): 29 | # p has shape bsz x nheads x L x L and is normalized in the last 30 | # dim 31 | n_heads = p.size(1) 32 | return torch.einsum("bilk,bjlk->b", [p, p]) / n_heads ** 2 33 | 34 | 35 | def out_disagreement(out): 36 | # out has shape bsz x nheads x L x d 37 | n_heads = out.size(1) 38 | # Normalize 39 | out /= torch.sqrt((out ** 2).sum(-1)).unsqueeze(-1) + 1e-20 40 | cosine = torch.einsum("bild,bjld->b", [out, out]) 41 | return cosine / n_heads ** 2 42 | 43 | 44 | def print_1d_tensor(tensor): 45 | print("\t".join(f"{x:.5f}" for x in tensor.cpu().data)) 46 | 47 | 48 | def print_2d_tensor(tensor): 49 | for row in range(len(tensor)): 50 | print_1d_tensor(tensor[row]) 51 | 52 | 53 | def none_if_empty(string): 54 | return string if string != "" else None 55 | 56 | 57 | def get_vit_encoder(model): 58 | if hasattr(model, 'vit'): 59 | return model.vit.encoder 60 | if hasattr(model, 'module'): 61 | return model.module.vit.encoder 62 | else: 63 | raise RuntimeError('Model neither has attribute "vit" or "module".') 64 | 65 | def get_vit_config(model): 66 | if hasattr(model, 'vit'): 
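        # (added note) A bare classifier exposes the backbone directly as `.vit`;
        # a DDP-wrapped one nests it under `.module` (handled by the next branch).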
67 |         return model.vit.config
68 |     if hasattr(model, 'module'):
69 |         return model.module.vit.config
70 |     else:
71 |         raise RuntimeError('Model has neither attribute "vit" nor "module".')
72 | 
73 | 
74 | 
75 | '''============================================================
76 | load data
77 | ================================================================='''
78 | class DictImageFolder(datasets.ImageFolder):
79 |     def __init__(self, shuffle, *args, **kwargs):
80 |         super().__init__(*args, **kwargs)
81 |         self.shuffle = shuffle
82 |         self.idx_list = np.arange(super().__len__())
83 |         if self.shuffle:
84 |             np.random.shuffle(self.idx_list)
85 | 
86 |     def __getitem__(self, index: int) -> Dict:
87 |         index = self.idx_list[index]
88 |         item = super().__getitem__(index)
89 |         return dict(
90 |             pixel_values=item[0],
91 |             label=item[1]
92 |         )
93 | 
94 | def build_dataset(data_path, input_size=224, is_train=False, shuffle=False, return_dict=True):
95 |     def build_transform(input_size):
96 |         from torchvision import transforms
97 |         from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
98 |         t = []
99 |         if input_size > 32:
100 |             size = int((256 / 224) * input_size)
101 |             t.append(
102 |                 transforms.Resize(size, interpolation=3),  # to maintain same ratio w.r.t. 224 images
103 |             )
104 |             t.append(transforms.CenterCrop(input_size))
105 | 
106 |         t.append(transforms.ToTensor())
107 |         t.append(transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD))
108 |         return transforms.Compose(t)
109 | 
110 |     import os
111 |     from torchvision import datasets
112 |     transform = build_transform(input_size)
113 |     root = os.path.join(data_path, 'val' if not is_train else 'train')
114 |     if return_dict:
115 |         dataset = DictImageFolder(shuffle, root, transform=transform)
116 |     else:
117 |         dataset = ImageFolder(root, transform=transform)
118 |     num_classes = 1000
119 |     return dataset, num_classes
120 | 
121 | 
122 | def to_data_loader(dataset, batch_size, num_workers):
123 |     import torch
124 |     sampler = torch.utils.data.SequentialSampler(dataset)
125 |     data_loader = torch.utils.data.DataLoader(
126 |         dataset, sampler=sampler,
127 |         batch_size=batch_size,
128 |         num_workers=num_workers,
129 |         pin_memory=True,
130 |         drop_last=False
131 |     )
132 |     return data_loader
--------------------------------------------------------------------------------
/deit_pruning/src/layers/super_bertlayers.py:
--------------------------------------------------------------------------------
1 | import transformers
2 | from transformers import BertModel, BertLayer
3 | from transformers.models.bert.modeling_bert import BertEncoder, BertSelfAttention, BertAttention, BertIntermediate, BertEmbeddings, BertPooler, BertOutput, BertSelfOutput
4 | from transformers.modeling_outputs import SequenceClassifierOutput
5 | from transformers.activations import ACT2FN
6 | import torch
7 | from torch import nn
8 | from torch.nn import BCEWithLogitsLoss
9 | class VA_BertIntermediate(BertIntermediate):
10 |     def __init__(self, config,layerconfig):
11 |         super().__init__(config)
12 |         print(layerconfig['intermediate_size'])
13 |         self.dense = nn.Linear(config.hidden_size, layerconfig['intermediate_size'])
14 |         if isinstance(config.hidden_act, str):
15 |             self.intermediate_act_fn = ACT2FN[config.hidden_act]
16 |         else:
17 |             self.intermediate_act_fn = config.hidden_act
18 | class VA_BertOutput(BertOutput):
19 |     def __init__(self, config,layerconfig):
20 |         super().__init__(config)
21 |         self.dense = nn.Linear(layerconfig['intermediate_size'], config.hidden_size)
22 |         self.LayerNorm =
nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) 23 | self.dropout = nn.Dropout(config.hidden_dropout_prob) 24 | class VA_BertSelfAttention(BertSelfAttention): 25 | def __init__(self, config,heads_num): 26 | super().__init__(config) 27 | if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): 28 | raise ValueError( 29 | "The hidden size (%d) is not a multiple of the number of attention " 30 | "heads (%d)" % (config.hidden_size, config.num_attention_heads) 31 | ) 32 | 33 | self.num_attention_heads = heads_num 34 | self.attention_head_size = int(config.hidden_size / config.num_attention_heads) ##original head size 35 | self.all_head_size = heads_num * self.attention_head_size 36 | print('here',heads_num,self.all_head_size) 37 | 38 | self.query = nn.Linear(config.hidden_size, self.all_head_size) 39 | self.key = nn.Linear(config.hidden_size, self.all_head_size) 40 | self.value = nn.Linear(config.hidden_size, self.all_head_size) 41 | 42 | self.dropout = nn.Dropout(config.attention_probs_dropout_prob) 43 | class VA_BertSelfOutput(BertSelfOutput): 44 | def __init__(self, config,head_num): 45 | super().__init__(config) 46 | attention_head_size = int(config.hidden_size / config.num_attention_heads) ##original head size 47 | all_head_size = head_num * attention_head_size 48 | self.dense = nn.Linear(all_head_size, config.hidden_size) 49 | self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) 50 | self.dropout = nn.Dropout(config.hidden_dropout_prob) 51 | 52 | 53 | class VA_BertAttention(BertAttention): 54 | def __init__(self, config,layerconfig): 55 | super().__init__(config) 56 | self.self = VA_BertSelfAttention(config,layerconfig['heads']) 57 | self.output = VA_BertSelfOutput(config,layerconfig['heads']) 58 | self.pruned_heads = set() 59 | 60 | 61 | 62 | class VA_BertLayer(BertLayer): 63 | def __init__(self, config,layerconfig): 64 | super().__init__(config) 65 | self.chunk_size_feed_forward = config.chunk_size_feed_forward 66 | self.seq_len_dim = 1 67 | self.attention = VA_BertAttention(config,layerconfig) 68 | self.is_decoder = config.is_decoder 69 | self.add_cross_attention = config.add_cross_attention 70 | if self.add_cross_attention: 71 | assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added" 72 | self.crossattention = VA_BertAttention(config,layerconfig) 73 | self.intermediate = VA_BertIntermediate(config,layerconfig) 74 | self.output = VA_BertOutput(config,layerconfig) 75 | 76 | class VA_BertEncoder(BertEncoder): 77 | def __init__(self, config): 78 | super().__init__(config) 79 | self.config = config 80 | print(self.config) 81 | self.layer = nn.ModuleList([VA_BertLayer(config,config.layers[str(i)]) for i in range(config.num_hidden_layers)]) 82 | 83 | class VA_BertModel(BertModel): 84 | 85 | def __init__(self, config, add_pooling_layer=True): 86 | super().__init__(config) 87 | self.config = config 88 | 89 | self.embeddings = BertEmbeddings(config) 90 | self.encoder = VA_BertEncoder(config) 91 | 92 | self.pooler = BertPooler(config) if add_pooling_layer else None 93 | 94 | self.init_weights() 95 | -------------------------------------------------------------------------------- /modeling/layers/transformer_encoder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import math 4 | from .residual import Residual 5 | from .norm import LayerNorm 6 | from .attention import Attention 7 | from .ffn 
import FeedForward 8 | 9 | class TransformerEncoderBlock(tf.keras.Model): 10 | def __init__(self, hidden_size, num_layers, num_heads, intermediate_size, norm_first=True): 11 | super().__init__() 12 | layers = [] 13 | for _ in range(num_layers): 14 | layers.extend([ 15 | LayerNorm(Residual(Attention(hidden_size, num_heads=num_heads)), pre=norm_first), 16 | LayerNorm(Residual(FeedForward(hidden_size, intermediate_size)), pre=norm_first) 17 | ]) 18 | self.net = tf.keras.Sequential(layers) 19 | 20 | def call(self, x): 21 | return self.net(x) 22 | 23 | 24 | class TransformerEncoderBlock_Pruned(tf.keras.Model): 25 | def __init__(self, hidden_size, num_layers, num_remain_heads_list, intermediate_size_list, head_size=64, norm_first=True): 26 | super().__init__() 27 | layers = [] 28 | for i in range(num_layers): 29 | layers.extend([ 30 | LayerNorm(Residual(Attention(hidden_size, num_heads=num_remain_heads_list[i], h_k=head_size)), pre=norm_first), 31 | LayerNorm(Residual(FeedForward(hidden_size, intermediate_size_list[i])), pre=norm_first) 32 | ]) 33 | self.net = tf.keras.Sequential(layers) 34 | 35 | def call(self, x): 36 | return self.net(x) 37 | 38 | 39 | class TokenPerformer(tf.keras.Model): 40 | ''' 41 | T2T-Module performer for T2T-ViT 42 | ''' 43 | def __init__(self, head_size, num_heads, kernel_ratio=0.5, dp1=0.1, dp2=0.1): 44 | super().__init__() 45 | self.hidden_size = head_size * num_heads 46 | self.kqv = tf.keras.layers.Dense(self.hidden_size * 3) 47 | self.dp = tf.keras.layers.Dropout(dp1) 48 | self.attn_output = tf.keras.layers.Dense(self.hidden_size) 49 | self.num_heads = num_heads 50 | self.norm1 = tf.keras.layers.LayerNormalization(epsilon=1e-5) 51 | self.norm2 = tf.keras.layers.LayerNormalization(epsilon=1e-5) 52 | self.epsilon = 1e-8 # for stable in division 53 | 54 | self.mlp = tf.keras.Sequential([ 55 | FeedForward(self.hidden_size, self.hidden_size), 56 | tf.keras.layers.Dropout(dp2) 57 | ]) 58 | 59 | self.m = int(self.hidden_size * kernel_ratio) 60 | self.w = self.add_weight('w', 61 | shape=[self.m, self.hidden_size], 62 | initializer=tf.keras.initializers.Orthogonal(), 63 | dtype=tf.float32, 64 | trainable=False) 65 | self.w = self.w * math.sqrt(self.m) 66 | 67 | def prm_exp(self, x): 68 | # part of the function is borrow from https://github.com/lucidrains/performer-pytorch 69 | # and Simo Ryu (https://github.com/cloneofsimo) 70 | # ==== positive random features for gaussian kernels ==== 71 | # x = (B, T, hs) 72 | # w = (m, hs) 73 | # return : x : B, T, m 74 | # SM(x, y) = E_w[exp(w^T x - |x|/2) exp(w^T y - |y|/2)] 75 | # therefore return exp(w^Tx - |x|/2)/sqrt(m) 76 | xd = tf.math.reduce_sum(x * x, axis=-1, keepdims=True) 77 | broadcast_shape = tf.where([True, True, False], tf.shape(xd), [0, 0, self.m]) 78 | xd = tf.broadcast_to(xd, broadcast_shape) / 2 79 | wtd = tf.einsum('bti,mi->btm', tf.convert_to_tensor(x, dtype=tf.float32), self.w) 80 | 81 | return tf.exp(wtd - xd) / math.sqrt(self.m) 82 | 83 | def single_attn(self, x): 84 | k, q, v = tf.split(self.kqv(x), 3, axis=-1) 85 | kp, qp = self.prm_exp(k), self.prm_exp(q) # (B, T, m), (B, T, m) 86 | D = tf.einsum('bti,bi->bt', qp, tf.math.reduce_sum(kp, axis=1)) # (B, T, m) * (B, m) -> (B, T, 1) 87 | D = tf.expand_dims(D, axis=2) 88 | kptv = tf.einsum('bin,bim->bnm', tf.convert_to_tensor(v, dtype=tf.float32), kp) # (B, emb, m) 89 | broadcast_shape = tf.where([True, True, False], tf.shape(D), [0, 0, self.hidden_size]) 90 | y = tf.einsum('bti,bni->btn', qp, kptv) / (tf.broadcast_to(D, broadcast_shape) + self.epsilon) # (B, T, 
emb) / Diag 91 | 92 | # skip connection 93 | y = v + self.dp(self.attn_output(y)) 94 | return y 95 | 96 | def call(self, x): 97 | x = self.norm1(x) 98 | x = self.single_attn(x) 99 | x = x + self.mlp(self.norm2(x)) 100 | 101 | return x 102 | -------------------------------------------------------------------------------- /benchmark/tensorrt/onnx_trt_test.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | import os 18 | import sys 19 | import argparse 20 | import numpy as np 21 | # This import causes pycuda to automatically manage CUDA context creation and cleanup. 22 | import pycuda.autoinit 23 | import tensorrt as trt 24 | import onnx 25 | import torch 26 | 27 | import common 28 | from calibrator import DummyCalibrator 29 | 30 | 31 | # You can set the logger severity higher to suppress messages (or lower to display more messages). 32 | TRT_LOGGER = trt.Logger(trt.Logger.WARNING) 33 | 34 | 35 | def get_onnx_input_shape(model_path): 36 | model = onnx.load(model_path) 37 | input0 = model.graph.input[0] 38 | tensor_type = input0.type.tensor_type 39 | input_shape = [] 40 | for d in tensor_type.shape.dim: 41 | if d.HasField('dim_value'): 42 | input_shape.append(d.dim_value) 43 | else: 44 | input_shape.append(1) 45 | return input_shape 46 | 47 | # The Onnx path is used for Onnx models. 48 | def build_engine_onnx(model_file, quant=None): 49 | builder = trt.Builder(TRT_LOGGER) 50 | network = builder.create_network(common.EXPLICIT_BATCH) 51 | config = builder.create_builder_config() 52 | runtime = trt.Runtime(TRT_LOGGER) 53 | parser = trt.OnnxParser(network, TRT_LOGGER) 54 | 55 | config.max_workspace_size = common.GiB(1) 56 | 57 | if quant == 'int8' or quant == 'both': 58 | config.set_flag(trt.BuilderFlag.INT8) 59 | input_shape = get_onnx_input_shape(model_file) 60 | dummy_input = torch.rand(input_shape).numpy() 61 | config.int8_calibrator = DummyCalibrator(dummy_input, batch_size=1) 62 | if quant == 'fp16' or quant == 'both': 63 | config.set_flag(trt.BuilderFlag.FP16) 64 | 65 | # Load the Onnx model and parse it in order to populate the TensorRT network. 
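    # Note (added): assuming common.EXPLICIT_BATCH sets
    # trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH, the batch size is taken
    # from the ONNX graph itself, so a model exported with a dynamic batch
    # dimension would additionally need an optimization profile here.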
66 |     with open(model_file, 'rb') as model:
67 |         if not parser.parse(model.read()):
68 |             print('ERROR: Failed to parse the ONNX file.')
69 |             for error in range(parser.num_errors):
70 |                 print(parser.get_error(error))
71 |             return None
72 | 
73 |     plan = builder.build_serialized_network(network, config)
74 |     return runtime.deserialize_cuda_engine(plan)
75 | 
76 | 
77 | def main():
78 |     parser = argparse.ArgumentParser()
79 |     parser.add_argument('--model', required=True, type=str, help="ONNX model path")
80 |     parser.add_argument('--quant', default=None, choices=['int8', 'fp16', 'both'], help='quantization mode for inference')
81 |     parser.add_argument('--num_runs', default=50, type=int, help='number of inference runs')
82 |     parser.add_argument('--warmup_runs', default=20, type=int, help='number of warmup runs')
83 |     parser.add_argument('--topk', default=None, type=int, help='take the avg of top k latency to reduce variance')
84 |     parser.add_argument('--precision', default=3, type=int, help='the precision of latency result')
85 |     args = parser.parse_args()
86 | 
87 | 
88 |     # Build a TensorRT engine.
89 |     engine = build_engine_onnx(args.model, quant=args.quant)
90 |     # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
91 |     # Allocate buffers and create a CUDA stream.
92 |     inputs, outputs, bindings, stream = common.allocate_buffers(engine)
93 |     # Contexts are used to perform inference.
94 |     context = engine.create_execution_context()
95 | 
96 |     latency_list = []
97 |     for _ in range(args.warmup_runs):
98 |         common.do_inference_v2_with_timer(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
99 |     for _ in range(args.num_runs):
100 |         latency_ms = common.do_inference_v2_with_timer(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
101 |         latency_list.append(latency_ms)
102 | 
103 |     if args.topk:
104 |         latency_list.sort()
105 |         latency_list = latency_list[:args.topk]
106 | 
107 |     avg_ms = np.average(latency_list)
108 |     std_ms = np.std(latency_list)
109 |     print(f'{os.path.basename(args.model)} Avg latency {avg_ms:.{args.precision}f} ms Std {std_ms:.{args.precision}f} ms')
110 | 
111 | 
112 | if __name__ == '__main__':
113 |     main()
114 | 
--------------------------------------------------------------------------------
/deit_pruning/vendor/nn_pruning_v1/nn_pruning/training_patcher.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict
2 | 
3 | import torch.nn as nn
4 | 
5 | from .model_patcher import ModelPatcher
6 | from nn_pruning.model_structure import ModelStructure
7 | 
8 | class PatcherContextModule(nn.Module):
9 |     pass
10 | 
11 | class PatcherContext:
12 |     def __init__(self):
13 |         self.context_modules: Dict = {}
14 |         self.context_data = {}
15 | 
16 |     def set_context_data(self, data_key, data):
17 |         # print("set_context_data", data_key, data)
18 |         self.context_data[data_key] = data
19 | 
20 |     def set_context_data_dict(self, d: Dict[str, Any]):
21 |         for k, v in d.items():
22 |             self.set_context_data(k, v)
23 | 
24 |     def get_context_data(self, data_key,layerindex):
25 |         #print('self.context_data',self.context_data)
26 |         return self.context_data[layerindex][data_key]
27 | 
28 |     def enumerate_context_data(self):
29 |         for k, v in self.context_data.items():
30 |             yield k, v
31 | 
32 |     def has_module_context(self, key):
33 |         d = self.context_modules
34 |         for key_part in key:
35 |             if key_part not in d:
36 |                 return False
37 |             d = d[key_part]
38 |         return True
39 | 
40 |     def get_context_module(self, key) ->
PatcherContextModule: 41 | d = self.context_modules 42 | for key_part in key: 43 | d = d[key_part] 44 | return d 45 | 46 | def set_module_context(self, key, module_context: PatcherContextModule): 47 | d = self.context_modules 48 | for key_part in key[:-1]: 49 | if key_part not in d: 50 | d[key_part] = {} 51 | d = d[key_part] 52 | assert key[-1] not in d 53 | d[key[-1]] = module_context 54 | 55 | 56 | class ReplacementModule(nn.Module): 57 | def set_context(self, context): 58 | self._context = context 59 | 60 | def get_context_data(self, key,module_name): 61 | layerindex=int(module_name.split('.')[3]) 62 | #print(layerindex) 63 | 64 | return self._context.get_context_data(key,layerindex) 65 | 66 | 67 | class ModulePatcher: 68 | def __init__(self, context: PatcherContext): 69 | self.context = context 70 | 71 | def get_context_key(self, child_module_name, kind="default"): 72 | # Default implementation: each module has its own context 73 | return (kind, child_module_name) 74 | 75 | def create_context_module(self, child_module_name, child_module, key): 76 | raise NotImplementedError("Implement in subclass") 77 | 78 | def get_context_module(self, child_module_name, child_module, kind="default", **kwargs): 79 | key = self.get_context_key(child_module_name, kind=kind) 80 | if key == None: 81 | return None 82 | if self.context.has_module_context(key): 83 | return self.context.get_context_module(key) 84 | else: 85 | print('here context',child_module_name) 86 | module_context = self.create_context_module(child_module_name, child_module, key, **kwargs) 87 | self.context.set_module_context(key, module_context) 88 | return module_context 89 | 90 | def patch(self, child_module_name, child_module) -> ReplacementModule: 91 | raise NotImplementedError("Implement in subclass") 92 | 93 | def patch_and_connect(self, child_module_name, child_module) -> ReplacementModule: 94 | ret = self.patch(child_module_name, child_module) 95 | 96 | if ret is not None: 97 | ret.set_context(self.context) 98 | return ret 99 | 100 | 101 | class ModelDispatchingPatcher(ModelPatcher): 102 | def __init__(self): 103 | super().__init__() 104 | 105 | def add_patcher(self, pattern: str, patcher: ModulePatcher): 106 | patch_info = dict(patcher=patcher) 107 | super().add_pattern(pattern, patch_info) 108 | 109 | def new_child_module(self, child_module_name: str, child_module: nn.Module, patch_info: Dict): 110 | if patch_info.get("patcher") is not None: 111 | return patch_info["patcher"].patch_and_connect(child_module_name, child_module) 112 | 113 | def is_patchable(self, module_name, module, raiseError): 114 | return isinstance(module, nn.Linear) 115 | 116 | 117 | class LinearModelPatcher(ModelDispatchingPatcher): 118 | def __init__(self, patchers: Dict[str, ModulePatcher], model_structure: ModelStructure): 119 | super().__init__() 120 | self.model_structure = model_structure 121 | for layer_type, patcher in patchers.items(): 122 | layer = self.model_structure.LAYER_PATTERNS.get(layer_type) 123 | if layer is not None: 124 | layer_pattern = (self.model_structure.PATTERN_PREFIX + layer).replace(".", "\.") 125 | self.add_patcher(layer_pattern, patcher) 126 | -------------------------------------------------------------------------------- /deit_pruning/src/model.py: -------------------------------------------------------------------------------- 1 | from transformers import BertPreTrainedModel, BertModel 2 | from transformers.modeling_outputs import SequenceClassifierOutput 3 | import torch 4 | from torch import nn 5 | from torch.nn import 
BCEWithLogitsLoss 6 | 7 | 8 | class SwiftBERT(BertPreTrainedModel): 9 | def __init__(self, config): 10 | super().__init__(config) 11 | # self.num_labels = config.num_labels 12 | self.num_labels = 1 13 | self.config = config 14 | 15 | self.bert = BertModel(config) 16 | # self.dropout = nn.Dropout(config.hidden_dropout_prob) 17 | # self.relu = nn.ReLU() 18 | self.classifier = nn.Linear(config.hidden_size, self.num_labels) 19 | 20 | self.init_weights() 21 | 22 | def forward( 23 | self, 24 | input_ids=None, 25 | attention_mask=None, 26 | token_type_ids=None, 27 | position_ids=None, 28 | head_mask=None, 29 | inputs_embeds=None, 30 | labels=None, 31 | output_attentions=None, 32 | output_hidden_states=None, 33 | return_dict=None, 34 | ): 35 | r""" 36 | labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`): 37 | Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[0, ..., 38 | config.num_labels - 1]`. If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss), 39 | If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy). 40 | """ 41 | return_dict = return_dict if return_dict is not None else self.config.use_return_dict 42 | 43 | outputs = self.bert( 44 | input_ids, 45 | attention_mask=attention_mask, 46 | token_type_ids=token_type_ids, 47 | position_ids=position_ids, 48 | head_mask=head_mask, 49 | inputs_embeds=inputs_embeds, 50 | output_attentions=output_attentions, 51 | output_hidden_states=output_hidden_states, 52 | return_dict=return_dict, 53 | ) 54 | 55 | last_hidden_state = outputs[0] 56 | logits = self.classifier(last_hidden_state[:, 0]) 57 | #print('logits',logits.shape,logits) 58 | # logits = self.relu(self.classifier(last_hidden_state[:, 0])) 59 | 60 | # pooled_output = outputs[1] 61 | 62 | # pooled_output = self.dropout(pooled_output) 63 | # logits = self.classifier(pooled_output) 64 | 65 | loss = None 66 | if labels is not None: 67 | # if self.config.problem_type is None: 68 | # if self.num_labels == 1: 69 | # self.config.problem_type = "regression" 70 | # elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int): 71 | # self.config.problem_type = "single_label_classification" 72 | # else: 73 | # self.config.problem_type = "multi_label_classification" 74 | 75 | # if self.config.problem_type == "regression": 76 | # loss_fct = MSELoss() 77 | # if self.num_labels == 1: 78 | # loss = loss_fct(logits.squeeze(), labels.squeeze()) 79 | # else: 80 | # loss = loss_fct(logits, labels) 81 | # elif self.config.problem_type == "single_label_classification": 82 | # loss_fct = CrossEntropyLoss() 83 | # loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) 84 | # elif self.config.problem_type == "multi_label_classification": 85 | # loss_fct = BCEWithLogitsLoss() 86 | # loss = loss_fct(logits, labels) 87 | loss_fct = BCEWithLogitsLoss() 88 | loss = loss_fct(logits[:, 0:1], labels[:, 0:1]) 89 | if not return_dict: 90 | output = (logits,) + outputs[2:] 91 | return ((loss,) + output) if loss is not None else output 92 | 93 | return SequenceClassifierOutput( 94 | loss=loss, 95 | logits=logits, 96 | hidden_states=outputs.hidden_states, 97 | attentions=outputs.attentions, 98 | ) 99 | 100 | class SwiftBERTOutput(SwiftBERT): 101 | def __init__(self, config): 102 | super().__init__(config) 103 | self.sigmoid = nn.Sigmoid() 104 | self.one = torch.nn.parameter.Parameter(torch.tensor(1), requires_grad=False) 105 | 106 | def forward(self, 107 | input_ids=None, 108 
| attention_mask=None, 109 | token_type_ids=None, 110 | position_ids=None, 111 | head_mask=None, 112 | inputs_embeds=None, 113 | labels=None, 114 | output_attentions=None, 115 | output_hidden_states=None, 116 | return_dict=None, 117 | ): 118 | outputs = super().forward( 119 | input_ids=input_ids, 120 | attention_mask=attention_mask, 121 | token_type_ids=torch.min(token_type_ids, self.one), 122 | position_ids=position_ids, 123 | head_mask=head_mask, 124 | inputs_embeds=inputs_embeds, 125 | labels=labels, 126 | output_attentions=output_attentions, 127 | output_hidden_states=output_hidden_states, 128 | return_dict=return_dict, 129 | ) 130 | 131 | return self.sigmoid(outputs.logits[0, 0]) 132 | -------------------------------------------------------------------------------- /deit_pruning/src/supernet.py: -------------------------------------------------------------------------------- 1 | from layers.super_bertlayers import VA_BertModel 2 | from transformers.modeling_outputs import SequenceClassifierOutput 3 | from transformers import BertPreTrainedModel 4 | import torch 5 | from torch import nn 6 | from torch.nn import BCEWithLogitsLoss 7 | 8 | 9 | class SwiftBERT(BertPreTrainedModel): 10 | def __init__(self, config): 11 | super().__init__(config) 12 | # self.num_labels = config.num_labels 13 | self.num_labels = 1 14 | self.config = config 15 | print(config) 16 | 17 | self.bert = VA_BertModel(config) 18 | # self.dropout = nn.Dropout(config.hidden_dropout_prob) 19 | # self.relu = nn.ReLU() 20 | self.classifier = nn.Linear(config.hidden_size, self.num_labels) 21 | 22 | #self.init_weights() 23 | 24 | def forward( 25 | self, 26 | input_ids=None, 27 | attention_mask=None, 28 | token_type_ids=None, 29 | position_ids=None, 30 | head_mask=None, 31 | inputs_embeds=None, 32 | labels=None, 33 | output_attentions=None, 34 | output_hidden_states=None, 35 | return_dict=None, 36 | ): 37 | r""" 38 | labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`): 39 | Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[0, ..., 40 | config.num_labels - 1]`. If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss), 41 | If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
42 | """ 43 | return_dict = return_dict if return_dict is not None else self.config.use_return_dict 44 | 45 | outputs = self.bert( 46 | input_ids, 47 | attention_mask=attention_mask, 48 | token_type_ids=token_type_ids, 49 | position_ids=position_ids, 50 | head_mask=head_mask, 51 | inputs_embeds=inputs_embeds, 52 | output_attentions=output_attentions, 53 | output_hidden_states=output_hidden_states, 54 | return_dict=return_dict, 55 | ) 56 | 57 | last_hidden_state = outputs[0] 58 | logits = self.classifier(last_hidden_state[:, 0]) 59 | #print('logits',logits.shape,logits) 60 | # logits = self.relu(self.classifier(last_hidden_state[:, 0])) 61 | 62 | # pooled_output = outputs[1] 63 | 64 | # pooled_output = self.dropout(pooled_output) 65 | # logits = self.classifier(pooled_output) 66 | 67 | loss = None 68 | if labels is not None: 69 | # if self.config.problem_type is None: 70 | # if self.num_labels == 1: 71 | # self.config.problem_type = "regression" 72 | # elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int): 73 | # self.config.problem_type = "single_label_classification" 74 | # else: 75 | # self.config.problem_type = "multi_label_classification" 76 | 77 | # if self.config.problem_type == "regression": 78 | # loss_fct = MSELoss() 79 | # if self.num_labels == 1: 80 | # loss = loss_fct(logits.squeeze(), labels.squeeze()) 81 | # else: 82 | # loss = loss_fct(logits, labels) 83 | # elif self.config.problem_type == "single_label_classification": 84 | # loss_fct = CrossEntropyLoss() 85 | # loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) 86 | # elif self.config.problem_type == "multi_label_classification": 87 | # loss_fct = BCEWithLogitsLoss() 88 | # loss = loss_fct(logits, labels) 89 | loss_fct = BCEWithLogitsLoss() 90 | loss = loss_fct(logits[:, 0:1], labels[:, 0:1]) 91 | if not return_dict: 92 | output = (logits,) + outputs[2:] 93 | return ((loss,) + output) if loss is not None else output 94 | 95 | return SequenceClassifierOutput( 96 | loss=loss, 97 | logits=logits, 98 | hidden_states=outputs.hidden_states, 99 | attentions=outputs.attentions, 100 | ) 101 | 102 | class SwiftBERTOutput(SwiftBERT): 103 | def __init__(self, config): 104 | super().__init__(config) 105 | self.sigmoid = nn.Sigmoid() 106 | self.one = torch.nn.parameter.Parameter(torch.tensor(1), requires_grad=False) 107 | 108 | def forward(self, 109 | input_ids=None, 110 | attention_mask=None, 111 | token_type_ids=None, 112 | position_ids=None, 113 | head_mask=None, 114 | inputs_embeds=None, 115 | labels=None, 116 | output_attentions=None, 117 | output_hidden_states=None, 118 | return_dict=None, 119 | ): 120 | outputs = super().forward( 121 | input_ids=input_ids, 122 | attention_mask=attention_mask, 123 | token_type_ids=torch.min(token_type_ids, self.one), 124 | position_ids=position_ids, 125 | head_mask=head_mask, 126 | inputs_embeds=inputs_embeds, 127 | labels=labels, 128 | output_attentions=output_attentions, 129 | output_hidden_states=output_hidden_states, 130 | return_dict=return_dict, 131 | ) 132 | 133 | return self.sigmoid(outputs.logits[0, 0]) 134 | -------------------------------------------------------------------------------- /are_16_heads/pruning.py: -------------------------------------------------------------------------------- 1 | from math import sqrt 2 | from logger import logger 3 | 4 | 5 | def parse_head_pruning_descriptors( 6 | descriptors, 7 | reverse_descriptors=False, 8 | n_heads=None 9 | ): 10 | """Returns a dictionary mapping layers to the set of heads to prune 
in 11 | this layer""" 12 | to_prune = {} 13 | for descriptor in descriptors: 14 | layer, heads = descriptor.split(":") 15 | layer = int(layer) - 1 16 | heads = set(int(head) - 1 for head in heads.split(",")) 17 | if layer not in to_prune: 18 | to_prune[layer] = set() 19 | to_prune[layer].update(heads) 20 | # Reverse 21 | if reverse_descriptors: 22 | if n_heads is None: 23 | raise ValueError("You need to specify the total number of heads") 24 | for layer, heads in to_prune.items(): 25 | to_prune[layer] = set([head for head in range(n_heads) 26 | if head not in heads]) 27 | return to_prune 28 | 29 | 30 | def to_pruning_descriptor(to_prune): 31 | """Inverse of parse_head_pruning_descriptors""" 32 | descriptors = [f"{layer+1}:{','.join(str(head+1) for head in heads)}" 33 | for layer, heads in to_prune.items()] 34 | return " ".join(descriptors) 35 | 36 | 37 | def determine_pruning_sequence( 38 | prune_numbers, 39 | prune_percents, 40 | n_heads, 41 | n_layers, 42 | at_least_x_heads_per_layer=0, 43 | ): 44 | all_n_to_prune = prune_numbers 45 | if all_n_to_prune is None: 46 | # Compute the number of heads to prune on percentage if needed 47 | all_n_to_prune = [] 48 | for prune_percent in prune_percents: 49 | total_heads = n_heads * n_layers 50 | n_to_prune = int(total_heads * prune_percent / 100) 51 | # Make sure we keep at least one head per layer 52 | if at_least_x_heads_per_layer > 0: 53 | if n_to_prune > total_heads - at_least_x_heads_per_layer * n_layers: 54 | logger.warn( 55 | f"Can't prune {prune_percent}% ({n_to_prune})" 56 | f" heads AND keep at least {at_least_x_heads_per_layer}" 57 | " head(s) per layer. Will" 58 | f" prune only {(1-(at_least_x_heads_per_layer*n_layers)/total_heads)*100:.1f} " 59 | f"({total_heads-n_layers}) heads instead" 60 | ) 61 | n_to_prune = total_heads - at_least_x_heads_per_layer * n_layers 62 | all_n_to_prune.append(n_to_prune) 63 | break 64 | all_n_to_prune.append(n_to_prune) 65 | 66 | # We'll incrementally prune layers and evaluate 67 | all_n_to_prune = sorted(all_n_to_prune) 68 | n_to_prune_sequence = all_n_to_prune[:] 69 | for idx in range(1, len(all_n_to_prune)): 70 | n_to_prune_sequence[idx] = all_n_to_prune[idx] - all_n_to_prune[idx-1] 71 | # Verify that the total number of heads pruned stayed the same 72 | assert all_n_to_prune[-1] == sum(n_to_prune_sequence) 73 | return n_to_prune_sequence 74 | 75 | 76 | def what_to_prune( 77 | head_importance, 78 | n_to_prune, 79 | to_prune=None, 80 | at_least_x_heads_per_layer=0, 81 | rescale_by_number=False, 82 | ): 83 | head_importance = head_importance.clone() 84 | n_layers, n_heads = head_importance.size() 85 | to_prune = to_prune or {} 86 | if rescale_by_number: 87 | for layer in to_prune: 88 | #head_importance[layer] *= sqrt(n_layers / len(to_prune[layer])) 89 | head_importance[layer] *= sqrt(len(to_prune[layer]) / n_layers) 90 | # Sort heads by score 91 | heads_and_score = [ 92 | ((layer, head), head_importance[layer, head]) 93 | for layer in range(n_layers) 94 | for head in range(n_heads) 95 | ] 96 | heads_and_score = sorted(heads_and_score, key=lambda x: x[1]) 97 | sorted_heads = [head_and_score[0] 98 | for head_and_score in heads_and_score] 99 | # Ensure we don't delete all heads in a layer 100 | if at_least_x_heads_per_layer: 101 | # Remove the top scoring head in each layer 102 | to_protect = {l: 0 for l in range(n_layers)} 103 | filtered_sorted_heads = [] 104 | for layer, head in reversed(sorted_heads): 105 | if layer in to_protect: 106 | if to_protect[layer] < at_least_x_heads_per_layer: 107 | 
to_protect[layer] += 1 108 | continue 109 | else: 110 | to_protect.pop(layer) 111 | filtered_sorted_heads.insert(0, (layer, head)) 112 | sorted_heads = filtered_sorted_heads 113 | # layer/heads that were already pruned 114 | # Prune the lowest scoring heads 115 | sorted_heads = [ 116 | (layer, head) 117 | for (layer, head) in sorted_heads 118 | if layer not in to_prune or head not in to_prune[layer] 119 | ] 120 | # Update heads to prune 121 | for layer, head in sorted_heads[:n_to_prune]: 122 | if layer not in to_prune: 123 | to_prune[layer] = set() 124 | to_prune[layer].add(head) 125 | return to_prune 126 | -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/nn_pruning/hp_naming.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import copy 16 | import re 17 | 18 | 19 | class TrialShortNamer: 20 | PREFIX = "hp" 21 | DEFAULTS = {} 22 | NAMING_INFO = None 23 | MAX_LENGTH = 200 24 | 25 | @classmethod 26 | def set_defaults(cls, prefix, defaults): 27 | cls.PREFIX = prefix 28 | cls.DEFAULTS = defaults 29 | cls.build_naming_info() 30 | 31 | @staticmethod 32 | def length_tuple_enumerator(parts): 33 | for i in range(len(parts[0])): 34 | if len(parts) == 1: 35 | yield (i + 1,) 36 | else: 37 | for l in TrialShortNamer.length_tuple_enumerator(parts[1:]): 38 | yield (i + 1,) + l 39 | 40 | @staticmethod 41 | def names_enumerator(s, splitter="_", separator="_"): 42 | parts = s.split(splitter) 43 | for ls in TrialShortNamer.length_tuple_enumerator(parts): 44 | ps = [p[: ls[i]] for i, p in enumerate(parts)] 45 | yield separator.join(ps) 46 | 47 | @staticmethod 48 | def shortname_for_key(info, param_name): 49 | # We try to create a separatorless short name, but if there is a collision we have to fallback 50 | # to a separated short name 51 | separators = ["", "_"] 52 | 53 | for separator in separators: 54 | for shortname in TrialShortNamer.names_enumerator(param_name, separator=separator): 55 | if shortname not in info["reverse_short_param"]: 56 | info["short_param"][param_name] = shortname 57 | info["reverse_short_param"][shortname] = param_name 58 | return shortname 59 | 60 | return param_name 61 | 62 | @staticmethod 63 | def add_new_param_name(info, param_name): 64 | short_name = TrialShortNamer.shortname_for_key(info, param_name) 65 | info["short_param"][param_name] = short_name 66 | info["reverse_short_param"][short_name] = param_name 67 | 68 | @classmethod 69 | def build_naming_info(cls): 70 | if cls.NAMING_INFO is not None: 71 | return 72 | 73 | info = dict( 74 | short_word={}, 75 | reverse_short_word={}, 76 | short_param={}, 77 | reverse_short_param={}, 78 | ) 79 | 80 | field_keys = list(cls.DEFAULTS.keys()) 81 | 82 | for k in field_keys: 83 | cls.add_new_param_name(info, k) 84 | 85 | cls.NAMING_INFO = info 86 | 87 | @classmethod 88 | def 
shortname(cls, params): 89 | cls.build_naming_info() 90 | assert cls.PREFIX is not None 91 | name = [copy.copy(cls.PREFIX)] 92 | 93 | missing_defaults = {} 94 | for k, v in params.items(): 95 | if k not in cls.DEFAULTS: 96 | missing_defaults[k] = v 97 | 98 | if len(missing_defaults) != 0: 99 | print(missing_defaults) 100 | raise Exception(f"You should provide a default value for the params {missing_defaults}") 101 | 102 | for k, v in params.items(): 103 | if v == cls.DEFAULTS[k]: 104 | # The default value is not added to the name 105 | continue 106 | 107 | key = cls.NAMING_INFO["short_param"][k] 108 | 109 | if isinstance(v, bool): 110 | v = 1 if v else 0 111 | 112 | sep = "" if isinstance(v, (int, float)) else "-" 113 | e = f"{key}{sep}{v}" 114 | name.append(e) 115 | 116 | ret = "_".join(name).replace("/", "__") 117 | 118 | if len(ret) > cls.MAX_LENGTH: 119 | h = hex(abs(hash(ret)))[2:] 120 | ret = ret[:cls.MAX_LENGTH] 121 | ret = ret[:-len(h) - 2] 122 | ret = ret + "--" + h 123 | return ret 124 | 125 | @classmethod 126 | def parse_repr(cls, repr): 127 | repr = repr[len(cls.PREFIX) + 1 :] 128 | if repr == "": 129 | values = [] 130 | else: 131 | values = repr.split("_") 132 | 133 | parameters = {} 134 | 135 | for value in values: 136 | if "-" in value: 137 | p_k, p_v = value.split("-") 138 | else: 139 | p_k = re.sub("[0-9.]", "", value) 140 | p_v = float(re.sub("[^0-9.]", "", value)) 141 | 142 | key = cls.NAMING_INFO["reverse_short_param"][p_k] 143 | 144 | parameters[key] = p_v 145 | 146 | for k in cls.DEFAULTS: 147 | if k not in parameters: 148 | parameters[k] = cls.DEFAULTS[k] 149 | 150 | return parameters 151 | -------------------------------------------------------------------------------- /deit_pruning/src/get_latency.py: -------------------------------------------------------------------------------- 1 | from supernet import SwiftBERT 2 | import random 3 | import torch 4 | from glob import glob 5 | from pathlib import Path 6 | import json 7 | from transformers import BertConfig 8 | from supernet import SwiftBERTOutput 9 | import argparse 10 | baseconfig={ 11 | "_name_or_path": "google/bert_uncased_L-4_H-256_A-4", 12 | "architectures": [ 13 | "SwiftBERT" 14 | ], 15 | "attention_probs_dropout_prob": 0.1, 16 | "gradient_checkpointing": False, 17 | "hidden_act": "gelu", 18 | "hidden_dropout_prob": 0.1, 19 | "hidden_size": 256, 20 | "initializer_range": 0.02, 21 | "intermediate_size": 1024, 22 | "layer_norm_eps": 1e-12, 23 | "max_position_embeddings": 512, 24 | "model_type": "bert", 25 | "num_attention_heads": 4, 26 | "num_hidden_layers": 4, 27 | "pad_token_id": 0, 28 | "position_embedding_type": "absolute", 29 | "transformers_version": "4.7.0", 30 | "type_vocab_size": 2, 31 | "use_cache": True, 32 | "vocab_size": 30522 33 | } 34 | def gen_testconfigs(sample_num): 35 | heads_nums=[0.25,0.5,0.75,1] 36 | intermediate_sizes=[a/100.0 for a in list(range(1,101))] 37 | 38 | #sample_num=100 39 | 40 | for si in range(sample_num): 41 | curconfig=baseconfig 42 | 43 | ## generate a test file 44 | curconfig['layers']={} 45 | tag="" 46 | for layer in range(4): 47 | head_num=random.sample(heads_nums,1)[0] 48 | im_size=random.sample(intermediate_sizes,1)[0] 49 | print(layer,head_num,im_size) 50 | curconfig['layers'][layer]={} 51 | curconfig['layers'][layer]['heads']=int(head_num*4) 52 | curconfig['layers'][layer]['intermediate_size']=int(im_size*1024) 53 | tag+='h_'+str(head_num)+'_d_'+str(im_size)+'-' 54 | 55 | tag=tag[0:-1] 56 | fw=open('latency_data/'+tag+'.json','w') 57 | 
fw.write(json.dumps(curconfig,indent=4)) 58 | def gen_original(): 59 | curconfig=baseconfig 60 | 61 | ## generate a test file 62 | curconfig['layers']={} 63 | tag="" 64 | for layer in range(4): 65 | head_num=4 66 | im_size=1024 67 | print(layer,head_num,im_size) 68 | curconfig['layers'][layer]={} 69 | curconfig['layers'][layer]['heads']=head_num 70 | curconfig['layers'][layer]['intermediate_size']=im_size 71 | tag+='h_'+str(head_num)+'_d_'+str(im_size)+'-' 72 | 73 | tag='config' 74 | fw=open('latency_data/'+tag+'.json','w') 75 | fw.write(json.dumps(curconfig,indent=4)) 76 | 77 | def gen_uniform(): 78 | for h in range(1,5): 79 | for j in range(1,101): 80 | 81 | curconfig=baseconfig 82 | 83 | ## generate a test file 84 | curconfig['layers']={} 85 | tag="" 86 | for layer in range(4): 87 | head_num=h 88 | im_size=1024 89 | print(layer,head_num,im_size) 90 | curconfig['layers'][layer]={} 91 | curconfig['layers'][layer]['heads']=head_num 92 | curconfig['layers'][layer]['intermediate_size']=int(im_size*(j/100.0)) 93 | tag+='h_'+str(head_num/4.0)+'_d_'+str(j/100.0)+'-' 94 | 95 | print(tag) 96 | fw=open('latency_data/'+tag[0:-1]+'.json','w') 97 | fw.write(json.dumps(curconfig,indent=4)) 98 | 99 | 100 | 101 | #gen_testconfigs() 102 | 103 | 104 | parser = argparse.ArgumentParser() 105 | parser.add_argument("--model_dir", type=Path, default='latency_data') 106 | parser.add_argument("--nn_pruning", action='store_true') 107 | parser.add_argument("--no_opt", action='store_true') 108 | parser.add_argument("--force_opt", action='store_true') 109 | parser.add_argument("--max_ad_length", type=int, default=38) 110 | parser.add_argument("--output_name", type=str, default="output") 111 | parser.add_argument("--opset_version", type=int, default=13) 112 | 113 | args = parser.parse_args() 114 | assert not (args.no_opt and args.force_opt), "no_opt and force_opt cannot be set together." 
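# A minimal sketch (added; assumes onnxruntime is installed and that an export
# such as latency_data/config.onnx already exists) of how one exported file can
# be timed; kept in comments so it does not run on import:
#
#   import time
#   import numpy as np
#   import onnxruntime as ort
#   sess = ort.InferenceSession('latency_data/config.onnx')
#   feed = {i.name: np.ones((1, 38), dtype=np.int64) for i in sess.get_inputs()}
#   start = time.perf_counter()
#   for _ in range(100):
#       sess.run(None, feed)
#   print('avg ms:', (time.perf_counter() - start) / 100 * 1000)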
115 | # python src/onnx_export.py --model_dir ./results/dummy_mini/final/ 116 | def gen_onnx(config): 117 | myconfig=BertConfig.from_pretrained(config) 118 | print(myconfig) 119 | model = SwiftBERTOutput(myconfig) 120 | print(model) 121 | bert_config = model.config 122 | 123 | max_ad_length = args.max_ad_length 124 | 125 | print("==== export ====") 126 | output_name = config.replace(".json","") 127 | 128 | torch.onnx.export( 129 | model, 130 | (torch.tensor([1] * (max_ad_length)).view(-1, max_ad_length), 131 | torch.tensor([1] * (max_ad_length)).view(-1, max_ad_length), 132 | torch.tensor([1] * (max_ad_length)).view(-1, max_ad_length)), 133 | f'{output_name}.onnx', 134 | input_names=['input_ids', 'attention_mask', 'token_type_ids'], 135 | output_names=['score'], 136 | verbose=False, 137 | export_params=True, 138 | opset_version=args.opset_version, 139 | do_constant_folding=True 140 | ) 141 | 142 | ''' 143 | gen_original() 144 | gen_onnx('latency_data/config.json') 145 | ''' 146 | 147 | #gen_testconfigs(2000) 148 | #gen_original() 149 | #gen_uniform() 150 | filenames=glob('latency_data/**.json') 151 | for filename in filenames: 152 | gen_onnx(filename) 153 | -------------------------------------------------------------------------------- /modeling/models/vit.py: -------------------------------------------------------------------------------- 1 | from numpy.core import numeric 2 | import tensorflow as tf 3 | 4 | from einops.layers.tensorflow import Rearrange 5 | from modeling.layers.transformer_encoder import TransformerEncoderBlock, TransformerEncoderBlock_Pruned 6 | from modeling.layers.activation import gelu 7 | 8 | 9 | class ViT(tf.keras.Model): 10 | 11 | def __init__(self, *, image_size=224, patch_size=16, num_classes=1000, dim=768, depth=12, heads=12, mlp_dim=3072): 12 | super().__init__() 13 | assert image_size % patch_size == 0, 'image dimensions must be divisible by the patch size' 14 | num_patches = (image_size // patch_size) ** 2 15 | 16 | self.patch_size = patch_size 17 | self.dim = dim 18 | self.pos_embedding = self.add_weight("position_embeddings", 19 | shape=[num_patches + 1, 20 | dim], 21 | initializer=tf.keras.initializers.RandomNormal(), 22 | dtype=tf.float32) 23 | self.patch_to_embedding = tf.keras.layers.Dense(dim) 24 | self.cls_token = self.add_weight("cls_token", 25 | shape=[1, 26 | 1, 27 | dim], 28 | initializer=tf.keras.initializers.RandomNormal(), 29 | dtype=tf.float32) 30 | 31 | self.rearrange = Rearrange( 32 | 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=self.patch_size, p2=self.patch_size) 33 | 34 | self.transformer = TransformerEncoderBlock(dim, depth, heads, mlp_dim) 35 | 36 | self.to_cls_token = tf.identity 37 | 38 | self.mlp_head = tf.keras.Sequential([tf.keras.layers.Dense(mlp_dim, activation=gelu), 39 | tf.keras.layers.Dense(num_classes)]) 40 | 41 | @tf.function 42 | def call(self, img): 43 | shapes = tf.shape(img) 44 | 45 | x = self.rearrange(img) # [b, h * w, p * p * c] 46 | x = self.patch_to_embedding(x) # [b, h * w = n, dim] 47 | 48 | cls_tokens = tf.broadcast_to( 49 | self.cls_token, (shapes[0], 1, self.dim)) # [b, 1, dim] 50 | x = tf.concat((cls_tokens, x), axis=1) # [b, n + 1, dim] 51 | x += self.pos_embedding 52 | x = self.transformer(x) 53 | 54 | x = self.to_cls_token(x[:, 0]) # [b, dim] 55 | return self.mlp_head(x) # [b, num_classes] 56 | 57 | 58 | class ViT_Pruned(ViT): 59 | 60 | def __init__(self, *, image_size=224, patch_size=16, num_classes=1000, dim=768, depth=12, heads=12, mlp_dim=3072, head_size=64, prune_encoding='all_head12_ffn1.0'): 61 | 
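        '''
        prune_encoding grammar (mirrors decode_prune_encoding below):
          'all_head12_ffn1.0'                 -> every layer keeps 12 heads and 100% of mlp_dim
          'layerwise_h2-d1.0_h3-d0.5_h1-d0.5' -> one 'h<heads>-d<ffn ratio>' token per layer
        '''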
prune_setting, num_remain_heads, ffn_thresholds = self.decode_prune_encoding(prune_encoding) 62 | if prune_setting == 'all': 63 | num_remain_heads_list = [num_remain_heads for _ in range(depth)] 64 | intermediate_size_list = [int(ffn_thresholds * mlp_dim) for _ in range(depth)] 65 | else: # prune_setting == 'layerwise' 66 | assert(len(num_remain_heads) == depth and len(ffn_thresholds) == depth) 67 | num_remain_heads_list = num_remain_heads 68 | intermediate_size_list = [int(ffn_thresholds[i] * mlp_dim) for i in range(depth)] 69 | 70 | super().__init__(image_size=image_size, patch_size=patch_size, 71 | num_classes=num_classes, dim=dim, depth=depth, heads=heads, mlp_dim=mlp_dim) 72 | 73 | # override TransformerEncoderBlock 74 | self.transformer = TransformerEncoderBlock_Pruned(hidden_size=dim, num_layers=depth, num_remain_heads_list=num_remain_heads_list, 75 | intermediate_size_list=intermediate_size_list, head_size=head_size, norm_first=True) 76 | 77 | def decode_prune_encoding(self, prune_encoding: str): 78 | tokens = prune_encoding.split('_') 79 | print(tokens) 80 | prune_setting = tokens[0] 81 | assert prune_setting in ['layerwise', 'all'] 82 | if prune_setting == 'all': 83 | # e.g. prune_encoding = 'all_head12_ffn1.0': all layers remain 12 heads and 100% ffn 84 | head_setting = tokens[1] 85 | ffn_setting = tokens[2] 86 | num_heads = int(head_setting.replace('head', '')) 87 | ffn_threshold = float(ffn_setting.replace('ffn', '')) 88 | return prune_setting, num_heads, ffn_threshold 89 | else: # prune_setting == 'layerwise' 90 | # e.g. prune_encoding = 'layerwise_h2-d1.0_h3-d0.5_h1-d0.5' 91 | num_heads_list = [] 92 | ffn_threshold_list = [] 93 | for token in tokens[1: ]: 94 | hx, dx = token.split('-') 95 | num_heads_list.append(int(hx.replace('h', ''))) 96 | ffn_threshold_list.append(float(dx.replace('d', ''))) 97 | return prune_setting, num_heads_list, ffn_threshold_list 98 | 99 | 100 | def get_deit_base(): 101 | return ViT(dim=768, depth=12) 102 | 103 | 104 | def get_deit_small(): 105 | return ViT(dim=384, heads=6, mlp_dim=384 * 4) 106 | 107 | 108 | def get_deit_tiny(): 109 | return ViT(dim=192, heads=3, mlp_dim=192 * 4) 110 | -------------------------------------------------------------------------------- /deit_pruning/src/pytorch_prune/pruner.py: -------------------------------------------------------------------------------- 1 | # Still WIP 2 | from torch.nn.utils import prune 3 | import torch.nn 4 | import argparse 5 | from pathlib import Path 6 | from .block import BlockPruningMethod, block_pruning 7 | from .ln_smart import LnSmartStructured, ln_smart_structured 8 | 9 | def is_encoder(name, module): 10 | return isinstance(module, torch.nn.Linear) and 'bert.encoder' in name 11 | 12 | prune_mapping = { 13 | "random_unstructured": (prune.random_unstructured, prune.RandomUnstructured), 14 | "l1_unstructured": (prune.l1_unstructured, prune.L1Unstructured), 15 | "random_structured": (prune.random_structured, prune.RandomStructured), 16 | "ln_structured": (prune.ln_structured, prune.LnStructured), 17 | "block": (block_pruning, BlockPruningMethod), 18 | "ln_smart_structured": (ln_smart_structured, LnSmartStructured), 19 | } 20 | 21 | def argbuilder(args): 22 | if "unstructured" in args.func or args.func == 'block': 23 | block_args = {} 24 | if args.func == "block": 25 | assert args.block_row is not None and args.block_col is not None 26 | block_args['block_row'] = args.block_row 27 | block_args['block_col'] = args.block_col 28 | if args.ln is not None: 29 | # use fro by default 30 | 
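                # (added note) BlockPruningMethod scores each block by this norm;
                # when --ln is omitted it falls back to the method's own default,
                # the Frobenius norm, matching the hybrid path below.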
block_args['n'] = args.ln 31 | return {**block_args, **{ 32 | "amount": args.amount 33 | }} 34 | else: 35 | ret = { 36 | "amount": args.amount, 37 | } 38 | if args.func != "ln_smart_structured": 39 | ret["dim"] = args.dim 40 | if "ln" in args.func: 41 | ret = {**ret, **{ 42 | "n": args.ln 43 | }} 44 | return ret 45 | 46 | def isInt(s): 47 | try: 48 | int(s) 49 | return True 50 | except ValueError: 51 | return False 52 | 53 | def norm_converter(ln: str): 54 | if isInt(ln): 55 | return int(ln) 56 | elif "inf" in ln: 57 | return float(ln) 58 | else: 59 | return ln 60 | 61 | if __name__ == "__main__": 62 | from ..model import SwiftBERT 63 | from src.inspector.get_sparsity import show 64 | from ..utils import set_random 65 | 66 | parser = argparse.ArgumentParser() 67 | parser.add_argument("--func", type=str) 68 | parser.add_argument("--global", dest='glob', action='store_true') 69 | parser.add_argument("--amount", type=float, default=0.5) 70 | parser.add_argument("--deit_model_name", type=Path, required=True) 71 | parser.add_argument("--output_dir", type=Path, default='./results/playground/torch_pruned/') 72 | parser.add_argument("--ln", type=str, default=None) 73 | parser.add_argument("--dim", type=int, default=None) 74 | parser.add_argument("--block_row", type=int, default=None) 75 | parser.add_argument("--block_col", type=int, default=None) 76 | parser.add_argument("--seed", type=int, default=12345) 77 | parser.add_argument("--hybrid", action='store_true', help='It overwrites func, global & ln options') 78 | 79 | args = parser.parse_args() 80 | # python -m src.pytorch_prune.pruner --deit_model_name ./results/playground/final 81 | # python -m src.pytorch_prune.pruner --deit_model_name ./results/AdsSwiftBERT/final/ --func random_unstructured --amount 0.5 --output_dir ./results/AdsSwiftBERT/random_unstructured_0.5 82 | 83 | set_random(args.seed) 84 | if args.ln is not None: 85 | args.ln = norm_converter(args.ln) 86 | 87 | model = SwiftBERT.from_pretrained(args.deit_model_name) 88 | 89 | if args.hybrid: 90 | for name, module in model.named_modules(): 91 | if is_encoder(name, module): 92 | if "attention" in name: 93 | block_pruning(module, 'weight', amount=args.amount, block_row=args.block_row, block_col=args.block_col, n='fro') 94 | elif "dense" in name: 95 | if args.dim is None: 96 | ln_smart_structured(module, 'weight', amount=args.amount, n=1) 97 | else: 98 | prune.ln_structured(module, 'weight', amount=args.amount, n=1, dim=args.dim) 99 | else: 100 | assert 0 101 | prune.remove(module, 'weight') 102 | else: 103 | assert args.func in [ 104 | "random_unstructured", 105 | "l1_unstructured", 106 | "random_structured", 107 | "ln_structured", 108 | "block", 109 | "ln_smart_structured" 110 | ] 111 | 112 | if args.glob: 113 | assert "_structured" not in args.func 114 | 115 | if "_structured" in args.func: 116 | if args.func != "ln_smart_structured": 117 | assert args.dim is not None 118 | assert not ("ln" in args.func and args.ln is None) 119 | 120 | # start! 
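    # Flow (added note): each matched encoder nn.Linear is first pruned, which
    # reparametrizes it into (weight_orig, weight_mask); prune.remove() then
    # folds the mask into the weight so the saved checkpoint holds real zeros.
    # Hypothetical hybrid invocation (paths and amount are illustrative):
    #   python -m src.pytorch_prune.pruner --deit_model_name ./results/AdsSwiftBERT/final \
    #       --hybrid --amount 0.5 --block_row 16 --block_col 16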
121 | if not args.glob:
122 | for name, module in model.named_modules():
123 | if is_encoder(name, module):
124 | prune_mapping[args.func][0](module, 'weight', **argbuilder(args))
125 | prune.remove(module, 'weight')
126 | else:
127 | parameters_to_prune = []
128 | for name, module in model.named_modules():
129 | if is_encoder(name, module):
130 | parameters_to_prune.append((module, 'weight'))
131 | prune.global_unstructured(
132 | parameters_to_prune,
133 | pruning_method=prune_mapping[args.func][1],
134 | **argbuilder(args)
135 | )
136 | for name, module in model.named_modules():
137 | if is_encoder(name, module):
138 | prune.remove(module, 'weight')
139 |
140 | # check sparsity
141 | show(model, skip_embedding=True, skip_layernorm=True, skip_bias=True)
142 |
143 | # export
144 | model.save_pretrained(args.output_dir)
145 | -------------------------------------------------------------------------------- /deit_pruning/src/data.py: --------------------------------------------------------------------------------
1 | import torch
2 | import random
3 |
4 | # The training set is ~300 GB, so loading it into memory all at once is impractical.
5 | # That's why we use an IterableDataset here.
6 | class AdIterableDataset(torch.utils.data.IterableDataset):
7 | def __init__(self, input_file, rids=False):
8 | super().__init__()
9 | self.file = input_file
10 |
11 | # Transformers already implements IterableDatasetShard,
12 | # so we don't need to handle sharding ourselves;
13 | # otherwise the actual steps taken could be smaller than max_steps.
14 |
15 | # if distributed:
16 | # self.rank = torch.distributed.get_rank()
17 | # self.world_size = torch.distributed.get_world_size()
18 | # else:
19 | # self.rank = 0
20 | # self.world_size = 1
21 |
22 | self.rids = rids
23 |
24 | def __iter__(self):
25 | # worker_info = torch.utils.data.get_worker_info()
26 | # num_workers = 1 if worker_info is None else worker_info.num_workers
27 | # local_worker_id = 0 if worker_info is None else worker_info.id
28 |
29 | # skip = self.world_size * num_workers
30 | # idx = self.rank * num_workers + local_worker_id
31 | # worker_id = self.rank * num_workers + local_worker_id
32 |
33 | with open(self.file, "r", encoding='utf-8') as reader:
34 | for entry in reader:
35 | # if idx % skip == worker_id:
36 |
37 | line = entry.rstrip("\n").split("\t")
38 | labels = torch.tensor([int(line[0])], dtype=torch.long) if self.rids else torch.tensor([float(line[0])], dtype=torch.float)
39 | input_ids = torch.tensor(list(map(int, line[1].split(" "))), dtype=torch.long)
40 | train_data = {'labels': labels, 'input_ids': input_ids}
41 | # idx = idx + 1
42 | yield train_data
43 |
44 | # else:
45 | # idx = idx + 1
46 | # continue
47 |
48 | # Works like transformers' tokenizer: builds attention_mask and token_type_ids from raw input_ids.
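# A worked example for get_token_att_ids below (ids illustrative, assuming a
# BERT-style vocabulary where 102 is [SEP] and 0 is padding):
#   token_ids      = [[101,  5, 102,  7, 102,  0]]
#   attention_mask = [[  1,  1,   1,  1,   1,  0]]   (torch.min(token_ids, one))
#   token_type_ids = [[  0,  0,   0,  1,   1,  0]]   (flips to 1 after the first [SEP]; padding stays 0)
# With type_count != 2, the final min() is taken against zero, so token_type_ids collapses to all zeros.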
49 | def get_token_att_ids(zero, one, token_ids, type_count=2):
50 | attention_mask = torch.min(token_ids, one)
51 | token_type_ids = torch.nn.functional.pad(torch.cumsum( torch.where(token_ids[:,0:-1] == 102, one, zero), dim=1), pad=(1,0), mode='constant', value=0)
52 | if type_count == 2:
53 | token_type_ids = torch.min(torch.mul(attention_mask, token_type_ids), one)
54 | else:
55 | token_type_ids = torch.min(torch.mul(attention_mask, token_type_ids), zero)
56 | return attention_mask, token_type_ids
57 |
58 | # For small datasets (e.g. the validation set)
59 | class AdDataset(torch.utils.data.Dataset):
60 | def __init__(self, input_file, model_structure="EarlyCrossModel", distributed=False, rids=False):
61 | assert model_structure == "EarlyCrossModel"
62 | assert distributed is False # haven't tested distributed training yet
63 | # self.zero = torch.nn.parameter.Parameter(torch.tensor(0), requires_grad=False)
64 | # self.one = torch.nn.parameter.Parameter(torch.tensor(1), requires_grad=False)
65 | self.data = []
66 | with open(input_file, "r", encoding='utf-8') as reader:
67 | for entry in reader:
68 | line = entry.rstrip("\n").split("\t")
69 | labels = torch.tensor([int(line[0])], dtype=torch.long) if rids else torch.tensor([float(line[0])], dtype=torch.float)
70 | # labels = torch.tensor([int(line[0]), 1-int(line[0])], dtype=torch.long) if rids else torch.tensor([float(line[0]), 1-float(line[0])], dtype=torch.float)
71 | input_ids = torch.tensor(list(map(int, line[1].split(" "))), dtype=torch.long)
72 | # attention_mask, token_type_ids = get_token_att_ids(self.zero, self.one, input_ids.unsqueeze(0)) # TODO: optimize
73 | # train_data = [labels, input_ids, attention_mask[0], token_type_ids[0]]
74 | train_data = [labels, input_ids]
75 | self.data.append(train_data)
76 |
77 | def __getitem__(self, idx):
78 | # labels, input_ids, attention_mask, token_type_ids = self.data[idx]
79 | labels, input_ids = self.data[idx]
80 |
81 | return {
82 | 'input_ids': input_ids,
83 | # 'token_type_ids': token_type_ids,
84 | # 'attention_mask': attention_mask,
85 | 'labels': labels
86 | }
87 |
88 | def __len__(self):
89 | return len(self.data)
90 |
91 | # Shuffling IterableDataset
92 | class ShuffleDataset(torch.utils.data.IterableDataset):
93 | def __init__(self, dataset, buffer_size):
94 | super().__init__()
95 | self.dataset = dataset
96 | self.buffer_size = buffer_size
97 |
98 | def set_epoch(self, seed):
99 | random.seed(seed)
100 |
101 | def __iter__(self):
102 | shufbuf = []
103 | try:
104 | dataset_iter = iter(self.dataset)
105 | for i in range(self.buffer_size):
106 | shufbuf.append(next(dataset_iter))
107 | except StopIteration: # dataset has fewer items than buffer_size
108 | self.buffer_size = len(shufbuf)
109 |
110 | try:
111 | while True:
112 | try:
113 | item = next(dataset_iter)
114 | evict_idx = random.randint(0, self.buffer_size - 1)
115 | yield shufbuf[evict_idx]
116 | shufbuf[evict_idx] = item
117 | except StopIteration:
118 | if len(shufbuf) > 0:
119 | yield shufbuf.pop()
120 | else:
121 | break
122 | except GeneratorExit:
123 | pass
124 |
125 | # Count the lines of a large file by reading it in chunks,
126 | # which saves memory.
127 | def _make_gen(reader):
128 | size = 1024 * 1024
129 | b = reader(size)
130 | while b:
131 | yield b
132 | b = reader(size)
133 |
134 | def rawgencount(filename):
135 | with open(filename, 'rb') as f:
136 | f_gen = _make_gen(f.raw.read)
137 | return sum(buf.count(b'\n') for buf in f_gen)
138 | -------------------------------------------------------------------------------- /experiments/D1207_tflite_quant_cnn_test.py: 
--------------------------------------------------------------------------------
1 | import argparse
2 | import re
3 | import os
4 | import sys
5 | import subprocess
6 |
7 |
8 | sys.path.insert(0, f'{os.path.dirname(sys.argv[0])}/..')
9 | from modeling.models.cnn_zoo import cnn_zoo_dict
10 | from utils import tf2tflite_dir
11 |
12 | class ADB:
13 | def __init__(self, serino):
14 | self.serino = serino
15 |
16 | def push(self, src, dst):
17 | subprocess.run(f'adb -s {self.serino} push {src} {dst}', shell=True)
18 |
19 | def pull(self, src, dst):
20 | subprocess.run(f'adb -s {self.serino} pull {src} {dst}', shell=True)
21 |
22 | def remove(self, dst):
23 | subprocess.run(f'adb -s {self.serino} shell rm {dst}', shell=True)
24 |
25 | def run_cmd(self, cmd):
26 | result = subprocess.check_output(f'adb -s {self.serino} shell {cmd}', shell=True).decode('utf-8')
27 | print(result)
28 | return result
29 |
30 | class TfliteCnnTester:
31 | def __init__(self, adb: ADB, cnn_zoo_dict: dict, model_zoo_dir: str):
32 | self.adb = adb
33 | self.cnn_zoo_dict = cnn_zoo_dict
34 | self.tf_model_dir = os.path.join(model_zoo_dir, 'tf_model', 'quant_cnn_test')
35 | self.tflite_model_dir = os.path.join(model_zoo_dir, 'tflite_model', 'quant_cnn_test')
36 |
37 | def _get_tf_path(self, model_name) -> str:
38 | return os.path.join(self.tf_model_dir, model_name+'.tf')
39 |
40 | def _get_fp32_tflite_path(self, model_name) -> str:
41 | return os.path.join(self.tflite_model_dir, 'fp32', model_name+'.tflite')
42 |
43 | def _get_int8_tflite_path(self, model_name) -> str:
44 | return os.path.join(self.tflite_model_dir, 'int8', model_name+'_quant_int8.tflite')
45 |
46 | def _export_tf(self, ):
47 | print('===== Exporting TF Saved Model =====')
48 | for model_name, generator_func in self.cnn_zoo_dict.items():
49 | model = generator_func()
50 | model.save(self._get_tf_path(model_name))
51 |
52 | def _convert(self, ):
53 | print('===== Converting TFLite =====')
54 | tf2tflite_dir(self.tf_model_dir, os.path.join(self.tflite_model_dir, 'fp32'), 'None')
55 |
56 | def _quantize(self, ):
57 | print('===== Quantizing =====')
58 | tf2tflite_dir(self.tf_model_dir, os.path.join(self.tflite_model_dir, 'int8'), 'int8')
59 |
60 | def _fetch_latency(self, text: str, target='cpu_fp32'):
61 | if target in ['cpu_fp32', 'cpu_int8']:
62 | match = re.findall(r'avg=\d+\.\d+|avg=\d+', text)[-1]
63 | return float(match[len('avg='): ]) / 1000
64 | else:
65 | match = re.findall(r'Total time - \d+\.\d+ms|Total time - \d+ms', text)[-1]
66 | return float(match[len('Total time - '): -len('ms')])
67 |
68 | def _benchmark_single(self, model_path, target='cpu_fp32'):
69 | assert target in ['cpu_fp32', 'cpu_int8', 'gpu_fp32', 'gpu_fp16']
70 | file_name = os.path.basename(model_path)
71 | dst_path = f'/sdcard/{file_name}'
72 | avg_ms, output_text = 0.0, ''
73 | self.adb.push(model_path, dst_path)
74 |
75 | try:
76 | if target in ['cpu_fp32', 'cpu_int8']:
77 | output_text = self.adb.run_cmd(
78 | f'taskset 70 /data/local/tmp/benchmark_model_plus_flex_r27 --graph={dst_path} --num_runs=30 --warmup_runs=10 --use_xnnpack=false --num_threads=1')
79 | else:
80 | output_text = self.adb.run_cmd(f'/data/local/tmp/performance_profiling_plus_f32 {dst_path} {"F32" if target == "gpu_fp32" else "F16"}')
81 | except subprocess.CalledProcessError:
82 | pass
83 |
84 | self.adb.remove(dst_path)
85 | avg_ms = self._fetch_latency(output_text, target) if output_text else 0.0
86 | return avg_ms
87 |
88 | def _benchmark(self, ):
89 | print('===== Benchmarking =====')
90 | name_list = list(self.cnn_zoo_dict.keys())
91 | result_dict
= {} 92 | for model_name in name_list: 93 | result_dict[model_name] = {} 94 | 95 | for target in ['cpu_fp32', 'gpu_fp32', 'gpu_fp16']: 96 | for model_name in name_list: 97 | tflite_path = self._get_fp32_tflite_path(model_name) 98 | avg_ms = self._benchmark_single(tflite_path, target) 99 | result_dict[model_name][target] = round(avg_ms, 2 if target == 'cpu_fp32' else 5) 100 | for model_name in name_list: 101 | tflite_path = self._get_int8_tflite_path(model_name) 102 | avg_ms = self._benchmark_single(tflite_path, 'cpu_int8') 103 | result_dict[model_name]['cpu_int8'] = round(avg_ms, 2) 104 | 105 | print('===============================') 106 | print(' SUMMARY') 107 | print('===============================') 108 | print(*name_list) 109 | for target in ['cpu_fp32', 'cpu_int8', 'gpu_fp32', 'gpu_fp16']: 110 | print(target, *[result_dict[k][target] for k in name_list]) 111 | 112 | def run(self, ): 113 | self._export_tf() 114 | self._convert() 115 | self._quantize() 116 | self._benchmark() 117 | 118 | 119 | def main(): 120 | parser = argparse.ArgumentParser() 121 | parser.add_argument('--model_zoo_dir', default='models', help='root dir to save tf and tflite models') 122 | parser.add_argument('--serial_number', default='98281FFAZ009SV', help='phone serial number') 123 | args = parser.parse_args() 124 | 125 | adb = ADB(args.serial_number) 126 | tflite_cnn_tester = TfliteCnnTester(adb, cnn_zoo_dict, args.model_zoo_dir) 127 | tflite_cnn_tester.run() 128 | 129 | if __name__ == '__main__': 130 | main() -------------------------------------------------------------------------------- /deit_pruning/src/trainer.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from transformers import Trainer 3 | import torch.nn as nn 4 | import torch 5 | from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union 6 | from nn_pruning.sparse_trainer import SparseTrainer 7 | from data import get_token_att_ids 8 | from utils import get_distil_loss 9 | 10 | 11 | @dataclass 12 | class DistilTrainingArguments: 13 | teacher_model: torch.nn.Module 14 | distil_temperature: float 15 | alpha_distil: float 16 | 17 | 18 | class TrainerWithTokenizer(Trainer): 19 | def __init__(self, *args, **kwargs): 20 | Trainer.__init__(self, *args, **kwargs) 21 | self.zero = torch.nn.parameter.Parameter( 22 | torch.tensor(0), requires_grad=False) 23 | self.one = torch.nn.parameter.Parameter( 24 | torch.tensor(1), requires_grad=False) 25 | 26 | def training_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]]) -> torch.Tensor: 27 | attention_mask, token_type_ids = get_token_att_ids( 28 | self.zero, self.one, inputs['input_ids']) 29 | # print('train with tokenizer',inputs['input_ids']) 30 | inputs['attention_mask'] = attention_mask 31 | inputs['token_type_ids'] = token_type_ids 32 | 33 | return super().training_step(model, inputs) 34 | 35 | def prediction_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]], 36 | prediction_loss_only: bool, ignore_keys: Optional[List[str]] = None,) \ 37 | -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: 38 | attention_mask, token_type_ids = get_token_att_ids( 39 | self.zero, self.one, inputs['input_ids']) 40 | 41 | inputs['attention_mask'] = attention_mask 42 | inputs['token_type_ids'] = token_type_ids 43 | # print('predict with tokenizer',inputs['input_ids']) 44 | 45 | return super().prediction_step(model, inputs, 
prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys) 46 | 47 | 48 | class SparseWithoutTeacherTrainer(SparseTrainer, Trainer): 49 | def __init__(self, sparse_args, *args, **kwargs): 50 | Trainer.__init__(self, *args, **kwargs) 51 | SparseTrainer.__init__(self, sparse_args) 52 | 53 | def compute_loss(self, model, inputs, return_outputs=False): 54 | """ 55 | We override the default loss in SparseTrainer because it throws an 56 | error when run without distillation 57 | """ 58 | outputs = model(**inputs) 59 | 60 | # Save past state if it exists 61 | # TODO: this needs to be fixed and made cleaner later. 62 | if self.args.past_index >= 0: 63 | self._past = outputs[self.args.past_index] 64 | 65 | # We don't use .loss here since the model may return tuples instead of ModelOutput. 66 | loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0] 67 | self.metrics["ce_loss"] += float(loss.mean()) 68 | self.loss_counter += 1 69 | return (loss, outputs) if return_outputs else loss 70 | 71 | 72 | class SparserWithTeacherTrainer(SparseTrainer, Trainer): 73 | def __init__(self, sparse_args, distil_args: DistilTrainingArguments, *args, **kwargs): 74 | Trainer.__init__(self, *args, **kwargs) 75 | SparseTrainer.__init__(self, sparse_args) 76 | self.teacher_model = distil_args.teacher_model 77 | self.alpha_distil = distil_args.alpha_distil 78 | self.distil_temperature = distil_args.distil_temperature 79 | 80 | def compute_loss(self, model, inputs, return_outputs=False): 81 | with torch.no_grad(): 82 | teacher_logits = self.teacher_model(**inputs).logits 83 | outputs = model(**inputs) 84 | # Save past state if it exists 85 | # TODO: this needs to be fixed and made cleaner later. 86 | if self.args.past_index >= 0: 87 | self._past = outputs[self.args.past_index] 88 | 89 | # We don't use .loss here since the model may return tuples instead of ModelOutput. 90 | loss = outputs['loss'] if isinstance(outputs, dict) else outputs[0] 91 | self.metrics['ce_loss'] += float(loss.mean()) 92 | distil_loss = get_distil_loss(outputs.logits, teacher_logits, self.distil_temperature, 'kldiv') 93 | self.metrics['distil_loss'] += float(distil_loss) 94 | loss = (1 - self.alpha_distil) * loss + self.alpha_distil * distil_loss 95 | self.loss_counter += 1 96 | 97 | return (loss, outputs) if return_outputs else loss 98 | 99 | 100 | class TrainerWithTeacher(Trainer): 101 | def __init__(self, distil_args: DistilTrainingArguments, *args, **kwargs): 102 | Trainer.__init__(self, *args, **kwargs) 103 | self.teacher_model = distil_args.teacher_model 104 | self.alpha_distil = distil_args.alpha_distil 105 | self.distil_temperature = distil_args.distil_temperature 106 | 107 | def compute_loss(self, model, inputs, return_outputs=False): 108 | with torch.no_grad(): 109 | teacher_logits = self.teacher_model(**inputs).logits 110 | outputs = model(**inputs) 111 | # Save past state if it exists 112 | # TODO: this needs to be fixed and made cleaner later. 113 | if self.args.past_index >= 0: 114 | self._past = outputs[self.args.past_index] 115 | 116 | # We don't use .loss here since the model may return tuples instead of ModelOutput. 
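# Illustrative arithmetic for the distillation blend computed below
# (numbers hypothetical): with alpha_distil = 0.9,
#   loss = (1 - 0.9) * ce_loss + 0.9 * distil_loss,
# so ce_loss = 0.50 and distil_loss = 0.20 give 0.1 * 0.50 + 0.9 * 0.20 = 0.23.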
117 | loss = outputs['loss'] if isinstance(outputs, dict) else outputs[0] 118 | distil_loss = get_distil_loss(outputs.logits, teacher_logits, self.distil_temperature, 'kldiv') 119 | loss = (1 - self.alpha_distil) * loss + self.alpha_distil * distil_loss 120 | 121 | return (loss, outputs) if return_outputs else loss -------------------------------------------------------------------------------- /draw.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | def draw_macs_accuracy_curve(): 6 | @dataclass 7 | class ModelInfo: 8 | b_macs : float = 0.0 9 | acc : float = 0.0 10 | m_params : float = 0.0 11 | 12 | 13 | modelinfo_dict = dict( 14 | deit_base = ModelInfo(17.7, 81.8), 15 | deit_small = ModelInfo(4.64, 79.9), 16 | deit_tiny = ModelInfo(1.28, 72.2), 17 | t2t_vit_14 = ModelInfo(4.8, 81.5), 18 | t2t_vit_12 = ModelInfo(1.8, 76.5), 19 | t2t_vit_10 = ModelInfo(1.5, 75.2), 20 | t2t_vit_7 = ModelInfo(1.1, 71.7), 21 | swin_base = ModelInfo(15.4, 83.5), 22 | swin_small = ModelInfo(8.7, 83), 23 | swin_tiny = ModelInfo(4.5, 81.3), 24 | autoformer_base = ModelInfo(11, 82.4), 25 | autoformer_small = ModelInfo(5.1, 81.7), 26 | autoformer_tiny = ModelInfo(1.3, 74.7), 27 | efficientnet_b7 = ModelInfo(37, 84.3), 28 | efficientnet_b6 = ModelInfo(19, 84), 29 | efficientnet_b5 = ModelInfo(9.9, 83.6), 30 | efficientnet_b4 = ModelInfo(4.2, 82.9), 31 | efficientnet_b3 = ModelInfo(1.8, 81.6), 32 | efficientnet_b2 = ModelInfo(1.0, 80.1), 33 | efficientnet_b1 = ModelInfo(0.7, 79.1), 34 | efficientnet_b0 = ModelInfo(0.39, 77.1), 35 | resnet_152 = ModelInfo(11, 77.8), 36 | resnet_101 = ModelInfo(7.9, 77.4), 37 | resnet_50 = ModelInfo(4.1, 76), 38 | mobilenet_v2 = ModelInfo(0.3, 72), 39 | mobilenet_v3_large = ModelInfo(0.22, 75.6), 40 | proxyless_mobile = ModelInfo(0.32, 74.6) 41 | ) 42 | 43 | 44 | deit_list = ['deit_tiny', 'deit_small', 'deit_base'] 45 | t2t_vit_list = ['t2t_vit_7', 't2t_vit_10', 't2t_vit_12', 't2t_vit_14'] 46 | swin_list = ['swin_tiny', 'swin_small', 'swin_base'] 47 | autoformer_list = ['autoformer_base', 'autoformer_small', 'autoformer_tiny'] 48 | efficientnet_list = [f'efficientnet_b{v}' for v in range(0, 8)] 49 | resnet_list = ['resnet_50', 'resnet_101', 'resnet_152'] 50 | mobilenet_list = ['mobilenet_v2', 'mobilenet_v3_large'] 51 | proxyless_mobile_list = ['proxyless_mobile'] 52 | 53 | plt.plot([modelinfo_dict[x].b_macs for x in deit_list], 54 | [modelinfo_dict[x].acc for x in deit_list], 55 | label='deit', c='#0099ff', marker='^') 56 | plt.plot([modelinfo_dict[x].b_macs for x in t2t_vit_list], 57 | [modelinfo_dict[x].acc for x in t2t_vit_list], 58 | label='t2t_vit', c='#4d4dff', marker='^') 59 | plt.plot([modelinfo_dict[x].b_macs for x in swin_list], 60 | [modelinfo_dict[x].acc for x in swin_list], 61 | label='swin transformer', c='#944dff', marker='^') 62 | plt.plot([modelinfo_dict[x].b_macs for x in autoformer_list], 63 | [modelinfo_dict[x].acc for x in autoformer_list], 64 | label='autoformer', c='#0099cc', marker='^') 65 | 66 | plt.plot([modelinfo_dict[x].b_macs for x in efficientnet_list], 67 | [modelinfo_dict[x].acc for x in efficientnet_list], 68 | label='efficientnet', c='#cc3300', marker='o') 69 | plt.plot([modelinfo_dict[x].b_macs for x in resnet_list], 70 | [modelinfo_dict[x].acc for x in resnet_list], 71 | label='resnet', c='#e67300', marker='o') 72 | plt.plot([modelinfo_dict[x].b_macs for x in mobilenet_list], 73 | [modelinfo_dict[x].acc for x in 
mobilenet_list], 74 | label='mobilenet', c='#ffaa00', marker='o') 75 | plt.plot([modelinfo_dict[x].b_macs for x in proxyless_mobile_list], 76 | [modelinfo_dict[x].acc for x in proxyless_mobile_list], 77 | label='proxyless_mobile', c='#ff4d4d', marker='o') 78 | 79 | plt.title('Model MACs and Accuracy') 80 | plt.xlabel('Billion MACs') 81 | plt.ylabel('Accuracy (%)') 82 | plt.legend() 83 | plt.savefig('tmp.png') 84 | 85 | 86 | def draw_are16heads_pruned_heads(): 87 | def pruned_head_str_to_dict(str: str): 88 | layers = str.split(' ') 89 | rv = {} 90 | for item in layers: 91 | key, value = item.split(':') 92 | key = int(key) 93 | value = [int(x) for x in value.split(',')] 94 | rv[key] = value 95 | return rv 96 | 97 | deit_base_heads72 = pruned_head_str_to_dict( 98 | '1:1,2,3,4,5,6,8,9,10,12 2:2,3,4,5,6,7,9,10,11,12 12:2,3,4,6,7,8 3:1,2,4,5,6,9,10,11,12 4:5,6,8,10,11,12 5:11,4,6 7:1,2,11 11:1,2,4,5,6,8,10,12 6:9,11,5,7 8:4,1,12,5 10:1,4,6,9,11 9:12,11,4,7' 99 | ) 100 | 101 | deit_small_head36 = pruned_head_str_to_dict( 102 | '12:1,2,3,4 1:1,2,4,5 2:2,3,4 11:1,3,4,5 4:4 10:1,2,6 3:1,2,5,6 9:1,2,6 5:4 6:2,5,6 7:1,2,4 8:2,4,6' 103 | ) 104 | deit_tiny_head18 = pruned_head_str_to_dict( 105 | '1:2,3 12:1 2:1,3 6:1,3 3:1,2 11:1,2 7:3 10:2 9:2 4:2,3 5:3 8:1' 106 | ) 107 | 108 | # head_mask = np.ones(shape=[12,3]) 109 | # for k, values in deit_tiny_head18.items(): 110 | # for v in values: 111 | # head_mask[k - 1, v - 1] = 0 112 | # plt.imshow(head_mask) 113 | # plt.title('DeiT-Tiny prune 18 (50%) heads') 114 | # plt.ylabel('Layer number') 115 | # plt.xlabel('Head number') 116 | # plt.savefig('deit_tiny_prune_heads18.png') 117 | 118 | # head_mask = np.ones(shape=[12,6]) 119 | # for k, values in deit_small_head36.items(): 120 | # for v in values: 121 | # head_mask[k - 1, v - 1] = 0 122 | # plt.imshow(head_mask) 123 | # plt.title('DeiT-Small prune 36 (50%) heads') 124 | # plt.ylabel('Layer number') 125 | # plt.xlabel('Head number') 126 | # plt.savefig('deit_small_prune_heads36.png') 127 | 128 | head_mask = np.ones(shape=[12,12]) 129 | for k, values in deit_base_heads72.items(): 130 | for v in values: 131 | head_mask[k - 1, v - 1] = 0 132 | plt.imshow(head_mask) 133 | plt.title('DeiT-Base prune 72 (50%) heads') 134 | plt.ylabel('Layer number') 135 | plt.xlabel('Head number') 136 | plt.savefig('deit_base_prune_heads72.png') 137 | 138 | draw_are16heads_pruned_heads() -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/nn_pruning/tests/test_patch.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import TestCase 3 | 4 | from transformers import BertConfig, BertForQuestionAnswering 5 | 6 | from nn_pruning.model_structure import BertStructure 7 | from nn_pruning.modules.masked_nn import ( 8 | ChannelPruningModulePatcher, 9 | JointPruningModulePatcher, 10 | LinearPruningArgs, 11 | LinearPruningModulePatcher, 12 | LinearPruningArgs, 13 | ) 14 | from nn_pruning.training_patcher import LinearModelPatcher, PatcherContext 15 | 16 | 17 | class TestFun(TestCase): 18 | MODEL_STRUCTURE = BertStructure 19 | def test_base(self): 20 | config = BertConfig.from_pretrained("bert-base-uncased") 21 | model = BertForQuestionAnswering(config) 22 | 23 | patcher = LinearModelPatcher({}, self.MODEL_STRUCTURE) 24 | layers = patcher.get_patchable_layers(model) 25 | # for regexp, layers in layers.items(): 26 | # print(regexp) 27 | 28 | def test_patch_module_independent_parameters(self): 29 | config = 
BertConfig.from_pretrained("bert-base-uncased") 30 | model = BertForQuestionAnswering(config) 31 | 32 | parameters = LinearPruningArgs( 33 | method="topK", 34 | submethod="default", 35 | ampere_method="disabled", 36 | block_rows=32, 37 | block_cols=32, 38 | min_elements=0.005, 39 | ) 40 | 41 | context = PatcherContext() 42 | 43 | p = LinearPruningModulePatcher(context, parameters, self.MODEL_STRUCTURE) 44 | 45 | module_patchers = dict(query=p, key=p, value=p, att_dense=p, interm_dense=p, output_dense=p) 46 | 47 | patcher = LinearModelPatcher(module_patchers, self.MODEL_STRUCTURE) 48 | patcher.patch(model) 49 | 50 | self.assertEqual(patcher.stats["patched"], 72) 51 | key_sizes = {k: len(v) for k, v in context.context_modules.items()} 52 | 53 | self.assertEqual(key_sizes, {"mask": 72}) 54 | 55 | def test_patch_module_ampere(self): 56 | config = BertConfig.from_pretrained("bert-base-uncased") 57 | model = BertForQuestionAnswering(config) 58 | 59 | parameters = LinearPruningArgs( 60 | method="topK", 61 | submethod="default", 62 | ampere_method="annealing", 63 | block_rows=32, 64 | block_cols=32, 65 | min_elements=0.005, 66 | ) 67 | 68 | context = PatcherContext() 69 | 70 | p = LinearPruningModulePatcher(context, parameters, self.MODEL_STRUCTURE) 71 | 72 | module_patchers = dict(query=p, key=p, value=p, att_dense=p, interm_dense=p, output_dense=p) 73 | 74 | patcher = LinearModelPatcher(module_patchers, self.MODEL_STRUCTURE) 75 | patcher.patch(model) 76 | 77 | self.assertEqual(patcher.stats["patched"], 72) 78 | key_sizes = {k: len(v) for k, v in context.context_modules.items()} 79 | 80 | self.assertEqual(key_sizes, {"ampere_mask": 72, "mask": 72}) 81 | 82 | def test_patch_module_tied_attention(self): 83 | config = BertConfig.from_pretrained("bert-base-uncased") 84 | model = BertForQuestionAnswering(config) 85 | 86 | parameters = LinearPruningArgs( 87 | method="topK", 88 | submethod="default", 89 | ampere_method="annealing", 90 | block_rows=32, 91 | block_cols=32, 92 | min_elements=0.005, 93 | ) 94 | 95 | context = PatcherContext() 96 | 97 | p_attention = JointPruningModulePatcher(context, parameters, self.MODEL_STRUCTURE, "attention") 98 | p_dense = LinearPruningModulePatcher(context, parameters, self.MODEL_STRUCTURE) 99 | 100 | module_patchers = dict( 101 | query=p_attention, 102 | key=p_attention, 103 | value=p_attention, 104 | att_dense=p_dense, 105 | interm_dense=p_dense, 106 | output_dense=p_dense, 107 | ) 108 | 109 | patcher = LinearModelPatcher(module_patchers, self.MODEL_STRUCTURE) 110 | patcher.patch(model) 111 | 112 | self.assertEqual(patcher.stats["patched"], 72) 113 | key_sizes = {k: len(v) for k, v in context.context_modules.items()} 114 | 115 | self.assertEqual(key_sizes, {"ampere_mask": 72, "mask": 48}) 116 | 117 | def test_patch_tiedattention_line_pruning(self): 118 | config = BertConfig.from_pretrained("bert-base-uncased") 119 | model = BertForQuestionAnswering(config) 120 | 121 | parameters_attention = LinearPruningArgs( 122 | method="topK", 123 | submethod="default", 124 | ampere_method="annealing", 125 | block_rows=32, 126 | block_cols=32, 127 | min_elements=0.005, 128 | ) 129 | 130 | parameters_dense = LinearPruningArgs( 131 | method="topK", submethod="1d", ampere_method="annealing", block_rows=32, block_cols=32, min_elements=0.005 132 | ) 133 | 134 | context = PatcherContext() 135 | 136 | p_attention = JointPruningModulePatcher(context, parameters_attention, self.MODEL_STRUCTURE, suffix=".attention") 137 | p_dense = ChannelPruningModulePatcher(context, parameters_dense, 
self.MODEL_STRUCTURE, suffix="dense") 138 | 139 | module_patchers = dict( 140 | query=p_attention, 141 | key=p_attention, 142 | value=p_attention, 143 | att_dense=p_dense, 144 | interm_dense=p_dense, 145 | output_dense=p_dense, 146 | ) 147 | 148 | patcher = LinearModelPatcher(module_patchers, self.MODEL_STRUCTURE) 149 | patcher.patch(model) 150 | 151 | self.assertEqual(patcher.stats["patched"], 72) 152 | key_sizes = {k: len(v) for k, v in context.context_modules.items()} 153 | 154 | for k, v in key_sizes.items(): 155 | print(k, v) 156 | 157 | for k, v in context.context_modules.items(): 158 | print(k, v) 159 | self.assertEqual(key_sizes, {"ampere_mask": 72, "mask": 12, "mask_1d": 48}) 160 | 161 | 162 | if __name__ == "__main__": 163 | unittest.main() 164 | -------------------------------------------------------------------------------- /deit_pruning/vendor/nn_pruning_v1/nn_pruning/modules/nonorm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | from nn_pruning.model_patcher import ModelPatcher 4 | 5 | 6 | class Layer2NoNorm(nn.Module): 7 | # There are two ways to specify how the module will move progressively from a LayerNorm to a NoNorm 8 | # If you give a non-None schedule_callback, steps and start_delta won't be used. 9 | # It must be a function that returns a dictionary containing at least two keys: 10 | # - mix : moving from 1.0 to 0.0 , it is the lerp factor between LayerNorm and NoNorm: 1.0 -> LayerNorm, 0.0 -> NoNorm 11 | # - delta : moving from 0.99 to 1.0 for example, it is the accumulator exponential decay, 12 | # the higher the longer the period it smooth the mean/variance accumulator 13 | # If you don't specify a schedule_callback, each call to forward will count as a step, and in 'steps' steps 14 | # it will move to a LayerNorm to a NoNorm 15 | 16 | def __init__(self, layerNorm, 17 | steps = 5000, 18 | start_delta = 0.99, 19 | schedule_callback = None): 20 | super().__init__() 21 | self.normalized_shape = layerNorm.normalized_shape 22 | self.eps = layerNorm.eps 23 | self.elementwise_affine = layerNorm.elementwise_affine 24 | assert(self.elementwise_affine) 25 | self.weight = nn.Parameter(layerNorm.weight.detach().clone()) 26 | self.bias = nn.Parameter(layerNorm.bias.detach().clone()) 27 | # Accumulators are for mean and std, and accumulator normalization factor 28 | self.schedule_callback = schedule_callback 29 | 30 | if self.schedule_callback is None: 31 | self.steps = steps 32 | self.delta = start_delta 33 | self.final_delta = 1.0 34 | self.delta_step = (self.final_delta - self.delta) / self.steps 35 | self.mix_step = 1 / self.steps 36 | self.mix = 1.0 37 | else: 38 | self.steps = None 39 | self.delta_step = None 40 | self.mix_step = None 41 | self.delta = None 42 | self.final_delta = None 43 | self.mix = None 44 | 45 | self.register_buffer("accumulator", torch.zeros(3, device=layerNorm.weight.device)) 46 | 47 | def forward(self, batch): 48 | accumulator = self.accumulator.clone() 49 | 50 | if self.schedule_callback is not None: 51 | d = self.schedule_callback() 52 | mix = d["mix"] 53 | delta = d["delta"] 54 | else: 55 | if self.training: 56 | mix = self.mix 57 | delta = self.delta 58 | else: 59 | mix = 0 60 | delta = 1.0 61 | 62 | if mix == 0 and delta == 1.0: 63 | batch_mean = accumulator[0] / accumulator[2] 64 | batch_var = accumulator[1] / accumulator[2] 65 | else: 66 | batch_mean = batch.mean(-1, keepdim=True) 67 | batch_var = batch.var(-1, unbiased=False, keepdim=True) 68 | 69 | if 
self.training: 70 | one = torch.tensor(1.0, device=batch_var.device) 71 | new_acc = torch.stack([batch_mean.mean(), batch_var.mean(), one]) 72 | accumulator = torch.lerp(new_acc, accumulator, delta) 73 | 74 | batch_mean = torch.lerp(accumulator[0] / accumulator[2], batch_mean, mix) 75 | batch_var = torch.lerp(accumulator[1] / accumulator[2], batch_var, mix) 76 | 77 | ret = (batch - batch_mean) / (batch_var + self.eps).sqrt() 78 | ret = ret * self.weight + self.bias 79 | 80 | if self.training: 81 | self.accumulator = accumulator.detach() 82 | if self.schedule_callback is None: 83 | self.mix = max(0.0, self.mix - self.mix_step) 84 | self.delta = min(self.final_delta, self.delta + self.delta_step) 85 | 86 | return ret 87 | 88 | def compile(self): 89 | accumulator = self.accumulator 90 | mean = accumulator[0] / accumulator[2] 91 | var = accumulator[1] / accumulator[2] 92 | 93 | inv_var = 1.0 / (var + self.eps).sqrt() 94 | 95 | weight = self.weight * inv_var 96 | bias = - mean * inv_var * self.weight + self.bias 97 | 98 | return NoNorm(weight.detach().clone(), bias.detach().clone()) 99 | 100 | class Layer2NoNormPatcher(ModelPatcher): 101 | def __init__(self, 102 | steps = 5000, 103 | start_delta = 0.99, 104 | schedule_callback = None): 105 | super().__init__(all_match=True) 106 | self.steps = steps 107 | self.start_delta = start_delta 108 | self.schedule_callback = schedule_callback 109 | 110 | def is_patchable(self, module_name, module, raiseError): 111 | return isinstance(module, nn.LayerNorm) 112 | 113 | def new_child_module(self, child_module_name, child_module, patch_info): 114 | return Layer2NoNorm(child_module, 115 | steps = self.steps, 116 | start_delta = self.start_delta, 117 | schedule_callback = self.schedule_callback) 118 | 119 | class NoNorm(nn.Module): 120 | def __init__(self, weight, bias): 121 | super().__init__() 122 | self.weight = nn.Parameter(weight) 123 | self.bias = nn.Parameter(bias) 124 | 125 | def forward(self, batch): 126 | return batch * self.weight + self.bias 127 | 128 | class NoNormCompiler(ModelPatcher): 129 | def __init__(self): 130 | super().__init__(all_match=True) 131 | 132 | def is_patchable(self, module_name, module, raiseError): 133 | return isinstance(module, Layer2NoNorm) 134 | 135 | def new_child_module(self, child_module_name, child_module, patch_info): 136 | return child_module.compile() 137 | 138 | class NoNormPatcher(ModelPatcher): 139 | def __init__(self): 140 | super().__init__(all_match=True) 141 | 142 | def is_patchable(self, module_name, module, raiseError): 143 | return isinstance(module, nn.LayerNorm) 144 | 145 | def new_child_module(self, child_module_name, child_module, patch_info): 146 | return NoNorm(child_module.weight.detach(), child_module.bias.detach()) 147 | -------------------------------------------------------------------------------- /experiments/D1207_vino_quant_cnn_test.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | import os 3 | import sys 4 | import subprocess 5 | import argparse 6 | import re 7 | import json 8 | 9 | sys.path.insert(0, f'{os.path.dirname(sys.argv[0])}/..') 10 | from utils import freeze_graph 11 | from modeling.models.cnn_zoo import cnn_zoo_dict 12 | from benchmark.openvino.vino_cli import openvino_benchmark 13 | 14 | class PotConfigJson: 15 | def __init__(self, model_xml_path: str, dataset_path) -> None: 16 | model_name = os.path.splitext(os.path.basename(model_xml_path))[0] 17 | self.pot_config = { 18 | "model": { 19 | "model_name": 
f"{model_name}", 20 | "model": f"{model_xml_path}", 21 | "weights": f"{model_xml_path.replace('.xml', '.bin')}" 22 | }, 23 | "engine": { 24 | "type": "simplified", 25 | "data_source": f"{dataset_path}" 26 | }, 27 | "compression": { 28 | "target_device": "CPU", 29 | "algorithms": [ 30 | { 31 | "name": "DefaultQuantization", 32 | "params": { 33 | "preset": "performance", 34 | "stat_subset_size": 3 35 | } 36 | } 37 | ] 38 | } 39 | } 40 | 41 | def dump(self, output_path): 42 | with open(output_path, 'w') as f: 43 | json.dump(self.pot_config, f, indent=4) 44 | f.write('\n') 45 | 46 | class VinoCnnTester: 47 | def __init__(self, cnn_zoo_dict: dict, vino_model_dir: str, dataset_dir: str): 48 | self.src_model_dir = os.path.join(vino_model_dir, 'src_model', 'quant_cnn_test') 49 | self.ir_model_dir = os.path.join(vino_model_dir, 'ir', 'quant_cnn_test') 50 | self.dataset_dir = dataset_dir 51 | self.cnn_zoo_dict = cnn_zoo_dict 52 | 53 | def _get_src_pb_path(self, model_name) -> str: 54 | return os.path.join(self.src_model_dir, model_name+'.pb') 55 | 56 | def _get_fp32_ir_dir(self, model_name) -> str: 57 | return os.path.join(self.ir_model_dir, model_name, 'FP32') 58 | 59 | def _export_src(self, ): 60 | print('===== Exporting SRC =====') 61 | for model_name, generator_func in self.cnn_zoo_dict.items(): 62 | model = generator_func() 63 | freeze_graph(keras_model=model, output_path=self._get_src_pb_path(model_name)) 64 | 65 | def _convert(self, ): 66 | print('===== Converting SRC to OpenVINO IR =====') 67 | for model_name in self.cnn_zoo_dict.keys(): 68 | input_path = self._get_src_pb_path(model_name) 69 | output_dir = self._get_fp32_ir_dir(model_name) 70 | subprocess.run(f'python $VINO_MO --input_model={input_path} --model_name={model_name} --output_dir={output_dir} --batch=1 --data_type=FP32', shell=True) 71 | 72 | def _quantize(self, ): 73 | print('==== Quantizing FP32IR to INT8 ====') 74 | for model_name in self.cnn_zoo_dict.keys(): 75 | fp32_ir_dir = self._get_fp32_ir_dir(model_name) 76 | output_dir = fp32_ir_dir.replace('/FP32', '/FP32-INT8') 77 | if os.path.exists(output_dir): 78 | subprocess.run(f'rm -r {output_dir}', shell=True) 79 | os.mkdir(output_dir) 80 | # save pot-config.json 81 | pot_config_json_path = os.path.join(output_dir, 'pot-config.json') 82 | pot_config = PotConfigJson( 83 | model_xml_path=os.path.join(fp32_ir_dir, model_name+'.xml'), 84 | dataset_path=self.dataset_dir 85 | ) 86 | pot_config.dump(pot_config_json_path) 87 | # quant 88 | subprocess.run(f'python -m pot -c {pot_config_json_path} --direct-dump --output-dir={output_dir}', shell=True) 89 | # move file 90 | subprocess.run(f'mv {output_dir}/optimized/{model_name}.xml {output_dir}', shell=True) 91 | subprocess.run(f'mv {output_dir}/optimized/{model_name}.bin {output_dir}', shell=True) 92 | 93 | def _benchmark(self): 94 | print('====== Benchmarking model performance on CPU with 1 thread ======') 95 | latency_list_fp32 = [] 96 | latency_list_int8 = [] 97 | for model_name in self.cnn_zoo_dict.keys(): 98 | xml_path_fp32 = os.path.join(self._get_fp32_ir_dir(model_name), model_name+'.xml') 99 | xml_path_int8 = xml_path_fp32.replace('/FP32/', '/FP32-INT8/') 100 | latency_fp32 = 0.0 101 | latency_int8 = 0.0 102 | try: 103 | latency_fp32 = openvino_benchmark('$VINO_BENCHMARK_APP', xml_path_fp32, niter=30, num_threads=1, batch_size=1, csv_output_dir=os.path.dirname(xml_path_fp32), show_detail=False) 104 | latency_int8 = openvino_benchmark('$VINO_BENCHMARK_APP', xml_path_int8, niter=30, num_threads=1, batch_size=1, 
csv_output_dir=os.path.dirname(xml_path_int8), show_detail=False)
105 | except Exception:
106 | pass
107 | latency_list_fp32.append(round(latency_fp32, 2))
108 | latency_list_int8.append(round(latency_int8, 2))
109 |
110 | print('==================================================================================')
111 | print(' SUMMARY')
112 | print('==================================================================================')
113 |
114 | print(list(self.cnn_zoo_dict.keys()))
115 | print('FP32 ms')
116 | print(latency_list_fp32)
117 | print('INT8 ms')
118 | print(latency_list_int8)
119 |
120 | def run(self, ):
121 | self._export_src()
122 | self._convert()
123 | self._quantize()
124 | self._benchmark()
125 |
126 | def main():
127 | parser = argparse.ArgumentParser()
128 | parser.add_argument('--vino_model_dir', required=True, type=str, help='OpenVINO root dir for saving SRC (tf, onnx) and IR models')
129 | parser.add_argument('--dataset_dir', required=True, type=str, help='dataset dir used for quantization calibration')
130 | args = parser.parse_args()
131 |
132 | vino_cnn_tester = VinoCnnTester(cnn_zoo_dict, args.vino_model_dir, args.dataset_dir)
133 | vino_cnn_tester.run()
134 |
135 |
136 | if __name__ == '__main__':
137 | main()
--------------------------------------------------------------------------------
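As a usage note, here is a minimal sketch of driving the PotConfigJson helper above by hand; the XML path, calibration directory, and output name are hypothetical placeholders, and the import assumes the experiments folder is on PYTHONPATH:

    from D1207_vino_quant_cnn_test import PotConfigJson

    # Point at an FP32 IR produced by the Model Optimizer step (_convert above).
    pot_config = PotConfigJson(
        model_xml_path='models/ir/quant_cnn_test/resnet_50/FP32/resnet_50.xml',
        dataset_path='datasets/imagenet_calib_subset',
    )
    # Writes the JSON consumed by `python -m pot -c <config> --direct-dump ...`.
    pot_config.dump('pot-config.json')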