├── models ├── __init__.py ├── ffn_importance_score_model.py ├── neck_importance_score_model.py └── block_importance_score_model.py ├── utils ├── __init__.py ├── ema_util.py ├── pred_utils.py └── loss_utils.py ├── run_distill.sh ├── cp_image.py ├── get_started.md ├── run_importance.sh ├── test_rank_kl.py ├── README.md ├── sample_sub_model.py ├── importance ├── kl2k │ ├── Deit_base_12_neck_768_kl_2k_192.txt │ ├── Deit_base_12_attn_768_kl_10_2k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_9_2k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_3_2k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_5_2k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_6_2k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_7_2k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_4_2k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_8_2k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_2_2k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_11_2k_importance_rank_multihead3.txt │ └── Deit_base_12_attn_768_kl_1_2k_importance_rank_multihead3.txt └── kl5k │ ├── Deit_base_12_attn_768_kl_0_5k_importance_rank_multihead3.txt │ ├── Deit_base_12_neck_768_kl_5k_192.txt │ ├── Deit_base_12_attn_768_kl_10_5k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_6_5k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_5_5k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_9_5k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_3_5k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_4_5k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_7_5k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_2_5k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_11_5k_importance_rank_multihead3.txt │ ├── Deit_base_12_attn_768_kl_8_5k_importance_rank_multihead3.txt │ └── Deit_base_12_attn_768_kl_1_5k_importance_rank_multihead3.txt ├── test_ffn_importance_score.py ├── test_attn_importance_score.py ├── test_neck_importance_score.py └── requirements.txt /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /run_distill.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh 2 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python main_distill.py --dist-url 'tcp://127.0.0.1:12342' --dist-backend 'nccl' --multiprocessing-distributed --world-size 1 --rank 0 -j=64 --learning-rate 1e-3 --name Deit_tiny_distill --wd 1e-3 -b 256 --alpha 0.5 /mnt/ramdisk/ImageNet/ --epochs 200 -------------------------------------------------------------------------------- /cp_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import shutil 4 | # Build a few-shot proxy dataset: copy 5 random images per class from the ImageNet training set. 5 | target_path = "/mnt/ramdisk/ImageNet/fewshot5_train/" 6 | source_path = "/mnt/ramdisk/ImageNet/train/" 7 | 8 | 9 | if not os.path.exists(target_path): 10 | os.mkdir(target_path) 11 | 12 | img_folder_files = os.listdir(source_path) 13 | random.shuffle(img_folder_files) 14 | # for img_folder_file in img_folder_files[:500]: 15 | for img_folder_file in img_folder_files[:]: 16 | if not os.path.exists(target_path + img_folder_file): 17 | os.mkdir(target_path + img_folder_file) 18 | img_folder_path = os.path.join(source_path, img_folder_file) 19 | imgs = os.listdir(img_folder_path) 20 | random.shuffle(imgs) 21 | l = 5 22 | for i in range(l): 23 | shutil.copy(img_folder_path + "/" + imgs[i], target_path + img_folder_file + "/" + imgs[i]) -------------------------------------------------------------------------------- /get_started.md: -------------------------------------------------------------------------------- 1 | # UP-DeiT implementation 2 | 3 | This repository contains the implementation of compressing DeiT-B into UP-DeiT-T. 4 | 5 | ## Main requirements 6 | 7 | ``` 8 | python >= 3.7 9 | torch >= 1.4.0 10 | torchvision >= 0.5.0 11 | ``` 12 | We provide the detailed requirements in [requirements.txt](https://github.com/yuhao318/UP-ViT/blob/main/requirements.txt). You can run `conda install --yes --file requirements.txt` to set up the same environment as ours. 13 | 14 | ## Usage 15 | 16 | ### Data preparation 17 | 18 | We use the standard ImageNet dataset. You can download it from [http://image-net.org/](http://image-net.org/). 19 | 20 | To generate the proxy dataset (5 random images per class copied from the training set), assuming ImageNet is stored under `/mnt/ramdisk/`, run: 21 | ``` 22 | python cp_image.py 23 | ``` 24 | 25 | ### Importance Calculation 26 | 27 | Download the pretrained model [DeiT-B](https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth) into the `pretrainmodel` folder, then run: 28 | ``` 29 | ./run_importance.sh 30 | ``` 31 | **Note: We have provided the importance of each channel of DeiT-B in the `importance` folder, for proxy dataset sizes of 2k and 5k.** 32 | 33 | 34 | ### Sample Sub-model 35 | 36 | After computing the importance scores, run: 37 | ``` 38 | python sample_sub_model.py 39 | ``` 40 | 41 | ### Fine-tune Sub-model 42 | 43 | To fine-tune the sampled `UP-DeiT-T` on ImageNet, run the command below, filling in the hyperparameters (see `run_distill.sh` for example values) and passing the sub-model checkpoint produced by `sample_sub_model.py` (e.g., `up_deit_t_5k_init.pth`) to `--resume`: 44 | ``` 45 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python main_distill.py --dist-url 'tcp://127.0.0.1:12342' --dist-backend 'nccl' --multiprocessing-distributed --world-size 1 --rank 0 -j --learning-rate --name --wd -b --alpha --epochs --resume 46 | ``` -------------------------------------------------------------------------------- /run_importance.sh: -------------------------------------------------------------------------------- 1 | #!
/bin/sh 2 | CUDA_VISIBLE_DEVICES=0 python test_neck_importance_score.py 3 | 4 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 0 5 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 1 6 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 2 7 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 3 8 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 4 9 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 5 10 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 6 11 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 7 12 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 8 13 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 9 14 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 10 15 | CUDA_VISIBLE_DEVICES=0 python test_attn_importance_score.py --block_ind 11 16 | 17 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 0 18 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 1 19 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 2 20 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 3 21 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 4 22 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 5 23 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 6 24 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 7 25 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 8 26 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 9 27 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 10 28 | CUDA_VISIBLE_DEVICES=0 python test_ffn_importance_score.py --reduce 11 29 | 30 | python test_rank_kl.py -------------------------------------------------------------------------------- /test_rank_kl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | floder = "importance/kl5k/importance/" 5 | reduce = 0 6 | def rank_neck(): 7 | f1 = open(floder + "Deit_base_12_neck_768_kl_5k.txt", 'r') 8 | kl = [] 9 | for i in f1: 10 | top1 = float(re.findall(r"\d+\.?\d*",i)[-1]) 11 | kl.append(top1) 12 | 13 | print(kl) 14 | 15 | sorted_id = sorted(range(len(kl)), key=lambda k: kl[k]) 16 | print(sorted_id) 17 | 18 | with open("importance/kl5k/" + "Deit_base_12_neck_768_kl_5k_192.txt", 'w') as f: 19 | for s in sorted_id[:576]: 20 | f.write(str(s) + '\n') 21 | rank_neck() 22 | 23 | 24 | 25 | floder = "importance/kl5k/importance/" 26 | reduce = 0 27 | def rank_ffn(): 28 | for ind in range(12): 29 | f1 = open(floder + "Deit_base_12_ffn_3072_kl_" + str(ind) + "_5k.txt", 'r') 30 | kl = [] 31 | for i in f1: 32 | top1 = float(re.findall(r"\d+\.?\d*",i)[-1]) 33 | kl.append(top1) 34 | 35 | print(kl) 36 | 37 | sorted_id = sorted(range(len(kl)), key=lambda k: kl[k]) 38 | print(sorted_id) 39 | 40 | with open("importance/kl5k/" + "Deit_base_12_ffn_3072_kl_" + str(ind) + "_5k_importance_rank_768.txt", 'w') as f: 41 | for s in sorted_id[:2304]: 42 | f.write(str(s) + '\n') 43 | rank_ffn() 44 | 45 | floder = "importance/kl5k/importance/" 46 | def rank_attn(): 47 | for ind in range(12): 48 | 49 | f1 = open(floder+ "Deit_base_12_attn_768_kl_" + str(ind) + "_5k.txt", 'r') 50 | kl = [] 51 | for i in f1: 52 | top1 = 
float(re.findall(r"\d+\.?\d*",i)[-1]) 53 | kl.append(top1) 54 | print(kl) 55 | length = len(kl) 56 | single = length // 3 57 | final_result = [] 58 | for i in range(3): 59 | kl_i = kl[i*single: (i+1)*single] 60 | 61 | print(kl_i) 62 | 63 | sorted_id = sorted(range(len(kl_i)), key=lambda k: kl_i[k]) 64 | print(sorted_id) 65 | final_result += [ t + i*single for t in sorted_id[:192]] 66 | with open("importance/kl5k/"+ "Deit_base_12_attn_768_kl_" + str(ind) + "_5k_importance_rank_multihead3.txt", 'w') as f: 67 | for s in final_result: 68 | f.write(str(s) + '\n') 69 | rank_attn() -------------------------------------------------------------------------------- /utils/ema_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import unicode_literals 3 | 4 | import torch 5 | 6 | 7 | # Partially based on: https://github.com/tensorflow/tensorflow/blob/r1.13/tensorflow/python/training/moving_averages.py 8 | class ExponentialMovingAverage: 9 | """ 10 | Maintains (exponential) moving average of a set of parameters. 11 | """ 12 | def __init__(self, parameters, decay, use_num_updates=True): 13 | """ 14 | Args: 15 | parameters: Iterable of `torch.nn.Parameter`; usually the result of 16 | `model.parameters()`. 17 | decay: The exponential decay. 18 | use_num_updates: Whether to use number of updates when computing 19 | averages. 20 | """ 21 | if decay < 0.0 or decay > 1.0: 22 | raise ValueError('Decay must be between 0 and 1') 23 | self.decay = decay 24 | self.num_updates = 0 if use_num_updates else None 25 | self.shadow_params = [p.clone().detach() 26 | for p in parameters if p.requires_grad] 27 | 28 | def update(self, parameters): 29 | """ 30 | Update currently maintained parameters. 31 | Call this every time the parameters are updated, such as the result of 32 | the `optimizer.step()` call. 33 | Args: 34 | parameters: Iterable of `torch.nn.Parameter`; usually the same set of 35 | parameters used to initialize this object. 36 | """ 37 | decay = self.decay 38 | if self.num_updates is not None: 39 | self.num_updates += 1 40 | decay = min(decay, (1 + self.num_updates) / (10 + self.num_updates)) 41 | one_minus_decay = 1.0 - decay 42 | with torch.no_grad(): 43 | parameters = [p for p in parameters if p.requires_grad] 44 | for s_param, param in zip(self.shadow_params, parameters): 45 | s_param.sub_(one_minus_decay * (s_param - param)) 46 | 47 | def copy_to(self, parameters): 48 | """ 49 | Copies current parameters into given collection of parameters. 50 | Args: 51 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 52 | updated with the stored moving averages. 53 | """ 54 | for s_param, param in zip(self.shadow_params, parameters): 55 | if param.requires_grad: 56 | param.data.copy_(s_param.data) 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UP-ViT 2 | This is an official implementation for "[A Unified Pruning Framework for Vision Transformers](https://arxiv.org/abs/2111.15127)". 3 | 4 | ## Getting Started 5 | For UP-DeiT on the image classification task, please see [get_started.md](https://github.com/yuhao318/UP-ViT/blob/main/get_started.md) for detailed instructions. 6 | 7 | ## Main Results on ImageNet-1K with Pretrained Models 8 | ImageNet-1K Pretrained UP-DeiT Models 9 | 10 | | Model | Top-1 | #Param. 
| Throughput | 11 | | ------------- | ------------- | ------------- | ------------- | 12 | | [UP-DeiT-T](https://github.com/yuhao318/UP-ViT/releases/download/v1.0.0/UP_deit_tiny_patch16_224.pth) | 75.94% | 5.7M | 1408.5 | 13 | | [UP-DeiT-S](https://github.com/yuhao318/UP-ViT/releases/download/v1.0.0/UP_deit_small_patch16_224.pth) | 81.56% | 22.1M | 603.1 | 14 | 15 | ImageNet-1K Pretrained UP-PVTv2 Models 16 | 17 | | Model | Top-1 | #Param. | Throughput | 18 | | ------------- | ------------- | ------------- | ------------- | 19 | | [UP-PVTv2-B0](https://github.com/yuhao318/UP-ViT/releases/download/v1.0.0/UP_pvt_v2_b0.pth) | 75.30% | 3.7M | 139.9 | 20 | | [UP-PVTv2-B1](https://github.com/yuhao318/UP-ViT/releases/download/v1.0.0/UP_pvt_v2_b1.pth) | 79.48% | 14.0M | 249.9 | 21 | 22 | **Note: Throughput is measured on a Titan XP GPU with a fixed mini-batch size of 32.** 23 | 24 | **Note: UP-DeiT and UP-PVTv2 have the same architecture as the original DeiT and PVTv2, but with higher accuracy. See [our paper](https://arxiv.org/pdf/2111.15127.pdf) for more results.** 25 | 26 | ## Main Results on WikiText-103 with Pretrained Models 27 | 28 | A neural language model with [Adaptive Inputs](https://arxiv.org/abs/1809.10853) pretrained on [WikiText-103](https://einstein.ai/research/the-wikitext-long-term-dependency-language-modeling-dataset). Our implementation is based on [Fairseq](https://github.com/facebookresearch/fairseq/tree/v0.10.2). 29 | 30 | 31 | | Model | Perplexity | #Param. | 32 | | ------------- | ------------- | ------------- | 33 | | [Original Model](https://dl.fbaipublicfiles.com/fairseq/models/lm/adaptive_lm_wiki103.v2.tar.bz2) | 19.00 | 291M | 34 | | [UP-Transformer](https://drive.google.com/file/d/1HhxpJYvcxer7iCVfS2cJYyK-n58ymOBT/view?usp=sharing) | 19.88 | 95M | 35 | 36 | 37 | ## Citation 38 | 39 | ``` 40 | @article{yu2021unified, 41 | title={A unified pruning framework for vision transformers}, 42 | author={Yu, Hao and Wu, Jianxin}, 43 | journal={arXiv preprint arXiv:2111.15127}, 44 | year={2021} 45 | } 46 | ``` 47 | 48 | 49 | ## Contacts 50 | If you have any questions about our work, please do not hesitate to contact us via the email addresses provided in the [paper](https://arxiv.org/pdf/2111.15127.pdf). 
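## Loading the Pretrained Models

Since UP-DeiT keeps the original DeiT architecture, the released checkpoints can be evaluated with a standard DeiT model definition. The snippet below is a minimal sketch of loading UP-DeiT-T with `timm`; it assumes the `.pth` file from the table above has been downloaded locally and that it holds a `timm`-compatible `deit_tiny_patch16_224` state dict, possibly wrapped under a `model` key — adjust the lookup if `load_state_dict` reports missing or unexpected keys.

```
import torch
import timm

# Build a standard DeiT-T and load the released UP-DeiT-T weights.
# Assumptions: local file name matches the release asset, and the weights
# are either a plain state dict or nested under a "model" key.
model = timm.create_model("deit_tiny_patch16_224", pretrained=False)
ckpt = torch.load("UP_deit_tiny_patch16_224.pth", map_location="cpu")
state_dict = ckpt["model"] if isinstance(ckpt, dict) and "model" in ckpt else ckpt
result = model.load_state_dict(state_dict, strict=False)
print("missing:", result.missing_keys, "unexpected:", result.unexpected_keys)

model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))  # dummy 224x224 RGB input
print(logits.shape)  # expected: torch.Size([1, 1000])
```

If the checkpoint was saved directly as a state dict (as `sample_sub_model.py` does for the pruned initialization), the `model`-key lookup simply falls through to the raw dict.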
51 | 52 | -------------------------------------------------------------------------------- /sample_sub_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | checkpoint = torch.load("pretrainmodel/deit_base_patch16_224-b5f2ef4d.pth", map_location='cpu') 4 | 5 | 6 | reduce_neck = [] 7 | with open("importance/kl5k/Deit_base_12_neck_768_kl_5k_192.txt", 'r') as f: 8 | for i in f: 9 | reduce_neck.append(int(i)) 10 | 11 | new_dict = {} 12 | cnt = 1 13 | for k, v in checkpoint['model'].items(): 14 | print(k,end= ", ") 15 | print(v.shape) 16 | new_dict[ k] = v 17 | 18 | for k, v in checkpoint['model'].items(): 19 | print(k,end= ", ") 20 | if "qkv.weight" in k or "head.weight" in k or "mlp.fc1.weight" in k: 21 | new_index = [i not in reduce_neck for i in torch.arange(v.size(1))] 22 | new_v = v[:,new_index] 23 | print(new_v.shape) 24 | new_dict[ k] = new_v 25 | elif "cls_token" in k or "pos_embed" in k: 26 | new_index = [i not in reduce_neck for i in torch.arange(v.size(2))] 27 | new_v = v[:,:,new_index] 28 | print(new_v.shape) 29 | new_dict[ k] = new_v 30 | elif "patch_embed" in k or "norm" in k or "fc2" in k or "attn.proj" in k: 31 | new_index = [i not in reduce_neck for i in torch.arange(v.size(0))] 32 | new_v = v[new_index] 33 | print(new_v.shape) 34 | new_dict[ k] = new_v 35 | else: 36 | print(v.shape) 37 | new_dict[ k] = v 38 | 39 | 40 | 41 | checkpoint = new_dict 42 | 43 | reduce_attn = [] 44 | for i in range(12): 45 | reduce_i = [] 46 | file_name = "importance/kl5k/Deit_base_12_attn_768_kl_" + str(i) + "_5k_importance_rank_multihead3.txt" 47 | with open(file_name, 'r') as f: ## 48 | for t in f: 49 | reduce_i.append(int(t)) 50 | reduce_attn.append(reduce_i) 51 | 52 | reduce_mlp = [] 53 | for i in range(12): 54 | reduce_i = [] 55 | file_name = "importance/kl5k/Deit_base_12_ffn_3072_kl_" + str(i) + "_5k_importance_rank_768.txt" 56 | with open(file_name, 'r') as f: ## 57 | for t in f: 58 | reduce_i.append(int(t)) 59 | reduce_mlp.append(reduce_i) 60 | 61 | 62 | for reduce in range(0,12): 63 | block_ind = "blocks." + str(reduce) + ".attn.qkv.weight" 64 | print(block_ind,end= ", ") 65 | v = checkpoint[block_ind] 66 | interval = v.size(0) // 3 67 | new_index = [i % interval not in reduce_attn[reduce] for i in torch.arange(v.size(0))] 68 | new_v = v[new_index] 69 | print(new_v.shape) 70 | new_dict[block_ind] = new_v 71 | block_ind = "blocks." + str(reduce) + ".attn.qkv.bias" 72 | print(block_ind,end= ", ") 73 | v = checkpoint[block_ind] 74 | interval = v.size(0) // 3 75 | new_index = [i % interval not in reduce_attn[reduce] for i in torch.arange(v.size(0))] 76 | new_v = v[new_index] 77 | new_dict[block_ind] = new_v 78 | print(new_v.shape) 79 | block_ind = "blocks." + str(reduce) + ".attn.proj.weight" 80 | print(block_ind,end= ", ") 81 | v = checkpoint[block_ind] 82 | new_index = [i not in reduce_attn[reduce] for i in torch.arange(v.size(1))] 83 | new_v = v[:,new_index] 84 | new_dict[block_ind] = new_v 85 | print(new_v.shape) 86 | 87 | 88 | for reduce in range(0,12): ## 89 | block_ind = "blocks." + str(reduce) + ".mlp.fc1.weight" 90 | print(block_ind,end= ", ") 91 | v = checkpoint[block_ind] 92 | new_index = [i not in reduce_mlp[reduce] for i in torch.arange(v.size(0))] 93 | new_v = v[new_index] 94 | new_dict[block_ind] = new_v 95 | print(new_v.shape) 96 | block_ind = "blocks." 
+ str(reduce) + ".mlp.fc1.bias" 97 | print(block_ind,end= ", ") 98 | v = checkpoint[block_ind] 99 | new_index = [i not in reduce_mlp[reduce] for i in torch.arange(v.size(0))] 100 | new_v = v[new_index] 101 | new_dict[block_ind] = new_v 102 | print(new_v.shape) 103 | block_ind = "blocks." + str(reduce) + ".mlp.fc2.weight" 104 | print(block_ind,end= ", ") 105 | v = checkpoint[block_ind] 106 | new_index = [i not in reduce_mlp[reduce] for i in torch.arange(v.size(1))] 107 | new_v = v[:,new_index] 108 | new_dict[block_ind] = new_v 109 | print(new_v.shape) 110 | 111 | torch.save(new_dict, "up_deit_t_5k_init.pth") ## -------------------------------------------------------------------------------- /importance/kl2k/Deit_base_12_neck_768_kl_2k_192.txt: -------------------------------------------------------------------------------- 1 | 567 2 | 551 3 | 660 4 | 700 5 | 368 6 | 200 7 | 595 8 | 467 9 | 46 10 | 727 11 | 74 12 | 670 13 | 532 14 | 282 15 | 633 16 | 740 17 | 702 18 | 67 19 | 465 20 | 150 21 | 2 22 | 3 23 | 357 24 | 38 25 | 668 26 | 527 27 | 413 28 | 655 29 | 483 30 | 758 31 | 562 32 | 511 33 | 474 34 | 579 35 | 495 36 | 36 37 | 512 38 | 448 39 | 319 40 | 155 41 | 384 42 | 571 43 | 618 44 | 439 45 | 157 46 | 96 47 | 243 48 | 94 49 | 506 50 | 217 51 | 407 52 | 703 53 | 41 54 | 463 55 | 360 56 | 601 57 | 535 58 | 137 59 | 765 60 | 686 61 | 731 62 | 518 63 | 307 64 | 253 65 | 609 66 | 559 67 | 280 68 | 203 69 | 79 70 | 521 71 | 548 72 | 45 73 | 685 74 | 75 75 | 317 76 | 408 77 | 300 78 | 138 79 | 26 80 | 277 81 | 193 82 | 534 83 | 760 84 | 583 85 | 676 86 | 457 87 | 120 88 | 500 89 | 247 90 | 678 91 | 305 92 | 385 93 | 61 94 | 684 95 | 477 96 | 325 97 | 329 98 | 295 99 | 470 100 | 711 101 | 677 102 | 318 103 | 638 104 | 199 105 | 119 106 | 113 107 | 254 108 | 389 109 | 720 110 | 258 111 | 411 112 | 164 113 | 575 114 | 454 115 | 620 116 | 755 117 | 729 118 | 70 119 | 331 120 | 353 121 | 241 122 | 640 123 | 23 124 | 278 125 | 613 126 | 649 127 | 410 128 | 420 129 | 202 130 | 730 131 | 623 132 | 221 133 | 358 134 | 20 135 | 242 136 | 356 137 | 76 138 | 212 139 | 392 140 | 340 141 | 72 142 | 196 143 | 122 144 | 88 145 | 189 146 | 309 147 | 296 148 | 151 149 | 624 150 | 556 151 | 103 152 | 9 153 | 549 154 | 617 155 | 437 156 | 544 157 | 537 158 | 246 159 | 745 160 | 626 161 | 663 162 | 77 163 | 116 164 | 156 165 | 530 166 | 112 167 | 339 168 | 698 169 | 509 170 | 651 171 | 639 172 | 432 173 | 265 174 | 252 175 | 391 176 | 232 177 | 130 178 | 344 179 | 436 180 | 539 181 | 707 182 | 658 183 | 656 184 | 661 185 | 490 186 | 10 187 | 161 188 | 591 189 | 269 190 | 709 191 | 24 192 | 452 193 | 675 194 | 370 195 | 58 196 | 416 197 | 667 198 | 637 199 | 57 200 | 634 201 | 647 202 | 365 203 | 718 204 | 680 205 | 127 206 | 566 207 | 494 208 | 610 209 | 205 210 | 697 211 | 558 212 | 433 213 | 93 214 | 520 215 | 491 216 | 352 217 | 708 218 | 673 219 | 554 220 | 725 221 | 499 222 | 400 223 | 39 224 | 644 225 | 40 226 | 95 227 | 301 228 | 53 229 | 183 230 | 239 231 | 214 232 | 417 233 | 533 234 | 764 235 | 110 236 | 546 237 | 238 238 | 458 239 | 484 240 | 174 241 | 175 242 | 580 243 | 523 244 | 43 245 | 388 246 | 84 247 | 326 248 | 348 249 | 466 250 | 28 251 | 310 252 | 586 253 | 148 254 | 44 255 | 445 256 | 341 257 | 42 258 | 276 259 | 213 260 | 653 261 | 724 262 | 717 263 | 220 264 | 376 265 | 134 266 | 625 267 | 456 268 | 674 269 | 540 270 | 423 271 | 735 272 | 650 273 | 516 274 | 713 275 | 426 276 | 27 277 | 234 278 | 498 279 | 710 280 | 228 281 | 143 282 | 418 283 | 49 284 | 337 285 | 111 286 | 343 287 | 501 
288 | 569 289 | 425 290 | 687 291 | 550 292 | 215 293 | 641 294 | 492 295 | 648 296 | 669 297 | 255 298 | 224 299 | 115 300 | 21 301 | 261 302 | 751 303 | 627 304 | 292 305 | 375 306 | 616 307 | 514 308 | 139 309 | 447 310 | 101 311 | 68 312 | 582 313 | 187 314 | 327 315 | 701 316 | 738 317 | 611 318 | 64 319 | 659 320 | 508 321 | 354 322 | 399 323 | 386 324 | 489 325 | 728 326 | 695 327 | 240 328 | 427 329 | 346 330 | 85 331 | 581 332 | 504 333 | 412 334 | 218 335 | 387 336 | 188 337 | 453 338 | 419 339 | 422 340 | 109 341 | 351 342 | 91 343 | 308 344 | 560 345 | 683 346 | 382 347 | 86 348 | 557 349 | 395 350 | 444 351 | 478 352 | 390 353 | 342 354 | 693 355 | 415 356 | 568 357 | 6 358 | 524 359 | 124 360 | 4 361 | 692 362 | 78 363 | 182 364 | 267 365 | 584 366 | 753 367 | 460 368 | 513 369 | 488 370 | 503 371 | 172 372 | 15 373 | 366 374 | 283 375 | 442 376 | 604 377 | 600 378 | 652 379 | 12 380 | 25 381 | 141 382 | 13 383 | 216 384 | 572 385 | 628 386 | 723 387 | 37 388 | 441 389 | 260 390 | 266 391 | 100 392 | 7 393 | 0 394 | 142 395 | 87 396 | 750 397 | 510 398 | 734 399 | 531 400 | 222 401 | 158 402 | 204 403 | 606 404 | 451 405 | 726 406 | 191 407 | 181 408 | 744 409 | 406 410 | 125 411 | 561 412 | 31 413 | 538 414 | 227 415 | 696 416 | 446 417 | 570 418 | 288 419 | 17 420 | 461 421 | 398 422 | 362 423 | 71 424 | 184 425 | 89 426 | 565 427 | 404 428 | 636 429 | 529 430 | 18 431 | 59 432 | 706 433 | 136 434 | 612 435 | 290 436 | 522 437 | 424 438 | 397 439 | 763 440 | 440 441 | 311 442 | 328 443 | 429 444 | 564 445 | 743 446 | 414 447 | 719 448 | 33 449 | 464 450 | 291 451 | 197 452 | 330 453 | 502 454 | 754 455 | 133 456 | 320 457 | 377 458 | 665 459 | 55 460 | 168 461 | 170 462 | 543 463 | 374 464 | 315 465 | 479 466 | 605 467 | 316 468 | 722 469 | 468 470 | 654 471 | 664 472 | 118 473 | 480 474 | 481 475 | 694 476 | 286 477 | 602 478 | 54 479 | 244 480 | 194 481 | 596 482 | 230 483 | 135 484 | 732 485 | 588 486 | 545 487 | 223 488 | 739 489 | 688 490 | 48 491 | 645 492 | 705 493 | 350 494 | 746 495 | 248 496 | 642 497 | 555 498 | 114 499 | 737 500 | 259 501 | 403 502 | 643 503 | 578 504 | 589 505 | 211 506 | 56 507 | 497 508 | 742 509 | 632 510 | 30 511 | 140 512 | 293 513 | 690 514 | 272 515 | 381 516 | 682 517 | 165 518 | 608 519 | 712 520 | 593 521 | 313 522 | 704 523 | 167 524 | 476 525 | 482 526 | 129 527 | 577 528 | 302 529 | 361 530 | 231 531 | 553 532 | 207 533 | 335 534 | 104 535 | 528 536 | 736 537 | 574 538 | 163 539 | 364 540 | 691 541 | 281 542 | 285 543 | 306 544 | 190 545 | 603 546 | 402 547 | 486 548 | 178 549 | 323 550 | 469 551 | 82 552 | 80 553 | 65 554 | 666 555 | 208 556 | 66 557 | 14 558 | 383 559 | 106 560 | 179 561 | 373 562 | 438 563 | 378 564 | 507 565 | 505 566 | 757 567 | 107 568 | 629 569 | 536 570 | 615 571 | 443 572 | 585 573 | 475 574 | 298 575 | 275 576 | 367 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_attn_768_kl_0_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 134 2 | 132 3 | 148 4 | 169 5 | 161 6 | 175 7 | 163 8 | 141 9 | 131 10 | 144 11 | 182 12 | 137 13 | 165 14 | 179 15 | 167 16 | 172 17 | 136 18 | 133 19 | 185 20 | 189 21 | 160 22 | 159 23 | 135 24 | 154 25 | 186 26 | 129 27 | 158 28 | 130 29 | 49 30 | 147 31 | 166 32 | 138 33 | 45 34 | 28 35 | 44 36 | 39 37 | 151 38 | 26 39 | 27 40 | 63 41 | 37 42 | 12 43 | 9 44 | 38 45 | 18 46 | 60 47 | 48 48 | 20 49 | 10 50 | 52 51 | 152 52 | 176 53 | 7 54 | 43 
55 | 3 56 | 146 57 | 33 58 | 55 59 | 42 60 | 29 61 | 15 62 | 4 63 | 187 64 | 53 65 | 13 66 | 23 67 | 57 68 | 139 69 | 8 70 | 11 71 | 35 72 | 149 73 | 1 74 | 36 75 | 34 76 | 50 77 | 32 78 | 5 79 | 58 80 | 59 81 | 56 82 | 120 83 | 40 84 | 85 85 | 65 86 | 31 87 | 2 88 | 21 89 | 62 90 | 6 91 | 61 92 | 22 93 | 71 94 | 78 95 | 14 96 | 47 97 | 66 98 | 142 99 | 127 100 | 75 101 | 86 102 | 171 103 | 84 104 | 178 105 | 51 106 | 121 107 | 74 108 | 83 109 | 95 110 | 30 111 | 106 112 | 92 113 | 191 114 | 77 115 | 114 116 | 100 117 | 80 118 | 108 119 | 73 120 | 41 121 | 69 122 | 17 123 | 164 124 | 96 125 | 116 126 | 183 127 | 54 128 | 87 129 | 99 130 | 219 131 | 128 132 | 24 133 | 119 134 | 112 135 | 16 136 | 70 137 | 124 138 | 101 139 | 0 140 | 76 141 | 88 142 | 89 143 | 122 144 | 118 145 | 113 146 | 19 147 | 102 148 | 125 149 | 105 150 | 111 151 | 103 152 | 97 153 | 117 154 | 91 155 | 67 156 | 68 157 | 170 158 | 145 159 | 107 160 | 64 161 | 94 162 | 252 163 | 115 164 | 81 165 | 255 166 | 190 167 | 222 168 | 232 169 | 123 170 | 82 171 | 239 172 | 156 173 | 236 174 | 110 175 | 226 176 | 237 177 | 173 178 | 210 179 | 204 180 | 181 181 | 202 182 | 72 183 | 208 184 | 235 185 | 229 186 | 93 187 | 203 188 | 46 189 | 155 190 | 90 191 | 231 192 | 195 193 | 317 194 | 268 195 | 262 196 | 285 197 | 312 198 | 281 199 | 274 200 | 295 201 | 291 202 | 298 203 | 296 204 | 352 205 | 259 206 | 305 207 | 290 208 | 307 209 | 371 210 | 314 211 | 310 212 | 327 213 | 304 214 | 337 215 | 279 216 | 380 217 | 333 218 | 300 219 | 292 220 | 346 221 | 368 222 | 350 223 | 323 224 | 355 225 | 334 226 | 348 227 | 297 228 | 372 229 | 336 230 | 379 231 | 344 232 | 442 233 | 367 234 | 410 235 | 438 236 | 362 237 | 376 238 | 382 239 | 341 240 | 326 241 | 343 242 | 339 243 | 360 244 | 364 245 | 330 246 | 260 247 | 378 248 | 435 249 | 377 250 | 299 251 | 375 252 | 408 253 | 359 254 | 369 255 | 503 256 | 288 257 | 353 258 | 475 259 | 501 260 | 505 261 | 309 262 | 477 263 | 462 264 | 472 265 | 370 266 | 411 267 | 340 268 | 484 269 | 349 270 | 491 271 | 468 272 | 507 273 | 418 274 | 509 275 | 490 276 | 469 277 | 473 278 | 321 279 | 459 280 | 324 281 | 471 282 | 357 283 | 508 284 | 492 285 | 456 286 | 467 287 | 493 288 | 454 289 | 511 290 | 451 291 | 494 292 | 498 293 | 322 294 | 426 295 | 466 296 | 332 297 | 461 298 | 460 299 | 345 300 | 502 301 | 266 302 | 455 303 | 479 304 | 500 305 | 463 306 | 356 307 | 318 308 | 403 309 | 476 310 | 383 311 | 275 312 | 504 313 | 449 314 | 415 315 | 270 316 | 486 317 | 485 318 | 335 319 | 413 320 | 496 321 | 489 322 | 481 323 | 474 324 | 347 325 | 453 326 | 373 327 | 313 328 | 320 329 | 421 330 | 483 331 | 354 332 | 329 333 | 452 334 | 398 335 | 465 336 | 381 337 | 499 338 | 365 339 | 395 340 | 284 341 | 351 342 | 488 343 | 487 344 | 458 345 | 396 346 | 363 347 | 446 348 | 457 349 | 358 350 | 443 351 | 361 352 | 280 353 | 510 354 | 478 355 | 445 356 | 480 357 | 325 358 | 390 359 | 272 360 | 342 361 | 338 362 | 374 363 | 271 364 | 444 365 | 316 366 | 308 367 | 422 368 | 440 369 | 328 370 | 388 371 | 433 372 | 430 373 | 264 374 | 406 375 | 315 376 | 464 377 | 495 378 | 417 379 | 303 380 | 331 381 | 450 382 | 416 383 | 366 384 | 283 385 | 578 386 | 595 387 | 608 388 | 600 389 | 613 390 | 603 391 | 579 392 | 632 393 | 585 394 | 628 395 | 587 396 | 593 397 | 591 398 | 596 399 | 583 400 | 627 401 | 610 402 | 620 403 | 630 404 | 594 405 | 614 406 | 602 407 | 622 408 | 618 409 | 584 410 | 592 411 | 639 412 | 581 413 | 635 414 | 631 415 | 637 416 | 597 417 | 636 418 | 599 419 | 605 420 | 629 421 | 616 422 | 615 423 
| 626 424 | 638 425 | 611 426 | 609 427 | 598 428 | 601 429 | 623 430 | 586 431 | 612 432 | 576 433 | 590 434 | 580 435 | 588 436 | 634 437 | 577 438 | 582 439 | 617 440 | 621 441 | 589 442 | 761 443 | 718 444 | 734 445 | 707 446 | 743 447 | 731 448 | 724 449 | 748 450 | 747 451 | 736 452 | 619 453 | 722 454 | 751 455 | 726 456 | 705 457 | 708 458 | 733 459 | 624 460 | 752 461 | 763 462 | 732 463 | 713 464 | 711 465 | 717 466 | 704 467 | 749 468 | 764 469 | 720 470 | 725 471 | 714 472 | 742 473 | 710 474 | 719 475 | 723 476 | 753 477 | 750 478 | 755 479 | 735 480 | 715 481 | 741 482 | 604 483 | 709 484 | 721 485 | 740 486 | 767 487 | 737 488 | 745 489 | 727 490 | 729 491 | 757 492 | 765 493 | 758 494 | 760 495 | 754 496 | 744 497 | 738 498 | 730 499 | 759 500 | 712 501 | 706 502 | 756 503 | 766 504 | 607 505 | 739 506 | 762 507 | 716 508 | 668 509 | 728 510 | 645 511 | 678 512 | 656 513 | 675 514 | 700 515 | 651 516 | 642 517 | 661 518 | 664 519 | 696 520 | 672 521 | 650 522 | 703 523 | 681 524 | 641 525 | 688 526 | 689 527 | 687 528 | 662 529 | 702 530 | 695 531 | 692 532 | 644 533 | 686 534 | 655 535 | 648 536 | 677 537 | 646 538 | 694 539 | 649 540 | 659 541 | 701 542 | 570 543 | 532 544 | 545 545 | 537 546 | 666 547 | 549 548 | 553 549 | 682 550 | 565 551 | 665 552 | 575 553 | 568 554 | 670 555 | 569 556 | 514 557 | 558 558 | 542 559 | 530 560 | 520 561 | 556 562 | 539 563 | 517 564 | 647 565 | 547 566 | 546 567 | 564 568 | 519 569 | 574 570 | 557 571 | 541 572 | 690 573 | 573 574 | 544 575 | 524 576 | 572 577 | -------------------------------------------------------------------------------- /importance/kl2k/Deit_base_12_attn_768_kl_10_2k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 51 2 | 38 3 | 28 4 | 41 5 | 24 6 | 46 7 | 63 8 | 124 9 | 20 10 | 45 11 | 6 12 | 13 13 | 83 14 | 53 15 | 56 16 | 21 17 | 15 18 | 39 19 | 49 20 | 27 21 | 121 22 | 3 23 | 52 24 | 34 25 | 36 26 | 1 27 | 22 28 | 122 29 | 29 30 | 103 31 | 55 32 | 35 33 | 9 34 | 59 35 | 74 36 | 102 37 | 48 38 | 69 39 | 26 40 | 7 41 | 98 42 | 220 43 | 107 44 | 40 45 | 81 46 | 23 47 | 44 48 | 50 49 | 109 50 | 18 51 | 31 52 | 127 53 | 42 54 | 85 55 | 72 56 | 47 57 | 104 58 | 16 59 | 119 60 | 111 61 | 118 62 | 62 63 | 112 64 | 116 65 | 12 66 | 86 67 | 0 68 | 10 69 | 129 70 | 58 71 | 57 72 | 202 73 | 95 74 | 108 75 | 67 76 | 238 77 | 33 78 | 110 79 | 89 80 | 4 81 | 208 82 | 75 83 | 100 84 | 32 85 | 218 86 | 101 87 | 65 88 | 97 89 | 87 90 | 73 91 | 142 92 | 84 93 | 170 94 | 120 95 | 68 96 | 66 97 | 90 98 | 60 99 | 99 100 | 106 101 | 136 102 | 25 103 | 221 104 | 77 105 | 88 106 | 70 107 | 114 108 | 130 109 | 64 110 | 214 111 | 71 112 | 248 113 | 96 114 | 168 115 | 191 116 | 126 117 | 105 118 | 224 119 | 92 120 | 153 121 | 254 122 | 160 123 | 169 124 | 79 125 | 30 126 | 115 127 | 179 128 | 213 129 | 251 130 | 201 131 | 113 132 | 181 133 | 93 134 | 159 135 | 230 136 | 125 137 | 172 138 | 229 139 | 198 140 | 19 141 | 148 142 | 11 143 | 37 144 | 166 145 | 151 146 | 253 147 | 133 148 | 249 149 | 139 150 | 17 151 | 134 152 | 78 153 | 240 154 | 243 155 | 200 156 | 237 157 | 199 158 | 206 159 | 236 160 | 216 161 | 2 162 | 217 163 | 196 164 | 175 165 | 123 166 | 245 167 | 252 168 | 165 169 | 164 170 | 187 171 | 138 172 | 157 173 | 131 174 | 185 175 | 173 176 | 162 177 | 61 178 | 225 179 | 209 180 | 82 181 | 5 182 | 186 183 | 232 184 | 246 185 | 182 186 | 242 187 | 204 188 | 178 189 | 233 190 | 150 191 | 94 192 | 80 193 | 450 194 | 392 195 | 474 196 | 428 197 | 510 198 | 509 
199 | 466 200 | 388 201 | 432 202 | 484 203 | 453 204 | 328 205 | 419 206 | 503 207 | 387 208 | 394 209 | 501 210 | 493 211 | 469 212 | 372 213 | 481 214 | 357 215 | 424 216 | 260 217 | 457 218 | 439 219 | 375 220 | 483 221 | 380 222 | 468 223 | 315 224 | 505 225 | 508 226 | 444 227 | 407 228 | 459 229 | 482 230 | 436 231 | 492 232 | 448 233 | 425 234 | 334 235 | 452 236 | 348 237 | 472 238 | 507 239 | 366 240 | 500 241 | 473 242 | 298 243 | 418 244 | 426 245 | 441 246 | 470 247 | 344 248 | 490 249 | 464 250 | 289 251 | 479 252 | 379 253 | 400 254 | 475 255 | 497 256 | 293 257 | 391 258 | 429 259 | 336 260 | 385 261 | 397 262 | 291 263 | 454 264 | 321 265 | 349 266 | 437 267 | 365 268 | 326 269 | 476 270 | 411 271 | 442 272 | 461 273 | 440 274 | 416 275 | 399 276 | 458 277 | 378 278 | 370 279 | 341 280 | 311 281 | 353 282 | 467 283 | 456 284 | 342 285 | 309 286 | 290 287 | 339 288 | 465 289 | 371 290 | 373 291 | 360 292 | 264 293 | 511 294 | 314 295 | 364 296 | 330 297 | 410 298 | 462 299 | 273 300 | 306 301 | 398 302 | 303 303 | 499 304 | 395 305 | 504 306 | 266 307 | 471 308 | 355 309 | 404 310 | 286 311 | 406 312 | 274 313 | 405 314 | 361 315 | 345 316 | 449 317 | 402 318 | 329 319 | 320 320 | 422 321 | 447 322 | 354 323 | 494 324 | 496 325 | 347 326 | 300 327 | 438 328 | 374 329 | 384 330 | 316 331 | 297 332 | 327 333 | 417 334 | 383 335 | 295 336 | 280 337 | 299 338 | 463 339 | 495 340 | 257 341 | 262 342 | 263 343 | 430 344 | 502 345 | 382 346 | 460 347 | 409 348 | 506 349 | 283 350 | 317 351 | 346 352 | 324 353 | 294 354 | 351 355 | 393 356 | 377 357 | 279 358 | 318 359 | 352 360 | 488 361 | 338 362 | 282 363 | 433 364 | 367 365 | 421 366 | 337 367 | 358 368 | 396 369 | 268 370 | 310 371 | 363 372 | 259 373 | 332 374 | 446 375 | 307 376 | 350 377 | 455 378 | 271 379 | 323 380 | 389 381 | 443 382 | 362 383 | 276 384 | 415 385 | 697 386 | 685 387 | 679 388 | 683 389 | 663 390 | 694 391 | 676 392 | 660 393 | 653 394 | 657 395 | 649 396 | 674 397 | 682 398 | 675 399 | 684 400 | 702 401 | 651 402 | 654 403 | 661 404 | 689 405 | 643 406 | 666 407 | 695 408 | 644 409 | 561 410 | 691 411 | 677 412 | 668 413 | 640 414 | 671 415 | 687 416 | 701 417 | 672 418 | 673 419 | 587 420 | 538 421 | 656 422 | 641 423 | 699 424 | 637 425 | 550 426 | 693 427 | 598 428 | 552 429 | 686 430 | 703 431 | 650 432 | 556 433 | 646 434 | 519 435 | 698 436 | 601 437 | 652 438 | 563 439 | 513 440 | 562 441 | 607 442 | 569 443 | 557 444 | 667 445 | 606 446 | 605 447 | 555 448 | 514 449 | 592 450 | 520 451 | 535 452 | 680 453 | 568 454 | 688 455 | 647 456 | 545 457 | 512 458 | 625 459 | 742 460 | 582 461 | 659 462 | 543 463 | 624 464 | 617 465 | 567 466 | 642 467 | 756 468 | 516 469 | 690 470 | 633 471 | 559 472 | 618 473 | 530 474 | 648 475 | 521 476 | 576 477 | 596 478 | 529 479 | 539 480 | 573 481 | 532 482 | 613 483 | 531 484 | 645 485 | 692 486 | 678 487 | 629 488 | 553 489 | 638 490 | 736 491 | 610 492 | 517 493 | 740 494 | 732 495 | 542 496 | 725 497 | 755 498 | 719 499 | 748 500 | 518 501 | 524 502 | 716 503 | 619 504 | 525 505 | 586 506 | 626 507 | 581 508 | 709 509 | 632 510 | 591 511 | 560 512 | 717 513 | 758 514 | 739 515 | 595 516 | 546 517 | 635 518 | 594 519 | 599 520 | 540 521 | 604 522 | 611 523 | 534 524 | 544 525 | 602 526 | 733 527 | 628 528 | 728 529 | 627 530 | 579 531 | 580 532 | 614 533 | 623 534 | 630 535 | 715 536 | 578 537 | 609 538 | 749 539 | 564 540 | 754 541 | 747 542 | 616 543 | 620 544 | 577 545 | 681 546 | 712 547 | 522 548 | 584 549 | 583 550 | 708 551 | 603 552 | 527 553 | 711 554 
| 704 555 | 700 556 | 548 557 | 730 558 | 571 559 | 722 560 | 615 561 | 621 562 | 549 563 | 766 564 | 731 565 | 759 566 | 658 567 | 714 568 | 554 569 | 622 570 | 547 571 | 608 572 | 706 573 | 718 574 | 734 575 | 757 576 | 597 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_neck_768_kl_5k_192.txt: -------------------------------------------------------------------------------- 1 | 567 2 | 660 3 | 702 4 | 727 5 | 46 6 | 551 7 | 595 8 | 609 9 | 532 10 | 368 11 | 200 12 | 527 13 | 534 14 | 676 15 | 571 16 | 157 17 | 495 18 | 668 19 | 700 20 | 579 21 | 243 22 | 2 23 | 740 24 | 96 25 | 562 26 | 506 27 | 67 28 | 512 29 | 448 30 | 452 31 | 670 32 | 474 33 | 483 34 | 329 35 | 413 36 | 74 37 | 618 38 | 407 39 | 711 40 | 465 41 | 319 42 | 277 43 | 282 44 | 36 45 | 765 46 | 278 47 | 470 48 | 467 49 | 360 50 | 620 51 | 340 52 | 318 53 | 385 54 | 38 55 | 150 56 | 389 57 | 477 58 | 655 59 | 457 60 | 280 61 | 254 62 | 151 63 | 408 64 | 755 65 | 384 66 | 253 67 | 511 68 | 686 69 | 357 70 | 119 71 | 26 72 | 3 73 | 75 74 | 339 75 | 758 76 | 535 77 | 221 78 | 463 79 | 518 80 | 720 81 | 269 82 | 203 83 | 193 84 | 633 85 | 610 86 | 601 87 | 70 88 | 326 89 | 242 90 | 61 91 | 725 92 | 392 93 | 155 94 | 199 95 | 724 96 | 41 97 | 94 98 | 23 99 | 353 100 | 583 101 | 760 102 | 42 103 | 120 104 | 241 105 | 684 106 | 685 107 | 246 108 | 623 109 | 232 110 | 697 111 | 411 112 | 305 113 | 388 114 | 260 115 | 217 116 | 137 117 | 647 118 | 559 119 | 417 120 | 309 121 | 76 122 | 530 123 | 307 124 | 175 125 | 410 126 | 300 127 | 183 128 | 490 129 | 639 130 | 356 131 | 164 132 | 445 133 | 202 134 | 317 135 | 731 136 | 79 137 | 638 138 | 196 139 | 88 140 | 439 141 | 575 142 | 640 143 | 549 144 | 113 145 | 93 146 | 53 147 | 72 148 | 20 149 | 45 150 | 703 151 | 295 152 | 214 153 | 709 154 | 698 155 | 437 156 | 39 157 | 174 158 | 613 159 | 663 160 | 677 161 | 58 162 | 500 163 | 325 164 | 101 165 | 548 166 | 617 167 | 637 168 | 77 169 | 745 170 | 238 171 | 656 172 | 626 173 | 189 174 | 247 175 | 454 176 | 418 177 | 521 178 | 358 179 | 138 180 | 365 181 | 391 182 | 228 183 | 540 184 | 729 185 | 351 186 | 296 187 | 310 188 | 499 189 | 265 190 | 658 191 | 420 192 | 558 193 | 127 194 | 9 195 | 122 196 | 713 197 | 653 198 | 491 199 | 331 200 | 591 201 | 533 202 | 627 203 | 130 204 | 57 205 | 509 206 | 64 207 | 240 208 | 667 209 | 624 210 | 680 211 | 252 212 | 10 213 | 717 214 | 376 215 | 641 216 | 341 217 | 212 218 | 458 219 | 327 220 | 344 221 | 649 222 | 673 223 | 156 224 | 494 225 | 556 226 | 537 227 | 416 228 | 539 229 | 764 230 | 134 231 | 40 232 | 456 233 | 103 234 | 95 235 | 516 236 | 432 237 | 423 238 | 44 239 | 213 240 | 661 241 | 301 242 | 436 243 | 738 244 | 25 245 | 354 246 | 466 247 | 707 248 | 678 249 | 261 250 | 406 251 | 0 252 | 461 253 | 49 254 | 550 255 | 239 256 | 675 257 | 444 258 | 400 259 | 557 260 | 4 261 | 735 262 | 632 263 | 514 264 | 554 265 | 352 266 | 220 267 | 492 268 | 109 269 | 148 270 | 544 271 | 453 272 | 141 273 | 342 274 | 751 275 | 348 276 | 116 277 | 139 278 | 258 279 | 546 280 | 112 281 | 708 282 | 399 283 | 24 284 | 382 285 | 422 286 | 441 287 | 111 288 | 580 289 | 224 290 | 488 291 | 91 292 | 701 293 | 110 294 | 446 295 | 718 296 | 12 297 | 188 298 | 510 299 | 386 300 | 695 301 | 650 302 | 426 303 | 503 304 | 205 305 | 744 306 | 520 307 | 569 308 | 276 309 | 692 310 | 447 311 | 644 312 | 584 313 | 484 314 | 634 315 | 498 316 | 7 317 | 645 318 | 625 319 | 652 320 | 566 321 | 606 322 | 648 323 | 27 324 | 283 325 | 375 326 | 753 327 
| 85 328 | 15 329 | 628 330 | 513 331 | 404 332 | 734 333 | 387 334 | 308 335 | 651 336 | 28 337 | 687 338 | 570 339 | 669 340 | 21 341 | 370 342 | 218 343 | 68 344 | 604 345 | 55 346 | 142 347 | 425 348 | 478 349 | 419 350 | 292 351 | 191 352 | 489 353 | 390 354 | 501 355 | 362 356 | 730 357 | 115 358 | 523 359 | 187 360 | 586 361 | 706 362 | 78 363 | 337 364 | 560 365 | 84 366 | 87 367 | 43 368 | 267 369 | 600 370 | 504 371 | 588 372 | 215 373 | 100 374 | 133 375 | 611 376 | 59 377 | 343 378 | 427 379 | 531 380 | 346 381 | 451 382 | 522 383 | 524 384 | 255 385 | 161 386 | 227 387 | 223 388 | 568 389 | 415 390 | 366 391 | 328 392 | 582 393 | 442 394 | 311 395 | 136 396 | 424 397 | 13 398 | 508 399 | 286 400 | 86 401 | 234 402 | 429 403 | 377 404 | 529 405 | 216 406 | 565 407 | 182 408 | 739 409 | 184 410 | 398 411 | 290 412 | 288 413 | 710 414 | 616 415 | 612 416 | 172 417 | 168 418 | 181 419 | 291 420 | 581 421 | 222 422 | 696 423 | 664 424 | 460 425 | 18 426 | 6 427 | 605 428 | 683 429 | 763 430 | 674 431 | 693 432 | 440 433 | 37 434 | 31 435 | 207 436 | 723 437 | 412 438 | 726 439 | 659 440 | 395 441 | 71 442 | 315 443 | 114 444 | 754 445 | 322 446 | 433 447 | 350 448 | 538 449 | 636 450 | 543 451 | 414 452 | 313 453 | 266 454 | 728 455 | 374 456 | 468 457 | 464 458 | 194 459 | 167 460 | 17 461 | 743 462 | 124 463 | 48 464 | 397 465 | 665 466 | 248 467 | 125 468 | 481 469 | 502 470 | 722 471 | 589 472 | 480 473 | 158 474 | 482 475 | 316 476 | 197 477 | 654 478 | 691 479 | 230 480 | 572 481 | 555 482 | 170 483 | 143 484 | 553 485 | 737 486 | 719 487 | 396 488 | 561 489 | 750 490 | 259 491 | 33 492 | 272 493 | 564 494 | 443 495 | 383 496 | 507 497 | 204 498 | 320 499 | 642 500 | 56 501 | 190 502 | 682 503 | 479 504 | 118 505 | 476 506 | 106 507 | 545 508 | 244 509 | 578 510 | 330 511 | 89 512 | 140 513 | 334 514 | 251 515 | 403 516 | 603 517 | 712 518 | 293 519 | 497 520 | 602 521 | 129 522 | 135 523 | 475 524 | 732 525 | 705 526 | 742 527 | 314 528 | 281 529 | 323 530 | 643 531 | 596 532 | 208 533 | 107 534 | 178 535 | 364 536 | 373 537 | 285 538 | 690 539 | 66 540 | 176 541 | 271 542 | 104 543 | 694 544 | 211 545 | 704 546 | 757 547 | 486 548 | 306 549 | 607 550 | 574 551 | 528 552 | 335 553 | 81 554 | 378 555 | 275 556 | 449 557 | 594 558 | 30 559 | 105 560 | 380 561 | 1 562 | 179 563 | 593 564 | 302 565 | 614 566 | 231 567 | 173 568 | 438 569 | 287 570 | 525 571 | 577 572 | 69 573 | 225 574 | 186 575 | 169 576 | 746 577 | -------------------------------------------------------------------------------- /importance/kl2k/Deit_base_12_attn_768_kl_9_2k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 20 2 | 249 3 | 241 4 | 4 5 | 48 6 | 160 7 | 11 8 | 14 9 | 141 10 | 56 11 | 8 12 | 39 13 | 5 14 | 9 15 | 51 16 | 47 17 | 221 18 | 34 19 | 209 20 | 79 21 | 63 22 | 24 23 | 62 24 | 243 25 | 217 26 | 200 27 | 212 28 | 23 29 | 97 30 | 254 31 | 40 32 | 253 33 | 198 34 | 204 35 | 27 36 | 1 37 | 100 38 | 18 39 | 220 40 | 45 41 | 55 42 | 208 43 | 93 44 | 118 45 | 13 46 | 71 47 | 35 48 | 226 49 | 113 50 | 44 51 | 50 52 | 115 53 | 123 54 | 211 55 | 103 56 | 255 57 | 162 58 | 64 59 | 247 60 | 232 61 | 233 62 | 173 63 | 122 64 | 90 65 | 2 66 | 60 67 | 177 68 | 101 69 | 210 70 | 32 71 | 6 72 | 49 73 | 61 74 | 28 75 | 3 76 | 59 77 | 188 78 | 86 79 | 129 80 | 179 81 | 53 82 | 202 83 | 244 84 | 26 85 | 91 86 | 107 87 | 98 88 | 10 89 | 213 90 | 73 91 | 125 92 | 78 93 | 203 94 | 237 95 | 66 96 | 214 97 | 30 98 | 68 99 | 16 100 | 17 101 | 224 
102 | 77 103 | 41 104 | 248 105 | 0 106 | 193 107 | 65 108 | 194 109 | 223 110 | 124 111 | 89 112 | 120 113 | 133 114 | 25 115 | 43 116 | 7 117 | 36 118 | 114 119 | 136 120 | 163 121 | 156 122 | 240 123 | 22 124 | 81 125 | 189 126 | 109 127 | 132 128 | 112 129 | 215 130 | 231 131 | 108 132 | 87 133 | 180 134 | 94 135 | 130 136 | 15 137 | 167 138 | 242 139 | 158 140 | 104 141 | 168 142 | 46 143 | 111 144 | 29 145 | 182 146 | 227 147 | 196 148 | 152 149 | 116 150 | 84 151 | 155 152 | 229 153 | 54 154 | 171 155 | 117 156 | 19 157 | 205 158 | 102 159 | 206 160 | 110 161 | 149 162 | 74 163 | 140 164 | 234 165 | 127 166 | 38 167 | 76 168 | 106 169 | 21 170 | 92 171 | 82 172 | 184 173 | 131 174 | 236 175 | 175 176 | 199 177 | 80 178 | 235 179 | 126 180 | 159 181 | 95 182 | 105 183 | 201 184 | 67 185 | 252 186 | 225 187 | 72 188 | 153 189 | 88 190 | 157 191 | 150 192 | 186 193 | 368 194 | 322 195 | 340 196 | 320 197 | 376 198 | 329 199 | 360 200 | 378 201 | 359 202 | 372 203 | 342 204 | 362 205 | 370 206 | 328 207 | 341 208 | 379 209 | 323 210 | 331 211 | 371 212 | 363 213 | 321 214 | 336 215 | 339 216 | 347 217 | 375 218 | 351 219 | 324 220 | 383 221 | 326 222 | 365 223 | 343 224 | 337 225 | 303 226 | 348 227 | 325 228 | 352 229 | 306 230 | 349 231 | 367 232 | 315 233 | 364 234 | 330 235 | 356 236 | 373 237 | 377 238 | 361 239 | 258 240 | 327 241 | 334 242 | 332 243 | 357 244 | 346 245 | 471 246 | 498 247 | 333 248 | 271 249 | 309 250 | 295 251 | 313 252 | 263 253 | 338 254 | 461 255 | 311 256 | 345 257 | 291 258 | 369 259 | 260 260 | 262 261 | 374 262 | 316 263 | 465 264 | 486 265 | 289 266 | 504 267 | 380 268 | 312 269 | 350 270 | 261 271 | 473 272 | 506 273 | 353 274 | 366 275 | 495 276 | 284 277 | 279 278 | 355 279 | 305 280 | 354 281 | 269 282 | 304 283 | 314 284 | 507 285 | 282 286 | 272 287 | 286 288 | 267 289 | 273 290 | 274 291 | 297 292 | 448 293 | 283 294 | 264 295 | 287 296 | 310 297 | 281 298 | 494 299 | 463 300 | 503 301 | 301 302 | 476 303 | 298 304 | 308 305 | 453 306 | 391 307 | 490 308 | 429 309 | 492 310 | 257 311 | 290 312 | 317 313 | 474 314 | 292 315 | 455 316 | 299 317 | 469 318 | 302 319 | 288 320 | 459 321 | 475 322 | 265 323 | 496 324 | 420 325 | 422 326 | 344 327 | 441 328 | 487 329 | 450 330 | 444 331 | 307 332 | 505 333 | 268 334 | 296 335 | 256 336 | 319 337 | 509 338 | 460 339 | 387 340 | 277 341 | 456 342 | 293 343 | 491 344 | 385 345 | 399 346 | 499 347 | 280 348 | 445 349 | 417 350 | 480 351 | 266 352 | 419 353 | 472 354 | 500 355 | 392 356 | 300 357 | 358 358 | 294 359 | 485 360 | 468 361 | 405 362 | 416 363 | 462 364 | 511 365 | 409 366 | 270 367 | 425 368 | 502 369 | 508 370 | 497 371 | 482 372 | 493 373 | 483 374 | 454 375 | 386 376 | 478 377 | 470 378 | 464 379 | 457 380 | 451 381 | 413 382 | 440 383 | 408 384 | 443 385 | 687 386 | 671 387 | 683 388 | 701 389 | 555 390 | 741 391 | 686 392 | 641 393 | 740 394 | 650 395 | 719 396 | 665 397 | 696 398 | 680 399 | 735 400 | 692 401 | 547 402 | 546 403 | 538 404 | 694 405 | 648 406 | 575 407 | 698 408 | 558 409 | 728 410 | 656 411 | 727 412 | 666 413 | 716 414 | 647 415 | 700 416 | 533 417 | 564 418 | 661 419 | 767 420 | 643 421 | 690 422 | 751 423 | 763 424 | 655 425 | 571 426 | 534 427 | 657 428 | 519 429 | 720 430 | 689 431 | 697 432 | 567 433 | 678 434 | 518 435 | 672 436 | 670 437 | 743 438 | 517 439 | 522 440 | 753 441 | 755 442 | 539 443 | 673 444 | 658 445 | 542 446 | 703 447 | 649 448 | 659 449 | 688 450 | 565 451 | 552 452 | 717 453 | 652 454 | 642 455 | 721 456 | 675 457 | 668 458 | 729 459 | 710 460 | 
554 461 | 540 462 | 669 463 | 573 464 | 708 465 | 574 466 | 722 467 | 627 468 | 693 469 | 660 470 | 724 471 | 653 472 | 646 473 | 614 474 | 644 475 | 766 476 | 526 477 | 663 478 | 530 479 | 566 480 | 559 481 | 520 482 | 695 483 | 528 484 | 537 485 | 599 486 | 682 487 | 525 488 | 712 489 | 707 490 | 679 491 | 563 492 | 633 493 | 543 494 | 702 495 | 764 496 | 705 497 | 515 498 | 730 499 | 548 500 | 532 501 | 674 502 | 754 503 | 617 504 | 639 505 | 544 506 | 662 507 | 549 508 | 583 509 | 748 510 | 761 511 | 584 512 | 715 513 | 529 514 | 588 515 | 664 516 | 756 517 | 630 518 | 742 519 | 513 520 | 726 521 | 512 522 | 576 523 | 516 524 | 581 525 | 676 526 | 640 527 | 667 528 | 585 529 | 631 530 | 750 531 | 739 532 | 613 533 | 677 534 | 553 535 | 718 536 | 597 537 | 714 538 | 593 539 | 523 540 | 711 541 | 760 542 | 762 543 | 654 544 | 691 545 | 594 546 | 612 547 | 733 548 | 723 549 | 738 550 | 744 551 | 595 552 | 541 553 | 709 554 | 749 555 | 746 556 | 582 557 | 577 558 | 570 559 | 704 560 | 734 561 | 536 562 | 608 563 | 731 564 | 632 565 | 629 566 | 514 567 | 758 568 | 752 569 | 623 570 | 638 571 | 535 572 | 557 573 | 605 574 | 610 575 | 560 576 | 651 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_attn_768_kl_10_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 20 2 | 41 3 | 27 4 | 46 5 | 63 6 | 59 7 | 33 8 | 51 9 | 22 10 | 28 11 | 15 12 | 52 13 | 44 14 | 3 15 | 24 16 | 45 17 | 35 18 | 18 19 | 39 20 | 36 21 | 49 22 | 21 23 | 9 24 | 56 25 | 34 26 | 48 27 | 107 28 | 6 29 | 25 30 | 0 31 | 1 32 | 53 33 | 40 34 | 12 35 | 38 36 | 102 37 | 50 38 | 13 39 | 86 40 | 75 41 | 85 42 | 121 43 | 74 44 | 31 45 | 7 46 | 47 47 | 19 48 | 111 49 | 16 50 | 58 51 | 42 52 | 72 53 | 81 54 | 55 55 | 98 56 | 83 57 | 64 58 | 124 59 | 60 60 | 122 61 | 69 62 | 103 63 | 62 64 | 118 65 | 10 66 | 95 67 | 2 68 | 125 69 | 116 70 | 29 71 | 218 72 | 32 73 | 57 74 | 115 75 | 114 76 | 26 77 | 104 78 | 100 79 | 67 80 | 88 81 | 84 82 | 92 83 | 221 84 | 199 85 | 87 86 | 73 87 | 106 88 | 4 89 | 112 90 | 220 91 | 119 92 | 142 93 | 30 94 | 108 95 | 209 96 | 240 97 | 198 98 | 120 99 | 202 100 | 238 101 | 232 102 | 97 103 | 127 104 | 163 105 | 201 106 | 239 107 | 245 108 | 110 109 | 179 110 | 136 111 | 113 112 | 66 113 | 109 114 | 5 115 | 241 116 | 90 117 | 255 118 | 229 119 | 23 120 | 181 121 | 160 122 | 213 123 | 130 124 | 237 125 | 17 126 | 79 127 | 208 128 | 138 129 | 224 130 | 226 131 | 214 132 | 217 133 | 61 134 | 37 135 | 94 136 | 250 137 | 252 138 | 174 139 | 215 140 | 168 141 | 254 142 | 200 143 | 156 144 | 77 145 | 128 146 | 212 147 | 186 148 | 159 149 | 190 150 | 80 151 | 105 152 | 233 153 | 223 154 | 134 155 | 133 156 | 99 157 | 211 158 | 251 159 | 65 160 | 216 161 | 157 162 | 71 163 | 182 164 | 207 165 | 146 166 | 205 167 | 166 168 | 172 169 | 228 170 | 210 171 | 11 172 | 230 173 | 68 174 | 129 175 | 206 176 | 150 177 | 89 178 | 148 179 | 151 180 | 96 181 | 178 182 | 191 183 | 184 184 | 143 185 | 235 186 | 236 187 | 225 188 | 249 189 | 248 190 | 170 191 | 139 192 | 14 193 | 450 194 | 391 195 | 493 196 | 484 197 | 470 198 | 425 199 | 419 200 | 464 201 | 394 202 | 404 203 | 426 204 | 475 205 | 457 206 | 428 207 | 509 208 | 454 209 | 469 210 | 407 211 | 447 212 | 465 213 | 385 214 | 260 215 | 492 216 | 332 217 | 503 218 | 353 219 | 474 220 | 372 221 | 456 222 | 328 223 | 348 224 | 364 225 | 466 226 | 330 227 | 507 228 | 365 229 | 366 230 | 424 231 | 462 232 | 468 233 | 400 234 | 482 235 | 291 
236 | 298 237 | 326 238 | 500 239 | 341 240 | 357 241 | 438 242 | 472 243 | 315 244 | 501 245 | 432 246 | 442 247 | 361 248 | 436 249 | 496 250 | 334 251 | 349 252 | 277 253 | 283 254 | 479 255 | 508 256 | 418 257 | 320 258 | 304 259 | 427 260 | 506 261 | 395 262 | 362 263 | 481 264 | 444 265 | 449 266 | 452 267 | 387 268 | 439 269 | 458 270 | 323 271 | 289 272 | 369 273 | 422 274 | 411 275 | 510 276 | 476 277 | 429 278 | 294 279 | 483 280 | 300 281 | 299 282 | 266 283 | 262 284 | 375 285 | 358 286 | 402 287 | 321 288 | 379 289 | 295 290 | 440 291 | 461 292 | 311 293 | 309 294 | 263 295 | 505 296 | 316 297 | 473 298 | 499 299 | 378 300 | 264 301 | 443 302 | 339 303 | 397 304 | 317 305 | 310 306 | 337 307 | 405 308 | 497 309 | 488 310 | 301 311 | 297 312 | 412 313 | 388 314 | 363 315 | 355 316 | 416 317 | 306 318 | 327 319 | 286 320 | 346 321 | 494 322 | 319 323 | 359 324 | 336 325 | 290 326 | 467 327 | 307 328 | 491 329 | 382 330 | 276 331 | 448 332 | 409 333 | 270 334 | 293 335 | 370 336 | 335 337 | 434 338 | 342 339 | 485 340 | 396 341 | 271 342 | 288 343 | 280 344 | 308 345 | 511 346 | 352 347 | 344 348 | 376 349 | 351 350 | 383 351 | 486 352 | 287 353 | 350 354 | 284 355 | 380 356 | 329 357 | 368 358 | 360 359 | 502 360 | 340 361 | 324 362 | 459 363 | 408 364 | 406 365 | 303 366 | 392 367 | 374 368 | 302 369 | 312 370 | 265 371 | 347 372 | 285 373 | 410 374 | 373 375 | 471 376 | 431 377 | 435 378 | 441 379 | 314 380 | 354 381 | 504 382 | 259 383 | 437 384 | 460 385 | 694 386 | 657 387 | 683 388 | 679 389 | 697 390 | 676 391 | 663 392 | 685 393 | 649 394 | 674 395 | 660 396 | 653 397 | 654 398 | 651 399 | 671 400 | 684 401 | 675 402 | 661 403 | 666 404 | 643 405 | 691 406 | 656 407 | 682 408 | 695 409 | 687 410 | 640 411 | 677 412 | 668 413 | 689 414 | 701 415 | 550 416 | 702 417 | 641 418 | 698 419 | 667 420 | 690 421 | 699 422 | 561 423 | 534 424 | 652 425 | 672 426 | 644 427 | 519 428 | 673 429 | 514 430 | 569 431 | 552 432 | 540 433 | 703 434 | 559 435 | 527 436 | 693 437 | 686 438 | 556 439 | 646 440 | 573 441 | 563 442 | 680 443 | 650 444 | 692 445 | 538 446 | 637 447 | 555 448 | 557 449 | 520 450 | 553 451 | 521 452 | 543 453 | 525 454 | 596 455 | 539 456 | 645 457 | 560 458 | 564 459 | 513 460 | 647 461 | 688 462 | 544 463 | 607 464 | 568 465 | 567 466 | 545 467 | 576 468 | 562 469 | 598 470 | 736 471 | 512 472 | 518 473 | 524 474 | 648 475 | 642 476 | 547 477 | 762 478 | 742 479 | 529 480 | 625 481 | 530 482 | 606 483 | 581 484 | 678 485 | 601 486 | 535 487 | 580 488 | 733 489 | 628 490 | 717 491 | 624 492 | 584 493 | 610 494 | 531 495 | 542 496 | 583 497 | 638 498 | 618 499 | 632 500 | 605 501 | 716 502 | 591 503 | 659 504 | 756 505 | 715 506 | 582 507 | 718 508 | 720 509 | 554 510 | 623 511 | 758 512 | 604 513 | 548 514 | 629 515 | 595 516 | 709 517 | 516 518 | 517 519 | 627 520 | 586 521 | 635 522 | 763 523 | 617 524 | 761 525 | 714 526 | 719 527 | 731 528 | 592 529 | 594 530 | 620 531 | 585 532 | 739 533 | 732 534 | 578 535 | 615 536 | 740 537 | 633 538 | 725 539 | 626 540 | 587 541 | 602 542 | 705 543 | 619 544 | 710 545 | 533 546 | 588 547 | 749 548 | 728 549 | 630 550 | 748 551 | 546 552 | 549 553 | 613 554 | 730 555 | 708 556 | 522 557 | 729 558 | 611 559 | 636 560 | 765 561 | 743 562 | 713 563 | 700 564 | 532 565 | 614 566 | 631 567 | 706 568 | 712 569 | 609 570 | 622 571 | 577 572 | 755 573 | 681 574 | 735 575 | 658 576 | 734 577 | -------------------------------------------------------------------------------- 
/importance/kl5k/Deit_base_12_attn_768_kl_6_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 102 2 | 105 3 | 79 4 | 69 5 | 75 6 | 100 7 | 101 8 | 87 9 | 125 10 | 107 11 | 67 12 | 73 13 | 103 14 | 88 15 | 81 16 | 84 17 | 96 18 | 104 19 | 80 20 | 6 21 | 112 22 | 118 23 | 72 24 | 71 25 | 82 26 | 85 27 | 106 28 | 116 29 | 61 30 | 21 31 | 15 32 | 90 33 | 123 34 | 58 35 | 93 36 | 94 37 | 65 38 | 109 39 | 0 40 | 114 41 | 12 42 | 62 43 | 5 44 | 13 45 | 117 46 | 53 47 | 121 48 | 36 49 | 92 50 | 113 51 | 137 52 | 42 53 | 142 54 | 172 55 | 95 56 | 38 57 | 74 58 | 27 59 | 30 60 | 41 61 | 60 62 | 33 63 | 119 64 | 45 65 | 7 66 | 91 67 | 37 68 | 86 69 | 70 70 | 97 71 | 22 72 | 34 73 | 57 74 | 20 75 | 89 76 | 31 77 | 16 78 | 120 79 | 4 80 | 64 81 | 50 82 | 26 83 | 48 84 | 2 85 | 124 86 | 222 87 | 177 88 | 143 89 | 157 90 | 133 91 | 76 92 | 236 93 | 163 94 | 11 95 | 205 96 | 63 97 | 140 98 | 170 99 | 56 100 | 18 101 | 46 102 | 129 103 | 10 104 | 49 105 | 139 106 | 14 107 | 147 108 | 200 109 | 39 110 | 204 111 | 156 112 | 168 113 | 132 114 | 206 115 | 197 116 | 126 117 | 3 118 | 115 119 | 111 120 | 146 121 | 19 122 | 68 123 | 166 124 | 253 125 | 110 126 | 245 127 | 99 128 | 251 129 | 209 130 | 160 131 | 54 132 | 247 133 | 237 134 | 183 135 | 47 136 | 136 137 | 128 138 | 23 139 | 162 140 | 78 141 | 230 142 | 184 143 | 213 144 | 226 145 | 169 146 | 208 147 | 238 148 | 232 149 | 59 150 | 217 151 | 44 152 | 150 153 | 165 154 | 66 155 | 255 156 | 191 157 | 179 158 | 161 159 | 152 160 | 211 161 | 194 162 | 40 163 | 181 164 | 246 165 | 195 166 | 144 167 | 241 168 | 43 169 | 176 170 | 185 171 | 25 172 | 202 173 | 173 174 | 178 175 | 8 176 | 249 177 | 244 178 | 29 179 | 225 180 | 131 181 | 201 182 | 174 183 | 227 184 | 24 185 | 218 186 | 190 187 | 154 188 | 207 189 | 175 190 | 164 191 | 215 192 | 214 193 | 379 194 | 406 195 | 288 196 | 304 197 | 259 198 | 313 199 | 270 200 | 316 201 | 422 202 | 344 203 | 354 204 | 261 205 | 284 206 | 438 207 | 329 208 | 410 209 | 272 210 | 409 211 | 268 212 | 385 213 | 424 214 | 282 215 | 428 216 | 265 217 | 398 218 | 368 219 | 266 220 | 260 221 | 386 222 | 433 223 | 262 224 | 447 225 | 339 226 | 276 227 | 444 228 | 281 229 | 343 230 | 391 231 | 442 232 | 331 233 | 412 234 | 440 235 | 502 236 | 418 237 | 273 238 | 357 239 | 353 240 | 403 241 | 277 242 | 363 243 | 441 244 | 450 245 | 280 246 | 427 247 | 301 248 | 407 249 | 426 250 | 307 251 | 341 252 | 499 253 | 274 254 | 337 255 | 421 256 | 258 257 | 488 258 | 271 259 | 413 260 | 325 261 | 290 262 | 395 263 | 393 264 | 278 265 | 408 266 | 461 267 | 396 268 | 299 269 | 351 270 | 445 271 | 443 272 | 297 273 | 267 274 | 482 275 | 256 276 | 436 277 | 347 278 | 423 279 | 400 280 | 308 281 | 275 282 | 336 283 | 312 284 | 431 285 | 394 286 | 417 287 | 402 288 | 346 289 | 364 290 | 382 291 | 358 292 | 429 293 | 466 294 | 470 295 | 352 296 | 287 297 | 432 298 | 475 299 | 359 300 | 456 301 | 505 302 | 362 303 | 367 304 | 349 305 | 481 306 | 411 307 | 419 308 | 310 309 | 384 310 | 342 311 | 330 312 | 468 313 | 327 314 | 320 315 | 372 316 | 322 317 | 294 318 | 285 319 | 374 320 | 302 321 | 309 322 | 338 323 | 340 324 | 498 325 | 389 326 | 373 327 | 317 328 | 434 329 | 360 330 | 323 331 | 479 332 | 321 333 | 315 334 | 377 335 | 376 336 | 487 337 | 371 338 | 286 339 | 484 340 | 300 341 | 511 342 | 491 343 | 483 344 | 381 345 | 430 346 | 509 347 | 472 348 | 296 349 | 366 350 | 506 351 | 289 352 | 348 353 | 480 354 | 469 355 | 494 356 | 319 357 | 324 358 | 311 359 | 496 360 | 
485 361 | 264 362 | 365 363 | 401 364 | 446 365 | 333 366 | 380 367 | 474 368 | 476 369 | 392 370 | 326 371 | 306 372 | 460 373 | 503 374 | 457 375 | 334 376 | 467 377 | 415 378 | 356 379 | 387 380 | 507 381 | 495 382 | 501 383 | 508 384 | 462 385 | 679 386 | 658 387 | 640 388 | 530 389 | 699 390 | 676 391 | 531 392 | 660 393 | 553 394 | 543 395 | 701 396 | 554 397 | 703 398 | 680 399 | 544 400 | 519 401 | 669 402 | 662 403 | 652 404 | 694 405 | 663 406 | 697 407 | 667 408 | 681 409 | 552 410 | 675 411 | 647 412 | 560 413 | 565 414 | 687 415 | 704 416 | 631 417 | 548 418 | 547 419 | 737 420 | 696 421 | 570 422 | 688 423 | 566 424 | 758 425 | 661 426 | 533 427 | 538 428 | 686 429 | 568 430 | 576 431 | 558 432 | 605 433 | 557 434 | 692 435 | 514 436 | 665 437 | 682 438 | 638 439 | 691 440 | 651 441 | 684 442 | 655 443 | 678 444 | 659 445 | 739 446 | 747 447 | 540 448 | 711 449 | 721 450 | 539 451 | 537 452 | 671 453 | 525 454 | 572 455 | 573 456 | 690 457 | 715 458 | 706 459 | 766 460 | 521 461 | 603 462 | 545 463 | 627 464 | 724 465 | 664 466 | 648 467 | 601 468 | 653 469 | 732 470 | 717 471 | 738 472 | 608 473 | 562 474 | 705 475 | 622 476 | 629 477 | 719 478 | 657 479 | 518 480 | 683 481 | 561 482 | 535 483 | 708 484 | 522 485 | 609 486 | 564 487 | 695 488 | 604 489 | 534 490 | 582 491 | 674 492 | 520 493 | 615 494 | 767 495 | 710 496 | 646 497 | 764 498 | 625 499 | 546 500 | 729 501 | 619 502 | 555 503 | 735 504 | 755 505 | 594 506 | 598 507 | 633 508 | 611 509 | 563 510 | 756 511 | 685 512 | 733 513 | 643 514 | 517 515 | 639 516 | 709 517 | 723 518 | 670 519 | 528 520 | 536 521 | 559 522 | 529 523 | 588 524 | 734 525 | 515 526 | 589 527 | 656 528 | 677 529 | 728 530 | 761 531 | 516 532 | 642 533 | 746 534 | 644 535 | 579 536 | 689 537 | 578 538 | 736 539 | 742 540 | 532 541 | 673 542 | 524 543 | 596 544 | 748 545 | 526 546 | 618 547 | 751 548 | 617 549 | 752 550 | 614 551 | 745 552 | 749 553 | 591 554 | 523 555 | 740 556 | 626 557 | 753 558 | 569 559 | 632 560 | 712 561 | 741 562 | 672 563 | 750 564 | 759 565 | 634 566 | 765 567 | 693 568 | 623 569 | 550 570 | 571 571 | 716 572 | 587 573 | 590 574 | 574 575 | 668 576 | 757 577 | -------------------------------------------------------------------------------- /importance/kl2k/Deit_base_12_attn_768_kl_3_2k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 139 2 | 143 3 | 154 4 | 130 5 | 178 6 | 168 7 | 150 8 | 155 9 | 146 10 | 108 11 | 8 12 | 179 13 | 153 14 | 160 15 | 145 16 | 149 17 | 180 18 | 141 19 | 58 20 | 191 21 | 96 22 | 159 23 | 175 24 | 50 25 | 148 26 | 188 27 | 144 28 | 123 29 | 52 30 | 111 31 | 157 32 | 177 33 | 129 34 | 87 35 | 132 36 | 72 37 | 116 38 | 128 39 | 165 40 | 107 41 | 103 42 | 61 43 | 109 44 | 31 45 | 46 46 | 26 47 | 19 48 | 95 49 | 4 50 | 105 51 | 68 52 | 89 53 | 119 54 | 51 55 | 32 56 | 134 57 | 91 58 | 24 59 | 163 60 | 39 61 | 115 62 | 30 63 | 54 64 | 41 65 | 110 66 | 125 67 | 10 68 | 104 69 | 133 70 | 38 71 | 49 72 | 94 73 | 114 74 | 162 75 | 170 76 | 66 77 | 185 78 | 120 79 | 127 80 | 122 81 | 22 82 | 184 83 | 80 84 | 9 85 | 101 86 | 190 87 | 18 88 | 3 89 | 28 90 | 136 91 | 121 92 | 164 93 | 88 94 | 100 95 | 74 96 | 76 97 | 59 98 | 11 99 | 135 100 | 97 101 | 138 102 | 152 103 | 78 104 | 43 105 | 15 106 | 47 107 | 79 108 | 65 109 | 187 110 | 71 111 | 92 112 | 239 113 | 113 114 | 12 115 | 37 116 | 221 117 | 172 118 | 182 119 | 1 120 | 45 121 | 147 122 | 2 123 | 81 124 | 35 125 | 60 126 | 225 127 | 137 128 | 75 129 | 63 130 | 70 131 | 85 132 | 
253 133 | 102 134 | 5 135 | 234 136 | 7 137 | 242 138 | 112 139 | 124 140 | 117 141 | 67 142 | 25 143 | 189 144 | 193 145 | 201 146 | 23 147 | 86 148 | 106 149 | 83 150 | 27 151 | 206 152 | 197 153 | 223 154 | 216 155 | 240 156 | 254 157 | 21 158 | 16 159 | 244 160 | 173 161 | 36 162 | 245 163 | 207 164 | 209 165 | 252 166 | 98 167 | 69 168 | 238 169 | 82 170 | 243 171 | 56 172 | 53 173 | 33 174 | 212 175 | 202 176 | 186 177 | 229 178 | 40 179 | 235 180 | 233 181 | 219 182 | 220 183 | 57 184 | 84 185 | 55 186 | 215 187 | 196 188 | 204 189 | 167 190 | 211 191 | 195 192 | 158 193 | 312 194 | 369 195 | 321 196 | 353 197 | 328 198 | 372 199 | 339 200 | 259 201 | 376 202 | 383 203 | 346 204 | 340 205 | 336 206 | 344 207 | 295 208 | 337 209 | 271 210 | 331 211 | 318 212 | 355 213 | 280 214 | 364 215 | 362 216 | 374 217 | 378 218 | 302 219 | 333 220 | 306 221 | 323 222 | 352 223 | 359 224 | 330 225 | 288 226 | 276 227 | 334 228 | 287 229 | 320 230 | 263 231 | 305 232 | 285 233 | 311 234 | 335 235 | 379 236 | 294 237 | 269 238 | 367 239 | 319 240 | 286 241 | 351 242 | 278 243 | 266 244 | 303 245 | 332 246 | 257 247 | 327 248 | 381 249 | 300 250 | 258 251 | 370 252 | 310 253 | 377 254 | 262 255 | 264 256 | 308 257 | 298 258 | 279 259 | 325 260 | 462 261 | 343 262 | 329 263 | 268 264 | 474 265 | 313 266 | 483 267 | 479 268 | 380 269 | 267 270 | 471 271 | 505 272 | 493 273 | 366 274 | 293 275 | 342 276 | 304 277 | 458 278 | 348 279 | 281 280 | 502 281 | 309 282 | 506 283 | 345 284 | 503 285 | 499 286 | 492 287 | 482 288 | 438 289 | 272 290 | 317 291 | 470 292 | 472 293 | 360 294 | 498 295 | 459 296 | 456 297 | 508 298 | 453 299 | 457 300 | 480 301 | 488 302 | 464 303 | 467 304 | 451 305 | 475 306 | 500 307 | 486 308 | 382 309 | 489 310 | 469 311 | 460 312 | 478 313 | 504 314 | 409 315 | 450 316 | 347 317 | 466 318 | 490 319 | 455 320 | 270 321 | 501 322 | 496 323 | 461 324 | 275 325 | 265 326 | 507 327 | 497 328 | 511 329 | 301 330 | 393 331 | 350 332 | 292 333 | 509 334 | 307 335 | 443 336 | 428 337 | 395 338 | 371 339 | 454 340 | 297 341 | 403 342 | 481 343 | 406 344 | 299 345 | 396 346 | 477 347 | 452 348 | 368 349 | 314 350 | 324 351 | 476 352 | 260 353 | 495 354 | 407 355 | 468 356 | 463 357 | 416 358 | 417 359 | 487 360 | 437 361 | 510 362 | 296 363 | 442 364 | 448 365 | 465 366 | 420 367 | 413 368 | 484 369 | 397 370 | 365 371 | 261 372 | 431 373 | 277 374 | 384 375 | 386 376 | 494 377 | 256 378 | 388 379 | 373 380 | 435 381 | 338 382 | 283 383 | 485 384 | 449 385 | 624 386 | 639 387 | 629 388 | 634 389 | 601 390 | 612 391 | 592 392 | 620 393 | 594 394 | 618 395 | 582 396 | 590 397 | 637 398 | 614 399 | 591 400 | 627 401 | 625 402 | 606 403 | 589 404 | 619 405 | 632 406 | 587 407 | 617 408 | 621 409 | 647 410 | 631 411 | 677 412 | 660 413 | 581 414 | 600 415 | 658 416 | 616 417 | 584 418 | 604 419 | 645 420 | 701 421 | 577 422 | 630 423 | 684 424 | 649 425 | 715 426 | 683 427 | 705 428 | 610 429 | 689 430 | 635 431 | 597 432 | 698 433 | 644 434 | 650 435 | 593 436 | 622 437 | 598 438 | 664 439 | 595 440 | 673 441 | 608 442 | 646 443 | 679 444 | 676 445 | 748 446 | 613 447 | 603 448 | 762 449 | 703 450 | 605 451 | 693 452 | 626 453 | 655 454 | 754 455 | 707 456 | 665 457 | 609 458 | 710 459 | 767 460 | 736 461 | 742 462 | 648 463 | 666 464 | 724 465 | 718 466 | 734 467 | 758 468 | 722 469 | 586 470 | 709 471 | 763 472 | 699 473 | 643 474 | 738 475 | 714 476 | 735 477 | 607 478 | 766 479 | 725 480 | 678 481 | 713 482 | 674 483 | 682 484 | 578 485 | 585 486 | 750 487 | 704 488 | 753 489 | 675 490 
| 638 491 | 712 492 | 654 493 | 730 494 | 663 495 | 691 496 | 752 497 | 579 498 | 765 499 | 708 500 | 566 501 | 686 502 | 723 503 | 732 504 | 533 505 | 596 506 | 671 507 | 720 508 | 652 509 | 611 510 | 656 511 | 694 512 | 695 513 | 583 514 | 755 515 | 726 516 | 733 517 | 711 518 | 756 519 | 731 520 | 692 521 | 516 522 | 670 523 | 719 524 | 702 525 | 542 526 | 764 527 | 662 528 | 761 529 | 706 530 | 521 531 | 615 532 | 569 533 | 669 534 | 524 535 | 659 536 | 744 537 | 544 538 | 741 539 | 759 540 | 599 541 | 557 542 | 549 543 | 760 544 | 743 545 | 721 546 | 739 547 | 757 548 | 519 549 | 740 550 | 653 551 | 538 552 | 728 553 | 571 554 | 745 555 | 554 556 | 697 557 | 534 558 | 515 559 | 623 560 | 751 561 | 570 562 | 560 563 | 529 564 | 512 565 | 575 566 | 558 567 | 568 568 | 573 569 | 668 570 | 536 571 | 576 572 | 540 573 | 685 574 | 562 575 | 641 576 | 545 577 | -------------------------------------------------------------------------------- /importance/kl2k/Deit_base_12_attn_768_kl_5_2k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 67 2 | 65 3 | 99 4 | 119 5 | 115 6 | 90 7 | 96 8 | 213 9 | 208 10 | 125 11 | 84 12 | 103 13 | 211 14 | 79 15 | 88 16 | 218 17 | 193 18 | 251 19 | 70 20 | 248 21 | 126 22 | 82 23 | 38 24 | 94 25 | 195 26 | 235 27 | 117 28 | 81 29 | 198 30 | 59 31 | 2 32 | 10 33 | 108 34 | 95 35 | 15 36 | 69 37 | 254 38 | 75 39 | 222 40 | 98 41 | 227 42 | 123 43 | 168 44 | 71 45 | 66 46 | 245 47 | 196 48 | 113 49 | 220 50 | 61 51 | 186 52 | 21 53 | 41 54 | 76 55 | 105 56 | 16 57 | 209 58 | 54 59 | 153 60 | 40 61 | 135 62 | 100 63 | 221 64 | 8 65 | 74 66 | 49 67 | 34 68 | 214 69 | 118 70 | 56 71 | 35 72 | 234 73 | 255 74 | 203 75 | 192 76 | 182 77 | 233 78 | 178 79 | 253 80 | 202 81 | 112 82 | 240 83 | 92 84 | 249 85 | 232 86 | 5 87 | 20 88 | 174 89 | 46 90 | 197 91 | 7 92 | 216 93 | 191 94 | 146 95 | 224 96 | 252 97 | 27 98 | 116 99 | 19 100 | 127 101 | 189 102 | 83 103 | 51 104 | 160 105 | 33 106 | 205 107 | 23 108 | 242 109 | 78 110 | 62 111 | 215 112 | 201 113 | 13 114 | 176 115 | 175 116 | 36 117 | 63 118 | 229 119 | 142 120 | 237 121 | 124 122 | 4 123 | 55 124 | 77 125 | 44 126 | 133 127 | 64 128 | 163 129 | 104 130 | 57 131 | 12 132 | 50 133 | 158 134 | 144 135 | 114 136 | 122 137 | 47 138 | 194 139 | 3 140 | 97 141 | 30 142 | 14 143 | 207 144 | 6 145 | 120 146 | 181 147 | 31 148 | 238 149 | 183 150 | 29 151 | 45 152 | 111 153 | 39 154 | 157 155 | 140 156 | 149 157 | 28 158 | 58 159 | 22 160 | 43 161 | 24 162 | 17 163 | 60 164 | 159 165 | 164 166 | 171 167 | 156 168 | 165 169 | 173 170 | 11 171 | 141 172 | 139 173 | 1 174 | 199 175 | 48 176 | 42 177 | 166 178 | 0 179 | 204 180 | 219 181 | 26 182 | 172 183 | 136 184 | 110 185 | 132 186 | 151 187 | 180 188 | 72 189 | 148 190 | 188 191 | 190 192 | 155 193 | 296 194 | 295 195 | 319 196 | 260 197 | 306 198 | 271 199 | 315 200 | 302 201 | 309 202 | 375 203 | 299 204 | 298 205 | 283 206 | 354 207 | 267 208 | 307 209 | 258 210 | 288 211 | 287 212 | 376 213 | 290 214 | 318 215 | 272 216 | 262 217 | 311 218 | 264 219 | 293 220 | 289 221 | 350 222 | 256 223 | 336 224 | 328 225 | 383 226 | 382 227 | 305 228 | 332 229 | 261 230 | 353 231 | 286 232 | 367 233 | 349 234 | 347 235 | 285 236 | 303 237 | 263 238 | 314 239 | 341 240 | 297 241 | 378 242 | 352 243 | 265 244 | 368 245 | 276 246 | 327 247 | 447 248 | 300 249 | 294 250 | 442 251 | 361 252 | 337 253 | 365 254 | 441 255 | 321 256 | 374 257 | 346 258 | 360 259 | 274 260 | 338 261 | 313 262 | 281 263 | 316 264 | 304 
265 | 320 266 | 355 267 | 380 268 | 339 269 | 426 270 | 445 271 | 402 272 | 340 273 | 357 274 | 443 275 | 301 276 | 399 277 | 405 278 | 397 279 | 436 280 | 345 281 | 348 282 | 257 283 | 425 284 | 477 285 | 400 286 | 417 287 | 435 288 | 434 289 | 427 290 | 387 291 | 404 292 | 384 293 | 379 294 | 440 295 | 377 296 | 331 297 | 429 298 | 292 299 | 439 300 | 385 301 | 342 302 | 467 303 | 444 304 | 393 305 | 430 306 | 401 307 | 395 308 | 389 309 | 344 310 | 364 311 | 322 312 | 273 313 | 420 314 | 268 315 | 329 316 | 390 317 | 408 318 | 476 319 | 403 320 | 482 321 | 359 322 | 418 323 | 323 324 | 416 325 | 310 326 | 282 327 | 392 328 | 431 329 | 373 330 | 437 331 | 372 332 | 362 333 | 278 334 | 432 335 | 473 336 | 280 337 | 428 338 | 421 339 | 312 340 | 388 341 | 502 342 | 471 343 | 472 344 | 371 345 | 394 346 | 461 347 | 324 348 | 459 349 | 446 350 | 334 351 | 277 352 | 497 353 | 291 354 | 498 355 | 438 356 | 468 357 | 456 358 | 414 359 | 464 360 | 419 361 | 335 362 | 483 363 | 450 364 | 381 365 | 488 366 | 396 367 | 409 368 | 433 369 | 458 370 | 493 371 | 510 372 | 495 373 | 406 374 | 508 375 | 412 376 | 470 377 | 269 378 | 491 379 | 423 380 | 333 381 | 270 382 | 386 383 | 369 384 | 484 385 | 675 386 | 640 387 | 649 388 | 664 389 | 692 390 | 648 391 | 547 392 | 678 393 | 535 394 | 679 395 | 651 396 | 674 397 | 518 398 | 574 399 | 657 400 | 641 401 | 676 402 | 662 403 | 666 404 | 733 405 | 686 406 | 541 407 | 569 408 | 699 409 | 562 410 | 738 411 | 750 412 | 516 413 | 561 414 | 519 415 | 659 416 | 701 417 | 669 418 | 540 419 | 685 420 | 697 421 | 665 422 | 555 423 | 646 424 | 512 425 | 680 426 | 553 427 | 743 428 | 524 429 | 652 430 | 711 431 | 556 432 | 645 433 | 554 434 | 514 435 | 702 436 | 548 437 | 688 438 | 643 439 | 751 440 | 668 441 | 730 442 | 573 443 | 654 444 | 551 445 | 572 446 | 542 447 | 543 448 | 536 449 | 521 450 | 700 451 | 744 452 | 650 453 | 527 454 | 663 455 | 694 456 | 731 457 | 756 458 | 660 459 | 740 460 | 671 461 | 517 462 | 534 463 | 619 464 | 565 465 | 642 466 | 590 467 | 706 468 | 522 469 | 544 470 | 639 471 | 703 472 | 656 473 | 677 474 | 552 475 | 566 476 | 714 477 | 520 478 | 644 479 | 661 480 | 682 481 | 698 482 | 762 483 | 707 484 | 560 485 | 722 486 | 704 487 | 595 488 | 755 489 | 598 490 | 592 491 | 695 492 | 723 493 | 528 494 | 735 495 | 558 496 | 530 497 | 746 498 | 570 499 | 603 500 | 687 501 | 721 502 | 617 503 | 587 504 | 612 505 | 757 506 | 578 507 | 620 508 | 637 509 | 725 510 | 716 511 | 673 512 | 765 513 | 515 514 | 630 515 | 753 516 | 593 517 | 604 518 | 684 519 | 734 520 | 708 521 | 767 522 | 632 523 | 577 524 | 647 525 | 625 526 | 586 527 | 758 528 | 537 529 | 710 530 | 584 531 | 523 532 | 606 533 | 568 534 | 736 535 | 559 536 | 615 537 | 631 538 | 621 539 | 601 540 | 539 541 | 610 542 | 760 543 | 638 544 | 739 545 | 623 546 | 709 547 | 571 548 | 607 549 | 613 550 | 531 551 | 670 552 | 576 553 | 582 554 | 628 555 | 589 556 | 591 557 | 717 558 | 636 559 | 616 560 | 672 561 | 690 562 | 614 563 | 763 564 | 713 565 | 608 566 | 585 567 | 629 568 | 575 569 | 609 570 | 596 571 | 594 572 | 580 573 | 618 574 | 583 575 | 633 576 | 567 577 | -------------------------------------------------------------------------------- /importance/kl2k/Deit_base_12_attn_768_kl_6_2k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 102 2 | 79 3 | 101 4 | 75 5 | 105 6 | 69 7 | 100 8 | 87 9 | 67 10 | 88 11 | 107 12 | 73 13 | 125 14 | 104 15 | 112 16 | 80 17 | 116 18 | 103 19 | 81 20 | 118 21 | 123 22 | 15 23 | 72 
24 | 71 25 | 6 26 | 84 27 | 96 28 | 90 29 | 85 30 | 61 31 | 21 32 | 82 33 | 106 34 | 109 35 | 58 36 | 62 37 | 94 38 | 117 39 | 65 40 | 0 41 | 13 42 | 114 43 | 36 44 | 92 45 | 121 46 | 137 47 | 93 48 | 119 49 | 12 50 | 5 51 | 113 52 | 53 53 | 172 54 | 74 55 | 41 56 | 30 57 | 33 58 | 86 59 | 95 60 | 60 61 | 38 62 | 22 63 | 37 64 | 64 65 | 27 66 | 45 67 | 26 68 | 142 69 | 31 70 | 70 71 | 97 72 | 2 73 | 34 74 | 7 75 | 42 76 | 91 77 | 89 78 | 20 79 | 177 80 | 124 81 | 170 82 | 57 83 | 76 84 | 236 85 | 16 86 | 120 87 | 163 88 | 157 89 | 140 90 | 63 91 | 147 92 | 49 93 | 205 94 | 48 95 | 129 96 | 133 97 | 56 98 | 4 99 | 10 100 | 204 101 | 11 102 | 54 103 | 111 104 | 46 105 | 139 106 | 222 107 | 143 108 | 19 109 | 68 110 | 126 111 | 50 112 | 3 113 | 115 114 | 146 115 | 156 116 | 209 117 | 166 118 | 206 119 | 99 120 | 200 121 | 168 122 | 197 123 | 183 124 | 160 125 | 59 126 | 18 127 | 237 128 | 253 129 | 255 130 | 39 131 | 110 132 | 14 133 | 78 134 | 47 135 | 245 136 | 213 137 | 162 138 | 230 139 | 128 140 | 40 141 | 184 142 | 169 143 | 208 144 | 251 145 | 132 146 | 136 147 | 179 148 | 181 149 | 191 150 | 238 151 | 173 152 | 247 153 | 144 154 | 226 155 | 246 156 | 161 157 | 194 158 | 66 159 | 217 160 | 218 161 | 23 162 | 195 163 | 25 164 | 232 165 | 150 166 | 176 167 | 29 168 | 152 169 | 225 170 | 43 171 | 227 172 | 244 173 | 180 174 | 154 175 | 249 176 | 239 177 | 211 178 | 44 179 | 193 180 | 165 181 | 215 182 | 131 183 | 202 184 | 145 185 | 185 186 | 241 187 | 178 188 | 234 189 | 188 190 | 212 191 | 214 192 | 201 193 | 406 194 | 288 195 | 354 196 | 313 197 | 428 198 | 270 199 | 444 200 | 261 201 | 410 202 | 259 203 | 385 204 | 304 205 | 422 206 | 398 207 | 368 208 | 276 209 | 344 210 | 403 211 | 424 212 | 343 213 | 433 214 | 284 215 | 282 216 | 329 217 | 265 218 | 391 219 | 331 220 | 325 221 | 408 222 | 440 223 | 316 224 | 341 225 | 272 226 | 262 227 | 441 228 | 409 229 | 438 230 | 353 231 | 436 232 | 386 233 | 421 234 | 281 235 | 442 236 | 447 237 | 499 238 | 339 239 | 336 240 | 268 241 | 427 242 | 363 243 | 418 244 | 271 245 | 274 246 | 277 247 | 395 248 | 260 249 | 273 250 | 393 251 | 423 252 | 412 253 | 482 254 | 337 255 | 407 256 | 301 257 | 445 258 | 297 259 | 358 260 | 307 261 | 488 262 | 396 263 | 417 264 | 402 265 | 426 266 | 372 267 | 502 268 | 256 269 | 352 270 | 413 271 | 431 272 | 470 273 | 351 274 | 266 275 | 443 276 | 299 277 | 382 278 | 267 279 | 308 280 | 384 281 | 374 282 | 362 283 | 461 284 | 322 285 | 278 286 | 475 287 | 275 288 | 450 289 | 367 290 | 394 291 | 287 292 | 342 293 | 258 294 | 349 295 | 456 296 | 379 297 | 377 298 | 340 299 | 364 300 | 338 301 | 429 302 | 483 303 | 290 304 | 347 305 | 400 306 | 323 307 | 371 308 | 434 309 | 327 310 | 411 311 | 309 312 | 365 313 | 498 314 | 324 315 | 373 316 | 294 317 | 481 318 | 432 319 | 286 320 | 312 321 | 321 322 | 479 323 | 466 324 | 280 325 | 468 326 | 330 327 | 505 328 | 359 329 | 315 330 | 380 331 | 300 332 | 311 333 | 419 334 | 310 335 | 320 336 | 366 337 | 430 338 | 302 339 | 334 340 | 511 341 | 389 342 | 472 343 | 381 344 | 484 345 | 469 346 | 376 347 | 487 348 | 348 349 | 387 350 | 333 351 | 346 352 | 356 353 | 415 354 | 317 355 | 446 356 | 494 357 | 509 358 | 503 359 | 506 360 | 285 361 | 496 362 | 296 363 | 401 364 | 319 365 | 361 366 | 476 367 | 451 368 | 289 369 | 357 370 | 375 371 | 480 372 | 306 373 | 491 374 | 392 375 | 474 376 | 360 377 | 279 378 | 460 379 | 383 380 | 508 381 | 490 382 | 485 383 | 507 384 | 467 385 | 530 386 | 640 387 | 679 388 | 658 389 | 531 390 | 680 391 | 553 392 | 703 393 | 699 394 | 554 
395 | 660 396 | 669 397 | 661 398 | 544 399 | 565 400 | 547 401 | 676 402 | 662 403 | 570 404 | 543 405 | 701 406 | 519 407 | 647 408 | 686 409 | 704 410 | 566 411 | 697 412 | 533 413 | 663 414 | 747 415 | 694 416 | 681 417 | 638 418 | 671 419 | 696 420 | 667 421 | 558 422 | 631 423 | 682 424 | 652 425 | 687 426 | 568 427 | 675 428 | 690 429 | 560 430 | 692 431 | 739 432 | 552 433 | 691 434 | 548 435 | 538 436 | 648 437 | 605 438 | 651 439 | 737 440 | 545 441 | 758 442 | 688 443 | 721 444 | 608 445 | 573 446 | 627 447 | 678 448 | 514 449 | 665 450 | 539 451 | 525 452 | 766 453 | 557 454 | 576 455 | 655 456 | 537 457 | 534 458 | 711 459 | 659 460 | 604 461 | 715 462 | 695 463 | 705 464 | 684 465 | 683 466 | 706 467 | 732 468 | 601 469 | 724 470 | 609 471 | 521 472 | 629 473 | 603 474 | 572 475 | 540 476 | 535 477 | 520 478 | 564 479 | 625 480 | 664 481 | 529 482 | 738 483 | 657 484 | 653 485 | 518 486 | 517 487 | 546 488 | 719 489 | 528 490 | 536 491 | 729 492 | 717 493 | 561 494 | 562 495 | 582 496 | 755 497 | 622 498 | 614 499 | 708 500 | 674 501 | 615 502 | 619 503 | 689 504 | 633 505 | 589 506 | 709 507 | 764 508 | 611 509 | 677 510 | 643 511 | 522 512 | 767 513 | 594 514 | 642 515 | 734 516 | 644 517 | 516 518 | 559 519 | 524 520 | 735 521 | 656 522 | 563 523 | 639 524 | 746 525 | 685 526 | 748 527 | 710 528 | 670 529 | 756 530 | 598 531 | 555 532 | 588 533 | 673 534 | 742 535 | 523 536 | 745 537 | 736 538 | 569 539 | 612 540 | 578 541 | 571 542 | 596 543 | 646 544 | 723 545 | 515 546 | 617 547 | 751 548 | 712 549 | 618 550 | 532 551 | 526 552 | 749 553 | 728 554 | 752 555 | 741 556 | 698 557 | 626 558 | 579 559 | 591 560 | 549 561 | 672 562 | 761 563 | 750 564 | 733 565 | 586 566 | 590 567 | 606 568 | 632 569 | 753 570 | 765 571 | 668 572 | 744 573 | 623 574 | 740 575 | 641 576 | 693 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_attn_768_kl_5_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 67 2 | 213 3 | 99 4 | 115 5 | 79 6 | 65 7 | 125 8 | 96 9 | 208 10 | 90 11 | 103 12 | 88 13 | 211 14 | 248 15 | 94 16 | 119 17 | 81 18 | 218 19 | 70 20 | 108 21 | 92 22 | 227 23 | 38 24 | 84 25 | 10 26 | 186 27 | 193 28 | 82 29 | 251 30 | 95 31 | 69 32 | 126 33 | 198 34 | 254 35 | 195 36 | 168 37 | 15 38 | 235 39 | 117 40 | 196 41 | 59 42 | 203 43 | 2 44 | 61 45 | 40 46 | 182 47 | 49 48 | 209 49 | 113 50 | 98 51 | 255 52 | 222 53 | 135 54 | 221 55 | 100 56 | 174 57 | 75 58 | 214 59 | 76 60 | 240 61 | 5 62 | 234 63 | 220 64 | 252 65 | 249 66 | 120 67 | 16 68 | 245 69 | 54 70 | 202 71 | 197 72 | 112 73 | 66 74 | 233 75 | 253 76 | 74 77 | 12 78 | 21 79 | 56 80 | 7 81 | 27 82 | 178 83 | 71 84 | 232 85 | 41 86 | 191 87 | 35 88 | 123 89 | 153 90 | 105 91 | 8 92 | 46 93 | 34 94 | 23 95 | 19 96 | 20 97 | 33 98 | 216 99 | 146 100 | 62 101 | 181 102 | 192 103 | 142 104 | 175 105 | 144 106 | 207 107 | 160 108 | 229 109 | 215 110 | 63 111 | 189 112 | 158 113 | 114 114 | 116 115 | 51 116 | 237 117 | 31 118 | 149 119 | 36 120 | 118 121 | 50 122 | 14 123 | 176 124 | 124 125 | 57 126 | 205 127 | 78 128 | 201 129 | 163 130 | 83 131 | 6 132 | 224 133 | 194 134 | 242 135 | 122 136 | 127 137 | 47 138 | 45 139 | 55 140 | 13 141 | 141 142 | 44 143 | 3 144 | 104 145 | 157 146 | 183 147 | 140 148 | 238 149 | 165 150 | 159 151 | 30 152 | 4 153 | 29 154 | 43 155 | 204 156 | 24 157 | 22 158 | 28 159 | 39 160 | 97 161 | 133 162 | 188 163 | 173 164 | 132 165 | 139 166 | 172 167 | 111 168 
| 58 169 | 77 170 | 219 171 | 166 172 | 171 173 | 60 174 | 17 175 | 162 176 | 1 177 | 48 178 | 156 179 | 199 180 | 151 181 | 180 182 | 109 183 | 131 184 | 11 185 | 110 186 | 243 187 | 26 188 | 136 189 | 164 190 | 190 191 | 0 192 | 155 193 | 296 194 | 295 195 | 319 196 | 315 197 | 309 198 | 260 199 | 271 200 | 306 201 | 311 202 | 354 203 | 318 204 | 283 205 | 302 206 | 375 207 | 299 208 | 287 209 | 314 210 | 267 211 | 290 212 | 289 213 | 376 214 | 298 215 | 350 216 | 258 217 | 272 218 | 307 219 | 262 220 | 264 221 | 336 222 | 305 223 | 288 224 | 328 225 | 285 226 | 367 227 | 293 228 | 332 229 | 378 230 | 353 231 | 382 232 | 442 233 | 256 234 | 263 235 | 286 236 | 261 237 | 265 238 | 341 239 | 349 240 | 297 241 | 352 242 | 383 243 | 303 244 | 347 245 | 300 246 | 368 247 | 447 248 | 274 249 | 346 250 | 360 251 | 327 252 | 361 253 | 365 254 | 339 255 | 321 256 | 338 257 | 294 258 | 337 259 | 304 260 | 441 261 | 281 262 | 374 263 | 402 264 | 426 265 | 355 266 | 401 267 | 313 268 | 434 269 | 397 270 | 340 271 | 435 272 | 380 273 | 348 274 | 320 275 | 276 276 | 316 277 | 292 278 | 357 279 | 445 280 | 425 281 | 405 282 | 399 283 | 282 284 | 301 285 | 257 286 | 387 287 | 379 288 | 404 289 | 400 290 | 477 291 | 377 292 | 439 293 | 417 294 | 345 295 | 436 296 | 427 297 | 331 298 | 384 299 | 403 300 | 389 301 | 444 302 | 364 303 | 342 304 | 268 305 | 310 306 | 443 307 | 393 308 | 429 309 | 329 310 | 395 311 | 408 312 | 385 313 | 440 314 | 344 315 | 322 316 | 430 317 | 388 318 | 416 319 | 420 320 | 467 321 | 371 322 | 273 323 | 482 324 | 476 325 | 323 326 | 392 327 | 390 328 | 312 329 | 421 330 | 437 331 | 373 332 | 394 333 | 372 334 | 431 335 | 461 336 | 428 337 | 446 338 | 362 339 | 418 340 | 473 341 | 359 342 | 438 343 | 432 344 | 414 345 | 278 346 | 502 347 | 472 348 | 291 349 | 280 350 | 334 351 | 335 352 | 277 353 | 498 354 | 456 355 | 433 356 | 324 357 | 508 358 | 497 359 | 471 360 | 493 361 | 406 362 | 510 363 | 270 364 | 450 365 | 419 366 | 459 367 | 488 368 | 495 369 | 409 370 | 470 371 | 412 372 | 396 373 | 483 374 | 333 375 | 369 376 | 491 377 | 458 378 | 484 379 | 468 380 | 269 381 | 381 382 | 464 383 | 507 384 | 284 385 | 675 386 | 649 387 | 640 388 | 692 389 | 648 390 | 664 391 | 535 392 | 674 393 | 678 394 | 701 395 | 641 396 | 518 397 | 657 398 | 651 399 | 676 400 | 679 401 | 547 402 | 738 403 | 562 404 | 699 405 | 733 406 | 686 407 | 659 408 | 574 409 | 666 410 | 750 411 | 662 412 | 540 413 | 685 414 | 561 415 | 697 416 | 516 417 | 688 418 | 555 419 | 512 420 | 541 421 | 680 422 | 652 423 | 665 424 | 553 425 | 524 426 | 556 427 | 711 428 | 643 429 | 519 430 | 554 431 | 514 432 | 573 433 | 646 434 | 645 435 | 669 436 | 668 437 | 702 438 | 654 439 | 548 440 | 743 441 | 751 442 | 551 443 | 744 444 | 543 445 | 700 446 | 537 447 | 663 448 | 731 449 | 730 450 | 650 451 | 536 452 | 542 453 | 572 454 | 703 455 | 521 456 | 527 457 | 756 458 | 671 459 | 694 460 | 740 461 | 656 462 | 660 463 | 590 464 | 592 465 | 714 466 | 706 467 | 698 468 | 569 469 | 682 470 | 560 471 | 639 472 | 517 473 | 644 474 | 522 475 | 552 476 | 746 477 | 642 478 | 661 479 | 762 480 | 565 481 | 528 482 | 755 483 | 530 484 | 515 485 | 534 486 | 695 487 | 595 488 | 619 489 | 544 490 | 677 491 | 707 492 | 721 493 | 566 494 | 735 495 | 704 496 | 723 497 | 687 498 | 722 499 | 598 500 | 716 501 | 577 502 | 570 503 | 767 504 | 586 505 | 708 506 | 520 507 | 603 508 | 558 509 | 725 510 | 684 511 | 734 512 | 578 513 | 625 514 | 673 515 | 620 516 | 617 517 | 612 518 | 753 519 | 613 520 | 593 521 | 637 522 | 587 523 | 757 524 | 760 
525 | 523 526 | 614 527 | 604 528 | 623 529 | 765 530 | 606 531 | 584 532 | 710 533 | 539 534 | 647 535 | 630 536 | 739 537 | 568 538 | 672 539 | 628 540 | 736 541 | 758 542 | 631 543 | 709 544 | 636 545 | 632 546 | 601 547 | 582 548 | 607 549 | 763 550 | 621 551 | 653 552 | 717 553 | 615 554 | 559 555 | 638 556 | 670 557 | 713 558 | 531 559 | 690 560 | 610 561 | 589 562 | 633 563 | 575 564 | 571 565 | 576 566 | 683 567 | 616 568 | 635 569 | 580 570 | 591 571 | 629 572 | 533 573 | 597 574 | 583 575 | 618 576 | 596 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_attn_768_kl_9_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 51 2 | 20 3 | 82 4 | 243 5 | 9 6 | 64 7 | 48 8 | 8 9 | 209 10 | 49 11 | 2 12 | 81 13 | 62 14 | 125 15 | 24 16 | 109 17 | 212 18 | 115 19 | 141 20 | 221 21 | 17 22 | 66 23 | 189 24 | 224 25 | 104 26 | 11 27 | 73 28 | 241 29 | 89 30 | 217 31 | 7 32 | 220 33 | 152 34 | 122 35 | 136 36 | 98 37 | 77 38 | 240 39 | 4 40 | 254 41 | 244 42 | 93 43 | 12 44 | 249 45 | 27 46 | 248 47 | 90 48 | 120 49 | 106 50 | 56 51 | 237 52 | 117 53 | 61 54 | 71 55 | 65 56 | 101 57 | 34 58 | 5 59 | 47 60 | 110 61 | 112 62 | 107 63 | 23 64 | 21 65 | 123 66 | 63 67 | 14 68 | 94 69 | 113 70 | 140 71 | 111 72 | 213 73 | 214 74 | 59 75 | 226 76 | 100 77 | 234 78 | 68 79 | 80 80 | 44 81 | 96 82 | 78 83 | 236 84 | 40 85 | 87 86 | 79 87 | 154 88 | 211 89 | 230 90 | 173 91 | 105 92 | 210 93 | 160 94 | 55 95 | 198 96 | 86 97 | 26 98 | 195 99 | 108 100 | 137 101 | 84 102 | 43 103 | 116 104 | 3 105 | 215 106 | 15 107 | 76 108 | 147 109 | 39 110 | 156 111 | 6 112 | 103 113 | 54 114 | 92 115 | 32 116 | 158 117 | 203 118 | 124 119 | 199 120 | 118 121 | 38 122 | 162 123 | 235 124 | 232 125 | 204 126 | 231 127 | 194 128 | 255 129 | 50 130 | 153 131 | 129 132 | 114 133 | 74 134 | 1 135 | 253 136 | 28 137 | 233 138 | 127 139 | 91 140 | 46 141 | 97 142 | 193 143 | 191 144 | 205 145 | 19 146 | 171 147 | 13 148 | 150 149 | 219 150 | 22 151 | 30 152 | 18 153 | 196 154 | 179 155 | 186 156 | 247 157 | 25 158 | 95 159 | 132 160 | 225 161 | 102 162 | 188 163 | 130 164 | 170 165 | 190 166 | 53 167 | 16 168 | 163 169 | 183 170 | 138 171 | 208 172 | 45 173 | 229 174 | 206 175 | 175 176 | 88 177 | 245 178 | 202 179 | 67 180 | 155 181 | 176 182 | 33 183 | 128 184 | 126 185 | 178 186 | 41 187 | 133 188 | 184 189 | 200 190 | 0 191 | 239 192 | 250 193 | 323 194 | 376 195 | 370 196 | 360 197 | 342 198 | 362 199 | 320 200 | 372 201 | 322 202 | 368 203 | 329 204 | 340 205 | 347 206 | 343 207 | 321 208 | 369 209 | 351 210 | 324 211 | 328 212 | 326 213 | 363 214 | 378 215 | 349 216 | 316 217 | 338 218 | 325 219 | 359 220 | 379 221 | 367 222 | 365 223 | 336 224 | 333 225 | 375 226 | 332 227 | 341 228 | 292 229 | 357 230 | 345 231 | 356 232 | 364 233 | 380 234 | 334 235 | 327 236 | 284 237 | 309 238 | 371 239 | 337 240 | 339 241 | 361 242 | 377 243 | 330 244 | 331 245 | 355 246 | 291 247 | 346 248 | 353 249 | 258 250 | 486 251 | 383 252 | 354 253 | 287 254 | 498 255 | 315 256 | 473 257 | 352 258 | 311 259 | 350 260 | 279 261 | 465 262 | 306 263 | 348 264 | 271 265 | 289 266 | 469 267 | 262 268 | 507 269 | 282 270 | 312 271 | 273 272 | 274 273 | 260 274 | 295 275 | 263 276 | 303 277 | 310 278 | 286 279 | 504 280 | 476 281 | 313 282 | 495 283 | 471 284 | 503 285 | 366 286 | 293 287 | 301 288 | 459 289 | 453 290 | 494 291 | 455 292 | 373 293 | 505 294 | 509 295 | 448 296 | 267 297 | 490 298 | 496 299 | 
475 300 | 451 301 | 460 302 | 506 303 | 298 304 | 314 305 | 283 306 | 300 307 | 297 308 | 480 309 | 472 310 | 280 311 | 374 312 | 302 313 | 483 314 | 281 315 | 288 316 | 264 317 | 269 318 | 511 319 | 474 320 | 508 321 | 493 322 | 468 323 | 261 324 | 308 325 | 272 326 | 299 327 | 420 328 | 478 329 | 268 330 | 461 331 | 294 332 | 482 333 | 257 334 | 492 335 | 429 336 | 317 337 | 456 338 | 265 339 | 499 340 | 491 341 | 502 342 | 304 343 | 417 344 | 256 345 | 344 346 | 387 347 | 422 348 | 464 349 | 319 350 | 277 351 | 270 352 | 450 353 | 481 354 | 307 355 | 485 356 | 454 357 | 463 358 | 487 359 | 305 360 | 500 361 | 424 362 | 405 363 | 358 364 | 399 365 | 386 366 | 296 367 | 290 368 | 462 369 | 385 370 | 441 371 | 266 372 | 419 373 | 416 374 | 501 375 | 435 376 | 443 377 | 470 378 | 444 379 | 389 380 | 412 381 | 510 382 | 436 383 | 497 384 | 447 385 | 665 386 | 646 387 | 741 388 | 547 389 | 683 390 | 701 391 | 687 392 | 696 393 | 656 394 | 727 395 | 641 396 | 657 397 | 686 398 | 649 399 | 679 400 | 644 401 | 538 402 | 703 403 | 519 404 | 559 405 | 728 406 | 552 407 | 716 408 | 680 409 | 658 410 | 740 411 | 660 412 | 671 413 | 700 414 | 763 415 | 666 416 | 558 417 | 719 418 | 767 419 | 555 420 | 668 421 | 675 422 | 659 423 | 698 424 | 751 425 | 518 426 | 697 427 | 694 428 | 710 429 | 674 430 | 689 431 | 648 432 | 564 433 | 642 434 | 692 435 | 743 436 | 758 437 | 571 438 | 647 439 | 528 440 | 669 441 | 705 442 | 695 443 | 678 444 | 643 445 | 650 446 | 718 447 | 762 448 | 567 449 | 565 450 | 759 451 | 682 452 | 755 453 | 670 454 | 533 455 | 708 456 | 750 457 | 677 458 | 661 459 | 522 460 | 754 461 | 640 462 | 516 463 | 529 464 | 540 465 | 563 466 | 663 467 | 573 468 | 526 469 | 667 470 | 724 471 | 546 472 | 515 473 | 690 474 | 753 475 | 688 476 | 711 477 | 673 478 | 693 479 | 539 480 | 525 481 | 651 482 | 721 483 | 702 484 | 542 485 | 541 486 | 765 487 | 534 488 | 672 489 | 726 490 | 735 491 | 576 492 | 712 493 | 766 494 | 717 495 | 549 496 | 520 497 | 633 498 | 722 499 | 544 500 | 742 501 | 517 502 | 734 503 | 597 504 | 745 505 | 676 506 | 720 507 | 764 508 | 548 509 | 584 510 | 730 511 | 664 512 | 729 513 | 632 514 | 574 515 | 662 516 | 631 517 | 715 518 | 554 519 | 713 520 | 637 521 | 733 522 | 527 523 | 593 524 | 575 525 | 543 526 | 532 527 | 537 528 | 530 529 | 639 530 | 614 531 | 550 532 | 707 533 | 761 534 | 739 535 | 566 536 | 536 537 | 513 538 | 617 539 | 748 540 | 704 541 | 749 542 | 561 543 | 691 544 | 653 545 | 591 546 | 588 547 | 587 548 | 523 549 | 581 550 | 605 551 | 752 552 | 756 553 | 562 554 | 744 555 | 583 556 | 746 557 | 731 558 | 601 559 | 569 560 | 627 561 | 553 562 | 654 563 | 635 564 | 582 565 | 625 566 | 652 567 | 630 568 | 535 569 | 747 570 | 599 571 | 655 572 | 551 573 | 636 574 | 760 575 | 623 576 | 723 577 | -------------------------------------------------------------------------------- /importance/kl2k/Deit_base_12_attn_768_kl_7_2k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 118 2 | 109 3 | 65 4 | 210 5 | 100 6 | 99 7 | 107 8 | 119 9 | 117 10 | 124 11 | 80 12 | 235 13 | 121 14 | 92 15 | 81 16 | 221 17 | 90 18 | 247 19 | 87 20 | 114 21 | 206 22 | 84 23 | 126 24 | 102 25 | 105 26 | 123 27 | 125 28 | 252 29 | 108 30 | 95 31 | 69 32 | 68 33 | 196 34 | 93 35 | 111 36 | 231 37 | 239 38 | 106 39 | 72 40 | 250 41 | 209 42 | 197 43 | 195 44 | 246 45 | 115 46 | 10 47 | 224 48 | 66 49 | 253 50 | 233 51 | 211 52 | 198 53 | 30 54 | 127 55 | 122 56 | 22 57 | 230 58 | 112 59 | 236 60 | 255 61 | 234 62 | 82 63 
| 5 64 | 54 65 | 113 66 | 110 67 | 8 68 | 26 69 | 77 70 | 201 71 | 242 72 | 101 73 | 71 74 | 240 75 | 232 76 | 63 77 | 229 78 | 79 79 | 243 80 | 1 81 | 53 82 | 18 83 | 244 84 | 38 85 | 3 86 | 25 87 | 219 88 | 241 89 | 248 90 | 97 91 | 104 92 | 228 93 | 76 94 | 29 95 | 89 96 | 204 97 | 98 98 | 91 99 | 57 100 | 203 101 | 44 102 | 251 103 | 52 104 | 14 105 | 212 106 | 199 107 | 86 108 | 94 109 | 27 110 | 45 111 | 48 112 | 200 113 | 37 114 | 13 115 | 34 116 | 227 117 | 136 118 | 220 119 | 174 120 | 50 121 | 17 122 | 41 123 | 74 124 | 205 125 | 36 126 | 178 127 | 208 128 | 218 129 | 161 130 | 249 131 | 222 132 | 33 133 | 193 134 | 9 135 | 75 136 | 254 137 | 61 138 | 245 139 | 145 140 | 132 141 | 42 142 | 213 143 | 28 144 | 116 145 | 120 146 | 135 147 | 21 148 | 32 149 | 16 150 | 11 151 | 160 152 | 51 153 | 238 154 | 55 155 | 4 156 | 180 157 | 149 158 | 35 159 | 151 160 | 64 161 | 78 162 | 138 163 | 46 164 | 60 165 | 166 166 | 183 167 | 19 168 | 154 169 | 237 170 | 188 171 | 214 172 | 172 173 | 153 174 | 40 175 | 191 176 | 49 177 | 96 178 | 165 179 | 162 180 | 20 181 | 156 182 | 176 183 | 7 184 | 169 185 | 141 186 | 130 187 | 131 188 | 164 189 | 58 190 | 202 191 | 150 192 | 12 193 | 298 194 | 462 195 | 274 196 | 260 197 | 289 198 | 284 199 | 270 200 | 317 201 | 312 202 | 491 203 | 492 204 | 311 205 | 300 206 | 506 207 | 484 208 | 337 209 | 319 210 | 375 211 | 379 212 | 451 213 | 498 214 | 377 215 | 302 216 | 294 217 | 332 218 | 264 219 | 307 220 | 257 221 | 348 222 | 475 223 | 286 224 | 314 225 | 306 226 | 450 227 | 452 228 | 279 229 | 504 230 | 488 231 | 500 232 | 494 233 | 490 234 | 369 235 | 269 236 | 335 237 | 265 238 | 282 239 | 293 240 | 301 241 | 380 242 | 343 243 | 509 244 | 277 245 | 315 246 | 309 247 | 373 248 | 271 249 | 273 250 | 296 251 | 482 252 | 496 253 | 354 254 | 367 255 | 256 256 | 258 257 | 493 258 | 295 259 | 331 260 | 374 261 | 275 262 | 481 263 | 263 264 | 459 265 | 469 266 | 361 267 | 333 268 | 352 269 | 486 270 | 453 271 | 339 272 | 468 273 | 454 274 | 483 275 | 463 276 | 290 277 | 318 278 | 299 279 | 455 280 | 342 281 | 341 282 | 385 283 | 288 284 | 456 285 | 505 286 | 345 287 | 365 288 | 471 289 | 438 290 | 276 291 | 308 292 | 507 293 | 442 294 | 392 295 | 262 296 | 329 297 | 368 298 | 417 299 | 480 300 | 511 301 | 281 302 | 310 303 | 502 304 | 478 305 | 472 306 | 470 307 | 324 308 | 429 309 | 508 310 | 437 311 | 272 312 | 325 313 | 355 314 | 358 315 | 503 316 | 460 317 | 351 318 | 268 319 | 322 320 | 370 321 | 259 322 | 382 323 | 405 324 | 432 325 | 328 326 | 323 327 | 381 328 | 476 329 | 485 330 | 353 331 | 305 332 | 477 333 | 473 334 | 362 335 | 501 336 | 316 337 | 285 338 | 350 339 | 283 340 | 489 341 | 474 342 | 404 343 | 321 344 | 330 345 | 267 346 | 499 347 | 495 348 | 461 349 | 366 350 | 399 351 | 395 352 | 466 353 | 425 354 | 479 355 | 336 356 | 360 357 | 396 358 | 340 359 | 418 360 | 465 361 | 402 362 | 430 363 | 433 364 | 439 365 | 431 366 | 378 367 | 408 368 | 422 369 | 387 370 | 376 371 | 427 372 | 389 373 | 347 374 | 266 375 | 363 376 | 398 377 | 359 378 | 327 379 | 303 380 | 444 381 | 394 382 | 326 383 | 434 384 | 436 385 | 727 386 | 764 387 | 735 388 | 723 389 | 741 390 | 752 391 | 763 392 | 532 393 | 677 394 | 748 395 | 714 396 | 737 397 | 767 398 | 719 399 | 575 400 | 749 401 | 738 402 | 705 403 | 747 404 | 721 405 | 745 406 | 718 407 | 708 408 | 739 409 | 750 410 | 570 411 | 736 412 | 758 413 | 730 414 | 760 415 | 715 416 | 689 417 | 534 418 | 682 419 | 577 420 | 544 421 | 679 422 | 550 423 | 680 424 | 700 425 | 728 426 | 516 427 | 699 428 | 734 429 
| 667 430 | 724 431 | 757 432 | 622 433 | 549 434 | 562 435 | 522 436 | 543 437 | 573 438 | 766 439 | 692 440 | 704 441 | 756 442 | 528 443 | 653 444 | 517 445 | 527 446 | 600 447 | 515 448 | 712 449 | 526 450 | 732 451 | 601 452 | 520 453 | 555 454 | 591 455 | 751 456 | 619 457 | 552 458 | 686 459 | 678 460 | 709 461 | 722 462 | 683 463 | 512 464 | 729 465 | 563 466 | 647 467 | 753 468 | 637 469 | 592 470 | 529 471 | 545 472 | 652 473 | 566 474 | 670 475 | 742 476 | 554 477 | 596 478 | 609 479 | 541 480 | 578 481 | 740 482 | 649 483 | 641 484 | 664 485 | 759 486 | 654 487 | 659 488 | 628 489 | 560 490 | 603 491 | 731 492 | 663 493 | 762 494 | 561 495 | 568 496 | 598 497 | 586 498 | 672 499 | 519 500 | 614 501 | 651 502 | 635 503 | 593 504 | 530 505 | 582 506 | 660 507 | 513 508 | 604 509 | 605 510 | 574 511 | 625 512 | 546 513 | 754 514 | 594 515 | 565 516 | 548 517 | 646 518 | 523 519 | 531 520 | 606 521 | 616 522 | 648 523 | 713 524 | 581 525 | 755 526 | 564 527 | 676 528 | 518 529 | 524 530 | 634 531 | 537 532 | 588 533 | 761 534 | 583 535 | 539 536 | 557 537 | 514 538 | 657 539 | 567 540 | 535 541 | 674 542 | 556 543 | 569 544 | 571 545 | 521 546 | 613 547 | 626 548 | 629 549 | 681 550 | 706 551 | 698 552 | 551 553 | 733 554 | 642 555 | 631 556 | 658 557 | 691 558 | 707 559 | 655 560 | 576 561 | 662 562 | 703 563 | 587 564 | 621 565 | 547 566 | 559 567 | 542 568 | 710 569 | 669 570 | 579 571 | 540 572 | 638 573 | 615 574 | 538 575 | 696 576 | 618 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_attn_768_kl_3_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 154 2 | 143 3 | 130 4 | 139 5 | 180 6 | 178 7 | 168 8 | 150 9 | 108 10 | 146 11 | 8 12 | 155 13 | 58 14 | 145 15 | 160 16 | 179 17 | 96 18 | 175 19 | 50 20 | 132 21 | 87 22 | 157 23 | 123 24 | 191 25 | 111 26 | 148 27 | 116 28 | 141 29 | 144 30 | 188 31 | 177 32 | 72 33 | 159 34 | 46 35 | 149 36 | 52 37 | 129 38 | 128 39 | 61 40 | 26 41 | 19 42 | 107 43 | 103 44 | 31 45 | 165 46 | 51 47 | 109 48 | 119 49 | 95 50 | 153 51 | 39 52 | 30 53 | 41 54 | 115 55 | 125 56 | 4 57 | 54 58 | 10 59 | 105 60 | 68 61 | 89 62 | 91 63 | 94 64 | 24 65 | 185 66 | 49 67 | 110 68 | 32 69 | 134 70 | 114 71 | 3 72 | 9 73 | 38 74 | 120 75 | 127 76 | 28 77 | 101 78 | 66 79 | 190 80 | 104 81 | 11 82 | 170 83 | 133 84 | 162 85 | 22 86 | 18 87 | 122 88 | 121 89 | 79 90 | 100 91 | 76 92 | 80 93 | 184 94 | 88 95 | 43 96 | 47 97 | 59 98 | 78 99 | 74 100 | 136 101 | 113 102 | 92 103 | 12 104 | 97 105 | 164 106 | 15 107 | 239 108 | 221 109 | 65 110 | 163 111 | 1 112 | 225 113 | 37 114 | 81 115 | 138 116 | 25 117 | 60 118 | 45 119 | 117 120 | 124 121 | 35 122 | 71 123 | 63 124 | 75 125 | 2 126 | 135 127 | 137 128 | 102 129 | 147 130 | 5 131 | 234 132 | 201 133 | 216 134 | 182 135 | 172 136 | 70 137 | 83 138 | 242 139 | 253 140 | 197 141 | 67 142 | 112 143 | 152 144 | 193 145 | 7 146 | 86 147 | 106 148 | 206 149 | 53 150 | 56 151 | 173 152 | 240 153 | 252 154 | 189 155 | 36 156 | 21 157 | 23 158 | 85 159 | 243 160 | 202 161 | 244 162 | 235 163 | 254 164 | 223 165 | 82 166 | 238 167 | 220 168 | 229 169 | 27 170 | 209 171 | 212 172 | 233 173 | 207 174 | 69 175 | 33 176 | 187 177 | 245 178 | 55 179 | 158 180 | 98 181 | 40 182 | 84 183 | 204 184 | 16 185 | 215 186 | 219 187 | 211 188 | 224 189 | 198 190 | 195 191 | 186 192 | 217 193 | 312 194 | 369 195 | 353 196 | 321 197 | 328 198 | 372 199 | 339 200 | 346 201 | 383 202 | 340 203 | 
376 204 | 259 205 | 336 206 | 271 207 | 344 208 | 352 209 | 331 210 | 364 211 | 337 212 | 318 213 | 311 214 | 280 215 | 374 216 | 288 217 | 302 218 | 295 219 | 323 220 | 378 221 | 362 222 | 355 223 | 359 224 | 306 225 | 320 226 | 287 227 | 333 228 | 285 229 | 335 230 | 334 231 | 330 232 | 263 233 | 276 234 | 379 235 | 305 236 | 294 237 | 269 238 | 303 239 | 351 240 | 367 241 | 278 242 | 286 243 | 257 244 | 266 245 | 327 246 | 381 247 | 319 248 | 258 249 | 332 250 | 298 251 | 264 252 | 300 253 | 370 254 | 310 255 | 262 256 | 377 257 | 325 258 | 308 259 | 279 260 | 462 261 | 474 262 | 268 263 | 267 264 | 329 265 | 313 266 | 483 267 | 499 268 | 457 269 | 343 270 | 380 271 | 293 272 | 471 273 | 479 274 | 505 275 | 345 276 | 502 277 | 493 278 | 366 279 | 438 280 | 492 281 | 309 282 | 342 283 | 281 284 | 458 285 | 272 286 | 503 287 | 506 288 | 472 289 | 482 290 | 360 291 | 498 292 | 508 293 | 470 294 | 304 295 | 348 296 | 475 297 | 480 298 | 456 299 | 464 300 | 275 301 | 451 302 | 488 303 | 478 304 | 450 305 | 317 306 | 467 307 | 460 308 | 486 309 | 453 310 | 489 311 | 469 312 | 497 313 | 265 314 | 461 315 | 500 316 | 504 317 | 490 318 | 459 319 | 301 320 | 382 321 | 496 322 | 455 323 | 507 324 | 476 325 | 270 326 | 307 327 | 501 328 | 428 329 | 297 330 | 409 331 | 393 332 | 350 333 | 347 334 | 406 335 | 509 336 | 511 337 | 477 338 | 324 339 | 371 340 | 466 341 | 403 342 | 443 343 | 481 344 | 292 345 | 396 346 | 395 347 | 314 348 | 260 349 | 299 350 | 368 351 | 452 352 | 454 353 | 407 354 | 417 355 | 397 356 | 420 357 | 442 358 | 416 359 | 437 360 | 296 361 | 448 362 | 365 363 | 468 364 | 487 365 | 495 366 | 413 367 | 463 368 | 484 369 | 465 370 | 510 371 | 256 372 | 261 373 | 431 374 | 277 375 | 384 376 | 373 377 | 386 378 | 338 379 | 435 380 | 388 381 | 399 382 | 361 383 | 494 384 | 283 385 | 624 386 | 612 387 | 594 388 | 601 389 | 592 390 | 620 391 | 634 392 | 629 393 | 639 394 | 582 395 | 618 396 | 590 397 | 614 398 | 606 399 | 637 400 | 627 401 | 587 402 | 589 403 | 647 404 | 625 405 | 649 406 | 677 407 | 584 408 | 658 409 | 632 410 | 684 411 | 619 412 | 600 413 | 591 414 | 617 415 | 673 416 | 621 417 | 616 418 | 581 419 | 631 420 | 650 421 | 598 422 | 683 423 | 644 424 | 630 425 | 577 426 | 604 427 | 622 428 | 646 429 | 689 430 | 715 431 | 679 432 | 705 433 | 698 434 | 635 435 | 610 436 | 645 437 | 701 438 | 593 439 | 664 440 | 748 441 | 597 442 | 608 443 | 660 444 | 762 445 | 665 446 | 666 447 | 595 448 | 603 449 | 613 450 | 736 451 | 609 452 | 738 453 | 693 454 | 676 455 | 767 456 | 707 457 | 648 458 | 754 459 | 763 460 | 718 461 | 605 462 | 714 463 | 709 464 | 655 465 | 725 466 | 766 467 | 758 468 | 724 469 | 734 470 | 675 471 | 678 472 | 710 473 | 702 474 | 586 475 | 703 476 | 626 477 | 765 478 | 750 479 | 722 480 | 699 481 | 704 482 | 607 483 | 735 484 | 713 485 | 708 486 | 742 487 | 730 488 | 674 489 | 753 490 | 663 491 | 752 492 | 654 493 | 638 494 | 566 495 | 585 496 | 723 497 | 726 498 | 578 499 | 731 500 | 756 501 | 643 502 | 732 503 | 682 504 | 670 505 | 686 506 | 656 507 | 721 508 | 611 509 | 697 510 | 695 511 | 712 512 | 533 513 | 691 514 | 733 515 | 720 516 | 652 517 | 579 518 | 694 519 | 596 520 | 583 521 | 711 522 | 669 523 | 744 524 | 719 525 | 516 526 | 755 527 | 760 528 | 761 529 | 685 530 | 741 531 | 706 532 | 764 533 | 759 534 | 692 535 | 524 536 | 740 537 | 554 538 | 757 539 | 542 540 | 569 541 | 739 542 | 615 543 | 544 544 | 557 545 | 623 546 | 521 547 | 743 548 | 549 549 | 745 550 | 751 551 | 653 552 | 728 553 | 668 554 | 662 555 | 560 556 | 529 557 | 599 558 | 659 
559 | 571 560 | 651 561 | 515 562 | 570 563 | 536 564 | 538 565 | 671 566 | 573 567 | 576 568 | 519 569 | 512 570 | 575 571 | 558 572 | 642 573 | 534 574 | 562 575 | 572 576 | 641 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_attn_768_kl_4_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 216 2 | 249 3 | 245 4 | 199 5 | 237 6 | 235 7 | 232 8 | 198 9 | 223 10 | 213 11 | 240 12 | 203 13 | 252 14 | 219 15 | 247 16 | 224 17 | 226 18 | 251 19 | 250 20 | 221 21 | 214 22 | 242 23 | 248 24 | 238 25 | 244 26 | 197 27 | 241 28 | 231 29 | 204 30 | 192 31 | 53 32 | 222 33 | 215 34 | 194 35 | 22 36 | 234 37 | 211 38 | 8 39 | 43 40 | 49 41 | 92 42 | 2 43 | 41 44 | 35 45 | 115 46 | 3 47 | 210 48 | 13 49 | 228 50 | 44 51 | 10 52 | 17 53 | 62 54 | 217 55 | 243 56 | 63 57 | 71 58 | 230 59 | 47 60 | 105 61 | 36 62 | 122 63 | 11 64 | 124 65 | 37 66 | 14 67 | 208 68 | 4 69 | 70 70 | 51 71 | 56 72 | 27 73 | 196 74 | 233 75 | 59 76 | 107 77 | 202 78 | 121 79 | 209 80 | 31 81 | 96 82 | 12 83 | 212 84 | 162 85 | 118 86 | 84 87 | 50 88 | 254 89 | 88 90 | 30 91 | 46 92 | 113 93 | 127 94 | 151 95 | 98 96 | 207 97 | 76 98 | 45 99 | 33 100 | 77 101 | 39 102 | 111 103 | 106 104 | 104 105 | 218 106 | 52 107 | 66 108 | 174 109 | 200 110 | 158 111 | 123 112 | 184 113 | 95 114 | 143 115 | 146 116 | 166 117 | 9 118 | 128 119 | 164 120 | 135 121 | 163 122 | 155 123 | 136 124 | 181 125 | 100 126 | 141 127 | 126 128 | 6 129 | 239 130 | 112 131 | 149 132 | 229 133 | 69 134 | 156 135 | 102 136 | 16 137 | 61 138 | 29 139 | 64 140 | 94 141 | 97 142 | 117 143 | 189 144 | 153 145 | 180 146 | 21 147 | 82 148 | 206 149 | 93 150 | 114 151 | 91 152 | 1 153 | 55 154 | 74 155 | 120 156 | 7 157 | 133 158 | 119 159 | 182 160 | 159 161 | 185 162 | 26 163 | 80 164 | 175 165 | 131 166 | 176 167 | 79 168 | 15 169 | 227 170 | 172 171 | 83 172 | 171 173 | 68 174 | 28 175 | 201 176 | 145 177 | 48 178 | 134 179 | 132 180 | 150 181 | 89 182 | 186 183 | 116 184 | 144 185 | 65 186 | 190 187 | 130 188 | 19 189 | 60 190 | 129 191 | 0 192 | 72 193 | 383 194 | 360 195 | 372 196 | 363 197 | 334 198 | 348 199 | 364 200 | 368 201 | 378 202 | 338 203 | 320 204 | 323 205 | 354 206 | 380 207 | 349 208 | 350 209 | 373 210 | 352 211 | 288 212 | 327 213 | 367 214 | 329 215 | 369 216 | 337 217 | 366 218 | 346 219 | 341 220 | 315 221 | 382 222 | 261 223 | 270 224 | 258 225 | 263 226 | 262 227 | 336 228 | 359 229 | 379 230 | 345 231 | 264 232 | 322 233 | 300 234 | 305 235 | 268 236 | 278 237 | 361 238 | 330 239 | 340 240 | 328 241 | 362 242 | 326 243 | 298 244 | 295 245 | 324 246 | 273 247 | 265 248 | 280 249 | 285 250 | 430 251 | 435 252 | 339 253 | 266 254 | 283 255 | 343 256 | 440 257 | 344 258 | 385 259 | 365 260 | 335 261 | 347 262 | 375 263 | 301 264 | 381 265 | 417 266 | 438 267 | 355 268 | 308 269 | 307 270 | 331 271 | 342 272 | 412 273 | 291 274 | 351 275 | 309 276 | 419 277 | 371 278 | 299 279 | 433 280 | 279 281 | 325 282 | 321 283 | 387 284 | 399 285 | 314 286 | 400 287 | 281 288 | 290 289 | 401 290 | 294 291 | 408 292 | 332 293 | 429 294 | 272 295 | 286 296 | 434 297 | 356 298 | 376 299 | 357 300 | 425 301 | 384 302 | 422 303 | 388 304 | 374 305 | 311 306 | 393 307 | 391 308 | 274 309 | 450 310 | 398 311 | 271 312 | 397 313 | 477 314 | 418 315 | 462 316 | 441 317 | 431 318 | 395 319 | 411 320 | 436 321 | 293 322 | 319 323 | 260 324 | 492 325 | 413 326 | 427 327 | 486 328 | 469 329 | 318 330 | 403 331 | 445 332 | 415 
333 | 442 334 | 312 335 | 444 336 | 256 337 | 481 338 | 292 339 | 392 340 | 302 341 | 458 342 | 451 343 | 426 344 | 377 345 | 313 346 | 443 347 | 421 348 | 297 349 | 465 350 | 502 351 | 277 352 | 439 353 | 289 354 | 405 355 | 496 356 | 490 357 | 476 358 | 478 359 | 454 360 | 407 361 | 506 362 | 259 363 | 483 364 | 500 365 | 501 366 | 370 367 | 493 368 | 482 369 | 475 370 | 472 371 | 267 372 | 386 373 | 449 374 | 464 375 | 284 376 | 410 377 | 416 378 | 333 379 | 505 380 | 511 381 | 423 382 | 306 383 | 484 384 | 508 385 | 557 386 | 572 387 | 549 388 | 514 389 | 525 390 | 524 391 | 538 392 | 523 393 | 541 394 | 547 395 | 515 396 | 565 397 | 561 398 | 551 399 | 518 400 | 556 401 | 544 402 | 554 403 | 513 404 | 526 405 | 567 406 | 527 407 | 533 408 | 682 409 | 644 410 | 543 411 | 560 412 | 519 413 | 516 414 | 539 415 | 555 416 | 677 417 | 659 418 | 512 419 | 683 420 | 675 421 | 537 422 | 552 423 | 545 424 | 571 425 | 666 426 | 532 427 | 550 428 | 672 429 | 564 430 | 667 431 | 566 432 | 684 433 | 674 434 | 534 435 | 668 436 | 522 437 | 563 438 | 681 439 | 703 440 | 548 441 | 665 442 | 669 443 | 670 444 | 531 445 | 663 446 | 641 447 | 568 448 | 520 449 | 658 450 | 687 451 | 694 452 | 536 453 | 695 454 | 569 455 | 691 456 | 686 457 | 680 458 | 692 459 | 701 460 | 678 461 | 662 462 | 647 463 | 642 464 | 698 465 | 661 466 | 715 467 | 693 468 | 574 469 | 702 470 | 710 471 | 517 472 | 655 473 | 713 474 | 540 475 | 738 476 | 712 477 | 726 478 | 656 479 | 581 480 | 529 481 | 562 482 | 654 483 | 737 484 | 739 485 | 657 486 | 628 487 | 636 488 | 756 489 | 708 490 | 664 491 | 714 492 | 762 493 | 600 494 | 748 495 | 696 496 | 742 497 | 651 498 | 707 499 | 746 500 | 676 501 | 723 502 | 637 503 | 648 504 | 705 505 | 764 506 | 744 507 | 752 508 | 743 509 | 727 510 | 609 511 | 757 512 | 724 513 | 622 514 | 553 515 | 626 516 | 673 517 | 606 518 | 688 519 | 721 520 | 603 521 | 699 522 | 619 523 | 643 524 | 717 525 | 719 526 | 749 527 | 722 528 | 729 529 | 535 530 | 700 531 | 731 532 | 638 533 | 646 534 | 728 535 | 730 536 | 635 537 | 653 538 | 709 539 | 617 540 | 690 541 | 711 542 | 640 543 | 621 544 | 706 545 | 616 546 | 725 547 | 577 548 | 649 549 | 542 550 | 747 551 | 613 552 | 740 553 | 602 554 | 671 555 | 745 556 | 598 557 | 588 558 | 584 559 | 754 560 | 632 561 | 704 562 | 679 563 | 736 564 | 685 565 | 585 566 | 625 567 | 608 568 | 645 569 | 620 570 | 758 571 | 587 572 | 765 573 | 578 574 | 579 575 | 604 576 | 576 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_attn_768_kl_7_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 118 2 | 109 3 | 117 4 | 99 5 | 65 6 | 100 7 | 119 8 | 80 9 | 121 10 | 107 11 | 92 12 | 114 13 | 90 14 | 87 15 | 247 16 | 124 17 | 123 18 | 210 19 | 69 20 | 84 21 | 68 22 | 221 23 | 206 24 | 93 25 | 102 26 | 106 27 | 108 28 | 253 29 | 95 30 | 105 31 | 125 32 | 250 33 | 126 34 | 239 35 | 81 36 | 72 37 | 255 38 | 235 39 | 115 40 | 122 41 | 111 42 | 209 43 | 197 44 | 127 45 | 196 46 | 66 47 | 233 48 | 26 49 | 198 50 | 101 51 | 10 52 | 77 53 | 211 54 | 195 55 | 252 56 | 22 57 | 231 58 | 30 59 | 110 60 | 112 61 | 201 62 | 224 63 | 244 64 | 76 65 | 82 66 | 246 67 | 5 68 | 113 69 | 234 70 | 230 71 | 236 72 | 63 73 | 104 74 | 91 75 | 79 76 | 229 77 | 243 78 | 219 79 | 54 80 | 44 81 | 8 82 | 232 83 | 53 84 | 48 85 | 97 86 | 240 87 | 71 88 | 52 89 | 248 90 | 208 91 | 50 92 | 212 93 | 37 94 | 18 95 | 57 96 | 1 97 | 242 98 | 203 99 | 25 100 | 89 101 | 204 102 | 38 
103 | 86 104 | 199 105 | 227 106 | 29 107 | 241 108 | 98 109 | 200 110 | 74 111 | 178 112 | 251 113 | 13 114 | 94 115 | 136 116 | 61 117 | 3 118 | 174 119 | 34 120 | 161 121 | 220 122 | 17 123 | 254 124 | 27 125 | 9 126 | 228 127 | 205 128 | 45 129 | 42 130 | 51 131 | 19 132 | 214 133 | 11 134 | 213 135 | 183 136 | 32 137 | 16 138 | 245 139 | 41 140 | 21 141 | 249 142 | 14 143 | 40 144 | 218 145 | 35 146 | 120 147 | 131 148 | 36 149 | 193 150 | 132 151 | 151 152 | 237 153 | 64 154 | 55 155 | 4 156 | 116 157 | 172 158 | 46 159 | 33 160 | 138 161 | 75 162 | 60 163 | 238 164 | 191 165 | 222 166 | 180 167 | 160 168 | 145 169 | 153 170 | 78 171 | 20 172 | 135 173 | 188 174 | 141 175 | 96 176 | 166 177 | 176 178 | 56 179 | 223 180 | 49 181 | 58 182 | 28 183 | 185 184 | 162 185 | 168 186 | 202 187 | 134 188 | 43 189 | 70 190 | 164 191 | 149 192 | 175 193 | 462 194 | 298 195 | 375 196 | 284 197 | 319 198 | 274 199 | 260 200 | 289 201 | 317 202 | 379 203 | 312 204 | 491 205 | 506 206 | 335 207 | 270 208 | 492 209 | 494 210 | 277 211 | 269 212 | 332 213 | 484 214 | 498 215 | 451 216 | 468 217 | 337 218 | 377 219 | 311 220 | 300 221 | 360 222 | 373 223 | 286 224 | 331 225 | 343 226 | 307 227 | 279 228 | 348 229 | 490 230 | 301 231 | 380 232 | 294 233 | 450 234 | 306 235 | 504 236 | 452 237 | 374 238 | 475 239 | 361 240 | 310 241 | 309 242 | 472 243 | 469 244 | 264 245 | 496 246 | 500 247 | 471 248 | 321 249 | 417 250 | 341 251 | 488 252 | 271 253 | 453 254 | 481 255 | 459 256 | 342 257 | 333 258 | 308 259 | 493 260 | 486 261 | 302 262 | 368 263 | 275 264 | 265 265 | 369 266 | 339 267 | 454 268 | 262 269 | 295 270 | 256 271 | 370 272 | 258 273 | 505 274 | 257 275 | 318 276 | 483 277 | 299 278 | 456 279 | 482 280 | 509 281 | 273 282 | 314 283 | 293 284 | 455 285 | 282 286 | 429 287 | 355 288 | 463 289 | 392 290 | 382 291 | 508 292 | 460 293 | 288 294 | 324 295 | 385 296 | 480 297 | 323 298 | 272 299 | 507 300 | 503 301 | 367 302 | 290 303 | 502 304 | 511 305 | 263 306 | 478 307 | 362 308 | 485 309 | 329 310 | 315 311 | 405 312 | 353 313 | 322 314 | 358 315 | 351 316 | 325 317 | 281 318 | 404 319 | 336 320 | 461 321 | 276 322 | 422 323 | 365 324 | 489 325 | 470 326 | 267 327 | 259 328 | 438 329 | 439 330 | 418 331 | 330 332 | 296 333 | 305 334 | 354 335 | 340 336 | 283 337 | 326 338 | 473 339 | 476 340 | 285 341 | 268 342 | 437 343 | 328 344 | 395 345 | 359 346 | 266 347 | 433 348 | 442 349 | 345 350 | 352 351 | 427 352 | 378 353 | 425 354 | 327 355 | 464 356 | 399 357 | 501 358 | 474 359 | 499 360 | 432 361 | 316 362 | 413 363 | 431 364 | 495 365 | 465 366 | 477 367 | 387 368 | 389 369 | 376 370 | 430 371 | 346 372 | 434 373 | 303 374 | 381 375 | 366 376 | 320 377 | 479 378 | 421 379 | 291 380 | 388 381 | 396 382 | 424 383 | 363 384 | 349 385 | 727 386 | 741 387 | 735 388 | 750 389 | 748 390 | 677 391 | 752 392 | 723 393 | 737 394 | 764 395 | 718 396 | 622 397 | 766 398 | 705 399 | 767 400 | 763 401 | 721 402 | 745 403 | 715 404 | 714 405 | 739 406 | 699 407 | 749 408 | 532 409 | 689 410 | 534 411 | 680 412 | 575 413 | 757 414 | 700 415 | 522 416 | 730 417 | 758 418 | 738 419 | 562 420 | 734 421 | 667 422 | 682 423 | 728 424 | 760 425 | 724 426 | 678 427 | 544 428 | 732 429 | 751 430 | 577 431 | 719 432 | 708 433 | 516 434 | 570 435 | 550 436 | 692 437 | 603 438 | 670 439 | 545 440 | 679 441 | 653 442 | 528 443 | 686 444 | 647 445 | 543 446 | 601 447 | 740 448 | 753 449 | 747 450 | 736 451 | 591 452 | 704 453 | 530 454 | 637 455 | 609 456 | 573 457 | 756 458 | 596 459 | 712 460 | 672 461 | 552 462 | 
619 463 | 635 464 | 527 465 | 709 466 | 729 467 | 515 468 | 683 469 | 731 470 | 676 471 | 664 472 | 560 473 | 520 474 | 586 475 | 555 476 | 614 477 | 526 478 | 588 479 | 566 480 | 513 481 | 659 482 | 722 483 | 652 484 | 578 485 | 554 486 | 651 487 | 641 488 | 541 489 | 605 490 | 519 491 | 762 492 | 742 493 | 649 494 | 563 495 | 539 496 | 598 497 | 546 498 | 628 499 | 663 500 | 592 501 | 654 502 | 549 503 | 568 504 | 517 505 | 593 506 | 600 507 | 607 508 | 512 509 | 582 510 | 529 511 | 537 512 | 754 513 | 648 514 | 616 515 | 755 516 | 625 517 | 531 518 | 565 519 | 634 520 | 561 521 | 660 522 | 569 523 | 523 524 | 604 525 | 658 526 | 583 527 | 646 528 | 713 529 | 703 530 | 606 531 | 581 532 | 518 533 | 759 534 | 574 535 | 594 536 | 696 537 | 657 538 | 548 539 | 720 540 | 681 541 | 551 542 | 524 543 | 613 544 | 698 545 | 761 546 | 629 547 | 587 548 | 669 549 | 621 550 | 638 551 | 564 552 | 540 553 | 557 554 | 521 555 | 538 556 | 684 557 | 533 558 | 675 559 | 631 560 | 567 561 | 674 562 | 618 563 | 514 564 | 691 565 | 556 566 | 585 567 | 707 568 | 559 569 | 611 570 | 571 571 | 542 572 | 642 573 | 576 574 | 615 575 | 608 576 | 746 577 | -------------------------------------------------------------------------------- /importance/kl2k/Deit_base_12_attn_768_kl_4_2k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 216 2 | 249 3 | 245 4 | 199 5 | 237 6 | 235 7 | 232 8 | 203 9 | 223 10 | 198 11 | 213 12 | 224 13 | 226 14 | 247 15 | 219 16 | 252 17 | 251 18 | 240 19 | 250 20 | 221 21 | 242 22 | 248 23 | 238 24 | 244 25 | 197 26 | 214 27 | 231 28 | 53 29 | 241 30 | 192 31 | 204 32 | 222 33 | 234 34 | 215 35 | 194 36 | 49 37 | 211 38 | 43 39 | 115 40 | 92 41 | 22 42 | 8 43 | 2 44 | 62 45 | 35 46 | 41 47 | 17 48 | 210 49 | 228 50 | 13 51 | 3 52 | 44 53 | 243 54 | 217 55 | 63 56 | 124 57 | 233 58 | 230 59 | 37 60 | 4 61 | 47 62 | 105 63 | 36 64 | 10 65 | 122 66 | 71 67 | 11 68 | 208 69 | 51 70 | 196 71 | 70 72 | 212 73 | 202 74 | 107 75 | 121 76 | 209 77 | 27 78 | 14 79 | 50 80 | 46 81 | 59 82 | 56 83 | 84 84 | 96 85 | 31 86 | 254 87 | 162 88 | 113 89 | 77 90 | 12 91 | 30 92 | 174 93 | 45 94 | 88 95 | 218 96 | 118 97 | 52 98 | 151 99 | 126 100 | 127 101 | 39 102 | 66 103 | 33 104 | 111 105 | 158 106 | 76 107 | 98 108 | 200 109 | 207 110 | 163 111 | 104 112 | 166 113 | 106 114 | 135 115 | 102 116 | 123 117 | 146 118 | 164 119 | 239 120 | 143 121 | 149 122 | 141 123 | 112 124 | 100 125 | 95 126 | 156 127 | 9 128 | 189 129 | 136 130 | 128 131 | 29 132 | 61 133 | 6 134 | 117 135 | 94 136 | 155 137 | 229 138 | 184 139 | 133 140 | 93 141 | 1 142 | 16 143 | 69 144 | 91 145 | 64 146 | 114 147 | 55 148 | 180 149 | 181 150 | 120 151 | 206 152 | 79 153 | 82 154 | 153 155 | 182 156 | 171 157 | 134 158 | 131 159 | 201 160 | 74 161 | 176 162 | 119 163 | 26 164 | 145 165 | 172 166 | 97 167 | 7 168 | 83 169 | 185 170 | 175 171 | 150 172 | 15 173 | 186 174 | 21 175 | 65 176 | 159 177 | 80 178 | 72 179 | 68 180 | 116 181 | 89 182 | 110 183 | 129 184 | 28 185 | 19 186 | 132 187 | 177 188 | 160 189 | 148 190 | 130 191 | 48 192 | 154 193 | 383 194 | 360 195 | 372 196 | 368 197 | 378 198 | 363 199 | 338 200 | 364 201 | 348 202 | 380 203 | 323 204 | 334 205 | 349 206 | 350 207 | 369 208 | 373 209 | 354 210 | 367 211 | 320 212 | 288 213 | 327 214 | 352 215 | 329 216 | 341 217 | 315 218 | 346 219 | 337 220 | 261 221 | 262 222 | 366 223 | 359 224 | 263 225 | 345 226 | 270 227 | 258 228 | 300 229 | 379 230 | 326 231 | 268 232 | 278 233 | 264 234 | 340 235 | 382 236 
| 322 237 | 336 238 | 305 239 | 328 240 | 330 241 | 298 242 | 344 243 | 266 244 | 361 245 | 362 246 | 435 247 | 285 248 | 265 249 | 280 250 | 273 251 | 339 252 | 324 253 | 365 254 | 331 255 | 417 256 | 375 257 | 430 258 | 295 259 | 385 260 | 335 261 | 301 262 | 307 263 | 355 264 | 283 265 | 440 266 | 291 267 | 343 268 | 347 269 | 279 270 | 308 271 | 332 272 | 438 273 | 325 274 | 376 275 | 371 276 | 299 277 | 342 278 | 412 279 | 419 280 | 381 281 | 314 282 | 399 283 | 321 284 | 356 285 | 281 286 | 401 287 | 309 288 | 351 289 | 433 290 | 286 291 | 272 292 | 418 293 | 408 294 | 294 295 | 290 296 | 311 297 | 429 298 | 387 299 | 357 300 | 434 301 | 374 302 | 274 303 | 388 304 | 422 305 | 393 306 | 271 307 | 400 308 | 293 309 | 391 310 | 450 311 | 384 312 | 469 313 | 411 314 | 441 315 | 425 316 | 431 317 | 477 318 | 403 319 | 413 320 | 397 321 | 462 322 | 427 323 | 436 324 | 442 325 | 318 326 | 443 327 | 256 328 | 395 329 | 292 330 | 492 331 | 260 332 | 319 333 | 370 334 | 486 335 | 445 336 | 444 337 | 312 338 | 415 339 | 377 340 | 277 341 | 398 342 | 465 343 | 458 344 | 313 345 | 289 346 | 302 347 | 426 348 | 259 349 | 483 350 | 297 351 | 476 352 | 407 353 | 451 354 | 421 355 | 392 356 | 439 357 | 500 358 | 506 359 | 502 360 | 490 361 | 501 362 | 496 363 | 478 364 | 405 365 | 284 366 | 475 367 | 493 368 | 454 369 | 481 370 | 482 371 | 484 372 | 472 373 | 449 374 | 423 375 | 424 376 | 267 377 | 386 378 | 410 379 | 306 380 | 464 381 | 505 382 | 257 383 | 333 384 | 511 385 | 557 386 | 549 387 | 572 388 | 524 389 | 538 390 | 541 391 | 525 392 | 515 393 | 561 394 | 514 395 | 523 396 | 513 397 | 556 398 | 565 399 | 547 400 | 518 401 | 527 402 | 682 403 | 526 404 | 544 405 | 551 406 | 677 407 | 533 408 | 560 409 | 516 410 | 539 411 | 567 412 | 644 413 | 543 414 | 555 415 | 571 416 | 519 417 | 512 418 | 659 419 | 554 420 | 545 421 | 537 422 | 683 423 | 675 424 | 684 425 | 522 426 | 563 427 | 667 428 | 532 429 | 666 430 | 674 431 | 672 432 | 641 433 | 669 434 | 665 435 | 564 436 | 531 437 | 550 438 | 668 439 | 703 440 | 566 441 | 658 442 | 569 443 | 663 444 | 568 445 | 681 446 | 534 447 | 536 448 | 520 449 | 552 450 | 670 451 | 692 452 | 698 453 | 701 454 | 686 455 | 693 456 | 715 457 | 695 458 | 694 459 | 687 460 | 691 461 | 680 462 | 662 463 | 713 464 | 655 465 | 642 466 | 647 467 | 702 468 | 678 469 | 548 470 | 690 471 | 661 472 | 574 473 | 540 474 | 517 475 | 656 476 | 710 477 | 726 478 | 738 479 | 581 480 | 636 481 | 762 482 | 664 483 | 600 484 | 657 485 | 737 486 | 748 487 | 562 488 | 756 489 | 712 490 | 529 491 | 764 492 | 739 493 | 648 494 | 622 495 | 651 496 | 653 497 | 743 498 | 696 499 | 714 500 | 707 501 | 626 502 | 730 503 | 606 504 | 676 505 | 628 506 | 723 507 | 757 508 | 746 509 | 729 510 | 603 511 | 609 512 | 708 513 | 727 514 | 643 515 | 654 516 | 673 517 | 742 518 | 719 519 | 722 520 | 640 521 | 699 522 | 752 523 | 688 524 | 535 525 | 717 526 | 700 527 | 637 528 | 617 529 | 749 530 | 619 531 | 709 532 | 711 533 | 616 534 | 721 535 | 744 536 | 621 537 | 706 538 | 649 539 | 731 540 | 577 541 | 705 542 | 635 543 | 728 544 | 646 545 | 602 546 | 740 547 | 747 548 | 553 549 | 542 550 | 598 551 | 685 552 | 745 553 | 671 554 | 638 555 | 578 556 | 608 557 | 766 558 | 725 559 | 758 560 | 585 561 | 632 562 | 645 563 | 625 564 | 754 565 | 765 566 | 724 567 | 620 568 | 573 569 | 607 570 | 604 571 | 633 572 | 679 573 | 761 574 | 584 575 | 587 576 | 558 577 | -------------------------------------------------------------------------------- 
/importance/kl2k/Deit_base_12_attn_768_kl_8_2k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 238 2 | 108 3 | 201 4 | 233 5 | 101 6 | 115 7 | 247 8 | 215 9 | 236 10 | 204 11 | 194 12 | 235 13 | 197 14 | 37 15 | 77 16 | 225 17 | 118 18 | 63 19 | 113 20 | 203 21 | 106 22 | 91 23 | 59 24 | 3 25 | 122 26 | 141 27 | 74 28 | 110 29 | 124 30 | 79 31 | 128 32 | 207 33 | 85 34 | 44 35 | 12 36 | 162 37 | 246 38 | 75 39 | 89 40 | 158 41 | 229 42 | 132 43 | 96 44 | 47 45 | 248 46 | 100 47 | 69 48 | 223 49 | 153 50 | 116 51 | 244 52 | 39 53 | 180 54 | 117 55 | 249 56 | 241 57 | 86 58 | 123 59 | 166 60 | 192 61 | 210 62 | 183 63 | 105 64 | 231 65 | 27 66 | 163 67 | 178 68 | 198 69 | 209 70 | 143 71 | 188 72 | 230 73 | 140 74 | 157 75 | 228 76 | 254 77 | 38 78 | 216 79 | 67 80 | 119 81 | 52 82 | 145 83 | 56 84 | 206 85 | 125 86 | 213 87 | 191 88 | 138 89 | 255 90 | 152 91 | 155 92 | 237 93 | 2 94 | 170 95 | 190 96 | 214 97 | 48 98 | 32 99 | 41 100 | 161 101 | 54 102 | 93 103 | 176 104 | 49 105 | 173 106 | 175 107 | 15 108 | 4 109 | 217 110 | 51 111 | 20 112 | 90 113 | 53 114 | 185 115 | 66 116 | 129 117 | 205 118 | 109 119 | 245 120 | 1 121 | 240 122 | 25 123 | 251 124 | 55 125 | 150 126 | 61 127 | 179 128 | 0 129 | 219 130 | 97 131 | 149 132 | 21 133 | 81 134 | 135 135 | 159 136 | 195 137 | 130 138 | 226 139 | 243 140 | 73 141 | 92 142 | 13 143 | 8 144 | 211 145 | 26 146 | 184 147 | 111 148 | 7 149 | 220 150 | 42 151 | 107 152 | 80 153 | 181 154 | 24 155 | 43 156 | 31 157 | 112 158 | 174 159 | 222 160 | 212 161 | 40 162 | 22 163 | 82 164 | 64 165 | 76 166 | 98 167 | 146 168 | 10 169 | 57 170 | 35 171 | 9 172 | 187 173 | 14 174 | 208 175 | 177 176 | 164 177 | 147 178 | 165 179 | 16 180 | 23 181 | 103 182 | 242 183 | 136 184 | 6 185 | 102 186 | 171 187 | 50 188 | 160 189 | 200 190 | 182 191 | 127 192 | 144 193 | 427 194 | 442 195 | 477 196 | 394 197 | 468 198 | 392 199 | 423 200 | 438 201 | 511 202 | 418 203 | 441 204 | 431 205 | 411 206 | 426 207 | 401 208 | 485 209 | 368 210 | 399 211 | 407 212 | 460 213 | 400 214 | 469 215 | 480 216 | 402 217 | 419 218 | 388 219 | 462 220 | 398 221 | 475 222 | 412 223 | 478 224 | 390 225 | 406 226 | 429 227 | 495 228 | 434 229 | 440 230 | 446 231 | 417 232 | 436 233 | 509 234 | 488 235 | 396 236 | 338 237 | 501 238 | 494 239 | 346 240 | 404 241 | 479 242 | 507 243 | 397 244 | 360 245 | 493 246 | 432 247 | 463 248 | 454 249 | 415 250 | 437 251 | 387 252 | 405 253 | 384 254 | 408 255 | 443 256 | 344 257 | 486 258 | 456 259 | 307 260 | 452 261 | 380 262 | 352 263 | 351 264 | 326 265 | 282 266 | 305 267 | 288 268 | 265 269 | 336 270 | 391 271 | 414 272 | 490 273 | 357 274 | 435 275 | 484 276 | 455 277 | 315 278 | 465 279 | 393 280 | 361 281 | 461 282 | 295 283 | 410 284 | 297 285 | 316 286 | 311 287 | 272 288 | 403 289 | 421 290 | 466 291 | 409 292 | 439 293 | 510 294 | 367 295 | 328 296 | 482 297 | 422 298 | 451 299 | 318 300 | 310 301 | 472 302 | 312 303 | 496 304 | 474 305 | 425 306 | 428 307 | 383 308 | 416 309 | 302 310 | 373 311 | 464 312 | 483 313 | 362 314 | 505 315 | 458 316 | 317 317 | 289 318 | 445 319 | 378 320 | 332 321 | 385 322 | 331 323 | 370 324 | 340 325 | 306 326 | 377 327 | 320 328 | 457 329 | 322 330 | 334 331 | 365 332 | 314 333 | 264 334 | 430 335 | 447 336 | 498 337 | 459 338 | 476 339 | 293 340 | 267 341 | 363 342 | 345 343 | 279 344 | 341 345 | 274 346 | 371 347 | 503 348 | 491 349 | 330 350 | 369 351 | 349 352 | 492 353 | 285 354 | 448 355 | 366 356 | 308 357 | 420 358 | 508 
359 | 467 360 | 329 361 | 395 362 | 450 363 | 276 364 | 502 365 | 286 366 | 356 367 | 375 368 | 300 369 | 303 370 | 347 371 | 324 372 | 261 373 | 343 374 | 273 375 | 487 376 | 342 377 | 283 378 | 323 379 | 275 380 | 381 381 | 348 382 | 321 383 | 337 384 | 262 385 | 584 386 | 576 387 | 597 388 | 621 389 | 623 390 | 594 391 | 619 392 | 630 393 | 629 394 | 590 395 | 607 396 | 613 397 | 722 398 | 625 399 | 600 400 | 582 401 | 608 402 | 592 403 | 639 404 | 651 405 | 642 406 | 610 407 | 612 408 | 617 409 | 641 410 | 593 411 | 634 412 | 622 413 | 611 414 | 646 415 | 699 416 | 591 417 | 725 418 | 654 419 | 736 420 | 743 421 | 583 422 | 690 423 | 720 424 | 746 425 | 606 426 | 638 427 | 681 428 | 717 429 | 688 430 | 598 431 | 682 432 | 645 433 | 627 434 | 708 435 | 633 436 | 602 437 | 618 438 | 588 439 | 696 440 | 709 441 | 707 442 | 579 443 | 758 444 | 745 445 | 631 446 | 578 447 | 616 448 | 694 449 | 719 450 | 650 451 | 649 452 | 757 453 | 632 454 | 666 455 | 727 456 | 595 457 | 756 458 | 643 459 | 647 460 | 628 461 | 609 462 | 604 463 | 716 464 | 652 465 | 759 466 | 664 467 | 762 468 | 715 469 | 763 470 | 615 471 | 713 472 | 734 473 | 519 474 | 669 475 | 705 476 | 677 477 | 728 478 | 739 479 | 674 480 | 680 481 | 714 482 | 573 483 | 640 484 | 577 485 | 753 486 | 766 487 | 581 488 | 711 489 | 700 490 | 730 491 | 656 492 | 742 493 | 561 494 | 685 495 | 752 496 | 723 497 | 655 498 | 636 499 | 673 500 | 701 501 | 580 502 | 740 503 | 662 504 | 587 505 | 599 506 | 744 507 | 686 508 | 614 509 | 659 510 | 684 511 | 704 512 | 691 513 | 737 514 | 710 515 | 702 516 | 726 517 | 697 518 | 566 519 | 574 520 | 635 521 | 538 522 | 527 523 | 663 524 | 729 525 | 718 526 | 556 527 | 750 528 | 751 529 | 676 530 | 732 531 | 692 532 | 689 533 | 644 534 | 683 535 | 531 536 | 653 537 | 585 538 | 721 539 | 731 540 | 557 541 | 671 542 | 555 543 | 620 544 | 679 545 | 567 546 | 657 547 | 520 548 | 712 549 | 760 550 | 755 551 | 754 552 | 529 553 | 542 554 | 678 555 | 724 556 | 514 557 | 667 558 | 525 559 | 761 560 | 586 561 | 550 562 | 558 563 | 523 564 | 524 565 | 703 566 | 526 567 | 589 568 | 521 569 | 670 570 | 518 571 | 648 572 | 693 573 | 570 574 | 672 575 | 551 576 | 559 577 | -------------------------------------------------------------------------------- /importance/kl2k/Deit_base_12_attn_768_kl_2_2k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 31 2 | 27 3 | 51 4 | 10 5 | 12 6 | 38 7 | 32 8 | 2 9 | 52 10 | 54 11 | 62 12 | 208 13 | 24 14 | 20 15 | 9 16 | 21 17 | 47 18 | 0 19 | 218 20 | 11 21 | 236 22 | 53 23 | 196 24 | 40 25 | 227 26 | 26 27 | 177 28 | 255 29 | 16 30 | 186 31 | 173 32 | 59 33 | 44 34 | 135 35 | 34 36 | 162 37 | 237 38 | 15 39 | 134 40 | 6 41 | 28 42 | 60 43 | 164 44 | 212 45 | 250 46 | 131 47 | 211 48 | 148 49 | 4 50 | 234 51 | 241 52 | 205 53 | 35 54 | 129 55 | 152 56 | 199 57 | 151 58 | 128 59 | 158 60 | 239 61 | 169 62 | 183 63 | 136 64 | 203 65 | 209 66 | 23 67 | 229 68 | 202 69 | 18 70 | 248 71 | 190 72 | 253 73 | 167 74 | 247 75 | 56 76 | 244 77 | 1 78 | 147 79 | 214 80 | 184 81 | 133 82 | 225 83 | 233 84 | 8 85 | 230 86 | 166 87 | 216 88 | 22 89 | 174 90 | 254 91 | 217 92 | 242 93 | 153 94 | 157 95 | 49 96 | 69 97 | 188 98 | 149 99 | 141 100 | 57 101 | 182 102 | 143 103 | 132 104 | 213 105 | 180 106 | 246 107 | 172 108 | 231 109 | 139 110 | 138 111 | 215 112 | 156 113 | 179 114 | 226 115 | 171 116 | 221 117 | 191 118 | 91 119 | 197 120 | 73 121 | 98 122 | 223 123 | 235 124 | 37 125 | 185 126 | 245 127 | 39 128 | 222 129 | 
58 130 | 65 131 | 29 132 | 13 133 | 192 134 | 249 135 | 228 136 | 130 137 | 145 138 | 122 139 | 189 140 | 150 141 | 165 142 | 206 143 | 168 144 | 104 145 | 137 146 | 232 147 | 243 148 | 176 149 | 198 150 | 85 151 | 78 152 | 100 153 | 142 154 | 204 155 | 86 156 | 106 157 | 80 158 | 107 159 | 71 160 | 97 161 | 121 162 | 77 163 | 17 164 | 240 165 | 94 166 | 102 167 | 210 168 | 70 169 | 161 170 | 238 171 | 89 172 | 146 173 | 84 174 | 99 175 | 83 176 | 75 177 | 195 178 | 111 179 | 67 180 | 64 181 | 159 182 | 126 183 | 19 184 | 175 185 | 90 186 | 117 187 | 110 188 | 103 189 | 123 190 | 118 191 | 42 192 | 45 193 | 374 194 | 356 195 | 317 196 | 340 197 | 331 198 | 358 199 | 309 200 | 382 201 | 292 202 | 320 203 | 355 204 | 303 205 | 319 206 | 335 207 | 401 208 | 306 209 | 271 210 | 282 211 | 295 212 | 285 213 | 286 214 | 343 215 | 351 216 | 354 217 | 283 218 | 360 219 | 300 220 | 304 221 | 281 222 | 289 223 | 364 224 | 269 225 | 308 226 | 256 227 | 307 228 | 275 229 | 352 230 | 278 231 | 383 232 | 261 233 | 399 234 | 350 235 | 380 236 | 430 237 | 438 238 | 284 239 | 412 240 | 312 241 | 367 242 | 425 243 | 325 244 | 305 245 | 270 246 | 403 247 | 406 248 | 258 249 | 279 250 | 262 251 | 441 252 | 368 253 | 294 254 | 318 255 | 329 256 | 359 257 | 280 258 | 272 259 | 273 260 | 337 261 | 276 262 | 277 263 | 365 264 | 296 265 | 313 266 | 274 267 | 299 268 | 267 269 | 347 270 | 327 271 | 345 272 | 302 273 | 405 274 | 439 275 | 410 276 | 263 277 | 428 278 | 445 279 | 265 280 | 326 281 | 291 282 | 431 283 | 414 284 | 290 285 | 310 286 | 402 287 | 268 288 | 429 289 | 336 290 | 435 291 | 259 292 | 397 293 | 338 294 | 315 295 | 421 296 | 301 297 | 378 298 | 288 299 | 444 300 | 424 301 | 389 302 | 266 303 | 416 304 | 413 305 | 379 306 | 417 307 | 443 308 | 324 309 | 442 310 | 341 311 | 509 312 | 499 313 | 386 314 | 433 315 | 427 316 | 376 317 | 311 318 | 322 319 | 260 320 | 344 321 | 330 322 | 361 323 | 372 324 | 264 325 | 333 326 | 346 327 | 511 328 | 298 329 | 437 330 | 407 331 | 297 332 | 409 333 | 474 334 | 370 335 | 469 336 | 420 337 | 332 338 | 314 339 | 475 340 | 371 341 | 373 342 | 316 343 | 491 344 | 339 345 | 489 346 | 457 347 | 505 348 | 257 349 | 493 350 | 348 351 | 393 352 | 411 353 | 479 354 | 362 355 | 342 356 | 287 357 | 492 358 | 349 359 | 483 360 | 510 361 | 450 362 | 470 363 | 381 364 | 440 365 | 390 366 | 465 367 | 482 368 | 418 369 | 498 370 | 464 371 | 387 372 | 487 373 | 363 374 | 467 375 | 461 376 | 504 377 | 486 378 | 494 379 | 471 380 | 480 381 | 388 382 | 448 383 | 449 384 | 453 385 | 518 386 | 544 387 | 563 388 | 529 389 | 532 390 | 512 391 | 568 392 | 526 393 | 517 394 | 553 395 | 562 396 | 559 397 | 566 398 | 561 399 | 666 400 | 523 401 | 595 402 | 612 403 | 545 404 | 519 405 | 708 406 | 596 407 | 537 408 | 548 409 | 546 410 | 687 411 | 684 412 | 646 413 | 551 414 | 756 415 | 575 416 | 635 417 | 632 418 | 641 419 | 557 420 | 514 421 | 576 422 | 602 423 | 674 424 | 739 425 | 528 426 | 707 427 | 590 428 | 613 429 | 592 430 | 650 431 | 724 432 | 754 433 | 660 434 | 536 435 | 713 436 | 589 437 | 716 438 | 629 439 | 709 440 | 516 441 | 680 442 | 651 443 | 675 444 | 667 445 | 737 446 | 738 447 | 722 448 | 642 449 | 572 450 | 535 451 | 700 452 | 734 453 | 659 454 | 721 455 | 695 456 | 638 457 | 608 458 | 645 459 | 731 460 | 644 461 | 591 462 | 631 463 | 725 464 | 748 465 | 677 466 | 718 467 | 547 468 | 682 469 | 765 470 | 649 471 | 611 472 | 606 473 | 763 474 | 761 475 | 584 476 | 534 477 | 702 478 | 694 479 | 759 480 | 656 481 | 717 482 | 733 483 | 746 484 | 661 485 | 762 486 | 749 487 | 
743 488 | 671 489 | 764 490 | 652 491 | 747 492 | 539 493 | 581 494 | 704 495 | 580 496 | 689 497 | 678 498 | 643 499 | 567 500 | 736 501 | 751 502 | 758 503 | 726 504 | 614 505 | 599 506 | 670 507 | 634 508 | 654 509 | 628 510 | 703 511 | 618 512 | 705 513 | 735 514 | 681 515 | 640 516 | 755 517 | 742 518 | 701 519 | 730 520 | 744 521 | 753 522 | 648 523 | 543 524 | 685 525 | 752 526 | 524 527 | 610 528 | 729 529 | 564 530 | 714 531 | 668 532 | 673 533 | 672 534 | 723 535 | 533 536 | 558 537 | 525 538 | 609 539 | 706 540 | 710 541 | 692 542 | 527 543 | 745 544 | 593 545 | 697 546 | 693 547 | 679 548 | 655 549 | 715 550 | 669 551 | 699 552 | 621 553 | 585 554 | 604 555 | 657 556 | 637 557 | 560 558 | 711 559 | 683 560 | 658 561 | 639 562 | 574 563 | 540 564 | 571 565 | 617 566 | 741 567 | 767 568 | 554 569 | 696 570 | 750 571 | 521 572 | 586 573 | 620 574 | 538 575 | 633 576 | 636 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_attn_768_kl_2_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 31 2 | 27 3 | 38 4 | 51 5 | 12 6 | 54 7 | 52 8 | 10 9 | 32 10 | 62 11 | 208 12 | 47 13 | 24 14 | 9 15 | 20 16 | 2 17 | 236 18 | 0 19 | 21 20 | 53 21 | 40 22 | 26 23 | 196 24 | 177 25 | 255 26 | 11 27 | 218 28 | 173 29 | 227 30 | 237 31 | 202 32 | 135 33 | 44 34 | 16 35 | 34 36 | 22 37 | 60 38 | 162 39 | 15 40 | 186 41 | 6 42 | 28 43 | 212 44 | 234 45 | 151 46 | 134 47 | 35 48 | 164 49 | 129 50 | 148 51 | 59 52 | 250 53 | 229 54 | 205 55 | 23 56 | 158 57 | 152 58 | 128 59 | 211 60 | 183 61 | 18 62 | 131 63 | 4 64 | 199 65 | 209 66 | 169 67 | 241 68 | 217 69 | 149 70 | 184 71 | 190 72 | 203 73 | 248 74 | 136 75 | 153 76 | 147 77 | 225 78 | 167 79 | 233 80 | 230 81 | 133 82 | 49 83 | 239 84 | 56 85 | 247 86 | 188 87 | 1 88 | 214 89 | 253 90 | 244 91 | 143 92 | 242 93 | 8 94 | 174 95 | 216 96 | 157 97 | 182 98 | 156 99 | 172 100 | 215 101 | 37 102 | 171 103 | 197 104 | 245 105 | 91 106 | 180 107 | 254 108 | 206 109 | 132 110 | 231 111 | 166 112 | 69 113 | 226 114 | 138 115 | 139 116 | 141 117 | 185 118 | 57 119 | 191 120 | 179 121 | 221 122 | 13 123 | 228 124 | 235 125 | 73 126 | 246 127 | 98 128 | 189 129 | 222 130 | 223 131 | 213 132 | 165 133 | 29 134 | 39 135 | 130 136 | 168 137 | 58 138 | 192 139 | 65 140 | 145 141 | 137 142 | 86 143 | 249 144 | 243 145 | 176 146 | 150 147 | 80 148 | 198 149 | 232 150 | 77 151 | 17 152 | 142 153 | 195 154 | 78 155 | 204 156 | 240 157 | 122 158 | 89 159 | 85 160 | 64 161 | 121 162 | 146 163 | 123 164 | 84 165 | 104 166 | 238 167 | 94 168 | 161 169 | 159 170 | 71 171 | 210 172 | 107 173 | 70 174 | 90 175 | 126 176 | 111 177 | 100 178 | 175 179 | 67 180 | 75 181 | 83 182 | 102 183 | 66 184 | 97 185 | 43 186 | 108 187 | 140 188 | 106 189 | 120 190 | 99 191 | 103 192 | 117 193 | 356 194 | 358 195 | 331 196 | 355 197 | 292 198 | 374 199 | 351 200 | 317 201 | 309 202 | 320 203 | 306 204 | 286 205 | 401 206 | 343 207 | 303 208 | 354 209 | 271 210 | 382 211 | 295 212 | 368 213 | 383 214 | 289 215 | 319 216 | 285 217 | 438 218 | 352 219 | 304 220 | 282 221 | 307 222 | 399 223 | 278 224 | 403 225 | 340 226 | 283 227 | 350 228 | 261 229 | 275 230 | 412 231 | 308 232 | 300 233 | 256 234 | 305 235 | 269 236 | 274 237 | 281 238 | 312 239 | 279 240 | 430 241 | 364 242 | 270 243 | 296 244 | 380 245 | 410 246 | 262 247 | 425 248 | 294 249 | 273 250 | 359 251 | 376 252 | 365 253 | 302 254 | 406 255 | 347 256 | 405 257 | 335 258 | 441 259 | 284 260 | 
280 261 | 290 262 | 367 263 | 329 264 | 378 265 | 258 266 | 310 267 | 318 268 | 313 269 | 325 270 | 268 271 | 299 272 | 277 273 | 445 274 | 428 275 | 336 276 | 276 277 | 272 278 | 267 279 | 301 280 | 327 281 | 414 282 | 416 283 | 337 284 | 417 285 | 345 286 | 341 287 | 291 288 | 431 289 | 439 290 | 435 291 | 360 292 | 263 293 | 315 294 | 413 295 | 265 296 | 402 297 | 407 298 | 266 299 | 421 300 | 397 301 | 379 302 | 429 303 | 259 304 | 370 305 | 444 306 | 424 307 | 371 308 | 330 309 | 509 310 | 442 311 | 326 312 | 288 313 | 389 314 | 443 315 | 297 316 | 324 317 | 437 318 | 361 319 | 314 320 | 264 321 | 338 322 | 499 323 | 311 324 | 322 325 | 260 326 | 433 327 | 475 328 | 373 329 | 469 330 | 409 331 | 386 332 | 491 333 | 346 334 | 511 335 | 372 336 | 333 337 | 427 338 | 316 339 | 474 340 | 420 341 | 393 342 | 298 343 | 344 344 | 510 345 | 257 346 | 493 347 | 450 348 | 464 349 | 479 350 | 411 351 | 348 352 | 287 353 | 492 354 | 440 355 | 505 356 | 381 357 | 465 358 | 470 359 | 457 360 | 473 361 | 489 362 | 390 363 | 507 364 | 504 365 | 483 366 | 502 367 | 482 368 | 480 369 | 467 370 | 332 371 | 375 372 | 418 373 | 387 374 | 486 375 | 447 376 | 461 377 | 362 378 | 498 379 | 494 380 | 321 381 | 471 382 | 400 383 | 495 384 | 490 385 | 518 386 | 544 387 | 563 388 | 529 389 | 512 390 | 532 391 | 568 392 | 517 393 | 526 394 | 553 395 | 562 396 | 566 397 | 559 398 | 546 399 | 595 400 | 523 401 | 519 402 | 551 403 | 548 404 | 684 405 | 537 406 | 561 407 | 635 408 | 596 409 | 545 410 | 666 411 | 612 412 | 687 413 | 708 414 | 632 415 | 756 416 | 707 417 | 590 418 | 641 419 | 713 420 | 575 421 | 514 422 | 528 423 | 576 424 | 602 425 | 557 426 | 646 427 | 651 428 | 754 429 | 709 430 | 516 431 | 739 432 | 737 433 | 591 434 | 592 435 | 589 436 | 642 437 | 650 438 | 674 439 | 613 440 | 724 441 | 660 442 | 572 443 | 716 444 | 631 445 | 702 446 | 677 447 | 629 448 | 695 449 | 536 450 | 608 451 | 700 452 | 731 453 | 722 454 | 725 455 | 718 456 | 721 457 | 645 458 | 606 459 | 734 460 | 659 461 | 667 462 | 761 463 | 644 464 | 611 465 | 675 466 | 547 467 | 765 468 | 738 469 | 764 470 | 682 471 | 746 472 | 749 473 | 535 474 | 661 475 | 704 476 | 649 477 | 680 478 | 759 479 | 584 480 | 581 481 | 534 482 | 751 483 | 638 484 | 747 485 | 748 486 | 656 487 | 762 488 | 678 489 | 758 490 | 643 491 | 717 492 | 652 493 | 763 494 | 654 495 | 567 496 | 694 497 | 705 498 | 533 499 | 726 500 | 614 501 | 730 502 | 665 503 | 671 504 | 733 505 | 742 506 | 753 507 | 743 508 | 689 509 | 668 510 | 670 511 | 736 512 | 703 513 | 599 514 | 618 515 | 524 516 | 628 517 | 714 518 | 752 519 | 681 520 | 580 521 | 539 522 | 744 523 | 673 524 | 729 525 | 710 526 | 648 527 | 701 528 | 634 529 | 692 530 | 543 531 | 755 532 | 640 533 | 735 534 | 558 535 | 525 536 | 564 537 | 745 538 | 685 539 | 715 540 | 750 541 | 585 542 | 669 543 | 593 544 | 655 545 | 610 546 | 706 547 | 639 548 | 723 549 | 697 550 | 609 551 | 741 552 | 560 553 | 711 554 | 527 555 | 621 556 | 672 557 | 693 558 | 604 559 | 683 560 | 540 561 | 699 562 | 767 563 | 633 564 | 657 565 | 521 566 | 574 567 | 679 568 | 554 569 | 658 570 | 617 571 | 586 572 | 594 573 | 571 574 | 676 575 | 696 576 | 538 577 | -------------------------------------------------------------------------------- /importance/kl2k/Deit_base_12_attn_768_kl_11_2k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 224 2 | 221 3 | 215 4 | 24 5 | 154 6 | 237 7 | 205 8 | 198 9 | 33 10 | 172 11 | 10 12 | 46 13 | 232 14 | 152 15 | 15 16 | 216 17 | 203 18 
| 252 19 | 135 20 | 249 21 | 247 22 | 187 23 | 134 24 | 201 25 | 170 26 | 223 27 | 65 28 | 35 29 | 95 30 | 208 31 | 251 32 | 179 33 | 73 34 | 81 35 | 181 36 | 220 37 | 248 38 | 150 39 | 128 40 | 185 41 | 157 42 | 143 43 | 245 44 | 180 45 | 7 46 | 132 47 | 108 48 | 210 49 | 38 50 | 246 51 | 62 52 | 129 53 | 44 54 | 59 55 | 126 56 | 244 57 | 253 58 | 80 59 | 85 60 | 125 61 | 196 62 | 118 63 | 229 64 | 53 65 | 225 66 | 240 67 | 136 68 | 131 69 | 167 70 | 233 71 | 160 72 | 137 73 | 189 74 | 29 75 | 183 76 | 50 77 | 32 78 | 176 79 | 52 80 | 138 81 | 115 82 | 226 83 | 218 84 | 47 85 | 158 86 | 191 87 | 242 88 | 66 89 | 145 90 | 243 91 | 140 92 | 12 93 | 250 94 | 239 95 | 88 96 | 171 97 | 96 98 | 39 99 | 211 100 | 254 101 | 193 102 | 6 103 | 142 104 | 214 105 | 67 106 | 105 107 | 169 108 | 98 109 | 21 110 | 60 111 | 123 112 | 155 113 | 228 114 | 166 115 | 159 116 | 83 117 | 192 118 | 188 119 | 184 120 | 18 121 | 90 122 | 69 123 | 235 124 | 20 125 | 194 126 | 230 127 | 84 128 | 147 129 | 1 130 | 31 131 | 199 132 | 72 133 | 16 134 | 255 135 | 100 136 | 241 137 | 153 138 | 102 139 | 121 140 | 111 141 | 173 142 | 106 143 | 27 144 | 119 145 | 175 146 | 133 147 | 11 148 | 124 149 | 63 150 | 231 151 | 36 152 | 227 153 | 195 154 | 57 155 | 206 156 | 130 157 | 162 158 | 190 159 | 77 160 | 122 161 | 234 162 | 3 163 | 116 164 | 104 165 | 86 166 | 49 167 | 139 168 | 40 169 | 99 170 | 141 171 | 200 172 | 87 173 | 164 174 | 168 175 | 156 176 | 89 177 | 74 178 | 58 179 | 41 180 | 78 181 | 82 182 | 197 183 | 9 184 | 45 185 | 75 186 | 146 187 | 148 188 | 91 189 | 174 190 | 120 191 | 0 192 | 70 193 | 404 194 | 391 195 | 434 196 | 394 197 | 390 198 | 406 199 | 311 200 | 407 201 | 388 202 | 442 203 | 422 204 | 444 205 | 405 206 | 264 207 | 414 208 | 415 209 | 400 210 | 431 211 | 409 212 | 408 213 | 447 214 | 502 215 | 402 216 | 427 217 | 397 218 | 416 219 | 483 220 | 466 221 | 396 222 | 410 223 | 432 224 | 325 225 | 387 226 | 271 227 | 411 228 | 428 229 | 418 230 | 398 231 | 423 232 | 492 233 | 440 234 | 366 235 | 304 236 | 503 237 | 369 238 | 364 239 | 462 240 | 316 241 | 499 242 | 472 243 | 438 244 | 439 245 | 437 246 | 328 247 | 425 248 | 474 249 | 487 250 | 401 251 | 421 252 | 341 253 | 436 254 | 386 255 | 446 256 | 310 257 | 370 258 | 380 259 | 454 260 | 336 261 | 281 262 | 291 263 | 385 264 | 482 265 | 458 266 | 445 267 | 510 268 | 443 269 | 460 270 | 435 271 | 395 272 | 372 273 | 509 274 | 330 275 | 461 276 | 331 277 | 392 278 | 426 279 | 505 280 | 329 281 | 471 282 | 413 283 | 378 284 | 494 285 | 305 286 | 379 287 | 449 288 | 475 289 | 459 290 | 430 291 | 424 292 | 307 293 | 344 294 | 360 295 | 368 296 | 337 297 | 338 298 | 420 299 | 256 300 | 358 301 | 393 302 | 265 303 | 467 304 | 362 305 | 501 306 | 451 307 | 352 308 | 453 309 | 320 310 | 399 311 | 260 312 | 403 313 | 481 314 | 268 315 | 488 316 | 470 317 | 469 318 | 361 319 | 257 320 | 384 321 | 507 322 | 345 323 | 349 324 | 452 325 | 468 326 | 327 327 | 296 328 | 374 329 | 323 330 | 371 331 | 464 332 | 496 333 | 376 334 | 456 335 | 267 336 | 353 337 | 495 338 | 262 339 | 497 340 | 441 341 | 489 342 | 382 343 | 343 344 | 326 345 | 490 346 | 450 347 | 347 348 | 480 349 | 493 350 | 486 351 | 455 352 | 286 353 | 321 354 | 282 355 | 367 356 | 324 357 | 473 358 | 350 359 | 279 360 | 356 361 | 355 362 | 498 363 | 287 364 | 309 365 | 504 366 | 373 367 | 261 368 | 312 369 | 346 370 | 308 371 | 381 372 | 335 373 | 332 374 | 348 375 | 301 376 | 340 377 | 357 378 | 290 379 | 354 380 | 333 381 | 295 382 | 479 383 | 363 384 | 508 385 | 560 386 | 541 387 | 650 388 | 
564 389 | 530 390 | 555 391 | 517 392 | 562 393 | 667 394 | 534 395 | 658 396 | 532 397 | 544 398 | 563 399 | 646 400 | 653 401 | 514 402 | 536 403 | 684 404 | 692 405 | 680 406 | 662 407 | 512 408 | 546 409 | 525 410 | 565 411 | 524 412 | 529 413 | 557 414 | 551 415 | 651 416 | 665 417 | 531 418 | 681 419 | 702 420 | 740 421 | 538 422 | 553 423 | 547 424 | 527 425 | 572 426 | 693 427 | 548 428 | 676 429 | 575 430 | 664 431 | 679 432 | 554 433 | 736 434 | 571 435 | 634 436 | 520 437 | 720 438 | 764 439 | 632 440 | 672 441 | 766 442 | 628 443 | 533 444 | 719 445 | 677 446 | 519 447 | 678 448 | 518 449 | 574 450 | 561 451 | 604 452 | 516 453 | 727 454 | 522 455 | 627 456 | 750 457 | 580 458 | 643 459 | 694 460 | 668 461 | 559 462 | 618 463 | 594 464 | 558 465 | 605 466 | 729 467 | 738 468 | 566 469 | 615 470 | 724 471 | 535 472 | 673 473 | 523 474 | 610 475 | 675 476 | 567 477 | 608 478 | 540 479 | 674 480 | 655 481 | 683 482 | 607 483 | 659 484 | 521 485 | 569 486 | 721 487 | 718 488 | 633 489 | 685 490 | 635 491 | 705 492 | 652 493 | 714 494 | 695 495 | 725 496 | 595 497 | 645 498 | 648 499 | 614 500 | 612 501 | 697 502 | 586 503 | 737 504 | 581 505 | 663 506 | 570 507 | 637 508 | 707 509 | 696 510 | 701 511 | 621 512 | 657 513 | 735 514 | 577 515 | 598 516 | 689 517 | 687 518 | 549 519 | 730 520 | 661 521 | 754 522 | 728 523 | 709 524 | 743 525 | 700 526 | 542 527 | 545 528 | 597 529 | 757 530 | 630 531 | 690 532 | 759 533 | 682 534 | 641 535 | 722 536 | 515 537 | 585 538 | 699 539 | 639 540 | 691 541 | 631 542 | 708 543 | 550 544 | 642 545 | 744 546 | 733 547 | 686 548 | 619 549 | 568 550 | 573 551 | 671 552 | 732 553 | 601 554 | 623 555 | 590 556 | 600 557 | 666 558 | 745 559 | 753 560 | 528 561 | 654 562 | 644 563 | 747 564 | 726 565 | 670 566 | 640 567 | 748 568 | 603 569 | 579 570 | 723 571 | 763 572 | 751 573 | 625 574 | 717 575 | 602 576 | 526 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_attn_768_kl_11_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 224 2 | 154 3 | 221 4 | 194 5 | 247 6 | 172 7 | 181 8 | 24 9 | 251 10 | 134 11 | 153 12 | 237 13 | 232 14 | 135 15 | 33 16 | 147 17 | 89 18 | 252 19 | 7 20 | 46 21 | 125 22 | 223 23 | 32 24 | 143 25 | 239 26 | 10 27 | 215 28 | 185 29 | 201 30 | 66 31 | 208 32 | 129 33 | 198 34 | 180 35 | 170 36 | 59 37 | 203 38 | 245 39 | 191 40 | 229 41 | 105 42 | 159 43 | 85 44 | 162 45 | 184 46 | 152 47 | 167 48 | 214 49 | 250 50 | 44 51 | 243 52 | 246 53 | 35 54 | 226 55 | 179 56 | 193 57 | 81 58 | 248 59 | 15 60 | 169 61 | 211 62 | 16 63 | 240 64 | 124 65 | 205 66 | 158 67 | 128 68 | 80 69 | 22 70 | 164 71 | 216 72 | 227 73 | 233 74 | 82 75 | 122 76 | 115 77 | 234 78 | 38 79 | 29 80 | 235 81 | 187 82 | 241 83 | 69 84 | 62 85 | 138 86 | 218 87 | 219 88 | 155 89 | 244 90 | 139 91 | 176 92 | 242 93 | 75 94 | 60 95 | 199 96 | 137 97 | 131 98 | 171 99 | 53 100 | 39 101 | 166 102 | 220 103 | 177 104 | 183 105 | 92 106 | 206 107 | 12 108 | 210 109 | 99 110 | 196 111 | 95 112 | 173 113 | 120 114 | 249 115 | 47 116 | 3 117 | 188 118 | 104 119 | 253 120 | 136 121 | 148 122 | 108 123 | 88 124 | 132 125 | 86 126 | 73 127 | 189 128 | 228 129 | 156 130 | 123 131 | 197 132 | 50 133 | 121 134 | 140 135 | 49 136 | 236 137 | 72 138 | 150 139 | 160 140 | 40 141 | 255 142 | 126 143 | 6 144 | 231 145 | 192 146 | 112 147 | 2 148 | 142 149 | 133 150 | 116 151 | 1 152 | 96 153 | 31 154 | 118 155 | 20 156 | 21 157 | 106 158 | 74 
159 | 90 160 | 17 161 | 145 162 | 67 163 | 41 164 | 57 165 | 98 166 | 225 167 | 36 168 | 63 169 | 230 170 | 83 171 | 119 172 | 141 173 | 28 174 | 157 175 | 204 176 | 9 177 | 254 178 | 27 179 | 127 180 | 0 181 | 78 182 | 77 183 | 91 184 | 168 185 | 70 186 | 84 187 | 195 188 | 18 189 | 45 190 | 58 191 | 111 192 | 130 193 | 386 194 | 391 195 | 440 196 | 390 197 | 388 198 | 414 199 | 423 200 | 404 201 | 442 202 | 437 203 | 447 204 | 400 205 | 434 206 | 422 207 | 444 208 | 407 209 | 343 210 | 415 211 | 445 212 | 384 213 | 380 214 | 418 215 | 396 216 | 394 217 | 311 218 | 376 219 | 402 220 | 310 221 | 483 222 | 428 223 | 413 224 | 466 225 | 508 226 | 264 227 | 510 228 | 406 229 | 502 230 | 431 231 | 316 232 | 505 233 | 392 234 | 398 235 | 352 236 | 411 237 | 271 238 | 341 239 | 439 240 | 492 241 | 435 242 | 408 243 | 325 244 | 281 245 | 405 246 | 331 247 | 438 248 | 370 249 | 369 250 | 364 251 | 338 252 | 467 253 | 360 254 | 389 255 | 387 256 | 454 257 | 409 258 | 395 259 | 401 260 | 501 261 | 430 262 | 443 263 | 503 264 | 461 265 | 291 266 | 256 267 | 462 268 | 499 269 | 494 270 | 345 271 | 473 272 | 410 273 | 425 274 | 416 275 | 424 276 | 332 277 | 504 278 | 393 279 | 471 280 | 336 281 | 472 282 | 329 283 | 421 284 | 449 285 | 426 286 | 397 287 | 509 288 | 427 289 | 458 290 | 323 291 | 358 292 | 441 293 | 460 294 | 328 295 | 330 296 | 491 297 | 399 298 | 385 299 | 346 300 | 482 301 | 305 302 | 337 303 | 279 304 | 379 305 | 334 306 | 480 307 | 366 308 | 381 309 | 490 310 | 497 311 | 450 312 | 304 313 | 368 314 | 432 315 | 420 316 | 372 317 | 487 318 | 324 319 | 350 320 | 287 321 | 436 322 | 378 323 | 451 324 | 307 325 | 347 326 | 488 327 | 320 328 | 489 329 | 356 330 | 382 331 | 446 332 | 359 333 | 475 334 | 470 335 | 507 336 | 257 337 | 353 338 | 321 339 | 361 340 | 486 341 | 340 342 | 367 343 | 496 344 | 474 345 | 327 346 | 464 347 | 363 348 | 481 349 | 296 350 | 349 351 | 375 352 | 335 353 | 374 354 | 456 355 | 468 356 | 260 357 | 403 358 | 309 359 | 267 360 | 362 361 | 344 362 | 373 363 | 493 364 | 371 365 | 478 366 | 377 367 | 453 368 | 495 369 | 339 370 | 326 371 | 355 372 | 268 373 | 452 374 | 469 375 | 282 376 | 286 377 | 312 378 | 498 379 | 348 380 | 476 381 | 511 382 | 333 383 | 354 384 | 351 385 | 650 386 | 653 387 | 560 388 | 536 389 | 532 390 | 680 391 | 564 392 | 530 393 | 562 394 | 555 395 | 553 396 | 677 397 | 667 398 | 662 399 | 534 400 | 572 401 | 672 402 | 523 403 | 544 404 | 517 405 | 541 406 | 703 407 | 658 408 | 664 409 | 702 410 | 681 411 | 571 412 | 554 413 | 675 414 | 738 415 | 646 416 | 665 417 | 676 418 | 684 419 | 692 420 | 529 421 | 538 422 | 557 423 | 512 424 | 565 425 | 563 426 | 514 427 | 527 428 | 610 429 | 546 430 | 551 431 | 558 432 | 575 433 | 518 434 | 522 435 | 651 436 | 524 437 | 519 438 | 693 439 | 521 440 | 561 441 | 574 442 | 750 443 | 637 444 | 548 445 | 618 446 | 659 447 | 679 448 | 701 449 | 687 450 | 570 451 | 531 452 | 535 453 | 736 454 | 657 455 | 720 456 | 516 457 | 766 458 | 740 459 | 689 460 | 668 461 | 634 462 | 678 463 | 655 464 | 525 465 | 691 466 | 694 467 | 673 468 | 724 469 | 648 470 | 545 471 | 543 472 | 595 473 | 683 474 | 615 475 | 627 476 | 764 477 | 705 478 | 605 479 | 526 480 | 654 481 | 614 482 | 566 483 | 757 484 | 569 485 | 621 486 | 604 487 | 696 488 | 759 489 | 695 490 | 520 491 | 576 492 | 652 493 | 581 494 | 663 495 | 580 496 | 547 497 | 719 498 | 722 499 | 606 500 | 727 501 | 758 502 | 706 503 | 540 504 | 735 505 | 729 506 | 700 507 | 718 508 | 632 509 | 754 510 | 628 511 | 682 512 | 594 513 | 661 514 | 666 515 | 608 516 | 
537 517 | 725 518 | 760 519 | 612 520 | 567 521 | 715 522 | 611 523 | 674 524 | 528 525 | 643 526 | 714 527 | 660 528 | 728 529 | 741 530 | 737 531 | 607 532 | 670 533 | 585 534 | 559 535 | 699 536 | 697 537 | 568 538 | 550 539 | 630 540 | 708 541 | 645 542 | 573 543 | 590 544 | 690 545 | 515 546 | 635 547 | 732 548 | 603 549 | 730 550 | 542 551 | 636 552 | 622 553 | 533 554 | 685 555 | 761 556 | 688 557 | 709 558 | 721 559 | 711 560 | 707 561 | 642 562 | 641 563 | 619 564 | 593 565 | 686 566 | 723 567 | 749 568 | 549 569 | 716 570 | 644 571 | 671 572 | 639 573 | 640 574 | 734 575 | 733 576 | 577 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_attn_768_kl_8_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 235 2 | 238 3 | 106 4 | 194 5 | 201 6 | 246 7 | 96 8 | 230 9 | 247 10 | 233 11 | 158 12 | 77 13 | 100 14 | 228 15 | 69 16 | 39 17 | 236 18 | 231 19 | 116 20 | 132 21 | 251 22 | 166 23 | 123 24 | 207 25 | 192 26 | 115 27 | 113 28 | 163 29 | 215 30 | 197 31 | 108 32 | 162 33 | 244 34 | 183 35 | 79 36 | 204 37 | 141 38 | 225 39 | 118 40 | 27 41 | 254 42 | 248 43 | 63 44 | 119 45 | 208 46 | 180 47 | 97 48 | 91 49 | 153 50 | 213 51 | 203 52 | 219 53 | 37 54 | 122 55 | 143 56 | 241 57 | 32 58 | 55 59 | 214 60 | 101 61 | 206 62 | 38 63 | 229 64 | 175 65 | 90 66 | 15 67 | 47 68 | 124 69 | 48 70 | 110 71 | 255 72 | 117 73 | 74 74 | 220 75 | 190 76 | 157 77 | 209 78 | 109 79 | 59 80 | 125 81 | 12 82 | 105 83 | 89 84 | 178 85 | 85 86 | 159 87 | 21 88 | 216 89 | 140 90 | 161 91 | 93 92 | 184 93 | 51 94 | 155 95 | 75 96 | 146 97 | 212 98 | 3 99 | 0 100 | 249 101 | 128 102 | 53 103 | 223 104 | 129 105 | 80 106 | 237 107 | 25 108 | 152 109 | 149 110 | 1 111 | 134 112 | 136 113 | 198 114 | 186 115 | 176 116 | 41 117 | 195 118 | 170 119 | 165 120 | 66 121 | 191 122 | 43 123 | 210 124 | 167 125 | 173 126 | 67 127 | 226 128 | 84 129 | 2 130 | 52 131 | 44 132 | 211 133 | 49 134 | 20 135 | 145 136 | 111 137 | 147 138 | 137 139 | 160 140 | 243 141 | 71 142 | 35 143 | 81 144 | 138 145 | 171 146 | 130 147 | 150 148 | 8 149 | 188 150 | 112 151 | 205 152 | 169 153 | 13 154 | 240 155 | 26 156 | 23 157 | 187 158 | 4 159 | 144 160 | 179 161 | 172 162 | 24 163 | 218 164 | 245 165 | 200 166 | 64 167 | 56 168 | 16 169 | 50 170 | 242 171 | 14 172 | 164 173 | 185 174 | 40 175 | 135 176 | 9 177 | 217 178 | 182 179 | 61 180 | 127 181 | 86 182 | 10 183 | 177 184 | 107 185 | 224 186 | 42 187 | 156 188 | 222 189 | 76 190 | 174 191 | 73 192 | 7 193 | 442 194 | 392 195 | 468 196 | 423 197 | 427 198 | 394 199 | 438 200 | 426 201 | 436 202 | 399 203 | 412 204 | 402 205 | 460 206 | 480 207 | 477 208 | 408 209 | 431 210 | 441 211 | 494 212 | 440 213 | 484 214 | 398 215 | 419 216 | 401 217 | 384 218 | 446 219 | 418 220 | 388 221 | 390 222 | 429 223 | 404 224 | 469 225 | 415 226 | 485 227 | 411 228 | 511 229 | 507 230 | 478 231 | 422 232 | 400 233 | 417 234 | 406 235 | 454 236 | 495 237 | 462 238 | 456 239 | 391 240 | 407 241 | 488 242 | 475 243 | 425 244 | 360 245 | 434 246 | 421 247 | 272 248 | 452 249 | 357 250 | 479 251 | 368 252 | 482 253 | 345 254 | 405 255 | 443 256 | 437 257 | 396 258 | 410 259 | 445 260 | 346 261 | 334 262 | 387 263 | 397 264 | 338 265 | 439 266 | 486 267 | 428 268 | 509 269 | 496 270 | 493 271 | 367 272 | 474 273 | 466 274 | 510 275 | 349 276 | 403 277 | 505 278 | 461 279 | 321 280 | 332 281 | 498 282 | 362 283 | 455 284 | 351 285 | 393 286 | 416 287 | 501 288 | 463 289 
| 352 290 | 430 291 | 295 292 | 276 293 | 302 294 | 312 295 | 409 296 | 378 297 | 472 298 | 450 299 | 307 300 | 457 301 | 322 302 | 326 303 | 324 304 | 311 305 | 487 306 | 414 307 | 297 308 | 285 309 | 342 310 | 288 311 | 459 312 | 385 313 | 328 314 | 380 315 | 361 316 | 303 317 | 503 318 | 467 319 | 305 320 | 465 321 | 341 322 | 274 323 | 447 324 | 289 325 | 316 326 | 261 327 | 370 328 | 432 329 | 372 330 | 464 331 | 318 332 | 267 333 | 279 334 | 313 335 | 344 336 | 265 337 | 262 338 | 282 339 | 315 340 | 490 341 | 458 342 | 329 343 | 306 344 | 435 345 | 363 346 | 336 347 | 330 348 | 483 349 | 333 350 | 327 351 | 395 352 | 502 353 | 448 354 | 293 355 | 343 356 | 491 357 | 373 358 | 365 359 | 348 360 | 383 361 | 314 362 | 356 363 | 508 364 | 355 365 | 451 366 | 506 367 | 317 368 | 323 369 | 331 370 | 310 371 | 280 372 | 476 373 | 298 374 | 264 375 | 376 376 | 420 377 | 366 378 | 369 379 | 284 380 | 377 381 | 290 382 | 300 383 | 301 384 | 273 385 | 619 386 | 630 387 | 576 388 | 623 389 | 594 390 | 584 391 | 597 392 | 625 393 | 621 394 | 613 395 | 590 396 | 629 397 | 582 398 | 583 399 | 607 400 | 649 401 | 600 402 | 639 403 | 608 404 | 592 405 | 725 406 | 618 407 | 722 408 | 645 409 | 591 410 | 696 411 | 610 412 | 622 413 | 651 414 | 634 415 | 606 416 | 612 417 | 743 418 | 579 419 | 681 420 | 617 421 | 736 422 | 578 423 | 598 424 | 642 425 | 756 426 | 604 427 | 602 428 | 588 429 | 593 430 | 609 431 | 758 432 | 717 433 | 633 434 | 654 435 | 611 436 | 694 437 | 631 438 | 704 439 | 638 440 | 632 441 | 615 442 | 627 443 | 699 444 | 757 445 | 685 446 | 595 447 | 641 448 | 709 449 | 690 450 | 664 451 | 720 452 | 708 453 | 705 454 | 763 455 | 714 456 | 580 457 | 643 458 | 707 459 | 750 460 | 728 461 | 628 462 | 677 463 | 652 464 | 730 465 | 729 466 | 614 467 | 746 468 | 616 469 | 686 470 | 726 471 | 688 472 | 724 473 | 737 474 | 716 475 | 715 476 | 740 477 | 646 478 | 739 479 | 727 480 | 751 481 | 663 482 | 599 483 | 689 484 | 691 485 | 587 486 | 719 487 | 711 488 | 673 489 | 734 490 | 666 491 | 577 492 | 744 493 | 700 494 | 635 495 | 745 496 | 650 497 | 755 498 | 710 499 | 713 500 | 674 501 | 581 502 | 732 503 | 703 504 | 682 505 | 573 506 | 742 507 | 718 508 | 760 509 | 697 510 | 753 511 | 680 512 | 636 513 | 670 514 | 640 515 | 601 516 | 766 517 | 656 518 | 693 519 | 647 520 | 655 521 | 759 522 | 754 523 | 723 524 | 762 525 | 706 526 | 519 527 | 653 528 | 620 529 | 669 530 | 659 531 | 702 532 | 721 533 | 662 534 | 684 535 | 557 536 | 514 537 | 683 538 | 566 539 | 701 540 | 752 541 | 560 542 | 648 543 | 585 544 | 589 545 | 520 546 | 687 547 | 538 548 | 529 549 | 531 550 | 556 551 | 586 552 | 731 553 | 542 554 | 521 555 | 767 556 | 543 557 | 561 558 | 522 559 | 530 560 | 667 561 | 567 562 | 526 563 | 657 564 | 562 565 | 678 566 | 747 567 | 738 568 | 574 569 | 712 570 | 572 571 | 551 572 | 637 573 | 692 574 | 527 575 | 559 576 | 516 577 | -------------------------------------------------------------------------------- /importance/kl2k/Deit_base_12_attn_768_kl_1_2k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 69 2 | 77 3 | 73 4 | 112 5 | 92 6 | 81 7 | 123 8 | 68 9 | 116 10 | 122 11 | 80 12 | 78 13 | 124 14 | 71 15 | 117 16 | 113 17 | 110 18 | 84 19 | 100 20 | 108 21 | 95 22 | 105 23 | 74 24 | 104 25 | 120 26 | 101 27 | 126 28 | 79 29 | 70 30 | 254 31 | 233 32 | 224 33 | 247 34 | 93 35 | 252 36 | 192 37 | 85 38 | 88 39 | 66 40 | 253 41 | 87 42 | 228 43 | 212 44 | 246 45 | 240 46 | 209 47 | 239 48 | 238 49 | 194 50 | 236 51 | 207 
52 | 222 53 | 226 54 | 107 55 | 214 56 | 231 57 | 249 58 | 204 59 | 72 60 | 242 61 | 234 62 | 220 63 | 218 64 | 221 65 | 201 66 | 251 67 | 225 68 | 98 69 | 102 70 | 75 71 | 205 72 | 227 73 | 215 74 | 230 75 | 217 76 | 127 77 | 213 78 | 90 79 | 223 80 | 250 81 | 76 82 | 232 83 | 241 84 | 125 85 | 64 86 | 94 87 | 244 88 | 193 89 | 197 90 | 109 91 | 199 92 | 202 93 | 119 94 | 235 95 | 229 96 | 99 97 | 198 98 | 255 99 | 208 100 | 67 101 | 86 102 | 216 103 | 118 104 | 121 105 | 203 106 | 237 107 | 200 108 | 210 109 | 38 110 | 243 111 | 65 112 | 96 113 | 16 114 | 83 115 | 179 116 | 160 117 | 103 118 | 111 119 | 191 120 | 196 121 | 89 122 | 82 123 | 151 124 | 248 125 | 176 126 | 106 127 | 149 128 | 114 129 | 31 130 | 195 131 | 58 132 | 153 133 | 178 134 | 20 135 | 156 136 | 147 137 | 157 138 | 2 139 | 143 140 | 140 141 | 144 142 | 136 143 | 5 144 | 166 145 | 49 146 | 128 147 | 0 148 | 173 149 | 186 150 | 182 151 | 97 152 | 139 153 | 161 154 | 177 155 | 146 156 | 155 157 | 190 158 | 189 159 | 131 160 | 188 161 | 183 162 | 163 163 | 168 164 | 141 165 | 158 166 | 187 167 | 165 168 | 28 169 | 167 170 | 130 171 | 50 172 | 59 173 | 185 174 | 164 175 | 184 176 | 56 177 | 175 178 | 21 179 | 171 180 | 138 181 | 8 182 | 39 183 | 1 184 | 132 185 | 9 186 | 17 187 | 34 188 | 150 189 | 46 190 | 24 191 | 170 192 | 172 193 | 438 194 | 413 195 | 296 196 | 331 197 | 308 198 | 442 199 | 282 200 | 385 201 | 375 202 | 386 203 | 428 204 | 381 205 | 410 206 | 427 207 | 295 208 | 262 209 | 343 210 | 346 211 | 317 212 | 440 213 | 314 214 | 345 215 | 357 216 | 421 217 | 265 218 | 366 219 | 306 220 | 415 221 | 360 222 | 394 223 | 285 224 | 323 225 | 426 226 | 371 227 | 310 228 | 261 229 | 338 230 | 256 231 | 436 232 | 289 233 | 324 234 | 400 235 | 266 236 | 377 237 | 372 238 | 284 239 | 358 240 | 392 241 | 359 242 | 411 243 | 292 244 | 363 245 | 328 246 | 349 247 | 259 248 | 404 249 | 348 250 | 389 251 | 420 252 | 474 253 | 430 254 | 268 255 | 332 256 | 347 257 | 309 258 | 333 259 | 433 260 | 267 261 | 409 262 | 287 263 | 272 264 | 369 265 | 269 266 | 407 267 | 305 268 | 344 269 | 401 270 | 387 271 | 281 272 | 312 273 | 370 274 | 431 275 | 447 276 | 320 277 | 468 278 | 304 279 | 443 280 | 278 281 | 362 282 | 302 283 | 398 284 | 406 285 | 483 286 | 334 287 | 419 288 | 321 289 | 319 290 | 498 291 | 318 292 | 399 293 | 257 294 | 280 295 | 449 296 | 276 297 | 275 298 | 429 299 | 405 300 | 258 301 | 376 302 | 503 303 | 437 304 | 382 305 | 460 306 | 464 307 | 497 308 | 300 309 | 301 310 | 457 311 | 373 312 | 466 313 | 506 314 | 353 315 | 322 316 | 286 317 | 313 318 | 422 319 | 367 320 | 380 321 | 368 322 | 326 323 | 486 324 | 499 325 | 378 326 | 379 327 | 439 328 | 482 329 | 501 330 | 271 331 | 452 332 | 351 333 | 390 334 | 342 335 | 288 336 | 340 337 | 484 338 | 435 339 | 465 340 | 489 341 | 396 342 | 445 343 | 491 344 | 391 345 | 450 346 | 356 347 | 355 348 | 510 349 | 508 350 | 388 351 | 414 352 | 481 353 | 330 354 | 505 355 | 393 356 | 441 357 | 325 358 | 412 359 | 337 360 | 509 361 | 279 362 | 477 363 | 461 364 | 480 365 | 485 366 | 341 367 | 432 368 | 417 369 | 469 370 | 339 371 | 454 372 | 418 373 | 507 374 | 335 375 | 299 376 | 467 377 | 459 378 | 364 379 | 494 380 | 446 381 | 424 382 | 453 383 | 270 384 | 488 385 | 745 386 | 724 387 | 737 388 | 743 389 | 767 390 | 760 391 | 765 392 | 721 393 | 742 394 | 764 395 | 758 396 | 716 397 | 756 398 | 739 399 | 714 400 | 715 401 | 729 402 | 761 403 | 749 404 | 740 405 | 759 406 | 722 407 | 744 408 | 755 409 | 746 410 | 710 411 | 734 412 | 728 413 | 751 414 | 754 415 | 525 416 | 
708 417 | 554 418 | 567 419 | 516 420 | 558 421 | 541 422 | 730 423 | 530 424 | 546 425 | 705 426 | 555 427 | 550 428 | 552 429 | 565 430 | 517 431 | 526 432 | 572 433 | 575 434 | 523 435 | 536 436 | 539 437 | 693 438 | 532 439 | 531 440 | 574 441 | 750 442 | 563 443 | 515 444 | 736 445 | 569 446 | 763 447 | 748 448 | 577 449 | 521 450 | 665 451 | 547 452 | 537 453 | 585 454 | 766 455 | 630 456 | 518 457 | 551 458 | 686 459 | 679 460 | 549 461 | 542 462 | 762 463 | 673 464 | 680 465 | 732 466 | 725 467 | 681 468 | 570 469 | 695 470 | 628 471 | 562 472 | 618 473 | 603 474 | 632 475 | 561 476 | 701 477 | 571 478 | 627 479 | 622 480 | 612 481 | 535 482 | 667 483 | 533 484 | 649 485 | 671 486 | 682 487 | 670 488 | 633 489 | 573 490 | 674 491 | 609 492 | 543 493 | 611 494 | 678 495 | 522 496 | 513 497 | 613 498 | 676 499 | 582 500 | 616 501 | 713 502 | 654 503 | 690 504 | 702 505 | 669 506 | 668 507 | 568 508 | 625 509 | 593 510 | 691 511 | 620 512 | 666 513 | 604 514 | 641 515 | 545 516 | 683 517 | 588 518 | 617 519 | 698 520 | 640 521 | 592 522 | 635 523 | 700 524 | 675 525 | 598 526 | 718 527 | 653 528 | 643 529 | 687 530 | 579 531 | 689 532 | 560 533 | 586 534 | 583 535 | 597 536 | 601 537 | 591 538 | 602 539 | 614 540 | 548 541 | 610 542 | 599 543 | 646 544 | 580 545 | 703 546 | 645 547 | 685 548 | 596 549 | 684 550 | 576 551 | 656 552 | 527 553 | 581 554 | 553 555 | 606 556 | 624 557 | 608 558 | 757 559 | 600 560 | 590 561 | 647 562 | 658 563 | 735 564 | 528 565 | 605 566 | 639 567 | 688 568 | 524 569 | 637 570 | 699 571 | 634 572 | 662 573 | 559 574 | 623 575 | 607 576 | 578 577 | -------------------------------------------------------------------------------- /importance/kl5k/Deit_base_12_attn_768_kl_1_5k_importance_rank_multihead3.txt: -------------------------------------------------------------------------------- 1 | 78 2 | 116 3 | 77 4 | 71 5 | 80 6 | 92 7 | 73 8 | 112 9 | 69 10 | 122 11 | 68 12 | 100 13 | 110 14 | 120 15 | 124 16 | 95 17 | 81 18 | 74 19 | 117 20 | 84 21 | 104 22 | 105 23 | 113 24 | 123 25 | 108 26 | 70 27 | 247 28 | 79 29 | 101 30 | 126 31 | 233 32 | 93 33 | 192 34 | 254 35 | 252 36 | 66 37 | 88 38 | 85 39 | 224 40 | 87 41 | 253 42 | 240 43 | 228 44 | 238 45 | 222 46 | 194 47 | 246 48 | 236 49 | 226 50 | 207 51 | 239 52 | 107 53 | 214 54 | 209 55 | 249 56 | 212 57 | 225 58 | 221 59 | 242 60 | 204 61 | 72 62 | 98 63 | 218 64 | 201 65 | 231 66 | 251 67 | 234 68 | 220 69 | 75 70 | 213 71 | 205 72 | 227 73 | 215 74 | 127 75 | 230 76 | 217 77 | 102 78 | 250 79 | 223 80 | 90 81 | 76 82 | 64 83 | 125 84 | 94 85 | 232 86 | 244 87 | 109 88 | 197 89 | 193 90 | 241 91 | 198 92 | 229 93 | 202 94 | 235 95 | 99 96 | 119 97 | 199 98 | 255 99 | 237 100 | 216 101 | 208 102 | 67 103 | 86 104 | 118 105 | 121 106 | 203 107 | 200 108 | 96 109 | 210 110 | 38 111 | 65 112 | 83 113 | 243 114 | 16 115 | 179 116 | 191 117 | 160 118 | 103 119 | 82 120 | 196 121 | 149 122 | 89 123 | 111 124 | 151 125 | 31 126 | 176 127 | 128 128 | 248 129 | 114 130 | 106 131 | 153 132 | 178 133 | 136 134 | 195 135 | 20 136 | 156 137 | 58 138 | 157 139 | 139 140 | 182 141 | 168 142 | 166 143 | 144 144 | 173 145 | 140 146 | 2 147 | 177 148 | 183 149 | 5 150 | 164 151 | 171 152 | 0 153 | 161 154 | 186 155 | 188 156 | 97 157 | 141 158 | 143 159 | 131 160 | 165 161 | 130 162 | 147 163 | 190 164 | 49 165 | 146 166 | 189 167 | 50 168 | 138 169 | 155 170 | 184 171 | 187 172 | 185 173 | 28 174 | 167 175 | 158 176 | 39 177 | 175 178 | 17 179 | 1 180 | 8 181 | 163 182 | 170 183 | 59 184 | 46 185 | 172 186 | 12 187 | 135 
188 | 9 189 | 174 190 | 56 191 | 21 192 | 150 193 | 438 194 | 331 195 | 413 196 | 308 197 | 385 198 | 296 199 | 282 200 | 442 201 | 375 202 | 343 203 | 386 204 | 265 205 | 285 206 | 428 207 | 262 208 | 317 209 | 346 210 | 381 211 | 295 212 | 410 213 | 421 214 | 306 215 | 440 216 | 357 217 | 427 218 | 360 219 | 392 220 | 261 221 | 345 222 | 289 223 | 366 224 | 338 225 | 415 226 | 400 227 | 324 228 | 323 229 | 394 230 | 310 231 | 284 232 | 314 233 | 377 234 | 372 235 | 404 236 | 436 237 | 363 238 | 292 239 | 426 240 | 349 241 | 411 242 | 358 243 | 371 244 | 259 245 | 332 246 | 266 247 | 256 248 | 359 249 | 328 250 | 305 251 | 420 252 | 430 253 | 304 254 | 407 255 | 474 256 | 409 257 | 347 258 | 387 259 | 333 260 | 269 261 | 258 262 | 401 263 | 433 264 | 268 265 | 300 266 | 272 267 | 267 268 | 389 269 | 348 270 | 287 271 | 344 272 | 278 273 | 321 274 | 309 275 | 369 276 | 320 277 | 318 278 | 468 279 | 257 280 | 405 281 | 312 282 | 319 283 | 376 284 | 370 285 | 419 286 | 431 287 | 398 288 | 406 289 | 281 290 | 301 291 | 443 292 | 275 293 | 373 294 | 271 295 | 302 296 | 286 297 | 362 298 | 483 299 | 449 300 | 498 301 | 447 302 | 437 303 | 399 304 | 353 305 | 326 306 | 429 307 | 506 308 | 503 309 | 466 310 | 457 311 | 439 312 | 460 313 | 382 314 | 380 315 | 452 316 | 313 317 | 280 318 | 378 319 | 276 320 | 322 321 | 482 322 | 497 323 | 435 324 | 510 325 | 340 326 | 379 327 | 367 328 | 501 329 | 288 330 | 351 331 | 334 332 | 422 333 | 464 334 | 486 335 | 368 336 | 396 337 | 465 338 | 484 339 | 505 340 | 414 341 | 477 342 | 491 343 | 485 344 | 390 345 | 355 346 | 489 347 | 330 348 | 481 349 | 388 350 | 499 351 | 391 352 | 342 353 | 445 354 | 441 355 | 450 356 | 325 357 | 393 358 | 356 359 | 337 360 | 412 361 | 279 362 | 417 363 | 508 364 | 299 365 | 509 366 | 469 367 | 339 368 | 461 369 | 480 370 | 424 371 | 507 372 | 470 373 | 432 374 | 454 375 | 297 376 | 446 377 | 341 378 | 364 379 | 335 380 | 418 381 | 494 382 | 270 383 | 467 384 | 488 385 | 745 386 | 765 387 | 743 388 | 724 389 | 742 390 | 737 391 | 721 392 | 716 393 | 760 394 | 767 395 | 756 396 | 758 397 | 764 398 | 714 399 | 739 400 | 740 401 | 729 402 | 715 403 | 749 404 | 761 405 | 722 406 | 759 407 | 744 408 | 755 409 | 746 410 | 710 411 | 734 412 | 554 413 | 728 414 | 567 415 | 525 416 | 708 417 | 558 418 | 751 419 | 546 420 | 555 421 | 541 422 | 530 423 | 754 424 | 572 425 | 550 426 | 516 427 | 552 428 | 565 429 | 526 430 | 730 431 | 575 432 | 705 433 | 517 434 | 574 435 | 531 436 | 532 437 | 539 438 | 693 439 | 523 440 | 536 441 | 563 442 | 515 443 | 561 444 | 763 445 | 750 446 | 766 447 | 521 448 | 736 449 | 547 450 | 585 451 | 577 452 | 679 453 | 665 454 | 537 455 | 748 456 | 551 457 | 542 458 | 518 459 | 630 460 | 571 461 | 686 462 | 533 463 | 549 464 | 681 465 | 535 466 | 628 467 | 603 468 | 667 469 | 680 470 | 618 471 | 569 472 | 612 473 | 673 474 | 695 475 | 627 476 | 633 477 | 632 478 | 701 479 | 676 480 | 562 481 | 762 482 | 674 483 | 622 484 | 682 485 | 609 486 | 616 487 | 671 488 | 570 489 | 613 490 | 649 491 | 513 492 | 732 493 | 670 494 | 725 495 | 543 496 | 702 497 | 593 498 | 617 499 | 678 500 | 604 501 | 698 502 | 573 503 | 582 504 | 611 505 | 669 506 | 691 507 | 522 508 | 668 509 | 588 510 | 666 511 | 635 512 | 654 513 | 713 514 | 690 515 | 592 516 | 596 517 | 653 518 | 614 519 | 641 520 | 576 521 | 637 522 | 625 523 | 620 524 | 598 525 | 568 526 | 545 527 | 597 528 | 553 529 | 683 530 | 591 531 | 700 532 | 689 533 | 560 534 | 527 535 | 718 536 | 643 537 | 640 538 | 646 539 | 703 540 | 586 541 | 675 542 | 580 543 | 
687 544 | 583 545 | 601 546 | 645 547 | 685 548 | 599 549 | 684 550 | 600 551 | 581 552 | 656 553 | 548 554 | 610 555 | 590 556 | 579 557 | 624 558 | 608 559 | 606 560 | 615 561 | 639 562 | 602 563 | 524 564 | 658 565 | 647 566 | 634 567 | 757 568 | 619 569 | 623 570 | 605 571 | 578 572 | 559 573 | 528 574 | 662 575 | 638 576 | 688 577 | -------------------------------------------------------------------------------- /utils/pred_utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import csv 5 | import numpy as np 6 | import time 7 | import sys 8 | import shutil 9 | 10 | import torch 11 | 12 | 13 | class ProgressMeter(object): 14 | def __init__(self, num_batches, meters, prefix=""): 15 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches) 16 | self.meters = meters 17 | self.prefix = prefix 18 | 19 | def display(self, batch): 20 | entries = [self.prefix + self.batch_fmtstr.format(batch)] 21 | entries += [str(meter) for meter in self.meters] 22 | print('\t'.join(entries)) 23 | 24 | def _get_batch_fmtstr(self, num_batches): 25 | num_digits = len(str(num_batches // 1)) 26 | fmt = '{:' + str(num_digits) + 'd}' 27 | return '[' + fmt + '/' + fmt.format(num_batches) + ']' 28 | 29 | def accuracy(output, target, topk=(1,)): 30 | """Computes the accuracy over the k top predictions for the specified values of k""" 31 | with torch.no_grad(): 32 | maxk = max(topk) 33 | batch_size = target.size(0) 34 | 35 | _, pred = output.topk(maxk, 1, True, True) 36 | pred = pred.t() 37 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 38 | 39 | res = [] 40 | for k in topk: 41 | correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True) 42 | res.append(correct_k.mul_(100.0 / batch_size)) 43 | return res 44 | 45 | 46 | def simple_accuracy(preds, labels): 47 | return (preds == labels).mean() 48 | 49 | def save_model(args, model, name): 50 | model_to_save = model.module if hasattr(model, 'module') else model 51 | model_checkpoint = os.path.join(args.output_dir, "%s_checkpoint.bin" % name) 52 | torch.save(model_to_save.state_dict(), model_checkpoint) 53 | 54 | def set_seed(args): 55 | random.seed(args.seed) 56 | np.random.seed(args.seed) 57 | torch.manual_seed(args.seed) 58 | if args.n_gpu > 0: 59 | torch.cuda.manual_seed_all(args.seed) 60 | 61 | 62 | class AverageMeter(object): 63 | """Computes and stores the average and current value""" 64 | def __init__(self, name, fmt=':f'): 65 | self.name = name 66 | self.fmt = fmt 67 | self.reset() 68 | 69 | def reset(self): 70 | self.val = 0 71 | self.avg = 0 72 | self.sum = 0 73 | self.count = 0 74 | 75 | def update(self, val, n=1): 76 | self.val = val 77 | self.sum += val * n 78 | self.count += n 79 | self.avg = self.sum / self.count 80 | 81 | def add(self, nsum, n=1): 82 | self.val = nsum / n 83 | self.sum += nsum 84 | self.count += n 85 | self.avg = self.sum / self.count 86 | 87 | def __str__(self): 88 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' 89 | return fmtstr.format(**self.__dict__) 90 | 91 | def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): 92 | torch.save(state, filename) 93 | # if is_best: 94 | # shutil.copyfile(filename, 'model_best.pth.tar') 95 | 96 | 97 | _, term_width = os.popen('stty size', 'r').read().split() 98 | term_width = int(term_width) 99 | 100 | TOTAL_BAR_LENGTH = 65. 
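# last_time / begin_time are module-level timestamps used by progress_bar below to
# report per-step and total elapsed time; TOTAL_BAR_LENGTH is the bar width in characters.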
101 | last_time = time.time() 102 | begin_time = last_time 103 | 104 | def progress_bar(current, total, msg=None): 105 | global last_time, begin_time 106 | if current == 0: 107 | begin_time = time.time() # Reset for new bar. 108 | 109 | cur_len = int(TOTAL_BAR_LENGTH*current/total) 110 | rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1 111 | 112 | sys.stdout.write(' [') 113 | for i in range(cur_len): 114 | sys.stdout.write('=') 115 | sys.stdout.write('>') 116 | for i in range(rest_len): 117 | sys.stdout.write('.') 118 | sys.stdout.write(']') 119 | 120 | cur_time = time.time() 121 | step_time = cur_time - last_time 122 | last_time = cur_time 123 | tot_time = cur_time - begin_time 124 | 125 | L = [] 126 | L.append(' Step: %s' % format_time(step_time)) 127 | L.append(' | Tot: %s' % format_time(tot_time)) 128 | if msg: 129 | L.append(' | ' + msg) 130 | 131 | msg = ''.join(L) 132 | sys.stdout.write(msg) 133 | for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3): 134 | sys.stdout.write(' ') 135 | 136 | # Go back to the center of the bar. 137 | for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2): 138 | sys.stdout.write('\b') 139 | sys.stdout.write(' %d/%d ' % (current+1, total)) 140 | 141 | if current < total-1: 142 | sys.stdout.write('\r') 143 | else: 144 | sys.stdout.write('\n') 145 | sys.stdout.flush() 146 | 147 | def format_time(seconds): 148 | days = int(seconds / 3600/24) 149 | seconds = seconds - days*3600*24 150 | hours = int(seconds / 3600) 151 | seconds = seconds - hours*3600 152 | minutes = int(seconds / 60) 153 | seconds = seconds - minutes*60 154 | secondsf = int(seconds) 155 | seconds = seconds - secondsf 156 | millis = int(seconds*1000) 157 | 158 | f = '' 159 | i = 1 160 | if days > 0: 161 | f += str(days) + 'D' 162 | i += 1 163 | if hours > 0 and i <= 2: 164 | f += str(hours) + 'h' 165 | i += 1 166 | if minutes > 0 and i <= 2: 167 | f += str(minutes) + 'm' 168 | i += 1 169 | if secondsf > 0 and i <= 2: 170 | f += str(secondsf) + 's' 171 | i += 1 172 | if millis > 0 and i <= 2: 173 | f += str(millis) + 'ms' 174 | i += 1 175 | if f == '': 176 | f = '0ms' 177 | return f 178 | -------------------------------------------------------------------------------- /test_ffn_importance_score.py: -------------------------------------------------------------------------------- 1 | '''Train CIFAR10 with PyTorch.''' 2 | from __future__ import print_function 3 | 4 | import torch 5 | import torch.nn as nn 6 | from functools import partial 7 | import torch.nn.functional as F 8 | import torch.backends.cudnn as cudnn 9 | from torch.autograd import Variable 10 | 11 | import torchvision 12 | import torchvision.transforms as transforms 13 | 14 | import logging 15 | import argparse 16 | import os 17 | import random 18 | import csv 19 | import numpy as np 20 | import time 21 | 22 | import torch 23 | from torch.cuda.amp import autocast as autocast 24 | from torch.cuda.amp import GradScaler as GradScaler 25 | from tqdm import tqdm 26 | from utils.pred_utils import ProgressMeter, accuracy, AverageMeter 27 | 28 | from models.model import VisionTransformer 29 | from models.ffn_importance_score_model import VisionTransformer as ffn_VisionTransformer 30 | 31 | 32 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 33 | parser.add_argument('--resume_sess', default='vit_default',type=str, help='session id') 34 | parser.add_argument("--img_size", default=256, type=int, 35 | help="Resolution size") 36 | parser.add_argument("--batch_size", default=256, type=int, 37 | help="Total batch size 
for training.") 38 | parser.add_argument("--reduce", default=0, type=int, 39 | help="Total batch size for training.") 40 | 41 | args = parser.parse_args() 42 | 43 | 44 | use_cuda = torch.cuda.is_available() 45 | batch_size = args.batch_size 46 | if use_cuda: 47 | n_gpu = torch.cuda.device_count() 48 | batch_size *= n_gpu 49 | 50 | # print('==> Preparing data..') 51 | transform_test = transforms.Compose([ 52 | # transforms.Resize((256, 256)), 53 | transforms.Resize(256, interpolation=3), 54 | transforms.CenterCrop(224), 55 | transforms.ToTensor(), 56 | transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 57 | ]) 58 | 59 | 60 | testset = torchvision.datasets.ImageFolder(root='/mnt/ramdisk/ImageNet/fewshot5_train/', transform=transform_test) 61 | testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False,pin_memory=True, num_workers=16) 62 | 63 | checkpoint = torch.load("pretrainmodel/deit_base_patch16_224-b5f2ef4d.pth", map_location='cpu') 64 | teacher_model = VisionTransformer( 65 | patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, 66 | norm_layer=partial(nn.LayerNorm, eps=1e-6)) 67 | 68 | model_dict = teacher_model.state_dict() 69 | new_dict = {} 70 | cnt = 1 71 | for k, v in checkpoint['model'].items(): 72 | if k in model_dict and v.size()==model_dict[k].size(): 73 | cnt += 1 74 | new_dict[k] = v 75 | model_dict.update(new_dict) 76 | teacher_model.load_state_dict(model_dict) 77 | teacher_model.cuda() 78 | teacher_model = torch.nn.DataParallel(teacher_model) 79 | 80 | checkpoint = torch.load("pretrainmodel/deit_base_patch16_224-b5f2ef4d.pth", map_location='cpu') 81 | 82 | kls = [] 83 | coss = [] 84 | results1 = [] 85 | results5 = [] 86 | 87 | net = ffn_VisionTransformer( 88 | patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, 89 | norm_layer=partial(nn.LayerNorm, eps=1e-6), reduce_ind = args.reduce) 90 | net.cuda() 91 | net = torch.nn.DataParallel(net) 92 | 93 | model_dict = net.state_dict() 94 | 95 | candidate_index = range(3072) 96 | for delete_ind in candidate_index: 97 | new_dict = {} 98 | cnt = 1 99 | for k, v in checkpoint['model'].items(): 100 | # print(k,end= ", ") 101 | if "blocks." + str(args.reduce) + ".mlp.fc1" in k: 102 | new_v = v[torch.arange(v.size(0))!=delete_ind] 103 | # print(new_v.shape) 104 | new_dict["module." + k] = new_v 105 | elif "blocks." + str(args.reduce) + ".mlp.fc2.weight" in k: 106 | new_v = v[:,torch.arange(v.size(1))!=delete_ind] 107 | new_dict["module." + k] = new_v 108 | else: 109 | # print(v.shape) 110 | new_dict["module." 
+ k] = v 111 | 112 | model_dict.update(new_dict) 113 | 114 | net.load_state_dict(model_dict) 115 | 116 | 117 | batch_time = AverageMeter('Time', ':6.3f') 118 | losses = AverageMeter('Loss', ':.4e') 119 | top1 = AverageMeter('Acc@1', ':6.2f') 120 | top5 = AverageMeter('Acc@5', ':6.2f') 121 | kl = AverageMeter('KL', ':6.3f') 122 | cos = AverageMeter('Cosine', ':6.3f') 123 | 124 | 125 | progress = ProgressMeter( 126 | len(testloader), 127 | [batch_time, losses, top1, top5, kl], 128 | prefix='Test: ') 129 | 130 | evaluate = True 131 | if evaluate: 132 | criterion = nn.CrossEntropyLoss() 133 | with torch.no_grad(): 134 | net.eval() 135 | 136 | end = time.time() 137 | for i, (images, target) in enumerate(testloader): 138 | images = images.cuda( non_blocking=True) 139 | target = target.cuda( non_blocking=True) 140 | 141 | # compute output 142 | output = net(images) 143 | loss = criterion(output, target) 144 | with torch.no_grad(): 145 | teacher_output, teacher_feature, teacher_patch_output = teacher_model(images) 146 | 147 | 148 | logsoftmax = torch.nn.LogSoftmax(dim=1).cuda() 149 | softmax = torch.nn.Softmax(dim=1).cuda() 150 | distil_loss = torch.sum( 151 | torch.sum(softmax(teacher_output) * (logsoftmax(teacher_output)-logsoftmax(output)), dim=1)) 152 | 153 | kl.add(distil_loss,images.size(0)) 154 | 155 | # measure elapsed time 156 | batch_time.update(time.time() - end) 157 | end = time.time() 158 | 159 | # if i % 1 == 0: 160 | # progress.display(i) 161 | 162 | print(kl.sum.item()) 163 | kls.append(kl.sum.item()) 164 | 165 | with open("importance/kl5k/importance/Deit_base_12_ffn_3072_kl_" +str(args.reduce)+ "_5k.txt", 'w') as f: 166 | for s in kls: 167 | f.write(str(s) + '\n') 168 | -------------------------------------------------------------------------------- /test_attn_importance_score.py: -------------------------------------------------------------------------------- 1 | '''Train CIFAR10 with PyTorch.''' 2 | from __future__ import print_function 3 | 4 | import torch 5 | import torch.nn as nn 6 | from functools import partial 7 | import torch.nn.functional as F 8 | import torch.backends.cudnn as cudnn 9 | from torch.autograd import Variable 10 | 11 | import torchvision 12 | import torchvision.transforms as transforms 13 | 14 | import logging 15 | import argparse 16 | import os 17 | import random 18 | import csv 19 | import numpy as np 20 | import time 21 | 22 | import torch 23 | from torch.cuda.amp import autocast as autocast 24 | from torch.cuda.amp import GradScaler as GradScaler 25 | from utils.pred_utils import ProgressMeter, accuracy, AverageMeter 26 | 27 | from models.model import VisionTransformer 28 | from models.attn_importance_score_model import VisionTransformer as attn_VisionTransformer 29 | 30 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 31 | parser.add_argument("--img_size", default=256, type=int, 32 | help="Resolution size") 33 | parser.add_argument("--batch_size", default=128, type=int, 34 | help="Total batch size for training.") 35 | parser.add_argument("--block_ind", default=-1, type=int, 36 | help="Total batch size for training.") 37 | 38 | args = parser.parse_args() 39 | 40 | 41 | use_cuda = torch.cuda.is_available() 42 | batch_size = args.batch_size 43 | if use_cuda: 44 | n_gpu = torch.cuda.device_count() 45 | batch_size *= n_gpu 46 | 47 | print('==> Preparing data..') 48 | transform_test = transforms.Compose([ 49 | transforms.Resize(256, interpolation=3), 50 | transforms.CenterCrop(224), 51 | transforms.ToTensor(), 52 | 
transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 53 | ]) 54 | 55 | 56 | testset = torchvision.datasets.ImageFolder(root='/mnt/ramdisk/ImageNet/fewshot5_train/', transform=transform_test) 57 | testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False,pin_memory=True, num_workers=8) 58 | print('==> Resuming from checkpoint..') 59 | checkpoint = torch.load("pretrainmodel/deit_base_patch16_224-b5f2ef4d.pth", map_location='cpu') 60 | teacher_model = VisionTransformer( 61 | patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, 62 | norm_layer=partial(nn.LayerNorm, eps=1e-6)) 63 | model_dict = teacher_model.state_dict() 64 | new_dict = {} 65 | cnt = 1 66 | for k, v in checkpoint['model'].items(): 67 | cnt += 1 68 | new_dict[k] = v 69 | model_dict.update(new_dict) 70 | teacher_model.load_state_dict(model_dict) 71 | teacher_model.cuda() 72 | teacher_model = torch.nn.DataParallel(teacher_model) 73 | print("=> loaded teacher checkpoint") 74 | 75 | checkpoint = torch.load("pretrainmodel/deit_base_patch16_224-b5f2ef4d.pth", map_location='cpu') 76 | candidate_index = range(768) 77 | results1 = [] 78 | results5 = [] 79 | kls = [] 80 | for delete_ind in candidate_index: 81 | net = attn_VisionTransformer( 82 | patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, 83 | norm_layer=partial(nn.LayerNorm, eps=1e-6), reduce = delete_ind, ind=args.block_ind) 84 | 85 | net.cuda() 86 | net = torch.nn.DataParallel(net) 87 | 88 | model_dict = net.state_dict() 89 | 90 | new_dict = {} 91 | cnt = 1 92 | for k, v in checkpoint['model'].items(): 93 | # print(k,end= ", ") 94 | if "blocks." + str(args.block_ind) + ".attn.qkv.bias" in k: 95 | interval = v.size(0) // 3 96 | new_index = [i not in [delete_ind,delete_ind+interval,delete_ind + 2* interval] for i in torch.arange(v.size(0))] 97 | new_v = v[new_index] 98 | # print(new_v.shape) 99 | new_dict["module." + k] = new_v 100 | elif "blocks." + str(args.block_ind) + ".attn.qkv.weight" in k: 101 | interval = v.size(0) // 3 102 | new_index = [i not in [delete_ind,delete_ind+interval,delete_ind + 2* interval] for i in torch.arange(v.size(0))] 103 | new_v = v[new_index,:] 104 | # print(new_v.shape) 105 | new_dict["module." + k] = new_v 106 | elif "blocks." + str(args.block_ind) + ".attn.proj.weight" in k: 107 | new_v = v[:,torch.arange(v.size(1))!=delete_ind] 108 | # print(new_v.shape) 109 | new_dict["module." + k] = new_v 110 | else: 111 | # print(v.shape) 112 | new_dict["module." 
+ k] = v 113 | model_dict.update(new_dict) 114 | net.load_state_dict(model_dict) 115 | 116 | 117 | batch_time = AverageMeter('Time', ':6.3f') 118 | losses = AverageMeter('Loss', ':.4e') 119 | top1 = AverageMeter('Acc@1', ':6.2f') 120 | top5 = AverageMeter('Acc@5', ':6.2f') 121 | kl = AverageMeter('KL', ':6.3f') 122 | cos = AverageMeter('Cosine', ':6.3f') 123 | 124 | progress = ProgressMeter( 125 | len(testloader), 126 | [batch_time, losses, top1, top5, kl], 127 | prefix='Test: ') 128 | 129 | evaluate = True 130 | if evaluate: 131 | criterion = nn.CrossEntropyLoss() 132 | with torch.no_grad(): 133 | net.eval() 134 | 135 | end = time.time() 136 | for i, (images, target) in enumerate(testloader): 137 | with autocast(): 138 | 139 | images = images.cuda( non_blocking=True) 140 | target = target.cuda( non_blocking=True) 141 | 142 | # compute output 143 | output = net(images) 144 | with torch.no_grad(): 145 | teacher_output, teacher_feature, teacher_patch_output = teacher_model(images) 146 | logsoftmax = torch.nn.LogSoftmax(dim=1).cuda() 147 | softmax = torch.nn.Softmax(dim=1).cuda() 148 | distil_loss = torch.sum( 149 | torch.sum(softmax(teacher_output) * (logsoftmax(teacher_output)-logsoftmax(output)), dim=1)) 150 | 151 | kl.add(distil_loss,images.size(0)) 152 | # measure elapsed time 153 | batch_time.update(time.time() - end) 154 | end = time.time() 155 | 156 | 157 | print(kl.sum.item()) 158 | kls.append(kl.sum.item()) 159 | 160 | with open("importance/kl5k/importance/Deit_base_12_attn_768_kl_" +str(args.block_ind)+ "_5k.txt", 'w') as f: 161 | for s in kls: 162 | f.write(str(s) + '\n') 163 | -------------------------------------------------------------------------------- /utils/loss_utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.backends.cudnn as cudnn 7 | 8 | class DistillationLoss(torch.nn.Module): 9 | """ 10 | This module wraps a standard criterion and adds an extra knowledge distillation loss by 11 | taking a teacher model prediction and using it as additional supervision. 12 | """ 13 | def __init__(self, base_criterion: torch.nn.Module, teacher_model: torch.nn.Module, 14 | distillation_type: str, alpha: float, tau: float): 15 | super().__init__() 16 | self.base_criterion = base_criterion 17 | self.teacher_model = teacher_model 18 | assert distillation_type in ['none', 'soft', 'hard'] 19 | self.distillation_type = distillation_type 20 | self.alpha = alpha 21 | self.tau = tau 22 | 23 | def forward(self, inputs, outputs, labels): 24 | """ 25 | Args: 26 | inputs: The original inputs that are feed to the teacher model 27 | outputs: the outputs of the model to be trained. 
It is expected to be 28 | either a Tensor, or a Tuple[Tensor, Tensor], with the original output 29 | in the first position and the distillation predictions as the second output 30 | labels: the labels for the base criterion 31 | """ 32 | outputs_kd = None 33 | if not isinstance(outputs, torch.Tensor): 34 | # assume that the model outputs a tuple of [outputs, outputs_kd] 35 | outputs, outputs_kd = outputs 36 | base_loss = self.base_criterion(outputs, labels) 37 | if self.distillation_type == 'none': 38 | return base_loss 39 | 40 | if outputs_kd is None: 41 | raise ValueError("When knowledge distillation is enabled, the model is " 42 | "expected to return a Tuple[Tensor, Tensor] with the output of the " 43 | "class_token and the dist_token") 44 | # don't backprop throught the teacher 45 | with torch.no_grad(): 46 | teacher_outputs = self.teacher_model(inputs) 47 | 48 | if self.distillation_type == 'soft': 49 | T = self.tau 50 | # taken from https://github.com/peterliht/knowledge-distillation-pytorch/blob/master/model/net.py#L100 51 | # with slight modifications 52 | distillation_loss = F.kl_div( 53 | F.log_softmax(outputs_kd / T, dim=1), 54 | F.log_softmax(teacher_outputs / T, dim=1), 55 | reduction='sum', 56 | log_target=True 57 | ) * (T * T) / outputs_kd.numel() 58 | elif self.distillation_type == 'hard': 59 | distillation_loss = F.cross_entropy(outputs_kd, teacher_outputs.argmax(dim=1)) 60 | 61 | loss = base_loss * (1 - self.alpha) + distillation_loss * self.alpha 62 | return loss 63 | 64 | class DistillKL(nn.Module): 65 | """Distilling the Knowledge in a Neural Network""" 66 | def __init__(self, T): 67 | super(DistillKL, self).__init__() 68 | self.T = T 69 | 70 | def forward(self, y_s, y_t): 71 | p_s = F.log_softmax(y_s/self.T, dim=1) 72 | p_t = F.softmax(y_t/self.T, dim=1) 73 | loss = F.kl_div(p_s, p_t, reduction='sum') * (self.T**2) / y_s.shape[0] 74 | return loss 75 | 76 | class LabelSmoothingCrossEntropy(nn.Module): 77 | """ 78 | NLL loss with label smoothing. 79 | """ 80 | def __init__(self, smoothing=0.1): 81 | """ 82 | Constructor for the LabelSmoothing module. 83 | :param smoothing: label smoothing factor 84 | """ 85 | super(LabelSmoothingCrossEntropy, self).__init__() 86 | assert smoothing < 1.0 87 | self.smoothing = smoothing 88 | self.confidence = 1. 
- smoothing 89 | 90 | def forward(self, x, target): 91 | logprobs = F.log_softmax(x, dim=-1) 92 | nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) 93 | nll_loss = nll_loss.squeeze(1) 94 | smooth_loss = -logprobs.mean(dim=-1) 95 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss 96 | return loss.mean() 97 | 98 | class LSH(nn.Module): 99 | def __init__(self, input_dim, output_dim, std=1.0, with_l2=True, LSH_loss='BCE'): 100 | super(LSH, self).__init__() 101 | self.input_dim = input_dim 102 | self.output_dim = output_dim 103 | self.std = std 104 | self.LSH_loss_type = LSH_loss 105 | 106 | self.LSH_weight = nn.Linear(self.input_dim, self.output_dim, bias=True) 107 | if with_l2: 108 | self.mse_loss = torch.nn.MSELoss(reduction='mean') 109 | else: 110 | self.mse_loss = None 111 | if LSH_loss == 'BCE': 112 | self.LSH_loss = nn.BCEWithLogitsLoss() 113 | elif LSH_loss == 'L2': 114 | self.LSH_loss = torch.nn.MSELoss(reduction='mean') 115 | elif LSH_loss == 'L1': 116 | self.LSH_loss = torch.nn.L1Loss(reduction='mean') 117 | else: 118 | raise NotImplementedError(LSH_loss) 119 | 120 | self._initialize() 121 | 122 | 123 | def _initialize(self): 124 | nn.init.normal_(self.LSH_weight.weight, mean=0.0, std=self.std) 125 | nn.init.constant_(self.LSH_weight.bias, 0) 126 | self.LSH_weight.weight.requires_grad_(False) 127 | self.LSH_weight.bias.requires_grad_(False) 128 | 129 | 130 | def init_bias(self, model_t, train_loader, print_freq=None, use_median=True): 131 | if use_median: 132 | print("=> Init LSH bias by median") 133 | else: 134 | print("=> Init LSH bias by mean") 135 | dataset_size = len(train_loader.dataset) 136 | if use_median: 137 | all_hash_value = torch.zeros(dataset_size, self.output_dim) 138 | else: 139 | mean = torch.zeros(self.output_dim) 140 | 141 | model_t.eval() 142 | 143 | for idx, data in enumerate(train_loader): 144 | input = data[0] 145 | 146 | input = input.float() 147 | if torch.cuda.is_available(): 148 | input = input.cuda() 149 | 150 | # ============= forward ============== 151 | with torch.no_grad(): 152 | feat_t, _ = model_t(input, is_feat=True, preact=False) 153 | feat_t = [f.detach() for f in feat_t] 154 | hash_t = self.LSH_weight(feat_t[-1]) 155 | 156 | if use_median: 157 | index = data[-1] 158 | all_hash_value[index] = hash_t.cpu() 159 | else: 160 | mean += hash_t.sum(0).cpu() / dataset_size 161 | if print_freq is not None: 162 | if idx % print_freq == 0: 163 | print("Init Bias: [{}/{}]".format(idx, len(train_loader))) 164 | 165 | if use_median: 166 | self.LSH_weight.bias.data[:] = - all_hash_value.median(0)[0] 167 | else: 168 | self.LSH_weight.bias.data[:] = - mean 169 | 170 | 171 | def forward(self, f_s, f_t): 172 | if self.mse_loss: 173 | l2_loss = self.mse_loss(f_s, f_t) 174 | else: 175 | l2_loss = 0 176 | hash_s = self.LSH_weight(f_s) 177 | hash_t = self.LSH_weight(f_t) 178 | if self.LSH_loss_type == 'BCE': 179 | pseudo_label = (hash_t > 0).float() 180 | loss = self.LSH_loss(hash_s, pseudo_label) 181 | else: 182 | loss = self.LSH_loss(hash_s, hash_t) 183 | return l2_loss + loss 184 | 185 | -------------------------------------------------------------------------------- /test_neck_importance_score.py: -------------------------------------------------------------------------------- 1 | '''Train CIFAR10 with PyTorch.''' 2 | from __future__ import print_function 3 | 4 | import torch 5 | import torch.nn as nn 6 | from functools import partial 7 | import torch.nn.functional as F 8 | import torch.backends.cudnn as cudnn 9 | from torch.autograd 
import Variable 10 | 11 | import torchvision 12 | import torchvision.transforms as transforms 13 | 14 | import logging 15 | import argparse 16 | import os 17 | import random 18 | import csv 19 | import numpy as np 20 | import time 21 | 22 | import torch 23 | from torch.cuda.amp import autocast as autocast 24 | from torch.cuda.amp import GradScaler as GradScaler 25 | from tqdm import tqdm 26 | from utils.pred_utils import ProgressMeter, accuracy, AverageMeter 27 | 28 | from models.model import VisionTransformer 29 | from models.neck_importance_score_model import VisionTransformer as neck_VisionTransformer 30 | 31 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 32 | parser.add_argument('--resume_sess', default='vit_default',type=str, help='session id') 33 | parser.add_argument("--img_size", default=256, type=int, 34 | help="Resolution size") 35 | parser.add_argument("--batch_size", default=128, type=int, 36 | help="Total batch size for training.") 37 | parser.add_argument("--delete_ind", default=-1, type=int, 38 | help="The index of delete neck.") 39 | 40 | args = parser.parse_args() 41 | 42 | 43 | use_cuda = torch.cuda.is_available() 44 | batch_size = args.batch_size 45 | if use_cuda: 46 | n_gpu = torch.cuda.device_count() 47 | batch_size *= n_gpu 48 | 49 | # print('==> Preparing data..') 50 | transform_test = transforms.Compose([ 51 | transforms.Resize(256, interpolation=3), 52 | transforms.CenterCrop(224), 53 | transforms.ToTensor(), 54 | transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 55 | ]) 56 | 57 | # testset = torchvision.datasets.ImageFolder(root='../dataset/ImageNet/val/', transform=transform_test) 58 | # testset = torchvision.datasets.ImageFolder(root='/mnt/ramdisk/ImageNet/fewshot_val/', transform=transform_test) 59 | 60 | testset = torchvision.datasets.ImageFolder(root='/mnt/ramdisk/ImageNet/fewshot5_train/', transform=transform_test) 61 | testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False,pin_memory=True, num_workers=8) 62 | 63 | # print('==> Resuming from checkpoint..') 64 | 65 | checkpoint = torch.load("pretrainmodel/deit_base_patch16_224-b5f2ef4d.pth", map_location='cpu') 66 | # for k,v in model_dict.items(): 67 | # print(k,v.size()) 68 | teacher_model = VisionTransformer( 69 | patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, 70 | norm_layer=partial(nn.LayerNorm, eps=1e-6)) 71 | 72 | model_dict = teacher_model.state_dict() 73 | new_dict = {} 74 | cnt = 1 75 | for k, v in checkpoint['model'].items(): 76 | if k in model_dict and v.size()==model_dict[k].size(): 77 | # print('update teacher cnt {} : {}'.format(cnt, k)) 78 | cnt += 1 79 | new_dict[k] = v 80 | model_dict.update(new_dict) 81 | teacher_model.load_state_dict(model_dict) 82 | teacher_model.cuda() 83 | teacher_model = torch.nn.DataParallel(teacher_model) 84 | cudnn.benchmark = True 85 | # print("=> loaded teacher checkpoint") 86 | 87 | # candidate_index = range(768) 88 | results1 = [] 89 | results5 = [] 90 | kls = [] 91 | coss = [] 92 | net = neck_VisionTransformer(patch_size=16, embed_dim=767, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True,norm_layer=partial(nn.LayerNorm, eps=1e-6)) 93 | model_dict = net.state_dict() 94 | new_dict = {} 95 | cnt = 1 96 | 97 | candidate_index = range(768) 98 | for delete_ind in candidate_index: 99 | for k, v in checkpoint['model'].items(): 100 | # print(k,end= ", ") 101 | if "qkv.weight" in k: 102 | new_v = v[:,torch.arange(v.size(1))!=delete_ind] 103 | # 
print(new_v.shape) 104 | new_dict[ k] = new_v 105 | elif "cls_token" in k or "pos_embed" in k: 106 | new_v = v[:,:,torch.arange(v.size(2))!=delete_ind] 107 | # print(new_v.shape) 108 | new_dict[ k] = new_v 109 | elif "patch_embed" in k or "norm" in k or "fc2" in k or "attn.proj" in k: 110 | new_v = v[torch.arange(v.size(0))!=delete_ind] 111 | # print(new_v.shape) 112 | new_dict[ k] = new_v 113 | elif "head.weight" in k or "mlp.fc1.weight" in k: 114 | new_v = v[:,torch.arange(v.size(1))!=delete_ind] 115 | # print(new_v.shape) 116 | new_dict[ k] = new_v 117 | else: 118 | # print(v.shape) 119 | new_dict[ k] = v 120 | 121 | model_dict.update(new_dict) 122 | 123 | net.load_state_dict(model_dict) 124 | net.cuda() 125 | net = torch.nn.DataParallel(net) 126 | 127 | 128 | batch_time = AverageMeter('Time', ':6.3f') 129 | losses = AverageMeter('Loss', ':.4e') 130 | top1 = AverageMeter('Acc@1', ':6.3f') 131 | top5 = AverageMeter('Acc@5', ':6.3f') 132 | kl = AverageMeter('KL', ':6.3f') 133 | cos = AverageMeter('Cosine', ':6.3f') 134 | 135 | progress = ProgressMeter( 136 | len(testloader), 137 | [batch_time, losses, top1, top5], 138 | prefix='Test: ') 139 | 140 | evaluate = True 141 | if evaluate: 142 | criterion = nn.CrossEntropyLoss() 143 | with torch.no_grad(): 144 | net.eval() 145 | teacher_model.eval() 146 | end = time.time() 147 | for i, (images, target) in enumerate(testloader): 148 | with autocast(): 149 | 150 | images = images.cuda( non_blocking=True) 151 | target = target.cuda( non_blocking=True) 152 | 153 | # compute output 154 | output = net(images) 155 | # loss = criterion(output, target) 156 | with torch.no_grad(): 157 | teacher_output, teacher_feature, teacher_patch_output = teacher_model(images) 158 | # cosine_similarity = F.cosine_similarity(output, teacher_output) 159 | # cosine_similarity = torch.sum(cosine_similarity) 160 | # distil_loss = F.mse_loss(feature, teacher_feature) 161 | logsoftmax = torch.nn.LogSoftmax(dim=1).cuda() 162 | softmax = torch.nn.Softmax(dim=1).cuda() 163 | distil_loss = torch.sum( 164 | torch.sum(softmax(teacher_output) * (logsoftmax(teacher_output) - logsoftmax(output)), dim=1)) 165 | # measure accuracy and record loss 166 | # acc1, acc5 = accuracy(output, target, topk=(1, 5)) 167 | # losses.update(loss.item(), images.size(0)) 168 | # top1.update(acc1[0], images.size(0)) 169 | # top5.update(acc5[0], images.size(0)) 170 | kl.add(distil_loss,images.size(0)) 171 | # cos.add(cosine_similarity, images.size(0)) 172 | # measure elapsed time 173 | batch_time.update(time.time() - end) 174 | end = time.time() 175 | 176 | # if i % 10 == 0: 177 | # progress.display(i) 178 | 179 | # TODO: this should also be done with the ProgressMeter 180 | # print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' 181 | # .format(top1=top1, top5=top5)) 182 | # print(cos.avg.item()) 183 | # coss.append(cos.avg.item()) 184 | # results1.append(top1.avg.item()) 185 | # results5.append(top5.avg.item()) 186 | print(kl.sum.item()) 187 | kls.append(kl.sum.item()) 188 | with open("importance/kl5k/importance/Deit_base_12_neck_768_kl_5k.txt", 'a') as f: 189 | for s in kls: 190 | f.write(str(s) + '\n') -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | _ipyw_jlab_nb_ext_conf=0.1.0=py38_0 5 | _libgcc_mutex=0.1=main 6 | addict=2.4.0=pypi_0 7 | 
alabaster=0.7.12=py_0 8 | anaconda=2020.11=py38_0 9 | anaconda-client=1.7.2=py38_0 10 | anaconda-navigator=1.10.0=py38_0 11 | anaconda-project=0.8.4=py_0 12 | antlr4-python3-runtime=4.8=pypi_0 13 | apex=0.1=pypi_0 14 | argh=0.26.2=py38_0 15 | argon2-cffi=20.1.0=py38h7b6447c_1 16 | asn1crypto=1.4.0=py_0 17 | astroid=2.4.2=py38_0 18 | astropy=4.0.2=py38h7b6447c_0 19 | async_generator=1.10=py_0 20 | atomicwrites=1.4.0=py_0 21 | attrs=20.3.0=pyhd3eb1b0_0 22 | autopep8=1.5.4=py_0 23 | babel=2.8.1=pyhd3eb1b0_0 24 | backcall=0.2.0=py_0 25 | backports=1.0=py_2 26 | backports.functools_lru_cache=1.6.1=py_0 27 | backports.shutil_get_terminal_size=1.0.0=py38_2 28 | backports.tempfile=1.0=py_1 29 | backports.weakref=1.0.post1=py_1 30 | beautifulsoup4=4.9.3=pyhb0f4dca_0 31 | bitarray=1.6.1=py38h27cfd23_0 32 | bkcharts=0.2=py38_0 33 | blas=1.0=mkl 34 | bleach=3.2.1=py_0 35 | blosc=1.20.1=hd408876_0 36 | bokeh=2.2.3=py38_0 37 | boto=2.49.0=py38_0 38 | bottleneck=1.3.2=py38heb32a55_1 39 | brotlipy=0.7.0=py38h7b6447c_1000 40 | bzip2=1.0.8=h7b6447c_0 41 | ca-certificates=2020.10.14=0 42 | cairo=1.14.12=h8948797_3 43 | certifi=2020.6.20=pyhd3eb1b0_3 44 | cffi=1.14.3=py38he30daa8_0 45 | chardet=3.0.4=py38_1003 46 | click=7.1.2=py_0 47 | cloudpickle=1.6.0=py_0 48 | clyent=1.2.2=py38_1 49 | colorama=0.4.4=py_0 50 | conda=4.9.2=py38h06a4308_0 51 | conda-build=3.20.5=py38_1 52 | conda-env=2.6.0=1 53 | conda-package-handling=1.7.2=py38h03888b9_0 54 | conda-verify=3.4.2=py_1 55 | contextlib2=0.6.0.post1=py_0 56 | cryptography=3.1.1=py38h1ba5d50_0 57 | cudatoolkit=11.0.221=h6bb024c_0 58 | cupy-cuda111=10.4.0=pypi_0 59 | curl=7.71.1=hbc83047_1 60 | cycler=0.10.0=py38_0 61 | cython=0.29.21=py38he6710b0_0 62 | cytoolz=0.11.0=py38h7b6447c_0 63 | dask=2.30.0=py_0 64 | dask-core=2.30.0=py_0 65 | dbus=1.13.18=hb2f20db_0 66 | decorator=4.4.2=py_0 67 | defusedxml=0.6.0=py_0 68 | diff-match-patch=20200713=py_0 69 | distributed=2.30.1=py38h06a4308_0 70 | docutils=0.16=py38_1 71 | entrypoints=0.3=py38_0 72 | et_xmlfile=1.0.1=py_1001 73 | expat=2.2.10=he6710b0_2 74 | fairseq=1.0.0a0=dev_0 75 | fastcache=1.1.0=py38h7b6447c_0 76 | fastrlock=0.8=pypi_0 77 | filelock=3.0.12=py_0 78 | flake8=3.8.4=py_0 79 | flask=1.1.2=py_0 80 | fontconfig=2.13.0=h9420a91_0 81 | freetype=2.10.4=h5ab3b9f_0 82 | fribidi=1.0.10=h7b6447c_0 83 | fsspec=0.8.3=py_0 84 | future=0.18.2=py38_1 85 | get_terminal_size=1.0.0=haa9412d_0 86 | gevent=20.9.0=py38h7b6447c_0 87 | glib=2.66.1=h92f7085_0 88 | glob2=0.7=py_0 89 | gmp=6.1.2=h6c8ec71_1 90 | gmpy2=2.0.8=py38hd5f6e3b_3 91 | graphite2=1.3.14=h23475e2_0 92 | greenlet=0.4.17=py38h7b6447c_0 93 | gst-plugins-base=1.14.0=hbbd80ab_1 94 | gstreamer=1.14.0=hb31296c_0 95 | h5py=2.10.0=py38h7918eee_0 96 | harfbuzz=2.4.0=hca77d97_1 97 | hdf5=1.10.4=hb1b8bf9_0 98 | heapdict=1.0.1=py_0 99 | html5lib=1.1=py_0 100 | hydra-core=1.0.7=pypi_0 101 | icu=58.2=he6710b0_3 102 | idna=2.10=py_0 103 | imageio=2.9.0=py_0 104 | imagesize=1.2.0=py_0 105 | importlib-metadata=2.0.0=py_1 106 | importlib-resources=5.4.0=pypi_0 107 | importlib_metadata=2.0.0=1 108 | iniconfig=1.1.1=py_0 109 | intel-openmp=2020.2=254 110 | intervaltree=3.1.0=py_0 111 | ipykernel=5.3.4=py38h5ca1d4c_0 112 | ipython=7.19.0=py38hb070fc8_0 113 | ipython_genutils=0.2.0=py38_0 114 | ipywidgets=7.5.1=py_1 115 | isort=5.6.4=py_0 116 | itsdangerous=1.1.0=py_0 117 | jbig=2.1=hdba287a_0 118 | jdcal=1.4.1=py_0 119 | jedi=0.17.1=py38_0 120 | jeepney=0.5.0=pyhd3eb1b0_0 121 | jinja2=2.11.2=py_0 122 | joblib=0.17.0=py_0 123 | jpeg=9b=h024ee3a_2 124 | json5=0.9.5=py_0 125 | 
jsonschema=3.2.0=py_2 126 | jupyter=1.0.0=py38_7 127 | jupyter_client=6.1.7=py_0 128 | jupyter_console=6.2.0=py_0 129 | jupyter_core=4.6.3=py38_0 130 | jupyterlab=2.2.6=py_0 131 | jupyterlab_pygments=0.1.2=py_0 132 | jupyterlab_server=1.2.0=py_0 133 | keyring=21.4.0=py38_1 134 | kiwisolver=1.3.0=py38h2531618_0 135 | krb5=1.18.2=h173b8e3_0 136 | lazy-object-proxy=1.4.3=py38h7b6447c_0 137 | lcms2=2.11=h396b838_0 138 | ld_impl_linux-64=2.33.1=h53a641e_7 139 | libarchive=3.4.2=h62408e4_0 140 | libcurl=7.71.1=h20c2e04_1 141 | libedit=3.1.20191231=h14c3975_1 142 | libffi=3.3=he6710b0_2 143 | libgcc-ng=9.1.0=hdf63c60_0 144 | libgfortran-ng=7.3.0=hdf63c60_0 145 | liblief=0.10.1=he6710b0_0 146 | libllvm10=10.0.1=hbcb73fb_5 147 | libpng=1.6.37=hbc83047_0 148 | libsodium=1.0.18=h7b6447c_0 149 | libspatialindex=1.9.3=he6710b0_0 150 | libssh2=1.9.0=h1ba5d50_1 151 | libstdcxx-ng=9.1.0=hdf63c60_0 152 | libtiff=4.1.0=h2733197_1 153 | libtool=2.4.6=h7b6447c_1005 154 | libuuid=1.0.3=h1bed415_2 155 | libuv=1.40.0=h7b6447c_0 156 | libxcb=1.14=h7b6447c_0 157 | libxml2=2.9.10=hb55368b_3 158 | libxslt=1.1.34=hc22bd24_0 159 | llvmlite=0.34.0=py38h269e1b5_4 160 | locket=0.2.0=py38_1 161 | lxml=4.6.1=py38hefd8a0e_0 162 | lz4-c=1.9.2=heb0550a_3 163 | lzo=2.10=h7b6447c_2 164 | markupsafe=1.1.1=py38h7b6447c_0 165 | matplotlib=3.3.2=0 166 | matplotlib-base=3.3.2=py38h817c723_0 167 | mccabe=0.6.1=py38_1 168 | mistune=0.8.4=py38h7b6447c_1000 169 | mkl=2020.2=256 170 | mkl-service=2.3.0=py38he904b0f_0 171 | mkl_fft=1.2.0=py38h23d657b_0 172 | mkl_random=1.1.1=py38h0573a6f_0 173 | mmcv-full=1.3.11=pypi_0 174 | mmpycocotools=12.0.3=pypi_0 175 | mock=4.0.2=py_0 176 | more-itertools=8.6.0=pyhd3eb1b0_0 177 | mpc=1.1.0=h10f8cd9_1 178 | mpfr=4.0.2=hb69a4c5_1 179 | mpmath=1.1.0=py38_0 180 | msgpack-python=1.0.0=py38hfd86e86_1 181 | multipledispatch=0.6.0=py38_0 182 | navigator-updater=0.2.1=py38_0 183 | nbclient=0.5.1=py_0 184 | nbconvert=6.0.7=py38_0 185 | nbformat=5.0.8=py_0 186 | ncurses=6.2=he6710b0_1 187 | nest-asyncio=1.4.2=pyhd3eb1b0_0 188 | networkx=2.5=py_0 189 | ninja=1.10.2=py38hff7bd54_0 190 | nltk=3.5=py_0 191 | nose=1.3.7=py38_2 192 | notebook=6.1.4=py38_0 193 | numba=0.51.2=py38h0573a6f_1 194 | numexpr=2.7.1=py38h423224d_0 195 | numpy=1.22.4=pypi_0 196 | numpydoc=1.1.0=pyhd3eb1b0_1 197 | nvidia-dali-cuda110=1.12.0=pypi_0 198 | olefile=0.46=py_0 199 | omegaconf=2.0.6=pypi_0 200 | opencv-python=4.4.0.46=pypi_0 201 | openpyxl=3.0.5=py_0 202 | openssl=1.1.1h=h7b6447c_0 203 | packaging=20.4=py_0 204 | pandas=1.1.3=py38he6710b0_0 205 | pandoc=2.11=hb0f4dca_0 206 | pandocfilters=1.4.3=py38h06a4308_1 207 | pango=1.45.3=hd140c19_0 208 | parso=0.7.0=py_0 209 | partd=1.1.0=py_0 210 | patchelf=0.12=he6710b0_0 211 | path=15.0.0=py38_0 212 | path.py=12.5.0=0 213 | pathlib2=2.3.5=py38_0 214 | pathtools=0.1.2=py_1 215 | patsy=0.5.1=py38_0 216 | pcre=8.44=he6710b0_0 217 | pep8=1.7.1=py38_0 218 | pexpect=4.8.0=py38_0 219 | pickleshare=0.7.5=py38_1000 220 | pillow=8.0.1=py38he98fc37_0 221 | pip=20.2.4=py38h06a4308_0 222 | pixman=0.40.0=h7b6447c_0 223 | pkginfo=1.6.1=py38h06a4308_0 224 | pluggy=0.13.1=py38_0 225 | ply=3.11=py38_0 226 | portalocker=2.3.2=pypi_0 227 | prometheus_client=0.8.0=py_0 228 | prompt-toolkit=3.0.8=py_0 229 | prompt_toolkit=3.0.8=0 230 | psutil=5.7.2=py38h7b6447c_0 231 | ptflops=0.6.4=pypi_0 232 | ptyprocess=0.6.0=py38_0 233 | py=1.9.0=py_0 234 | py-lief=0.10.1=py38h403a769_0 235 | pycodestyle=2.6.0=py_0 236 | pycosat=0.6.3=py38h7b6447c_1 237 | pycparser=2.20=py_2 238 | pycurl=7.43.0.6=py38h1ba5d50_0 239 | 
pydocstyle=5.1.1=py_0 240 | pyflakes=2.2.0=py_0 241 | pygments=2.7.2=pyhd3eb1b0_0 242 | pylint=2.6.0=py38_0 243 | pyodbc=4.0.30=py38he6710b0_0 244 | pyopenssl=19.1.0=py_1 245 | pyparsing=2.4.7=py_0 246 | pyqt=5.9.2=py38h05f1152_4 247 | pyrsistent=0.17.3=py38h7b6447c_0 248 | pysocks=1.7.1=py38_0 249 | pytables=3.6.1=py38h9fd0a39_0 250 | pytest=6.1.1=py38_0 251 | python=3.8.5=h7579374_1 252 | python-dateutil=2.8.1=py_0 253 | python-jsonrpc-server=0.4.0=py_0 254 | python-language-server=0.35.1=py_0 255 | python-libarchive-c=2.9=py_0 256 | pytorch=1.7.1=py3.8_cuda11.0.221_cudnn8.0.5_0 257 | pytz=2020.1=py_0 258 | pywavelets=1.1.1=py38h7b6447c_2 259 | pyxdg=0.27=pyhd3eb1b0_0 260 | pyyaml=5.3.1=py38h7b6447c_1 261 | pyzmq=19.0.2=py38he6710b0_1 262 | qdarkstyle=2.8.1=py_0 263 | qt=5.9.7=h5867ecd_1 264 | qtawesome=1.0.1=py_0 265 | qtconsole=4.7.7=py_0 266 | qtpy=1.9.0=py_0 267 | readline=8.0=h7b6447c_0 268 | regex=2020.10.15=py38h7b6447c_0 269 | requests=2.24.0=py_0 270 | ripgrep=12.1.1=0 271 | rope=0.18.0=py_0 272 | rtree=0.9.4=py38_1 273 | ruamel_yaml=0.15.87=py38h7b6447c_1 274 | sacrebleu=2.0.0=pypi_0 275 | scikit-image=0.17.2=py38hdf5156a_0 276 | scikit-learn=0.23.2=py38h0573a6f_0 277 | scipy=1.5.2=py38h0b6359f_0 278 | seaborn=0.11.0=py_0 279 | secretstorage=3.1.2=py38_0 280 | send2trash=1.5.0=py38_0 281 | setuptools=50.3.1=py38h06a4308_1 282 | simplegeneric=0.8.1=py38_2 283 | singledispatch=3.4.0.3=py_1001 284 | sip=4.19.13=py38he6710b0_0 285 | six=1.15.0=py38h06a4308_0 286 | snowballstemmer=2.0.0=py_0 287 | sortedcollections=1.2.1=py_0 288 | sortedcontainers=2.2.2=py_0 289 | soupsieve=2.0.1=py_0 290 | sphinx=3.2.1=py_0 291 | sphinxcontrib=1.0=py38_1 292 | sphinxcontrib-applehelp=1.0.2=py_0 293 | sphinxcontrib-devhelp=1.0.2=py_0 294 | sphinxcontrib-htmlhelp=1.0.3=py_0 295 | sphinxcontrib-jsmath=1.0.1=py_0 296 | sphinxcontrib-qthelp=1.0.3=py_0 297 | sphinxcontrib-serializinghtml=1.1.4=py_0 298 | sphinxcontrib-websupport=1.2.4=py_0 299 | spyder=4.1.5=py38_0 300 | spyder-kernels=1.9.4=py38_0 301 | sqlalchemy=1.3.20=py38h7b6447c_0 302 | sqlite=3.33.0=h62c20be_0 303 | statsmodels=0.12.0=py38h7b6447c_0 304 | sympy=1.6.2=py38h06a4308_1 305 | tabulate=0.8.9=pypi_0 306 | tbb=2020.3=hfd86e86_0 307 | tblib=1.7.0=py_0 308 | termcolor=1.1.0=pypi_0 309 | terminado=0.9.1=py38_0 310 | testpath=0.4.4=py_0 311 | threadpoolctl=2.1.0=pyh5ca1d4c_0 312 | tifffile=2020.10.1=py38hdd07704_2 313 | timm=0.3.4=pypi_0 314 | tk=8.6.10=hbc83047_0 315 | toml=0.10.1=py_0 316 | toolz=0.11.1=py_0 317 | torchaudio=0.10.0=pypi_0 318 | torchvision=0.8.2=py38_cu110 319 | tornado=6.0.4=py38h7b6447c_1 320 | tqdm=4.50.2=py_0 321 | traitlets=5.0.5=py_0 322 | typing_extensions=3.7.4.3=py_0 323 | ujson=4.0.1=py38he6710b0_0 324 | unicodecsv=0.14.1=py38_0 325 | unixodbc=2.3.9=h7b6447c_0 326 | urllib3=1.25.11=py_0 327 | watchdog=0.10.3=py38_0 328 | wcwidth=0.2.5=py_0 329 | webencodings=0.5.1=py38_1 330 | werkzeug=1.0.1=py_0 331 | wheel=0.35.1=py_0 332 | widgetsnbextension=3.5.1=py38_0 333 | wrapt=1.11.2=py38h7b6447c_0 334 | wurlitzer=2.0.1=py38_0 335 | xlrd=1.2.0=py_0 336 | xlsxwriter=1.3.7=py_0 337 | xlwt=1.3.0=py38_0 338 | xmltodict=0.12.0=py_0 339 | xz=5.2.5=h7b6447c_0 340 | yacs=0.1.8=pypi_0 341 | yaml=0.2.5=h7b6447c_0 342 | yapf=0.30.0=py_0 343 | zeromq=4.3.3=he6710b0_3 344 | zict=2.0.0=py_0 345 | zipp=3.4.0=pyhd3eb1b0_0 346 | zlib=1.2.11=h7b6447c_3 347 | zope=1.0=py38_1 348 | zope.event=4.5.0=py38_0 349 | zope.interface=5.1.2=py38h7b6447c_0 350 | zstd=1.4.5=h9ceee32_0 351 | 
-------------------------------------------------------------------------------- /models/ffn_importance_score_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from functools import partial 4 | 5 | from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 6 | from timm.models.helpers import load_pretrained 7 | from timm.models.layers import DropPath, to_2tuple, trunc_normal_ 8 | from timm.models.resnet import resnet26d, resnet50d 9 | from timm.models.registry import register_model 10 | 11 | class Mlp(nn.Module): 12 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0., reduce_mask = False): 13 | super().__init__() 14 | out_features = out_features or in_features 15 | hidden_features = 3072 16 | if reduce_mask: 17 | hidden_features = 3071 18 | self.fc1 = nn.Linear(in_features, hidden_features) 19 | self.act = act_layer() 20 | self.fc2 = nn.Linear(hidden_features, out_features) 21 | self.drop = nn.Dropout(drop) 22 | 23 | def forward(self, x): 24 | x = self.fc1(x) 25 | x = self.act(x) 26 | x = self.drop(x) 27 | x = self.fc2(x) 28 | x = self.drop(x) 29 | return x 30 | 31 | class Attention(nn.Module): 32 | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): 33 | super().__init__() 34 | self.num_heads = num_heads 35 | self.head_dim = 768 // num_heads 36 | # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights 37 | self.scale = qk_scale or self.head_dim ** -0.5 38 | 39 | self.qkv = nn.Linear(dim, 768 * 3, bias=qkv_bias) 40 | self.attn_drop = nn.Dropout(attn_drop) 41 | self.proj = nn.Linear(768, dim) 42 | self.proj_drop = nn.Dropout(proj_drop) 43 | def forward(self, x): 44 | B, N, C = x.shape 45 | 46 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, self.head_dim).permute(2, 0, 3, 1, 4) 47 | q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) 48 | 49 | attn = (q @ k.transpose(-2, -1)) * self.scale 50 | attn = attn.softmax(dim=-1) 51 | attn = self.attn_drop(attn) 52 | 53 | x = (attn @ v).transpose(1, 2).reshape(B, N, 768) 54 | x = self.proj(x) 55 | x = self.proj_drop(x) 56 | return x 57 | 58 | 59 | 60 | class Block(nn.Module): 61 | 62 | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., 63 | drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, reduce_mask = False): 64 | super().__init__() 65 | self.norm1 = norm_layer(dim) 66 | self.attn = Attention( 67 | dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) 68 | # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here 69 | self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 70 | self.norm2 = norm_layer(dim) 71 | mlp_hidden_dim = int(dim * mlp_ratio) 72 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop, reduce_mask = reduce_mask) 73 | 74 | def forward(self, x): 75 | x = x + self.drop_path(self.attn(self.norm1(x))) 76 | x = x + self.drop_path(self.mlp(self.norm2(x))) 77 | return x 78 | 79 | 80 | class PatchEmbed(nn.Module): 81 | """ Image to Patch Embedding 82 | """ 83 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): 84 | super().__init__() 85 | img_size = to_2tuple(img_size) 86 | patch_size = to_2tuple(patch_size) 87 | num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) 88 | self.img_size = img_size 89 | self.patch_size = patch_size 90 | self.num_patches = num_patches 91 | 92 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 93 | 94 | def forward(self, x): 95 | B, C, H, W = x.shape 96 | # FIXME look at relaxing size constraints 97 | assert H == self.img_size[0] and W == self.img_size[1], \ 98 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 99 | x = self.proj(x).flatten(2).transpose(1, 2) 100 | return x 101 | 102 | 103 | class HybridEmbed(nn.Module): 104 | """ CNN Feature Map Embedding 105 | Extract feature map from CNN, flatten, project to embedding dim. 106 | """ 107 | def __init__(self, backbone, img_size=224, feature_size=None, in_chans=3, embed_dim=768): 108 | super().__init__() 109 | assert isinstance(backbone, nn.Module) 110 | img_size = to_2tuple(img_size) 111 | self.img_size = img_size 112 | self.backbone = backbone 113 | if feature_size is None: 114 | with torch.no_grad(): 115 | # FIXME this is hacky, but most reliable way of determining the exact dim of the output feature 116 | # map for all networks, the feature metadata has reliable channel and stride info, but using 117 | # stride to calc feature dim requires info about padding of each stage that isn't captured. 
118 | training = backbone.training 119 | if training: 120 | backbone.eval() 121 | o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1] 122 | feature_size = o.shape[-2:] 123 | feature_dim = o.shape[1] 124 | backbone.train(training) 125 | else: 126 | feature_size = to_2tuple(feature_size) 127 | feature_dim = self.backbone.feature_info.channels()[-1] 128 | self.num_patches = feature_size[0] * feature_size[1] 129 | self.proj = nn.Linear(feature_dim, embed_dim) 130 | 131 | def forward(self, x): 132 | x = self.backbone(x)[-1] 133 | x = x.flatten(2).transpose(1, 2) 134 | x = self.proj(x) 135 | return x 136 | 137 | 138 | class VisionTransformer(nn.Module): 139 | """ Vision Transformer with support for patch or hybrid CNN input stage 140 | """ 141 | def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12, 142 | num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., 143 | drop_path_rate=0., hybrid_backbone=None, norm_layer=nn.LayerNorm, reduce_ind = None): 144 | super().__init__() 145 | self.num_classes = num_classes 146 | self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models 147 | 148 | if hybrid_backbone is not None: 149 | self.patch_embed = HybridEmbed( 150 | hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim) 151 | else: 152 | self.patch_embed = PatchEmbed( 153 | img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) 154 | num_patches = self.patch_embed.num_patches 155 | 156 | self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) 157 | self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim)) 158 | self.pos_drop = nn.Dropout(p=drop_rate) 159 | 160 | dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule 161 | self.blocks = nn.ModuleList([ 162 | Block( 163 | dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, 164 | drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, reduce_mask = (i == reduce_ind)) 165 | for i in range(depth)]) 166 | self.norm = norm_layer(embed_dim) 167 | 168 | # NOTE as per official impl, we could have a pre-logits representation dense layer + tanh here 169 | #self.repr = nn.Linear(embed_dim, representation_size) 170 | #self.repr_act = nn.Tanh() 171 | 172 | # Classifier head 173 | self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity() 174 | 175 | trunc_normal_(self.pos_embed, std=.02) 176 | trunc_normal_(self.cls_token, std=.02) 177 | self.apply(self._init_weights) 178 | 179 | def _init_weights(self, m): 180 | if isinstance(m, nn.Linear): 181 | trunc_normal_(m.weight, std=.02) 182 | if isinstance(m, nn.Linear) and m.bias is not None: 183 | nn.init.constant_(m.bias, 0) 184 | elif isinstance(m, nn.LayerNorm): 185 | nn.init.constant_(m.bias, 0) 186 | nn.init.constant_(m.weight, 1.0) 187 | 188 | @torch.jit.ignore 189 | def no_weight_decay(self): 190 | return {'pos_embed', 'cls_token'} 191 | 192 | def get_classifier(self): 193 | return self.head 194 | 195 | def reset_classifier(self, num_classes, global_pool=''): 196 | self.num_classes = num_classes 197 | self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() 198 | 199 | def forward_features(self, x): 200 | B = x.shape[0] 201 | x = self.patch_embed(x) 202 | 203 | cls_tokens = self.cls_token.expand(B, -1, -1) # stole cls_tokens impl from Phil 
Wang, thanks 204 | x = torch.cat((cls_tokens, x), dim=1) 205 | x = x + self.pos_embed 206 | x = self.pos_drop(x) 207 | 208 | blk_output = [] 209 | for blk in self.blocks: 210 | x = blk(x) 211 | blk_output.append(x) 212 | x = self.norm(x) 213 | return x[:, 0], blk_output, x[:, 1:] 214 | 215 | def forward(self, x): 216 | feature, blk_output, patch_output = self.forward_features(x) 217 | x = self.head(feature) 218 | return x 219 | 220 | 221 | def _conv_filter(state_dict, patch_size=16): 222 | """ convert patch embedding weight from manual patchify + linear proj to conv""" 223 | out_dict = {} 224 | for k, v in state_dict.items(): 225 | if 'patch_embed.proj.weight' in k: 226 | v = v.reshape((v.shape[0], 3, patch_size, patch_size)) 227 | out_dict[k] = v 228 | return out_dict -------------------------------------------------------------------------------- /models/neck_importance_score_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from functools import partial 4 | 5 | from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 6 | from timm.models.helpers import load_pretrained 7 | from timm.models.layers import DropPath, to_2tuple, trunc_normal_ 8 | from timm.models.resnet import resnet26d, resnet50d 9 | from timm.models.registry import register_model 10 | 11 | class Mlp(nn.Module): 12 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): 13 | super().__init__() 14 | out_features = out_features or in_features 15 | # hidden_features = hidden_features or in_features 16 | hidden_features = 3072 17 | self.fc1 = nn.Linear(in_features, hidden_features) 18 | self.act = act_layer() 19 | self.fc2 = nn.Linear(hidden_features, out_features) 20 | self.drop = nn.Dropout(drop) 21 | 22 | def forward(self, x): 23 | x = self.fc1(x) 24 | x = self.act(x) 25 | x = self.drop(x) 26 | x = self.fc2(x) 27 | x = self.drop(x) 28 | return x 29 | 30 | 31 | class Attention(nn.Module): 32 | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): 33 | super().__init__() 34 | self.num_heads = num_heads 35 | self.head_dim = 768 // num_heads 36 | # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights 37 | self.scale = qk_scale or self.head_dim ** -0.5 38 | 39 | self.qkv = nn.Linear(dim, 768 * 3, bias=qkv_bias) 40 | self.attn_drop = nn.Dropout(attn_drop) 41 | self.proj = nn.Linear(768, dim) 42 | self.proj_drop = nn.Dropout(proj_drop) 43 | def forward(self, x): 44 | B, N, C = x.shape 45 | 46 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, self.head_dim).permute(2, 0, 3, 1, 4) 47 | q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) 48 | 49 | attn = (q @ k.transpose(-2, -1)) * self.scale 50 | attn = attn.softmax(dim=-1) 51 | attn = self.attn_drop(attn) 52 | 53 | x = (attn @ v).transpose(1, 2).reshape(B, N, 768) 54 | x = self.proj(x) 55 | x = self.proj_drop(x) 56 | return x, attn 57 | 58 | 59 | class Block(nn.Module): 60 | 61 | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., 62 | drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): 63 | super().__init__() 64 | self.norm1 = norm_layer(dim) 65 | self.attn = Attention( 66 | dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) 67 | # NOTE: drop path for stochastic depth, we shall see if this is better than 
dropout here 68 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 69 | self.norm2 = norm_layer(dim) 70 | mlp_hidden_dim = int((dim + 1) * mlp_ratio) 71 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) 72 | 73 | def forward(self, x): 74 | y, attn_output = self.attn(self.norm1(x)) 75 | x = x + self.drop_path(y) 76 | # print(x.shape) 77 | x = x + self.drop_path(self.mlp(self.norm2(x))) 78 | return x, attn_output 79 | 80 | 81 | class PatchEmbed(nn.Module): 82 | """ Image to Patch Embedding 83 | """ 84 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): 85 | super().__init__() 86 | img_size = to_2tuple(img_size) 87 | patch_size = to_2tuple(patch_size) 88 | num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) 89 | self.img_size = img_size 90 | self.patch_size = patch_size 91 | self.num_patches = num_patches 92 | 93 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 94 | 95 | def forward(self, x): 96 | B, C, H, W = x.shape 97 | # FIXME look at relaxing size constraints 98 | assert H == self.img_size[0] and W == self.img_size[1], \ 99 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 100 | x = self.proj(x).flatten(2).transpose(1, 2) 101 | return x 102 | 103 | 104 | class HybridEmbed(nn.Module): 105 | """ CNN Feature Map Embedding 106 | Extract feature map from CNN, flatten, project to embedding dim. 107 | """ 108 | def __init__(self, backbone, img_size=224, feature_size=None, in_chans=3, embed_dim=768): 109 | super().__init__() 110 | assert isinstance(backbone, nn.Module) 111 | img_size = to_2tuple(img_size) 112 | self.img_size = img_size 113 | self.backbone = backbone 114 | if feature_size is None: 115 | with torch.no_grad(): 116 | # FIXME this is hacky, but most reliable way of determining the exact dim of the output feature 117 | # map for all networks, the feature metadata has reliable channel and stride info, but using 118 | # stride to calc feature dim requires info about padding of each stage that isn't captured. 
119 | training = backbone.training 120 | if training: 121 | backbone.eval() 122 | o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1] 123 | feature_size = o.shape[-2:] 124 | feature_dim = o.shape[1] 125 | backbone.train(training) 126 | else: 127 | feature_size = to_2tuple(feature_size) 128 | feature_dim = self.backbone.feature_info.channels()[-1] 129 | self.num_patches = feature_size[0] * feature_size[1] 130 | self.proj = nn.Linear(feature_dim, embed_dim) 131 | 132 | def forward(self, x): 133 | x = self.backbone(x)[-1] 134 | x = x.flatten(2).transpose(1, 2) 135 | x = self.proj(x) 136 | return x 137 | 138 | 139 | class VisionTransformer(nn.Module): 140 | """ Vision Transformer with support for patch or hybrid CNN input stage 141 | """ 142 | def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12, 143 | num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., 144 | drop_path_rate=0., hybrid_backbone=None, norm_layer=nn.LayerNorm): 145 | super().__init__() 146 | self.num_classes = num_classes 147 | self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models 148 | 149 | if hybrid_backbone is not None: 150 | self.patch_embed = HybridEmbed( 151 | hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim) 152 | else: 153 | self.patch_embed = PatchEmbed( 154 | img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) 155 | num_patches = self.patch_embed.num_patches 156 | 157 | self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) 158 | self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim)) 159 | self.pos_drop = nn.Dropout(p=drop_rate) 160 | 161 | 162 | dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule 163 | self.blocks = nn.ModuleList([ 164 | Block( 165 | dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, 166 | drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer) 167 | for i in range(depth)]) 168 | self.norm = norm_layer(embed_dim) 169 | 170 | # NOTE as per official impl, we could have a pre-logits representation dense layer + tanh here 171 | #self.repr = nn.Linear(embed_dim, representation_size) 172 | #self.repr_act = nn.Tanh() 173 | 174 | # Classifier head 175 | self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity() 176 | 177 | trunc_normal_(self.pos_embed, std=.02) 178 | trunc_normal_(self.cls_token, std=.02) 179 | self.apply(self._init_weights) 180 | 181 | def _init_weights(self, m): 182 | if isinstance(m, nn.Linear): 183 | trunc_normal_(m.weight, std=.02) 184 | if isinstance(m, nn.Linear) and m.bias is not None: 185 | nn.init.constant_(m.bias, 0) 186 | elif isinstance(m, nn.LayerNorm): 187 | nn.init.constant_(m.bias, 0) 188 | nn.init.constant_(m.weight, 1.0) 189 | 190 | @torch.jit.ignore 191 | def no_weight_decay(self): 192 | return {'pos_embed', 'cls_token'} 193 | 194 | def get_classifier(self): 195 | return self.head 196 | 197 | def reset_classifier(self, num_classes, global_pool=''): 198 | self.num_classes = num_classes 199 | self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() 200 | 201 | def forward_features(self, x): 202 | B = x.shape[0] 203 | x = self.patch_embed(x) 204 | 205 | cls_tokens = self.cls_token.expand(B, -1, -1) # stole cls_tokens impl from Phil Wang, thanks 206 | x = torch.cat((cls_tokens, 
x), dim=1) 207 | x = x + self.pos_embed 208 | x = self.pos_drop(x) 209 | # print(x.shape) 210 | count = 0 211 | attn_output = [] 212 | attn_mask = torch.zeros(x.size(0), dtype=int) 213 | for blk in self.blocks: 214 | y = x.clone() 215 | x, attn = blk(x) 216 | attn_output.append(attn) 217 | x = self.norm(x) 218 | return x[:, 0], attn_output, count 219 | 220 | def forward(self, x): 221 | feature, attn_output, count = self.forward_features(x) 222 | x = self.head(feature) 223 | return x 224 | 225 | 226 | def _conv_filter(state_dict, patch_size=16): 227 | """ convert patch embedding weight from manual patchify + linear proj to conv""" 228 | out_dict = {} 229 | for k, v in state_dict.items(): 230 | if 'patch_embed.proj.weight' in k: 231 | v = v.reshape((v.shape[0], 3, patch_size, patch_size)) 232 | out_dict[k] = v 233 | return out_dict -------------------------------------------------------------------------------- /models/block_importance_score_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from functools import partial 4 | 5 | from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 6 | from timm.models.helpers import load_pretrained 7 | from timm.models.layers import DropPath, to_2tuple, trunc_normal_ 8 | from timm.models.resnet import resnet26d, resnet50d 9 | from timm.models.registry import register_model 10 | 11 | 12 | class Mlp(nn.Module): 13 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): 14 | super().__init__() 15 | out_features = out_features or in_features 16 | hidden_features = hidden_features or in_features 17 | self.fc1 = nn.Linear(in_features, hidden_features) 18 | self.act = act_layer() 19 | self.fc2 = nn.Linear(hidden_features, out_features) 20 | self.drop = nn.Dropout(drop) 21 | 22 | def forward(self, x): 23 | x = self.fc1(x) 24 | x = self.act(x) 25 | x = self.drop(x) 26 | x = self.fc2(x) 27 | x = self.drop(x) 28 | return x 29 | 30 | 31 | class Attention(nn.Module): 32 | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): 33 | super().__init__() 34 | self.num_heads = num_heads 35 | head_dim = dim // num_heads 36 | # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights 37 | self.scale = qk_scale or head_dim ** -0.5 38 | 39 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 40 | self.attn_drop = nn.Dropout(attn_drop) 41 | self.proj = nn.Linear(dim, dim) 42 | self.proj_drop = nn.Dropout(proj_drop) 43 | 44 | def forward(self, x): 45 | B, N, C = x.shape 46 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 47 | q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) 48 | 49 | attn = (q @ k.transpose(-2, -1)) * self.scale 50 | attn = attn.softmax(dim=-1) 51 | attn = self.attn_drop(attn) 52 | # print(attn.shape) 53 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 54 | x = self.proj(x) 55 | x = self.proj_drop(x) 56 | return x, attn 57 | 58 | 59 | class Block(nn.Module): 60 | 61 | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., 62 | drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, delete_ind= None, index = None): 63 | super().__init__() 64 | self.norm1 = norm_layer(dim) 65 | self.attn = Attention( 66 | dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) 
67 | # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here 68 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 69 | self.norm2 = norm_layer(dim) 70 | mlp_hidden_dim = int(dim * mlp_ratio) 71 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) 72 | self.delete_ind = delete_ind 73 | self.index = index 74 | def forward(self, x): 75 | if self.delete_ind != 2* self.index and self.delete_ind != 2* self.index-1: 76 | y, attn_output = self.attn(self.norm1(x)) 77 | x = x + self.drop_path(y) 78 | # print(x.shape) 79 | if self.delete_ind != 2* self.index and self.delete_ind != 2* self.index + 1: 80 | x = x + self.drop_path(self.mlp(self.norm2(x))) 81 | return x 82 | 83 | 84 | class PatchEmbed(nn.Module): 85 | """ Image to Patch Embedding 86 | """ 87 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): 88 | super().__init__() 89 | img_size = to_2tuple(img_size) 90 | patch_size = to_2tuple(patch_size) 91 | num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) 92 | self.img_size = img_size 93 | self.patch_size = patch_size 94 | self.num_patches = num_patches 95 | 96 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 97 | 98 | def forward(self, x): 99 | B, C, H, W = x.shape 100 | # FIXME look at relaxing size constraints 101 | assert H == self.img_size[0] and W == self.img_size[1], \ 102 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 103 | x = self.proj(x).flatten(2).transpose(1, 2) 104 | return x 105 | 106 | 107 | class HybridEmbed(nn.Module): 108 | """ CNN Feature Map Embedding 109 | Extract feature map from CNN, flatten, project to embedding dim. 110 | """ 111 | def __init__(self, backbone, img_size=224, feature_size=None, in_chans=3, embed_dim=768): 112 | super().__init__() 113 | assert isinstance(backbone, nn.Module) 114 | img_size = to_2tuple(img_size) 115 | self.img_size = img_size 116 | self.backbone = backbone 117 | if feature_size is None: 118 | with torch.no_grad(): 119 | # FIXME this is hacky, but most reliable way of determining the exact dim of the output feature 120 | # map for all networks, the feature metadata has reliable channel and stride info, but using 121 | # stride to calc feature dim requires info about padding of each stage that isn't captured. 
122 | training = backbone.training 123 | if training: 124 | backbone.eval() 125 | o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1] 126 | feature_size = o.shape[-2:] 127 | feature_dim = o.shape[1] 128 | backbone.train(training) 129 | else: 130 | feature_size = to_2tuple(feature_size) 131 | feature_dim = self.backbone.feature_info.channels()[-1] 132 | self.num_patches = feature_size[0] * feature_size[1] 133 | self.proj = nn.Linear(feature_dim, embed_dim) 134 | 135 | def forward(self, x): 136 | x = self.backbone(x)[-1] 137 | x = x.flatten(2).transpose(1, 2) 138 | x = self.proj(x) 139 | return x 140 | 141 | 142 | class VisionTransformer(nn.Module): 143 | """ Vision Transformer with support for patch or hybrid CNN input stage 144 | """ 145 | def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12, 146 | num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., 147 | drop_path_rate=0., hybrid_backbone=None, norm_layer=nn.LayerNorm, delete_ind= None): 148 | super().__init__() 149 | self.num_classes = num_classes 150 | self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models 151 | 152 | if hybrid_backbone is not None: 153 | self.patch_embed = HybridEmbed( 154 | hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim) 155 | else: 156 | self.patch_embed = PatchEmbed( 157 | img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) 158 | num_patches = self.patch_embed.num_patches 159 | 160 | self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) 161 | self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim)) 162 | self.pos_drop = nn.Dropout(p=drop_rate) 163 | 164 | dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule 165 | self.blocks = nn.ModuleList([ 166 | Block( 167 | dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, 168 | drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, delete_ind= delete_ind, index = i) 169 | for i in range(depth)]) 170 | self.norm = norm_layer(embed_dim) 171 | 172 | # NOTE as per official impl, we could have a pre-logits representation dense layer + tanh here 173 | #self.repr = nn.Linear(embed_dim, representation_size) 174 | #self.repr_act = nn.Tanh() 175 | 176 | # Classifier head 177 | self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity() 178 | 179 | trunc_normal_(self.pos_embed, std=.02) 180 | trunc_normal_(self.cls_token, std=.02) 181 | self.apply(self._init_weights) 182 | 183 | def _init_weights(self, m): 184 | if isinstance(m, nn.Linear): 185 | trunc_normal_(m.weight, std=.02) 186 | if isinstance(m, nn.Linear) and m.bias is not None: 187 | nn.init.constant_(m.bias, 0) 188 | elif isinstance(m, nn.LayerNorm): 189 | nn.init.constant_(m.bias, 0) 190 | nn.init.constant_(m.weight, 1.0) 191 | 192 | @torch.jit.ignore 193 | def no_weight_decay(self): 194 | return {'pos_embed', 'cls_token'} 195 | 196 | def get_classifier(self): 197 | return self.head 198 | 199 | def reset_classifier(self, num_classes, global_pool=''): 200 | self.num_classes = num_classes 201 | self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() 202 | 203 | def forward_features(self, x): 204 | B = x.shape[0] 205 | x = self.patch_embed(x) 206 | 207 | cls_tokens = self.cls_token.expand(B, -1, -1) # stole cls_tokens impl from Phil 
Wang, thanks 208 | x = torch.cat((cls_tokens, x), dim=1) 209 | x = x + self.pos_embed 210 | x = self.pos_drop(x) 211 | 212 | for blk in self.blocks: 213 | x = blk(x) 214 | x = self.norm(x) 215 | return x[:, 0] 216 | 217 | def forward(self, x): 218 | feature = self.forward_features(x) 219 | x = self.head(feature) 220 | return x 221 | 222 | 223 | def _conv_filter(state_dict, patch_size=16): 224 | """ convert patch embedding weight from manual patchify + linear proj to conv""" 225 | out_dict = {} 226 | for k, v in state_dict.items(): 227 | if 'patch_embed.proj.weight' in k: 228 | v = v.reshape((v.shape[0], 3, patch_size, patch_size)) 229 | out_dict[k] = v 230 | return out_dict --------------------------------------------------------------------------------