├── .gitignore ├── CV ├── README.md ├── figures │ ├── Supernet.pdf │ ├── Supernet.png │ ├── vis_arch_v1.pdf │ ├── vis_arch_v1.png │ ├── vis_arch_v2.pdf │ └── vis_arch_v2.png ├── plot_supernet.py ├── retraining_hybrid │ ├── boss_candidates │ │ ├── __init__.py │ │ ├── adder │ │ │ ├── add2.py │ │ │ ├── adder.py │ │ │ ├── adder_cuda.cpp │ │ │ ├── adder_cuda_kernel.cu │ │ │ ├── adder_slow.py │ │ │ ├── check.py │ │ │ ├── jit.py │ │ │ ├── quantize.py │ │ │ └── setup.py │ │ ├── bot_op.py │ │ ├── deepshift │ │ │ ├── __init__.py │ │ │ ├── convert.py │ │ │ ├── kernels │ │ │ │ ├── __init__.py │ │ │ │ ├── cpu │ │ │ │ │ ├── setup.py │ │ │ │ │ └── shift_cpu.cpp │ │ │ │ ├── cuda │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── convert_to_unoptimized.py │ │ │ │ │ ├── setup.py │ │ │ │ │ ├── shift.cu │ │ │ │ │ ├── shift_cuda.cpp │ │ │ │ │ ├── unoptimized_conv.py │ │ │ │ │ ├── unoptimized_cuda.cpp │ │ │ │ │ ├── unoptimized_cuda_kernel.cu │ │ │ │ │ └── unoptimized_linear.py │ │ │ │ └── kernels.py │ │ │ ├── modules.py │ │ │ ├── modules_q.py │ │ │ ├── ste.py │ │ │ └── utils.py │ │ └── resnet_op.py │ ├── boss_models.py │ ├── boss_models_Q.py │ ├── cal_energy.py │ ├── cal_flops.py │ ├── datasets.py │ ├── engine.py │ ├── hw_record │ │ ├── bossnet_T0_mix_v3_distilled-224 │ │ │ ├── add.csv │ │ │ ├── conv.csv │ │ │ ├── linear.csv │ │ │ ├── matmul.csv │ │ │ └── shift.csv │ │ ├── bossnet_T0_mix_v3_distilled-256 │ │ │ ├── add.csv │ │ │ ├── conv.csv │ │ │ ├── linear.csv │ │ │ ├── matmul.csv │ │ │ └── shift.csv │ │ ├── bossnet_T0_mix_v3_distilled-288 │ │ │ ├── add.csv │ │ │ ├── conv.csv │ │ │ ├── linear.csv │ │ │ ├── matmul.csv │ │ │ └── shift.csv │ │ ├── bossnet_T0_mix_v4_distilled_Q-224 │ │ │ ├── conv.csv │ │ │ ├── linear.csv │ │ │ ├── matmul.csv │ │ │ └── shfit_q.csv │ │ ├── bossnet_T0_mix_v4_distilled_Q-256 │ │ │ ├── conv.csv │ │ │ ├── linear.csv │ │ │ ├── matmul.csv │ │ │ └── shfit_q.csv │ │ ├── bossnet_T1_mix_v3_distilled-224 │ │ │ ├── add.csv │ │ │ ├── conv.csv │ │ │ ├── linear.csv │ │ │ ├── 
matmul.csv │ │ │ └── shift.csv │ │ ├── bossnet_T1_mix_v3_distilled_Q-256 │ │ │ ├── add.csv │ │ │ ├── conv.csv │ │ │ ├── linear.csv │ │ │ ├── matmul.csv │ │ │ └── shfit_q.csv │ │ ├── bossnet_T1_mix_v4_distilled_Q-224 │ │ │ ├── conv.csv │ │ │ ├── linear.csv │ │ │ ├── matmul.csv │ │ │ └── shfit_q.csv │ │ └── bossnet_T1_mix_v4_distilled_Q-256 │ │ │ ├── conv.csv │ │ │ ├── linear.csv │ │ │ ├── matmul.csv │ │ │ └── shfit_q.csv │ ├── hw_utils.py │ ├── inference.py │ ├── losses.py │ ├── main.py │ ├── main_Q.py │ ├── plot_dist.py │ ├── run_with_submitit.py │ ├── samplers.py │ ├── test.py │ └── utils.py ├── searching_v1 │ ├── arch_ranking.py │ ├── bossnas │ │ ├── __init__.py │ │ ├── apis │ │ │ ├── __init__.py │ │ │ └── train.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ └── bossnas_data.py │ │ ├── hooks │ │ │ ├── __init__.py │ │ │ ├── optimizer_hook.py │ │ │ ├── path_hook.py │ │ │ └── val_hook.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── operations │ │ │ │ ├── __init__.py │ │ │ │ ├── adder │ │ │ │ │ ├── add2.py │ │ │ │ │ ├── adder.py │ │ │ │ │ ├── adder_cuda.cpp │ │ │ │ │ ├── adder_cuda_kernel.cu │ │ │ │ │ ├── adder_slow.py │ │ │ │ │ ├── check.py │ │ │ │ │ ├── jit.py │ │ │ │ │ ├── quantize.py │ │ │ │ │ └── setup.py │ │ │ │ ├── deepshift │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── convert.py │ │ │ │ │ ├── kernels │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── cpu │ │ │ │ │ │ │ ├── setup.py │ │ │ │ │ │ │ └── shift_cpu.cpp │ │ │ │ │ │ ├── cuda │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── convert_to_unoptimized.py │ │ │ │ │ │ │ ├── setup.py │ │ │ │ │ │ │ ├── shift.cu │ │ │ │ │ │ │ ├── shift_cuda.cpp │ │ │ │ │ │ │ ├── unoptimized_conv.py │ │ │ │ │ │ │ ├── unoptimized_cuda.cpp │ │ │ │ │ │ │ ├── unoptimized_cuda_kernel.cu │ │ │ │ │ │ │ └── unoptimized_linear.py │ │ │ │ │ │ └── kernels.py │ │ │ │ │ ├── modules.py │ │ │ │ │ ├── modules_q.py │ │ │ │ │ ├── ste.py │ │ │ │ │ └── utils.py │ │ │ │ ├── hytra_ops.py │ │ │ │ ├── hytra_ops_ws.py │ │ │ │ ├── hytra_ops_ws_dist.py │ │ │ │ ├── 
mbconv_ops.py │ │ │ │ ├── nats_ops.py │ │ │ │ └── operation_dict.py │ │ │ ├── siamese_supernets │ │ │ │ ├── __init__.py │ │ │ │ ├── boss_necks.py │ │ │ │ ├── siamese_supernets_hytra.py │ │ │ │ ├── siamese_supernets_mbconv.py │ │ │ │ └── siamese_supernets_nats.py │ │ │ ├── supernets │ │ │ │ ├── __init__.py │ │ │ │ ├── hytra_supernet.py │ │ │ │ ├── hytra_supernet_ws.py │ │ │ │ ├── hytra_supernet_ws_dist.py │ │ │ │ ├── mbconv_supernet.py │ │ │ │ └── nats_supernet.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── hytra_paths.py │ │ └── runners │ │ │ ├── __init__.py │ │ │ └── multi_stage_runner.py │ ├── cifar.py │ ├── configs │ │ ├── base.py │ │ ├── hytra.py │ │ ├── hytra_bs64_accumulate8_ep6_multi_aug_gpus8.py │ │ ├── hytra_bs64_accumulate8_ep6_multi_aug_gpus8_full.py │ │ ├── mbconv_bs64_accumulate8_ep6_multi_aug_gpus8.py │ │ ├── nats_c100_bs256_accumulate4_gpus4.py │ │ └── nats_c10_bs256_accumulate4_gpus4.py │ ├── datasets.py │ ├── engine.py │ ├── evo_configs.npy │ ├── plot.py │ ├── plot_dist_search.py │ ├── plot_dist_search_ws.py │ ├── plot_laplace.py │ ├── samplers.py │ ├── search.py │ ├── train.py │ ├── train_hy.py │ ├── train_hy_no_ema.py │ ├── utils.py │ └── work_dirs │ │ └── cifar100 │ │ └── Supernet_v1-20210818-133106 │ │ ├── log.txt │ │ ├── plot.py │ │ ├── trajectory.pdf │ │ └── trajectory.svg ├── searching_v2 │ ├── arch_ranking.py │ ├── bossnas │ │ ├── __init__.py │ │ ├── apis │ │ │ ├── __init__.py │ │ │ └── train.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ └── bossnas_data.py │ │ ├── hooks │ │ │ ├── __init__.py │ │ │ ├── optimizer_hook.py │ │ │ ├── path_hook.py │ │ │ └── val_hook.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── operations │ │ │ │ ├── __init__.py │ │ │ │ ├── adder │ │ │ │ │ ├── add2.py │ │ │ │ │ ├── adder.py │ │ │ │ │ ├── adder_cuda.cpp │ │ │ │ │ ├── adder_cuda_kernel.cu │ │ │ │ │ ├── adder_slow.py │ │ │ │ │ ├── check.py │ │ │ │ │ ├── jit.py │ │ │ │ │ ├── quantize.py │ │ │ │ │ └── setup.py │ │ │ │ ├── deepshift │ │ │ │ │ ├── 
__init__.py │ │ │ │ │ ├── convert.py │ │ │ │ │ ├── kernels │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── cpu │ │ │ │ │ │ │ ├── setup.py │ │ │ │ │ │ │ └── shift_cpu.cpp │ │ │ │ │ │ ├── cuda │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── convert_to_unoptimized.py │ │ │ │ │ │ │ ├── setup.py │ │ │ │ │ │ │ ├── shift.cu │ │ │ │ │ │ │ ├── shift_cuda.cpp │ │ │ │ │ │ │ ├── unoptimized_conv.py │ │ │ │ │ │ │ ├── unoptimized_cuda.cpp │ │ │ │ │ │ │ ├── unoptimized_cuda_kernel.cu │ │ │ │ │ │ │ └── unoptimized_linear.py │ │ │ │ │ │ └── kernels.py │ │ │ │ │ ├── modules.py │ │ │ │ │ ├── modules_q.py │ │ │ │ │ ├── ste.py │ │ │ │ │ └── utils.py │ │ │ │ ├── hytra_ops.py │ │ │ │ ├── hytra_ops_ws.py │ │ │ │ ├── hytra_ops_ws_dist.py │ │ │ │ ├── mbconv_ops.py │ │ │ │ ├── nats_ops.py │ │ │ │ └── operation_dict.py │ │ │ ├── siamese_supernets │ │ │ │ ├── __init__.py │ │ │ │ ├── boss_necks.py │ │ │ │ ├── siamese_supernets_hytra.py │ │ │ │ ├── siamese_supernets_mbconv.py │ │ │ │ └── siamese_supernets_nats.py │ │ │ ├── supernets │ │ │ │ ├── __init__.py │ │ │ │ ├── hytra_supernet.py │ │ │ │ ├── hytra_supernet_ws.py │ │ │ │ ├── hytra_supernet_ws_dist.py │ │ │ │ ├── mbconv_supernet.py │ │ │ │ └── nats_supernet.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── hytra_paths.py │ │ └── runners │ │ │ ├── __init__.py │ │ │ └── multi_stage_runner.py │ ├── cifar.py │ ├── comp_search_kl.py │ ├── comp_search_ws.py │ ├── configs │ │ ├── base.py │ │ ├── hytra.py │ │ ├── hytra_bs64_accumulate8_ep6_multi_aug_gpus8.py │ │ ├── hytra_bs64_accumulate8_ep6_multi_aug_gpus8_full.py │ │ ├── mbconv_bs64_accumulate8_ep6_multi_aug_gpus8.py │ │ ├── nats_c100_bs256_accumulate4_gpus4.py │ │ └── nats_c10_bs256_accumulate4_gpus4.py │ ├── datasets.py │ ├── engine.py │ ├── evo_configs.npy │ ├── plot_dist_search.py │ ├── plot_dist_search_ws.py │ ├── plot_dist_search_ws_dist.py │ ├── plot_laplace.py │ ├── samplers.py │ ├── search.py │ ├── train.py │ ├── train_hy.py │ ├── train_hy_no_ema.py │ ├── utils.py │ └── work_dirs │ │ 
├── cifar100 │ │ └── Supernet_v2-20210820-220728 │ │ │ ├── log.txt │ │ │ ├── plot.py │ │ │ ├── trajectory.pdf │ │ │ └── trajectory.svg │ │ ├── corr.pdf │ │ └── corr.svg ├── searching_v3 │ ├── arch_ranking.py │ ├── bossnas │ │ ├── __init__.py │ │ ├── apis │ │ │ ├── __init__.py │ │ │ └── train.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ └── bossnas_data.py │ │ ├── hooks │ │ │ ├── __init__.py │ │ │ ├── optimizer_hook.py │ │ │ ├── path_hook.py │ │ │ └── val_hook.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── operations │ │ │ │ ├── __init__.py │ │ │ │ ├── adder │ │ │ │ │ ├── add2.py │ │ │ │ │ ├── adder.py │ │ │ │ │ ├── adder_cuda.cpp │ │ │ │ │ ├── adder_cuda_kernel.cu │ │ │ │ │ ├── adder_slow.py │ │ │ │ │ ├── check.py │ │ │ │ │ ├── jit.py │ │ │ │ │ ├── quantize.py │ │ │ │ │ └── setup.py │ │ │ │ ├── deepshift │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── convert.py │ │ │ │ │ ├── kernels │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── cpu │ │ │ │ │ │ │ ├── setup.py │ │ │ │ │ │ │ └── shift_cpu.cpp │ │ │ │ │ │ ├── cuda │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── convert_to_unoptimized.py │ │ │ │ │ │ │ ├── setup.py │ │ │ │ │ │ │ ├── shift.cu │ │ │ │ │ │ │ ├── shift_cuda.cpp │ │ │ │ │ │ │ ├── unoptimized_conv.py │ │ │ │ │ │ │ ├── unoptimized_cuda.cpp │ │ │ │ │ │ │ ├── unoptimized_cuda_kernel.cu │ │ │ │ │ │ │ └── unoptimized_linear.py │ │ │ │ │ │ └── kernels.py │ │ │ │ │ ├── modules.py │ │ │ │ │ ├── modules_q.py │ │ │ │ │ ├── ste.py │ │ │ │ │ └── utils.py │ │ │ │ ├── hytra_ops.py │ │ │ │ ├── hytra_ops_ws.py │ │ │ │ ├── hytra_ops_ws_dist.py │ │ │ │ ├── mbconv_ops.py │ │ │ │ ├── nats_ops.py │ │ │ │ └── operation_dict.py │ │ │ ├── siamese_supernets │ │ │ │ ├── __init__.py │ │ │ │ ├── boss_necks.py │ │ │ │ ├── siamese_supernets_hytra.py │ │ │ │ ├── siamese_supernets_mbconv.py │ │ │ │ └── siamese_supernets_nats.py │ │ │ ├── supernets │ │ │ │ ├── __init__.py │ │ │ │ ├── hytra_supernet.py │ │ │ │ ├── hytra_supernet_ws.py │ │ │ │ ├── hytra_supernet_ws_dist.py │ │ │ │ ├── 
mbconv_supernet.py │ │ │ │ └── nats_supernet.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── hytra_paths.py │ │ └── runners │ │ │ ├── __init__.py │ │ │ └── multi_stage_runner.py │ ├── cifar.py │ ├── comp_search_kl.py │ ├── comp_search_now.py │ ├── comp_search_ws.py │ ├── configs │ │ ├── base.py │ │ ├── hytra.py │ │ ├── hytra_bs64_accumulate8_ep6_multi_aug_gpus8.py │ │ ├── hytra_bs64_accumulate8_ep6_multi_aug_gpus8_full.py │ │ ├── mbconv_bs64_accumulate8_ep6_multi_aug_gpus8.py │ │ ├── nats_c100_bs256_accumulate4_gpus4.py │ │ └── nats_c10_bs256_accumulate4_gpus4.py │ ├── datasets.py │ ├── engine.py │ ├── evo_configs.npy │ ├── plot_dist_search.py │ ├── plot_dist_search_ws.py │ ├── plot_dist_search_ws_dist.py │ ├── plot_laplace.py │ ├── samplers.py │ ├── search.py │ ├── train.py │ ├── train_hy.py │ ├── train_hy_no_ema.py │ ├── utils.py │ └── work_dirs │ │ ├── cifar100 │ │ └── Supernet_v3-20210820-231347 │ │ │ ├── log.txt │ │ │ ├── plot.py │ │ │ ├── trajectory.pdf │ │ │ └── trajectory.svg │ │ ├── corr_v1_v2.pdf │ │ ├── corr_v1_v2.svg │ │ ├── corr_v1_v3.pdf │ │ ├── corr_v1_v3.svg │ │ ├── corr_v2_v3.pdf │ │ └── corr_v2_v3.svg ├── searching_v4 │ ├── arch_ranking.py │ ├── bossnas │ │ ├── __init__.py │ │ ├── apis │ │ │ ├── __init__.py │ │ │ └── train.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ └── bossnas_data.py │ │ ├── hooks │ │ │ ├── __init__.py │ │ │ ├── optimizer_hook.py │ │ │ ├── path_hook.py │ │ │ └── val_hook.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── operations │ │ │ │ ├── __init__.py │ │ │ │ ├── adder │ │ │ │ │ ├── add2.py │ │ │ │ │ ├── adder.py │ │ │ │ │ ├── adder_cuda.cpp │ │ │ │ │ ├── adder_cuda_kernel.cu │ │ │ │ │ ├── adder_slow.py │ │ │ │ │ ├── check.py │ │ │ │ │ ├── jit.py │ │ │ │ │ ├── quantize.py │ │ │ │ │ └── setup.py │ │ │ │ ├── deepshift │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── convert.py │ │ │ │ │ ├── kernels │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── cpu │ │ │ │ │ │ │ ├── setup.py │ │ │ │ │ │ │ └── shift_cpu.cpp │ │ │ │ │ │ ├── cuda │ 
│ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── convert_to_unoptimized.py │ │ │ │ │ │ │ ├── setup.py │ │ │ │ │ │ │ ├── shift.cu │ │ │ │ │ │ │ ├── shift_cuda.cpp │ │ │ │ │ │ │ ├── unoptimized_conv.py │ │ │ │ │ │ │ ├── unoptimized_cuda.cpp │ │ │ │ │ │ │ ├── unoptimized_cuda_kernel.cu │ │ │ │ │ │ │ └── unoptimized_linear.py │ │ │ │ │ │ └── kernels.py │ │ │ │ │ ├── modules.py │ │ │ │ │ ├── modules_q.py │ │ │ │ │ ├── ste.py │ │ │ │ │ └── utils.py │ │ │ │ ├── hytra_ops.py │ │ │ │ ├── hytra_ops_ws.py │ │ │ │ ├── hytra_ops_ws_dist.py │ │ │ │ ├── mbconv_ops.py │ │ │ │ ├── nats_ops.py │ │ │ │ └── operation_dict.py │ │ │ ├── siamese_supernets │ │ │ │ ├── __init__.py │ │ │ │ ├── boss_necks.py │ │ │ │ ├── siamese_supernets_hytra.py │ │ │ │ ├── siamese_supernets_mbconv.py │ │ │ │ └── siamese_supernets_nats.py │ │ │ ├── supernets │ │ │ │ ├── __init__.py │ │ │ │ ├── hytra_supernet.py │ │ │ │ ├── hytra_supernet_ws.py │ │ │ │ ├── hytra_supernet_ws_dist.py │ │ │ │ ├── mbconv_supernet.py │ │ │ │ └── nats_supernet.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── hytra_paths.py │ │ └── runners │ │ │ ├── __init__.py │ │ │ └── multi_stage_runner.py │ ├── cifar.py │ ├── comp_search_ws.py │ ├── configs │ │ ├── base.py │ │ ├── hytra.py │ │ ├── hytra_bs64_accumulate8_ep6_multi_aug_gpus8.py │ │ ├── hytra_bs64_accumulate8_ep6_multi_aug_gpus8_full.py │ │ ├── mbconv_bs64_accumulate8_ep6_multi_aug_gpus8.py │ │ ├── nats_c100_bs256_accumulate4_gpus4.py │ │ └── nats_c10_bs256_accumulate4_gpus4.py │ ├── datasets.py │ ├── engine.py │ ├── evo_configs.npy │ ├── plot_dist_search.py │ ├── plot_dist_search_ws.py │ ├── plot_dist_search_ws_dist.py │ ├── plot_laplace.py │ ├── samplers.py │ ├── search.py │ ├── train.py │ ├── train_hy.py │ ├── train_hy_no_ema.py │ ├── utils.py │ └── work_dirs │ │ ├── cifar100 │ │ └── Supernet_v4-20210826-193803 │ │ │ ├── log.txt │ │ │ ├── plot.py │ │ │ ├── trajectory.pdf │ │ │ └── trajectory.svg │ │ ├── corr_v1_v4.pdf │ │ └── corr_v1_v4.svg └── searching_v5 │ ├── 
arch_ranking.py │ ├── bossnas │ ├── __init__.py │ ├── apis │ │ ├── __init__.py │ │ └── train.py │ ├── datasets │ │ ├── __init__.py │ │ └── bossnas_data.py │ ├── hooks │ │ ├── __init__.py │ │ ├── optimizer_hook.py │ │ ├── path_hook.py │ │ └── val_hook.py │ ├── models │ │ ├── __init__.py │ │ ├── operations │ │ │ ├── __init__.py │ │ │ ├── adder │ │ │ │ ├── add2.py │ │ │ │ ├── adder.py │ │ │ │ ├── adder_cuda.cpp │ │ │ │ ├── adder_cuda_kernel.cu │ │ │ │ ├── adder_slow.py │ │ │ │ ├── check.py │ │ │ │ ├── jit.py │ │ │ │ ├── quantize.py │ │ │ │ └── setup.py │ │ │ ├── deepshift │ │ │ │ ├── __init__.py │ │ │ │ ├── convert.py │ │ │ │ ├── kernels │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── cpu │ │ │ │ │ │ ├── setup.py │ │ │ │ │ │ └── shift_cpu.cpp │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── convert_to_unoptimized.py │ │ │ │ │ │ ├── setup.py │ │ │ │ │ │ ├── shift.cu │ │ │ │ │ │ ├── shift_cuda.cpp │ │ │ │ │ │ ├── unoptimized_conv.py │ │ │ │ │ │ ├── unoptimized_cuda.cpp │ │ │ │ │ │ ├── unoptimized_cuda_kernel.cu │ │ │ │ │ │ └── unoptimized_linear.py │ │ │ │ │ └── kernels.py │ │ │ │ ├── modules.py │ │ │ │ ├── modules_q.py │ │ │ │ ├── ste.py │ │ │ │ └── utils.py │ │ │ ├── hytra_ops.py │ │ │ ├── hytra_ops_ws.py │ │ │ ├── hytra_ops_ws_dist.py │ │ │ ├── hytra_ops_ws_dist_Q.py │ │ │ ├── mbconv_ops.py │ │ │ ├── nats_ops.py │ │ │ └── operation_dict.py │ │ ├── siamese_supernets │ │ │ ├── __init__.py │ │ │ ├── boss_necks.py │ │ │ ├── siamese_supernets_hytra.py │ │ │ ├── siamese_supernets_mbconv.py │ │ │ └── siamese_supernets_nats.py │ │ ├── supernets │ │ │ ├── __init__.py │ │ │ ├── hytra_supernet.py │ │ │ ├── hytra_supernet_ws.py │ │ │ ├── hytra_supernet_ws_dist.py │ │ │ ├── mbconv_supernet.py │ │ │ └── nats_supernet.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── hytra_paths.py │ └── runners │ │ ├── __init__.py │ │ └── multi_stage_runner.py │ ├── cal_params_flops │ ├── boss_candidates │ │ ├── __init__.py │ │ ├── adder │ │ │ ├── add2.py │ │ │ ├── adder.py │ │ │ ├── 
adder_cuda.cpp │ │ │ ├── adder_cuda_kernel.cu │ │ │ ├── adder_slow.py │ │ │ ├── check.py │ │ │ ├── jit.py │ │ │ ├── quantize.py │ │ │ └── setup.py │ │ ├── bot_op.py │ │ ├── deepshift │ │ │ ├── __init__.py │ │ │ ├── convert.py │ │ │ ├── kernels │ │ │ │ ├── __init__.py │ │ │ │ ├── cpu │ │ │ │ │ ├── setup.py │ │ │ │ │ └── shift_cpu.cpp │ │ │ │ ├── cuda │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── convert_to_unoptimized.py │ │ │ │ │ ├── setup.py │ │ │ │ │ ├── shift.cu │ │ │ │ │ ├── shift_cuda.cpp │ │ │ │ │ ├── unoptimized_conv.py │ │ │ │ │ ├── unoptimized_cuda.cpp │ │ │ │ │ ├── unoptimized_cuda_kernel.cu │ │ │ │ │ └── unoptimized_linear.py │ │ │ │ └── kernels.py │ │ │ ├── modules.py │ │ │ ├── modules_q.py │ │ │ ├── ste.py │ │ │ └── utils.py │ │ └── resnet_op.py │ ├── boss_models.py │ ├── boss_models_Q.py │ ├── cal_flops.py │ ├── datasets.py │ ├── engine.py │ ├── main.py │ ├── main_Q.py │ ├── plot_dist.py │ ├── samplers.py │ ├── test.py │ └── utils.py │ ├── cifar.py │ ├── comp_search_now.py │ ├── configs │ ├── base.py │ ├── hytra.py │ ├── hytra_bs64_accumulate8_ep6_multi_aug_gpus8.py │ ├── hytra_bs64_accumulate8_ep6_multi_aug_gpus8_full.py │ ├── mbconv_bs64_accumulate8_ep6_multi_aug_gpus8.py │ ├── nats_c100_bs256_accumulate4_gpus4.py │ └── nats_c10_bs256_accumulate4_gpus4.py │ ├── datasets.py │ ├── engine.py │ ├── evo_configs.npy │ ├── plot_dist_search.py │ ├── plot_dist_search_ws.py │ ├── plot_dist_search_ws_dist.py │ ├── plot_laplace.py │ ├── samplers.py │ ├── search.py │ ├── search_real.py │ ├── train.py │ ├── train_hy.py │ ├── train_hy_no_ema.py │ ├── utils.py │ └── work_dirs │ ├── ImageNet_100 │ └── Supernet_v5-20210914-172555 │ │ └── log.txt │ ├── cifar100 │ └── Supernet_v5-20210902-220001 │ │ └── log.txt │ ├── corr_v2_v5.pdf │ └── corr_v2_v5.svg ├── LICENSE ├── NLP ├── en-de │ ├── README.md │ ├── average_checkpoints.py │ ├── build_modules.sh │ ├── checkpoints │ │ └── wmt14.en-de │ │ │ └── subtransformer │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@100ms_update │ │ │ └── 
record.txt │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@120ms_update │ │ │ └── record.txt │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@150ms_update │ │ │ └── record.txt │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@180ms_update │ │ │ └── record.txt │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@200ms_update │ │ │ └── record.txt │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms_update │ │ │ └── record.txt │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_update │ │ │ └── record.txt │ │ │ └── wmt14ende_gpu_V100_shiftadd_v3@350ms_update │ │ │ └── record.txt │ ├── config.npy │ ├── configs │ │ ├── .DS_Store │ │ ├── iwslt14.de-en │ │ │ ├── .DS_Store │ │ │ ├── average_checkpoint.sh │ │ │ ├── evo_search │ │ │ │ └── iwslt14deen_titanxp.yml │ │ │ ├── get_preprocessed.sh │ │ │ ├── latency_dataset │ │ │ │ └── gpu_titanxp.yml │ │ │ ├── latency_predictor │ │ │ │ └── gpu_titanxp.yml │ │ │ ├── preprocess.sh │ │ │ ├── subtransformer │ │ │ │ ├── HAT_iwslt14deen_titanxp@109.0ms_bleu@34.5.yml │ │ │ │ ├── HAT_iwslt14deen_titanxp@137.8ms_bleu@34.7.yml │ │ │ │ ├── HAT_iwslt14deen_titanxp@168.8ms_bleu@34.8.yml │ │ │ │ ├── HAT_iwslt14deen_titanxp@45.6ms_bleu@33.4.yml │ │ │ │ ├── HAT_iwslt14deen_titanxp@74.5ms_bleu@34.2.yml │ │ │ │ └── common.yml │ │ │ ├── supertransformer │ │ │ │ └── space1.yml │ │ │ └── test.sh │ │ ├── wmt14.en-de │ │ │ ├── .DS_Store │ │ │ ├── evo_search │ │ │ │ ├── evo_search_100ms_update.out │ │ │ │ ├── evo_search_120ms_update.out │ │ │ │ ├── wmt14ende_2080Ti_shiftadd.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v2_100.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v2_120.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v2_150.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v2_180.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v2_200.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v2_250.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v2_300.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v2_400.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_100_update.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_120_update.yml │ │ │ │ ├── 
wmt14ende_V100_shiftadd_v3_150_update.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_180_update.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_200.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_200_update.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_250.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_250_update.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_300.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_300_update.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_350_update.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_400_update.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_500.yml │ │ │ │ ├── wmt14ende_V100_shiftadd_v3_80_update.yml │ │ │ │ ├── wmt14ende_raspberrypi.yml │ │ │ │ ├── wmt14ende_titanxp.yml │ │ │ │ └── wmt14ende_xeon.yml │ │ │ ├── get_preprocessed.sh │ │ │ ├── latency_dataset │ │ │ │ ├── .DS_Store │ │ │ │ ├── cpu_raspberrypi.yml │ │ │ │ ├── cpu_xeon.yml │ │ │ │ ├── cpu_xeon_shiftadd.yml │ │ │ │ ├── gpu_2080Ti_shiftadd.yml │ │ │ │ ├── gpu_V100_shiftadd_v2.yml │ │ │ │ ├── gpu_V100_shiftadd_v3.yml │ │ │ │ └── gpu_titanxp.yml │ │ │ ├── latency_predictor │ │ │ │ ├── cpu_raspberrypi.yml │ │ │ │ ├── cpu_xeon.yml │ │ │ │ ├── gpu_2080Ti_shiftadd.yml │ │ │ │ ├── gpu_V100_shiftadd_v2.yml │ │ │ │ ├── gpu_V100_shiftadd_v3.yml │ │ │ │ └── gpu_titanxp.yml │ │ │ ├── preprocess.sh │ │ │ ├── subtransformer │ │ │ │ ├── .DS_Store │ │ │ │ ├── HAT_wmt14ende_raspberrypi@3.5s_bleu@25.8.yml │ │ │ │ ├── HAT_wmt14ende_raspberrypi@4.0s_bleu@26.9.yml │ │ │ │ ├── HAT_wmt14ende_raspberrypi@4.5s_bleu@27.6.yml │ │ │ │ ├── HAT_wmt14ende_raspberrypi@5.0s_bleu@27.8.yml │ │ │ │ ├── HAT_wmt14ende_raspberrypi@6.0s_bleu@28.2.yml │ │ │ │ ├── HAT_wmt14ende_raspberrypi@6.9s_bleu@28.4.yml │ │ │ │ ├── HAT_wmt14ende_titanxp@126.0ms_bleu@27.9.yml │ │ │ │ ├── HAT_wmt14ende_titanxp@146.7ms_bleu@28.1.yml │ │ │ │ ├── HAT_wmt14ende_titanxp@208.1ms_bleu@28.5.yml │ │ │ │ ├── HAT_wmt14ende_titanxp@57.1ms_bleu@25.8.yml │ │ │ │ ├── HAT_wmt14ende_titanxp@91.2ms_bleu@27.6.yml │ │ │ │ ├── HAT_wmt14ende_xeon@137.9ms_bleu@25.8.yml │ │ │ │ ├── 
HAT_wmt14ende_xeon@204.2ms_bleu@27.6.yml │ │ │ │ ├── HAT_wmt14ende_xeon@278.7ms_bleu@27.9.yml │ │ │ │ ├── HAT_wmt14ende_xeon@340.2ms_bleu@28.1.yml │ │ │ │ ├── HAT_wmt14ende_xeon@369.6ms_bleu@28.2.yml │ │ │ │ ├── HAT_wmt14ende_xeon@450.9ms_bleu@28.5.yml │ │ │ │ ├── common.yml │ │ │ │ ├── common_shiftadd.yml │ │ │ │ ├── common_shiftadd_v2.yml │ │ │ │ ├── common_shiftadd_v3.yml │ │ │ │ ├── common_shiftadd_v3_quant.yml │ │ │ │ ├── common_shiftadd_v3_recipe2.yml │ │ │ │ ├── common_test_ckpt.yml │ │ │ │ ├── sample_200.yml │ │ │ │ ├── sample_evo.yml │ │ │ │ ├── sample_test.yml │ │ │ │ ├── sample_test_1.yml │ │ │ │ ├── sample_test_2.yml │ │ │ │ ├── sample_test_3.yml │ │ │ │ ├── sample_test_4.yml │ │ │ │ ├── sample_test_5.yml │ │ │ │ ├── sample_test_6.yml │ │ │ │ ├── sample_test_de_1.yml │ │ │ │ ├── sample_test_de_2.yml │ │ │ │ ├── sample_test_de_3.yml │ │ │ │ ├── sample_test_de_multi_1.yml │ │ │ │ ├── sample_test_de_multi_2.yml │ │ │ │ ├── sample_test_de_multi_3.yml │ │ │ │ ├── train_subtransformer_2.sh │ │ │ │ ├── transformer_base.yml │ │ │ │ ├── transformer_scale_down.yml │ │ │ │ ├── wmt14ende@200ms_all_attention.yml │ │ │ │ ├── wmt14ende_gpu_2080Ti_shiftadd@200ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@100ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@100ms_all_attention.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@100ms_all_conv.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@120ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@120ms_all_attention.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@120ms_all_conv.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@150ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@150ms_all_attention.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@150ms_all_conv.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@180ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@180ms_all_attention.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@180ms_all_conv.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@200ms.yml │ │ │ │ ├── 
wmt14ende_gpu_V100_shiftadd_v2@200ms_all_attention.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@200ms_all_conv.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@250ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@300ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@400ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@100ms_update.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@120ms_update.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@150ms_update.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@180ms_update.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@200ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@200ms_Jun.29.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@200ms_Jun.29_quant.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@200ms_quant.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@200ms_update.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms_Jun.29.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms_Jun.29_quant.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms_new.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms_update.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms_update_shift_ffn_v1.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jul.12.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jul.12_quant.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jul.23.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jul.28.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jun.29.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jun.29_quant.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jun.29_quant_cyclic.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_new.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_update.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@350ms_Jul.13.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@350ms_Jul.13_quant.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@350ms_Jul.28.yml │ │ │ │ ├── 
wmt14ende_gpu_V100_shiftadd_v3@350ms_Jul.28_shift_ffn_v3.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@350ms_Jul.8.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@350ms_update.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@400ms_Jul.8.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@400ms_update.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@500ms.yml │ │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3_big.yml │ │ │ │ ├── wmt14ende_shiftadd_v2@200ms.yml │ │ │ │ ├── wmt14ende_shiftadd_v2@200ms_all_attention.yml │ │ │ │ ├── wmt14ende_test_ckpt.yml │ │ │ │ └── wmt14ende_titanxp@200ms.yml │ │ │ ├── supertransformer │ │ │ │ ├── .DS_Store │ │ │ │ ├── space0.yml │ │ │ │ ├── space_shiftadd.yml │ │ │ │ ├── space_shiftadd_act_1.yml │ │ │ │ ├── space_shiftadd_v2_act_1.yml │ │ │ │ └── space_shiftadd_v3_act_1.yml │ │ │ └── test.sh │ │ ├── wmt14.en-fr │ │ │ ├── .DS_Store │ │ │ ├── evo_search │ │ │ │ ├── wmt14enfr_raspberrypi.yml │ │ │ │ ├── wmt14enfr_titanxp.yml │ │ │ │ └── wmt14enfr_xeon.yml │ │ │ ├── get_preprocessed.sh │ │ │ ├── latency_dataset │ │ │ │ ├── cpu_raspberrypi.yml │ │ │ │ ├── cpu_xeon.yml │ │ │ │ └── gpu_titanxp.yml │ │ │ ├── latency_predictor │ │ │ │ ├── cpu_raspberrypi.yml │ │ │ │ ├── cpu_xeon.yml │ │ │ │ └── gpu_titanxp.yml │ │ │ ├── preprocess.sh │ │ │ ├── subtransformer │ │ │ │ ├── HAT_wmt14enfr_raspberrypi@4.3s_bleu@38.8.yml │ │ │ │ ├── HAT_wmt14enfr_raspberrypi@5.3s_bleu@40.1.yml │ │ │ │ ├── HAT_wmt14enfr_raspberrypi@5.8s_bleu@40.6.yml │ │ │ │ ├── HAT_wmt14enfr_raspberrypi@6.9s_bleu@41.1.yml │ │ │ │ ├── HAT_wmt14enfr_raspberrypi@7.8s_bleu@41.4.yml │ │ │ │ ├── HAT_wmt14enfr_raspberrypi@9.1s_bleu@41.8.yml │ │ │ │ ├── HAT_wmt14enfr_titanxp@132.9ms_bleu@40.7.yml │ │ │ │ ├── HAT_wmt14enfr_titanxp@168.3ms_bleu@41.1.yml │ │ │ │ ├── HAT_wmt14enfr_titanxp@208.3ms_bleu@41.7.yml │ │ │ │ ├── HAT_wmt14enfr_titanxp@69.3ms_bleu@39.1.yml │ │ │ │ ├── HAT_wmt14enfr_titanxp@94.9ms_bleu@40.0.yml │ │ │ │ ├── HAT_wmt14enfr_xeon@154.7ms_bleu@39.1.yml │ │ │ │ ├── 
HAT_wmt14enfr_xeon@208.8ms_bleu@40.0.yml │ │ │ │ ├── HAT_wmt14enfr_xeon@329.4ms_bleu@41.1.yml │ │ │ │ ├── HAT_wmt14enfr_xeon@394.5ms_bleu@41.4.yml │ │ │ │ ├── HAT_wmt14enfr_xeon@442.0ms_bleu@41.7.yml │ │ │ │ └── common.yml │ │ │ ├── supertransformer │ │ │ │ └── space0.yml │ │ │ └── test.sh │ │ └── wmt19.en-de │ │ │ ├── evo_search │ │ │ └── wmt19ende_titanxp.yml │ │ │ ├── get_preprocessed.sh │ │ │ ├── latency_dataset │ │ │ └── gpu_titanxp.yml │ │ │ ├── latency_predictor │ │ │ └── gpu_titanxp.yml │ │ │ ├── preprocess.sh │ │ │ ├── subtransformer │ │ │ ├── HAT_wmt19ende_titanxp@134.5ms_bleu@45.4.yml │ │ │ ├── HAT_wmt19ende_titanxp@176.1ms_bleu@46.2.yml │ │ │ ├── HAT_wmt19ende_titanxp@204.5ms_bleu@46.5.yml │ │ │ ├── HAT_wmt19ende_titanxp@237.8ms_bleu@46.7.yml │ │ │ ├── HAT_wmt19ende_titanxp@55.7ms_bleu@42.4.yml │ │ │ ├── HAT_wmt19ende_titanxp@93.2ms_bleu@44.4.yml │ │ │ └── common.yml │ │ │ ├── supertransformer │ │ │ └── space0.yml │ │ │ └── test.sh │ ├── deepshift │ │ ├── __init__.py │ │ ├── convert.py │ │ ├── kernels │ │ │ ├── __init__.py │ │ │ ├── cpu │ │ │ │ ├── setup.py │ │ │ │ └── shift_cpu.cpp │ │ │ ├── cuda │ │ │ │ ├── __init__.py │ │ │ │ ├── convert_to_unoptimized.py │ │ │ │ ├── setup.py │ │ │ │ ├── shift.cu │ │ │ │ ├── shift_cuda.cpp │ │ │ │ ├── unoptimized_conv.py │ │ │ │ ├── unoptimized_cuda.cpp │ │ │ │ ├── unoptimized_cuda_kernel.cu │ │ │ │ └── unoptimized_linear.py │ │ │ └── kernels.py │ │ ├── modules.py │ │ ├── modules_q.py │ │ ├── quantize.py │ │ ├── ste.py │ │ └── utils.py │ ├── download_model.py │ ├── env.sh │ ├── evo_searach │ │ ├── evo_search_100ms_update.out │ │ ├── evo_search_120ms_update.out │ │ ├── evo_search_150ms_update.out │ │ ├── evo_search_180ms_update.out │ │ ├── evo_search_200ms_update.out │ │ ├── evo_search_250ms_update.out │ │ ├── evo_search_300ms_update.out │ │ ├── evo_search_350ms_update.out │ │ ├── evo_search_400ms_update.out │ │ └── evo_search_80ms_update.out │ ├── evo_search.py │ ├── fairseq │ │ ├── .DS_Store │ │ ├── __init__.py │ │ 
├── binarizer.py │ │ ├── bleu.py │ │ ├── checkpoint_utils.py │ │ ├── clib │ │ │ └── libbleu │ │ │ │ ├── libbleu.cpp │ │ │ │ └── module.cpp │ │ ├── criterions │ │ │ ├── __init__.py │ │ │ ├── cross_entropy.py │ │ │ ├── fairseq_criterion.py │ │ │ └── label_smoothed_cross_entropy.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── base_wrapper_dataset.py │ │ │ ├── concat_dataset.py │ │ │ ├── data_utils.py │ │ │ ├── data_utils_fast.cpp │ │ │ ├── data_utils_fast.pyx │ │ │ ├── dictionary.py │ │ │ ├── distill_dataset.py │ │ │ ├── encoders │ │ │ │ ├── __init__.py │ │ │ │ ├── fastbpe.py │ │ │ │ ├── gpt2_bpe.py │ │ │ │ ├── gpt2_bpe_utils.py │ │ │ │ ├── hf_bert_bpe.py │ │ │ │ ├── moses_tokenizer.py │ │ │ │ ├── nltk_tokenizer.py │ │ │ │ ├── sentencepiece_bpe.py │ │ │ │ ├── space_tokenizer.py │ │ │ │ └── subword_nmt_bpe.py │ │ │ ├── fairseq_dataset.py │ │ │ ├── id_dataset.py │ │ │ ├── indexed_dataset.py │ │ │ ├── iterators.py │ │ │ ├── language_pair_dataset.py │ │ │ ├── strip_token_dataset.py │ │ │ ├── token_block_utils_fast.c │ │ │ ├── token_block_utils_fast.pyx │ │ │ └── truncate_dataset.py │ │ ├── distributed_utils.py │ │ ├── evolution.py │ │ ├── fed_utils.py │ │ ├── file_utils.py │ │ ├── hub_utils.py │ │ ├── init.py │ │ ├── legacy_distributed_data_parallel.py │ │ ├── meters.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── distributed_fairseq_model.py │ │ │ ├── fairseq_decoder.py │ │ │ ├── fairseq_encoder.py │ │ │ ├── fairseq_incremental_decoder.py │ │ │ ├── fairseq_model.py │ │ │ ├── transformer.py │ │ │ ├── transformer_super.py │ │ │ ├── transformer_super_shiftadd.py │ │ │ └── transformer_super_shiftadd_v2.py │ │ ├── modules │ │ │ ├── .DS_Store │ │ │ ├── __init__.py │ │ │ ├── adaptive_softmax.py │ │ │ ├── cuda_utils.cu │ │ │ ├── embedding_super.py │ │ │ ├── gelu.py │ │ │ ├── layer_norm.py │ │ │ ├── learned_positional_embedding.py │ │ │ ├── lightadd_layer │ │ │ │ ├── __init__.py │ │ │ │ ├── cuda_function_gen.py │ │ │ │ ├── lightadd_cuda.cpp │ │ │ │ ├── lightadd_cuda.cuh │ │ │ │ 
├── lightadd_cuda_backward.cu │ │ │ │ ├── lightadd_cuda_forward.cu │ │ │ │ ├── lightadd_cuda_kernel.cu │ │ │ │ ├── lightadd_cuda_kernel_new.cu │ │ │ │ ├── lightadd_cuda_kernel_prev.cu │ │ │ │ ├── lightadd_layer.py │ │ │ │ ├── quantize.py │ │ │ │ └── setup.py │ │ │ ├── lightconv_layer │ │ │ │ ├── __init__.py │ │ │ │ ├── cuda_function_gen.py │ │ │ │ ├── lightconv_cuda.cpp │ │ │ │ ├── lightconv_cuda.cuh │ │ │ │ ├── lightconv_cuda_backward.cu │ │ │ │ ├── lightconv_cuda_forward.cu │ │ │ │ ├── lightconv_cuda_kernel.cu │ │ │ │ ├── lightconv_layer.py │ │ │ │ ├── quantize.py │ │ │ │ └── setup.py │ │ │ ├── lightshiftadd_layer │ │ │ │ ├── __init__.py │ │ │ │ ├── cuda_function_gen.py │ │ │ │ ├── lightshiftadd_cuda.cpp │ │ │ │ ├── lightshiftadd_cuda.cuh │ │ │ │ ├── lightshiftadd_cuda_backward.cu │ │ │ │ ├── lightshiftadd_cuda_forward.cu │ │ │ │ ├── lightshiftadd_cuda_kernel.cu │ │ │ │ ├── lightshiftadd_cuda_kernel_new.cu │ │ │ │ ├── lightshiftadd_cuda_kernel_prev.cu │ │ │ │ ├── lightshiftadd_layer.py │ │ │ │ ├── quantize.py │ │ │ │ └── setup.py │ │ │ ├── lightweight_convolution.py │ │ │ ├── linear_super.py │ │ │ ├── multibranch.py │ │ │ ├── multihead_attention.py │ │ │ ├── multihead_attention_super.py │ │ │ ├── positional_embedding.py │ │ │ ├── quantize.py │ │ │ ├── sinusoidal_positional_embedding.py │ │ │ ├── transformer_layer.py │ │ │ ├── transformer_sentence_encoder.py │ │ │ ├── transformer_sentence_encoder_layer.py │ │ │ └── unfold.py │ │ ├── optim │ │ │ ├── __init__.py │ │ │ ├── adam.py │ │ │ ├── bmuf.py │ │ │ ├── fairseq_optimizer.py │ │ │ ├── fp16_optimizer.py │ │ │ ├── lr_scheduler │ │ │ │ ├── __init__.py │ │ │ │ ├── cosine_lr_scheduler.py │ │ │ │ ├── fairseq_lr_scheduler.py │ │ │ │ ├── fixed_schedule.py │ │ │ │ ├── inverse_square_root_schedule.py │ │ │ │ ├── polynomial_decay_schedule.py │ │ │ │ ├── reduce_lr_on_plateau.py │ │ │ │ ├── tri_stage_lr_scheduler.py │ │ │ │ └── triangular_lr_scheduler.py │ │ │ └── nag.py │ │ ├── options.py │ │ ├── progress_bar.py │ │ ├── 
registry.py │ │ ├── search.py │ │ ├── sequence_generator.py │ │ ├── sequence_scorer.py │ │ ├── tasks │ │ │ ├── __init__.py │ │ │ ├── fairseq_task.py │ │ │ └── translation.py │ │ ├── tokenizer.py │ │ ├── trainer.py │ │ └── utils.py │ ├── figures │ │ ├── BLEUs.pdf │ │ ├── BLEUs.png │ │ ├── config_2.npy │ │ ├── config_200.npy │ │ ├── config_2000.npy │ │ ├── energy.pdf │ │ ├── energy.png │ │ ├── latency.pdf │ │ ├── latency.png │ │ ├── latency_2.npy │ │ ├── latency_200.npy │ │ ├── latency_2000.npy │ │ ├── loss_2.npy │ │ ├── loss_200.npy │ │ ├── loss_2000.npy │ │ ├── loss_vs_lat_200.png │ │ ├── loss_vs_lat_2000.png │ │ ├── loss_vs_lat_2010.png │ │ ├── plot_energy.py │ │ ├── plot_latency.py │ │ ├── plot_params_flops.py │ │ ├── pred_vs_real.pdf │ │ ├── trajectory_350.pdf │ │ ├── trajectory_350.svg │ │ ├── trajectory_comp.pdf │ │ ├── trajectory_comp.png │ │ ├── trajectory_comp.svg │ │ ├── weight_vis.svg │ │ └── weight_vis_300.svg │ ├── generate.py │ ├── get_loss.py │ ├── last_bleus.txt │ ├── latency.npy │ ├── latency_dataset.py │ ├── latency_dataset │ │ ├── iwslt14deen_gpu_titanxp_all.csv │ │ ├── predictors │ │ │ ├── wmt14ende_gpu_2080Ti_shiftadd.pt │ │ │ └── wmt14ende_gpu_V100_shiftadd_v3.pt │ │ ├── wmt14ende_cpu_raspberrypi.csv │ │ ├── wmt14ende_cpu_raspberrypi_all.csv │ │ ├── wmt14ende_cpu_xeon_all.csv │ │ ├── wmt14ende_cpu_xeon_shiftadd.csv │ │ ├── wmt14ende_gpu_2080Ti_shiftadd.csv │ │ ├── wmt14ende_gpu_titanxp_all.csv │ │ ├── wmt14enfr_cpu_raspberrypi_all.csv │ │ ├── wmt14enfr_cpu_xeon_all.csv │ │ ├── wmt14enfr_gpu_titanxp_all.csv │ │ └── wmt19ende_gpu_titanxp_all.csv │ ├── latency_predictor.py │ ├── loss.npy │ ├── plot.py │ ├── plot_dist.py │ ├── plot_pred_real.py │ ├── plot_trajectory_300.py │ ├── plot_trajectory_350.py │ ├── plot_trajectory_aug_14.py │ ├── preprocess.py │ ├── record.txt │ ├── score.py │ ├── setup.py │ ├── sim │ │ ├── 100ms_Aug_14_decoder_32bits.csv │ │ ├── 100ms_Aug_14_encoder_32bits.csv │ │ ├── 120ms_Aug_14_decoder_32bits.csv │ │ ├── 
120ms_Aug_14_encoder_32bits.csv │ │ ├── 150ms_Aug_14_decoder_32bits.csv │ │ ├── 150ms_Aug_14_encoder_32bits.csv │ │ ├── 180ms_Aug_14_decoder_32bits.csv │ │ ├── 180ms_Aug_14_encoder_32bits.csv │ │ ├── 200ms_Aug_4_decoder_32bits.csv │ │ ├── 200ms_Aug_4_encoder_32bits.csv │ │ ├── 250ms_Aug_4_decoder_32bits.csv │ │ ├── 250ms_Aug_4_encoder_32bits.csv │ │ ├── 300ms_Aug_4_encoder_32bits.csv │ │ ├── 300ms_Jul_23_decoder_32bits.csv │ │ ├── 300ms_Jul_23_encoder_32bits.csv │ │ ├── 300ms_decoder.csv │ │ ├── 300ms_decoder_32bits.csv │ │ ├── 300ms_decoder_8bits.csv │ │ ├── 300ms_encoder.csv │ │ ├── 300ms_encoder_32bits.csv │ │ ├── 300ms_encoder_8bits.csv │ │ ├── 350ms_Aug_14_decoder_32bits.csv │ │ ├── 350ms_Aug_14_encoder_32bits.csv │ │ ├── 350ms_decoder_32bits.csv │ │ ├── 350ms_decoder_32bits_all_shift.csv │ │ ├── 350ms_decoder_32bits_shift_ffn_v1.csv │ │ ├── 350ms_decoder_8bits.csv │ │ ├── 350ms_encoder_32bits.csv │ │ ├── 350ms_encoder_32bits_all_shift.csv │ │ ├── 350ms_encoder_32bits_shift_ffn_v1.csv │ │ ├── 350ms_encoder_8bits.csv │ │ ├── Conv.py │ │ ├── HAT.py │ │ ├── HAT_raspi_25.8_decoder.csv │ │ ├── HAT_raspi_25.8_encoder.csv │ │ ├── HAT_raspi_26.9_decoder.csv │ │ ├── HAT_raspi_26.9_encoder.csv │ │ ├── HAT_raspi_27.6_decoder.csv │ │ ├── HAT_raspi_27.6_encoder.csv │ │ ├── HAT_raspi_27.8_decoder.csv │ │ ├── HAT_raspi_27.8_encoder.csv │ │ ├── HAT_raspi_28.2.csv │ │ ├── HAT_raspi_28.2_decoder.csv │ │ ├── HAT_raspi_28.2_encoder.csv │ │ ├── HAT_raspi_28.4_decoder.csv │ │ ├── HAT_raspi_28.4_encoder.csv │ │ ├── ShiftAddNAS.py │ │ ├── ShiftAddNAS_all_shift.py │ │ ├── ShiftAddNAS_shift_ffn_v1.py │ │ ├── Transformer.py │ │ ├── base_27.3_decoder.csv │ │ ├── base_27.3_encoder.csv │ │ ├── big_28.4_decoder.csv │ │ ├── big_28.4_encoder.csv │ │ ├── conv_150ms_decoder_32bits.csv │ │ ├── conv_150ms_encoder_32bits.csv │ │ ├── conv_200ms_decoder_32bits.csv │ │ ├── conv_200ms_encoder_32bits.csv │ │ ├── scale_down_24.7_decoder.csv │ │ └── scale_down_24.7_encoder.csv │ ├── train.py │ └── 
train_cpt.py └── en-fr │ ├── README.md │ ├── average_checkpoints.py │ ├── build_modules.sh │ ├── config.npy │ ├── configs │ ├── .DS_Store │ ├── iwslt14.de-en │ │ ├── .DS_Store │ │ ├── average_checkpoint.sh │ │ ├── evo_search │ │ │ └── iwslt14deen_titanxp.yml │ │ ├── get_preprocessed.sh │ │ ├── latency_dataset │ │ │ └── gpu_titanxp.yml │ │ ├── latency_predictor │ │ │ └── gpu_titanxp.yml │ │ ├── preprocess.sh │ │ ├── subtransformer │ │ │ ├── HAT_iwslt14deen_titanxp@109.0ms_bleu@34.5.yml │ │ │ ├── HAT_iwslt14deen_titanxp@137.8ms_bleu@34.7.yml │ │ │ ├── HAT_iwslt14deen_titanxp@168.8ms_bleu@34.8.yml │ │ │ ├── HAT_iwslt14deen_titanxp@45.6ms_bleu@33.4.yml │ │ │ ├── HAT_iwslt14deen_titanxp@74.5ms_bleu@34.2.yml │ │ │ └── common.yml │ │ ├── supertransformer │ │ │ ├── space1.yml │ │ │ └── space_shiftadd_v1_act_1.yml │ │ └── test.sh │ ├── wmt14.en-de │ │ ├── .DS_Store │ │ ├── evo_search │ │ │ ├── evo_search_100ms_update.out │ │ │ ├── evo_search_120ms_update.out │ │ │ ├── wmt14ende_2080Ti_shiftadd.yml │ │ │ ├── wmt14ende_V100_shiftadd_v2_100.yml │ │ │ ├── wmt14ende_V100_shiftadd_v2_120.yml │ │ │ ├── wmt14ende_V100_shiftadd_v2_150.yml │ │ │ ├── wmt14ende_V100_shiftadd_v2_180.yml │ │ │ ├── wmt14ende_V100_shiftadd_v2_200.yml │ │ │ ├── wmt14ende_V100_shiftadd_v2_250.yml │ │ │ ├── wmt14ende_V100_shiftadd_v2_300.yml │ │ │ ├── wmt14ende_V100_shiftadd_v2_400.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_100_update.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_120_update.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_150_update.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_180_update.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_200.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_200_update.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_250.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_250_update.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_300.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_300_update.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_350_update.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_400_update.yml │ │ │ ├── 
wmt14ende_V100_shiftadd_v3_500.yml │ │ │ ├── wmt14ende_V100_shiftadd_v3_80_update.yml │ │ │ ├── wmt14ende_raspberrypi.yml │ │ │ ├── wmt14ende_titanxp.yml │ │ │ └── wmt14ende_xeon.yml │ │ ├── get_preprocessed.sh │ │ ├── latency_dataset │ │ │ ├── .DS_Store │ │ │ ├── cpu_raspberrypi.yml │ │ │ ├── cpu_xeon.yml │ │ │ ├── cpu_xeon_shiftadd.yml │ │ │ ├── gpu_2080Ti_shiftadd.yml │ │ │ ├── gpu_V100_shiftadd_v2.yml │ │ │ ├── gpu_V100_shiftadd_v3.yml │ │ │ └── gpu_titanxp.yml │ │ ├── latency_predictor │ │ │ ├── cpu_raspberrypi.yml │ │ │ ├── cpu_xeon.yml │ │ │ ├── gpu_2080Ti_shiftadd.yml │ │ │ ├── gpu_V100_shiftadd_v2.yml │ │ │ ├── gpu_V100_shiftadd_v3.yml │ │ │ └── gpu_titanxp.yml │ │ ├── preprocess.sh │ │ ├── subtransformer │ │ │ ├── .DS_Store │ │ │ ├── HAT_wmt14ende_raspberrypi@3.5s_bleu@25.8.yml │ │ │ ├── HAT_wmt14ende_raspberrypi@4.0s_bleu@26.9.yml │ │ │ ├── HAT_wmt14ende_raspberrypi@4.5s_bleu@27.6.yml │ │ │ ├── HAT_wmt14ende_raspberrypi@5.0s_bleu@27.8.yml │ │ │ ├── HAT_wmt14ende_raspberrypi@6.0s_bleu@28.2.yml │ │ │ ├── HAT_wmt14ende_raspberrypi@6.9s_bleu@28.4.yml │ │ │ ├── HAT_wmt14ende_titanxp@126.0ms_bleu@27.9.yml │ │ │ ├── HAT_wmt14ende_titanxp@146.7ms_bleu@28.1.yml │ │ │ ├── HAT_wmt14ende_titanxp@208.1ms_bleu@28.5.yml │ │ │ ├── HAT_wmt14ende_titanxp@57.1ms_bleu@25.8.yml │ │ │ ├── HAT_wmt14ende_titanxp@91.2ms_bleu@27.6.yml │ │ │ ├── HAT_wmt14ende_xeon@137.9ms_bleu@25.8.yml │ │ │ ├── HAT_wmt14ende_xeon@204.2ms_bleu@27.6.yml │ │ │ ├── HAT_wmt14ende_xeon@278.7ms_bleu@27.9.yml │ │ │ ├── HAT_wmt14ende_xeon@340.2ms_bleu@28.1.yml │ │ │ ├── HAT_wmt14ende_xeon@369.6ms_bleu@28.2.yml │ │ │ ├── HAT_wmt14ende_xeon@450.9ms_bleu@28.5.yml │ │ │ ├── common.yml │ │ │ ├── common_shiftadd.yml │ │ │ ├── common_shiftadd_v2.yml │ │ │ ├── common_shiftadd_v3.yml │ │ │ ├── common_shiftadd_v3_quant.yml │ │ │ ├── common_shiftadd_v3_recipe2.yml │ │ │ ├── common_test_ckpt.yml │ │ │ ├── sample_200.yml │ │ │ ├── sample_evo.yml │ │ │ ├── sample_test.yml │ │ │ ├── sample_test_1.yml │ │ │ ├── 
sample_test_2.yml │ │ │ ├── sample_test_3.yml │ │ │ ├── sample_test_4.yml │ │ │ ├── sample_test_5.yml │ │ │ ├── sample_test_6.yml │ │ │ ├── sample_test_de_1.yml │ │ │ ├── sample_test_de_2.yml │ │ │ ├── sample_test_de_3.yml │ │ │ ├── sample_test_de_multi_1.yml │ │ │ ├── sample_test_de_multi_2.yml │ │ │ ├── sample_test_de_multi_3.yml │ │ │ ├── train_subtransformer_2.sh │ │ │ ├── transformer_base.yml │ │ │ ├── transformer_scale_down.yml │ │ │ ├── wmt14ende@200ms_all_attention.yml │ │ │ ├── wmt14ende_gpu_2080Ti_shiftadd@200ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@100ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@100ms_all_attention.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@100ms_all_conv.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@120ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@120ms_all_attention.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@120ms_all_conv.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@150ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@150ms_all_attention.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@150ms_all_conv.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@180ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@180ms_all_attention.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@180ms_all_conv.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@200ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@200ms_all_attention.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@200ms_all_conv.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@250ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@300ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v2@400ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@100ms_update.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@120ms_update.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@150ms_update.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@180ms_update.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@200ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@200ms_Jun.29.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@200ms_Jun.29_quant.yml │ │ │ ├── 
wmt14ende_gpu_V100_shiftadd_v3@200ms_quant.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@200ms_update.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms_Jun.29.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms_Jun.29_quant.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms_new.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms_update.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@250ms_update_shift_ffn_v1.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jul.12.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jul.12_quant.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jul.23.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jul.28.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jun.29.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jun.29_quant.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_Jun.29_quant_cyclic.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_new.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@300ms_update.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@350ms_Jul.13.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@350ms_Jul.13_quant.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@350ms_Jul.28.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@350ms_Jul.28_shift_ffn_v3.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@350ms_Jul.8.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@350ms_update.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@400ms_Jul.8.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@400ms_update.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3@500ms.yml │ │ │ ├── wmt14ende_gpu_V100_shiftadd_v3_big.yml │ │ │ ├── wmt14ende_shiftadd_v2@200ms.yml │ │ │ ├── wmt14ende_shiftadd_v2@200ms_all_attention.yml │ │ │ ├── wmt14ende_test_ckpt.yml │ │ │ └── wmt14ende_titanxp@200ms.yml │ │ ├── supertransformer │ │ │ ├── .DS_Store │ │ │ ├── space0.yml │ │ │ ├── space_shiftadd.yml │ │ │ ├── space_shiftadd_act_1.yml │ │ │ ├── space_shiftadd_v2_act_1.yml │ │ │ └── 
space_shiftadd_v3_act_1.yml │ │ └── test.sh │ ├── wmt14.en-fr │ │ ├── .DS_Store │ │ ├── evo_search │ │ │ ├── wmt14enfr_V100_shiftadd_v1_100.yml │ │ │ ├── wmt14enfr_V100_shiftadd_v1_120.yml │ │ │ ├── wmt14enfr_V100_shiftadd_v1_150.yml │ │ │ ├── wmt14enfr_V100_shiftadd_v1_160.yml │ │ │ ├── wmt14enfr_V100_shiftadd_v1_170.yml │ │ │ ├── wmt14enfr_V100_shiftadd_v1_180.yml │ │ │ ├── wmt14enfr_V100_shiftadd_v1_200.yml │ │ │ ├── wmt14enfr_V100_shiftadd_v1_250.yml │ │ │ ├── wmt14enfr_V100_shiftadd_v1_300.yml │ │ │ ├── wmt14enfr_V100_shiftadd_v1_350.yml │ │ │ ├── wmt14enfr_raspberrypi.yml │ │ │ ├── wmt14enfr_titanxp.yml │ │ │ └── wmt14enfr_xeon.yml │ │ ├── get_preprocessed.sh │ │ ├── latency_dataset │ │ │ ├── cpu_raspberrypi.yml │ │ │ ├── cpu_xeon.yml │ │ │ ├── gpu_V100_shiftadd_v1.yml │ │ │ └── gpu_titanxp.yml │ │ ├── latency_predictor │ │ │ ├── cpu_raspberrypi.yml │ │ │ ├── cpu_xeon.yml │ │ │ ├── gpu_V100_shiftadd_v1.yml │ │ │ └── gpu_titanxp.yml │ │ ├── preprocess.sh │ │ ├── subtransformer │ │ │ ├── HAT_wmt14enfr_raspberrypi@4.3s_bleu@38.8.yml │ │ │ ├── HAT_wmt14enfr_raspberrypi@5.3s_bleu@40.1.yml │ │ │ ├── HAT_wmt14enfr_raspberrypi@5.8s_bleu@40.6.yml │ │ │ ├── HAT_wmt14enfr_raspberrypi@6.9s_bleu@41.1.yml │ │ │ ├── HAT_wmt14enfr_raspberrypi@7.8s_bleu@41.4.yml │ │ │ ├── HAT_wmt14enfr_raspberrypi@9.1s_bleu@41.8.yml │ │ │ ├── HAT_wmt14enfr_titanxp@132.9ms_bleu@40.7.yml │ │ │ ├── HAT_wmt14enfr_titanxp@168.3ms_bleu@41.1.yml │ │ │ ├── HAT_wmt14enfr_titanxp@208.3ms_bleu@41.7.yml │ │ │ ├── HAT_wmt14enfr_titanxp@69.3ms_bleu@39.1.yml │ │ │ ├── HAT_wmt14enfr_titanxp@94.9ms_bleu@40.0.yml │ │ │ ├── HAT_wmt14enfr_xeon@154.7ms_bleu@39.1.yml │ │ │ ├── HAT_wmt14enfr_xeon@208.8ms_bleu@40.0.yml │ │ │ ├── HAT_wmt14enfr_xeon@329.4ms_bleu@41.1.yml │ │ │ ├── HAT_wmt14enfr_xeon@394.5ms_bleu@41.4.yml │ │ │ ├── HAT_wmt14enfr_xeon@442.0ms_bleu@41.7.yml │ │ │ ├── LW_add.yml │ │ │ ├── LW_conv+attn.yml │ │ │ ├── LW_conv.yml │ │ │ ├── LW_shiftadd.yml │ │ │ ├── Transformer_base.yml │ │ │ ├── 
Transformer_big.yml │ │ │ ├── Transformer_small.yml │ │ │ ├── common.yml │ │ │ ├── common_shiftadd_LW.yml │ │ │ ├── common_shiftadd_v1.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@100ms.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@120ms.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@120ms_quant.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@150ms.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@150ms_quant.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@160ms.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@170ms.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@170ms_quant.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@180ms.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@180ms_quant.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@200ms.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@200ms_quant.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@250ms.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@300ms.yml │ │ │ ├── wmt14enfr_gpu_V100_shiftadd_v1@300ms_quant.yml │ │ │ └── wmt14enfr_gpu_V100_shiftadd_v1@350ms.yml │ │ ├── supertransformer │ │ │ ├── space0.yml │ │ │ └── space_shiftadd_v1_act_1.yml │ │ └── test.sh │ └── wmt19.en-de │ │ ├── evo_search │ │ └── wmt19ende_titanxp.yml │ │ ├── get_preprocessed.sh │ │ ├── latency_dataset │ │ └── gpu_titanxp.yml │ │ ├── latency_predictor │ │ └── gpu_titanxp.yml │ │ ├── preprocess.sh │ │ ├── subtransformer │ │ ├── HAT_wmt19ende_titanxp@134.5ms_bleu@45.4.yml │ │ ├── HAT_wmt19ende_titanxp@176.1ms_bleu@46.2.yml │ │ ├── HAT_wmt19ende_titanxp@204.5ms_bleu@46.5.yml │ │ ├── HAT_wmt19ende_titanxp@237.8ms_bleu@46.7.yml │ │ ├── HAT_wmt19ende_titanxp@55.7ms_bleu@42.4.yml │ │ ├── HAT_wmt19ende_titanxp@93.2ms_bleu@44.4.yml │ │ └── common.yml │ │ ├── supertransformer │ │ └── space0.yml │ │ └── test.sh │ ├── deepshift │ ├── __init__.py │ ├── convert.py │ ├── kernels │ │ ├── __init__.py │ │ ├── cpu │ │ │ ├── setup.py │ │ │ └── shift_cpu.cpp │ │ ├── cuda │ │ │ ├── __init__.py │ │ │ ├── convert_to_unoptimized.py │ │ │ ├── setup.py │ │ │ ├── shift.cu │ │ │ ├── shift_cuda.cpp │ │ │ 
├── unoptimized_conv.py │ │ │ ├── unoptimized_cuda.cpp │ │ │ ├── unoptimized_cuda_kernel.cu │ │ │ └── unoptimized_linear.py │ │ └── kernels.py │ ├── modules.py │ ├── modules_q.py │ ├── quantize.py │ ├── ste.py │ └── utils.py │ ├── env.sh │ ├── evo_config.py │ ├── evo_search.py │ ├── fairseq │ ├── .DS_Store │ ├── __init__.py │ ├── binarizer.py │ ├── bleu.py │ ├── checkpoint_utils.py │ ├── clib │ │ └── libbleu │ │ │ ├── libbleu.cpp │ │ │ └── module.cpp │ ├── criterions │ │ ├── __init__.py │ │ ├── cross_entropy.py │ │ ├── fairseq_criterion.py │ │ └── label_smoothed_cross_entropy.py │ ├── data │ │ ├── __init__.py │ │ ├── base_wrapper_dataset.py │ │ ├── concat_dataset.py │ │ ├── data_utils.py │ │ ├── data_utils_fast.cpp │ │ ├── data_utils_fast.pyx │ │ ├── dictionary.py │ │ ├── distill_dataset.py │ │ ├── encoders │ │ │ ├── __init__.py │ │ │ ├── fastbpe.py │ │ │ ├── gpt2_bpe.py │ │ │ ├── gpt2_bpe_utils.py │ │ │ ├── hf_bert_bpe.py │ │ │ ├── moses_tokenizer.py │ │ │ ├── nltk_tokenizer.py │ │ │ ├── sentencepiece_bpe.py │ │ │ ├── space_tokenizer.py │ │ │ └── subword_nmt_bpe.py │ │ ├── fairseq_dataset.py │ │ ├── id_dataset.py │ │ ├── indexed_dataset.py │ │ ├── iterators.py │ │ ├── language_pair_dataset.py │ │ ├── strip_token_dataset.py │ │ ├── token_block_utils_fast.c │ │ ├── token_block_utils_fast.pyx │ │ └── truncate_dataset.py │ ├── distributed_utils.py │ ├── evolution.py │ ├── fed_utils.py │ ├── file_utils.py │ ├── hub_utils.py │ ├── init.py │ ├── legacy_distributed_data_parallel.py │ ├── meters.py │ ├── models │ │ ├── __init__.py │ │ ├── distributed_fairseq_model.py │ │ ├── fairseq_decoder.py │ │ ├── fairseq_encoder.py │ │ ├── fairseq_incremental_decoder.py │ │ ├── fairseq_model.py │ │ ├── transformer.py │ │ ├── transformer_super.py │ │ ├── transformer_super_shiftadd.py │ │ ├── transformer_super_shiftadd_LW_only.py │ │ └── transformer_super_shiftadd_v2.py │ ├── modules │ │ ├── __init__.py │ │ ├── adaptive_softmax.py │ │ ├── cuda_utils.cu │ │ ├── embedding_super.py │ │ ├── 
gelu.py │ │ ├── layer_norm.py │ │ ├── learned_positional_embedding.py │ │ ├── lightadd_layer │ │ │ ├── __init__.py │ │ │ ├── cuda_function_gen.py │ │ │ ├── lightadd_cuda.cpp │ │ │ ├── lightadd_cuda.cuh │ │ │ ├── lightadd_cuda_backward.cu │ │ │ ├── lightadd_cuda_forward.cu │ │ │ ├── lightadd_cuda_kernel.cu │ │ │ ├── lightadd_cuda_kernel_new.cu │ │ │ ├── lightadd_cuda_kernel_prev.cu │ │ │ ├── lightadd_layer.py │ │ │ ├── quantize.py │ │ │ └── setup.py │ │ ├── lightconv_layer │ │ │ ├── __init__.py │ │ │ ├── cuda_function_gen.py │ │ │ ├── lightconv_cuda.cpp │ │ │ ├── lightconv_cuda.cuh │ │ │ ├── lightconv_cuda_backward.cu │ │ │ ├── lightconv_cuda_forward.cu │ │ │ ├── lightconv_cuda_kernel.cu │ │ │ ├── lightconv_layer.py │ │ │ ├── quantize.py │ │ │ └── setup.py │ │ ├── lightshiftadd_layer │ │ │ ├── __init__.py │ │ │ ├── cuda_function_gen.py │ │ │ ├── lightshiftadd_cuda.cpp │ │ │ ├── lightshiftadd_cuda.cuh │ │ │ ├── lightshiftadd_cuda_backward.cu │ │ │ ├── lightshiftadd_cuda_forward.cu │ │ │ ├── lightshiftadd_cuda_kernel.cu │ │ │ ├── lightshiftadd_cuda_kernel_new.cu │ │ │ ├── lightshiftadd_cuda_kernel_prev.cu │ │ │ ├── lightshiftadd_layer.py │ │ │ ├── quantize.py │ │ │ └── setup.py │ │ ├── lightweight_convolution.py │ │ ├── linear_super.py │ │ ├── multibranch.py │ │ ├── multihead_attention.py │ │ ├── multihead_attention_super.py │ │ ├── positional_embedding.py │ │ ├── quantize.py │ │ ├── sinusoidal_positional_embedding.py │ │ ├── transformer_layer.py │ │ ├── transformer_sentence_encoder.py │ │ ├── transformer_sentence_encoder_layer.py │ │ └── unfold.py │ ├── optim │ │ ├── __init__.py │ │ ├── adam.py │ │ ├── bmuf.py │ │ ├── fairseq_optimizer.py │ │ ├── fp16_optimizer.py │ │ ├── lr_scheduler │ │ │ ├── __init__.py │ │ │ ├── cosine_lr_scheduler.py │ │ │ ├── fairseq_lr_scheduler.py │ │ │ ├── fixed_schedule.py │ │ │ ├── inverse_square_root_schedule.py │ │ │ ├── polynomial_decay_schedule.py │ │ │ ├── reduce_lr_on_plateau.py │ │ │ ├── tri_stage_lr_scheduler.py │ │ │ └── 
triangular_lr_scheduler.py │ │ └── nag.py │ ├── options.py │ ├── progress_bar.py │ ├── registry.py │ ├── search.py │ ├── sequence_generator.py │ ├── sequence_scorer.py │ ├── tasks │ │ ├── __init__.py │ │ ├── fairseq_task.py │ │ └── translation.py │ ├── tokenizer.py │ ├── trainer.py │ └── utils.py │ ├── generate.py │ ├── get_loss.py │ ├── latency_dataset.py │ ├── latency_dataset │ ├── iwslt14deen_gpu_titanxp_all.csv │ ├── predictors │ │ └── wmt14enfr_gpu_V100_shiftadd_v1.pt │ ├── wmt14ende_cpu_raspberrypi.csv │ ├── wmt14ende_cpu_raspberrypi_all.csv │ ├── wmt14ende_cpu_xeon_all.csv │ ├── wmt14ende_cpu_xeon_shiftadd.csv │ ├── wmt14ende_gpu_2080Ti_shiftadd.csv │ ├── wmt14ende_gpu_titanxp_all.csv │ ├── wmt14enfr_cpu_raspberrypi_all.csv │ ├── wmt14enfr_cpu_xeon_all.csv │ ├── wmt14enfr_gpu_V100_shiftadd_v1.csv │ ├── wmt14enfr_gpu_titanxp_all.csv │ └── wmt19ende_gpu_titanxp_all.csv │ ├── latency_predictor.py │ ├── plot.py │ ├── plot_dist.py │ ├── plot_pred_real.py │ ├── plot_trajectory_300.py │ ├── plot_trajectory_350.py │ ├── plot_trajectory_aug_14.py │ ├── preprocess.py │ ├── score.py │ ├── setup.py │ ├── sim │ ├── 100ms_Aug_14_decoder_32bits.csv │ ├── 100ms_Aug_14_encoder_32bits.csv │ ├── 120ms_Aug_14_decoder_32bits.csv │ ├── 120ms_Aug_14_encoder_32bits.csv │ ├── 150ms_Aug_14_decoder_32bits.csv │ ├── 150ms_Aug_14_encoder_32bits.csv │ ├── 180ms_Aug_14_decoder_32bits.csv │ ├── 180ms_Aug_14_encoder_32bits.csv │ ├── 200ms_Aug_4_decoder_32bits.csv │ ├── 200ms_Aug_4_encoder_32bits.csv │ ├── 250ms_Aug_4_decoder_32bits.csv │ ├── 250ms_Aug_4_encoder_32bits.csv │ ├── 300ms_Aug_4_encoder_32bits.csv │ ├── 300ms_Jul_23_decoder_32bits.csv │ ├── 300ms_Jul_23_encoder_32bits.csv │ ├── 300ms_decoder.csv │ ├── 300ms_decoder_32bits.csv │ ├── 300ms_decoder_8bits.csv │ ├── 300ms_encoder.csv │ ├── 300ms_encoder_32bits.csv │ ├── 300ms_encoder_8bits.csv │ ├── 350ms_Aug_14_decoder_32bits.csv │ ├── 350ms_Aug_14_encoder_32bits.csv │ ├── 350ms_decoder_32bits.csv │ ├── 
350ms_decoder_32bits_all_shift.csv │ ├── 350ms_decoder_32bits_shift_ffn_v1.csv │ ├── 350ms_decoder_8bits.csv │ ├── 350ms_encoder_32bits.csv │ ├── 350ms_encoder_32bits_all_shift.csv │ ├── 350ms_encoder_32bits_shift_ffn_v1.csv │ ├── 350ms_encoder_8bits.csv │ ├── Conv.py │ ├── HAT.py │ ├── HAT_raspi_25.8_decoder.csv │ ├── HAT_raspi_25.8_encoder.csv │ ├── HAT_raspi_26.9_decoder.csv │ ├── HAT_raspi_26.9_encoder.csv │ ├── HAT_raspi_27.6_decoder.csv │ ├── HAT_raspi_27.6_encoder.csv │ ├── HAT_raspi_27.8_decoder.csv │ ├── HAT_raspi_27.8_encoder.csv │ ├── HAT_raspi_28.2.csv │ ├── HAT_raspi_28.2_decoder.csv │ ├── HAT_raspi_28.2_encoder.csv │ ├── HAT_raspi_28.4_decoder.csv │ ├── HAT_raspi_28.4_encoder.csv │ ├── ShiftAddNAS.py │ ├── ShiftAddNAS_all_shift.py │ ├── ShiftAddNAS_shift_ffn_v1.py │ ├── Transformer.py │ ├── base_27.3_decoder.csv │ ├── base_27.3_encoder.csv │ ├── big_28.4_decoder.csv │ ├── big_28.4_encoder.csv │ ├── conv_150ms_decoder_32bits.csv │ ├── conv_150ms_encoder_32bits.csv │ ├── conv_200ms_decoder_32bits.csv │ ├── conv_200ms_encoder_32bits.csv │ ├── scale_down_24.7_decoder.csv │ └── scale_down_24.7_encoder.csv │ ├── train.py │ └── train_cpt.py ├── README.md └── environment.yml /CV/figures/Supernet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/figures/Supernet.pdf -------------------------------------------------------------------------------- /CV/figures/Supernet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/figures/Supernet.png -------------------------------------------------------------------------------- /CV/figures/vis_arch_v1.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/figures/vis_arch_v1.pdf -------------------------------------------------------------------------------- /CV/figures/vis_arch_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/figures/vis_arch_v1.png -------------------------------------------------------------------------------- /CV/figures/vis_arch_v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/figures/vis_arch_v2.pdf -------------------------------------------------------------------------------- /CV/figures/vis_arch_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/figures/vis_arch_v2.png -------------------------------------------------------------------------------- /CV/retraining_hybrid/boss_candidates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/retraining_hybrid/boss_candidates/__init__.py -------------------------------------------------------------------------------- /CV/retraining_hybrid/boss_candidates/adder/jit.py: -------------------------------------------------------------------------------- 1 | from torch.utils.cpp_extension import load 2 | 3 | conv_cuda = load( 4 | 'adder_cuda', ['adder_cuda.cpp', 'adder_cuda_kernel.cu'], verbose=True) 5 | help(adder_cuda) 6 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/boss_candidates/adder/setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='adder', 6 | ext_modules=[ 7 | CUDAExtension('adder_cuda', [ 8 | 'adder_cuda.cpp', 9 | 'adder_cuda_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/boss_candidates/deepshift/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/retraining_hybrid/boss_candidates/deepshift/__init__.py -------------------------------------------------------------------------------- /CV/retraining_hybrid/boss_candidates/deepshift/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from .kernels import * -------------------------------------------------------------------------------- /CV/retraining_hybrid/boss_candidates/deepshift/kernels/cpu/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from torch.utils import cpp_extension 3 | 4 | setup( 5 | name='deepshift_cpu', 6 | ext_modules=[ 7 | cpp_extension.CppExtension('deepshift_cpu', [ 8 | 'shift_cpu.cpp' 9 | ], extra_compile_args=['-fopenmp', '-O3']) 10 | ], 11 | cmdclass={ 12 | 'build_ext': cpp_extension.BuildExtension 13 | }) 14 | 15 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/boss_candidates/deepshift/kernels/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from .convert_to_unoptimized import * -------------------------------------------------------------------------------- 
/CV/retraining_hybrid/boss_candidates/deepshift/kernels/cuda/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deepshift_cuda', 6 | ext_modules=[ 7 | CUDAExtension('deepshift_cuda', [ 8 | 'shift_cuda.cpp', 9 | 'shift.cu', 10 | ],extra_compile_args=['-O3']) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | setup( 17 | name='unoptimized_cuda_kernel', 18 | ext_modules=[ 19 | CUDAExtension('unoptimized_cuda_kernel', [ 20 | 'unoptimized_cuda.cpp', 21 | 'unoptimized_cuda_kernel.cu', 22 | ],extra_compile_args=['-O3']) 23 | ], 24 | cmdclass={ 25 | 'build_ext': BuildExtension 26 | }) -------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T0_mix_v3_distilled-224/linear.csv: -------------------------------------------------------------------------------- 1 | 1,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 2 | 2,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 3 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T0_mix_v3_distilled-256/linear.csv: -------------------------------------------------------------------------------- 1 | 1,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 2 | 2,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 3 | 
-------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T0_mix_v3_distilled-256/shift.csv: -------------------------------------------------------------------------------- 1 | 1,17448304.64,1.8806531686400003,0.00131072,0.498335744,0.9216983040000002,0.4431708160000001,0.017448304640000002,0.32767999999999997,0.0,0.225509376,0.37827379200000005,0.9317416960000002 2 | 2,17448304.64,1.7629505126400002,0.00131072,0.531103744,0.7712276480000001,0.4431708160000001,0.017448304640000002,0.229376,0.0,0.225509376,0.3588751360000001,0.9317416960000002 3 | 3,19629342.720000003,2.0733473587200004,0.0019660800000000003,0.4692377600000001,0.8894545920000002,0.6950256640000001,0.01962934272,0.188416,0.0,0.17276928,0.17943756800000005,1.5130951680000004 4 | 4,19629342.720000003,1.9832353587200005,0.0019660800000000003,0.39465779200000006,0.8157265920000002,0.753221632,0.01962934272,0.098304,0.0,0.129122304,0.10669260800000002,1.6294871040000003 5 | 5,8724152.32,0.9533764403200002,0.00065536,0.249167872,0.4608491520000001,0.234635264,0.008724152320000001,0.180224,0.0,0.10911744,0.18913689600000003,0.4661739520000001 6 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T0_mix_v3_distilled-288/linear.csv: -------------------------------------------------------------------------------- 1 | 1,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 2 | 2,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 3 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T0_mix_v3_distilled-288/shift.csv: 
-------------------------------------------------------------------------------- 1 | 1,22083010.560000002,2.3505466265600004,0.00147456,0.6721781760000001,0.9603645440000002,0.695920896,0.02208301056,0.255488,0.0,0.432718848,0.4610211840000001,1.1792355840000002 2 | 2,22083010.560000002,2.21007430656,0.00165888,0.6721781760000001,0.9549250560000002,0.560888064,0.02208301056,0.271872,0.0,0.285410304,0.45147340800000013,1.1792355840000002 3 | 3,24843386.880000003,2.5849859788800003,0.00165888,0.59387904,1.154138112,0.8121254400000002,0.024843386880000002,0.273024,0.0,0.145006848,0.22710067200000003,1.9150110720000004 4 | 4,24843386.880000003,2.4802277068800005,0.00165888,0.4891207680000001,1.154138112,0.8121254400000002,0.024843386880000002,0.24192,0.0,0.089766144,0.208687104,1.9150110720000004 5 | 5,11041505.280000001,1.19178953728,0.00073728,0.33608908800000004,0.4801822720000001,0.364476672,0.01104150528,0.14848,0.0,0.211756032,0.23051059200000004,0.5900014080000001 6 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T0_mix_v4_distilled_Q-224/linear.csv: -------------------------------------------------------------------------------- 1 | 1,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 2 | 2,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 3 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T0_mix_v4_distilled_Q-256/linear.csv: -------------------------------------------------------------------------------- 1 | 
1,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 2 | 2,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 3 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T1_mix_v3_distilled-224/linear.csv: -------------------------------------------------------------------------------- 1 | 1,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 2 | 2,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 3 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T1_mix_v3_distilled_Q-256/linear.csv: -------------------------------------------------------------------------------- 1 | 1,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 2 | 2,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 3 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T1_mix_v3_distilled_Q-256/shfit_q.csv: -------------------------------------------------------------------------------- 1 | 1,17448304.64,1.8806531686400003,0.00131072,0.498335744,0.9216983040000002,0.4431708160000001,0.017448304640000002,0.32767999999999997,0.0,0.225509376,0.37827379200000005,0.9317416960000002 2 | 
2,17448304.64,1.7629505126400002,0.00131072,0.531103744,0.7712276480000001,0.4431708160000001,0.017448304640000002,0.229376,0.0,0.225509376,0.3588751360000001,0.9317416960000002 3 | 3,19629342.720000003,2.0733473587200004,0.0019660800000000003,0.4692377600000001,0.8894545920000002,0.6950256640000001,0.01962934272,0.188416,0.0,0.17276928,0.17943756800000005,1.5130951680000004 4 | 4,19629342.720000003,1.9832353587200005,0.0019660800000000003,0.39465779200000006,0.8157265920000002,0.753221632,0.01962934272,0.098304,0.0,0.129122304,0.10669260800000002,1.6294871040000003 5 | 5,8724152.32,0.9533764403200002,0.00065536,0.249167872,0.4608491520000001,0.234635264,0.008724152320000001,0.180224,0.0,0.10911744,0.18913689600000003,0.4661739520000001 6 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T1_mix_v4_distilled_Q-224/linear.csv: -------------------------------------------------------------------------------- 1 | 1,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 2 | 2,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 3 | -------------------------------------------------------------------------------- /CV/retraining_hybrid/hw_record/bossnet_T1_mix_v4_distilled_Q-256/linear.csv: -------------------------------------------------------------------------------- 1 | 1,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 2 | 2,7577600.0,0.928905864,5.1200000000000004e-05,0.020824064000000003,0.8798207999999998,0.0206834,0.0075776,0.8204192,0.0,0.007413024,0.04802304000000001,0.045473000000000006 3 | 
-------------------------------------------------------------------------------- /CV/searching_v1/bossnas/__init__.py: -------------------------------------------------------------------------------- 1 | from bossnas.datasets import bossnas_data 2 | from bossnas.hooks import path_hook, val_hook 3 | import bossnas.models 4 | -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/apis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v1/bossnas/apis/__init__.py -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v1/bossnas/datasets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/hooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v1/bossnas/hooks/__init__.py -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/models/__init__.py: -------------------------------------------------------------------------------- 1 | from bossnas.models.siamese_supernets.siamese_supernets_hytra import SiameseSupernetsHyTra 2 | from bossnas.models.siamese_supernets.siamese_supernets_nats import SiameseSupernetsNATS 3 | from bossnas.models.siamese_supernets.siamese_supernets_mbconv import SiameseSupernetsMBConv 4 | from bossnas.models.siamese_supernets.boss_necks import NonLinearNeckSimCLRProject 5 | from bossnas.models.supernets.hytra_supernet 
import SupernetHyTra 6 | from bossnas.models.supernets.mbconv_supernet import SupernetMBConv 7 | from bossnas.models.supernets.nats_supernet import SupernetNATS 8 | -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/models/operations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v1/bossnas/models/operations/__init__.py -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/models/operations/adder/jit.py: -------------------------------------------------------------------------------- 1 | from torch.utils.cpp_extension import load 2 | 3 | conv_cuda = load( 4 | 'adder_cuda', ['adder_cuda.cpp', 'adder_cuda_kernel.cu'], verbose=True) 5 | help(adder_cuda) 6 | -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/models/operations/adder/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='adder', 6 | ext_modules=[ 7 | CUDAExtension('adder_cuda', [ 8 | 'adder_cuda.cpp', 9 | 'adder_cuda_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/models/operations/deepshift/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v1/bossnas/models/operations/deepshift/__init__.py -------------------------------------------------------------------------------- 
/CV/searching_v1/bossnas/models/operations/deepshift/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from .kernels import * -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/models/operations/deepshift/kernels/cpu/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from torch.utils import cpp_extension 3 | 4 | setup( 5 | name='deepshift_cpu', 6 | ext_modules=[ 7 | cpp_extension.CppExtension('deepshift_cpu', [ 8 | 'shift_cpu.cpp' 9 | ], extra_compile_args=['-fopenmp', '-O3']) 10 | ], 11 | cmdclass={ 12 | 'build_ext': cpp_extension.BuildExtension 13 | }) 14 | 15 | -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/models/operations/deepshift/kernels/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from .convert_to_unoptimized import * -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/models/operations/deepshift/kernels/cuda/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deepshift_cuda', 6 | ext_modules=[ 7 | CUDAExtension('deepshift_cuda', [ 8 | 'shift_cuda.cpp', 9 | 'shift.cu', 10 | ],extra_compile_args=['-O3']) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | setup( 17 | name='unoptimized_cuda_kernel', 18 | ext_modules=[ 19 | CUDAExtension('unoptimized_cuda_kernel', [ 20 | 'unoptimized_cuda.cpp', 21 | 'unoptimized_cuda_kernel.cu', 22 | ],extra_compile_args=['-O3']) 23 | ], 24 | cmdclass={ 25 | 'build_ext': BuildExtension 26 | }) -------------------------------------------------------------------------------- 
/CV/searching_v1/bossnas/models/siamese_supernets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v1/bossnas/models/siamese_supernets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/models/supernets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v1/bossnas/models/supernets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/models/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v1/bossnas/models/utils/__init__.py -------------------------------------------------------------------------------- /CV/searching_v1/bossnas/runners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v1/bossnas/runners/__init__.py -------------------------------------------------------------------------------- /CV/searching_v1/configs/base.py: -------------------------------------------------------------------------------- 1 | train_cfg = {} 2 | test_cfg = {} 3 | optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb 4 | # yapf:disable 5 | log_config = dict( 6 | interval=50, 7 | hooks=[ 8 | dict(type='TextLoggerHook'), 9 | dict(type='TensorboardLoggerHook') 10 | ]) 11 | # yapf:enable 12 | # runtime settings 13 | dist_params = dict(backend='nccl') 14 | cudnn_benchmark = True 15 | log_level = 'INFO' 16 | load_from = None 17 | 
resume_from = None 18 | workflow = [('train', 1)] 19 | prefetch = False 20 | -------------------------------------------------------------------------------- /CV/searching_v1/evo_configs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v1/evo_configs.npy -------------------------------------------------------------------------------- /CV/searching_v1/work_dirs/cifar100/Supernet_v1-20210818-133106/trajectory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v1/work_dirs/cifar100/Supernet_v1-20210818-133106/trajectory.pdf -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/__init__.py: -------------------------------------------------------------------------------- 1 | from bossnas.datasets import bossnas_data 2 | from bossnas.hooks import path_hook, val_hook 3 | import bossnas.models 4 | -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/apis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/bossnas/apis/__init__.py -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/bossnas/datasets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/hooks/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/bossnas/hooks/__init__.py -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/__init__.py: -------------------------------------------------------------------------------- 1 | from bossnas.models.siamese_supernets.siamese_supernets_hytra import SiameseSupernetsHyTra 2 | from bossnas.models.siamese_supernets.siamese_supernets_nats import SiameseSupernetsNATS 3 | from bossnas.models.siamese_supernets.siamese_supernets_mbconv import SiameseSupernetsMBConv 4 | from bossnas.models.siamese_supernets.boss_necks import NonLinearNeckSimCLRProject 5 | from bossnas.models.supernets.hytra_supernet import SupernetHyTra 6 | from bossnas.models.supernets.mbconv_supernet import SupernetMBConv 7 | from bossnas.models.supernets.nats_supernet import SupernetNATS 8 | -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/operations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/bossnas/models/operations/__init__.py -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/operations/adder/jit.py: -------------------------------------------------------------------------------- 1 | from torch.utils.cpp_extension import load 2 | 3 | conv_cuda = load( 4 | 'adder_cuda', ['adder_cuda.cpp', 'adder_cuda_kernel.cu'], verbose=True) 5 | help(adder_cuda) 6 | -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/operations/adder/setup.py: -------------------------------------------------------------------------------- 1 
| from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='adder', 6 | ext_modules=[ 7 | CUDAExtension('adder_cuda', [ 8 | 'adder_cuda.cpp', 9 | 'adder_cuda_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/operations/deepshift/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/bossnas/models/operations/deepshift/__init__.py -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/operations/deepshift/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from .kernels import * -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/operations/deepshift/kernels/cpu/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from torch.utils import cpp_extension 3 | 4 | setup( 5 | name='deepshift_cpu', 6 | ext_modules=[ 7 | cpp_extension.CppExtension('deepshift_cpu', [ 8 | 'shift_cpu.cpp' 9 | ], extra_compile_args=['-fopenmp', '-O3']) 10 | ], 11 | cmdclass={ 12 | 'build_ext': cpp_extension.BuildExtension 13 | }) 14 | 15 | -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/operations/deepshift/kernels/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from .convert_to_unoptimized import * -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/operations/deepshift/kernels/cuda/setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deepshift_cuda', 6 | ext_modules=[ 7 | CUDAExtension('deepshift_cuda', [ 8 | 'shift_cuda.cpp', 9 | 'shift.cu', 10 | ],extra_compile_args=['-O3']) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | setup( 17 | name='unoptimized_cuda_kernel', 18 | ext_modules=[ 19 | CUDAExtension('unoptimized_cuda_kernel', [ 20 | 'unoptimized_cuda.cpp', 21 | 'unoptimized_cuda_kernel.cu', 22 | ],extra_compile_args=['-O3']) 23 | ], 24 | cmdclass={ 25 | 'build_ext': BuildExtension 26 | }) -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/siamese_supernets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/bossnas/models/siamese_supernets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/supernets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/bossnas/models/supernets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/models/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/bossnas/models/utils/__init__.py -------------------------------------------------------------------------------- /CV/searching_v2/bossnas/runners/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/bossnas/runners/__init__.py -------------------------------------------------------------------------------- /CV/searching_v2/configs/base.py: -------------------------------------------------------------------------------- 1 | train_cfg = {} 2 | test_cfg = {} 3 | optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb 4 | # yapf:disable 5 | log_config = dict( 6 | interval=50, 7 | hooks=[ 8 | dict(type='TextLoggerHook'), 9 | dict(type='TensorboardLoggerHook') 10 | ]) 11 | # yapf:enable 12 | # runtime settings 13 | dist_params = dict(backend='nccl') 14 | cudnn_benchmark = True 15 | log_level = 'INFO' 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | prefetch = False 20 | -------------------------------------------------------------------------------- /CV/searching_v2/evo_configs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/evo_configs.npy -------------------------------------------------------------------------------- /CV/searching_v2/work_dirs/cifar100/Supernet_v2-20210820-220728/trajectory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/work_dirs/cifar100/Supernet_v2-20210820-220728/trajectory.pdf -------------------------------------------------------------------------------- /CV/searching_v2/work_dirs/corr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v2/work_dirs/corr.pdf 
-------------------------------------------------------------------------------- /CV/searching_v3/bossnas/__init__.py: -------------------------------------------------------------------------------- 1 | from bossnas.datasets import bossnas_data 2 | from bossnas.hooks import path_hook, val_hook 3 | import bossnas.models 4 | -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/apis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/bossnas/apis/__init__.py -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/bossnas/datasets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/hooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/bossnas/hooks/__init__.py -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/models/__init__.py: -------------------------------------------------------------------------------- 1 | from bossnas.models.siamese_supernets.siamese_supernets_hytra import SiameseSupernetsHyTra 2 | from bossnas.models.siamese_supernets.siamese_supernets_nats import SiameseSupernetsNATS 3 | from bossnas.models.siamese_supernets.siamese_supernets_mbconv import SiameseSupernetsMBConv 4 | from bossnas.models.siamese_supernets.boss_necks import NonLinearNeckSimCLRProject 5 | from bossnas.models.supernets.hytra_supernet 
import SupernetHyTra 6 | from bossnas.models.supernets.mbconv_supernet import SupernetMBConv 7 | from bossnas.models.supernets.nats_supernet import SupernetNATS 8 | -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/models/operations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/bossnas/models/operations/__init__.py -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/models/operations/adder/jit.py: -------------------------------------------------------------------------------- 1 | from torch.utils.cpp_extension import load 2 | 3 | conv_cuda = load( 4 | 'adder_cuda', ['adder_cuda.cpp', 'adder_cuda_kernel.cu'], verbose=True) 5 | help(adder_cuda) 6 | -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/models/operations/adder/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='adder', 6 | ext_modules=[ 7 | CUDAExtension('adder_cuda', [ 8 | 'adder_cuda.cpp', 9 | 'adder_cuda_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/models/operations/deepshift/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/bossnas/models/operations/deepshift/__init__.py -------------------------------------------------------------------------------- 
/CV/searching_v3/bossnas/models/operations/deepshift/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from .kernels import * -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/models/operations/deepshift/kernels/cpu/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from torch.utils import cpp_extension 3 | 4 | setup( 5 | name='deepshift_cpu', 6 | ext_modules=[ 7 | cpp_extension.CppExtension('deepshift_cpu', [ 8 | 'shift_cpu.cpp' 9 | ], extra_compile_args=['-fopenmp', '-O3']) 10 | ], 11 | cmdclass={ 12 | 'build_ext': cpp_extension.BuildExtension 13 | }) 14 | 15 | -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/models/operations/deepshift/kernels/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from .convert_to_unoptimized import * -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/models/operations/deepshift/kernels/cuda/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deepshift_cuda', 6 | ext_modules=[ 7 | CUDAExtension('deepshift_cuda', [ 8 | 'shift_cuda.cpp', 9 | 'shift.cu', 10 | ],extra_compile_args=['-O3']) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | setup( 17 | name='unoptimized_cuda_kernel', 18 | ext_modules=[ 19 | CUDAExtension('unoptimized_cuda_kernel', [ 20 | 'unoptimized_cuda.cpp', 21 | 'unoptimized_cuda_kernel.cu', 22 | ],extra_compile_args=['-O3']) 23 | ], 24 | cmdclass={ 25 | 'build_ext': BuildExtension 26 | }) -------------------------------------------------------------------------------- 
/CV/searching_v3/bossnas/models/siamese_supernets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/bossnas/models/siamese_supernets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/models/supernets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/bossnas/models/supernets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/models/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/bossnas/models/utils/__init__.py -------------------------------------------------------------------------------- /CV/searching_v3/bossnas/runners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/bossnas/runners/__init__.py -------------------------------------------------------------------------------- /CV/searching_v3/configs/base.py: -------------------------------------------------------------------------------- 1 | train_cfg = {} 2 | test_cfg = {} 3 | optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb 4 | # yapf:disable 5 | log_config = dict( 6 | interval=50, 7 | hooks=[ 8 | dict(type='TextLoggerHook'), 9 | dict(type='TensorboardLoggerHook') 10 | ]) 11 | # yapf:enable 12 | # runtime settings 13 | dist_params = dict(backend='nccl') 14 | cudnn_benchmark = True 15 | log_level = 'INFO' 16 | load_from = None 17 | 
resume_from = None 18 | workflow = [('train', 1)] 19 | prefetch = False 20 | -------------------------------------------------------------------------------- /CV/searching_v3/evo_configs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/evo_configs.npy -------------------------------------------------------------------------------- /CV/searching_v3/work_dirs/cifar100/Supernet_v3-20210820-231347/trajectory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/work_dirs/cifar100/Supernet_v3-20210820-231347/trajectory.pdf -------------------------------------------------------------------------------- /CV/searching_v3/work_dirs/corr_v1_v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/work_dirs/corr_v1_v2.pdf -------------------------------------------------------------------------------- /CV/searching_v3/work_dirs/corr_v1_v3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/work_dirs/corr_v1_v3.pdf -------------------------------------------------------------------------------- /CV/searching_v3/work_dirs/corr_v2_v3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v3/work_dirs/corr_v2_v3.pdf -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/__init__.py: 
-------------------------------------------------------------------------------- 1 | from bossnas.datasets import bossnas_data 2 | from bossnas.hooks import path_hook, val_hook 3 | import bossnas.models 4 | -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/apis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/bossnas/apis/__init__.py -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/bossnas/datasets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/hooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/bossnas/hooks/__init__.py -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/__init__.py: -------------------------------------------------------------------------------- 1 | from bossnas.models.siamese_supernets.siamese_supernets_hytra import SiameseSupernetsHyTra 2 | from bossnas.models.siamese_supernets.siamese_supernets_nats import SiameseSupernetsNATS 3 | from bossnas.models.siamese_supernets.siamese_supernets_mbconv import SiameseSupernetsMBConv 4 | from bossnas.models.siamese_supernets.boss_necks import NonLinearNeckSimCLRProject 5 | from bossnas.models.supernets.hytra_supernet import SupernetHyTra 6 | from bossnas.models.supernets.mbconv_supernet import SupernetMBConv 7 | from 
bossnas.models.supernets.nats_supernet import SupernetNATS 8 | -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/operations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/bossnas/models/operations/__init__.py -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/operations/adder/jit.py: -------------------------------------------------------------------------------- 1 | from torch.utils.cpp_extension import load 2 | 3 | conv_cuda = load( 4 | 'adder_cuda', ['adder_cuda.cpp', 'adder_cuda_kernel.cu'], verbose=True) 5 | help(adder_cuda) 6 | -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/operations/adder/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='adder', 6 | ext_modules=[ 7 | CUDAExtension('adder_cuda', [ 8 | 'adder_cuda.cpp', 9 | 'adder_cuda_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/operations/deepshift/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/bossnas/models/operations/deepshift/__init__.py -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/operations/deepshift/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from 
.kernels import * -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/operations/deepshift/kernels/cpu/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from torch.utils import cpp_extension 3 | 4 | setup( 5 | name='deepshift_cpu', 6 | ext_modules=[ 7 | cpp_extension.CppExtension('deepshift_cpu', [ 8 | 'shift_cpu.cpp' 9 | ], extra_compile_args=['-fopenmp', '-O3']) 10 | ], 11 | cmdclass={ 12 | 'build_ext': cpp_extension.BuildExtension 13 | }) 14 | 15 | -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/operations/deepshift/kernels/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from .convert_to_unoptimized import * -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/operations/deepshift/kernels/cuda/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deepshift_cuda', 6 | ext_modules=[ 7 | CUDAExtension('deepshift_cuda', [ 8 | 'shift_cuda.cpp', 9 | 'shift.cu', 10 | ],extra_compile_args=['-O3']) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | setup( 17 | name='unoptimized_cuda_kernel', 18 | ext_modules=[ 19 | CUDAExtension('unoptimized_cuda_kernel', [ 20 | 'unoptimized_cuda.cpp', 21 | 'unoptimized_cuda_kernel.cu', 22 | ],extra_compile_args=['-O3']) 23 | ], 24 | cmdclass={ 25 | 'build_ext': BuildExtension 26 | }) -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/siamese_supernets/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/bossnas/models/siamese_supernets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/supernets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/bossnas/models/supernets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/models/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/bossnas/models/utils/__init__.py -------------------------------------------------------------------------------- /CV/searching_v4/bossnas/runners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/bossnas/runners/__init__.py -------------------------------------------------------------------------------- /CV/searching_v4/configs/base.py: -------------------------------------------------------------------------------- 1 | train_cfg = {} 2 | test_cfg = {} 3 | optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb 4 | # yapf:disable 5 | log_config = dict( 6 | interval=50, 7 | hooks=[ 8 | dict(type='TextLoggerHook'), 9 | dict(type='TensorboardLoggerHook') 10 | ]) 11 | # yapf:enable 12 | # runtime settings 13 | dist_params = dict(backend='nccl') 14 | cudnn_benchmark = True 15 | log_level = 'INFO' 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | prefetch = False 20 | 
-------------------------------------------------------------------------------- /CV/searching_v4/evo_configs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/evo_configs.npy -------------------------------------------------------------------------------- /CV/searching_v4/work_dirs/cifar100/Supernet_v4-20210826-193803/trajectory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/work_dirs/cifar100/Supernet_v4-20210826-193803/trajectory.pdf -------------------------------------------------------------------------------- /CV/searching_v4/work_dirs/corr_v1_v4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v4/work_dirs/corr_v1_v4.pdf -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/__init__.py: -------------------------------------------------------------------------------- 1 | from bossnas.datasets import bossnas_data 2 | from bossnas.hooks import path_hook, val_hook 3 | import bossnas.models 4 | -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/apis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/bossnas/apis/__init__.py -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/datasets/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/bossnas/datasets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/hooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/bossnas/hooks/__init__.py -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/models/__init__.py: -------------------------------------------------------------------------------- 1 | from bossnas.models.siamese_supernets.siamese_supernets_hytra import SiameseSupernetsHyTra 2 | from bossnas.models.siamese_supernets.siamese_supernets_nats import SiameseSupernetsNATS 3 | from bossnas.models.siamese_supernets.siamese_supernets_mbconv import SiameseSupernetsMBConv 4 | from bossnas.models.siamese_supernets.boss_necks import NonLinearNeckSimCLRProject 5 | from bossnas.models.supernets.hytra_supernet import SupernetHyTra 6 | from bossnas.models.supernets.mbconv_supernet import SupernetMBConv 7 | from bossnas.models.supernets.nats_supernet import SupernetNATS 8 | -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/models/operations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/bossnas/models/operations/__init__.py -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/models/operations/adder/jit.py: -------------------------------------------------------------------------------- 1 | from torch.utils.cpp_extension import load 2 | 3 | conv_cuda = load( 4 | 'adder_cuda', ['adder_cuda.cpp', 
'adder_cuda_kernel.cu'], verbose=True) 5 | help(adder_cuda) 6 | -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/models/operations/adder/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='adder', 6 | ext_modules=[ 7 | CUDAExtension('adder_cuda', [ 8 | 'adder_cuda.cpp', 9 | 'adder_cuda_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/models/operations/deepshift/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/bossnas/models/operations/deepshift/__init__.py -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/models/operations/deepshift/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from .kernels import * -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/models/operations/deepshift/kernels/cpu/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from torch.utils import cpp_extension 3 | 4 | setup( 5 | name='deepshift_cpu', 6 | ext_modules=[ 7 | cpp_extension.CppExtension('deepshift_cpu', [ 8 | 'shift_cpu.cpp' 9 | ], extra_compile_args=['-fopenmp', '-O3']) 10 | ], 11 | cmdclass={ 12 | 'build_ext': cpp_extension.BuildExtension 13 | }) 14 | 15 | -------------------------------------------------------------------------------- 
/CV/searching_v5/bossnas/models/operations/deepshift/kernels/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from .convert_to_unoptimized import * -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/models/operations/deepshift/kernels/cuda/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deepshift_cuda', 6 | ext_modules=[ 7 | CUDAExtension('deepshift_cuda', [ 8 | 'shift_cuda.cpp', 9 | 'shift.cu', 10 | ],extra_compile_args=['-O3']) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | setup( 17 | name='unoptimized_cuda_kernel', 18 | ext_modules=[ 19 | CUDAExtension('unoptimized_cuda_kernel', [ 20 | 'unoptimized_cuda.cpp', 21 | 'unoptimized_cuda_kernel.cu', 22 | ],extra_compile_args=['-O3']) 23 | ], 24 | cmdclass={ 25 | 'build_ext': BuildExtension 26 | }) -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/models/siamese_supernets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/bossnas/models/siamese_supernets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/models/supernets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/bossnas/models/supernets/__init__.py -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/models/utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/bossnas/models/utils/__init__.py -------------------------------------------------------------------------------- /CV/searching_v5/bossnas/runners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/bossnas/runners/__init__.py -------------------------------------------------------------------------------- /CV/searching_v5/cal_params_flops/boss_candidates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/cal_params_flops/boss_candidates/__init__.py -------------------------------------------------------------------------------- /CV/searching_v5/cal_params_flops/boss_candidates/adder/jit.py: -------------------------------------------------------------------------------- 1 | from torch.utils.cpp_extension import load 2 | 3 | conv_cuda = load( 4 | 'adder_cuda', ['adder_cuda.cpp', 'adder_cuda_kernel.cu'], verbose=True) 5 | help(adder_cuda) 6 | -------------------------------------------------------------------------------- /CV/searching_v5/cal_params_flops/boss_candidates/adder/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='adder', 6 | ext_modules=[ 7 | CUDAExtension('adder_cuda', [ 8 | 'adder_cuda.cpp', 9 | 'adder_cuda_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | -------------------------------------------------------------------------------- 
/CV/searching_v5/cal_params_flops/boss_candidates/deepshift/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/cal_params_flops/boss_candidates/deepshift/__init__.py -------------------------------------------------------------------------------- /CV/searching_v5/cal_params_flops/boss_candidates/deepshift/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from .kernels import * -------------------------------------------------------------------------------- /CV/searching_v5/cal_params_flops/boss_candidates/deepshift/kernels/cpu/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from torch.utils import cpp_extension 3 | 4 | setup( 5 | name='deepshift_cpu', 6 | ext_modules=[ 7 | cpp_extension.CppExtension('deepshift_cpu', [ 8 | 'shift_cpu.cpp' 9 | ], extra_compile_args=['-fopenmp', '-O3']) 10 | ], 11 | cmdclass={ 12 | 'build_ext': cpp_extension.BuildExtension 13 | }) 14 | 15 | -------------------------------------------------------------------------------- /CV/searching_v5/cal_params_flops/boss_candidates/deepshift/kernels/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from .convert_to_unoptimized import * -------------------------------------------------------------------------------- /CV/searching_v5/cal_params_flops/boss_candidates/deepshift/kernels/cuda/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deepshift_cuda', 6 | ext_modules=[ 7 | CUDAExtension('deepshift_cuda', [ 8 | 'shift_cuda.cpp', 9 | 'shift.cu', 10 | ],extra_compile_args=['-O3']) 
11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | setup( 17 | name='unoptimized_cuda_kernel', 18 | ext_modules=[ 19 | CUDAExtension('unoptimized_cuda_kernel', [ 20 | 'unoptimized_cuda.cpp', 21 | 'unoptimized_cuda_kernel.cu', 22 | ],extra_compile_args=['-O3']) 23 | ], 24 | cmdclass={ 25 | 'build_ext': BuildExtension 26 | }) -------------------------------------------------------------------------------- /CV/searching_v5/configs/base.py: -------------------------------------------------------------------------------- 1 | train_cfg = {} 2 | test_cfg = {} 3 | optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb 4 | # yapf:disable 5 | log_config = dict( 6 | interval=50, 7 | hooks=[ 8 | dict(type='TextLoggerHook'), 9 | dict(type='TensorboardLoggerHook') 10 | ]) 11 | # yapf:enable 12 | # runtime settings 13 | dist_params = dict(backend='nccl') 14 | cudnn_benchmark = True 15 | log_level = 'INFO' 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | prefetch = False 20 | -------------------------------------------------------------------------------- /CV/searching_v5/evo_configs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/evo_configs.npy -------------------------------------------------------------------------------- /CV/searching_v5/work_dirs/corr_v2_v5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/CV/searching_v5/work_dirs/corr_v2_v5.pdf -------------------------------------------------------------------------------- /NLP/en-de/build_modules.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## SBATCH config file 3 | #SBATCH --job-name=hy34_build_modules 4 | #SBATCH 
--partition=V100 5 | #SBATCH --reservation=ICLR21 6 | #SBATCH –-nodes=1 7 | #SBATCH --ntasks=1 8 | #SBATCH –-cpus-per-task=8 9 | #SBATCH --gres=gpu:1 10 | #SBATCH --nodelist=asimov-230 11 | #SBATCH –-signal=USR1@600 12 | #SBATCH --output=outputfile_%j.log 13 | 14 | ## env 15 | echo $CUDA_VISIBLE_DEVICES 16 | 17 | source /mnt/home/v_youhaoran/anaconda3/etc/profile.d/conda.sh 18 | source /mnt/home/v_youhaoran/.bashrc 19 | conda activate nas 20 | 21 | echo 'start' 22 | echo $CUDA_VISIBLE_DEVICES 23 | nvidia-smi 24 | 25 | cd fairseq/modules/lightadd_layer 26 | python setup.py install 27 | cd ../../.. 28 | cd fairseq/modules/lightconv_layer 29 | python setup.py install 30 | cd ../../.. 31 | cd fairseq/modules/lightshiftadd_layer 32 | python setup.py install 33 | cd ../../.. 34 | 35 | echo 'end' 36 | -------------------------------------------------------------------------------- /NLP/en-de/config.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/config.npy -------------------------------------------------------------------------------- /NLP/en-de/configs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/configs/.DS_Store -------------------------------------------------------------------------------- /NLP/en-de/configs/iwslt14.de-en/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/configs/iwslt14.de-en/.DS_Store -------------------------------------------------------------------------------- /NLP/en-de/configs/iwslt14.de-en/average_checkpoint.sh: -------------------------------------------------------------------------------- 1 | 
checkpoints_path=$1 2 | avg_checkpoints=${2:-10} 3 | 4 | model=average_model_$avg_checkpoints.pt 5 | output_path=$checkpoints_path 6 | 7 | python average_checkpoints.py \ 8 | --inputs $output_path \ 9 | --num-epoch-checkpoints $avg_checkpoints \ 10 | --output $output_path/$model 11 | -------------------------------------------------------------------------------- /NLP/en-de/configs/iwslt14.de-en/evo_search/iwslt14deen_titanxp.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/iwslt14deen_gpu_titanxp.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 200 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_iwslt14deen_super_space1.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/iwslt14.de-en/subtransformer/iwslt14deen_titanxp@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-de/configs/iwslt14.de-en/get_preprocessed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p data/binary/iwslt14_de_en 4 | 5 | wget -O data/binary/iwslt14_de_en/iwslt14_de_en.preprocessed.tgz 'https://www.dropbox.com/s/t5dqiamjdzahhfc/iwslt14_de_en.preproessed.tgz?dl=0' 6 | 7 | cd data/binary/iwslt14_de_en 8 | 9 | tar -xzvf iwslt14_de_en.preprocessed.tgz 10 | -------------------------------------------------------------------------------- /NLP/en-de/configs/iwslt14.de-en/latency_predictor/gpu_titanxp.yml: 
-------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/iwslt14deen_gpu_titanxp_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/iwslt14deen_gpu_titanxp.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-de/configs/iwslt14.de-en/subtransformer/HAT_iwslt14deen_titanxp@109.0ms_bleu@34.5.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 1024, 1024, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 2048] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 4, 4, 4, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 4, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4, 4] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [2, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/iwslt14.de-en/subtransformer/HAT_iwslt14deen_titanxp@137.8ms_bleu@34.7.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 1024, 1024, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [1024, 2048, 2048, 1024] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 4, 4, 4, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 4, 4, 2] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4, 4, 4] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [2, -1, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/iwslt14.de-en/subtransformer/HAT_iwslt14deen_titanxp@168.8ms_bleu@34.8.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 1024, 1024, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [2048, 2048, 2048, 1024, 1024] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 4, 4, 4, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 4, 4, 2, 2] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4, 4, 4, 4] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [2, -1, -1, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/iwslt14.de-en/subtransformer/HAT_iwslt14deen_titanxp@45.6ms_bleu@33.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 1024, 1024, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [2048] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 1 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 2, 4, 4, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4] 12 | decoder-ende-attention-heads-all-subtransformer: [4] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/iwslt14.de-en/subtransformer/HAT_iwslt14deen_titanxp@74.5ms_bleu@34.2.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 1024, 1024, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [2048, 1024] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 4, 4, 4, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 2] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/configs/wmt14.en-de/.DS_Store -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/evo_search_100ms_update.out: -------------------------------------------------------------------------------- 1 | nohup: ignoring input 2 | python: can't open file 'evo_search.py': [Errno 2] No such file or directory 3 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/evo_search_120ms_update.out: -------------------------------------------------------------------------------- 1 | nohup: ignoring input 2 | python: can't open file 'evo_search.py': [Errno 2] No such file or directory 3 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/wmt14ende_2080Ti_shiftadd.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_2080Ti_shiftadd.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | feature-dim: 94 14 | new-feature-dim: 12 15 | # lat-norm should match with that when train the latency predictor 16 | lat-norm: 200 17 | # path to load 
supertransformer weights 18 | # restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 19 | restore-file: ./checkpoints/wmt14.en-de/supertransformer/space_shiftadd_act_1/checkpoint_last.pt 20 | 21 | 22 | # path to write subtransformer configs 23 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_gpu_2080Ti_shiftadd@200ms.yml 24 | # latency constraint 25 | latency-constraint: 200 26 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/wmt14ende_V100_shiftadd_v2_100.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v2.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | feature-dim: 94 14 | new-feature-dim: 12 15 | # lat-norm should match with that when train the latency predictor 16 | lat-norm: 200 17 | # path to load supertransformer weights 18 | # restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 19 | restore-file: ./checkpoints/wmt14.en-de/supertransformer/space_shiftadd_v2_act_1/checkpoint_last.pt 20 | 21 | 22 | # path to write subtransformer configs 23 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@100ms.yml 24 | # latency constraint 25 | latency-constraint: 100 26 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/wmt14ende_V100_shiftadd_v2_120.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # 
path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v2.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | feature-dim: 94 14 | new-feature-dim: 12 15 | # lat-norm should match with that when train the latency predictor 16 | lat-norm: 200 17 | # path to load supertransformer weights 18 | # restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 19 | restore-file: ./checkpoints/wmt14.en-de/supertransformer/space_shiftadd_v2_act_1/checkpoint_last.pt 20 | 21 | 22 | # path to write subtransformer configs 23 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@120ms.yml 24 | # latency constraint 25 | latency-constraint: 120 26 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/wmt14ende_V100_shiftadd_v2_150.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v2.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | feature-dim: 94 14 | new-feature-dim: 12 15 | # lat-norm should match with that when train the latency predictor 16 | lat-norm: 200 17 | # path to load supertransformer weights 18 | # restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 19 | restore-file: ./checkpoints/wmt14.en-de/supertransformer/space_shiftadd_v2_act_1/checkpoint_last.pt 20 | 21 | 22 | # path to write subtransformer configs 23 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@150ms.yml 24 | # latency constraint 25 | 
latency-constraint: 150 26 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/wmt14ende_V100_shiftadd_v2_180.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v2.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | feature-dim: 94 14 | new-feature-dim: 12 15 | # lat-norm should match with that when train the latency predictor 16 | lat-norm: 200 17 | # path to load supertransformer weights 18 | # restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 19 | restore-file: ./checkpoints/wmt14.en-de/supertransformer/space_shiftadd_v2_act_1/checkpoint_last.pt 20 | 21 | 22 | # path to write subtransformer configs 23 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@180ms.yml 24 | # latency constraint 25 | latency-constraint: 180 26 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/wmt14ende_V100_shiftadd_v2_200.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v2.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | feature-dim: 94 14 | new-feature-dim: 12 15 | # lat-norm should match with that when train the latency predictor 16 | lat-norm: 200 
17 | # path to load supertransformer weights 18 | # restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 19 | restore-file: ./checkpoints/wmt14.en-de/supertransformer/space_shiftadd_v2_act_1/checkpoint_last.pt 20 | 21 | 22 | # path to write subtransformer configs 23 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@200ms.yml 24 | # latency constraint 25 | latency-constraint: 200 26 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/wmt14ende_V100_shiftadd_v2_250.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v2.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | feature-dim: 94 14 | new-feature-dim: 12 15 | # lat-norm should match with that when train the latency predictor 16 | lat-norm: 200 17 | # path to load supertransformer weights 18 | # restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 19 | restore-file: ./checkpoints/wmt14.en-de/supertransformer/space_shiftadd_v2_act_1/checkpoint_last.pt 20 | 21 | 22 | # path to write subtransformer configs 23 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@250ms.yml 24 | # latency constraint 25 | latency-constraint: 250 26 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/wmt14ende_raspberrypi.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 
| 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_cpu_raspberrypi.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 5000 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_raspberrypi@6s.yml 21 | # latency constraint 22 | latency-constraint: 6000 23 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/wmt14ende_titanxp.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_titanxp.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 200 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_titanxp@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/evo_search/wmt14ende_xeon.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | 
crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_cpu_xeon.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 300 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_xeon@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/get_preprocessed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p data/binary/wmt16_en_de 4 | 5 | wget -O data/binary/wmt16_en_de/wmt16_en_de.preprocessed.tgz 'https://www.dropbox.com/s/axfwl1vawper8yk/wmt16_en_de.preprocessed.tgz?dl=0' 6 | 7 | cd data/binary/wmt16_en_de 8 | 9 | tar -xzvf wmt16_en_de.preprocessed.tgz 10 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/latency_dataset/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/configs/wmt14.en-de/latency_dataset/.DS_Store -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/latency_predictor/cpu_raspberrypi.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_cpu_raspberrypi_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 5000 4 | feature-dim: 10 5 | hidden-dim: 400 6 | 
hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14ende_cpu_raspberrypi.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/latency_predictor/cpu_xeon.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_cpu_xeon_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 300 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14ende_cpu_xeon.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/latency_predictor/gpu_2080Ti_shiftadd.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_gpu_2080Ti_shiftadd.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 94 # 10 + 6*7*2 5 | new-feature-dim: 12 6 | hidden-dim: 400 7 | hidden-layer-num: 3 8 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_2080Ti_shiftadd.pt 9 | train-steps: 5000 10 | bsz: 128 11 | lr: 1e-5 12 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/latency_predictor/gpu_V100_shiftadd_v2.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_gpu_V100_shiftadd_v2.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 94 # 10 + 6*7*2 5 | new-feature-dim: 12 6 | hidden-dim: 400 7 | hidden-layer-num: 3 8 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v2.pt 9 | train-steps: 5000 10 | bsz: 128 11 | lr: 1e-5 12 | 
-------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/latency_predictor/gpu_V100_shiftadd_v3.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_gpu_V100_shiftadd_v3.csv 2 | feature-norm: [1024, 6, 4096, 6, 1024, 6, 4096, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 94 # 10 + 6*7*2 5 | new-feature-dim: 12 6 | hidden-dim: 400 7 | hidden-layer-num: 3 8 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v3.pt 9 | train-steps: 5000 10 | bsz: 128 11 | lr: 1e-5 12 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/latency_predictor/gpu_titanxp.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_gpu_titanxp_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_titanxp.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/configs/wmt14.en-de/subtransformer/.DS_Store -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@137.9ms_bleu@25.8.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | 
decoder-ffn-embed-dim-all-subtransformer: [3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 1 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8] 12 | decoder-ende-attention-heads-all-subtransformer: [8] 13 | 14 | # for arbitrary encoder decoder attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@204.2ms_bleu@27.6.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 4, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@278.7ms_bleu@27.9.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 8, 8, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@340.2ms_bleu@28.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 8, 8, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@369.6ms_bleu@28.2.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 4, 8, 8, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@450.9ms_bleu@28.5.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 6 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 8, 8, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4, 4, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, 1, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/wmt14ende@200ms_all_attention.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 1024, 3072, 1024, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 4, 4, 4, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [1, 1, 2, -1, 1] -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@100ms_all_attention.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 3072, 3072, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 4, 4, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [2, 1] 15 | 16 | encoder-block-types-all-subtransformer: [self_attention, self_attention, 
self_attention, self_attention, self_attention, self_attention] 17 | decoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 18 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@100ms_all_conv.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 3072, 3072, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 4, 4, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [2, 1] 15 | 16 | encoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 17 | decoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 18 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@120ms_all_attention.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 640 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 3072, 1024, 2048, 2048, 2048] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 4, 4, 8, 
4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8, 4] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [2, -1, -1] 15 | 16 | encoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 17 | decoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 18 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@120ms_all_conv.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 640 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 3072, 1024, 2048, 2048, 2048] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 4, 4, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8, 4] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [2, -1, -1] 15 | 16 | encoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 17 | decoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 18 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@180ms_all_attention.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | 
encoder-ffn-embed-dim-all-subtransformer: [3072, 1024, 3072, 3072, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [4, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4, 8, 8] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [2, 2, -1, -1] 15 | 16 | encoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 17 | decoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 18 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/subtransformer/wmt14ende_titanxp@200ms.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 2048, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 4, 4, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8, 8, 8] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [1, 1, 1, -1] 15 | 16 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-de/supertransformer/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/configs/wmt14.en-de/supertransformer/.DS_Store -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/configs/wmt14.en-fr/.DS_Store -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/evo_search/wmt14enfr_raspberrypi.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_cpu_raspberrypi.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 5000 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14enfr_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-fr/subtransformer/wmt14enfr_raspberrypi@6s.yml 21 | # latency constraint 22 | latency-constraint: 6000 23 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/evo_search/wmt14enfr_titanxp.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_gpu_titanxp.pt 11 | 
# feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 200 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14enfr_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-fr/subtransformer/wmt14enfr_titanxp@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/evo_search/wmt14enfr_xeon.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_cpu_xeon.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 300 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14enfr_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-fr/subtransformer/wmt14enfr_xeon@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/get_preprocessed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p data/binary/wmt14_en_fr 4 | 5 | wget -O data/binary/wmt14_en_fr/wmt14_en_fr.preprocessed.tgz 'https://www.dropbox.com/s/mrs8efjrrnc61xi/wmt14_en_fr.preprocessed.tgz?dl=0' 6 | 7 | cd data/binary/wmt14_en_fr 8 | 
9 | tar -xzvf wmt14_en_fr.preprocessed.tgz 10 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/latency_predictor/cpu_raspberrypi.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14enfr_cpu_raspberrypi_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 5000 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_cpu_raspberrypi.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/latency_predictor/cpu_xeon.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14enfr_cpu_xeon_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 300 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_cpu_xeon.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/latency_predictor/gpu_titanxp.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14enfr_gpu_titanxp_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_gpu_titanxp.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@4.3s_bleu@38.8.yml: -------------------------------------------------------------------------------- 1 | 
encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [2048, 2048, 2048, 2048, 3072, 2048] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 1 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 4, 8, 4, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8] 12 | decoder-ende-attention-heads-all-subtransformer: [8] 13 | 14 | # for arbitrary encoder decoder attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@5.3s_bleu@40.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 2] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@5.8s_bleu@40.6.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 2048] 5 | decoder-ffn-embed-dim-all-subtransformer: [1024, 2048, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [4, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, -1, 1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@6.9s_bleu@41.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [2048, 3072, 2048, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 8, 4, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, -1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@7.8s_bleu@41.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [2048, 3072, 3072, 3072, 2048] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@9.1s_bleu@41.8.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 6 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 4, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, -1, 1, 2, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_titanxp@132.9ms_bleu@40.7.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, -1, 1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_titanxp@168.3ms_bleu@41.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 2, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_titanxp@208.3ms_bleu@41.7.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 2, 1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_titanxp@69.3ms_bleu@39.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 1 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8] 12 | decoder-ende-attention-heads-all-subtransformer: [8] 13 | 14 | # for arbitrary encoder decoder attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_titanxp@94.9ms_bleu@40.0.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8] 13 | 14 | # for arbitrary encoder decoder 
attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 2] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_xeon@154.7ms_bleu@39.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 1 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8] 12 | decoder-ende-attention-heads-all-subtransformer: [8] 13 | 14 | # for arbitrary encoder decoder attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_xeon@208.8ms_bleu@40.0.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 2] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_xeon@329.4ms_bleu@41.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 2048, 3072, 3072, 3072, 2048] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, -1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_xeon@394.5ms_bleu@41.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 2, 1, 2, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_xeon@442.0ms_bleu@41.7.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 6 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, 2, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt19.en-de/evo_search/wmt19ende_titanxp.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt19ende_gpu_titanxp.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 200 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt19ende_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt19.en-de/subtransformer/wmt19ende_titanxp@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt19.en-de/get_preprocessed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p data/binary/wmt19_en_de 4 | 5 | wget -O data/binary/wmt19_en_de/wmt19_en_de.preprocessed.tgz 'https://www.dropbox.com/s/q2st4ox2na9z5z2/wmt19_en_de.preprocessed.tgz?dl=0' 6 | 7 | cd data/binary/wmt19_en_de 8 | 9 | tar -xzvf wmt19_en_de.preprocessed.tgz 10 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt19.en-de/latency_predictor/gpu_titanxp.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: 
./latency_dataset/wmt19ende_gpu_titanxp_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt19ende_gpu_titanxp.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@134.5ms_bleu@45.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@176.1ms_bleu@46.2.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@204.5ms_bleu@46.5.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@237.8ms_bleu@46.7.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 6 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 4, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, 1, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@55.7ms_bleu@42.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 640 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 1 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8] 12 | decoder-ende-attention-heads-all-subtransformer: [8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [1] 17 | -------------------------------------------------------------------------------- /NLP/en-de/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@93.2ms_bleu@44.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 640 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8] 13 | 14 | # for arbitrary encoder decoder attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 2] 17 | -------------------------------------------------------------------------------- /NLP/en-de/deepshift/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/deepshift/__init__.py -------------------------------------------------------------------------------- /NLP/en-de/deepshift/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from .kernels import * -------------------------------------------------------------------------------- /NLP/en-de/deepshift/kernels/cpu/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from 
torch.utils import cpp_extension 3 | 4 | setup( 5 | name='deepshift_cpu', 6 | ext_modules=[ 7 | cpp_extension.CppExtension('deepshift_cpu', [ 8 | 'shift_cpu.cpp' 9 | ], extra_compile_args=['-fopenmp', '-O3']) 10 | ], 11 | cmdclass={ 12 | 'build_ext': cpp_extension.BuildExtension 13 | }) 14 | 15 | -------------------------------------------------------------------------------- /NLP/en-de/deepshift/kernels/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from .convert_to_unoptimized import * -------------------------------------------------------------------------------- /NLP/en-de/deepshift/kernels/cuda/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deepshift_cuda', 6 | ext_modules=[ 7 | CUDAExtension('deepshift_cuda', [ 8 | 'shift_cuda.cpp', 9 | 'shift.cu', 10 | ],extra_compile_args=['-O3']) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | setup( 17 | name='unoptimized_cuda_kernel', 18 | ext_modules=[ 19 | CUDAExtension('unoptimized_cuda_kernel', [ 20 | 'unoptimized_cuda.cpp', 21 | 'unoptimized_cuda_kernel.cu', 22 | ],extra_compile_args=['-O3']) 23 | ], 24 | cmdclass={ 25 | 'build_ext': BuildExtension 26 | }) -------------------------------------------------------------------------------- /NLP/en-de/env.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=/mnt/archive/qiuling/transformer_shiftadd/hw_transformer_nas/fairseq/modules/lightadd_layer 2 | export PYTHONPATH=$PYTHONPATH:/mnt/archive/qiuling/transformer_shiftadd/hw_transformer_nas/fairseq/modules/lightconv_layer 3 | export PYTHONPATH=$PYTHONPATH:/mnt/archive/qiuling/transformer_shiftadd/hw_transformer_nas/fairseq/modules/lightshiftadd_layer 4 | echo $PYTHONPATH 5 | 6 | # conda activate nas 7 | 
-------------------------------------------------------------------------------- /NLP/en-de/fairseq/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/fairseq/.DS_Store -------------------------------------------------------------------------------- /NLP/en-de/fairseq/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | __all__ = ['pdb'] 7 | __version__ = '0.8.0' 8 | 9 | import fairseq.criterions # noqa 10 | import fairseq.models # noqa 11 | import fairseq.modules # noqa 12 | import fairseq.optim # noqa 13 | import fairseq.optim.lr_scheduler # noqa 14 | import fairseq.tasks # noqa 15 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/clib/libbleu/module.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2017-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | 12 | static PyMethodDef method_def[] = { 13 | {NULL, NULL, 0, NULL} 14 | }; 15 | 16 | static struct PyModuleDef module_def = { 17 | PyModuleDef_HEAD_INIT, 18 | "libbleu", /* name of module */ 19 | NULL, /* module documentation, may be NULL */ 20 | -1, /* size of per-interpreter state of the module, 21 | or -1 if the module keeps state in global variables. 
*/ 22 | method_def 23 | }; 24 | 25 | 26 | #if PY_MAJOR_VERSION == 2 27 | PyMODINIT_FUNC init_libbleu() 28 | #else 29 | PyMODINIT_FUNC PyInit_libbleu() 30 | #endif 31 | { 32 | PyObject *m = PyModule_Create(&module_def); 33 | if (!m) { 34 | return NULL; 35 | } 36 | return m; 37 | } 38 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/criterions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import importlib 7 | import os 8 | 9 | from fairseq import registry 10 | from fairseq.criterions.fairseq_criterion import FairseqCriterion 11 | 12 | 13 | build_criterion, register_criterion, CRITERION_REGISTRY = registry.setup_registry( 14 | '--criterion', 15 | base_class=FairseqCriterion, 16 | default='cross_entropy', 17 | ) 18 | 19 | 20 | # automatically import any Python files in the criterions/ directory 21 | for file in os.listdir(os.path.dirname(__file__)): 22 | if file.endswith('.py') and not file.startswith('_'): 23 | module = file[:file.find('.py')] 24 | importlib.import_module('fairseq.criterions.' + module) 25 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/data/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | 7 | import importlib 8 | import os 9 | 10 | from fairseq import registry 11 | 12 | 13 | build_tokenizer, register_tokenizer, TOKENIZER_REGISTRY = registry.setup_registry( 14 | '--tokenizer', 15 | default=None, 16 | ) 17 | 18 | 19 | build_bpe, register_bpe, BPE_REGISTRY = registry.setup_registry( 20 | '--bpe', 21 | default=None, 22 | ) 23 | 24 | 25 | # automatically import any Python files in the encoders/ directory 26 | for file in os.listdir(os.path.dirname(__file__)): 27 | if file.endswith('.py') and not file.startswith('_'): 28 | module = file[:file.find('.py')] 29 | importlib.import_module('fairseq.data.encoders.' + module) 30 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/data/encoders/nltk_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from fairseq.data.encoders import register_tokenizer 7 | 8 | 9 | @register_tokenizer('nltk') 10 | class NLTKTokenizer(object): 11 | 12 | def __init__(self, source_lang=None, target_lang=None): 13 | try: 14 | from nltk.tokenize import word_tokenize 15 | self.word_tokenize = word_tokenize 16 | except ImportError: 17 | raise ImportError('Please install nltk with: pip install nltk') 18 | 19 | def encode(self, x: str) -> str: 20 | return ' '.join(self.word_tokenize(x)) 21 | 22 | def decode(self, x: str) -> str: 23 | return x 24 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/data/encoders/space_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import re 7 | 8 | from fairseq.data.encoders import register_tokenizer 9 | 10 | 11 | @register_tokenizer('space') 12 | class SpaceTokenizer(object): 13 | 14 | def __init__(self, source_lang=None, target_lang=None): 15 | self.space_tok = re.compile(r"\s+") 16 | 17 | def encode(self, x: str) -> str: 18 | return self.space_tok.sub(' ', x) 19 | 20 | def decode(self, x: str) -> str: 21 | return x 22 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/data/id_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | 8 | from . import FairseqDataset 9 | 10 | 11 | class IdDataset(FairseqDataset): 12 | 13 | def __getitem__(self, index): 14 | return index 15 | 16 | def __len__(self): 17 | return 0 18 | 19 | def collater(self, samples): 20 | return torch.tensor(samples) 21 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/data/strip_token_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from . 
import BaseWrapperDataset 7 | 8 | 9 | class StripTokenDataset(BaseWrapperDataset): 10 | 11 | def __init__(self, dataset, id_to_strip): 12 | super().__init__(dataset) 13 | self.id_to_strip = id_to_strip 14 | 15 | def __getitem__(self, index): 16 | item = self.dataset[index] 17 | return item[item.ne(self.id_to_strip)] 18 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/modules/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/fairseq/modules/.DS_Store -------------------------------------------------------------------------------- /NLP/en-de/fairseq/modules/gelu.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | """ 6 | See "Gaussian Error Linear Units (GELUs)" by Dan Hendrycks and Kevin Gimpel with 7 | the corresponding GitHub repo: https://github.com/hendrycks/GELUs 8 | """ 9 | 10 | import math 11 | 12 | import torch 13 | 14 | 15 | def gelu_accurate(x): 16 | if not hasattr(gelu_accurate, "_a"): 17 | gelu_accurate._a = math.sqrt(2 / math.pi) 18 | return 0.5 * x * (1 + torch.tanh(gelu_accurate._a * (x + 0.044715 * torch.pow(x, 3)))) 19 | 20 | 21 | def gelu(x: torch.Tensor) -> torch.Tensor: 22 | if hasattr(torch.nn.functional, 'gelu'): 23 | return torch.nn.functional.gelu(x.float()).type_as(x) 24 | else: 25 | return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) 26 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/modules/lightadd_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .lightadd_layer import LightaddLayer # noqa 7 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/modules/lightadd_layer/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from setuptools import setup 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 9 | 10 | setup( 11 | name='lightadd_layer', 12 | ext_modules=[ 13 | CUDAExtension('lightadd_cuda', [ 14 | 'lightadd_cuda.cpp', 15 | 'lightadd_cuda_kernel.cu', 16 | ]), 17 | ], 18 | cmdclass={ 19 | 'build_ext': BuildExtension 20 | }) 21 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/modules/lightconv_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .lightconv_layer import LightconvLayer # noqa 7 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/modules/lightconv_layer/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from setuptools import setup 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 9 | 10 | setup( 11 | name='lightconv_layer', 12 | ext_modules=[ 13 | CUDAExtension('lightconv_cuda', [ 14 | 'lightconv_cuda.cpp', 15 | 'lightconv_cuda_kernel.cu', 16 | ]), 17 | ], 18 | cmdclass={ 19 | 'build_ext': BuildExtension 20 | }) 21 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/modules/lightshiftadd_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .lightshiftadd_layer import LightshiftaddLayer # noqa 7 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/modules/lightshiftadd_layer/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from setuptools import setup 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 9 | 10 | setup( 11 | name='lightshiftadd_layer', 12 | ext_modules=[ 13 | CUDAExtension('lightshiftadd_cuda', [ 14 | 'lightshiftadd_cuda.cpp', 15 | 'lightshiftadd_cuda_kernel.cu', 16 | ]), 17 | ], 18 | cmdclass={ 19 | 'build_ext': BuildExtension 20 | }) 21 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/modules/unfold.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch.nn.functional as F 7 | 8 | 9 | def unfold1d(x, kernel_size, padding_l, pad_value=0): 10 | '''unfold T x B x C to T x B x C x K''' 11 | if kernel_size > 1: 12 | T, B, C = x.size() 13 | x = F.pad(x, (0, 0, 0, 0, padding_l, kernel_size - 1 - padding_l), value=pad_value) 14 | x = x.as_strided((T, B, C, kernel_size), (B*C, C, 1, B*C)) 15 | else: 16 | x = x.unsqueeze(3) 17 | return x 18 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/optim/lr_scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import importlib 7 | import os 8 | 9 | from fairseq import registry 10 | from fairseq.optim.lr_scheduler.fairseq_lr_scheduler import FairseqLRScheduler 11 | 12 | 13 | build_lr_scheduler, register_lr_scheduler, LR_SCHEDULER_REGISTRY = registry.setup_registry( 14 | '--lr-scheduler', 15 | base_class=FairseqLRScheduler, 16 | default='fixed', 17 | ) 18 | 19 | # automatically import any Python files in the optim/lr_scheduler/ directory 20 | for file in os.listdir(os.path.dirname(__file__)): 21 | if file.endswith('.py') and not file.startswith('_'): 22 | module = file[:file.find('.py')] 23 | importlib.import_module('fairseq.optim.lr_scheduler.' + module) 24 | -------------------------------------------------------------------------------- /NLP/en-de/fairseq/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import re 7 | 8 | SPACE_NORMALIZER = re.compile(r"\s+") 9 | 10 | 11 | def tokenize_line(line): 12 | line = SPACE_NORMALIZER.sub(" ", line) 13 | line = line.strip() 14 | return line.split() 15 | -------------------------------------------------------------------------------- /NLP/en-de/figures/BLEUs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/BLEUs.pdf -------------------------------------------------------------------------------- /NLP/en-de/figures/BLEUs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/BLEUs.png -------------------------------------------------------------------------------- /NLP/en-de/figures/config_2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/config_2.npy -------------------------------------------------------------------------------- /NLP/en-de/figures/config_200.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/config_200.npy -------------------------------------------------------------------------------- /NLP/en-de/figures/config_2000.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/config_2000.npy 
-------------------------------------------------------------------------------- /NLP/en-de/figures/energy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/energy.pdf -------------------------------------------------------------------------------- /NLP/en-de/figures/energy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/energy.png -------------------------------------------------------------------------------- /NLP/en-de/figures/latency.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/latency.pdf -------------------------------------------------------------------------------- /NLP/en-de/figures/latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/latency.png -------------------------------------------------------------------------------- /NLP/en-de/figures/latency_2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/latency_2.npy -------------------------------------------------------------------------------- /NLP/en-de/figures/latency_200.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/latency_200.npy 
-------------------------------------------------------------------------------- /NLP/en-de/figures/latency_2000.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/latency_2000.npy -------------------------------------------------------------------------------- /NLP/en-de/figures/loss_2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/loss_2.npy -------------------------------------------------------------------------------- /NLP/en-de/figures/loss_200.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/loss_200.npy -------------------------------------------------------------------------------- /NLP/en-de/figures/loss_2000.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/loss_2000.npy -------------------------------------------------------------------------------- /NLP/en-de/figures/loss_vs_lat_200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/loss_vs_lat_200.png -------------------------------------------------------------------------------- /NLP/en-de/figures/loss_vs_lat_2000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/loss_vs_lat_2000.png 
-------------------------------------------------------------------------------- /NLP/en-de/figures/loss_vs_lat_2010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/loss_vs_lat_2010.png -------------------------------------------------------------------------------- /NLP/en-de/figures/pred_vs_real.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/pred_vs_real.pdf -------------------------------------------------------------------------------- /NLP/en-de/figures/trajectory_350.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/trajectory_350.pdf -------------------------------------------------------------------------------- /NLP/en-de/figures/trajectory_comp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/trajectory_comp.pdf -------------------------------------------------------------------------------- /NLP/en-de/figures/trajectory_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/figures/trajectory_comp.png -------------------------------------------------------------------------------- /NLP/en-de/last_bleus.txt: -------------------------------------------------------------------------------- 1 | BLEU4 = 26.90, 58.1/32.4/20.5/13.5 (BP=1.000, ratio=1.006, syslen=64881, reflen=64496) 
-------------------------------------------------------------------------------- /NLP/en-de/latency.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/latency.npy -------------------------------------------------------------------------------- /NLP/en-de/latency_dataset/predictors/wmt14ende_gpu_2080Ti_shiftadd.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/latency_dataset/predictors/wmt14ende_gpu_2080Ti_shiftadd.pt -------------------------------------------------------------------------------- /NLP/en-de/latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v3.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v3.pt -------------------------------------------------------------------------------- /NLP/en-de/latency_dataset/wmt14ende_cpu_xeon_shiftadd.csv: -------------------------------------------------------------------------------- 1 | encoder_embed_dim,encoder_layer_num,encoder_ffn_embed_dim_avg,encoder_self_attention_heads_avg,decoder_embed_dim,decoder_layer_num,decoder_ffn_embed_dim_avg,decoder_self_attention_heads_avg,decoder_ende_attention_heads_avg,decoder_arbitrary_ende_attn_avg,encoder_block_types,decoder_block_types,latency_mean_encoder,latency_mean_decoder,latency_std_encoder,latency_std_decoder 2 | 512,6,768.0,3.6666666666666665,640,6,1792.0,6.333333333333333,4.666666666666667,1.8333333333333333,[['self_attention'], ['self_attention+lightweight_conv'], ['self_attention+lightweight_shiftadd'], ['self_attention+lightweight_conv'], ['self_attention'], 
['lightweight_add']],['lightweight_add', 'lightweight_conv', 'lightweight_add', 'self_attention+lightweight_add', 'lightweight_add', 'self_attention+lightweight_conv'], -------------------------------------------------------------------------------- /NLP/en-de/loss.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-de/loss.npy -------------------------------------------------------------------------------- /NLP/en-fr/build_modules.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## SBATCH config file 3 | #SBATCH --job-name=hy34_build_modules 4 | #SBATCH --partition=V100 5 | #SBATCH --reservation=ICLR21 6 | #SBATCH --nodes=1 7 | #SBATCH --ntasks=1 8 | #SBATCH --cpus-per-task=8 9 | #SBATCH --gres=gpu:1 10 | #SBATCH --nodelist=asimov-230 11 | #SBATCH --signal=USR1@600 12 | #SBATCH --output=outputfile_%j.log 13 | 14 | ## env 15 | echo $CUDA_VISIBLE_DEVICES 16 | 17 | source /mnt/home/v_youhaoran/anaconda3/etc/profile.d/conda.sh 18 | source /mnt/home/v_youhaoran/.bashrc 19 | conda activate nas 20 | 21 | echo 'start' 22 | echo $CUDA_VISIBLE_DEVICES 23 | nvidia-smi 24 | 25 | cd fairseq/modules/lightadd_layer 26 | python setup.py install 27 | cd ../../.. 28 | cd fairseq/modules/lightconv_layer 29 | python setup.py install 30 | cd ../../.. 31 | cd fairseq/modules/lightshiftadd_layer 32 | python setup.py install 33 | cd ../../.. 
34 | 35 | echo 'end' 36 | -------------------------------------------------------------------------------- /NLP/en-fr/config.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-fr/config.npy -------------------------------------------------------------------------------- /NLP/en-fr/configs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-fr/configs/.DS_Store -------------------------------------------------------------------------------- /NLP/en-fr/configs/iwslt14.de-en/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-fr/configs/iwslt14.de-en/.DS_Store -------------------------------------------------------------------------------- /NLP/en-fr/configs/iwslt14.de-en/average_checkpoint.sh: -------------------------------------------------------------------------------- 1 | checkpoints_path=$1 2 | avg_checkpoints=${2:-10} 3 | 4 | model=average_model_$avg_checkpoints.pt 5 | output_path=$checkpoints_path 6 | 7 | python average_checkpoints.py \ 8 | --inputs $output_path \ 9 | --num-epoch-checkpoints $avg_checkpoints \ 10 | --output $output_path/$model 11 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/iwslt14.de-en/evo_search/iwslt14deen_titanxp.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/iwslt14deen_gpu_titanxp.pt 11 | # feature-norm 
should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 200 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_iwslt14deen_super_space1.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/iwslt14.de-en/subtransformer/iwslt14deen_titanxp@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/iwslt14.de-en/get_preprocessed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p data/binary/iwslt14_de_en 4 | 5 | wget -O data/binary/iwslt14_de_en/iwslt14_de_en.preprocessed.tgz 'https://www.dropbox.com/s/t5dqiamjdzahhfc/iwslt14_de_en.preproessed.tgz?dl=0' 6 | 7 | cd data/binary/iwslt14_de_en 8 | 9 | tar -xzvf iwslt14_de_en.preprocessed.tgz 10 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/iwslt14.de-en/latency_predictor/gpu_titanxp.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/iwslt14deen_gpu_titanxp_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/iwslt14deen_gpu_titanxp.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/iwslt14.de-en/subtransformer/HAT_iwslt14deen_titanxp@109.0ms_bleu@34.5.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | 
encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 1024, 1024, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 2048] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 4, 4, 4, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 4, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4, 4] 13 | 14 | # for arbitrary encoder decoder attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [2, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/iwslt14.de-en/subtransformer/HAT_iwslt14deen_titanxp@137.8ms_bleu@34.7.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 1024, 1024, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [1024, 2048, 2048, 1024] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 4, 4, 4, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 4, 4, 2] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4, 4, 4] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [2, -1, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/iwslt14.de-en/subtransformer/HAT_iwslt14deen_titanxp@168.8ms_bleu@34.8.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 1024, 1024, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [2048, 2048, 2048, 1024, 1024] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 4, 4, 4, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 4, 4, 2, 2] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4, 4, 4, 4] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [2, -1, -1, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/iwslt14.de-en/subtransformer/HAT_iwslt14deen_titanxp@45.6ms_bleu@33.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 1024, 1024, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [2048] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 1 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 2, 4, 4, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4] 12 | decoder-ende-attention-heads-all-subtransformer: [4] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/iwslt14.de-en/subtransformer/HAT_iwslt14deen_titanxp@74.5ms_bleu@34.2.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 1024, 1024, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [2048, 1024] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 4, 4, 4, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 2] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-fr/configs/wmt14.en-de/.DS_Store -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/evo_search/evo_search_100ms_update.out: -------------------------------------------------------------------------------- 1 | nohup: ignoring input 2 | python: can't open file 'evo_search.py': [Errno 2] No such file or directory 3 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/evo_search/evo_search_120ms_update.out: -------------------------------------------------------------------------------- 1 | nohup: ignoring input 2 | python: can't open file 'evo_search.py': [Errno 2] No such file or directory 3 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/evo_search/wmt14ende_2080Ti_shiftadd.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_2080Ti_shiftadd.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | feature-dim: 94 14 | new-feature-dim: 12 15 | # lat-norm should match with that when train the latency predictor 16 | lat-norm: 200 17 | # path to load 
supertransformer weights 18 | # restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 19 | restore-file: ./checkpoints/wmt14.en-de/supertransformer/space_shiftadd_act_1/checkpoint_last.pt 20 | 21 | 22 | # path to write subtransformer configs 23 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_gpu_2080Ti_shiftadd@200ms.yml 24 | # latency constraint 25 | latency-constraint: 200 26 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/evo_search/wmt14ende_raspberrypi.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_cpu_raspberrypi.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 5000 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_raspberrypi@6s.yml 21 | # latency constraint 22 | latency-constraint: 6000 23 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/evo_search/wmt14ende_titanxp.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_titanxp.pt 11 | # feature-norm should match with that when train the latency predictor 12 | 
feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 200 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_titanxp@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/evo_search/wmt14ende_xeon.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14ende_cpu_xeon.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 300 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14ende_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-de/subtransformer/wmt14ende_xeon@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/get_preprocessed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p data/binary/wmt16_en_de 4 | 5 | wget -O data/binary/wmt16_en_de/wmt16_en_de.preprocessed.tgz 'https://www.dropbox.com/s/axfwl1vawper8yk/wmt16_en_de.preprocessed.tgz?dl=0' 6 | 7 | cd data/binary/wmt16_en_de 8 | 9 | tar -xzvf wmt16_en_de.preprocessed.tgz 10 | 
-------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/latency_dataset/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-fr/configs/wmt14.en-de/latency_dataset/.DS_Store -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/latency_predictor/cpu_raspberrypi.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_cpu_raspberrypi_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 5000 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14ende_cpu_raspberrypi.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/latency_predictor/cpu_xeon.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_cpu_xeon_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 300 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14ende_cpu_xeon.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/latency_predictor/gpu_2080Ti_shiftadd.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_gpu_2080Ti_shiftadd.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 94 # 10 + 6*7*2 5 | new-feature-dim: 12 6 | hidden-dim: 400 7 | hidden-layer-num: 3 8 | 
ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_2080Ti_shiftadd.pt 9 | train-steps: 5000 10 | bsz: 128 11 | lr: 1e-5 12 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/latency_predictor/gpu_V100_shiftadd_v2.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_gpu_V100_shiftadd_v2.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 94 # 10 + 6*7*2 5 | new-feature-dim: 12 6 | hidden-dim: 400 7 | hidden-layer-num: 3 8 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v2.pt 9 | train-steps: 5000 10 | bsz: 128 11 | lr: 1e-5 12 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/latency_predictor/gpu_V100_shiftadd_v3.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_gpu_V100_shiftadd_v3.csv 2 | feature-norm: [1024, 6, 4096, 6, 1024, 6, 4096, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 94 # 10 + 6*7*2 5 | new-feature-dim: 12 6 | hidden-dim: 400 7 | hidden-layer-num: 3 8 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_V100_shiftadd_v3.pt 9 | train-steps: 5000 10 | bsz: 128 11 | lr: 1e-5 12 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/latency_predictor/gpu_titanxp.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14ende_gpu_titanxp_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14ende_gpu_titanxp.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | 
-------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-fr/configs/wmt14.en-de/subtransformer/.DS_Store -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@137.9ms_bleu@25.8.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 1 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8] 12 | decoder-ende-attention-heads-all-subtransformer: [8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@204.2ms_bleu@27.6.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 4, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8] 13 | 14 | # for arbitrary encoder decoder attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@278.7ms_bleu@27.9.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 8, 8, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8] 13 | 14 | # for arbitrary encoder decoder 
attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@340.2ms_bleu@28.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 8, 8, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@369.6ms_bleu@28.2.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 4, 8, 8, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/HAT_wmt14ende_xeon@450.9ms_bleu@28.5.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 6 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 8, 8, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4, 4, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, 1, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/wmt14ende@200ms_all_attention.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 1024, 3072, 1024, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 4, 4, 4, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [1, 1, 2, -1, 1] -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@100ms_all_attention.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 3072, 3072, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 4, 4, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [2, 1] 15 | 16 | encoder-block-types-all-subtransformer: [self_attention, self_attention, 
self_attention, self_attention, self_attention, self_attention] 17 | decoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 18 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@100ms_all_conv.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 1024, 3072, 3072, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 4, 4, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [2, 1] 15 | 16 | encoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 17 | decoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 18 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@120ms_all_attention.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 640 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 3072, 1024, 2048, 2048, 2048] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 4, 4, 8, 
4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8, 4] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [2, -1, -1] 15 | 16 | encoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 17 | decoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 18 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@120ms_all_conv.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 640 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [1024, 3072, 1024, 2048, 2048, 2048] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 4, 4, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [4, 4, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8, 4] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [2, -1, -1] 15 | 16 | encoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 17 | decoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 18 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/wmt14ende_gpu_V100_shiftadd_v2@180ms_all_attention.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | 
encoder-ffn-embed-dim-all-subtransformer: [3072, 1024, 3072, 3072, 1024, 1024] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [4, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4, 8, 8] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [2, 2, -1, -1] 15 | 16 | encoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 17 | decoder-block-types-all-subtransformer: [self_attention, self_attention, self_attention, self_attention, self_attention, self_attention] 18 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/subtransformer/wmt14ende_titanxp@200ms.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 2048, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 4, 4, 8, 4] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8, 8, 8] 13 | 14 | decoder-arbitrary-ende-attn-all-subtransformer: [1, 1, 1, -1] 15 | 16 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-de/supertransformer/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-fr/configs/wmt14.en-de/supertransformer/.DS_Store -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-fr/configs/wmt14.en-fr/.DS_Store -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/evo_search/wmt14enfr_raspberrypi.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_cpu_raspberrypi.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 5000 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14enfr_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-fr/subtransformer/wmt14enfr_raspberrypi@6s.yml 21 | # latency constraint 22 | latency-constraint: 6000 23 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/evo_search/wmt14enfr_titanxp.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_gpu_titanxp.pt 11 | 
# feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 200 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14enfr_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-fr/subtransformer/wmt14enfr_titanxp@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/evo_search/wmt14enfr_xeon.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_cpu_xeon.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 300 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt14enfr_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt14.en-fr/subtransformer/wmt14enfr_xeon@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/get_preprocessed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p data/binary/wmt14_en_fr 4 | 5 | wget -O data/binary/wmt14_en_fr/wmt14_en_fr.preprocessed.tgz 'https://www.dropbox.com/s/mrs8efjrrnc61xi/wmt14_en_fr.preprocessed.tgz?dl=0' 6 | 7 | cd data/binary/wmt14_en_fr 8 | 
9 | tar -xzvf wmt14_en_fr.preprocessed.tgz 10 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/latency_predictor/cpu_raspberrypi.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14enfr_cpu_raspberrypi_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 5000 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_cpu_raspberrypi.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/latency_predictor/cpu_xeon.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14enfr_cpu_xeon_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 300 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_cpu_xeon.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/latency_predictor/gpu_V100_shiftadd_v1.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14enfr_gpu_V100_shiftadd_v1.csv 2 | feature-norm: [1024, 6, 4096, 6, 1024, 6, 3072, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 94 # 10 + 6*7*2 5 | new-feature-dim: 12 6 | hidden-dim: 400 7 | hidden-layer-num: 3 8 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_gpu_V100_shiftadd_v1.pt 9 | train-steps: 5000 10 | bsz: 128 11 | lr: 1e-5 12 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/latency_predictor/gpu_titanxp.yml: 
-------------------------------------------------------------------------------- 1 | lat-dataset-path: ./latency_dataset/wmt14enfr_gpu_titanxp_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt14enfr_gpu_titanxp.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@4.3s_bleu@38.8.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [2048, 2048, 2048, 2048, 3072, 2048] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 1 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 4, 8, 4, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8] 12 | decoder-ende-attention-heads-all-subtransformer: [8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@5.3s_bleu@40.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 2] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@5.8s_bleu@40.6.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 2048] 5 | decoder-ffn-embed-dim-all-subtransformer: [1024, 2048, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [4, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, -1, 1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@6.9s_bleu@41.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [2048, 3072, 2048, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 4, 8, 4, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 4, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, -1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@7.8s_bleu@41.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [2048, 3072, 3072, 3072, 2048] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_raspberrypi@9.1s_bleu@41.8.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 6 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 4, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, -1, 1, 2, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_xeon@154.7ms_bleu@39.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 1 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8] 12 | decoder-ende-attention-heads-all-subtransformer: [8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_xeon@208.8ms_bleu@40.0.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8] 13 | 14 | # for arbitrary encoder decoder attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 2] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_xeon@329.4ms_bleu@41.1.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 2048, 3072, 3072, 3072, 2048] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8] 13 | 14 | # for arbitrary 
encoder decoder attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, -1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_xeon@394.5ms_bleu@41.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 2, 1, 2, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt14.en-fr/subtransformer/HAT_wmt14enfr_xeon@442.0ms_bleu@41.7.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 6 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, 2, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt19.en-de/evo_search/wmt19ende_titanxp.yml: -------------------------------------------------------------------------------- 1 | evo-iter: 30 2 | population-size: 125 3 | parent-size: 25 4 | mutation-size: 50 5 | crossover-size: 50 6 | mutation-prob: 0.3 7 | 8 | 9 | # path to load latency predictor 10 | ckpt-path: ./latency_dataset/predictors/wmt19ende_gpu_titanxp.pt 11 | # feature-norm should match with that when train the latency predictor 12 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 13 | # lat-norm should match with that when train the latency predictor 14 | lat-norm: 200 15 | # path to load supertransformer weights 16 | restore-file: ./downloaded_models/HAT_wmt19ende_super_space0.pt 17 | 18 | 19 | # path to write subtransformer configs 20 | write-config-path: configs/wmt19.en-de/subtransformer/wmt19ende_titanxp@200ms.yml 21 | # latency constraint 22 | latency-constraint: 200 23 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt19.en-de/get_preprocessed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p data/binary/wmt19_en_de 4 | 5 | wget -O data/binary/wmt19_en_de/wmt19_en_de.preprocessed.tgz 'https://www.dropbox.com/s/q2st4ox2na9z5z2/wmt19_en_de.preprocessed.tgz?dl=0' 6 | 7 | cd data/binary/wmt19_en_de 8 | 9 | tar -xzvf wmt19_en_de.preprocessed.tgz 10 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt19.en-de/latency_predictor/gpu_titanxp.yml: -------------------------------------------------------------------------------- 1 | lat-dataset-path: 
./latency_dataset/wmt19ende_gpu_titanxp_all.csv 2 | feature-norm: [640, 6, 2048, 6, 640, 6, 2048, 6, 6, 2] 3 | lat-norm: 200 4 | feature-dim: 10 5 | hidden-dim: 400 6 | hidden-layer-num: 3 7 | ckpt-path: ./latency_dataset/predictors/wmt19ende_gpu_titanxp.pt 8 | train-steps: 5000 9 | bsz: 128 10 | lr: 1e-5 11 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@134.5ms_bleu@45.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 3 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@176.1ms_bleu@46.2.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 4 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@204.5ms_bleu@46.5.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 5 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@237.8ms_bleu@46.7.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 512 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 6 9 | 10 | encoder-self-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 4, 4] 12 | decoder-ende-attention-heads-all-subtransformer: [4, 8, 8, 8, 8, 8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 1, 1, 1, -1, -1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@55.7ms_bleu@42.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 640 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 1 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8] 12 | decoder-ende-attention-heads-all-subtransformer: [8] 13 | 14 | # for arbitrary encoder decoder attention. 
-1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [1] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/configs/wmt19.en-de/subtransformer/HAT_wmt19ende_titanxp@93.2ms_bleu@44.4.yml: -------------------------------------------------------------------------------- 1 | encoder-embed-dim-subtransformer: 640 2 | decoder-embed-dim-subtransformer: 512 3 | 4 | encoder-ffn-embed-dim-all-subtransformer: [3072, 3072, 3072, 3072, 3072, 3072] 5 | decoder-ffn-embed-dim-all-subtransformer: [3072, 3072] 6 | 7 | encoder-layer-num-subtransformer: 6 8 | decoder-layer-num-subtransformer: 2 9 | 10 | encoder-self-attention-heads-all-subtransformer: [8, 8, 8, 8, 8, 8] 11 | decoder-self-attention-heads-all-subtransformer: [8, 8] 12 | decoder-ende-attention-heads-all-subtransformer: [8, 8] 13 | 14 | # for arbitrary encoder decoder attention. -1 means attending to last one encoder layer 15 | # 1 means last two encoder layers, 2 means last three encoder layers 16 | decoder-arbitrary-ende-attn-all-subtransformer: [-1, 2] 17 | -------------------------------------------------------------------------------- /NLP/en-fr/deepshift/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-fr/deepshift/__init__.py -------------------------------------------------------------------------------- /NLP/en-fr/deepshift/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | from .kernels import * -------------------------------------------------------------------------------- /NLP/en-fr/deepshift/kernels/cpu/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from 
torch.utils import cpp_extension 3 | 4 | setup( 5 | name='deepshift_cpu', 6 | ext_modules=[ 7 | cpp_extension.CppExtension('deepshift_cpu', [ 8 | 'shift_cpu.cpp' 9 | ], extra_compile_args=['-fopenmp', '-O3']) 10 | ], 11 | cmdclass={ 12 | 'build_ext': cpp_extension.BuildExtension 13 | }) 14 | 15 | -------------------------------------------------------------------------------- /NLP/en-fr/deepshift/kernels/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from .convert_to_unoptimized import * -------------------------------------------------------------------------------- /NLP/en-fr/deepshift/kernels/cuda/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deepshift_cuda', 6 | ext_modules=[ 7 | CUDAExtension('deepshift_cuda', [ 8 | 'shift_cuda.cpp', 9 | 'shift.cu', 10 | ],extra_compile_args=['-O3']) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | setup( 17 | name='unoptimized_cuda_kernel', 18 | ext_modules=[ 19 | CUDAExtension('unoptimized_cuda_kernel', [ 20 | 'unoptimized_cuda.cpp', 21 | 'unoptimized_cuda_kernel.cu', 22 | ],extra_compile_args=['-O3']) 23 | ], 24 | cmdclass={ 25 | 'build_ext': BuildExtension 26 | }) -------------------------------------------------------------------------------- /NLP/en-fr/env.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=/mnt/archive/qiuling/transformer_shiftadd/hw_transformer_nas/fairseq/modules/lightadd_layer 2 | export PYTHONPATH=$PYTHONPATH:/mnt/archive/qiuling/transformer_shiftadd/hw_transformer_nas/fairseq/modules/lightconv_layer 3 | export PYTHONPATH=$PYTHONPATH:/mnt/archive/qiuling/transformer_shiftadd/hw_transformer_nas/fairseq/modules/lightshiftadd_layer 4 | echo $PYTHONPATH 5 | 6 | # conda activate nas 7 | 
-------------------------------------------------------------------------------- /NLP/en-fr/fairseq/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-fr/fairseq/.DS_Store -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | __all__ = ['pdb'] 7 | __version__ = '0.8.0' 8 | 9 | import fairseq.criterions # noqa 10 | import fairseq.models # noqa 11 | import fairseq.modules # noqa 12 | import fairseq.optim # noqa 13 | import fairseq.optim.lr_scheduler # noqa 14 | import fairseq.tasks # noqa 15 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/clib/libbleu/module.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2017-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | 12 | static PyMethodDef method_def[] = { 13 | {NULL, NULL, 0, NULL} 14 | }; 15 | 16 | static struct PyModuleDef module_def = { 17 | PyModuleDef_HEAD_INIT, 18 | "libbleu", /* name of module */ 19 | NULL, /* module documentation, may be NULL */ 20 | -1, /* size of per-interpreter state of the module, 21 | or -1 if the module keeps state in global variables. 
*/ 22 | method_def 23 | }; 24 | 25 | 26 | #if PY_MAJOR_VERSION == 2 27 | PyMODINIT_FUNC init_libbleu() 28 | #else 29 | PyMODINIT_FUNC PyInit_libbleu() 30 | #endif 31 | { 32 | PyObject *m = PyModule_Create(&module_def); 33 | if (!m) { 34 | return NULL; 35 | } 36 | return m; 37 | } 38 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/criterions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import importlib 7 | import os 8 | 9 | from fairseq import registry 10 | from fairseq.criterions.fairseq_criterion import FairseqCriterion 11 | 12 | 13 | build_criterion, register_criterion, CRITERION_REGISTRY = registry.setup_registry( 14 | '--criterion', 15 | base_class=FairseqCriterion, 16 | default='cross_entropy', 17 | ) 18 | 19 | 20 | # automatically import any Python files in the criterions/ directory 21 | for file in os.listdir(os.path.dirname(__file__)): 22 | if file.endswith('.py') and not file.startswith('_'): 23 | module = file[:file.find('.py')] 24 | importlib.import_module('fairseq.criterions.' + module) 25 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/data/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | 7 | import importlib 8 | import os 9 | 10 | from fairseq import registry 11 | 12 | 13 | build_tokenizer, register_tokenizer, TOKENIZER_REGISTRY = registry.setup_registry( 14 | '--tokenizer', 15 | default=None, 16 | ) 17 | 18 | 19 | build_bpe, register_bpe, BPE_REGISTRY = registry.setup_registry( 20 | '--bpe', 21 | default=None, 22 | ) 23 | 24 | 25 | # automatically import any Python files in the encoders/ directory 26 | for file in os.listdir(os.path.dirname(__file__)): 27 | if file.endswith('.py') and not file.startswith('_'): 28 | module = file[:file.find('.py')] 29 | importlib.import_module('fairseq.data.encoders.' + module) 30 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/data/encoders/nltk_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from fairseq.data.encoders import register_tokenizer 7 | 8 | 9 | @register_tokenizer('nltk') 10 | class NLTKTokenizer(object): 11 | 12 | def __init__(self, source_lang=None, target_lang=None): 13 | try: 14 | from nltk.tokenize import word_tokenize 15 | self.word_tokenize = word_tokenize 16 | except ImportError: 17 | raise ImportError('Please install nltk with: pip install nltk') 18 | 19 | def encode(self, x: str) -> str: 20 | return ' '.join(self.word_tokenize(x)) 21 | 22 | def decode(self, x: str) -> str: 23 | return x 24 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/data/encoders/space_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import re 7 | 8 | from fairseq.data.encoders import register_tokenizer 9 | 10 | 11 | @register_tokenizer('space') 12 | class SpaceTokenizer(object): 13 | 14 | def __init__(self, source_lang=None, target_lang=None): 15 | self.space_tok = re.compile(r"\s+") 16 | 17 | def encode(self, x: str) -> str: 18 | return self.space_tok.sub(' ', x) 19 | 20 | def decode(self, x: str) -> str: 21 | return x 22 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/data/id_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | 8 | from . import FairseqDataset 9 | 10 | 11 | class IdDataset(FairseqDataset): 12 | 13 | def __getitem__(self, index): 14 | return index 15 | 16 | def __len__(self): 17 | return 0 18 | 19 | def collater(self, samples): 20 | return torch.tensor(samples) 21 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/data/strip_token_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from . 
import BaseWrapperDataset 7 | 8 | 9 | class StripTokenDataset(BaseWrapperDataset): 10 | 11 | def __init__(self, dataset, id_to_strip): 12 | super().__init__(dataset) 13 | self.id_to_strip = id_to_strip 14 | 15 | def __getitem__(self, index): 16 | item = self.dataset[index] 17 | return item[item.ne(self.id_to_strip)] 18 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/modules/gelu.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | """ 6 | See "Gaussian Error Linear Units (GELUs)" by Dan Hendrycks and Kevin Gimpel with 7 | the corresponding GitHub repo: https://github.com/hendrycks/GELUs 8 | """ 9 | 10 | import math 11 | 12 | import torch 13 | 14 | 15 | def gelu_accurate(x): 16 | if not hasattr(gelu_accurate, "_a"): 17 | gelu_accurate._a = math.sqrt(2 / math.pi) 18 | return 0.5 * x * (1 + torch.tanh(gelu_accurate._a * (x + 0.044715 * torch.pow(x, 3)))) 19 | 20 | 21 | def gelu(x: torch.Tensor) -> torch.Tensor: 22 | if hasattr(torch.nn.functional, 'gelu'): 23 | return torch.nn.functional.gelu(x.float()).type_as(x) 24 | else: 25 | return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) 26 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/modules/lightadd_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | from .lightadd_layer import LightaddLayer # noqa 7 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/modules/lightadd_layer/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from setuptools import setup 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 9 | 10 | setup( 11 | name='lightadd_layer', 12 | ext_modules=[ 13 | CUDAExtension('lightadd_cuda', [ 14 | 'lightadd_cuda.cpp', 15 | 'lightadd_cuda_kernel.cu', 16 | ]), 17 | ], 18 | cmdclass={ 19 | 'build_ext': BuildExtension 20 | }) 21 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/modules/lightconv_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .lightconv_layer import LightconvLayer # noqa 7 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/modules/lightconv_layer/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from setuptools import setup 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 9 | 10 | setup( 11 | name='lightconv_layer', 12 | ext_modules=[ 13 | CUDAExtension('lightconv_cuda', [ 14 | 'lightconv_cuda.cpp', 15 | 'lightconv_cuda_kernel.cu', 16 | ]), 17 | ], 18 | cmdclass={ 19 | 'build_ext': BuildExtension 20 | }) 21 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/modules/lightshiftadd_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .lightshiftadd_layer import LightshiftaddLayer # noqa 7 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/modules/lightshiftadd_layer/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from setuptools import setup 8 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 9 | 10 | setup( 11 | name='lightshiftadd_layer', 12 | ext_modules=[ 13 | CUDAExtension('lightshiftadd_cuda', [ 14 | 'lightshiftadd_cuda.cpp', 15 | 'lightshiftadd_cuda_kernel.cu', 16 | ]), 17 | ], 18 | cmdclass={ 19 | 'build_ext': BuildExtension 20 | }) 21 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/modules/unfold.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch.nn.functional as F 7 | 8 | 9 | def unfold1d(x, kernel_size, padding_l, pad_value=0): 10 | '''unfold T x B x C to T x B x C x K''' 11 | if kernel_size > 1: 12 | T, B, C = x.size() 13 | x = F.pad(x, (0, 0, 0, 0, padding_l, kernel_size - 1 - padding_l), value=pad_value) 14 | x = x.as_strided((T, B, C, kernel_size), (B*C, C, 1, B*C)) 15 | else: 16 | x = x.unsqueeze(3) 17 | return x 18 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/optim/lr_scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import importlib 7 | import os 8 | 9 | from fairseq import registry 10 | from fairseq.optim.lr_scheduler.fairseq_lr_scheduler import FairseqLRScheduler 11 | 12 | 13 | build_lr_scheduler, register_lr_scheduler, LR_SCHEDULER_REGISTRY = registry.setup_registry( 14 | '--lr-scheduler', 15 | base_class=FairseqLRScheduler, 16 | default='fixed', 17 | ) 18 | 19 | # automatically import any Python files in the optim/lr_scheduler/ directory 20 | for file in os.listdir(os.path.dirname(__file__)): 21 | if file.endswith('.py') and not file.startswith('_'): 22 | module = file[:file.find('.py')] 23 | importlib.import_module('fairseq.optim.lr_scheduler.' + module) 24 | -------------------------------------------------------------------------------- /NLP/en-fr/fairseq/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import re 7 | 8 | SPACE_NORMALIZER = re.compile(r"\s+") 9 | 10 | 11 | def tokenize_line(line): 12 | line = SPACE_NORMALIZER.sub(" ", line) 13 | line = line.strip() 14 | return line.split() 15 | -------------------------------------------------------------------------------- /NLP/en-fr/latency_dataset/predictors/wmt14enfr_gpu_V100_shiftadd_v1.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GATECH-EIC/ShiftAddNAS/b60a0e6a880e8146c83c8cf56730bd270e6264f3/NLP/en-fr/latency_dataset/predictors/wmt14enfr_gpu_V100_shiftadd_v1.pt -------------------------------------------------------------------------------- /NLP/en-fr/latency_dataset/wmt14ende_cpu_xeon_shiftadd.csv: -------------------------------------------------------------------------------- 1 | encoder_embed_dim,encoder_layer_num,encoder_ffn_embed_dim_avg,encoder_self_attention_heads_avg,decoder_embed_dim,decoder_layer_num,decoder_ffn_embed_dim_avg,decoder_self_attention_heads_avg,decoder_ende_attention_heads_avg,decoder_arbitrary_ende_attn_avg,encoder_block_types,decoder_block_types,latency_mean_encoder,latency_mean_decoder,latency_std_encoder,latency_std_decoder 2 | 512,6,768.0,3.6666666666666665,640,6,1792.0,6.333333333333333,4.666666666666667,1.8333333333333333,[['self_attention'], ['self_attention+lightweight_conv'], ['self_attention+lightweight_shiftadd'], ['self_attention+lightweight_conv'], ['self_attention'], ['lightweight_add']],['lightweight_add', 'lightweight_conv', 'lightweight_add', 'self_attention+lightweight_add', 'lightweight_add', 'self_attention+lightweight_conv'], --------------------------------------------------------------------------------