├── .gitignore ├── LICENSE ├── README.md ├── all-pkgs.txt ├── auto_model_register.py ├── base-deps.txt ├── create_models.py ├── ds_config.json ├── experiments ├── .DS_Store ├── bench_dense_inference_llama.py ├── bench_dense_inference_mistral.py ├── bench_sparse_inference_llama.py ├── bench_sparse_inference_mistral.py ├── data │ ├── __pycache__ │ │ ├── dataset.cpython-38.pyc │ │ ├── dataset.cpython-39.pyc │ │ ├── get_billsum_dataset.cpython-38.pyc │ │ ├── get_billsum_dataset.cpython-39.pyc │ │ ├── get_dataset.cpython-38.pyc │ │ ├── get_dataset.cpython-39.pyc │ │ ├── get_sst2_dataset.cpython-38.pyc │ │ └── get_sst2_dataset.cpython-39.pyc │ ├── dataset.py │ ├── get_billsum_dataset.py │ ├── get_dataset.py │ ├── get_default_dataset.py │ ├── get_glue_dataset.py │ ├── get_glue_dataset_classification.py │ ├── get_refinedweb_dataset.py │ └── get_sst2_dataset.py ├── format_dataset_for_instruction_tuning.py ├── instruct_tuning.py ├── models │ ├── sparse_mistral │ │ ├── sparse_silu.py │ │ └── svd_router.py │ └── sparse_silu │ │ ├── ugly_utils.py │ │ └── utils.py ├── plot_act_sparsity.py ├── plot_throughput.py ├── plot_zero_shot.py ├── pretrain_sparse_model.py ├── replace_util.py ├── sparse_finetune.py ├── time_util_nofile.py └── trainer.py ├── flash_gemv ├── README.md ├── artifact │ ├── ablation │ │ ├── llama7B-mlp-full-fp32.pdf │ │ ├── llama7B-mlp-full-fp32.xlsx │ │ ├── mistral7B-mlp-full-fp32.pdf │ │ ├── mistral7B-mlp-full-fp32.xlsx │ │ └── plot_ablation.py │ ├── e2e │ │ ├── calculate_speedup.py │ │ ├── e2e_gen_llama_fp32.pdf │ │ ├── e2e_gen_mistral_fp32.pdf │ │ ├── e2e_generation.pdf │ │ ├── geom_e2e.py │ │ ├── llama │ │ │ ├── dense-fp32-1-1024.csv │ │ │ ├── dense-fp32-1-2048.csv │ │ │ ├── dense-fp32-1-256.csv │ │ │ ├── dense-fp32-1-512.csv │ │ │ ├── triton-fp32-1-1024.csv │ │ │ ├── triton-fp32-1-2048.csv │ │ │ ├── triton-fp32-1-256.csv │ │ │ └── triton-fp32-1-512.csv │ │ ├── mistral │ │ │ ├── dense-fp32-1-1024.csv │ │ │ ├── dense-fp32-1-2048.csv │ │ │ ├── dense-fp32-1-256.csv │ │ │ ├── dense-fp32-1-512.csv │ │ │ ├── triton-fp32-1-1024.csv │ │ │ ├── triton-fp32-1-2048.csv │ │ │ ├── triton-fp32-1-256.csv │ │ │ └── triton-fp32-1-512.csv │ │ └── plot_bar.py │ └── mlp │ │ ├── llama7B-mlp-l40s-fp32.pdf │ │ ├── llama7B-mlp-l40s-fp32.xlsx │ │ ├── mistral7B-mlp-l40s-fp32.pdf │ │ ├── mistral7B-mlp-l40s-fp32.xlsx │ │ └── plot_latency.py ├── bench │ ├── __init__.py │ ├── final_profile_llama7B.sh │ ├── final_profile_mistral7B.sh │ ├── plot_latency.py │ ├── plot_latency_sub.py │ ├── profile_llama7B.py │ ├── profile_mistral7B.py │ └── utils.py ├── csrc │ ├── cuda │ │ ├── fuse_gemv_cmp_cuda.cu │ │ ├── fuse_gemv_cmp_cuda.h │ │ └── reduce_kernel_utils.cuh │ └── fuse_gemv_cmp.cpp ├── flash_gemv │ ├── __init__.py │ ├── flash_gemv.py │ └── kernels.py ├── setup.py └── test │ └── test_fuse_gemv_cmp.py ├── inference.py ├── reproduction_script.sh ├── requirements.txt ├── scripts ├── bench_generation_llama7B.sh ├── bench_generation_mistral7B.sh ├── evaluate_base_model.sh ├── general_finetuning_llama_cats.sh ├── general_finetuning_llama_relufication.sh ├── general_finetuning_mistral_cats.sh ├── general_finetuning_mistral_relufication.sh ├── instruction_tuning.sh ├── plot_mlp_histogram.sh ├── plot_post_training_activation_sparsity_per_layer.sh └── zero_shot_evaluation_without_general_finetuning.sh ├── setup.py └── utils ├── .DS_Store ├── __pycache__ ├── constants.cpython-38.pyc ├── constants.cpython-39.pyc ├── parse_string.cpython-38.pyc ├── parse_string.cpython-39.pyc └── utils.cpython-38.pyc ├── constants.py ├── mistral_utils.py ├── parse_args.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | ./idea 2 | .DS_Store 3 | 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/README.md -------------------------------------------------------------------------------- /all-pkgs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/all-pkgs.txt -------------------------------------------------------------------------------- /auto_model_register.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/auto_model_register.py -------------------------------------------------------------------------------- /base-deps.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/base-deps.txt -------------------------------------------------------------------------------- /create_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/create_models.py -------------------------------------------------------------------------------- /ds_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/ds_config.json -------------------------------------------------------------------------------- /experiments/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/.DS_Store -------------------------------------------------------------------------------- /experiments/bench_dense_inference_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/bench_dense_inference_llama.py -------------------------------------------------------------------------------- /experiments/bench_dense_inference_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/bench_dense_inference_mistral.py -------------------------------------------------------------------------------- /experiments/bench_sparse_inference_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/bench_sparse_inference_llama.py -------------------------------------------------------------------------------- /experiments/bench_sparse_inference_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/bench_sparse_inference_mistral.py -------------------------------------------------------------------------------- /experiments/data/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /experiments/data/__pycache__/dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/__pycache__/dataset.cpython-39.pyc -------------------------------------------------------------------------------- /experiments/data/__pycache__/get_billsum_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/__pycache__/get_billsum_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /experiments/data/__pycache__/get_billsum_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/__pycache__/get_billsum_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /experiments/data/__pycache__/get_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/__pycache__/get_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /experiments/data/__pycache__/get_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/__pycache__/get_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /experiments/data/__pycache__/get_sst2_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/__pycache__/get_sst2_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /experiments/data/__pycache__/get_sst2_dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/__pycache__/get_sst2_dataset.cpython-39.pyc -------------------------------------------------------------------------------- /experiments/data/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/dataset.py -------------------------------------------------------------------------------- /experiments/data/get_billsum_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/get_billsum_dataset.py -------------------------------------------------------------------------------- /experiments/data/get_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/get_dataset.py -------------------------------------------------------------------------------- /experiments/data/get_default_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/get_default_dataset.py -------------------------------------------------------------------------------- /experiments/data/get_glue_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/get_glue_dataset.py -------------------------------------------------------------------------------- /experiments/data/get_glue_dataset_classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/get_glue_dataset_classification.py -------------------------------------------------------------------------------- /experiments/data/get_refinedweb_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/get_refinedweb_dataset.py -------------------------------------------------------------------------------- /experiments/data/get_sst2_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/data/get_sst2_dataset.py -------------------------------------------------------------------------------- /experiments/format_dataset_for_instruction_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/format_dataset_for_instruction_tuning.py -------------------------------------------------------------------------------- /experiments/instruct_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/instruct_tuning.py -------------------------------------------------------------------------------- /experiments/models/sparse_mistral/sparse_silu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/models/sparse_mistral/sparse_silu.py -------------------------------------------------------------------------------- /experiments/models/sparse_mistral/svd_router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/models/sparse_mistral/svd_router.py -------------------------------------------------------------------------------- /experiments/models/sparse_silu/ugly_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/models/sparse_silu/ugly_utils.py -------------------------------------------------------------------------------- /experiments/models/sparse_silu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/models/sparse_silu/utils.py -------------------------------------------------------------------------------- /experiments/plot_act_sparsity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/plot_act_sparsity.py -------------------------------------------------------------------------------- /experiments/plot_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/plot_throughput.py -------------------------------------------------------------------------------- /experiments/plot_zero_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/plot_zero_shot.py -------------------------------------------------------------------------------- /experiments/pretrain_sparse_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/pretrain_sparse_model.py -------------------------------------------------------------------------------- /experiments/replace_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/replace_util.py -------------------------------------------------------------------------------- /experiments/sparse_finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/sparse_finetune.py -------------------------------------------------------------------------------- /experiments/time_util_nofile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/time_util_nofile.py -------------------------------------------------------------------------------- /experiments/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/experiments/trainer.py -------------------------------------------------------------------------------- /flash_gemv/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | pip install -e . 3 | ``` -------------------------------------------------------------------------------- /flash_gemv/artifact/ablation/llama7B-mlp-full-fp32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/ablation/llama7B-mlp-full-fp32.pdf -------------------------------------------------------------------------------- /flash_gemv/artifact/ablation/llama7B-mlp-full-fp32.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/ablation/llama7B-mlp-full-fp32.xlsx -------------------------------------------------------------------------------- /flash_gemv/artifact/ablation/mistral7B-mlp-full-fp32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/ablation/mistral7B-mlp-full-fp32.pdf -------------------------------------------------------------------------------- /flash_gemv/artifact/ablation/mistral7B-mlp-full-fp32.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/ablation/mistral7B-mlp-full-fp32.xlsx -------------------------------------------------------------------------------- /flash_gemv/artifact/ablation/plot_ablation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/ablation/plot_ablation.py -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/calculate_speedup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/calculate_speedup.py -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/e2e_gen_llama_fp32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/e2e_gen_llama_fp32.pdf -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/e2e_gen_mistral_fp32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/e2e_gen_mistral_fp32.pdf -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/e2e_generation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/e2e_generation.pdf -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/geom_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/geom_e2e.py -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/llama/dense-fp32-1-1024.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/llama/dense-fp32-1-1024.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/llama/dense-fp32-1-2048.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/llama/dense-fp32-1-2048.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/llama/dense-fp32-1-256.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/llama/dense-fp32-1-256.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/llama/dense-fp32-1-512.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/llama/dense-fp32-1-512.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/llama/triton-fp32-1-1024.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/llama/triton-fp32-1-1024.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/llama/triton-fp32-1-2048.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/llama/triton-fp32-1-2048.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/llama/triton-fp32-1-256.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/llama/triton-fp32-1-256.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/llama/triton-fp32-1-512.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/llama/triton-fp32-1-512.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/mistral/dense-fp32-1-1024.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/mistral/dense-fp32-1-1024.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/mistral/dense-fp32-1-2048.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/mistral/dense-fp32-1-2048.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/mistral/dense-fp32-1-256.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/mistral/dense-fp32-1-256.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/mistral/dense-fp32-1-512.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/mistral/dense-fp32-1-512.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/mistral/triton-fp32-1-1024.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/mistral/triton-fp32-1-1024.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/mistral/triton-fp32-1-2048.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/mistral/triton-fp32-1-2048.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/mistral/triton-fp32-1-256.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/mistral/triton-fp32-1-256.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/mistral/triton-fp32-1-512.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/mistral/triton-fp32-1-512.csv -------------------------------------------------------------------------------- /flash_gemv/artifact/e2e/plot_bar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/e2e/plot_bar.py -------------------------------------------------------------------------------- /flash_gemv/artifact/mlp/llama7B-mlp-l40s-fp32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/mlp/llama7B-mlp-l40s-fp32.pdf -------------------------------------------------------------------------------- /flash_gemv/artifact/mlp/llama7B-mlp-l40s-fp32.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/mlp/llama7B-mlp-l40s-fp32.xlsx -------------------------------------------------------------------------------- /flash_gemv/artifact/mlp/mistral7B-mlp-l40s-fp32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/mlp/mistral7B-mlp-l40s-fp32.pdf -------------------------------------------------------------------------------- /flash_gemv/artifact/mlp/mistral7B-mlp-l40s-fp32.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/mlp/mistral7B-mlp-l40s-fp32.xlsx -------------------------------------------------------------------------------- /flash_gemv/artifact/mlp/plot_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/artifact/mlp/plot_latency.py -------------------------------------------------------------------------------- /flash_gemv/bench/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_gemv/bench/final_profile_llama7B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/bench/final_profile_llama7B.sh -------------------------------------------------------------------------------- /flash_gemv/bench/final_profile_mistral7B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/bench/final_profile_mistral7B.sh -------------------------------------------------------------------------------- /flash_gemv/bench/plot_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/bench/plot_latency.py -------------------------------------------------------------------------------- /flash_gemv/bench/plot_latency_sub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/bench/plot_latency_sub.py -------------------------------------------------------------------------------- /flash_gemv/bench/profile_llama7B.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/bench/profile_llama7B.py -------------------------------------------------------------------------------- /flash_gemv/bench/profile_mistral7B.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/bench/profile_mistral7B.py -------------------------------------------------------------------------------- /flash_gemv/bench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/bench/utils.py -------------------------------------------------------------------------------- /flash_gemv/csrc/cuda/fuse_gemv_cmp_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/csrc/cuda/fuse_gemv_cmp_cuda.cu -------------------------------------------------------------------------------- /flash_gemv/csrc/cuda/fuse_gemv_cmp_cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/csrc/cuda/fuse_gemv_cmp_cuda.h -------------------------------------------------------------------------------- /flash_gemv/csrc/cuda/reduce_kernel_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/csrc/cuda/reduce_kernel_utils.cuh -------------------------------------------------------------------------------- /flash_gemv/csrc/fuse_gemv_cmp.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/csrc/fuse_gemv_cmp.cpp -------------------------------------------------------------------------------- /flash_gemv/flash_gemv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/flash_gemv/__init__.py -------------------------------------------------------------------------------- /flash_gemv/flash_gemv/flash_gemv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/flash_gemv/flash_gemv.py -------------------------------------------------------------------------------- /flash_gemv/flash_gemv/kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/flash_gemv/kernels.py -------------------------------------------------------------------------------- /flash_gemv/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/setup.py -------------------------------------------------------------------------------- /flash_gemv/test/test_fuse_gemv_cmp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/flash_gemv/test/test_fuse_gemv_cmp.py -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/inference.py -------------------------------------------------------------------------------- /reproduction_script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/reproduction_script.sh -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/bench_generation_llama7B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/scripts/bench_generation_llama7B.sh -------------------------------------------------------------------------------- /scripts/bench_generation_mistral7B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/scripts/bench_generation_mistral7B.sh -------------------------------------------------------------------------------- /scripts/evaluate_base_model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/scripts/evaluate_base_model.sh -------------------------------------------------------------------------------- /scripts/general_finetuning_llama_cats.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/scripts/general_finetuning_llama_cats.sh -------------------------------------------------------------------------------- /scripts/general_finetuning_llama_relufication.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/scripts/general_finetuning_llama_relufication.sh -------------------------------------------------------------------------------- /scripts/general_finetuning_mistral_cats.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/scripts/general_finetuning_mistral_cats.sh -------------------------------------------------------------------------------- /scripts/general_finetuning_mistral_relufication.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/scripts/general_finetuning_mistral_relufication.sh -------------------------------------------------------------------------------- /scripts/instruction_tuning.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/scripts/instruction_tuning.sh -------------------------------------------------------------------------------- /scripts/plot_mlp_histogram.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/scripts/plot_mlp_histogram.sh -------------------------------------------------------------------------------- /scripts/plot_post_training_activation_sparsity_per_layer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/scripts/plot_post_training_activation_sparsity_per_layer.sh -------------------------------------------------------------------------------- /scripts/zero_shot_evaluation_without_general_finetuning.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/scripts/zero_shot_evaluation_without_general_finetuning.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/setup.py -------------------------------------------------------------------------------- /utils/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/utils/.DS_Store -------------------------------------------------------------------------------- /utils/__pycache__/constants.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/utils/__pycache__/constants.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/constants.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/utils/__pycache__/constants.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/parse_string.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/utils/__pycache__/parse_string.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/parse_string.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/utils/__pycache__/parse_string.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/utils/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/utils/constants.py -------------------------------------------------------------------------------- /utils/mistral_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/utils/mistral_utils.py -------------------------------------------------------------------------------- /utils/parse_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/utils/parse_args.py -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/CATS/HEAD/utils/utils.py --------------------------------------------------------------------------------