├── .github └── workflows │ ├── cuda │ ├── cu102-Linux-env.sh │ ├── cu102-Linux.sh │ ├── cu113-Linux-env.sh │ ├── cu113-Linux.sh │ ├── cu116-Linux-env.sh │ └── cu116-Linux.sh │ ├── env.sh │ └── publish.yml ├── .gitignore ├── .gitmodules ├── AUTHORS ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── assets ├── flashattn_banner.jpg ├── flashattn_banner.pdf ├── flashattn_memory.jpg ├── flashattn_speedup.jpg ├── flashattn_speedup_3090.jpg ├── flashattn_speedup_a100_d128.jpg ├── flashattn_speedup_t4.jpg ├── flashattn_speedup_t4_fwd.jpg ├── gpt2_training_curve.jpg ├── gpt2_training_efficiency.jpg ├── gpt3_training_curve.jpg └── gpt3_training_efficiency.jpg ├── benchmarks ├── benchmark_causal.py └── benchmark_flash_attention.py ├── csrc ├── flash_attn │ ├── fmha_api.cpp │ └── src │ │ ├── fmha.h │ │ ├── fmha │ │ ├── gemm.h │ │ ├── gmem_tile.h │ │ ├── kernel_traits.h │ │ ├── mask.h │ │ ├── smem_tile.h │ │ ├── softmax.h │ │ └── utils.h │ │ ├── fmha_block_dgrad_fp16_kernel_loop.sm80.cu │ │ ├── fmha_block_dgrad_kernel_1xN_loop.h │ │ ├── fmha_block_fprop_fp16_kernel.sm80.cu │ │ ├── fmha_block_fprop_kernel_1xN.h │ │ ├── fmha_blockmask.h │ │ ├── fmha_bwd_hdim128.cu │ │ ├── fmha_bwd_hdim32.cu │ │ ├── fmha_bwd_hdim64.cu │ │ ├── fmha_bwd_launch_template.h │ │ ├── fmha_dgrad_kernel_1xN_loop.h │ │ ├── fmha_fprop_kernel_1xN.h │ │ ├── fmha_fwd_hdim128.cu │ │ ├── fmha_fwd_hdim32.cu │ │ ├── fmha_fwd_hdim64.cu │ │ ├── fmha_fwd_launch_template.h │ │ ├── fmha_kernel.h │ │ ├── fmha_utils.h │ │ ├── philox.cuh │ │ └── static_switch.h ├── fused_dense_lib │ ├── README.md │ ├── fused_dense.cpp │ ├── fused_dense_cuda.cu │ └── setup.py ├── fused_softmax │ ├── fused_softmax.cpp │ ├── scaled_masked_softmax.h │ ├── scaled_masked_softmax_cuda.cu │ ├── scaled_upper_triang_masked_softmax.h │ ├── scaled_upper_triang_masked_softmax_cuda.cu │ ├── setup.py │ └── type_shim.h ├── layer_norm │ ├── README.md │ ├── ln.h │ ├── ln_api.cpp │ ├── ln_bwd_1024.cu │ ├── ln_bwd_1280.cu │ ├── ln_bwd_1536.cu │ ├── ln_bwd_2048.cu │ ├── ln_bwd_256.cu │ ├── ln_bwd_2560.cu │ ├── ln_bwd_3072.cu │ ├── ln_bwd_4096.cu │ ├── ln_bwd_512.cu │ ├── ln_bwd_5120.cu │ ├── ln_bwd_6144.cu │ ├── ln_bwd_768.cu │ ├── ln_bwd_kernels.cuh │ ├── ln_fwd_1024.cu │ ├── ln_fwd_1280.cu │ ├── ln_fwd_1536.cu │ ├── ln_fwd_2048.cu │ ├── ln_fwd_256.cu │ ├── ln_fwd_2560.cu │ ├── ln_fwd_3072.cu │ ├── ln_fwd_4096.cu │ ├── ln_fwd_512.cu │ ├── ln_fwd_5120.cu │ ├── ln_fwd_6144.cu │ ├── ln_fwd_768.cu │ ├── ln_fwd_kernels.cuh │ ├── ln_kernel_traits.h │ ├── ln_utils.cuh │ ├── setup.py │ └── static_switch.h ├── rotary │ ├── rotary.cpp │ ├── rotary_cuda.cu │ └── setup.py └── xentropy │ ├── README.md │ ├── interface.cpp │ ├── setup.py │ └── xentropy_kernel.cu ├── flash_attn ├── __init__.py ├── bert_padding.py ├── flash_attention.py ├── flash_attn_interface.py ├── flash_attn_triton.py ├── flash_attn_triton_og.py ├── flash_blocksparse_attention.py ├── flash_blocksparse_attn_interface.py ├── fused_softmax.py ├── layers │ ├── __init__.py │ ├── patch_embed.py │ └── rotary.py ├── losses │ ├── __init__.py │ └── cross_entropy.py ├── models │ ├── __init__.py │ ├── bert.py │ ├── gpt.py │ └── vit.py ├── modules │ ├── __init__.py │ ├── block.py │ ├── embedding.py │ ├── mha.py │ └── mlp.py ├── ops │ ├── __init__.py │ ├── fused_dense.py │ ├── gelu_activation.py │ ├── layer_norm.py │ └── triton │ │ ├── k_activations.py │ │ ├── linear.py │ │ └── mlp.py └── utils │ ├── __init__.py │ ├── benchmark.py │ ├── distributed.py │ ├── generation.py │ └── pretrained.py ├── flash_attn_README.md ├── setup.py ├── tests ├── losses │ ├── test_cross_entropy.py │ └── test_cross_entropy_parallel.py ├── models │ ├── test_bert.py │ ├── test_gpt.py │ ├── test_gpt_generation.py │ └── test_gpt_parallel.py ├── modules │ ├── test_block_parallel.py │ ├── test_embedding_parallel.py │ └── test_mha_parallel.py ├── ops │ ├── test_dropout_layer_norm.py │ ├── test_fused_dense.py │ └── test_fused_dense_parallel.py ├── test_flash_attn.py └── test_rotary.py ├── training ├── Dockerfile ├── configs │ ├── callbacks │ │ ├── causality-monitor.yaml │ │ ├── default.yaml │ │ ├── ema.yaml │ │ ├── flop-count.yaml │ │ ├── gpu-monitor.yaml │ │ ├── model-summary.yaml │ │ ├── none.yaml │ │ ├── norm-monitor.yaml │ │ ├── params-log.yaml │ │ └── wandb.yaml │ ├── config.yaml │ ├── datamodule │ │ ├── openwebtext.yaml │ │ └── thepile.yaml │ ├── experiment │ │ ├── owt │ │ │ ├── backpack-micro-flash.yaml │ │ │ ├── backpack-mini-flash-vecs-1.yaml │ │ │ ├── backpack-mini-flash-vecs-16.yaml │ │ │ ├── backpack-mini-flash-vecs-4.yaml │ │ │ ├── backpack-mini-flash-vecs-64.yaml │ │ │ ├── backpack-mini-flash.yaml │ │ │ ├── backpack-small-flash-shrink-100k.yaml │ │ │ ├── backpack-small-flash.yaml │ │ │ ├── base.yaml │ │ │ ├── gpt2l-flash.yaml │ │ │ ├── gpt2l-hf.yaml │ │ │ ├── gpt2l.yaml │ │ │ ├── gpt2m-flash.yaml │ │ │ ├── gpt2m-hf.yaml │ │ │ ├── gpt2m.yaml │ │ │ ├── gpt2micro-flash.yaml │ │ │ ├── gpt2mini-flash.yaml │ │ │ ├── gpt2s-flash.yaml │ │ │ ├── gpt2s-hf.yaml │ │ │ ├── gpt2s.yaml │ │ │ ├── gpt2xl-flash.yaml │ │ │ ├── gpt2xl-hf.yaml │ │ │ └── gpt2xl.yaml │ │ └── pile │ │ │ ├── base.yaml │ │ │ ├── gpt3-2.7B-flash-8k.yaml │ │ │ ├── gpt3-2.7B-flash-hdim128-rotary-8k.yaml │ │ │ ├── gpt3-2.7B-flash-hdim128-rotary.yaml │ │ │ ├── gpt3-2.7B-flash-hdim128.yaml │ │ │ ├── gpt3-2.7B-flash-rotary-8k.yaml │ │ │ ├── gpt3-2.7B-flash-rotary.yaml │ │ │ ├── gpt3-2.7B-flash.yaml │ │ │ ├── gpt3-2.7B-hf-hdim128.yaml │ │ │ ├── gpt3-2.7B-hf.yaml │ │ │ ├── gpt3l-flash-8k.yaml │ │ │ ├── gpt3l-flash-rotary-30B.yaml │ │ │ ├── gpt3l-flash-rotary-8k.yaml │ │ │ ├── gpt3l-flash-rotary.yaml │ │ │ ├── gpt3l-flash.yaml │ │ │ ├── gpt3l-hf.yaml │ │ │ ├── gpt3m-flash-8k.yaml │ │ │ ├── gpt3m-flash-rotary-30B.yaml │ │ │ ├── gpt3m-flash-rotary-8k.yaml │ │ │ ├── gpt3m-flash-rotary.yaml │ │ │ ├── gpt3m-flash.yaml │ │ │ ├── gpt3m-hf.yaml │ │ │ ├── gpt3s-flash-8k.yaml │ │ │ ├── gpt3s-flash-rotary-30B.yaml │ │ │ ├── gpt3s-flash-rotary-8k.yaml │ │ │ ├── gpt3s-flash-rotary.yaml │ │ │ ├── gpt3s-flash.yaml │ │ │ ├── gpt3s-hf.yaml │ │ │ ├── gpt3xl-flash-8k.yaml │ │ │ ├── gpt3xl-flash-rotary-60B.yaml │ │ │ ├── gpt3xl-flash-rotary-8k.yaml │ │ │ ├── gpt3xl-flash-rotary.yaml │ │ │ ├── gpt3xl-flash.yaml │ │ │ └── gpt3xl-hf.yaml │ ├── logger │ │ ├── comet.yaml │ │ ├── csv.yaml │ │ ├── many_loggers.yaml │ │ ├── mlflow.yaml │ │ ├── neptune.yaml │ │ ├── tensorboard.yaml │ │ └── wandb.yaml │ ├── metrics │ │ ├── acc.yaml │ │ ├── acc_ignore_index.yaml │ │ ├── acctop5.yaml │ │ ├── mse.yaml │ │ ├── num-tokens.yaml │ │ └── perplexity.yaml │ ├── mode │ │ ├── debug.yaml │ │ ├── default.yaml │ │ ├── exp.yaml │ │ ├── profile.yaml │ │ └── smoke.yaml │ ├── model │ │ ├── backpack.yaml │ │ ├── gpt2-hf.yaml │ │ ├── gpt2.yaml │ │ └── gpt2model │ │ │ ├── gpt2-large.yaml │ │ │ ├── gpt2-medium.yaml │ │ │ ├── gpt2-micro.yaml │ │ │ ├── gpt2-mini.yaml │ │ │ ├── gpt2-small.yaml │ │ │ └── gpt2-xlarge.yaml │ ├── optimizer │ │ ├── adam.yaml │ │ ├── adamw-apex-distributed.yaml │ │ ├── adamw-apex-zero.yaml │ │ ├── adamw-apex.yaml │ │ ├── adamw-zero.yaml │ │ ├── adamw.yaml │ │ ├── fusedlamb-ds.yaml │ │ ├── fusedlamb.yaml │ │ └── sgd.yaml │ ├── scheduler │ │ ├── cosine-warmup-timm.yaml │ │ ├── cosine-warmup.yaml │ │ ├── invsqrt.yaml │ │ ├── linear-warmup.yaml │ │ ├── multi-step.yaml │ │ ├── plateau.yaml │ │ ├── poly-warmup.yaml │ │ └── step.yaml │ ├── task │ │ └── sequence-model.yaml │ └── trainer │ │ ├── all_params.yaml │ │ ├── ddp.yaml │ │ ├── debug.yaml │ │ └── default.yaml ├── control_words │ └── toxicity.txt ├── data │ ├── RG65.csv │ ├── SimLex-999 │ │ ├── README.txt │ │ └── SimLex-999.txt │ ├── SimVerb-3500.txt │ └── WS353.csv ├── demo_convert.py ├── do_backpack_topic.sh ├── do_backpack_toxicity.sh ├── do_pplm.sh ├── do_pplm_toxicity.sh ├── flash_attn_README.md ├── lt.py ├── make_val_strings.py ├── plot_gender.py ├── plot_topic.py ├── pplm_classification_head.py ├── run.py ├── run_all_backpack_topic.sh ├── run_all_pplm.sh ├── run_pplm.py ├── run_pplm_toxicity.sh ├── scripts │ └── make_topic_plot.py ├── src │ ├── callbacks │ │ ├── __init__.py │ │ ├── causality_monitor.py │ │ ├── ema.py │ │ ├── flop_count.py │ │ ├── gpu_affinity.py │ │ ├── loss_scale_monitor.py │ │ ├── model_checkpoint.py │ │ ├── norm_monitor.py │ │ ├── params_log.py │ │ ├── speed_monitor.py │ │ └── wandb_callbacks.py │ ├── datamodules │ │ ├── datasets │ │ │ ├── detokenizer.py │ │ │ └── lm_dataset.py │ │ ├── fault_tolerant_sampler.py │ │ ├── imagenet.py │ │ ├── language_modeling_hf.py │ │ └── timm_mixup.py │ ├── demo_generate.py │ ├── distributed │ │ └── ddp_comm_hooks.py │ ├── eval.py │ ├── interactive.py │ ├── localize_pred.py │ ├── losses │ │ └── cross_entropy.py │ ├── make_pca.py │ ├── metrics │ │ ├── accuracy.py │ │ ├── num_tokens.py │ │ └── perplexity.py │ ├── models │ │ ├── backpack.py │ │ ├── intervened_models.py │ │ └── modules │ │ │ └── seq_common.py │ ├── modulate_generate.py │ ├── optim │ │ ├── param_grouping.py │ │ └── timm_lr_scheduler.py │ ├── rank_vocab.py │ ├── run_mauve.py │ ├── run_simlex.py │ ├── tasks │ │ └── seq.py │ ├── test_genderbias.py │ ├── test_sentiment.py │ ├── test_topic.py │ ├── test_toxicity.py │ ├── train.py │ ├── utils │ │ ├── checkpoint.py │ │ ├── ddp_zero1.py │ │ ├── ddp_zero2.py │ │ ├── distributed.py │ │ ├── ema.py │ │ ├── flops.py │ │ ├── generation.py │ │ ├── gpu_affinity.py │ │ └── utils.py │ ├── visualize_sim.py │ └── visualize_vocab.py ├── tests │ └── datamodules │ │ └── test_language_modeling_hf.py ├── topic_classes │ ├── arts_culture.txt │ ├── business_entrepreneurs.txt │ ├── celebrity_pop_culture.txt │ ├── diaries_daily_life.txt │ ├── family.txt │ ├── fashion_style.txt │ ├── film_tv_video.txt │ ├── fitness_health.txt │ ├── food_dining.txt │ ├── gaming.txt │ ├── music.txt │ ├── news_social_concern.txt │ ├── other_hobbies.txt │ ├── relationships.txt │ ├── sports.txt │ ├── travel_adventure.txt │ └── youth_student_life.txt └── val-100len.json └── usage.md /.github/workflows/cuda/cu102-Linux-env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/.github/workflows/cuda/cu102-Linux-env.sh -------------------------------------------------------------------------------- /.github/workflows/cuda/cu102-Linux.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/.github/workflows/cuda/cu102-Linux.sh -------------------------------------------------------------------------------- /.github/workflows/cuda/cu113-Linux-env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/.github/workflows/cuda/cu113-Linux-env.sh -------------------------------------------------------------------------------- /.github/workflows/cuda/cu113-Linux.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/.github/workflows/cuda/cu113-Linux.sh -------------------------------------------------------------------------------- /.github/workflows/cuda/cu116-Linux-env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/.github/workflows/cuda/cu116-Linux-env.sh -------------------------------------------------------------------------------- /.github/workflows/cuda/cu116-Linux.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/.github/workflows/cuda/cu116-Linux.sh -------------------------------------------------------------------------------- /.github/workflows/env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/.github/workflows/env.sh -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/.github/workflows/publish.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/.gitmodules -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/AUTHORS -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/README.md -------------------------------------------------------------------------------- /assets/flashattn_banner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/flashattn_banner.jpg -------------------------------------------------------------------------------- /assets/flashattn_banner.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/flashattn_banner.pdf -------------------------------------------------------------------------------- /assets/flashattn_memory.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/flashattn_memory.jpg -------------------------------------------------------------------------------- /assets/flashattn_speedup.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/flashattn_speedup.jpg -------------------------------------------------------------------------------- /assets/flashattn_speedup_3090.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/flashattn_speedup_3090.jpg -------------------------------------------------------------------------------- /assets/flashattn_speedup_a100_d128.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/flashattn_speedup_a100_d128.jpg -------------------------------------------------------------------------------- /assets/flashattn_speedup_t4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/flashattn_speedup_t4.jpg -------------------------------------------------------------------------------- /assets/flashattn_speedup_t4_fwd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/flashattn_speedup_t4_fwd.jpg -------------------------------------------------------------------------------- /assets/gpt2_training_curve.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/gpt2_training_curve.jpg -------------------------------------------------------------------------------- /assets/gpt2_training_efficiency.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/gpt2_training_efficiency.jpg -------------------------------------------------------------------------------- /assets/gpt3_training_curve.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/gpt3_training_curve.jpg -------------------------------------------------------------------------------- /assets/gpt3_training_efficiency.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/assets/gpt3_training_efficiency.jpg -------------------------------------------------------------------------------- /benchmarks/benchmark_causal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/benchmarks/benchmark_causal.py -------------------------------------------------------------------------------- /benchmarks/benchmark_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/benchmarks/benchmark_flash_attention.py -------------------------------------------------------------------------------- /csrc/flash_attn/fmha_api.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/fmha_api.cpp -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha/gemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha/gemm.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha/gmem_tile.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha/gmem_tile.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha/kernel_traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha/kernel_traits.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha/mask.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha/mask.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha/smem_tile.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha/smem_tile.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha/softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha/softmax.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha/utils.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_block_dgrad_fp16_kernel_loop.sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_block_dgrad_fp16_kernel_loop.sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_block_dgrad_kernel_1xN_loop.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_block_dgrad_kernel_1xN_loop.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_block_fprop_fp16_kernel.sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_block_fprop_fp16_kernel.sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_block_fprop_kernel_1xN.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_block_fprop_kernel_1xN.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_blockmask.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_blockmask.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_bwd_hdim128.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_bwd_hdim128.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_bwd_hdim32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_bwd_hdim32.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_bwd_hdim64.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_bwd_hdim64.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_bwd_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_bwd_launch_template.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_dgrad_kernel_1xN_loop.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_dgrad_kernel_1xN_loop.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_fprop_kernel_1xN.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_fprop_kernel_1xN.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_fwd_hdim128.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_fwd_hdim128.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_fwd_hdim32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_fwd_hdim32.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_fwd_hdim64.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_fwd_hdim64.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_fwd_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_fwd_launch_template.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_kernel.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/fmha_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/fmha_utils.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/philox.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/philox.cuh -------------------------------------------------------------------------------- /csrc/flash_attn/src/static_switch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/flash_attn/src/static_switch.h -------------------------------------------------------------------------------- /csrc/fused_dense_lib/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/fused_dense_lib/README.md -------------------------------------------------------------------------------- /csrc/fused_dense_lib/fused_dense.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/fused_dense_lib/fused_dense.cpp -------------------------------------------------------------------------------- /csrc/fused_dense_lib/fused_dense_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/fused_dense_lib/fused_dense_cuda.cu -------------------------------------------------------------------------------- /csrc/fused_dense_lib/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/fused_dense_lib/setup.py -------------------------------------------------------------------------------- /csrc/fused_softmax/fused_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/fused_softmax/fused_softmax.cpp -------------------------------------------------------------------------------- /csrc/fused_softmax/scaled_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/fused_softmax/scaled_masked_softmax.h -------------------------------------------------------------------------------- /csrc/fused_softmax/scaled_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/fused_softmax/scaled_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /csrc/fused_softmax/scaled_upper_triang_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/fused_softmax/scaled_upper_triang_masked_softmax.h -------------------------------------------------------------------------------- /csrc/fused_softmax/scaled_upper_triang_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/fused_softmax/scaled_upper_triang_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /csrc/fused_softmax/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/fused_softmax/setup.py -------------------------------------------------------------------------------- /csrc/fused_softmax/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/fused_softmax/type_shim.h -------------------------------------------------------------------------------- /csrc/layer_norm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/README.md -------------------------------------------------------------------------------- /csrc/layer_norm/ln.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln.h -------------------------------------------------------------------------------- /csrc/layer_norm/ln_api.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_api.cpp -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_1024.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_1024.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_1280.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_1280.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_1536.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_1536.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_2048.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_2048.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_256.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_256.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_2560.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_2560.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_3072.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_3072.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_4096.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_4096.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_512.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_512.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_5120.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_5120.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_6144.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_6144.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_768.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_768.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_kernels.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_bwd_kernels.cuh -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_1024.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_1024.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_1280.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_1280.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_1536.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_1536.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_2048.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_2048.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_256.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_256.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_2560.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_2560.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_3072.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_3072.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_4096.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_4096.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_512.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_512.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_5120.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_5120.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_6144.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_6144.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_768.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_768.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_kernels.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_fwd_kernels.cuh -------------------------------------------------------------------------------- /csrc/layer_norm/ln_kernel_traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_kernel_traits.h -------------------------------------------------------------------------------- /csrc/layer_norm/ln_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/ln_utils.cuh -------------------------------------------------------------------------------- /csrc/layer_norm/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/setup.py -------------------------------------------------------------------------------- /csrc/layer_norm/static_switch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/layer_norm/static_switch.h -------------------------------------------------------------------------------- /csrc/rotary/rotary.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/rotary/rotary.cpp -------------------------------------------------------------------------------- /csrc/rotary/rotary_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/rotary/rotary_cuda.cu -------------------------------------------------------------------------------- /csrc/rotary/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/rotary/setup.py -------------------------------------------------------------------------------- /csrc/xentropy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/xentropy/README.md -------------------------------------------------------------------------------- /csrc/xentropy/interface.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/xentropy/interface.cpp -------------------------------------------------------------------------------- /csrc/xentropy/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/xentropy/setup.py -------------------------------------------------------------------------------- /csrc/xentropy/xentropy_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/csrc/xentropy/xentropy_kernel.cu -------------------------------------------------------------------------------- /flash_attn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/bert_padding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/bert_padding.py -------------------------------------------------------------------------------- /flash_attn/flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/flash_attention.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/flash_attn_interface.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/flash_attn_triton.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_og.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/flash_attn_triton_og.py -------------------------------------------------------------------------------- /flash_attn/flash_blocksparse_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/flash_blocksparse_attention.py -------------------------------------------------------------------------------- /flash_attn/flash_blocksparse_attn_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/flash_blocksparse_attn_interface.py -------------------------------------------------------------------------------- /flash_attn/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/fused_softmax.py -------------------------------------------------------------------------------- /flash_attn/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/layers/patch_embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/layers/patch_embed.py -------------------------------------------------------------------------------- /flash_attn/layers/rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/layers/rotary.py -------------------------------------------------------------------------------- /flash_attn/losses/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/losses/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/losses/cross_entropy.py -------------------------------------------------------------------------------- /flash_attn/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/models/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/models/bert.py -------------------------------------------------------------------------------- /flash_attn/models/gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/models/gpt.py -------------------------------------------------------------------------------- /flash_attn/models/vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/models/vit.py -------------------------------------------------------------------------------- /flash_attn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/modules/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/modules/block.py -------------------------------------------------------------------------------- /flash_attn/modules/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/modules/embedding.py -------------------------------------------------------------------------------- /flash_attn/modules/mha.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/modules/mha.py -------------------------------------------------------------------------------- /flash_attn/modules/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/modules/mlp.py -------------------------------------------------------------------------------- /flash_attn/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/ops/fused_dense.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/ops/fused_dense.py -------------------------------------------------------------------------------- /flash_attn/ops/gelu_activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/ops/gelu_activation.py -------------------------------------------------------------------------------- /flash_attn/ops/layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/ops/layer_norm.py -------------------------------------------------------------------------------- /flash_attn/ops/triton/k_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/ops/triton/k_activations.py -------------------------------------------------------------------------------- /flash_attn/ops/triton/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/ops/triton/linear.py -------------------------------------------------------------------------------- /flash_attn/ops/triton/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/ops/triton/mlp.py -------------------------------------------------------------------------------- /flash_attn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/utils/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/utils/benchmark.py -------------------------------------------------------------------------------- /flash_attn/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/utils/distributed.py -------------------------------------------------------------------------------- /flash_attn/utils/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/utils/generation.py -------------------------------------------------------------------------------- /flash_attn/utils/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn/utils/pretrained.py -------------------------------------------------------------------------------- /flash_attn_README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/flash_attn_README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/setup.py -------------------------------------------------------------------------------- /tests/losses/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/losses/test_cross_entropy.py -------------------------------------------------------------------------------- /tests/losses/test_cross_entropy_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/losses/test_cross_entropy_parallel.py -------------------------------------------------------------------------------- /tests/models/test_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/models/test_bert.py -------------------------------------------------------------------------------- /tests/models/test_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/models/test_gpt.py -------------------------------------------------------------------------------- /tests/models/test_gpt_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/models/test_gpt_generation.py -------------------------------------------------------------------------------- /tests/models/test_gpt_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/models/test_gpt_parallel.py -------------------------------------------------------------------------------- /tests/modules/test_block_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/modules/test_block_parallel.py -------------------------------------------------------------------------------- /tests/modules/test_embedding_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/modules/test_embedding_parallel.py -------------------------------------------------------------------------------- /tests/modules/test_mha_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/modules/test_mha_parallel.py -------------------------------------------------------------------------------- /tests/ops/test_dropout_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/ops/test_dropout_layer_norm.py -------------------------------------------------------------------------------- /tests/ops/test_fused_dense.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/ops/test_fused_dense.py -------------------------------------------------------------------------------- /tests/ops/test_fused_dense_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/ops/test_fused_dense_parallel.py -------------------------------------------------------------------------------- /tests/test_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/test_flash_attn.py -------------------------------------------------------------------------------- /tests/test_rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/tests/test_rotary.py -------------------------------------------------------------------------------- /training/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/Dockerfile -------------------------------------------------------------------------------- /training/configs/callbacks/causality-monitor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/callbacks/causality-monitor.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/callbacks/default.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/ema.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/callbacks/ema.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/flop-count.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/callbacks/flop-count.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/gpu-monitor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/callbacks/gpu-monitor.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/model-summary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/callbacks/model-summary.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/none.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/configs/callbacks/norm-monitor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/callbacks/norm-monitor.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/params-log.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/callbacks/params-log.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/wandb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/callbacks/wandb.yaml -------------------------------------------------------------------------------- /training/configs/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/config.yaml -------------------------------------------------------------------------------- /training/configs/datamodule/openwebtext.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/datamodule/openwebtext.yaml -------------------------------------------------------------------------------- /training/configs/datamodule/thepile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/datamodule/thepile.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/backpack-micro-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/backpack-micro-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/backpack-mini-flash-vecs-1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/backpack-mini-flash-vecs-1.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/backpack-mini-flash-vecs-16.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/backpack-mini-flash-vecs-16.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/backpack-mini-flash-vecs-4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/backpack-mini-flash-vecs-4.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/backpack-mini-flash-vecs-64.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/backpack-mini-flash-vecs-64.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/backpack-mini-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/backpack-mini-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/backpack-small-flash-shrink-100k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/backpack-small-flash-shrink-100k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/backpack-small-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/backpack-small-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/base.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2l-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2l-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2l-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2l-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2l.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2l.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2m-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2m-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2m-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2m-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2m.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2m.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2micro-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2micro-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2mini-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2mini-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2s-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2s-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2s-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2s-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2s.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2s.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2xl-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2xl-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2xl-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2xl-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/owt/gpt2xl.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/base.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-flash-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3-2.7B-flash-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-flash-hdim128.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-flash-rotary-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3-2.7B-flash-rotary-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-flash-rotary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3-2.7B-flash-rotary.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3-2.7B-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-hf-hdim128.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3-2.7B-hf-hdim128.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3-2.7B-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-flash-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3l-flash-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-flash-rotary-30B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3l-flash-rotary-30B.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-flash-rotary-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3l-flash-rotary-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-flash-rotary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3l-flash-rotary.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3l-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3l-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-flash-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3m-flash-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-flash-rotary-30B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3m-flash-rotary-30B.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-flash-rotary-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3m-flash-rotary-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-flash-rotary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3m-flash-rotary.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3m-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3m-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-flash-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3s-flash-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-flash-rotary-30B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3s-flash-rotary-30B.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-flash-rotary-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3s-flash-rotary-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-flash-rotary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3s-flash-rotary.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3s-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3s-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-flash-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3xl-flash-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-flash-rotary-60B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3xl-flash-rotary-60B.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-flash-rotary-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3xl-flash-rotary-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-flash-rotary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3xl-flash-rotary.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3xl-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/experiment/pile/gpt3xl-hf.yaml -------------------------------------------------------------------------------- /training/configs/logger/comet.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/logger/comet.yaml -------------------------------------------------------------------------------- /training/configs/logger/csv.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/logger/csv.yaml -------------------------------------------------------------------------------- /training/configs/logger/many_loggers.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/logger/many_loggers.yaml -------------------------------------------------------------------------------- /training/configs/logger/mlflow.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/logger/mlflow.yaml -------------------------------------------------------------------------------- /training/configs/logger/neptune.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/logger/neptune.yaml -------------------------------------------------------------------------------- /training/configs/logger/tensorboard.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/logger/tensorboard.yaml -------------------------------------------------------------------------------- /training/configs/logger/wandb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/logger/wandb.yaml -------------------------------------------------------------------------------- /training/configs/metrics/acc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/metrics/acc.yaml -------------------------------------------------------------------------------- /training/configs/metrics/acc_ignore_index.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/metrics/acc_ignore_index.yaml -------------------------------------------------------------------------------- /training/configs/metrics/acctop5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/metrics/acctop5.yaml -------------------------------------------------------------------------------- /training/configs/metrics/mse.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/metrics/mse.yaml -------------------------------------------------------------------------------- /training/configs/metrics/num-tokens.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/metrics/num-tokens.yaml -------------------------------------------------------------------------------- /training/configs/metrics/perplexity.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/metrics/perplexity.yaml -------------------------------------------------------------------------------- /training/configs/mode/debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/mode/debug.yaml -------------------------------------------------------------------------------- /training/configs/mode/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/mode/default.yaml -------------------------------------------------------------------------------- /training/configs/mode/exp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/mode/exp.yaml -------------------------------------------------------------------------------- /training/configs/mode/profile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/mode/profile.yaml -------------------------------------------------------------------------------- /training/configs/mode/smoke.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/mode/smoke.yaml -------------------------------------------------------------------------------- /training/configs/model/backpack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/model/backpack.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/model/gpt2-hf.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/model/gpt2.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2model/gpt2-large.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/model/gpt2model/gpt2-large.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2model/gpt2-medium.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/model/gpt2model/gpt2-medium.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2model/gpt2-micro.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/model/gpt2model/gpt2-micro.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2model/gpt2-mini.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/model/gpt2model/gpt2-mini.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2model/gpt2-small.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/model/gpt2model/gpt2-small.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2model/gpt2-xlarge.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/model/gpt2model/gpt2-xlarge.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adam.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/optimizer/adam.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adamw-apex-distributed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/optimizer/adamw-apex-distributed.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adamw-apex-zero.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/optimizer/adamw-apex-zero.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adamw-apex.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/optimizer/adamw-apex.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adamw-zero.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/optimizer/adamw-zero.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adamw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/optimizer/adamw.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/fusedlamb-ds.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/optimizer/fusedlamb-ds.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/fusedlamb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/optimizer/fusedlamb.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/sgd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/optimizer/sgd.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/cosine-warmup-timm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/scheduler/cosine-warmup-timm.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/cosine-warmup.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/scheduler/cosine-warmup.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/invsqrt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/scheduler/invsqrt.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/linear-warmup.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/scheduler/linear-warmup.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/multi-step.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/scheduler/multi-step.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/plateau.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/scheduler/plateau.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/poly-warmup.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/scheduler/poly-warmup.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/step.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/scheduler/step.yaml -------------------------------------------------------------------------------- /training/configs/task/sequence-model.yaml: -------------------------------------------------------------------------------- 1 | _target_: src.tasks.seq.SequenceModel 2 | -------------------------------------------------------------------------------- /training/configs/trainer/all_params.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/trainer/all_params.yaml -------------------------------------------------------------------------------- /training/configs/trainer/ddp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/trainer/ddp.yaml -------------------------------------------------------------------------------- /training/configs/trainer/debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/trainer/debug.yaml -------------------------------------------------------------------------------- /training/configs/trainer/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/configs/trainer/default.yaml -------------------------------------------------------------------------------- /training/control_words/toxicity.txt: -------------------------------------------------------------------------------- 1 | hate 2 | ugly 3 | idiot 4 | terror 5 | fuck 6 | -------------------------------------------------------------------------------- /training/data/RG65.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/data/RG65.csv -------------------------------------------------------------------------------- /training/data/SimLex-999/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/data/SimLex-999/README.txt -------------------------------------------------------------------------------- /training/data/SimLex-999/SimLex-999.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/data/SimLex-999/SimLex-999.txt -------------------------------------------------------------------------------- /training/data/SimVerb-3500.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/data/SimVerb-3500.txt -------------------------------------------------------------------------------- /training/data/WS353.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/data/WS353.csv -------------------------------------------------------------------------------- /training/demo_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/demo_convert.py -------------------------------------------------------------------------------- /training/do_backpack_topic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/do_backpack_topic.sh -------------------------------------------------------------------------------- /training/do_backpack_toxicity.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/do_backpack_toxicity.sh -------------------------------------------------------------------------------- /training/do_pplm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/do_pplm.sh -------------------------------------------------------------------------------- /training/do_pplm_toxicity.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/do_pplm_toxicity.sh -------------------------------------------------------------------------------- /training/flash_attn_README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/flash_attn_README.md -------------------------------------------------------------------------------- /training/lt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/lt.py -------------------------------------------------------------------------------- /training/make_val_strings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/make_val_strings.py -------------------------------------------------------------------------------- /training/plot_gender.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/plot_gender.py -------------------------------------------------------------------------------- /training/plot_topic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/plot_topic.py -------------------------------------------------------------------------------- /training/pplm_classification_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/pplm_classification_head.py -------------------------------------------------------------------------------- /training/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/run.py -------------------------------------------------------------------------------- /training/run_all_backpack_topic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/run_all_backpack_topic.sh -------------------------------------------------------------------------------- /training/run_all_pplm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/run_all_pplm.sh -------------------------------------------------------------------------------- /training/run_pplm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/run_pplm.py -------------------------------------------------------------------------------- /training/run_pplm_toxicity.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/run_pplm_toxicity.sh -------------------------------------------------------------------------------- /training/scripts/make_topic_plot.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/src/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/src/callbacks/causality_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/callbacks/causality_monitor.py -------------------------------------------------------------------------------- /training/src/callbacks/ema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/callbacks/ema.py -------------------------------------------------------------------------------- /training/src/callbacks/flop_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/callbacks/flop_count.py -------------------------------------------------------------------------------- /training/src/callbacks/gpu_affinity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/callbacks/gpu_affinity.py -------------------------------------------------------------------------------- /training/src/callbacks/loss_scale_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/callbacks/loss_scale_monitor.py -------------------------------------------------------------------------------- /training/src/callbacks/model_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/callbacks/model_checkpoint.py -------------------------------------------------------------------------------- /training/src/callbacks/norm_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/callbacks/norm_monitor.py -------------------------------------------------------------------------------- /training/src/callbacks/params_log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/callbacks/params_log.py -------------------------------------------------------------------------------- /training/src/callbacks/speed_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/callbacks/speed_monitor.py -------------------------------------------------------------------------------- /training/src/callbacks/wandb_callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/callbacks/wandb_callbacks.py -------------------------------------------------------------------------------- /training/src/datamodules/datasets/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/datamodules/datasets/detokenizer.py -------------------------------------------------------------------------------- /training/src/datamodules/datasets/lm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/datamodules/datasets/lm_dataset.py -------------------------------------------------------------------------------- /training/src/datamodules/fault_tolerant_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/datamodules/fault_tolerant_sampler.py -------------------------------------------------------------------------------- /training/src/datamodules/imagenet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/datamodules/imagenet.py -------------------------------------------------------------------------------- /training/src/datamodules/language_modeling_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/datamodules/language_modeling_hf.py -------------------------------------------------------------------------------- /training/src/datamodules/timm_mixup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/datamodules/timm_mixup.py -------------------------------------------------------------------------------- /training/src/demo_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/demo_generate.py -------------------------------------------------------------------------------- /training/src/distributed/ddp_comm_hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/distributed/ddp_comm_hooks.py -------------------------------------------------------------------------------- /training/src/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/eval.py -------------------------------------------------------------------------------- /training/src/interactive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/interactive.py -------------------------------------------------------------------------------- /training/src/localize_pred.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/localize_pred.py -------------------------------------------------------------------------------- /training/src/losses/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/losses/cross_entropy.py -------------------------------------------------------------------------------- /training/src/make_pca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/make_pca.py -------------------------------------------------------------------------------- /training/src/metrics/accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/metrics/accuracy.py -------------------------------------------------------------------------------- /training/src/metrics/num_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/metrics/num_tokens.py -------------------------------------------------------------------------------- /training/src/metrics/perplexity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/metrics/perplexity.py -------------------------------------------------------------------------------- /training/src/models/backpack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/models/backpack.py -------------------------------------------------------------------------------- /training/src/models/intervened_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/models/intervened_models.py -------------------------------------------------------------------------------- /training/src/models/modules/seq_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/models/modules/seq_common.py -------------------------------------------------------------------------------- /training/src/modulate_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/modulate_generate.py -------------------------------------------------------------------------------- /training/src/optim/param_grouping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/optim/param_grouping.py -------------------------------------------------------------------------------- /training/src/optim/timm_lr_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/optim/timm_lr_scheduler.py -------------------------------------------------------------------------------- /training/src/rank_vocab.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/rank_vocab.py -------------------------------------------------------------------------------- /training/src/run_mauve.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/run_mauve.py -------------------------------------------------------------------------------- /training/src/run_simlex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/run_simlex.py -------------------------------------------------------------------------------- /training/src/tasks/seq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/tasks/seq.py -------------------------------------------------------------------------------- /training/src/test_genderbias.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/test_genderbias.py -------------------------------------------------------------------------------- /training/src/test_sentiment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/test_sentiment.py -------------------------------------------------------------------------------- /training/src/test_topic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/test_topic.py -------------------------------------------------------------------------------- /training/src/test_toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/test_toxicity.py -------------------------------------------------------------------------------- /training/src/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/train.py -------------------------------------------------------------------------------- /training/src/utils/checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/utils/checkpoint.py -------------------------------------------------------------------------------- /training/src/utils/ddp_zero1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/utils/ddp_zero1.py -------------------------------------------------------------------------------- /training/src/utils/ddp_zero2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/utils/ddp_zero2.py -------------------------------------------------------------------------------- /training/src/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/utils/distributed.py -------------------------------------------------------------------------------- /training/src/utils/ema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/utils/ema.py -------------------------------------------------------------------------------- /training/src/utils/flops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/utils/flops.py -------------------------------------------------------------------------------- /training/src/utils/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/utils/generation.py -------------------------------------------------------------------------------- /training/src/utils/gpu_affinity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/utils/gpu_affinity.py -------------------------------------------------------------------------------- /training/src/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/utils/utils.py -------------------------------------------------------------------------------- /training/src/visualize_sim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/visualize_sim.py -------------------------------------------------------------------------------- /training/src/visualize_vocab.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/src/visualize_vocab.py -------------------------------------------------------------------------------- /training/tests/datamodules/test_language_modeling_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/tests/datamodules/test_language_modeling_hf.py -------------------------------------------------------------------------------- /training/topic_classes/arts_culture.txt: -------------------------------------------------------------------------------- 1 | arts 2 | culture 3 | -------------------------------------------------------------------------------- /training/topic_classes/business_entrepreneurs.txt: -------------------------------------------------------------------------------- 1 | business 2 | entrepreneurs 3 | -------------------------------------------------------------------------------- /training/topic_classes/celebrity_pop_culture.txt: -------------------------------------------------------------------------------- 1 | celebrity 2 | pop 3 | culture 4 | -------------------------------------------------------------------------------- /training/topic_classes/diaries_daily_life.txt: -------------------------------------------------------------------------------- 1 | diaries 2 | daily 3 | life 4 | -------------------------------------------------------------------------------- /training/topic_classes/family.txt: -------------------------------------------------------------------------------- 1 | family 2 | -------------------------------------------------------------------------------- /training/topic_classes/fashion_style.txt: -------------------------------------------------------------------------------- 1 | fashion 2 | style 3 | -------------------------------------------------------------------------------- /training/topic_classes/film_tv_video.txt: -------------------------------------------------------------------------------- 1 | film 2 | tv 3 | video 4 | -------------------------------------------------------------------------------- /training/topic_classes/fitness_health.txt: -------------------------------------------------------------------------------- 1 | fitness 2 | health 3 | -------------------------------------------------------------------------------- /training/topic_classes/food_dining.txt: -------------------------------------------------------------------------------- 1 | food 2 | dining 3 | -------------------------------------------------------------------------------- /training/topic_classes/gaming.txt: -------------------------------------------------------------------------------- 1 | gaming 2 | -------------------------------------------------------------------------------- /training/topic_classes/music.txt: -------------------------------------------------------------------------------- 1 | music 2 | -------------------------------------------------------------------------------- /training/topic_classes/news_social_concern.txt: -------------------------------------------------------------------------------- 1 | news 2 | social 3 | concern 4 | -------------------------------------------------------------------------------- /training/topic_classes/other_hobbies.txt: -------------------------------------------------------------------------------- 1 | hobbies 2 | -------------------------------------------------------------------------------- /training/topic_classes/relationships.txt: -------------------------------------------------------------------------------- 1 | relationships 2 | -------------------------------------------------------------------------------- /training/topic_classes/sports.txt: -------------------------------------------------------------------------------- 1 | sports 2 | -------------------------------------------------------------------------------- /training/topic_classes/travel_adventure.txt: -------------------------------------------------------------------------------- 1 | travel 2 | adventure 3 | -------------------------------------------------------------------------------- /training/topic_classes/youth_student_life.txt: -------------------------------------------------------------------------------- 1 | youth 2 | student 3 | life 4 | -------------------------------------------------------------------------------- /training/val-100len.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/training/val-100len.json -------------------------------------------------------------------------------- /usage.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/john-hewitt/backpacks-flash-attn/HEAD/usage.md --------------------------------------------------------------------------------