├── .clang-format ├── .flake8 ├── .github └── ISSUE_TEMPLATE │ └── bug_report.yml ├── .gitignore ├── .gitlab-ci.yml ├── .gitlab ├── issue_templates │ ├── bug.md │ └── feature.md └── merge_request_templates │ └── merge.md ├── .gitmodules ├── .vscode └── settings.json ├── 3rdparty ├── CMakeLists.txt ├── INIReader.h ├── cub │ ├── agent │ │ ├── agent_histogram.cuh │ │ ├── agent_radix_sort_downsweep.cuh │ │ ├── agent_radix_sort_upsweep.cuh │ │ ├── agent_reduce.cuh │ │ ├── agent_reduce_by_key.cuh │ │ ├── agent_rle.cuh │ │ ├── agent_scan.cuh │ │ ├── agent_segment_fixup.cuh │ │ ├── agent_select_if.cuh │ │ ├── agent_spmv_orig.cuh │ │ └── single_pass_scan_operators.cuh │ ├── block │ │ ├── block_adjacent_difference.cuh │ │ ├── block_discontinuity.cuh │ │ ├── block_exchange.cuh │ │ ├── block_histogram.cuh │ │ ├── block_load.cuh │ │ ├── block_radix_rank.cuh │ │ ├── block_radix_sort.cuh │ │ ├── block_raking_layout.cuh │ │ ├── block_reduce.cuh │ │ ├── block_scan.cuh │ │ ├── block_shuffle.cuh │ │ ├── block_store.cuh │ │ └── specializations │ │ │ ├── block_histogram_atomic.cuh │ │ │ ├── block_histogram_sort.cuh │ │ │ ├── block_reduce_raking.cuh │ │ │ ├── block_reduce_raking_commutative_only.cuh │ │ │ ├── block_reduce_warp_reductions.cuh │ │ │ ├── block_scan_raking.cuh │ │ │ ├── block_scan_warp_scans.cuh │ │ │ ├── block_scan_warp_scans2.cuh │ │ │ └── block_scan_warp_scans3.cuh │ ├── cub.cuh │ ├── device │ │ ├── device_histogram.cuh │ │ ├── device_partition.cuh │ │ ├── device_radix_sort.cuh │ │ ├── device_reduce.cuh │ │ ├── device_run_length_encode.cuh │ │ ├── device_scan.cuh │ │ ├── device_segmented_radix_sort.cuh │ │ ├── device_segmented_reduce.cuh │ │ ├── device_select.cuh │ │ ├── device_spmv.cuh │ │ └── dispatch │ │ │ ├── dispatch_histogram.cuh │ │ │ ├── dispatch_radix_sort.cuh │ │ │ ├── dispatch_reduce.cuh │ │ │ ├── dispatch_reduce_by_key.cuh │ │ │ ├── dispatch_rle.cuh │ │ │ ├── dispatch_scan.cuh │ │ │ ├── dispatch_select_if.cuh │ │ │ └── dispatch_spmv_orig.cuh │ ├── grid │ │ ├── grid_barrier.cuh │ │ ├── grid_even_share.cuh │ │ ├── grid_mapping.cuh │ │ └── grid_queue.cuh │ ├── host │ │ └── mutex.cuh │ ├── iterator │ │ ├── arg_index_input_iterator.cuh │ │ ├── cache_modified_input_iterator.cuh │ │ ├── cache_modified_output_iterator.cuh │ │ ├── constant_input_iterator.cuh │ │ ├── counting_input_iterator.cuh │ │ ├── discard_output_iterator.cuh │ │ ├── tex_obj_input_iterator.cuh │ │ ├── tex_ref_input_iterator.cuh │ │ └── transform_input_iterator.cuh │ ├── thread │ │ ├── thread_load.cuh │ │ ├── thread_operators.cuh │ │ ├── thread_reduce.cuh │ │ ├── thread_scan.cuh │ │ ├── thread_search.cuh │ │ └── thread_store.cuh │ ├── util_allocator.cuh │ ├── util_arch.cuh │ ├── util_debug.cuh │ ├── util_device.cuh │ ├── util_macro.cuh │ ├── util_namespace.cuh │ ├── util_ptx.cuh │ ├── util_type.cuh │ └── warp │ │ ├── specializations │ │ ├── warp_reduce_shfl.cuh │ │ ├── warp_reduce_smem.cuh │ │ ├── warp_scan_shfl.cuh │ │ └── warp_scan_smem.cuh │ │ ├── warp_reduce.cuh │ │ └── warp_scan.cuh └── trt_fused_multihead_attention │ ├── CMakeLists.txt │ ├── common.cuh │ ├── cudaDriverWrapper.cpp │ ├── cudaDriverWrapper.h │ ├── fused_mha_with_relPosBias_fp16_128_32_kernel.sm75.cpp │ ├── fused_mha_with_relPosBias_fp16_128_32_kernel.sm80.cpp │ ├── fused_mha_with_relPosBias_fp16_128_32_kernel.sm86.cpp │ ├── fused_mha_with_relPosBias_fp16_256_32_kernel.sm75.cpp │ ├── fused_mha_with_relPosBias_fp16_256_32_kernel.sm80.cpp │ ├── fused_mha_with_relPosBias_fp16_256_32_kernel.sm86.cpp │ ├── fused_mha_with_relPosBias_fp16_64_32_kernel.sm75.cpp │ ├── fused_mha_with_relPosBias_fp16_64_32_kernel.sm80.cpp │ ├── fused_mha_with_relPosBias_fp16_64_32_kernel.sm86.cpp │ ├── fused_mha_with_relPosBias_int8_256_32_kernel.sm75.cpp │ ├── fused_mha_with_relPosBias_int8_256_32_kernel.sm80.cpp │ ├── fused_mha_with_relPosBias_int8_64_32_kernel.sm75.cpp │ ├── fused_mha_with_relPosBias_int8_64_32_kernel.sm80.cpp │ ├── fused_multihead_attention.h │ ├── fused_multihead_attention_common.h │ ├── fused_multihead_attention_fp16_128_64_kernel.sm75.cpp │ ├── fused_multihead_attention_fp16_128_64_kernel.sm80.cpp │ ├── fused_multihead_attention_fp16_384_64_kernel.sm75.cpp │ ├── fused_multihead_attention_fp16_384_64_kernel.sm80.cpp │ ├── fused_multihead_attention_fp16_64_64_kernel.sm75.cpp │ ├── fused_multihead_attention_fp16_64_64_kernel.sm80.cpp │ ├── fused_multihead_attention_fp16_96_64_kernel.sm75.cpp │ ├── fused_multihead_attention_fp16_96_64_kernel.sm80.cpp │ ├── fused_multihead_attention_int8_128_64_kernel.sm75.cpp │ ├── fused_multihead_attention_int8_128_64_kernel.sm80.cpp │ ├── fused_multihead_attention_int8_384_64_kernel.sm75.cpp │ ├── fused_multihead_attention_int8_384_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2.h │ ├── fused_multihead_attention_v2_fp16_128_64_kernel.sm70.cpp │ ├── fused_multihead_attention_v2_fp16_128_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_128_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_128_64_kernel.sm86.cpp │ ├── fused_multihead_attention_v2_fp16_256_64_kernel.sm70.cpp │ ├── fused_multihead_attention_v2_fp16_256_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_256_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_256_64_kernel.sm86.cpp │ ├── fused_multihead_attention_v2_fp16_384_64_kernel.sm70.cpp │ ├── fused_multihead_attention_v2_fp16_384_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_384_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_384_64_kernel.sm86.cpp │ ├── fused_multihead_attention_v2_fp16_64_64_kernel.sm70.cpp │ ├── fused_multihead_attention_v2_fp16_64_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_64_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_64_64_kernel.sm86.cpp │ ├── fused_multihead_attention_v2_fp16_96_64_kernel.sm70.cpp │ ├── fused_multihead_attention_v2_fp16_96_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_96_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_96_64_kernel.sm86.cpp │ ├── fused_multihead_attention_v2_int8_128_64_kernel.sm72.cpp │ ├── fused_multihead_attention_v2_int8_128_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_128_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_128_64_kernel.sm86.cpp │ ├── fused_multihead_attention_v2_int8_192_64_kernel.sm72.cpp │ ├── fused_multihead_attention_v2_int8_192_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_192_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_192_64_kernel.sm86.cpp │ ├── fused_multihead_attention_v2_int8_256_64_kernel.sm72.cpp │ ├── fused_multihead_attention_v2_int8_256_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_256_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_256_64_kernel.sm86.cpp │ ├── fused_multihead_attention_v2_int8_384_64_kernel.sm72.cpp │ ├── fused_multihead_attention_v2_int8_384_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_384_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_384_64_kernel.sm86.cpp │ ├── fused_multihead_attention_v2_int8_64_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_64_64_kernel.sm80.cpp │ ├── qkvToContext.cu │ └── qkvToContext.h ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── benchmarks ├── bert │ ├── pyt_benchmark.sh │ ├── pyt_int8_benchmark.sh │ ├── pyt_sp_fp16_benchmark.sh │ ├── pyt_sp_int8_mode2_benchmark.sh │ ├── tf_benchmark.sh │ └── tf_int8_benchmark.sh ├── decoding │ ├── pyt_decoding_beamsearch_benchmark.sh │ ├── tf_decoding_beamsearch_benchmark.sh │ └── tf_decoding_sampling_benchmark.sh └── t5 │ └── pyt_benchmark.sh ├── cmake ├── FasterTransformerConfig.cmake.in └── Modules │ └── FindNCCL.cmake ├── docker ├── Dockerfile ├── README.md ├── build.sh ├── check_cuda.py ├── install.sh ├── setenv.sh └── start.sh ├── docs ├── QAList.md ├── bert_guide.md ├── decoder_guide.md ├── gpt_guide.md ├── gptj_guide.md ├── images │ ├── FP-swin-flowchart.png │ ├── FT_Encoder_T4.png │ ├── FT_GPT_A100.png │ ├── INT8-swin-flowchart.png │ ├── PyTorch_Encoder_T4.png │ ├── Py_Decoder_T4.png │ ├── Py_Encoder_T4.png │ ├── TF_Decoder_T4.png │ ├── TF_Encoder_T4.png │ ├── decoding │ │ └── decoding.png │ ├── effective_transformer.png │ ├── encoder-decoding-2.png │ ├── encoder_flowchart.png │ ├── gpt │ │ ├── Megatron_530B_benchmark_1.png │ │ ├── Megatron_530B_benchmark_2.png │ │ ├── Megatron_530B_benchmark_3.png │ │ ├── Megatron_530B_benchmark_4.png │ │ ├── gpt.png │ │ ├── gpt_context.png │ │ └── parallelgpt.png │ ├── gpt_flowchart.png │ ├── longformer_compute_flow.png │ ├── vit │ │ ├── vit-FMHA.png │ │ └── vit-fp32-fp16-compute-flow.png │ ├── workflow-of-int8-inference.png │ └── xlnet_flowchart.png ├── longformer_guide.md ├── models │ ├── megatron-345m-model.md │ └── megatron-530b-model.md ├── swin_guide.md ├── t5_guide.md ├── vit_guide.md └── xlnet_guide.md ├── examples ├── CMakeLists.txt ├── cpp │ ├── CMakeLists.txt │ ├── bert │ │ ├── CMakeLists.txt │ │ └── bert_example.cc │ ├── bert_int8 │ │ ├── CMakeLists.txt │ │ └── bert_int8_example.cc │ ├── decoding │ │ ├── CMakeLists.txt │ │ ├── decoding_example.cc │ │ └── layernorm_test.cc │ ├── glm │ │ ├── CMakeLists.txt │ │ ├── bad_words.csv │ │ ├── glm_config.ini │ │ ├── glm_example.cc │ │ ├── glm_example_utils.cc │ │ ├── glm_example_utils.h │ │ ├── start_ids.csv │ │ └── stop_words.csv │ ├── gpt │ │ ├── CMakeLists.txt │ │ ├── gpt_config.ini │ │ ├── gpt_example.cc │ │ └── start_ids.csv │ ├── gptj │ │ ├── CMakeLists.txt │ │ ├── bad_words.csv │ │ ├── gptj_config.ini │ │ ├── gptj_example.cc │ │ ├── gptj_triton_example.cc │ │ ├── start_ids.csv │ │ └── stop_words.csv │ ├── multi_gpu_gpt │ │ ├── CMakeLists.txt │ │ ├── gpt_config.ini │ │ ├── gpt_example_utils.cc │ │ ├── gpt_example_utils.h │ │ ├── multi_gpu_gpt_async_example.cc │ │ ├── multi_gpu_gpt_example.cc │ │ ├── multi_gpu_gpt_triton_example.cc │ │ └── start_ids.csv │ ├── swin │ │ ├── CMakeLists.txt │ │ ├── functions.h │ │ └── swin_example.cc │ ├── swin_int8 │ │ ├── CMakeLists.txt │ │ └── swin_int8_example.cc │ ├── vit │ │ ├── CMakeLists.txt │ │ └── vit_example.cc │ ├── vit_int8 │ │ ├── CMakeLists.txt │ │ └── vit_int8_example.cc │ └── xlnet │ │ ├── CMakeLists.txt │ │ ├── cnpy.cpp │ │ ├── cnpy.h │ │ ├── xlnet_correctness_example.cc │ │ └── xlnet_example.cc ├── onnx │ └── multi_gpu_gpt │ │ └── onnx_ckpt_convert.py ├── pytorch │ ├── bert │ │ ├── bert-quantization-sparsity │ │ │ ├── .dockerignore │ │ │ ├── Dockerfile │ │ │ ├── LICENSE │ │ │ ├── NOTICE │ │ │ ├── README.md │ │ │ ├── README_orig.md │ │ │ ├── apex_sparsity │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── asp.py │ │ │ │ ├── sparse_masklib.py │ │ │ │ └── test │ │ │ │ │ ├── checkpointing_test_part1.py │ │ │ │ │ ├── checkpointing_test_part2.py │ │ │ │ │ ├── checkpointing_test_reference.py │ │ │ │ │ └── toy_problem.py │ │ │ ├── bert_config.json │ │ │ ├── checkpoints │ │ │ │ └── .keep │ │ │ ├── configurations.yml │ │ │ ├── create_pretraining_data.py │ │ │ ├── data │ │ │ │ ├── BooksDownloader.py │ │ │ │ ├── BookscorpusTextFormatting.py │ │ │ │ ├── Downloader.py │ │ │ │ ├── GLUEDownloader.py │ │ │ │ ├── GooglePretrainedWeightDownloader.py │ │ │ │ ├── NVIDIAPretrainedWeightDownloader.py │ │ │ │ ├── SquadDownloader.py │ │ │ │ ├── TextSharding.py │ │ │ │ ├── WikiDownloader.py │ │ │ │ ├── WikicorpusTextFormatting.py │ │ │ │ ├── __init__.py │ │ │ │ ├── bertPrep.py │ │ │ │ ├── create_datasets_from_start.sh │ │ │ │ └── squad │ │ │ │ │ └── squad_download.sh │ │ │ ├── extract_features.py │ │ │ ├── file_utils.py │ │ │ ├── images │ │ │ │ ├── loss_curves.png │ │ │ │ ├── model.png │ │ │ │ └── nvlamb.png │ │ │ ├── inference.py │ │ │ ├── modeling.py │ │ │ ├── optimization.py │ │ │ ├── processors │ │ │ │ ├── __init__.py │ │ │ │ └── glue.py │ │ │ ├── quant_utils.py │ │ │ ├── requirements.txt │ │ │ ├── run.sub │ │ │ ├── run_glue.py │ │ │ ├── run_pretraining.py │ │ │ ├── run_squad.py │ │ │ ├── run_swag.py │ │ │ ├── schedulers.py │ │ │ ├── scripts │ │ │ │ ├── configs │ │ │ │ │ ├── glue_config.sh │ │ │ │ │ ├── pretrain_config.sh │ │ │ │ │ └── squad_config.sh │ │ │ │ ├── data_download.sh │ │ │ │ ├── docker │ │ │ │ │ ├── build.sh │ │ │ │ │ └── launch.sh │ │ │ │ ├── run_glue.sh │ │ │ │ ├── run_pretraining.sh │ │ │ │ ├── run_squad.sh │ │ │ │ └── run_swag.sh │ │ │ ├── tokenization.py │ │ │ ├── utils.py │ │ │ └── vocab │ │ │ │ └── vocab │ │ ├── bert_example.py │ │ ├── run_glue.py │ │ ├── run_squad.py │ │ ├── scripts │ │ │ ├── run_mrpc.sh │ │ │ └── run_squad.sh │ │ └── utils │ │ │ ├── checkpoint_quantization.py │ │ │ ├── encoder.py │ │ │ ├── get_mrpc_data.py │ │ │ └── modeling_bert.py │ ├── decoder │ │ ├── decoder_example.py │ │ └── utils │ │ │ ├── decoder.py │ │ │ └── ft_decoder.py │ ├── decoding │ │ ├── decoding_example.py │ │ ├── translate_example.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── decoding.py │ │ │ ├── download_model.sh │ │ │ ├── ft_decoding.py │ │ │ ├── recover_bpe.py │ │ │ ├── translation │ │ │ ├── test.de │ │ │ ├── test.en │ │ │ └── wmtende.vocab │ │ │ ├── translation_model.py │ │ │ └── translator.py │ ├── encoder │ │ ├── encoder_example.py │ │ └── utils │ │ │ └── ft_encoder.py │ ├── glm │ │ ├── benchmark-generation.sh │ │ ├── glm_example.py │ │ ├── glm_server.py │ │ ├── glm_server.sh │ │ ├── glm_server_test.py │ │ ├── icetk_glm_130B │ │ │ ├── __init__.py │ │ │ ├── ice_tokenizer.py │ │ │ └── tokenizer.py │ │ ├── input.txt │ │ └── utils │ │ │ ├── glm.py │ │ │ ├── glm_ckpt_convert.py │ │ │ ├── strategies.py │ │ │ └── tools.py │ ├── gpt │ │ ├── evaluate_zeroshot_gpt.py │ │ ├── gpt_example.py │ │ ├── gpt_summarization.py │ │ ├── multi_gpu_gpt_example.py │ │ ├── requirement.txt │ │ ├── scripts │ │ │ └── evaluate_zeroshot_gpt.sh │ │ └── utils │ │ │ ├── checkpoint_saver_fastertransformer.py │ │ │ ├── generate_gpt_config.py │ │ │ ├── generate_start_ids.py │ │ │ ├── gpt.py │ │ │ ├── gpt_token_converter.py │ │ │ ├── gpt_token_encoder.py │ │ │ ├── huggingface_gpt_convert.py │ │ │ ├── huggingface_jp_gpt_convert.py │ │ │ ├── megatron_ckpt_convert.py │ │ │ ├── megatron_ckpt_convert_2.py │ │ │ ├── nemo_ckpt_convert.py │ │ │ ├── parallel_gpt.py │ │ │ └── word_list.py │ ├── gptj │ │ └── utils │ │ │ ├── gptj_ckpt_convert.py │ │ │ └── reference_gptj.py │ ├── longformer │ │ ├── longformer_qa.py │ │ └── model.py │ ├── requirement.txt │ ├── swin │ │ ├── Swin-Transformer-Quantization │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── calib.sh │ │ │ ├── data.py │ │ │ ├── main.py │ │ │ ├── models.py │ │ │ ├── qat.sh │ │ │ ├── quant_utils.py │ │ │ └── run.sh │ │ ├── SwinTransformerINT8Weight.py │ │ ├── SwinTransformerWeightTransposeQKVWeight.py │ │ ├── checkpoint_quantization.py │ │ ├── infer_swintransformer_int8_op.py │ │ ├── infer_swintransformer_op.py │ │ ├── run_test.sh │ │ ├── run_test_int8.sh │ │ └── run_test_int8_accuracy.sh │ ├── t5 │ │ ├── perf_benchmark.py │ │ ├── requirement.txt │ │ ├── translate_example.py │ │ └── utils │ │ │ ├── ft_decoding.py │ │ │ ├── ft_encoder.py │ │ │ ├── huggingface_t5_ckpt_convert.py │ │ │ ├── megatron_t5_ckpt_convert.py │ │ │ ├── nemo_t5_ckpt_convert.py │ │ │ └── t5_ckpt_convert.py │ ├── utils.py │ └── vit │ │ ├── ViT-quantization │ │ ├── README.md │ │ ├── calib.sh │ │ ├── config.py │ │ ├── data.py │ │ ├── main.py │ │ ├── qat.sh │ │ ├── quant_utils.py │ │ └── vit_int8.py │ │ ├── VisionTransformerINT8WeightLoader.py │ │ ├── VisionTransformerWeightLoader.py │ │ ├── checkpoint_quantization.py │ │ ├── infer_visiontransformer_int8_op.py │ │ ├── infer_visiontransformer_op.py │ │ ├── requirement.txt │ │ ├── run.sh │ │ └── run2.sh ├── tensorflow │ ├── bert │ │ ├── bert-quantization │ │ │ ├── .dockerignore │ │ │ ├── CONTRIBUTING.md │ │ │ ├── Dockerfile │ │ │ ├── LICENSE │ │ │ ├── NOTICE │ │ │ ├── README.md │ │ │ ├── README_orig.md │ │ │ ├── __init__.py │ │ │ ├── configurations.yml │ │ │ ├── extract_features.py │ │ │ ├── fp16_utils.py │ │ │ ├── ft-tensorflow-quantization │ │ │ │ ├── README.md │ │ │ │ ├── ft_tensorflow_quantization │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── python │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── calib │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── calibrator.py │ │ │ │ │ │ ├── histogram.py │ │ │ │ │ │ └── max.py │ │ │ │ │ │ ├── layers │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── dense.py │ │ │ │ │ │ ├── tensor_quantizer.py │ │ │ │ │ │ └── utils.py │ │ │ │ │ │ ├── ops │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── fake_quantize.py │ │ │ │ │ │ └── utils │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── utils.py │ │ │ │ └── setup.py │ │ │ ├── fused_layer_norm.py │ │ │ ├── gpu_environment.py │ │ │ ├── modeling.py │ │ │ ├── optimization.py │ │ │ ├── run_pretraining.py │ │ │ ├── run_squad.py │ │ │ ├── tf_metrics.py │ │ │ ├── tokenization.py │ │ │ └── utils │ │ │ │ ├── create_glue_data.py │ │ │ │ ├── create_pretraining_data.py │ │ │ │ ├── create_squad_data.py │ │ │ │ └── utils.py │ │ ├── bert_example.py │ │ ├── tensorflow_bert │ │ │ ├── __init__.py │ │ │ ├── ckpt_quantization.py │ │ │ ├── ckpt_type_convert.py │ │ │ ├── fast_infer_util.py │ │ │ ├── my_modeling.py │ │ │ ├── profile_bert_inference.py │ │ │ ├── profile_transformer_inference.py │ │ │ ├── profile_util.py │ │ │ ├── run_classifier_wrap.py │ │ │ ├── run_squad_wrap.py │ │ │ ├── sample.md │ │ │ ├── squad_evaluate-v1.1.py │ │ │ └── squad_evaluate_v1_1.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── bert.py │ │ │ ├── common.py │ │ │ ├── position.py │ │ │ └── reducer.py │ ├── ckpt_type_convert.py │ ├── common_utils │ │ ├── common.py │ │ ├── position.py │ │ └── reducer.py │ ├── decoder │ │ ├── decoder_example.py │ │ └── utils │ │ │ ├── beam_search.py │ │ │ ├── common.py │ │ │ ├── decoder.py │ │ │ ├── decoding.py │ │ │ ├── position.py │ │ │ ├── reducer.py │ │ │ └── sampling.py │ ├── decoding │ │ ├── decoding_example.py │ │ ├── translate_example.py │ │ └── utils │ │ │ ├── bleu_score.py │ │ │ ├── ft_decoding.py │ │ │ └── translation │ │ │ ├── download_model_data.sh │ │ │ ├── test.de │ │ │ ├── test.en │ │ │ └── wmtende.vocab │ ├── encoder │ │ ├── encoder_example.py │ │ └── utils │ │ │ └── encoder.py │ ├── gpt │ │ ├── gpt_example.py │ │ └── utils │ │ │ ├── download_gpt2_model.py │ │ │ ├── gpt_token_converter.py │ │ │ ├── gpt_token_encoder.py │ │ │ └── openai_gpt_ckpt_converter.py │ ├── requirement.txt │ └── xlnet │ │ ├── convertInput.py │ │ ├── convertModel.py │ │ ├── downloadModel.sh │ │ ├── modeling.py │ │ ├── runData.py │ │ └── verifyCorrectness.sh └── tensorrt │ ├── swin │ ├── builder_fp16.py │ ├── builder_fp32.py │ ├── builder_int8.py │ ├── infer_swintransformer_plugin.py │ ├── infer_swintransformer_plugin_int8.py │ ├── run_builder_fp16.sh │ ├── run_builder_fp32.sh │ ├── run_builder_int8.sh │ ├── run_infer_fp16.sh │ ├── run_infer_fp32.sh │ └── run_infer_int8.sh │ ├── t5 │ ├── createT5TestData.py │ ├── extractT5ModelToBIN.py │ └── testT5Plugin.py │ └── vit │ ├── infer_visiontransformer_plugin.py │ └── plugin_loader.py ├── src ├── CMakeLists.txt └── fastertransformer │ ├── CMakeLists.txt │ ├── kernels │ ├── CMakeLists.txt │ ├── activation_int8_kernels.cu │ ├── activation_int8_kernels.h │ ├── activation_kernels.cu │ ├── activation_kernels.h │ ├── add_bias_transpose_kernels.cu │ ├── add_bias_transpose_kernels.h │ ├── add_residual_kernels.cu │ ├── add_residual_kernels.h │ ├── ban_bad_words.cu │ ├── ban_bad_words.h │ ├── beam_search_penalty_kernels.cu │ ├── beam_search_penalty_kernels.h │ ├── beam_search_topk_kernels.cu │ ├── beam_search_topk_kernels.h │ ├── bert_preprocess_kernels.cu │ ├── bert_preprocess_kernels.h │ ├── bfloat16_fallback_kenrels.cuh │ ├── calibrate_quantize_weight_kernels.cu │ ├── calibrate_quantize_weight_kernels.h │ ├── custom_ar_kernels.cu │ ├── custom_ar_kernels.h │ ├── decoder_masked_multihead_attention.cu │ ├── decoder_masked_multihead_attention.h │ ├── decoder_masked_multihead_attention_utils.h │ ├── decoding_kernels.cu │ ├── decoding_kernels.h │ ├── dequantize_kernels.cu │ ├── dequantize_kernels.h │ ├── gen_relative_pos_bias.cu │ ├── gen_relative_pos_bias.h │ ├── gpt_kernels.cu │ ├── gpt_kernels.h │ ├── int8_utils.cuh │ ├── layernorm_int8_kernels.cu │ ├── layernorm_int8_kernels.h │ ├── layernorm_kernels.cu │ ├── layernorm_kernels.h │ ├── layout_transformer_int8_kernels.cu │ ├── layout_transformer_int8_kernels.h │ ├── logprob_kernels.cu │ ├── logprob_kernels.h │ ├── longformer_kernels.cu │ ├── longformer_kernels.h │ ├── matrix_transpose_kernels.cu │ ├── matrix_transpose_kernels.h │ ├── matrix_vector_multiplication.cu │ ├── matrix_vector_multiplication.h │ ├── online_softmax_beamsearch_kernels.cu │ ├── online_softmax_beamsearch_kernels.h │ ├── quantization_int8_kernels.cu │ ├── quantization_int8_kernels.h │ ├── quantize_weight.cu │ ├── quantize_weight.h │ ├── reduce_kernel_utils.cuh │ ├── reverse_roll_kernels.cu │ ├── reverse_roll_kernels.h │ ├── sampling_penalty_kernels.cu │ ├── sampling_penalty_kernels.h │ ├── sampling_topk_kernels.cu │ ├── sampling_topk_kernels.h │ ├── sampling_topp_kernels.cu │ ├── sampling_topp_kernels.h │ ├── softmax_int8_kernels.cu │ ├── softmax_int8_kernels.h │ ├── stop_criteria_kernels.cu │ ├── stop_criteria_kernels.h │ ├── transform_mask_kernels.cu │ ├── transform_mask_kernels.h │ ├── transpose_int8_kernels.cu │ ├── transpose_int8_kernels.h │ ├── unfused_attention_int8_kernels.cu │ ├── unfused_attention_int8_kernels.h │ ├── unfused_attention_kernels.cu │ ├── unfused_attention_kernels.h │ ├── vit_kernels.cu │ ├── vit_kernels.h │ ├── xlnet_attention_kernels.cu │ ├── xlnet_attention_kernels.h │ ├── xlnet_preprocess_kernels.cu │ └── xlnet_preprocess_kernels.h │ ├── layers │ ├── BaseLayer.h │ ├── CMakeLists.txt │ ├── DenseWeight.h │ ├── DynamicDecodeBaseLayer.h │ ├── DynamicDecodeLayer.cc │ ├── DynamicDecodeLayer.h │ ├── FfnINT8Weight.h │ ├── FfnLayer.cc │ ├── FfnLayer.h │ ├── FfnLayerINT8.cc │ ├── FfnLayerINT8.h │ ├── FfnWeight.h │ ├── GluFfnLayer.cc │ ├── GluFfnLayer.h │ ├── GluFfnWeight.h │ ├── TensorParallelGeluFfnLayer.cc │ ├── TensorParallelGeluFfnLayer.h │ ├── TensorParallelGeluGluFfnLayer.cc │ ├── TensorParallelGeluGluFfnLayer.h │ ├── TensorParallelReluFfnLayer.cc │ ├── TensorParallelReluFfnLayer.h │ ├── attention_layers │ │ ├── AttentionWeight.h │ │ ├── BaseAttentionLayer.h │ │ ├── CMakeLists.txt │ │ ├── DecoderCrossAttentionLayer.cu │ │ ├── DecoderCrossAttentionLayer.h │ │ ├── DecoderSelfAttentionLayer.cc │ │ ├── DecoderSelfAttentionLayer.h │ │ ├── FusedAttentionLayer.cu │ │ ├── FusedAttentionLayer.h │ │ ├── GlmContextAttentionLayer.cc │ │ ├── GlmContextAttentionLayer.h │ │ ├── GlmDecoderSelfAttentionLayer.cc │ │ ├── GlmDecoderSelfAttentionLayer.h │ │ ├── GptContextAttentionLayer.cc │ │ ├── GptContextAttentionLayer.h │ │ ├── LongformerAttentionLayer.cc │ │ ├── LongformerAttentionLayer.h │ │ ├── TensorParallelDecoderCrossAttentionLayer.cc │ │ ├── TensorParallelDecoderCrossAttentionLayer.cu │ │ ├── TensorParallelDecoderCrossAttentionLayer.h │ │ ├── TensorParallelDecoderSelfAttentionLayer.cc │ │ ├── TensorParallelDecoderSelfAttentionLayer.h │ │ ├── TensorParallelGlmContextAttentionLayer.cc │ │ ├── TensorParallelGlmContextAttentionLayer.h │ │ ├── TensorParallelGlmDecoderSelfAttentionLayer.cc │ │ ├── TensorParallelGlmDecoderSelfAttentionLayer.h │ │ ├── TensorParallelGptContextAttentionLayer.cc │ │ ├── TensorParallelGptContextAttentionLayer.h │ │ ├── TensorParallelUnfusedAttentionLayer.cc │ │ ├── TensorParallelUnfusedAttentionLayer.h │ │ ├── UnfusedAttentionLayer.cc │ │ ├── UnfusedAttentionLayer.h │ │ ├── WindowAttention.cc │ │ └── WindowAttention.h │ ├── attention_layers_int8 │ │ ├── AttentionINT8Weight.h │ │ ├── CMakeLists.txt │ │ ├── FusedAttentionLayerINT8.cu │ │ ├── FusedAttentionLayerINT8.h │ │ ├── UnfusedAttentionLayerINT8.cc │ │ ├── UnfusedAttentionLayerINT8.h │ │ ├── WindowAttentionINT8.cu │ │ └── WindowAttentionINT8.h │ ├── beam_search_layers │ │ ├── BaseBeamSearchLayer.cu │ │ ├── BaseBeamSearchLayer.h │ │ ├── BeamSearchLayer.cu │ │ ├── BeamSearchLayer.h │ │ ├── CMakeLists.txt │ │ ├── OnlineBeamSearchLayer.cu │ │ └── OnlineBeamSearchLayer.h │ ├── sampling_layers │ │ ├── BaseSamplingLayer.cc │ │ ├── BaseSamplingLayer.h │ │ ├── CMakeLists.txt │ │ ├── TopKSamplingLayer.cu │ │ ├── TopKSamplingLayer.h │ │ ├── TopKTopPSamplingLayer.cu │ │ ├── TopKTopPSamplingLayer.h │ │ ├── TopPSamplingLayer.cu │ │ └── TopPSamplingLayer.h │ └── xlnet_attention_layers │ │ ├── CMakeLists.txt │ │ ├── XlnetAttentionLayer.cc │ │ ├── XlnetAttentionLayer.h │ │ └── XlnetAttentionWeight.h │ ├── models │ ├── CMakeLists.txt │ ├── bert │ │ ├── Bert.cc │ │ ├── Bert.h │ │ ├── BertLayerWeight.h │ │ ├── BertWeight.h │ │ ├── CMakeLists.txt │ │ └── bert_gemm.cc │ ├── bert_int8 │ │ ├── BertINT8.cc │ │ ├── BertINT8.h │ │ ├── BertLayerINT8.cc │ │ ├── BertLayerINT8.h │ │ ├── BertLayerINT8Weight.h │ │ └── CMakeLists.txt │ ├── decoder │ │ ├── CMakeLists.txt │ │ ├── Decoder.cc │ │ ├── Decoder.h │ │ └── DecoderLayerWeight.h │ ├── decoding │ │ ├── CMakeLists.txt │ │ ├── Decoding.cc │ │ ├── Decoding.h │ │ ├── DecodingWeight.h │ │ └── decoding_gemm.cc │ ├── glm │ │ ├── CMakeLists.txt │ │ ├── Glm.cc │ │ ├── Glm.h │ │ ├── GlmContextDecoder.cc │ │ ├── GlmContextDecoder.h │ │ ├── GlmDecoder.cc │ │ ├── GlmDecoder.h │ │ ├── GlmDecoderLayerWeight.cc │ │ ├── GlmDecoderLayerWeight.h │ │ ├── GlmWeight.cc │ │ └── GlmWeight.h │ ├── gpt │ │ ├── CMakeLists.txt │ │ ├── Gpt.cc │ │ ├── Gpt.h │ │ ├── GptContextDecoder.cc │ │ ├── GptContextDecoder.h │ │ ├── GptDecoder.cc │ │ ├── GptDecoder.h │ │ ├── GptDecoderLayerWeight.cc │ │ ├── GptDecoderLayerWeight.h │ │ ├── GptWeight.cc │ │ └── GptWeight.h │ ├── gptj │ │ ├── CMakeLists.txt │ │ ├── GptJ.cc │ │ ├── GptJ.h │ │ ├── GptJContextDecoder.cc │ │ ├── GptJContextDecoder.h │ │ ├── GptJDecoder.cc │ │ ├── GptJDecoder.h │ │ ├── GptJDecoderLayerWeight.cc │ │ ├── GptJDecoderLayerWeight.h │ │ ├── GptJWeight.cc │ │ └── GptJWeight.h │ ├── longformer │ │ ├── CMakeLists.txt │ │ ├── LongformerEncoder.cc │ │ └── LongformerEncoder.h │ ├── multi_gpu_gpt │ │ ├── CMakeLists.txt │ │ ├── ParallelGpt.cc │ │ ├── ParallelGpt.h │ │ ├── ParallelGptContextDecoder.cc │ │ ├── ParallelGptContextDecoder.h │ │ ├── ParallelGptDecoder.cc │ │ ├── ParallelGptDecoder.h │ │ ├── ParallelGptDecoderLayerWeight.cc │ │ ├── ParallelGptDecoderLayerWeight.h │ │ ├── ParallelGptWeight.cc │ │ ├── ParallelGptWeight.h │ │ └── gpt_gemm.cc │ ├── swin │ │ ├── CMakeLists.txt │ │ ├── Swin.cc │ │ ├── Swin.h │ │ ├── SwinBasicLayer.cc │ │ ├── SwinBasicLayer.h │ │ ├── SwinBlock.cc │ │ ├── SwinBlock.h │ │ ├── SwinWeight.h │ │ └── swin_gemm.cc │ ├── swin_int8 │ │ ├── CMakeLists.txt │ │ ├── SwinBasicLayerINT8.cc │ │ ├── SwinBasicLayerINT8.h │ │ ├── SwinBlockINT8.cc │ │ ├── SwinBlockINT8.h │ │ ├── SwinINT8.cc │ │ ├── SwinINT8.h │ │ └── SwinINT8Weight.h │ ├── t5 │ │ ├── CMakeLists.txt │ │ ├── T5Decoder.cc │ │ ├── T5Decoder.h │ │ ├── T5DecoderLayerWeight.cc │ │ ├── T5DecoderLayerWeight.h │ │ ├── T5Decoding.cc │ │ ├── T5Decoding.h │ │ ├── T5DecodingWeight.cc │ │ ├── T5DecodingWeight.h │ │ ├── T5Encoder.cc │ │ ├── T5Encoder.h │ │ ├── T5EncoderLayerWeight.cc │ │ ├── T5EncoderLayerWeight.h │ │ ├── T5EncoderWeight.cc │ │ ├── T5EncoderWeight.h │ │ └── t5_gemm.cc │ ├── vit │ │ ├── CMakeLists.txt │ │ ├── ViT.cc │ │ ├── ViT.h │ │ ├── ViTLayerWeight.h │ │ ├── ViTWeight.h │ │ └── vit_gemm.cc │ ├── vit_int8 │ │ ├── CMakeLists.txt │ │ ├── ViTINT8.cc │ │ ├── ViTINT8.h │ │ ├── ViTINT8Weight.h │ │ └── ViTLayerINT8Weight.h │ └── xlnet │ │ ├── CMakeLists.txt │ │ ├── Xlnet.cc │ │ ├── Xlnet.h │ │ ├── XlnetLayerWeight.h │ │ └── xlnet_gemm.cc │ ├── tensorrt_plugin │ ├── CMakeLists.txt │ ├── swin │ │ ├── CMakeLists.txt │ │ ├── serialize.hpp │ │ ├── swinTransformerINT8Plugin.cpp │ │ ├── swinTransformerINT8Plugin.h │ │ ├── swinTransformerPlugin.cpp │ │ └── swinTransformerPlugin.h │ ├── t5 │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── T5Plugin.cu │ │ ├── T5Plugin.h │ │ ├── T5PluginGemm.cc │ │ └── T5PluginGemm.h │ └── vit │ │ ├── CMakeLists.txt │ │ ├── ViTPlugin.cpp │ │ └── ViTPlugin.h │ ├── tf_op │ ├── BaseOp.h │ ├── CMakeLists.txt │ ├── bert │ │ ├── BertINT8Op.cc │ │ ├── BertOp.cc │ │ ├── CMakeLists.txt │ │ └── weight_quantize_op.cc │ ├── decoder │ │ ├── CMakeLists.txt │ │ ├── DecoderOp.cc │ │ └── FusedSelfAttentionOp.cc │ ├── decoding │ │ ├── CMakeLists.txt │ │ └── DecodingOp.cc │ ├── encoder │ │ ├── CMakeLists.txt │ │ └── EncoderOp.cc │ └── gpt │ │ ├── CMakeLists.txt │ │ └── GptOp.cc │ ├── th_op │ ├── CMakeLists.txt │ ├── bert │ │ ├── BertINT8Op.cc │ │ ├── BertINT8Op.h │ │ ├── BertOp.cc │ │ ├── BertOp.h │ │ ├── CMakeLists.txt │ │ └── WeightQuantizeOp.cc │ ├── decoder │ │ ├── CMakeLists.txt │ │ ├── DecoderOp.cc │ │ └── DecoderOp.h │ ├── decoding │ │ ├── CMakeLists.txt │ │ ├── DecodingOp.cc │ │ ├── DecodingOp.h │ │ ├── GatherTreeOp.cc │ │ └── GatherTreeOp.h │ ├── encoder │ │ ├── CMakeLists.txt │ │ ├── EncoderOp.cc │ │ └── EncoderOp.h │ ├── glm │ │ ├── CMakeLists.txt │ │ ├── GlmOp.cc │ │ └── GlmOp.h │ ├── gpt │ │ ├── CMakeLists.txt │ │ ├── GptOp.cc │ │ └── GptOp.h │ ├── longformer │ │ ├── CMakeLists.txt │ │ ├── LongformerEncoderOp.cc │ │ └── LongformerEncoderOp.h │ ├── multi_gpu_gpt │ │ ├── CMakeLists.txt │ │ ├── ParallelGptOp.cc │ │ ├── ParallelGptOp.h │ │ ├── WeightTransposeCalibrateQuantizeOp.cc │ │ └── WeightTransposeCalibrateQuantizeOp.h │ ├── swin │ │ ├── CMakeLists.txt │ │ ├── SwinINT8Op.cc │ │ ├── SwinINT8Op.h │ │ ├── SwinOp.cc │ │ ├── SwinOp.h │ │ └── WeightQuantizeOp.cc │ ├── t5 │ │ ├── CMakeLists.txt │ │ ├── T5DecoderOp.cc │ │ ├── T5DecoderOp.h │ │ ├── T5DecodingOp.cc │ │ ├── T5DecodingOp.h │ │ ├── T5EncoderOp.cc │ │ └── T5EncoderOp.h │ ├── th_traits.h │ ├── th_utils.cu │ ├── th_utils.h │ └── vit │ │ ├── CMakeLists.txt │ │ ├── ViTINT8Op.cc │ │ ├── ViTINT8Op.h │ │ ├── ViTOp.cc │ │ ├── ViTOp.h │ │ └── WeightQuantizeOp.cc │ ├── triton_backend │ ├── CMakeLists.txt │ ├── gptj │ │ ├── CMakeLists.txt │ │ ├── GptJTritonModel.cc │ │ ├── GptJTritonModel.h │ │ ├── GptJTritonModelInstance.cc │ │ └── GptJTritonModelInstance.h │ ├── multi_gpu_gpt │ │ ├── CMakeLists.txt │ │ ├── ParallelGptTritonModel.cc │ │ ├── ParallelGptTritonModel.h │ │ ├── ParallelGptTritonModelInstance.cc │ │ └── ParallelGptTritonModelInstance.h │ ├── t5 │ │ ├── CMakeLists.txt │ │ ├── T5TritonModel.cc │ │ ├── T5TritonModel.h │ │ ├── T5TritonModelInstance.cc │ │ └── T5TritonModelInstance.h │ ├── transformer_triton_backend.hpp │ └── triton_utils.hpp │ └── utils │ ├── CMakeLists.txt │ ├── ScaleList.h │ ├── Tensor.h │ ├── allocator.h │ ├── conv2d.h │ ├── convert_data_type.h │ ├── cublasAlgoMap.cc │ ├── cublasAlgoMap.h │ ├── cublasINT8MMWrapper.cc │ ├── cublasINT8MMWrapper.h │ ├── cublasMMWrapper.cc │ ├── cublasMMWrapper.h │ ├── cuda_bf16_wrapper.h │ ├── cuda_utils.h │ ├── custom_ar_comm.cc │ ├── custom_ar_comm.h │ ├── gemm.cc │ ├── gemm.h │ ├── gemm_test │ ├── CMakeLists.txt │ ├── decoding_gemm_func.cc │ ├── decoding_gemm_func.h │ ├── encoder_gemm_func.cc │ ├── encoder_gemm_func.h │ ├── encoder_igemm_func.cc │ ├── encoder_igemm_func.h │ ├── gemm_func.cc │ ├── gemm_func.h │ ├── gpt_gemm_func.cc │ ├── gpt_gemm_func.h │ ├── swin_gemm_func.cc │ ├── swin_gemm_func.h │ ├── swin_igemm_func.cc │ ├── swin_igemm_func.h │ ├── t5_gemm_func.cc │ ├── t5_gemm_func.h │ ├── xlnet_gemm_func.cc │ └── xlnet_gemm_func.h │ ├── logger.h │ ├── memory_utils.cu │ ├── memory_utils.h │ ├── mpi_utils.h │ ├── nccl_utils.cc │ ├── nccl_utils.h │ ├── nvtx_utils.cc │ ├── nvtx_utils.h │ ├── string_utils.h │ ├── word_list.cc │ └── word_list.h ├── templates └── adding_a_new_model │ └── README.md └── tests ├── CMakeLists.txt ├── bert ├── tf_bert_unit_test.py ├── tf_encoder_unit_test.py ├── th_bert_unit_test.py └── th_encoder_unit_test.py ├── decoding ├── tf_decoding_unit_test.py └── tf_fused_self_multihead_attention_unit_test.py ├── longformer └── py_longformer_unit_test.py └── unittests ├── CMakeLists.txt ├── test_gemm.cu ├── test_logprob_kernels.cu └── test_sampling.cu /.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/.clang-format -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/.flake8 -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/.github/ISSUE_TEMPLATE/bug_report.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/.gitlab-ci.yml -------------------------------------------------------------------------------- /.gitlab/issue_templates/bug.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/.gitlab/issue_templates/bug.md -------------------------------------------------------------------------------- /.gitlab/issue_templates/feature.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/.gitlab/issue_templates/feature.md -------------------------------------------------------------------------------- /.gitlab/merge_request_templates/merge.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/.gitlab/merge_request_templates/merge.md -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/.gitmodules -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /3rdparty/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/INIReader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/INIReader.h -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_histogram.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/agent/agent_histogram.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_radix_sort_downsweep.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/agent/agent_radix_sort_downsweep.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_radix_sort_upsweep.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/agent/agent_radix_sort_upsweep.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/agent/agent_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_reduce_by_key.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/agent/agent_reduce_by_key.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_rle.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/agent/agent_rle.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/agent/agent_scan.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_segment_fixup.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/agent/agent_segment_fixup.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_select_if.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/agent/agent_select_if.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_spmv_orig.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/agent/agent_spmv_orig.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/single_pass_scan_operators.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/agent/single_pass_scan_operators.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_adjacent_difference.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_adjacent_difference.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_discontinuity.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_discontinuity.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_exchange.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_exchange.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_histogram.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_histogram.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_load.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_load.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_radix_rank.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_radix_rank.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_radix_sort.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_radix_sort.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_raking_layout.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_raking_layout.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_scan.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_shuffle.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_shuffle.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_store.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/block_store.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/specializations/block_histogram_atomic.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/specializations/block_histogram_atomic.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/specializations/block_histogram_sort.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/specializations/block_histogram_sort.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/specializations/block_reduce_raking.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/specializations/block_reduce_raking.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/specializations/block_scan_raking.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/specializations/block_scan_raking.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/specializations/block_scan_warp_scans.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/specializations/block_scan_warp_scans.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/specializations/block_scan_warp_scans2.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/specializations/block_scan_warp_scans2.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/specializations/block_scan_warp_scans3.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/block/specializations/block_scan_warp_scans3.cuh -------------------------------------------------------------------------------- /3rdparty/cub/cub.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/cub.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_histogram.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/device_histogram.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_partition.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/device_partition.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_radix_sort.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/device_radix_sort.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/device_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_run_length_encode.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/device_run_length_encode.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/device_scan.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_segmented_radix_sort.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/device_segmented_radix_sort.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_segmented_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/device_segmented_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_select.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/device_select.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_spmv.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/device_spmv.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_histogram.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_histogram.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_radix_sort.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_radix_sort.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_reduce_by_key.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_reduce_by_key.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_rle.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_rle.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_scan.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_select_if.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_select_if.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_spmv_orig.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_spmv_orig.cuh -------------------------------------------------------------------------------- /3rdparty/cub/grid/grid_barrier.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/grid/grid_barrier.cuh -------------------------------------------------------------------------------- /3rdparty/cub/grid/grid_even_share.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/grid/grid_even_share.cuh -------------------------------------------------------------------------------- /3rdparty/cub/grid/grid_mapping.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/grid/grid_mapping.cuh -------------------------------------------------------------------------------- /3rdparty/cub/grid/grid_queue.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/grid/grid_queue.cuh -------------------------------------------------------------------------------- /3rdparty/cub/host/mutex.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/host/mutex.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/arg_index_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/iterator/arg_index_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/cache_modified_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/iterator/cache_modified_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/cache_modified_output_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/iterator/cache_modified_output_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/constant_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/iterator/constant_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/counting_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/iterator/counting_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/discard_output_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/iterator/discard_output_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/tex_obj_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/iterator/tex_obj_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/tex_ref_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/iterator/tex_ref_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/transform_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/iterator/transform_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_load.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/thread/thread_load.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_operators.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/thread/thread_operators.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/thread/thread_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/thread/thread_scan.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_search.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/thread/thread_search.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_store.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/thread/thread_store.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_allocator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/util_allocator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_arch.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/util_arch.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_debug.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/util_debug.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_device.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/util_device.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_macro.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/util_macro.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_namespace.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/util_namespace.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_ptx.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/util_ptx.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_type.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/util_type.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/specializations/warp_reduce_shfl.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/warp/specializations/warp_reduce_shfl.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/specializations/warp_reduce_smem.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/warp/specializations/warp_reduce_smem.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/specializations/warp_scan_shfl.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/warp/specializations/warp_scan_shfl.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/specializations/warp_scan_smem.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/warp/specializations/warp_scan_smem.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/warp_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/warp/warp_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/warp_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/cub/warp/warp_scan.cuh -------------------------------------------------------------------------------- /3rdparty/trt_fused_multihead_attention/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/trt_fused_multihead_attention/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/trt_fused_multihead_attention/common.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/trt_fused_multihead_attention/common.cuh -------------------------------------------------------------------------------- /3rdparty/trt_fused_multihead_attention/cudaDriverWrapper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/trt_fused_multihead_attention/cudaDriverWrapper.cpp -------------------------------------------------------------------------------- /3rdparty/trt_fused_multihead_attention/cudaDriverWrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/trt_fused_multihead_attention/cudaDriverWrapper.h -------------------------------------------------------------------------------- /3rdparty/trt_fused_multihead_attention/qkvToContext.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/trt_fused_multihead_attention/qkvToContext.cu -------------------------------------------------------------------------------- /3rdparty/trt_fused_multihead_attention/qkvToContext.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/3rdparty/trt_fused_multihead_attention/qkvToContext.h -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/README.md -------------------------------------------------------------------------------- /benchmarks/bert/pyt_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/benchmarks/bert/pyt_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bert/pyt_int8_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/benchmarks/bert/pyt_int8_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bert/pyt_sp_fp16_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/benchmarks/bert/pyt_sp_fp16_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bert/pyt_sp_int8_mode2_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/benchmarks/bert/pyt_sp_int8_mode2_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bert/tf_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/benchmarks/bert/tf_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bert/tf_int8_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/benchmarks/bert/tf_int8_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/decoding/pyt_decoding_beamsearch_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/benchmarks/decoding/pyt_decoding_beamsearch_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/decoding/tf_decoding_beamsearch_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/benchmarks/decoding/tf_decoding_beamsearch_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/decoding/tf_decoding_sampling_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/benchmarks/decoding/tf_decoding_sampling_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/t5/pyt_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/benchmarks/t5/pyt_benchmark.sh -------------------------------------------------------------------------------- /cmake/FasterTransformerConfig.cmake.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/cmake/FasterTransformerConfig.cmake.in -------------------------------------------------------------------------------- /cmake/Modules/FindNCCL.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/cmake/Modules/FindNCCL.cmake -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docker/Dockerfile -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docker/README.md -------------------------------------------------------------------------------- /docker/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docker/build.sh -------------------------------------------------------------------------------- /docker/check_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docker/check_cuda.py -------------------------------------------------------------------------------- /docker/install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docker/install.sh -------------------------------------------------------------------------------- /docker/setenv.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docker/setenv.sh -------------------------------------------------------------------------------- /docker/start.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docker/start.sh -------------------------------------------------------------------------------- /docs/QAList.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/QAList.md -------------------------------------------------------------------------------- /docs/bert_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/bert_guide.md -------------------------------------------------------------------------------- /docs/decoder_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/decoder_guide.md -------------------------------------------------------------------------------- /docs/gpt_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/gpt_guide.md -------------------------------------------------------------------------------- /docs/gptj_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/gptj_guide.md -------------------------------------------------------------------------------- /docs/images/FP-swin-flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/FP-swin-flowchart.png -------------------------------------------------------------------------------- /docs/images/FT_Encoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/FT_Encoder_T4.png -------------------------------------------------------------------------------- /docs/images/FT_GPT_A100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/FT_GPT_A100.png -------------------------------------------------------------------------------- /docs/images/INT8-swin-flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/INT8-swin-flowchart.png -------------------------------------------------------------------------------- /docs/images/PyTorch_Encoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/PyTorch_Encoder_T4.png -------------------------------------------------------------------------------- /docs/images/Py_Decoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/Py_Decoder_T4.png -------------------------------------------------------------------------------- /docs/images/Py_Encoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/Py_Encoder_T4.png -------------------------------------------------------------------------------- /docs/images/TF_Decoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/TF_Decoder_T4.png -------------------------------------------------------------------------------- /docs/images/TF_Encoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/TF_Encoder_T4.png -------------------------------------------------------------------------------- /docs/images/decoding/decoding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/decoding/decoding.png -------------------------------------------------------------------------------- /docs/images/effective_transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/effective_transformer.png -------------------------------------------------------------------------------- /docs/images/encoder-decoding-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/encoder-decoding-2.png -------------------------------------------------------------------------------- /docs/images/encoder_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/encoder_flowchart.png -------------------------------------------------------------------------------- /docs/images/gpt/Megatron_530B_benchmark_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/gpt/Megatron_530B_benchmark_1.png -------------------------------------------------------------------------------- /docs/images/gpt/Megatron_530B_benchmark_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/gpt/Megatron_530B_benchmark_2.png -------------------------------------------------------------------------------- /docs/images/gpt/Megatron_530B_benchmark_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/gpt/Megatron_530B_benchmark_3.png -------------------------------------------------------------------------------- /docs/images/gpt/Megatron_530B_benchmark_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/gpt/Megatron_530B_benchmark_4.png -------------------------------------------------------------------------------- /docs/images/gpt/gpt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/gpt/gpt.png -------------------------------------------------------------------------------- /docs/images/gpt/gpt_context.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/gpt/gpt_context.png -------------------------------------------------------------------------------- /docs/images/gpt/parallelgpt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/gpt/parallelgpt.png -------------------------------------------------------------------------------- /docs/images/gpt_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/gpt_flowchart.png -------------------------------------------------------------------------------- /docs/images/longformer_compute_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/longformer_compute_flow.png -------------------------------------------------------------------------------- /docs/images/vit/vit-FMHA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/vit/vit-FMHA.png -------------------------------------------------------------------------------- /docs/images/vit/vit-fp32-fp16-compute-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/vit/vit-fp32-fp16-compute-flow.png -------------------------------------------------------------------------------- /docs/images/workflow-of-int8-inference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/workflow-of-int8-inference.png -------------------------------------------------------------------------------- /docs/images/xlnet_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/images/xlnet_flowchart.png -------------------------------------------------------------------------------- /docs/longformer_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/longformer_guide.md -------------------------------------------------------------------------------- /docs/models/megatron-345m-model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/models/megatron-345m-model.md -------------------------------------------------------------------------------- /docs/models/megatron-530b-model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/models/megatron-530b-model.md -------------------------------------------------------------------------------- /docs/swin_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/swin_guide.md -------------------------------------------------------------------------------- /docs/t5_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/t5_guide.md -------------------------------------------------------------------------------- /docs/vit_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/vit_guide.md -------------------------------------------------------------------------------- /docs/xlnet_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/docs/xlnet_guide.md -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/bert/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/bert/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/bert/bert_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/bert/bert_example.cc -------------------------------------------------------------------------------- /examples/cpp/bert_int8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/bert_int8/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/bert_int8/bert_int8_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/bert_int8/bert_int8_example.cc -------------------------------------------------------------------------------- /examples/cpp/decoding/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/decoding/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/decoding/decoding_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/decoding/decoding_example.cc -------------------------------------------------------------------------------- /examples/cpp/decoding/layernorm_test.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/decoding/layernorm_test.cc -------------------------------------------------------------------------------- /examples/cpp/glm/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/glm/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/glm/bad_words.csv: -------------------------------------------------------------------------------- 1 | 7768,3908 2 | 1,2 3 | -------------------------------------------------------------------------------- /examples/cpp/glm/glm_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/glm/glm_config.ini -------------------------------------------------------------------------------- /examples/cpp/glm/glm_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/glm/glm_example.cc -------------------------------------------------------------------------------- /examples/cpp/glm/glm_example_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/glm/glm_example_utils.cc -------------------------------------------------------------------------------- /examples/cpp/glm/glm_example_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/glm/glm_example_utils.h -------------------------------------------------------------------------------- /examples/cpp/glm/start_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/glm/start_ids.csv -------------------------------------------------------------------------------- /examples/cpp/glm/stop_words.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/glm/stop_words.csv -------------------------------------------------------------------------------- /examples/cpp/gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/gpt/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/gpt/gpt_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/gpt/gpt_config.ini -------------------------------------------------------------------------------- /examples/cpp/gpt/gpt_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/gpt/gpt_example.cc -------------------------------------------------------------------------------- /examples/cpp/gpt/start_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/gpt/start_ids.csv -------------------------------------------------------------------------------- /examples/cpp/gptj/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/gptj/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/gptj/bad_words.csv: -------------------------------------------------------------------------------- 1 | 7768,3908 2 | 1,2 3 | -------------------------------------------------------------------------------- /examples/cpp/gptj/gptj_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/gptj/gptj_config.ini -------------------------------------------------------------------------------- /examples/cpp/gptj/gptj_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/gptj/gptj_example.cc -------------------------------------------------------------------------------- /examples/cpp/gptj/gptj_triton_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/gptj/gptj_triton_example.cc -------------------------------------------------------------------------------- /examples/cpp/gptj/start_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/gptj/start_ids.csv -------------------------------------------------------------------------------- /examples/cpp/gptj/stop_words.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/gptj/stop_words.csv -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/gpt_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/gpt_config.ini -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/gpt_example_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/gpt_example_utils.cc -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/gpt_example_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/gpt_example_utils.h -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/multi_gpu_gpt_async_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/multi_gpu_gpt_async_example.cc -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/multi_gpu_gpt_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/multi_gpu_gpt_example.cc -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/multi_gpu_gpt_triton_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/multi_gpu_gpt_triton_example.cc -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/start_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/start_ids.csv -------------------------------------------------------------------------------- /examples/cpp/swin/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/swin/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/swin/functions.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/swin/functions.h -------------------------------------------------------------------------------- /examples/cpp/swin/swin_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/swin/swin_example.cc -------------------------------------------------------------------------------- /examples/cpp/swin_int8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/swin_int8/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/swin_int8/swin_int8_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/swin_int8/swin_int8_example.cc -------------------------------------------------------------------------------- /examples/cpp/vit/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/vit/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/vit/vit_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/vit/vit_example.cc -------------------------------------------------------------------------------- /examples/cpp/vit_int8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/vit_int8/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/vit_int8/vit_int8_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/vit_int8/vit_int8_example.cc -------------------------------------------------------------------------------- /examples/cpp/xlnet/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/xlnet/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/xlnet/cnpy.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/xlnet/cnpy.cpp -------------------------------------------------------------------------------- /examples/cpp/xlnet/cnpy.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/xlnet/cnpy.h -------------------------------------------------------------------------------- /examples/cpp/xlnet/xlnet_correctness_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/xlnet/xlnet_correctness_example.cc -------------------------------------------------------------------------------- /examples/cpp/xlnet/xlnet_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/cpp/xlnet/xlnet_example.cc -------------------------------------------------------------------------------- /examples/onnx/multi_gpu_gpt/onnx_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/onnx/multi_gpu_gpt/onnx_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/.dockerignore -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/Dockerfile -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/LICENSE -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/NOTICE -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/README.md -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/README_orig.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/README_orig.md -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/checkpoints/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/file_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/file_utils.py -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/inference.py -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/modeling.py -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/optimization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/optimization.py -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/processors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/run.sub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/run.sub -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/run_glue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/run_glue.py -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/run_squad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/run_squad.py -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/run_swag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/run_swag.py -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/schedulers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/schedulers.py -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/utils.py -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/vocab/vocab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/vocab/vocab -------------------------------------------------------------------------------- /examples/pytorch/bert/bert_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/bert_example.py -------------------------------------------------------------------------------- /examples/pytorch/bert/run_glue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/run_glue.py -------------------------------------------------------------------------------- /examples/pytorch/bert/run_squad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/run_squad.py -------------------------------------------------------------------------------- /examples/pytorch/bert/scripts/run_mrpc.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/scripts/run_mrpc.sh -------------------------------------------------------------------------------- /examples/pytorch/bert/scripts/run_squad.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/scripts/run_squad.sh -------------------------------------------------------------------------------- /examples/pytorch/bert/utils/checkpoint_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/utils/checkpoint_quantization.py -------------------------------------------------------------------------------- /examples/pytorch/bert/utils/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/utils/encoder.py -------------------------------------------------------------------------------- /examples/pytorch/bert/utils/get_mrpc_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/utils/get_mrpc_data.py -------------------------------------------------------------------------------- /examples/pytorch/bert/utils/modeling_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/bert/utils/modeling_bert.py -------------------------------------------------------------------------------- /examples/pytorch/decoder/decoder_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoder/decoder_example.py -------------------------------------------------------------------------------- /examples/pytorch/decoder/utils/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoder/utils/decoder.py -------------------------------------------------------------------------------- /examples/pytorch/decoder/utils/ft_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoder/utils/ft_decoder.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/decoding_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/decoding_example.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/translate_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/translate_example.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/utils/__init__.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/utils/decoding.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/download_model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/utils/download_model.sh -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/ft_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/utils/ft_decoding.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/recover_bpe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/utils/recover_bpe.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/translation/test.de: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/utils/translation/test.de -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/translation/test.en: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/utils/translation/test.en -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/translation/wmtende.vocab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/utils/translation/wmtende.vocab -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/translation_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/utils/translation_model.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/translator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/decoding/utils/translator.py -------------------------------------------------------------------------------- /examples/pytorch/encoder/encoder_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/encoder/encoder_example.py -------------------------------------------------------------------------------- /examples/pytorch/encoder/utils/ft_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/encoder/utils/ft_encoder.py -------------------------------------------------------------------------------- /examples/pytorch/glm/benchmark-generation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/benchmark-generation.sh -------------------------------------------------------------------------------- /examples/pytorch/glm/glm_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/glm_example.py -------------------------------------------------------------------------------- /examples/pytorch/glm/glm_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/glm_server.py -------------------------------------------------------------------------------- /examples/pytorch/glm/glm_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/glm_server.sh -------------------------------------------------------------------------------- /examples/pytorch/glm/glm_server_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/glm_server_test.py -------------------------------------------------------------------------------- /examples/pytorch/glm/icetk_glm_130B/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/icetk_glm_130B/__init__.py -------------------------------------------------------------------------------- /examples/pytorch/glm/icetk_glm_130B/ice_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/icetk_glm_130B/ice_tokenizer.py -------------------------------------------------------------------------------- /examples/pytorch/glm/icetk_glm_130B/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/icetk_glm_130B/tokenizer.py -------------------------------------------------------------------------------- /examples/pytorch/glm/input.txt: -------------------------------------------------------------------------------- 1 | 凯旋门位于意大利米兰市古城堡旁。1807年为纪念[MASK]而建,门高25米,顶上矗立两武士青铜古兵车铸像。 -------------------------------------------------------------------------------- /examples/pytorch/glm/utils/glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/utils/glm.py -------------------------------------------------------------------------------- /examples/pytorch/glm/utils/glm_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/utils/glm_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/glm/utils/strategies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/utils/strategies.py -------------------------------------------------------------------------------- /examples/pytorch/glm/utils/tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/glm/utils/tools.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/evaluate_zeroshot_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/evaluate_zeroshot_gpt.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/gpt_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/gpt_example.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/gpt_summarization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/gpt_summarization.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/multi_gpu_gpt_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/multi_gpu_gpt_example.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/requirement.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | fire 3 | rouge_score 4 | transformers -------------------------------------------------------------------------------- /examples/pytorch/gpt/scripts/evaluate_zeroshot_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/scripts/evaluate_zeroshot_gpt.sh -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/generate_gpt_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/generate_gpt_config.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/generate_start_ids.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/generate_start_ids.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/gpt.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/gpt_token_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/gpt_token_converter.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/gpt_token_encoder.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/huggingface_gpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/huggingface_gpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/huggingface_jp_gpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/huggingface_jp_gpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/megatron_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/megatron_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/megatron_ckpt_convert_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/megatron_ckpt_convert_2.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/nemo_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/nemo_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/parallel_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/parallel_gpt.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/word_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gpt/utils/word_list.py -------------------------------------------------------------------------------- /examples/pytorch/gptj/utils/gptj_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gptj/utils/gptj_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gptj/utils/reference_gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/gptj/utils/reference_gptj.py -------------------------------------------------------------------------------- /examples/pytorch/longformer/longformer_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/longformer/longformer_qa.py -------------------------------------------------------------------------------- /examples/pytorch/longformer/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/longformer/model.py -------------------------------------------------------------------------------- /examples/pytorch/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/requirement.txt -------------------------------------------------------------------------------- /examples/pytorch/swin/Swin-Transformer-Quantization/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/Swin-Transformer-Quantization/README.md -------------------------------------------------------------------------------- /examples/pytorch/swin/Swin-Transformer-Quantization/calib.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/Swin-Transformer-Quantization/calib.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/Swin-Transformer-Quantization/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/Swin-Transformer-Quantization/data.py -------------------------------------------------------------------------------- /examples/pytorch/swin/Swin-Transformer-Quantization/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/Swin-Transformer-Quantization/main.py -------------------------------------------------------------------------------- /examples/pytorch/swin/Swin-Transformer-Quantization/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/Swin-Transformer-Quantization/models.py -------------------------------------------------------------------------------- /examples/pytorch/swin/Swin-Transformer-Quantization/qat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/Swin-Transformer-Quantization/qat.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/Swin-Transformer-Quantization/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/Swin-Transformer-Quantization/run.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/SwinTransformerINT8Weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/SwinTransformerINT8Weight.py -------------------------------------------------------------------------------- /examples/pytorch/swin/checkpoint_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/checkpoint_quantization.py -------------------------------------------------------------------------------- /examples/pytorch/swin/infer_swintransformer_int8_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/infer_swintransformer_int8_op.py -------------------------------------------------------------------------------- /examples/pytorch/swin/infer_swintransformer_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/infer_swintransformer_op.py -------------------------------------------------------------------------------- /examples/pytorch/swin/run_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/run_test.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/run_test_int8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/run_test_int8.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/run_test_int8_accuracy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/swin/run_test_int8_accuracy.sh -------------------------------------------------------------------------------- /examples/pytorch/t5/perf_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/t5/perf_benchmark.py -------------------------------------------------------------------------------- /examples/pytorch/t5/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/t5/requirement.txt -------------------------------------------------------------------------------- /examples/pytorch/t5/translate_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/t5/translate_example.py -------------------------------------------------------------------------------- /examples/pytorch/t5/utils/ft_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/t5/utils/ft_decoding.py -------------------------------------------------------------------------------- /examples/pytorch/t5/utils/ft_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/t5/utils/ft_encoder.py -------------------------------------------------------------------------------- /examples/pytorch/t5/utils/huggingface_t5_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/t5/utils/huggingface_t5_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/t5/utils/megatron_t5_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/t5/utils/megatron_t5_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/t5/utils/nemo_t5_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/t5/utils/nemo_t5_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/t5/utils/t5_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/t5/utils/t5_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/utils.py -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/README.md -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/calib.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/calib.sh -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/config.py -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/data.py -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/main.py -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/qat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/qat.sh -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/quant_utils.py -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/vit_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/vit_int8.py -------------------------------------------------------------------------------- /examples/pytorch/vit/VisionTransformerINT8WeightLoader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/VisionTransformerINT8WeightLoader.py -------------------------------------------------------------------------------- /examples/pytorch/vit/VisionTransformerWeightLoader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/VisionTransformerWeightLoader.py -------------------------------------------------------------------------------- /examples/pytorch/vit/checkpoint_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/checkpoint_quantization.py -------------------------------------------------------------------------------- /examples/pytorch/vit/infer_visiontransformer_int8_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/infer_visiontransformer_int8_op.py -------------------------------------------------------------------------------- /examples/pytorch/vit/infer_visiontransformer_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/infer_visiontransformer_op.py -------------------------------------------------------------------------------- /examples/pytorch/vit/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/requirement.txt -------------------------------------------------------------------------------- /examples/pytorch/vit/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/run.sh -------------------------------------------------------------------------------- /examples/pytorch/vit/run2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/pytorch/vit/run2.sh -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/.dockerignore -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/CONTRIBUTING.md -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/Dockerfile -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/LICENSE -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/NOTICE -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/README.md -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/README_orig.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/README_orig.md -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/__init__.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/configurations.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/configurations.yml -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/extract_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/extract_features.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/fp16_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/fp16_utils.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/fused_layer_norm.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/gpu_environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/gpu_environment.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/modeling.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/optimization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/optimization.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/run_pretraining.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/run_pretraining.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/run_squad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/run_squad.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/tf_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/tf_metrics.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/tokenization.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/utils/utils.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/bert_example.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/tensorflow_bert/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/tensorflow_bert/__init__.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/tensorflow_bert/ckpt_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/tensorflow_bert/ckpt_quantization.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/tensorflow_bert/ckpt_type_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/tensorflow_bert/ckpt_type_convert.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/tensorflow_bert/fast_infer_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/tensorflow_bert/fast_infer_util.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/tensorflow_bert/my_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/tensorflow_bert/my_modeling.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/tensorflow_bert/profile_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/tensorflow_bert/profile_util.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/tensorflow_bert/run_squad_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/tensorflow_bert/run_squad_wrap.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/tensorflow_bert/sample.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/tensorflow_bert/sample.md -------------------------------------------------------------------------------- /examples/tensorflow/bert/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/utils/__init__.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/utils/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/utils/bert.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/utils/common.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/utils/position.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/utils/position.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/utils/reducer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/bert/utils/reducer.py -------------------------------------------------------------------------------- /examples/tensorflow/ckpt_type_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/ckpt_type_convert.py -------------------------------------------------------------------------------- /examples/tensorflow/common_utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/common_utils/common.py -------------------------------------------------------------------------------- /examples/tensorflow/common_utils/position.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/common_utils/position.py -------------------------------------------------------------------------------- /examples/tensorflow/common_utils/reducer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/common_utils/reducer.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/decoder_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoder/decoder_example.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/beam_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/beam_search.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/common.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/decoder.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/decoding.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/position.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/position.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/reducer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/reducer.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/sampling.py -------------------------------------------------------------------------------- /examples/tensorflow/decoding/decoding_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoding/decoding_example.py -------------------------------------------------------------------------------- /examples/tensorflow/decoding/translate_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoding/translate_example.py -------------------------------------------------------------------------------- /examples/tensorflow/decoding/utils/bleu_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoding/utils/bleu_score.py -------------------------------------------------------------------------------- /examples/tensorflow/decoding/utils/ft_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoding/utils/ft_decoding.py -------------------------------------------------------------------------------- /examples/tensorflow/decoding/utils/translation/test.de: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoding/utils/translation/test.de -------------------------------------------------------------------------------- /examples/tensorflow/decoding/utils/translation/test.en: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoding/utils/translation/test.en -------------------------------------------------------------------------------- /examples/tensorflow/decoding/utils/translation/wmtende.vocab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/decoding/utils/translation/wmtende.vocab -------------------------------------------------------------------------------- /examples/tensorflow/encoder/encoder_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/encoder/encoder_example.py -------------------------------------------------------------------------------- /examples/tensorflow/encoder/utils/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/encoder/utils/encoder.py -------------------------------------------------------------------------------- /examples/tensorflow/gpt/gpt_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/gpt/gpt_example.py -------------------------------------------------------------------------------- /examples/tensorflow/gpt/utils/download_gpt2_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/gpt/utils/download_gpt2_model.py -------------------------------------------------------------------------------- /examples/tensorflow/gpt/utils/gpt_token_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/gpt/utils/gpt_token_converter.py -------------------------------------------------------------------------------- /examples/tensorflow/gpt/utils/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/gpt/utils/gpt_token_encoder.py -------------------------------------------------------------------------------- /examples/tensorflow/gpt/utils/openai_gpt_ckpt_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/gpt/utils/openai_gpt_ckpt_converter.py -------------------------------------------------------------------------------- /examples/tensorflow/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/requirement.txt -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/convertInput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/xlnet/convertInput.py -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/convertModel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/xlnet/convertModel.py -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/downloadModel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/xlnet/downloadModel.sh -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/xlnet/modeling.py -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/runData.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/xlnet/runData.py -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/verifyCorrectness.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorflow/xlnet/verifyCorrectness.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/builder_fp16.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/swin/builder_fp16.py -------------------------------------------------------------------------------- /examples/tensorrt/swin/builder_fp32.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/swin/builder_fp32.py -------------------------------------------------------------------------------- /examples/tensorrt/swin/builder_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/swin/builder_int8.py -------------------------------------------------------------------------------- /examples/tensorrt/swin/infer_swintransformer_plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/swin/infer_swintransformer_plugin.py -------------------------------------------------------------------------------- /examples/tensorrt/swin/infer_swintransformer_plugin_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/swin/infer_swintransformer_plugin_int8.py -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_builder_fp16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/swin/run_builder_fp16.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_builder_fp32.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/swin/run_builder_fp32.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_builder_int8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/swin/run_builder_int8.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_infer_fp16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/swin/run_infer_fp16.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_infer_fp32.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/swin/run_infer_fp32.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_infer_int8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/swin/run_infer_int8.sh -------------------------------------------------------------------------------- /examples/tensorrt/t5/createT5TestData.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/t5/createT5TestData.py -------------------------------------------------------------------------------- /examples/tensorrt/t5/extractT5ModelToBIN.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/t5/extractT5ModelToBIN.py -------------------------------------------------------------------------------- /examples/tensorrt/t5/testT5Plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/t5/testT5Plugin.py -------------------------------------------------------------------------------- /examples/tensorrt/vit/infer_visiontransformer_plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/vit/infer_visiontransformer_plugin.py -------------------------------------------------------------------------------- /examples/tensorrt/vit/plugin_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/examples/tensorrt/vit/plugin_loader.py -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/kernels/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/kernels/activation_int8_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/activation_int8_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/activation_int8_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/activation_int8_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/activation_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/activation_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/activation_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/activation_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/add_bias_transpose_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/add_bias_transpose_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/add_bias_transpose_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/add_bias_transpose_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/add_residual_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/add_residual_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/add_residual_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/add_residual_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/ban_bad_words.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/ban_bad_words.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/ban_bad_words.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/ban_bad_words.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/beam_search_penalty_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/beam_search_penalty_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/beam_search_penalty_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/beam_search_penalty_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/beam_search_topk_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/beam_search_topk_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/beam_search_topk_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/beam_search_topk_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/bert_preprocess_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/bert_preprocess_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/bert_preprocess_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/bert_preprocess_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/bfloat16_fallback_kenrels.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/bfloat16_fallback_kenrels.cuh -------------------------------------------------------------------------------- /src/fastertransformer/kernels/custom_ar_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/custom_ar_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/custom_ar_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/custom_ar_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/decoding_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/decoding_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/decoding_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/decoding_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/dequantize_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/dequantize_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/dequantize_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/dequantize_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/gen_relative_pos_bias.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/gen_relative_pos_bias.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/gen_relative_pos_bias.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/gen_relative_pos_bias.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/gpt_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/gpt_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/gpt_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/gpt_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/int8_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/int8_utils.cuh -------------------------------------------------------------------------------- /src/fastertransformer/kernels/layernorm_int8_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/layernorm_int8_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/layernorm_int8_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/layernorm_int8_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/layernorm_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/layernorm_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/layernorm_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/layernorm_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/logprob_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/logprob_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/logprob_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/logprob_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/longformer_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/longformer_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/longformer_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/longformer_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/matrix_transpose_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/matrix_transpose_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/matrix_transpose_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/matrix_transpose_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/matrix_vector_multiplication.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/matrix_vector_multiplication.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/matrix_vector_multiplication.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/matrix_vector_multiplication.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/quantization_int8_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/quantization_int8_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/quantization_int8_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/quantization_int8_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/quantize_weight.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/quantize_weight.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/quantize_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/quantize_weight.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/reduce_kernel_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/reduce_kernel_utils.cuh -------------------------------------------------------------------------------- /src/fastertransformer/kernels/reverse_roll_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/reverse_roll_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/reverse_roll_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/reverse_roll_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/sampling_penalty_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/sampling_penalty_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/sampling_penalty_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/sampling_penalty_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/sampling_topk_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/sampling_topk_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/sampling_topk_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/sampling_topk_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/sampling_topp_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/sampling_topp_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/sampling_topp_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/sampling_topp_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/softmax_int8_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/softmax_int8_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/softmax_int8_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/softmax_int8_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/stop_criteria_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/stop_criteria_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/stop_criteria_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/stop_criteria_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/transform_mask_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/transform_mask_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/transform_mask_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/transform_mask_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/transpose_int8_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/transpose_int8_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/transpose_int8_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/transpose_int8_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/unfused_attention_int8_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/unfused_attention_int8_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/unfused_attention_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/unfused_attention_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/unfused_attention_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/unfused_attention_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/vit_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/vit_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/vit_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/vit_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/xlnet_attention_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/xlnet_attention_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/xlnet_attention_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/xlnet_attention_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/xlnet_preprocess_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/xlnet_preprocess_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/xlnet_preprocess_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/kernels/xlnet_preprocess_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/BaseLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/BaseLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/layers/DenseWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/DenseWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/DynamicDecodeBaseLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/DynamicDecodeBaseLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/DynamicDecodeLayer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/DynamicDecodeLayer.cc -------------------------------------------------------------------------------- /src/fastertransformer/layers/DynamicDecodeLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/DynamicDecodeLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnINT8Weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/FfnINT8Weight.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnLayer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/FfnLayer.cc -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/FfnLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnLayerINT8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/FfnLayerINT8.cc -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnLayerINT8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/FfnLayerINT8.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/FfnWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/GluFfnLayer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/GluFfnLayer.cc -------------------------------------------------------------------------------- /src/fastertransformer/layers/GluFfnLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/GluFfnLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/GluFfnWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/GluFfnWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/TensorParallelGeluFfnLayer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/TensorParallelGeluFfnLayer.cc -------------------------------------------------------------------------------- /src/fastertransformer/layers/TensorParallelGeluFfnLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/TensorParallelGeluFfnLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/TensorParallelGeluGluFfnLayer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/TensorParallelGeluGluFfnLayer.cc -------------------------------------------------------------------------------- /src/fastertransformer/layers/TensorParallelGeluGluFfnLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/TensorParallelGeluGluFfnLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/TensorParallelReluFfnLayer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/TensorParallelReluFfnLayer.cc -------------------------------------------------------------------------------- /src/fastertransformer/layers/TensorParallelReluFfnLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/TensorParallelReluFfnLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/attention_layers/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/attention_layers/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/layers/beam_search_layers/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/beam_search_layers/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/layers/sampling_layers/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/layers/sampling_layers/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/Bert.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert/Bert.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/Bert.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert/Bert.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/BertLayerWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert/BertLayerWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/BertWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert/BertWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/bert_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert/bert_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_int8/BertINT8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert_int8/BertINT8.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_int8/BertINT8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert_int8/BertINT8.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_int8/BertLayerINT8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert_int8/BertLayerINT8.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_int8/BertLayerINT8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert_int8/BertLayerINT8.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_int8/BertLayerINT8Weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert_int8/BertLayerINT8Weight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_int8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/bert_int8/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/decoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/decoder/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/decoder/Decoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/decoder/Decoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/decoder/Decoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/decoder/Decoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/decoder/DecoderLayerWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/decoder/DecoderLayerWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/decoding/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/decoding/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/decoding/Decoding.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/decoding/Decoding.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/decoding/Decoding.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/decoding/Decoding.h -------------------------------------------------------------------------------- /src/fastertransformer/models/decoding/DecodingWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/decoding/DecodingWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/decoding/decoding_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/decoding/decoding_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/glm/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/glm/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/glm/Glm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/glm/Glm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/glm/Glm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/glm/Glm.h -------------------------------------------------------------------------------- /src/fastertransformer/models/glm/GlmContextDecoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/glm/GlmContextDecoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/glm/GlmContextDecoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/glm/GlmContextDecoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/glm/GlmDecoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/glm/GlmDecoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/glm/GlmDecoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/glm/GlmDecoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/glm/GlmDecoderLayerWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/glm/GlmDecoderLayerWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/glm/GlmDecoderLayerWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/glm/GlmDecoderLayerWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/glm/GlmWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/glm/GlmWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/glm/GlmWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/glm/GlmWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gpt/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt/Gpt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gpt/Gpt.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt/Gpt.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gpt/Gpt.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt/GptContextDecoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gpt/GptContextDecoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt/GptContextDecoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gpt/GptContextDecoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt/GptDecoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gpt/GptDecoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt/GptDecoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gpt/GptDecoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt/GptDecoderLayerWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gpt/GptDecoderLayerWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt/GptDecoderLayerWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gpt/GptDecoderLayerWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt/GptWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gpt/GptWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt/GptWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gpt/GptWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gptj/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJ.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJ.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJ.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJ.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJContextDecoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJContextDecoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJContextDecoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJContextDecoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJDecoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJDecoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJDecoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJDecoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJDecoderLayerWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJDecoderLayerWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJDecoderLayerWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJDecoderLayerWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/longformer/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/longformer/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/longformer/LongformerEncoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/longformer/LongformerEncoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/longformer/LongformerEncoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/longformer/LongformerEncoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/multi_gpu_gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/multi_gpu_gpt/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/multi_gpu_gpt/ParallelGpt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/multi_gpu_gpt/ParallelGpt.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/multi_gpu_gpt/ParallelGpt.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/multi_gpu_gpt/ParallelGpt.h -------------------------------------------------------------------------------- /src/fastertransformer/models/multi_gpu_gpt/ParallelGptWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/multi_gpu_gpt/ParallelGptWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/multi_gpu_gpt/gpt_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/multi_gpu_gpt/gpt_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/Swin.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin/Swin.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/Swin.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin/Swin.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/SwinBasicLayer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin/SwinBasicLayer.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/SwinBasicLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin/SwinBasicLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/SwinBlock.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin/SwinBlock.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/SwinBlock.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin/SwinBlock.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/SwinWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin/SwinWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/swin_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin/swin_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin_int8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin_int8/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/swin_int8/SwinBasicLayerINT8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin_int8/SwinBasicLayerINT8.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin_int8/SwinBasicLayerINT8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin_int8/SwinBasicLayerINT8.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin_int8/SwinBlockINT8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin_int8/SwinBlockINT8.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin_int8/SwinBlockINT8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin_int8/SwinBlockINT8.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin_int8/SwinINT8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin_int8/SwinINT8.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin_int8/SwinINT8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin_int8/SwinINT8.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin_int8/SwinINT8Weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/swin_int8/SwinINT8Weight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Decoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Decoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Decoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Decoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5DecoderLayerWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5DecoderLayerWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5DecoderLayerWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5DecoderLayerWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Decoding.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Decoding.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Decoding.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Decoding.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5DecodingWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5DecodingWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5DecodingWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5DecodingWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Encoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Encoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Encoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Encoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5EncoderLayerWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5EncoderLayerWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5EncoderLayerWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5EncoderLayerWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5EncoderWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5EncoderWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5EncoderWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5EncoderWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/t5_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/t5/t5_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/vit/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/ViT.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/vit/ViT.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/ViT.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/vit/ViT.h -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/ViTLayerWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/vit/ViTLayerWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/ViTWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/vit/ViTWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/vit_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/vit/vit_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/vit_int8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/vit_int8/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/vit_int8/ViTINT8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/vit_int8/ViTINT8.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/vit_int8/ViTINT8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/vit_int8/ViTINT8.h -------------------------------------------------------------------------------- /src/fastertransformer/models/vit_int8/ViTINT8Weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/vit_int8/ViTINT8Weight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/vit_int8/ViTLayerINT8Weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/vit_int8/ViTLayerINT8Weight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/xlnet/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/xlnet/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/xlnet/Xlnet.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/xlnet/Xlnet.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/xlnet/Xlnet.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/xlnet/Xlnet.h -------------------------------------------------------------------------------- /src/fastertransformer/models/xlnet/XlnetLayerWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/xlnet/XlnetLayerWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/xlnet/xlnet_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/models/xlnet/xlnet_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/swin/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/swin/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/swin/serialize.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/swin/serialize.hpp -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/t5/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/t5/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/t5/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/t5/README.md -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/t5/T5Plugin.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/t5/T5Plugin.cu -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/t5/T5Plugin.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/t5/T5Plugin.h -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/t5/T5PluginGemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/t5/T5PluginGemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/t5/T5PluginGemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/t5/T5PluginGemm.h -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/vit/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/vit/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/vit/ViTPlugin.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/vit/ViTPlugin.cpp -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/vit/ViTPlugin.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/vit/ViTPlugin.h -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/BaseOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/BaseOp.h -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/bert/BertINT8Op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/bert/BertINT8Op.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/bert/BertOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/bert/BertOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/bert/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/bert/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/bert/weight_quantize_op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/bert/weight_quantize_op.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/decoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/decoder/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/decoder/DecoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/decoder/DecoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/decoder/FusedSelfAttentionOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/decoder/FusedSelfAttentionOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/decoding/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/decoding/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/decoding/DecodingOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/decoding/DecodingOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/encoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/encoder/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/encoder/EncoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/encoder/EncoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/gpt/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/gpt/GptOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/tf_op/gpt/GptOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/BertINT8Op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/BertINT8Op.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/BertINT8Op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/BertINT8Op.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/BertOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/BertOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/BertOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/BertOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/WeightQuantizeOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/WeightQuantizeOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/decoder/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoder/DecoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/decoder/DecoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoder/DecoderOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/decoder/DecoderOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoding/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/decoding/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoding/DecodingOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/decoding/DecodingOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoding/DecodingOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/decoding/DecodingOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoding/GatherTreeOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/decoding/GatherTreeOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoding/GatherTreeOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/decoding/GatherTreeOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/encoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/encoder/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/encoder/EncoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/encoder/EncoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/encoder/EncoderOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/encoder/EncoderOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/glm/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/glm/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/glm/GlmOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/glm/GlmOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/glm/GlmOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/glm/GlmOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/gpt/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gpt/GptOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/gpt/GptOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gpt/GptOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/gpt/GptOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/longformer/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/longformer/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/longformer/LongformerEncoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/longformer/LongformerEncoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/longformer/LongformerEncoderOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/longformer/LongformerEncoderOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/multi_gpu_gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/multi_gpu_gpt/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/multi_gpu_gpt/ParallelGptOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/multi_gpu_gpt/ParallelGptOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/multi_gpu_gpt/ParallelGptOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/multi_gpu_gpt/ParallelGptOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/SwinINT8Op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/SwinINT8Op.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/SwinINT8Op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/SwinINT8Op.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/SwinOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/SwinOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/SwinOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/SwinOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/WeightQuantizeOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/WeightQuantizeOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5DecoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5DecoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5DecoderOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5DecoderOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5DecodingOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5DecodingOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5DecodingOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5DecodingOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5EncoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5EncoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5EncoderOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5EncoderOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/th_traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/th_traits.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/th_utils.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/th_utils.cu -------------------------------------------------------------------------------- /src/fastertransformer/th_op/th_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/th_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/ViTINT8Op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/ViTINT8Op.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/ViTINT8Op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/ViTINT8Op.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/ViTOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/ViTOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/ViTOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/ViTOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/WeightQuantizeOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/WeightQuantizeOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/triton_backend/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/triton_backend/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/triton_backend/gptj/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/triton_backend/gptj/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/triton_backend/gptj/GptJTritonModel.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/triton_backend/gptj/GptJTritonModel.cc -------------------------------------------------------------------------------- /src/fastertransformer/triton_backend/gptj/GptJTritonModel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/triton_backend/gptj/GptJTritonModel.h -------------------------------------------------------------------------------- /src/fastertransformer/triton_backend/t5/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/triton_backend/t5/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/triton_backend/t5/T5TritonModel.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/triton_backend/t5/T5TritonModel.cc -------------------------------------------------------------------------------- /src/fastertransformer/triton_backend/t5/T5TritonModel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/triton_backend/t5/T5TritonModel.h -------------------------------------------------------------------------------- /src/fastertransformer/triton_backend/triton_utils.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/triton_backend/triton_utils.hpp -------------------------------------------------------------------------------- /src/fastertransformer/utils/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/utils/ScaleList.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/ScaleList.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/Tensor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/Tensor.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/allocator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/allocator.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/conv2d.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/conv2d.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/convert_data_type.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/convert_data_type.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasAlgoMap.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/cublasAlgoMap.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasAlgoMap.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/cublasAlgoMap.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasINT8MMWrapper.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/cublasINT8MMWrapper.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasINT8MMWrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/cublasINT8MMWrapper.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasMMWrapper.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/cublasMMWrapper.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasMMWrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/cublasMMWrapper.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cuda_bf16_wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/cuda_bf16_wrapper.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cuda_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/cuda_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/custom_ar_comm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/custom_ar_comm.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/custom_ar_comm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/custom_ar_comm.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/decoding_gemm_func.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/decoding_gemm_func.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/decoding_gemm_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/decoding_gemm_func.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/encoder_gemm_func.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/encoder_gemm_func.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/encoder_gemm_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/encoder_gemm_func.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/encoder_igemm_func.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/encoder_igemm_func.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/encoder_igemm_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/encoder_igemm_func.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/gemm_func.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/gemm_func.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/gemm_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/gemm_func.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/gpt_gemm_func.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/gpt_gemm_func.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/gpt_gemm_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/gpt_gemm_func.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/swin_gemm_func.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/swin_gemm_func.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/swin_gemm_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/swin_gemm_func.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/swin_igemm_func.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/swin_igemm_func.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/swin_igemm_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/swin_igemm_func.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/t5_gemm_func.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/t5_gemm_func.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/t5_gemm_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/t5_gemm_func.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/xlnet_gemm_func.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/xlnet_gemm_func.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/xlnet_gemm_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/xlnet_gemm_func.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/logger.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/logger.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/memory_utils.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/memory_utils.cu -------------------------------------------------------------------------------- /src/fastertransformer/utils/memory_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/memory_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/mpi_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/mpi_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/nccl_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/nccl_utils.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/nccl_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/nccl_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/nvtx_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/nvtx_utils.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/nvtx_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/nvtx_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/string_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/string_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/word_list.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/word_list.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/word_list.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/src/fastertransformer/utils/word_list.h -------------------------------------------------------------------------------- /templates/adding_a_new_model/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/templates/adding_a_new_model/README.md -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/CMakeLists.txt -------------------------------------------------------------------------------- /tests/bert/tf_bert_unit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/bert/tf_bert_unit_test.py -------------------------------------------------------------------------------- /tests/bert/tf_encoder_unit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/bert/tf_encoder_unit_test.py -------------------------------------------------------------------------------- /tests/bert/th_bert_unit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/bert/th_bert_unit_test.py -------------------------------------------------------------------------------- /tests/bert/th_encoder_unit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/bert/th_encoder_unit_test.py -------------------------------------------------------------------------------- /tests/decoding/tf_decoding_unit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/decoding/tf_decoding_unit_test.py -------------------------------------------------------------------------------- /tests/decoding/tf_fused_self_multihead_attention_unit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/decoding/tf_fused_self_multihead_attention_unit_test.py -------------------------------------------------------------------------------- /tests/longformer/py_longformer_unit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/longformer/py_longformer_unit_test.py -------------------------------------------------------------------------------- /tests/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/unittests/CMakeLists.txt -------------------------------------------------------------------------------- /tests/unittests/test_gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/unittests/test_gemm.cu -------------------------------------------------------------------------------- /tests/unittests/test_logprob_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/unittests/test_logprob_kernels.cu -------------------------------------------------------------------------------- /tests/unittests/test_sampling.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/FasterTransformer/HEAD/tests/unittests/test_sampling.cu --------------------------------------------------------------------------------