├── .clang-format ├── .dockerignore ├── .flake8 ├── .github └── ISSUE_TEMPLATE │ └── bug_report.yml ├── .gitignore ├── .gitmodules ├── .vscode └── settings.json ├── 3rdparty ├── CMakeLists.txt ├── INIReader.h ├── common │ ├── CMakeLists.txt │ ├── cudaDriverWrapper.cpp │ └── cudaDriverWrapper.h ├── cub │ ├── agent │ │ ├── agent_histogram.cuh │ │ ├── agent_radix_sort_downsweep.cuh │ │ ├── agent_radix_sort_upsweep.cuh │ │ ├── agent_reduce.cuh │ │ ├── agent_reduce_by_key.cuh │ │ ├── agent_rle.cuh │ │ ├── agent_scan.cuh │ │ ├── agent_segment_fixup.cuh │ │ ├── agent_select_if.cuh │ │ ├── agent_spmv_orig.cuh │ │ └── single_pass_scan_operators.cuh │ ├── block │ │ ├── block_adjacent_difference.cuh │ │ ├── block_discontinuity.cuh │ │ ├── block_exchange.cuh │ │ ├── block_histogram.cuh │ │ ├── block_load.cuh │ │ ├── block_radix_rank.cuh │ │ ├── block_radix_sort.cuh │ │ ├── block_raking_layout.cuh │ │ ├── block_reduce.cuh │ │ ├── block_scan.cuh │ │ ├── block_shuffle.cuh │ │ ├── block_store.cuh │ │ └── specializations │ │ │ ├── block_histogram_atomic.cuh │ │ │ ├── block_histogram_sort.cuh │ │ │ ├── block_reduce_raking.cuh │ │ │ ├── block_reduce_raking_commutative_only.cuh │ │ │ ├── block_reduce_warp_reductions.cuh │ │ │ ├── block_scan_raking.cuh │ │ │ ├── block_scan_warp_scans.cuh │ │ │ ├── block_scan_warp_scans2.cuh │ │ │ └── block_scan_warp_scans3.cuh │ ├── cub.cuh │ ├── device │ │ ├── device_histogram.cuh │ │ ├── device_partition.cuh │ │ ├── device_radix_sort.cuh │ │ ├── device_reduce.cuh │ │ ├── device_run_length_encode.cuh │ │ ├── device_scan.cuh │ │ ├── device_segmented_radix_sort.cuh │ │ ├── device_segmented_reduce.cuh │ │ ├── device_select.cuh │ │ ├── device_spmv.cuh │ │ └── dispatch │ │ │ ├── dispatch_histogram.cuh │ │ │ ├── dispatch_radix_sort.cuh │ │ │ ├── dispatch_reduce.cuh │ │ │ ├── dispatch_reduce_by_key.cuh │ │ │ ├── dispatch_rle.cuh │ │ │ ├── dispatch_scan.cuh │ │ │ ├── dispatch_select_if.cuh │ │ │ └── dispatch_spmv_orig.cuh │ ├── grid │ │ ├── grid_barrier.cuh │ │ ├── grid_even_share.cuh │ │ ├── grid_mapping.cuh │ │ └── grid_queue.cuh │ ├── host │ │ └── mutex.cuh │ ├── iterator │ │ ├── arg_index_input_iterator.cuh │ │ ├── cache_modified_input_iterator.cuh │ │ ├── cache_modified_output_iterator.cuh │ │ ├── constant_input_iterator.cuh │ │ ├── counting_input_iterator.cuh │ │ ├── discard_output_iterator.cuh │ │ ├── tex_obj_input_iterator.cuh │ │ ├── tex_ref_input_iterator.cuh │ │ └── transform_input_iterator.cuh │ ├── thread │ │ ├── thread_load.cuh │ │ ├── thread_operators.cuh │ │ ├── thread_reduce.cuh │ │ ├── thread_scan.cuh │ │ ├── thread_search.cuh │ │ └── thread_store.cuh │ ├── util_allocator.cuh │ ├── util_arch.cuh │ ├── util_debug.cuh │ ├── util_device.cuh │ ├── util_macro.cuh │ ├── util_namespace.cuh │ ├── util_ptx.cuh │ ├── util_type.cuh │ └── warp │ │ ├── specializations │ │ ├── warp_reduce_shfl.cuh │ │ ├── warp_reduce_smem.cuh │ │ ├── warp_scan_shfl.cuh │ │ └── warp_scan_smem.cuh │ │ ├── warp_reduce.cuh │ │ └── warp_scan.cuh ├── fp8_qgmma_1x1 │ ├── CMakeLists.txt │ ├── compute.cuh │ ├── conv1x1.cuh │ ├── conv1x1_interface.hpp │ ├── cubins │ │ ├── fp8_gemm_1x1_gelu.cubin.cpp │ │ ├── fp8_gemm_1x1_no_act.cubin.cpp │ │ └── fp8_gemm_1x1_relu.cubin.cpp │ ├── dma.cuh │ ├── fp8_gemm_1x1.h │ ├── fp8_qgmma_1x1_utils.cu │ ├── fp8_qgmma_1x1_utils.h │ ├── parse_profile.py │ ├── scheduler.cuh │ ├── sharedCubinLoader.h │ ├── tile_profile.cuh │ ├── traits.h │ └── utils.h ├── trt_fp8_fmha │ └── fused_multihead_attention.h └── trt_fused_multihead_attention │ ├── CMakeLists.txt │ ├── common.cuh │ ├── cudaDriverWrapper.cpp │ ├── cudaDriverWrapper.h │ ├── fmha_v2_flash_attention_fp16_0_32_sm75.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_0_32_sm80.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_0_32_sm86.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_0_64_sm70.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_0_64_sm72.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_0_64_sm75.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_0_64_sm80.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_0_64_sm86.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_128_sm70.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_128_sm72.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_128_sm75.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_128_sm80.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_128_sm86.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_128_sm89.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_144_sm70.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_144_sm72.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_144_sm75.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_144_sm80.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_144_sm86.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_144_sm89.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_160_sm70.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_160_sm72.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_160_sm75.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_160_sm80.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_160_sm86.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_160_sm89.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_256_sm70.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_256_sm72.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_256_sm75.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_256_sm80.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_256_sm86.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_256_sm89.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_32_sm70.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_32_sm72.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_32_sm75.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_32_sm80.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_32_sm86.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_32_sm89.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_40_sm70.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_40_sm72.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_40_sm75.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_40_sm80.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_40_sm86.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_40_sm89.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_64_sm70.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_64_sm72.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_64_sm75.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_64_sm80.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_64_sm86.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_64_sm89.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_80_sm70.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_80_sm72.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_80_sm75.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_80_sm80.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_80_sm86.cubin.cpp │ ├── fmha_v2_flash_attention_fp16_Causal_0_80_sm89.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_32_sm70.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_32_sm72.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_32_sm75.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_32_sm80.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_32_sm86.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_32_sm89.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_40_sm70.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_40_sm72.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_40_sm75.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_40_sm80.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_40_sm86.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_40_sm89.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_64_sm70.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_64_sm72.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_64_sm75.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_64_sm80.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_64_sm86.cubin.cpp │ ├── fmha_v2_fp16_Causal_128_64_sm89.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_32_sm70.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_32_sm72.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_32_sm75.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_32_sm80.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_32_sm86.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_32_sm89.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_40_sm70.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_40_sm72.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_40_sm75.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_40_sm80.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_40_sm86.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_40_sm89.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_64_sm70.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_64_sm72.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_64_sm75.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_64_sm80.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_64_sm86.cubin.cpp │ ├── fmha_v2_fp16_Causal_64_64_sm89.cubin.cpp │ ├── fp8 │ ├── fmha_v2_e4m3_128_64_ldgsts_sm90.cubin.cpp │ ├── fmha_v2_e4m3_192_64_ldgsts_sm90.cubin.cpp │ ├── fmha_v2_e4m3_256_64_ldgsts_sm90.cubin.cpp │ ├── fmha_v2_e4m3_384_64_ldgsts_sm90.cubin.cpp │ ├── fmha_v2_e4m3_512_64_ldgsts_sm90.cubin.cpp │ ├── fmha_v2_e4m3_fp32_128_64_sm89.cubin.cpp │ ├── fmha_v2_e4m3_fp32_192_64_sm89.cubin.cpp │ ├── fmha_v2_e4m3_fp32_256_64_sm89.cubin.cpp │ ├── fmha_v2_e4m3_fp32_384_64_sm89.cubin.cpp │ └── fmha_v2_e4m3_fp32_512_64_sm89.cubin.cpp │ ├── fused_mha_with_relPosBias_fp16_128_32_kernel.sm75.cpp │ ├── fused_mha_with_relPosBias_fp16_128_32_kernel.sm80.cpp │ ├── fused_mha_with_relPosBias_fp16_128_32_kernel.sm86.cpp │ ├── fused_mha_with_relPosBias_fp16_256_32_kernel.sm75.cpp │ ├── fused_mha_with_relPosBias_fp16_256_32_kernel.sm80.cpp │ ├── fused_mha_with_relPosBias_fp16_256_32_kernel.sm86.cpp │ ├── fused_mha_with_relPosBias_fp16_64_32_kernel.sm75.cpp │ ├── fused_mha_with_relPosBias_fp16_64_32_kernel.sm80.cpp │ ├── fused_mha_with_relPosBias_fp16_64_32_kernel.sm86.cpp │ ├── fused_mha_with_relPosBias_int8_256_32_kernel.sm75.cpp │ ├── fused_mha_with_relPosBias_int8_256_32_kernel.sm80.cpp │ ├── fused_mha_with_relPosBias_int8_64_32_kernel.sm75.cpp │ ├── fused_mha_with_relPosBias_int8_64_32_kernel.sm80.cpp │ ├── fused_multihead_attention.h │ ├── fused_multihead_attention_common.h │ ├── fused_multihead_attention_fp16_128_64_kernel.sm75.cpp │ ├── fused_multihead_attention_fp16_128_64_kernel.sm80.cpp │ ├── fused_multihead_attention_fp16_384_64_kernel.sm75.cpp │ ├── fused_multihead_attention_fp16_384_64_kernel.sm80.cpp │ ├── fused_multihead_attention_fp16_64_64_kernel.sm75.cpp │ ├── fused_multihead_attention_fp16_64_64_kernel.sm80.cpp │ ├── fused_multihead_attention_fp16_96_64_kernel.sm75.cpp │ ├── fused_multihead_attention_fp16_96_64_kernel.sm80.cpp │ ├── fused_multihead_attention_int8_128_64_kernel.sm75.cpp │ ├── fused_multihead_attention_int8_128_64_kernel.sm80.cpp │ ├── fused_multihead_attention_int8_384_64_kernel.sm75.cpp │ ├── fused_multihead_attention_int8_384_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2.h │ ├── fused_multihead_attention_v2_fp16_128_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_128_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_128_64_kernel.sm70.cpp │ ├── fused_multihead_attention_v2_fp16_128_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_128_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_192_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_192_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_192_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_192_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_256_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_256_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_256_64_kernel.sm70.cpp │ ├── fused_multihead_attention_v2_fp16_256_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_256_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_32_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_32_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_32_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_32_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_384_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_384_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_384_64_kernel.sm70.cpp │ ├── fused_multihead_attention_v2_fp16_384_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_384_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_512_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_512_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_512_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_64_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_64_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_64_64_kernel.sm70.cpp │ ├── fused_multihead_attention_v2_fp16_64_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_64_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_96_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_96_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_fp16_96_64_kernel.sm70.cpp │ ├── fused_multihead_attention_v2_fp16_96_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_fp16_96_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_128_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_128_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_128_64_kernel.sm72.cpp │ ├── fused_multihead_attention_v2_int8_128_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_128_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_192_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_192_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_192_64_kernel.sm72.cpp │ ├── fused_multihead_attention_v2_int8_192_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_192_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_256_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_256_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_256_64_kernel.sm72.cpp │ ├── fused_multihead_attention_v2_int8_256_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_256_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_384_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_384_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_384_64_kernel.sm72.cpp │ ├── fused_multihead_attention_v2_int8_384_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_384_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_512_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_512_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_512_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_64_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_64_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_64_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_64_64_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_96_32_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_96_32_kernel.sm80.cpp │ ├── fused_multihead_attention_v2_int8_96_64_kernel.sm75.cpp │ ├── fused_multihead_attention_v2_int8_96_64_kernel.sm80.cpp │ ├── qkvToContext.cu │ └── qkvToContext.h ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── benchmarks ├── bert │ ├── pyt_benchmark.sh │ ├── pyt_int8_benchmark.sh │ ├── pyt_sp_fp16_benchmark.sh │ ├── pyt_sp_int8_mode2_benchmark.sh │ ├── pyt_tp_benchmark.sh │ ├── tf_benchmark.sh │ └── tf_int8_benchmark.sh ├── decoding │ ├── pyt_decoding_beamsearch_benchmark.sh │ ├── tf_decoding_beamsearch_benchmark.sh │ └── tf_decoding_sampling_benchmark.sh ├── gpt │ └── cpp_benchmark.sh └── t5 │ └── pyt_benchmark.sh ├── cmake ├── FasterTransformerConfig.cmake.in └── Modules │ ├── FindCUDNN.cmake │ └── FindNCCL.cmake ├── docker ├── Dockerfile.tf ├── Dockerfile.tf2 └── Dockerfile.torch ├── docs ├── QAList.md ├── bart_guide.md ├── bert_guide.md ├── deberta_guide.md ├── decoder_guide.md ├── gpt_guide.md ├── gptj_guide.md ├── gptneox_guide.md ├── images │ ├── FP-swin-flowchart.png │ ├── FT_Encoder_T4.png │ ├── FT_GPT_A100.png │ ├── INT8-swin-flowchart.png │ ├── PyTorch_Encoder_T4.png │ ├── Py_Decoder_T4.png │ ├── Py_Encoder_T4.png │ ├── TF_Decoder_T4.png │ ├── TF_Encoder_T4.png │ ├── decoding │ │ └── decoding.png │ ├── effective_transformer.png │ ├── encoder-decoding-2.png │ ├── encoder_flowchart.png │ ├── gpt │ │ ├── Megatron_530B_benchmark_1.png │ │ ├── Megatron_530B_benchmark_2.png │ │ ├── Megatron_530B_benchmark_3.png │ │ ├── Megatron_530B_benchmark_4.png │ │ ├── SmoothQuant_workflow.png │ │ ├── gpt.png │ │ ├── gpt_context.png │ │ ├── gpt_interactive_generation.0.png │ │ ├── gpt_interactive_generation.1.png │ │ ├── gpt_interactive_generation.2.png │ │ └── parallelgpt.png │ ├── gpt_flowchart.png │ ├── longformer_compute_flow.png │ ├── vit │ │ ├── vit-FMHA.png │ │ └── vit-fp32-fp16-compute-flow.png │ ├── workflow-of-int8-inference.png │ └── xlnet_flowchart.png ├── longformer_guide.md ├── models │ ├── megatron-345m-model.md │ └── megatron-530b-model.md ├── swin_guide.md ├── t5_guide.md ├── vit_guide.md └── xlnet_guide.md ├── examples ├── CMakeLists.txt ├── __init__.py ├── cpp │ ├── CMakeLists.txt │ ├── bert │ │ ├── CMakeLists.txt │ │ ├── bert_config.ini │ │ ├── bert_example.cc │ │ └── bert_triton_example.cc │ ├── bert_fp8 │ │ ├── CMakeLists.txt │ │ ├── bert_fp8_example.cc │ │ └── bert_fp8_example_squad.cc │ ├── bert_int8 │ │ ├── CMakeLists.txt │ │ └── bert_int8_example.cc │ ├── decoding │ │ ├── CMakeLists.txt │ │ ├── decoding_example.cc │ │ └── layernorm_test.cc │ ├── gpt │ │ ├── CMakeLists.txt │ │ ├── gpt_config.ini │ │ ├── gpt_example.cc │ │ └── start_ids.csv │ ├── gpt_fp8 │ │ ├── CMakeLists.txt │ │ ├── gpt_config.ini │ │ ├── gpt_fp8_example.cc │ │ ├── gpt_fp8_triton_example.cc │ │ └── start_ids.csv │ ├── gptj │ │ ├── CMakeLists.txt │ │ ├── bad_words.csv │ │ ├── gptj_config.ini │ │ ├── gptj_example.cc │ │ ├── gptj_triton_example.cc │ │ ├── start_ids.csv │ │ └── stop_words.csv │ ├── gptneox │ │ ├── CMakeLists.txt │ │ ├── bad_words.csv │ │ ├── gptneox_config.ini │ │ ├── gptneox_example.cc │ │ ├── gptneox_triton_example.cc │ │ ├── start_ids.csv │ │ └── stop_words.csv │ ├── llama │ │ ├── CMakeLists.txt │ │ ├── bad_words.csv │ │ ├── check_with_huggingface.py │ │ ├── huggingface_llama_convert.py │ │ ├── llama_config.ini │ │ ├── llama_example.cc │ │ ├── llama_triton_example.cc │ │ ├── model_config.json │ │ ├── start_ids.csv │ │ └── stop_words.csv │ ├── multi_gpu_gpt │ │ ├── CMakeLists.txt │ │ ├── concat_interactive_ids.csv │ │ ├── gpt_config.ini │ │ ├── gpt_example_utils.cc │ │ ├── gpt_example_utils.h │ │ ├── interactive_inputs_ids.csv │ │ ├── multi_gpu_gpt_async_example.cc │ │ ├── multi_gpu_gpt_example.cc │ │ ├── multi_gpu_gpt_interactive_example.cc │ │ ├── multi_gpu_gpt_triton_example.cc │ │ ├── start_ids.csv │ │ └── start_ids_opt.csv │ ├── swin │ │ ├── CMakeLists.txt │ │ ├── functions.h │ │ └── swin_example.cc │ ├── swin_int8 │ │ ├── CMakeLists.txt │ │ └── swin_int8_example.cc │ ├── vit │ │ ├── CMakeLists.txt │ │ └── vit_example.cc │ ├── vit_int8 │ │ ├── CMakeLists.txt │ │ └── vit_int8_example.cc │ ├── wenet │ │ ├── CMakeLists.txt │ │ ├── wenet_decoder_example.cc │ │ └── wenet_encoder_example.cc │ └── xlnet │ │ ├── CMakeLists.txt │ │ ├── cnpy.cpp │ │ ├── cnpy.h │ │ ├── xlnet_correctness_example.cc │ │ └── xlnet_example.cc ├── onnx │ └── multi_gpu_gpt │ │ └── onnx_ckpt_convert.py ├── pytorch │ ├── __init__.py │ ├── bart │ │ ├── bart.ipynb │ │ ├── requirement.txt │ │ ├── translate_example.py │ │ └── utils │ │ │ ├── ft_decoding.py │ │ │ └── ft_encoder.py │ ├── bert │ │ ├── bert-quantization-sparsity │ │ │ ├── .dockerignore │ │ │ ├── Dockerfile │ │ │ ├── LICENSE │ │ │ ├── NOTICE │ │ │ ├── README.md │ │ │ ├── README_orig.md │ │ │ ├── apex_sparsity │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── asp.py │ │ │ │ ├── sparse_masklib.py │ │ │ │ └── test │ │ │ │ │ ├── checkpointing_test_part1.py │ │ │ │ │ ├── checkpointing_test_part2.py │ │ │ │ │ ├── checkpointing_test_reference.py │ │ │ │ │ └── toy_problem.py │ │ │ ├── bert_config.json │ │ │ ├── checkpoints │ │ │ │ └── .keep │ │ │ ├── configurations.yml │ │ │ ├── create_pretraining_data.py │ │ │ ├── data │ │ │ │ ├── BooksDownloader.py │ │ │ │ ├── BookscorpusTextFormatting.py │ │ │ │ ├── Downloader.py │ │ │ │ ├── GLUEDownloader.py │ │ │ │ ├── GooglePretrainedWeightDownloader.py │ │ │ │ ├── NVIDIAPretrainedWeightDownloader.py │ │ │ │ ├── SquadDownloader.py │ │ │ │ ├── TextSharding.py │ │ │ │ ├── WikiDownloader.py │ │ │ │ ├── WikicorpusTextFormatting.py │ │ │ │ ├── __init__.py │ │ │ │ ├── bertPrep.py │ │ │ │ ├── create_datasets_from_start.sh │ │ │ │ └── squad │ │ │ │ │ └── squad_download.sh │ │ │ ├── extract_features.py │ │ │ ├── file_utils.py │ │ │ ├── images │ │ │ │ ├── loss_curves.png │ │ │ │ ├── model.png │ │ │ │ └── nvlamb.png │ │ │ ├── inference.py │ │ │ ├── modeling.py │ │ │ ├── optimization.py │ │ │ ├── processors │ │ │ │ ├── __init__.py │ │ │ │ └── glue.py │ │ │ ├── quant_utils.py │ │ │ ├── requirements.txt │ │ │ ├── run.sub │ │ │ ├── run_glue.py │ │ │ ├── run_pretraining.py │ │ │ ├── run_squad.py │ │ │ ├── run_swag.py │ │ │ ├── schedulers.py │ │ │ ├── scripts │ │ │ │ ├── configs │ │ │ │ │ ├── glue_config.sh │ │ │ │ │ ├── pretrain_config.sh │ │ │ │ │ └── squad_config.sh │ │ │ │ ├── data_download.sh │ │ │ │ ├── docker │ │ │ │ │ ├── build.sh │ │ │ │ │ └── launch.sh │ │ │ │ ├── run_glue.sh │ │ │ │ ├── run_pretraining.sh │ │ │ │ ├── run_squad.sh │ │ │ │ └── run_swag.sh │ │ │ ├── tokenization.py │ │ │ ├── utils.py │ │ │ └── vocab │ │ │ │ └── vocab │ │ ├── bert_example.py │ │ ├── run_glue.py │ │ ├── run_squad.py │ │ ├── scripts │ │ │ ├── run_mrpc.sh │ │ │ └── run_squad.sh │ │ └── utils │ │ │ ├── checkpoint_quantization.py │ │ │ ├── encoder.py │ │ │ ├── get_mrpc_data.py │ │ │ ├── huggingface_bert_convert.py │ │ │ ├── huggingface_bert_fp8_convert.py │ │ │ ├── modeling_bert.py │ │ │ └── update_bert_config.py │ ├── decoder │ │ ├── decoder_example.py │ │ └── utils │ │ │ ├── decoder.py │ │ │ └── ft_decoder.py │ ├── decoding │ │ ├── decoding_example.py │ │ ├── translate_example.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── bleu_score.py │ │ │ ├── decoding.py │ │ │ ├── download_model.sh │ │ │ ├── ft_decoding.py │ │ │ ├── recover_bpe.py │ │ │ ├── translation │ │ │ ├── test.de │ │ │ ├── test.en │ │ │ └── wmtende.vocab │ │ │ ├── translation_model.py │ │ │ └── translator.py │ ├── encoder │ │ ├── encoder_example.py │ │ └── utils │ │ │ └── ft_encoder.py │ ├── gpt │ │ ├── bloom_lambada.py │ │ ├── duplicate_input_ids.txt │ │ ├── evaluate_zeroshot_gpt.py │ │ ├── gpt_example.py │ │ ├── gpt_summarization.py │ │ ├── lambada_task_example.py │ │ ├── multi_gpu_gpt_example.py │ │ ├── opt_summarization.py │ │ ├── requirement.txt │ │ ├── scripts │ │ │ └── evaluate_zeroshot_gpt.sh │ │ └── utils │ │ │ ├── bloom.py │ │ │ ├── checkpoint_saver_fastertransformer.py │ │ │ ├── comm.py │ │ │ ├── generate_gpt_config.py │ │ │ ├── generate_start_ids.py │ │ │ ├── gpt.py │ │ │ ├── gpt_decoder.py │ │ │ ├── gpt_fp8.py │ │ │ ├── gpt_token_converter.py │ │ │ ├── gpt_token_encoder.py │ │ │ ├── huggingface_bloom_convert.py │ │ │ ├── huggingface_gpt_convert.py │ │ │ ├── huggingface_jp_gpt_convert.py │ │ │ ├── huggingface_opt_convert.py │ │ │ ├── megatron_ckpt_convert.py │ │ │ ├── megatron_ckpt_convert_llama.py │ │ │ ├── megatron_fp8_ckpt_convert.py │ │ │ ├── megatron_gpt_moe_ckpt_convert.py │ │ │ ├── nemo_ckpt_convert.py │ │ │ ├── parallel_gpt.py │ │ │ ├── profiler.py │ │ │ ├── tokenizer.py │ │ │ ├── update_gpt_config.py │ │ │ └── word_list.py │ ├── gptj │ │ └── utils │ │ │ ├── generate_gptj_config.py │ │ │ ├── gptj_ckpt_convert.py │ │ │ ├── huggingface_gptj_ckpt_convert.py │ │ │ └── reference_gptj.py │ ├── gptneox │ │ ├── gptneox_example.py │ │ └── utils │ │ │ ├── eleutherai_gpt_neox_convert.py │ │ │ ├── gptneox.py │ │ │ ├── hftokenizer.py │ │ │ ├── huggingface_gptneox_convert.py │ │ │ └── huggingface_jp_gptneox_convert.py │ ├── longformer │ │ ├── longformer_qa.py │ │ └── model.py │ ├── nemo.py │ ├── swin │ │ ├── Swin-Transformer-Quantization │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── calib.sh │ │ │ ├── data.py │ │ │ ├── main.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── build.py │ │ │ │ ├── linear_activation.py │ │ │ │ ├── swin_transformer.py │ │ │ │ └── swin_transformer_v2.py │ │ │ ├── qat.sh │ │ │ ├── quant_utils.py │ │ │ └── run.sh │ │ ├── SwinTransformerINT8Weight.py │ │ ├── SwinTransformerWeightTransposeQKVWeight.py │ │ ├── checkpoint_quantization.py │ │ ├── infer_swintransformer_acc.py │ │ ├── infer_swintransformer_int8_op.py │ │ ├── infer_swintransformer_op.py │ │ ├── run_test_fp16_accuracy.sh │ │ ├── run_test_fp32_accuracy.sh │ │ ├── run_test_v1.sh │ │ ├── run_test_v1_int8.sh │ │ ├── run_test_v1_int8_accuracy.sh │ │ ├── run_test_v2.sh │ │ ├── run_test_v2_int8.sh │ │ └── run_test_v2_int8_accuracy.sh │ ├── t5 │ │ ├── mnli_task_example.py │ │ ├── perf_benchmark.py │ │ ├── requirement.txt │ │ ├── summarization.py │ │ ├── translate_example.py │ │ ├── utils │ │ │ ├── ft_decoding.py │ │ │ ├── ft_encoder.py │ │ │ ├── huggingface_t5_ckpt_convert.py │ │ │ ├── megatron-deepspeed_t5_ckpt_convert.py │ │ │ ├── megatron_t5_ckpt_convert.py │ │ │ ├── nemo_t5_ckpt_convert.py │ │ │ └── nemo_t5_ia3.py │ │ └── xnli_task_example.py │ ├── tokenizer.py │ ├── utils.py │ └── vit │ │ ├── ViT-quantization │ │ ├── README.md │ │ ├── calib.sh │ │ ├── config.py │ │ ├── data.py │ │ ├── eval.sh │ │ ├── eval_engine.py │ │ ├── eval_int8.sh │ │ ├── main.py │ │ ├── qat.sh │ │ ├── quant_utils.py │ │ └── vit_int8.py │ │ ├── VisionTransformerINT8WeightLoader.py │ │ ├── VisionTransformerWeightLoader.py │ │ ├── checkpoint_quantization.py │ │ ├── infer_visiontransformer_int8_op.py │ │ ├── infer_visiontransformer_op.py │ │ ├── requirement.txt │ │ ├── run.sh │ │ ├── run2.sh │ │ └── run_int8_accuracy.sh ├── tensorflow │ ├── bert │ │ ├── bert-quantization │ │ │ ├── .dockerignore │ │ │ ├── CONTRIBUTING.md │ │ │ ├── Dockerfile │ │ │ ├── LICENSE │ │ │ ├── NOTICE │ │ │ ├── README.md │ │ │ ├── README_orig.md │ │ │ ├── __init__.py │ │ │ ├── configurations.yml │ │ │ ├── extract_features.py │ │ │ ├── fp16_utils.py │ │ │ ├── ft-tensorflow-quantization │ │ │ │ ├── README.md │ │ │ │ ├── ft_tensorflow_quantization │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── python │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── calib │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── calibrator.py │ │ │ │ │ │ ├── histogram.py │ │ │ │ │ │ └── max.py │ │ │ │ │ │ ├── layers │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── dense.py │ │ │ │ │ │ ├── tensor_quantizer.py │ │ │ │ │ │ └── utils.py │ │ │ │ │ │ ├── ops │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── fake_quantize.py │ │ │ │ │ │ └── utils │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── utils.py │ │ │ │ └── setup.py │ │ │ ├── fused_layer_norm.py │ │ │ ├── gpu_environment.py │ │ │ ├── modeling.py │ │ │ ├── optimization.py │ │ │ ├── run_pretraining.py │ │ │ ├── run_squad.py │ │ │ ├── tf_metrics.py │ │ │ ├── tokenization.py │ │ │ └── utils │ │ │ │ ├── create_glue_data.py │ │ │ │ ├── create_pretraining_data.py │ │ │ │ ├── create_squad_data.py │ │ │ │ └── utils.py │ │ ├── bert_example.py │ │ ├── tensorflow_bert │ │ │ ├── __init__.py │ │ │ ├── ckpt_quantization.py │ │ │ ├── ckpt_type_convert.py │ │ │ ├── fast_infer_util.py │ │ │ ├── my_modeling.py │ │ │ ├── profile_bert_inference.py │ │ │ ├── profile_transformer_inference.py │ │ │ ├── profile_util.py │ │ │ ├── run_classifier_wrap.py │ │ │ ├── run_squad_wrap.py │ │ │ ├── sample.md │ │ │ ├── squad_evaluate-v1.1.py │ │ │ └── squad_evaluate_v1_1.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── bert.py │ │ │ ├── common.py │ │ │ ├── position.py │ │ │ └── reducer.py │ ├── ckpt_type_convert.py │ ├── common_utils │ │ ├── common.py │ │ ├── position.py │ │ └── reducer.py │ ├── deberta │ │ ├── deberta_example.py │ │ ├── requirement.txt │ │ └── utils │ │ │ └── ft_deberta.py │ ├── decoder │ │ ├── decoder_example.py │ │ └── utils │ │ │ ├── beam_search.py │ │ │ ├── common.py │ │ │ ├── decoder.py │ │ │ ├── decoding.py │ │ │ ├── position.py │ │ │ ├── reducer.py │ │ │ └── sampling.py │ ├── decoding │ │ ├── decoding_example.py │ │ ├── translate_example.py │ │ └── utils │ │ │ ├── bleu_score.py │ │ │ ├── ft_decoding.py │ │ │ └── translation │ │ │ ├── download_model_data.sh │ │ │ ├── test.de │ │ │ ├── test.en │ │ │ └── wmtende.vocab │ ├── encoder │ │ ├── encoder_example.py │ │ └── utils │ │ │ └── encoder.py │ ├── gpt │ │ ├── gpt_example.py │ │ └── utils │ │ │ ├── download_gpt2_model.py │ │ │ ├── gpt_token_converter.py │ │ │ ├── gpt_token_encoder.py │ │ │ └── openai_gpt_ckpt_converter.py │ ├── requirement.txt │ ├── t5 │ │ ├── requirement.txt │ │ ├── translate_example.py │ │ └── utils │ │ │ ├── ft_decoding.py │ │ │ ├── ft_encoder.py │ │ │ ├── jax_t5_ckpt_convert.py │ │ │ ├── tf_saved_model_t5_ckpt_convert.py │ │ │ └── ul2_config.template │ └── xlnet │ │ ├── convertInput.py │ │ ├── convertModel.py │ │ ├── downloadModel.sh │ │ ├── modeling.py │ │ ├── runData.py │ │ └── verifyCorrectness.sh ├── tensorrt │ ├── swin │ │ ├── builder.py │ │ ├── builder_int8.py │ │ ├── infer_swintransformer_plugin.py │ │ ├── infer_swintransformer_plugin_int8.py │ │ ├── run_builder_fp16_v1.sh │ │ ├── run_builder_fp16_v2.sh │ │ ├── run_builder_fp32_v1.sh │ │ ├── run_builder_fp32_v2.sh │ │ ├── run_builder_int8_v1.sh │ │ ├── run_builder_int8_v2.sh │ │ ├── run_infer_fp16.sh │ │ ├── run_infer_fp32.sh │ │ └── run_infer_int8.sh │ ├── t5 │ │ ├── createT5TestData.py │ │ ├── extractT5ModelToBIN.py │ │ └── testT5Plugin.py │ └── vit │ │ ├── infer_visiontransformer_int8_plugin.py │ │ ├── infer_visiontransformer_plugin.py │ │ ├── plugin_loader.py │ │ └── plugin_loader_int8.py └── utils │ ├── hf_detokenize.py │ └── hf_tokenize.py ├── src ├── CMakeLists.txt └── fastertransformer │ ├── CMakeLists.txt │ ├── cutlass_extensions │ └── include │ │ └── cutlass_extensions │ │ ├── arch │ │ └── mma.h │ │ ├── compute_occupancy.h │ │ ├── epilogue │ │ ├── epilogue_quant_helper.h │ │ ├── thread │ │ │ └── ft_fused_activations.h │ │ └── threadblock │ │ │ ├── epilogue_per_row_per_col_scale.h │ │ │ └── epilogue_tensor_op_int32.h │ │ ├── epilogue_helpers.h │ │ ├── ft_gemm_configs.h │ │ ├── gemm │ │ ├── kernel │ │ │ ├── default_fpA_intB_traits.h │ │ │ ├── fpA_intB_gemm.h │ │ │ ├── gemm_moe_problem_visitor.h │ │ │ ├── gemm_with_epilogue_visitor.h │ │ │ ├── mixed_gemm_B_layout.h │ │ │ ├── moe_cutlass_kernel.h │ │ │ └── moe_problem_visitor.h │ │ ├── threadblock │ │ │ ├── default_dq_mma.h │ │ │ ├── default_dq_mma_multistage.h │ │ │ ├── default_dq_mma_pipelined.h │ │ │ ├── default_mma.h │ │ │ ├── default_mma_bf16.h │ │ │ ├── dq_mma_base.h │ │ │ ├── dq_mma_multistage.h │ │ │ └── dq_mma_pipelined.h │ │ └── warp │ │ │ ├── default_mma_tensor_op.h │ │ │ ├── mma_tensorop_compute_B_with_f16.h │ │ │ └── mma_tensorop_dequantizer.h │ │ ├── interleaved_numeric_conversion.h │ │ └── tile_interleaved_layout.h │ ├── kernels │ ├── CMakeLists.txt │ ├── activation_fp8_kernels.cu │ ├── activation_fp8_kernels.h │ ├── activation_int8_kernels.cu │ ├── activation_int8_kernels.h │ ├── activation_kernels.cu │ ├── activation_kernels.h │ ├── add_bias_transpose_kernels.cu │ ├── add_bias_transpose_kernels.h │ ├── add_residual_kernels.cu │ ├── add_residual_kernels.h │ ├── ban_bad_words.cu │ ├── ban_bad_words.h │ ├── beam_search_penalty_kernels.cu │ ├── beam_search_penalty_kernels.h │ ├── beam_search_topk_kernels.cu │ ├── beam_search_topk_kernels.h │ ├── bert_preprocess_kernels.cu │ ├── bert_preprocess_kernels.h │ ├── calibrate_quantize_weight_kernels.cu │ ├── calibrate_quantize_weight_kernels.h │ ├── custom_ar_kernels.cu │ ├── custom_ar_kernels.h │ ├── cutlass_kernels │ │ ├── CMakeLists.txt │ │ ├── cutlass_heuristic.cc │ │ ├── cutlass_heuristic.h │ │ ├── cutlass_preprocessors.cc │ │ ├── cutlass_preprocessors.h │ │ ├── fpA_intB_gemm │ │ │ ├── fpA_intB_gemm.h │ │ │ ├── fpA_intB_gemm_bf16_uint4.cu │ │ │ ├── fpA_intB_gemm_bf16_uint8.cu │ │ │ ├── fpA_intB_gemm_dummy_stubs.cu │ │ │ ├── fpA_intB_gemm_fp16_int4.cu │ │ │ ├── fpA_intB_gemm_fp16_int8.cu │ │ │ └── fpA_intB_gemm_template.h │ │ ├── int8_gemm │ │ │ ├── int8_gemm.h │ │ │ ├── int8_gemm_bf16.cu │ │ │ ├── int8_gemm_fp16.cu │ │ │ ├── int8_gemm_fp32.cu │ │ │ ├── int8_gemm_int32.cu │ │ │ └── int8_gemm_template.h │ │ └── moe_gemm │ │ │ ├── moe_gemm_kernels.h │ │ │ ├── moe_gemm_kernels_bf16_bf16.cu │ │ │ ├── moe_gemm_kernels_bf16_uint4.cu │ │ │ ├── moe_gemm_kernels_bf16_uint8.cu │ │ │ ├── moe_gemm_kernels_fp16_fp16.cu │ │ │ ├── moe_gemm_kernels_fp16_uint4.cu │ │ │ ├── moe_gemm_kernels_fp16_uint8.cu │ │ │ ├── moe_gemm_kernels_fp32_fp32.cu │ │ │ └── moe_gemm_kernels_template.h │ ├── decoder_masked_multihead_attention.cu │ ├── decoder_masked_multihead_attention.h │ ├── decoder_masked_multihead_attention │ │ ├── decoder_masked_multihead_attention_128.cu │ │ ├── decoder_masked_multihead_attention_144.cu │ │ ├── decoder_masked_multihead_attention_160.cu │ │ ├── decoder_masked_multihead_attention_192.cu │ │ ├── decoder_masked_multihead_attention_224.cu │ │ ├── decoder_masked_multihead_attention_256.cu │ │ ├── decoder_masked_multihead_attention_32.cu │ │ ├── decoder_masked_multihead_attention_48.cu │ │ ├── decoder_masked_multihead_attention_64.cu │ │ ├── decoder_masked_multihead_attention_80.cu │ │ ├── decoder_masked_multihead_attention_96.cu │ │ └── decoder_masked_multihead_attention_template.hpp │ ├── decoder_masked_multihead_attention_fp8_test.cc │ ├── decoder_masked_multihead_attention_utils.h │ ├── decoding_kernels.cu │ ├── decoding_kernels.h │ ├── dequantize_kernels.cu │ ├── dequantize_kernels.h │ ├── disentangled_attention_kernels.cu │ ├── disentangled_attention_kernels.h │ ├── gen_relative_pos_bias.cu │ ├── gen_relative_pos_bias.h │ ├── gpt_kernels.cu │ ├── gpt_kernels.h │ ├── image_merge_kernels.cu │ ├── image_merge_kernels.h │ ├── image_shift_partition_kernels.cu │ ├── image_shift_partition_kernels.h │ ├── int8_utils.cuh │ ├── layernorm_fp8_kernels.cu │ ├── layernorm_fp8_kernels.h │ ├── layernorm_fp8_kernels_test.cc │ ├── layernorm_int8_kernels.cu │ ├── layernorm_int8_kernels.h │ ├── layernorm_kernels.cu │ ├── layernorm_kernels.h │ ├── layout_transformer_int8_kernels.cu │ ├── layout_transformer_int8_kernels.h │ ├── logprob_kernels.cu │ ├── logprob_kernels.h │ ├── longformer_kernels.cu │ ├── longformer_kernels.h │ ├── matrix_transpose_fp8_kernels_test.cc │ ├── matrix_transpose_kernels.cu │ ├── matrix_transpose_kernels.h │ ├── matrix_vector_multiplication.cu │ ├── matrix_vector_multiplication.h │ ├── moe_kernels.cu │ ├── moe_kernels.h │ ├── normalize_kernels.cu │ ├── normalize_kernels.h │ ├── online_softmax_beamsearch_kernels.cu │ ├── online_softmax_beamsearch_kernels.h │ ├── penalty_types.h │ ├── quantization_int8_kernels.cu │ ├── quantization_int8_kernels.h │ ├── quantize_weight.cu │ ├── quantize_weight.h │ ├── reduce_kernel_utils.cuh │ ├── reverse_roll_kernels.cu │ ├── reverse_roll_kernels.h │ ├── sampling_penalty_kernels.cu │ ├── sampling_penalty_kernels.h │ ├── sampling_topk_kernels.cu │ ├── sampling_topk_kernels.h │ ├── sampling_topp_kernels.cu │ ├── sampling_topp_kernels.h │ ├── softmax_int8_kernels.cu │ ├── softmax_int8_kernels.h │ ├── stop_criteria_kernels.cu │ ├── stop_criteria_kernels.h │ ├── transform_mask_kernels.cu │ ├── transform_mask_kernels.h │ ├── transpose_int8_kernels.cu │ ├── transpose_int8_kernels.h │ ├── unfused_attention_fp8_kernels.cu │ ├── unfused_attention_fp8_kernels.h │ ├── unfused_attention_fp8_kernels_test.cc │ ├── unfused_attention_int8_kernels.cu │ ├── unfused_attention_int8_kernels.h │ ├── unfused_attention_kernels.cu │ ├── unfused_attention_kernels.h │ ├── vit_kernels.cu │ ├── vit_kernels.h │ ├── xlnet_attention_kernels.cu │ ├── xlnet_attention_kernels.h │ ├── xlnet_preprocess_kernels.cu │ └── xlnet_preprocess_kernels.h │ ├── layers │ ├── BaseLayer.h │ ├── CMakeLists.txt │ ├── DenseWeight.h │ ├── DynamicDecodeBaseLayer.h │ ├── DynamicDecodeLayer.cc │ ├── DynamicDecodeLayer.h │ ├── FfnFP8Layer.cc │ ├── FfnFP8Layer.h │ ├── FfnFP8Weight.h │ ├── FfnINT8Weight.h │ ├── FfnLayer.cc │ ├── FfnLayer.h │ ├── FfnLayerINT8.cc │ ├── FfnLayerINT8.h │ ├── FfnWeight.h │ ├── TensorParallelGeluFfnFP8Layer.cc │ ├── TensorParallelGeluFfnFP8Layer.h │ ├── TensorParallelGeluFfnLayer.cc │ ├── TensorParallelGeluFfnLayer.h │ ├── TensorParallelReluFfnLayer.cc │ ├── TensorParallelReluFfnLayer.h │ ├── TensorParallelSiluFfnLayer.cc │ ├── TensorParallelSiluFfnLayer.h │ ├── adapter_layers │ │ ├── CMakeLists.txt │ │ ├── LinearAdapterLayer.cc │ │ ├── LinearAdapterLayer.h │ │ └── LinearAdapterWeight.h │ ├── attention_layers │ │ ├── AttentionWeight.h │ │ ├── BaseAttentionLayer.h │ │ ├── CMakeLists.txt │ │ ├── DecoderCrossAttentionLayer.cu │ │ ├── DecoderCrossAttentionLayer.h │ │ ├── DecoderSelfAttentionLayer.cc │ │ ├── DecoderSelfAttentionLayer.h │ │ ├── DisentangledAttentionLayer.cc │ │ ├── DisentangledAttentionLayer.h │ │ ├── FusedAttentionLayer.cu │ │ ├── FusedAttentionLayer.h │ │ ├── GptContextAttentionLayer.cc │ │ ├── GptContextAttentionLayer.h │ │ ├── LongformerAttentionLayer.cc │ │ ├── LongformerAttentionLayer.h │ │ ├── TensorParallelDecoderCrossAttentionLayer.cc │ │ ├── TensorParallelDecoderCrossAttentionLayer.h │ │ ├── TensorParallelDecoderSelfAttentionLayer.cc │ │ ├── TensorParallelDecoderSelfAttentionLayer.h │ │ ├── TensorParallelDisentangledAttentionLayer.cc │ │ ├── TensorParallelDisentangledAttentionLayer.h │ │ ├── TensorParallelGptContextAttentionLayer.cc │ │ ├── TensorParallelGptContextAttentionLayer.h │ │ ├── TensorParallelUnfusedAttentionLayer.cc │ │ ├── TensorParallelUnfusedAttentionLayer.h │ │ ├── UnfusedAttentionLayer.cc │ │ ├── UnfusedAttentionLayer.h │ │ ├── WindowAttention.cc │ │ └── WindowAttention.h │ ├── attention_layers_fp8 │ │ ├── AttentionFP8Weight.h │ │ ├── BaseAttentionFP8Layer.h │ │ ├── CMakeLists.txt │ │ ├── DecoderSelfAttentionFP8Layer.cc │ │ ├── DecoderSelfAttentionFP8Layer.h │ │ ├── GptContextAttentionFP8Layer.cc │ │ ├── GptContextAttentionFP8Layer.h │ │ ├── SelfAttentionFP8Layer.cc │ │ ├── SelfAttentionFP8Layer.h │ │ ├── TensorParallelDecoderSelfAttentionFP8Layer.cc │ │ ├── TensorParallelDecoderSelfAttentionFP8Layer.h │ │ ├── TensorParallelGptContextAttentionFP8Layer.cc │ │ └── TensorParallelGptContextAttentionFP8Layer.h │ ├── attention_layers_int8 │ │ ├── AttentionINT8Weight.h │ │ ├── CMakeLists.txt │ │ ├── FusedAttentionLayerINT8.cu │ │ ├── FusedAttentionLayerINT8.h │ │ ├── UnfusedAttentionLayerINT8.cc │ │ ├── UnfusedAttentionLayerINT8.h │ │ ├── WindowAttentionINT8.cu │ │ └── WindowAttentionINT8.h │ ├── beam_search_layers │ │ ├── BaseBeamSearchLayer.cu │ │ ├── BaseBeamSearchLayer.h │ │ ├── BeamSearchLayer.cu │ │ ├── BeamSearchLayer.h │ │ ├── CMakeLists.txt │ │ ├── OnlineBeamSearchLayer.cu │ │ └── OnlineBeamSearchLayer.h │ ├── sampling_layers │ │ ├── BaseSamplingLayer.cc │ │ ├── BaseSamplingLayer.h │ │ ├── CMakeLists.txt │ │ ├── TopKSamplingLayer.cu │ │ ├── TopKSamplingLayer.h │ │ ├── TopPSamplingLayer.cu │ │ └── TopPSamplingLayer.h │ └── xlnet_attention_layers │ │ ├── CMakeLists.txt │ │ ├── XlnetAttentionLayer.cc │ │ ├── XlnetAttentionLayer.h │ │ └── XlnetAttentionWeight.h │ ├── models │ ├── BaseWeight.h │ ├── CMakeLists.txt │ ├── bart │ │ ├── BartDecoder.cc │ │ ├── BartDecoder.h │ │ ├── BartDecoderLayerWeight.cc │ │ ├── BartDecoderLayerWeight.h │ │ ├── BartDecoding.cc │ │ ├── BartDecoding.h │ │ ├── BartDecodingWeight.cc │ │ ├── BartDecodingWeight.h │ │ ├── BartEncoder.cc │ │ ├── BartEncoder.h │ │ ├── BartEncoderLayerWeight.cc │ │ ├── BartEncoderLayerWeight.h │ │ ├── BartEncoderWeight.cc │ │ ├── BartEncoderWeight.h │ │ └── CMakeLists.txt │ ├── bert │ │ ├── Bert.cc │ │ ├── Bert.h │ │ ├── BertLayerWeight.cc │ │ ├── BertLayerWeight.h │ │ ├── BertWeight.cc │ │ ├── BertWeight.h │ │ ├── CMakeLists.txt │ │ └── bert_gemm.cc │ ├── bert_fp8 │ │ ├── BertFP8.cc │ │ ├── BertFP8.h │ │ ├── BertFP8LayerWeight.cc │ │ ├── BertFP8LayerWeight.h │ │ ├── BertFP8Weight.cc │ │ ├── BertFP8Weight.h │ │ ├── CMakeLists.txt │ │ └── serialize.hpp │ ├── bert_int8 │ │ ├── BertINT8.cc │ │ ├── BertINT8.h │ │ ├── BertLayerINT8.cc │ │ ├── BertLayerINT8.h │ │ ├── BertLayerINT8Weight.cc │ │ ├── BertLayerINT8Weight.h │ │ └── CMakeLists.txt │ ├── deberta │ │ ├── CMakeLists.txt │ │ ├── Deberta.cc │ │ ├── Deberta.h │ │ ├── DebertaLayerWeight.cc │ │ ├── DebertaLayerWeight.h │ │ ├── DebertaWeight.cc │ │ └── DebertaWeight.h │ ├── decoder │ │ ├── CMakeLists.txt │ │ ├── Decoder.cc │ │ ├── Decoder.h │ │ └── DecoderLayerWeight.h │ ├── decoding │ │ ├── CMakeLists.txt │ │ ├── Decoding.cc │ │ ├── Decoding.h │ │ ├── DecodingWeight.h │ │ └── decoding_gemm.cc │ ├── gpt_fp8 │ │ ├── CMakeLists.txt │ │ ├── GptFP8.cc │ │ ├── GptFP8.h │ │ ├── GptFP8ContextDecoder.cc │ │ ├── GptFP8ContextDecoder.h │ │ ├── GptFP8Decoder.cc │ │ ├── GptFP8Decoder.h │ │ ├── GptFP8DecoderLayerWeight.cc │ │ ├── GptFP8DecoderLayerWeight.h │ │ ├── GptFP8Weight.cc │ │ ├── GptFP8Weight.h │ │ └── gpt_fp8_gemm.cc │ ├── gptj │ │ ├── CMakeLists.txt │ │ ├── GptJ.cc │ │ ├── GptJ.h │ │ ├── GptJContextDecoder.cc │ │ ├── GptJContextDecoder.h │ │ ├── GptJDecoder.cc │ │ ├── GptJDecoder.h │ │ ├── GptJDecoderLayerWeight.cc │ │ ├── GptJDecoderLayerWeight.h │ │ ├── GptJWeight.cc │ │ └── GptJWeight.h │ ├── gptneox │ │ ├── CMakeLists.txt │ │ ├── GptNeoX.cc │ │ ├── GptNeoX.h │ │ ├── GptNeoXContextDecoder.cc │ │ ├── GptNeoXContextDecoder.h │ │ ├── GptNeoXDecoder.cc │ │ ├── GptNeoXDecoder.h │ │ ├── GptNeoXDecoderLayerWeight.cc │ │ ├── GptNeoXDecoderLayerWeight.h │ │ ├── GptNeoXWeight.cc │ │ └── GptNeoXWeight.h │ ├── llama │ │ ├── CMakeLists.txt │ │ ├── Llama.cc │ │ ├── Llama.h │ │ ├── LlamaContextDecoder.cc │ │ ├── LlamaContextDecoder.h │ │ ├── LlamaDecoder.cc │ │ ├── LlamaDecoder.h │ │ ├── LlamaDecoderLayerWeight.cc │ │ ├── LlamaDecoderLayerWeight.h │ │ ├── LlamaWeight.cc │ │ └── LlamaWeight.h │ ├── longformer │ │ ├── CMakeLists.txt │ │ ├── LongformerEncoder.cc │ │ └── LongformerEncoder.h │ ├── multi_gpu_gpt │ │ ├── CMakeLists.txt │ │ ├── ParallelGpt.cc │ │ ├── ParallelGpt.h │ │ ├── ParallelGptContextDecoder.cc │ │ ├── ParallelGptContextDecoder.h │ │ ├── ParallelGptDecoder.cc │ │ ├── ParallelGptDecoder.h │ │ ├── ParallelGptDecoderLayerWeight.cc │ │ ├── ParallelGptDecoderLayerWeight.h │ │ ├── ParallelGptWeight.cc │ │ ├── ParallelGptWeight.h │ │ └── gpt_gemm.cc │ ├── swin │ │ ├── CMakeLists.txt │ │ ├── Swin.cc │ │ ├── Swin.h │ │ ├── SwinBasicLayer.cc │ │ ├── SwinBasicLayer.h │ │ ├── SwinBlock.cc │ │ ├── SwinBlock.h │ │ ├── SwinWeight.h │ │ └── swin_gemm.cc │ ├── swin_int8 │ │ ├── CMakeLists.txt │ │ ├── SwinBasicLayerINT8.cc │ │ ├── SwinBasicLayerINT8.h │ │ ├── SwinBlockINT8.cc │ │ ├── SwinBlockINT8.h │ │ ├── SwinINT8.cc │ │ ├── SwinINT8.h │ │ └── SwinINT8Weight.h │ ├── t5 │ │ ├── CMakeLists.txt │ │ ├── T5AdapterWeight.cc │ │ ├── T5AdapterWeight.h │ │ ├── T5Decoder.cc │ │ ├── T5Decoder.h │ │ ├── T5DecoderLayerWeight.cc │ │ ├── T5DecoderLayerWeight.h │ │ ├── T5Decoding.cc │ │ ├── T5Decoding.h │ │ ├── T5DecodingWeight.cc │ │ ├── T5DecodingWeight.h │ │ ├── T5Encoder.cc │ │ ├── T5Encoder.h │ │ ├── T5EncoderLayerWeight.cc │ │ ├── T5EncoderLayerWeight.h │ │ ├── T5EncoderWeight.cc │ │ ├── T5EncoderWeight.h │ │ └── t5_gemm.cc │ ├── vit │ │ ├── CMakeLists.txt │ │ ├── ViT.cc │ │ ├── ViT.h │ │ ├── ViTLayerWeight.h │ │ ├── ViTWeight.h │ │ └── vit_gemm.cc │ ├── vit_int8 │ │ ├── CMakeLists.txt │ │ ├── ViTINT8.cc │ │ ├── ViTINT8.h │ │ ├── ViTINT8Weight.h │ │ └── ViTLayerINT8Weight.h │ ├── wenet │ │ ├── CMakeLists.txt │ │ ├── ConformerConvLayer.cc │ │ ├── ConformerConvLayer.h │ │ ├── MultiHeadedAttentionLayer.cc │ │ ├── MultiHeadedAttentionLayer.h │ │ ├── RelPositionAttentionLayer.cc │ │ ├── RelPositionAttentionLayer.h │ │ ├── WenetDecoder.cc │ │ ├── WenetDecoder.h │ │ ├── WenetDecoderLayerWeight.cc │ │ ├── WenetDecoderLayerWeight.h │ │ ├── WenetDecoderWeight.cc │ │ ├── WenetDecoderWeight.h │ │ ├── WenetEncoder.cc │ │ ├── WenetEncoder.h │ │ ├── WenetEncoderLayerWeight.cc │ │ ├── WenetEncoderLayerWeight.h │ │ ├── WenetEncoderWeight.cc │ │ ├── WenetEncoderWeight.h │ │ ├── WenetKernels.cu │ │ ├── WenetKernels.h │ │ └── wenet_gemm.cc │ └── xlnet │ │ ├── CMakeLists.txt │ │ ├── Xlnet.cc │ │ ├── Xlnet.h │ │ ├── XlnetLayerWeight.h │ │ └── xlnet_gemm.cc │ ├── tensorrt_plugin │ ├── CMakeLists.txt │ ├── bert_fp8 │ │ ├── CMakeLists.txt │ │ ├── bertFp8Plugin.cu │ │ └── bertFp8Plugin.h │ ├── swin │ │ ├── CMakeLists.txt │ │ ├── serialize.hpp │ │ ├── swinTransformerINT8Plugin.cpp │ │ ├── swinTransformerINT8Plugin.h │ │ ├── swinTransformerPlugin.cpp │ │ └── swinTransformerPlugin.h │ ├── t5 │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── T5Plugin.cu │ │ ├── T5Plugin.h │ │ ├── T5PluginGemm.cc │ │ └── T5PluginGemm.h │ ├── vit │ │ ├── CMakeLists.txt │ │ ├── ViTINT8Plugin.cpp │ │ ├── ViTINT8Plugin.h │ │ ├── ViTPlugin.cpp │ │ └── ViTPlugin.h │ └── wenet │ │ ├── CMakeLists.txt │ │ ├── DecoderPlugin.cc │ │ ├── DecoderPlugin.h │ │ ├── EncoderPlugin.cc │ │ └── EncoderPlugin.h │ ├── tf_op │ ├── BaseOp.h │ ├── CMakeLists.txt │ ├── bert │ │ ├── BertINT8Op.cc │ │ ├── BertOp.cc │ │ ├── CMakeLists.txt │ │ └── weight_quantize_op.cc │ ├── deberta │ │ ├── CMakeLists.txt │ │ └── DebertaOp.cc │ ├── decoder │ │ ├── CMakeLists.txt │ │ ├── DecoderOp.cc │ │ └── FusedSelfAttentionOp.cc │ ├── decoding │ │ ├── CMakeLists.txt │ │ └── DecodingOp.cc │ ├── encoder │ │ ├── CMakeLists.txt │ │ └── EncoderOp.cc │ ├── gpt │ │ ├── CMakeLists.txt │ │ └── GptOp.cc │ └── t5 │ │ ├── CMakeLists.txt │ │ ├── T5DecodingOp.cc │ │ └── T5EncoderOp.cc │ ├── th_op │ ├── CMakeLists.txt │ ├── bart │ │ ├── BartDecoderOp.cc │ │ ├── BartDecoderOp.h │ │ ├── BartDecodingOp.cc │ │ ├── BartDecodingOp.h │ │ ├── BartEncoderOp.cc │ │ ├── BartEncoderOp.h │ │ └── CMakeLists.txt │ ├── bert │ │ ├── BertINT8Op.cc │ │ ├── BertINT8Op.h │ │ ├── BertOp.cc │ │ ├── BertOp.h │ │ ├── CMakeLists.txt │ │ └── WeightQuantizeOp.cc │ ├── common │ │ ├── CMakeLists.txt │ │ ├── DynamicDecodeOp.cc │ │ ├── DynamicDecodeOp.h │ │ ├── GptOps.cc │ │ └── WeightOnlyQuantOps.cc │ ├── deberta │ │ ├── CMakeLists.txt │ │ ├── DebertaOp.cc │ │ └── DebertaOp.h │ ├── decoder │ │ ├── CMakeLists.txt │ │ ├── DecoderOp.cc │ │ └── DecoderOp.h │ ├── decoding │ │ ├── CMakeLists.txt │ │ ├── DecodingOp.cc │ │ ├── DecodingOp.h │ │ ├── GatherTreeOp.cc │ │ └── GatherTreeOp.h │ ├── encoder │ │ ├── CMakeLists.txt │ │ ├── EncoderOp.cc │ │ └── EncoderOp.h │ ├── gpt │ │ ├── CMakeLists.txt │ │ ├── GptOp.cc │ │ └── GptOp.h │ ├── gpt_fp8 │ │ ├── CMakeLists.txt │ │ ├── GptFp8Op.cc │ │ └── GptFp8Op.h │ ├── gptneox │ │ ├── CMakeLists.txt │ │ ├── GptNeoXOp.cc │ │ └── GptNeoXOp.h │ ├── longformer │ │ ├── CMakeLists.txt │ │ ├── LongformerEncoderOp.cc │ │ └── LongformerEncoderOp.h │ ├── multi_gpu_gpt │ │ ├── CMakeLists.txt │ │ ├── ParallelGptContextDecoderOp.cc │ │ ├── ParallelGptContextDecoderOp.h │ │ ├── ParallelGptDecoderOp.cc │ │ ├── ParallelGptDecoderOp.h │ │ ├── ParallelGptOp.cc │ │ ├── ParallelGptOp.h │ │ ├── WeightTransposeCalibrateQuantizeOp.cc │ │ └── WeightTransposeCalibrateQuantizeOp.h │ ├── swin │ │ ├── CMakeLists.txt │ │ ├── SwinINT8Op.cc │ │ ├── SwinINT8Op.h │ │ ├── SwinOp.cc │ │ ├── SwinOp.h │ │ └── WeightQuantizeOp.cc │ ├── t5 │ │ ├── CMakeLists.txt │ │ ├── T5DecoderOp.cc │ │ ├── T5DecoderOp.h │ │ ├── T5DecodingOp.cc │ │ ├── T5DecodingOp.h │ │ ├── T5EncoderOp.cc │ │ └── T5EncoderOp.h │ ├── th_utils.cu │ ├── th_utils.h │ └── vit │ │ ├── CMakeLists.txt │ │ ├── ViTINT8Op.cc │ │ ├── ViTINT8Op.h │ │ ├── ViTOp.cc │ │ ├── ViTOp.h │ │ └── WeightQuantizeOp.cc │ ├── triton_backend │ ├── CMakeLists.txt │ ├── bert │ │ ├── BertTritonModel.cc │ │ ├── BertTritonModel.h │ │ ├── BertTritonModelInstance.cc │ │ ├── BertTritonModelInstance.h │ │ └── CMakeLists.txt │ ├── gptj │ │ ├── CMakeLists.txt │ │ ├── GptJTritonModel.cc │ │ ├── GptJTritonModel.h │ │ ├── GptJTritonModelInstance.cc │ │ └── GptJTritonModelInstance.h │ ├── gptneox │ │ ├── CMakeLists.txt │ │ ├── GptNeoXTritonModel.cc │ │ ├── GptNeoXTritonModel.h │ │ ├── GptNeoXTritonModelInstance.cc │ │ └── GptNeoXTritonModelInstance.h │ ├── llama │ │ ├── CMakeLists.txt │ │ ├── LlamaTritonModel.cc │ │ ├── LlamaTritonModel.h │ │ ├── LlamaTritonModelInstance.cc │ │ └── LlamaTritonModelInstance.h │ ├── multi_gpu_gpt │ │ ├── CMakeLists.txt │ │ ├── ParallelGptTritonModel.cc │ │ ├── ParallelGptTritonModel.h │ │ ├── ParallelGptTritonModelInstance.cc │ │ └── ParallelGptTritonModelInstance.h │ ├── multi_gpu_gpt_fp8 │ │ ├── CMakeLists.txt │ │ ├── ParallelGptFP8TritonModel.cc │ │ ├── ParallelGptFP8TritonModel.h │ │ ├── ParallelGptFP8TritonModelInstance.cc │ │ └── ParallelGptFP8TritonModelInstance.h │ ├── t5-encoder │ │ ├── CMakeLists.txt │ │ ├── T5EncoderTritonModel.cc │ │ ├── T5EncoderTritonModel.h │ │ ├── T5EncoderTritonModelInstance.cc │ │ └── T5EncoderTritonModelInstance.h │ ├── t5 │ │ ├── CMakeLists.txt │ │ ├── T5TritonModel.cc │ │ ├── T5TritonModel.h │ │ ├── T5TritonModelInstance.cc │ │ └── T5TritonModelInstance.h │ ├── transformer_triton_backend.cpp │ ├── transformer_triton_backend.hpp │ └── triton_utils.hpp │ └── utils │ ├── CMakeLists.txt │ ├── IA3.h │ ├── ScaleList.h │ ├── Tensor.cc │ ├── Tensor.h │ ├── activation_types.h │ ├── allocator.h │ ├── conv2d.h │ ├── convert_data_type.h │ ├── cublasAlgoMap.cc │ ├── cublasAlgoMap.h │ ├── cublasFP8MMWrapper.cu │ ├── cublasFP8MMWrapper.h │ ├── cublasINT8MMWrapper.cc │ ├── cublasINT8MMWrapper.h │ ├── cublasMMWrapper.cc │ ├── cublasMMWrapper.h │ ├── cuda_bf16_fallbacks.cuh │ ├── cuda_bf16_wrapper.h │ ├── cuda_fp8_utils.cu │ ├── cuda_fp8_utils.h │ ├── cuda_type_utils.cuh │ ├── cuda_utils.cc │ ├── cuda_utils.h │ ├── custom_ar_comm.cc │ ├── custom_ar_comm.h │ ├── gemm.cc │ ├── gemm.h │ ├── gemm_test │ ├── CMakeLists.txt │ ├── decoding_gemm_func.cc │ ├── decoding_gemm_func.h │ ├── encoder_gemm_func.cc │ ├── encoder_gemm_func.h │ ├── encoder_igemm_func.cc │ ├── encoder_igemm_func.h │ ├── gemm_func.cc │ ├── gemm_func.h │ ├── gpt_gemm_func.cc │ ├── gpt_gemm_func.h │ ├── swin_gemm_func.cc │ ├── swin_gemm_func.h │ ├── swin_igemm_func.cc │ ├── swin_igemm_func.h │ ├── t5_gemm_func.cc │ ├── t5_gemm_func.h │ ├── xlnet_gemm_func.cc │ └── xlnet_gemm_func.h │ ├── gpu_buf.h │ ├── logger.cc │ ├── logger.h │ ├── memory_utils.cu │ ├── memory_utils.h │ ├── mpi_utils.cc │ ├── mpi_utils.h │ ├── nccl_utils.cc │ ├── nccl_utils.h │ ├── nvtx_utils.cc │ ├── nvtx_utils.h │ ├── prompt_learning.h │ ├── string_utils.h │ ├── test_utils.h │ ├── wenet_conv2d.h │ ├── word_list.cc │ └── word_list.h ├── templates └── adding_a_new_model │ └── README.md └── tests ├── CMakeLists.txt ├── bert ├── tf_bert_unit_test.py └── th_bert_unit_test.py ├── data └── gpt_context_decoder_inputs │ ├── GPU-attention_mask.npy │ ├── GPU-batch_to_compact_idx.npy │ ├── GPU-compact_idx.npy │ ├── GPU-decoder_input.npy │ └── GPU-input_lengths.npy ├── decoding ├── tf_decoding_unit_test.py └── tf_fused_self_multihead_attention_unit_test.py ├── gemm_dequantize ├── CMakeLists.txt ├── th_gemm_dequantize.cc └── th_gemm_dequantize.py ├── int8_gemm ├── CMakeLists.txt └── int8_gemm_test.cu ├── longformer └── py_longformer_unit_test.py ├── moe ├── CMakeLists.txt ├── th_moe_ops.cc └── th_moe_unit_tests.py ├── unittests ├── CMakeLists.txt ├── fp8_gemm_test │ ├── 2022_03_21__fp8_stride_batch_example │ │ ├── Makefile │ │ ├── include │ │ │ ├── cuda_fp8.h │ │ │ └── cuda_fp8.hpp │ │ ├── main.cpp │ │ ├── worker.cpp │ │ └── worker.hpp │ ├── Makefile │ ├── cuda_utils.cu │ ├── cuda_utils.h │ ├── main.cpp │ ├── run_cublaslttest.sh │ ├── worker.cpp │ └── worker.hpp ├── gtest_utils.h ├── test_activation.cu ├── test_attention_kernels.cu ├── test_context_decoder_layer.cu ├── test_gemm.cu ├── test_gpt_kernels.cu ├── test_int8.cu ├── test_logprob_kernels.cu ├── test_penalty_kernels.cu ├── test_sampling.cu ├── test_sampling_kernels.cu ├── test_sampling_layer.cu ├── test_tensor.cu ├── th_op │ └── test_th_decode_op.py └── unittest_utils.h └── weight_only_quant_ops └── th_weight_quant_ops_unit_tests.py /.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/.clang-format -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/.dockerignore -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/.flake8 -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/.github/ISSUE_TEMPLATE/bug_report.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/.gitmodules -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /3rdparty/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/INIReader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/INIReader.h -------------------------------------------------------------------------------- /3rdparty/common/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/common/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/common/cudaDriverWrapper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/common/cudaDriverWrapper.cpp -------------------------------------------------------------------------------- /3rdparty/common/cudaDriverWrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/common/cudaDriverWrapper.h -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_histogram.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/agent/agent_histogram.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_radix_sort_downsweep.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/agent/agent_radix_sort_downsweep.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_radix_sort_upsweep.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/agent/agent_radix_sort_upsweep.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/agent/agent_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_reduce_by_key.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/agent/agent_reduce_by_key.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_rle.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/agent/agent_rle.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/agent/agent_scan.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_segment_fixup.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/agent/agent_segment_fixup.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_select_if.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/agent/agent_select_if.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/agent_spmv_orig.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/agent/agent_spmv_orig.cuh -------------------------------------------------------------------------------- /3rdparty/cub/agent/single_pass_scan_operators.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/agent/single_pass_scan_operators.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_adjacent_difference.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_adjacent_difference.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_discontinuity.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_discontinuity.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_exchange.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_exchange.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_histogram.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_histogram.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_load.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_load.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_radix_rank.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_radix_rank.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_radix_sort.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_radix_sort.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_raking_layout.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_raking_layout.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_scan.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_shuffle.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_shuffle.cuh -------------------------------------------------------------------------------- /3rdparty/cub/block/block_store.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/block/block_store.cuh -------------------------------------------------------------------------------- /3rdparty/cub/cub.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/cub.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_histogram.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/device_histogram.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_partition.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/device_partition.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_radix_sort.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/device_radix_sort.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/device_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_run_length_encode.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/device_run_length_encode.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/device_scan.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_segmented_radix_sort.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/device_segmented_radix_sort.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_segmented_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/device_segmented_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_select.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/device_select.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/device_spmv.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/device_spmv.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_histogram.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_histogram.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_radix_sort.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_radix_sort.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_reduce_by_key.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_reduce_by_key.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_rle.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_rle.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_scan.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_select_if.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_select_if.cuh -------------------------------------------------------------------------------- /3rdparty/cub/device/dispatch/dispatch_spmv_orig.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/device/dispatch/dispatch_spmv_orig.cuh -------------------------------------------------------------------------------- /3rdparty/cub/grid/grid_barrier.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/grid/grid_barrier.cuh -------------------------------------------------------------------------------- /3rdparty/cub/grid/grid_even_share.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/grid/grid_even_share.cuh -------------------------------------------------------------------------------- /3rdparty/cub/grid/grid_mapping.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/grid/grid_mapping.cuh -------------------------------------------------------------------------------- /3rdparty/cub/grid/grid_queue.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/grid/grid_queue.cuh -------------------------------------------------------------------------------- /3rdparty/cub/host/mutex.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/host/mutex.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/arg_index_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/iterator/arg_index_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/cache_modified_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/iterator/cache_modified_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/constant_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/iterator/constant_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/counting_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/iterator/counting_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/discard_output_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/iterator/discard_output_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/tex_obj_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/iterator/tex_obj_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/tex_ref_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/iterator/tex_ref_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/iterator/transform_input_iterator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/iterator/transform_input_iterator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_load.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/thread/thread_load.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_operators.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/thread/thread_operators.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/thread/thread_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/thread/thread_scan.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_search.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/thread/thread_search.cuh -------------------------------------------------------------------------------- /3rdparty/cub/thread/thread_store.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/thread/thread_store.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_allocator.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/util_allocator.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_arch.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/util_arch.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_debug.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/util_debug.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_device.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/util_device.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_macro.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/util_macro.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_namespace.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/util_namespace.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_ptx.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/util_ptx.cuh -------------------------------------------------------------------------------- /3rdparty/cub/util_type.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/util_type.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/specializations/warp_reduce_shfl.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/warp/specializations/warp_reduce_shfl.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/specializations/warp_reduce_smem.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/warp/specializations/warp_reduce_smem.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/specializations/warp_scan_shfl.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/warp/specializations/warp_scan_shfl.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/specializations/warp_scan_smem.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/warp/specializations/warp_scan_smem.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/warp_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/warp/warp_reduce.cuh -------------------------------------------------------------------------------- /3rdparty/cub/warp/warp_scan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/cub/warp/warp_scan.cuh -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/compute.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/compute.cuh -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/conv1x1.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/conv1x1.cuh -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/conv1x1_interface.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/conv1x1_interface.hpp -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/dma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/dma.cuh -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/fp8_gemm_1x1.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/fp8_gemm_1x1.h -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/fp8_qgmma_1x1_utils.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/fp8_qgmma_1x1_utils.cu -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/fp8_qgmma_1x1_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/fp8_qgmma_1x1_utils.h -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/parse_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/parse_profile.py -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/scheduler.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/scheduler.cuh -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/sharedCubinLoader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/sharedCubinLoader.h -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/tile_profile.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/tile_profile.cuh -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/traits.h -------------------------------------------------------------------------------- /3rdparty/fp8_qgmma_1x1/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/fp8_qgmma_1x1/utils.h -------------------------------------------------------------------------------- /3rdparty/trt_fp8_fmha/fused_multihead_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/trt_fp8_fmha/fused_multihead_attention.h -------------------------------------------------------------------------------- /3rdparty/trt_fused_multihead_attention/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/trt_fused_multihead_attention/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/trt_fused_multihead_attention/common.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/trt_fused_multihead_attention/common.cuh -------------------------------------------------------------------------------- /3rdparty/trt_fused_multihead_attention/qkvToContext.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/trt_fused_multihead_attention/qkvToContext.cu -------------------------------------------------------------------------------- /3rdparty/trt_fused_multihead_attention/qkvToContext.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/3rdparty/trt_fused_multihead_attention/qkvToContext.h -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/README.md -------------------------------------------------------------------------------- /benchmarks/bert/pyt_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/benchmarks/bert/pyt_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bert/pyt_int8_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/benchmarks/bert/pyt_int8_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bert/pyt_sp_fp16_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/benchmarks/bert/pyt_sp_fp16_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bert/pyt_sp_int8_mode2_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/benchmarks/bert/pyt_sp_int8_mode2_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bert/pyt_tp_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/benchmarks/bert/pyt_tp_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bert/tf_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/benchmarks/bert/tf_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bert/tf_int8_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/benchmarks/bert/tf_int8_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/decoding/tf_decoding_beamsearch_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/benchmarks/decoding/tf_decoding_beamsearch_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/decoding/tf_decoding_sampling_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/benchmarks/decoding/tf_decoding_sampling_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/gpt/cpp_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/benchmarks/gpt/cpp_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/t5/pyt_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/benchmarks/t5/pyt_benchmark.sh -------------------------------------------------------------------------------- /cmake/FasterTransformerConfig.cmake.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/cmake/FasterTransformerConfig.cmake.in -------------------------------------------------------------------------------- /cmake/Modules/FindCUDNN.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/cmake/Modules/FindCUDNN.cmake -------------------------------------------------------------------------------- /cmake/Modules/FindNCCL.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/cmake/Modules/FindNCCL.cmake -------------------------------------------------------------------------------- /docker/Dockerfile.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docker/Dockerfile.tf -------------------------------------------------------------------------------- /docker/Dockerfile.tf2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docker/Dockerfile.tf2 -------------------------------------------------------------------------------- /docker/Dockerfile.torch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docker/Dockerfile.torch -------------------------------------------------------------------------------- /docs/QAList.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/QAList.md -------------------------------------------------------------------------------- /docs/bart_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/bart_guide.md -------------------------------------------------------------------------------- /docs/bert_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/bert_guide.md -------------------------------------------------------------------------------- /docs/deberta_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/deberta_guide.md -------------------------------------------------------------------------------- /docs/decoder_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/decoder_guide.md -------------------------------------------------------------------------------- /docs/gpt_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/gpt_guide.md -------------------------------------------------------------------------------- /docs/gptj_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/gptj_guide.md -------------------------------------------------------------------------------- /docs/gptneox_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/gptneox_guide.md -------------------------------------------------------------------------------- /docs/images/FP-swin-flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/FP-swin-flowchart.png -------------------------------------------------------------------------------- /docs/images/FT_Encoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/FT_Encoder_T4.png -------------------------------------------------------------------------------- /docs/images/FT_GPT_A100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/FT_GPT_A100.png -------------------------------------------------------------------------------- /docs/images/INT8-swin-flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/INT8-swin-flowchart.png -------------------------------------------------------------------------------- /docs/images/PyTorch_Encoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/PyTorch_Encoder_T4.png -------------------------------------------------------------------------------- /docs/images/Py_Decoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/Py_Decoder_T4.png -------------------------------------------------------------------------------- /docs/images/Py_Encoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/Py_Encoder_T4.png -------------------------------------------------------------------------------- /docs/images/TF_Decoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/TF_Decoder_T4.png -------------------------------------------------------------------------------- /docs/images/TF_Encoder_T4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/TF_Encoder_T4.png -------------------------------------------------------------------------------- /docs/images/decoding/decoding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/decoding/decoding.png -------------------------------------------------------------------------------- /docs/images/effective_transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/effective_transformer.png -------------------------------------------------------------------------------- /docs/images/encoder-decoding-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/encoder-decoding-2.png -------------------------------------------------------------------------------- /docs/images/encoder_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/encoder_flowchart.png -------------------------------------------------------------------------------- /docs/images/gpt/Megatron_530B_benchmark_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt/Megatron_530B_benchmark_1.png -------------------------------------------------------------------------------- /docs/images/gpt/Megatron_530B_benchmark_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt/Megatron_530B_benchmark_2.png -------------------------------------------------------------------------------- /docs/images/gpt/Megatron_530B_benchmark_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt/Megatron_530B_benchmark_3.png -------------------------------------------------------------------------------- /docs/images/gpt/Megatron_530B_benchmark_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt/Megatron_530B_benchmark_4.png -------------------------------------------------------------------------------- /docs/images/gpt/SmoothQuant_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt/SmoothQuant_workflow.png -------------------------------------------------------------------------------- /docs/images/gpt/gpt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt/gpt.png -------------------------------------------------------------------------------- /docs/images/gpt/gpt_context.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt/gpt_context.png -------------------------------------------------------------------------------- /docs/images/gpt/gpt_interactive_generation.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt/gpt_interactive_generation.0.png -------------------------------------------------------------------------------- /docs/images/gpt/gpt_interactive_generation.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt/gpt_interactive_generation.1.png -------------------------------------------------------------------------------- /docs/images/gpt/gpt_interactive_generation.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt/gpt_interactive_generation.2.png -------------------------------------------------------------------------------- /docs/images/gpt/parallelgpt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt/parallelgpt.png -------------------------------------------------------------------------------- /docs/images/gpt_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/gpt_flowchart.png -------------------------------------------------------------------------------- /docs/images/longformer_compute_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/longformer_compute_flow.png -------------------------------------------------------------------------------- /docs/images/vit/vit-FMHA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/vit/vit-FMHA.png -------------------------------------------------------------------------------- /docs/images/vit/vit-fp32-fp16-compute-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/vit/vit-fp32-fp16-compute-flow.png -------------------------------------------------------------------------------- /docs/images/workflow-of-int8-inference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/workflow-of-int8-inference.png -------------------------------------------------------------------------------- /docs/images/xlnet_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/images/xlnet_flowchart.png -------------------------------------------------------------------------------- /docs/longformer_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/longformer_guide.md -------------------------------------------------------------------------------- /docs/models/megatron-345m-model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/models/megatron-345m-model.md -------------------------------------------------------------------------------- /docs/models/megatron-530b-model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/models/megatron-530b-model.md -------------------------------------------------------------------------------- /docs/swin_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/swin_guide.md -------------------------------------------------------------------------------- /docs/t5_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/t5_guide.md -------------------------------------------------------------------------------- /docs/vit_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/vit_guide.md -------------------------------------------------------------------------------- /docs/xlnet_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/docs/xlnet_guide.md -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/CMakeLists.txt -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/bert/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/bert/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/bert/bert_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/bert/bert_config.ini -------------------------------------------------------------------------------- /examples/cpp/bert/bert_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/bert/bert_example.cc -------------------------------------------------------------------------------- /examples/cpp/bert/bert_triton_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/bert/bert_triton_example.cc -------------------------------------------------------------------------------- /examples/cpp/bert_fp8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/bert_fp8/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/bert_fp8/bert_fp8_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/bert_fp8/bert_fp8_example.cc -------------------------------------------------------------------------------- /examples/cpp/bert_fp8/bert_fp8_example_squad.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/bert_fp8/bert_fp8_example_squad.cc -------------------------------------------------------------------------------- /examples/cpp/bert_int8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/bert_int8/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/bert_int8/bert_int8_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/bert_int8/bert_int8_example.cc -------------------------------------------------------------------------------- /examples/cpp/decoding/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/decoding/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/decoding/decoding_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/decoding/decoding_example.cc -------------------------------------------------------------------------------- /examples/cpp/decoding/layernorm_test.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/decoding/layernorm_test.cc -------------------------------------------------------------------------------- /examples/cpp/gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gpt/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/gpt/gpt_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gpt/gpt_config.ini -------------------------------------------------------------------------------- /examples/cpp/gpt/gpt_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gpt/gpt_example.cc -------------------------------------------------------------------------------- /examples/cpp/gpt/start_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gpt/start_ids.csv -------------------------------------------------------------------------------- /examples/cpp/gpt_fp8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gpt_fp8/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/gpt_fp8/gpt_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gpt_fp8/gpt_config.ini -------------------------------------------------------------------------------- /examples/cpp/gpt_fp8/gpt_fp8_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gpt_fp8/gpt_fp8_example.cc -------------------------------------------------------------------------------- /examples/cpp/gpt_fp8/gpt_fp8_triton_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gpt_fp8/gpt_fp8_triton_example.cc -------------------------------------------------------------------------------- /examples/cpp/gpt_fp8/start_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gpt_fp8/start_ids.csv -------------------------------------------------------------------------------- /examples/cpp/gptj/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptj/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/gptj/bad_words.csv: -------------------------------------------------------------------------------- 1 | 7768,3908 2 | 1,2 3 | -------------------------------------------------------------------------------- /examples/cpp/gptj/gptj_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptj/gptj_config.ini -------------------------------------------------------------------------------- /examples/cpp/gptj/gptj_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptj/gptj_example.cc -------------------------------------------------------------------------------- /examples/cpp/gptj/gptj_triton_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptj/gptj_triton_example.cc -------------------------------------------------------------------------------- /examples/cpp/gptj/start_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptj/start_ids.csv -------------------------------------------------------------------------------- /examples/cpp/gptj/stop_words.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptj/stop_words.csv -------------------------------------------------------------------------------- /examples/cpp/gptneox/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptneox/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/gptneox/bad_words.csv: -------------------------------------------------------------------------------- 1 | 7768,3908 2 | 1,2 3 | -------------------------------------------------------------------------------- /examples/cpp/gptneox/gptneox_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptneox/gptneox_config.ini -------------------------------------------------------------------------------- /examples/cpp/gptneox/gptneox_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptneox/gptneox_example.cc -------------------------------------------------------------------------------- /examples/cpp/gptneox/gptneox_triton_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptneox/gptneox_triton_example.cc -------------------------------------------------------------------------------- /examples/cpp/gptneox/start_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptneox/start_ids.csv -------------------------------------------------------------------------------- /examples/cpp/gptneox/stop_words.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/gptneox/stop_words.csv -------------------------------------------------------------------------------- /examples/cpp/llama/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/llama/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/llama/bad_words.csv: -------------------------------------------------------------------------------- 1 | 7768,3908 2 | 1,2 3 | -------------------------------------------------------------------------------- /examples/cpp/llama/check_with_huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/llama/check_with_huggingface.py -------------------------------------------------------------------------------- /examples/cpp/llama/huggingface_llama_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/llama/huggingface_llama_convert.py -------------------------------------------------------------------------------- /examples/cpp/llama/llama_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/llama/llama_config.ini -------------------------------------------------------------------------------- /examples/cpp/llama/llama_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/llama/llama_example.cc -------------------------------------------------------------------------------- /examples/cpp/llama/llama_triton_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/llama/llama_triton_example.cc -------------------------------------------------------------------------------- /examples/cpp/llama/model_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/llama/model_config.json -------------------------------------------------------------------------------- /examples/cpp/llama/start_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/llama/start_ids.csv -------------------------------------------------------------------------------- /examples/cpp/llama/stop_words.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/llama/stop_words.csv -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/concat_interactive_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/concat_interactive_ids.csv -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/gpt_config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/gpt_config.ini -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/gpt_example_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/gpt_example_utils.cc -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/gpt_example_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/gpt_example_utils.h -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/interactive_inputs_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/interactive_inputs_ids.csv -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/multi_gpu_gpt_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/multi_gpu_gpt_example.cc -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/start_ids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/start_ids.csv -------------------------------------------------------------------------------- /examples/cpp/multi_gpu_gpt/start_ids_opt.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/multi_gpu_gpt/start_ids_opt.csv -------------------------------------------------------------------------------- /examples/cpp/swin/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/swin/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/swin/functions.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/swin/functions.h -------------------------------------------------------------------------------- /examples/cpp/swin/swin_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/swin/swin_example.cc -------------------------------------------------------------------------------- /examples/cpp/swin_int8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/swin_int8/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/swin_int8/swin_int8_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/swin_int8/swin_int8_example.cc -------------------------------------------------------------------------------- /examples/cpp/vit/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/vit/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/vit/vit_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/vit/vit_example.cc -------------------------------------------------------------------------------- /examples/cpp/vit_int8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/vit_int8/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/vit_int8/vit_int8_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/vit_int8/vit_int8_example.cc -------------------------------------------------------------------------------- /examples/cpp/wenet/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/wenet/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/wenet/wenet_decoder_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/wenet/wenet_decoder_example.cc -------------------------------------------------------------------------------- /examples/cpp/wenet/wenet_encoder_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/wenet/wenet_encoder_example.cc -------------------------------------------------------------------------------- /examples/cpp/xlnet/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/xlnet/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/xlnet/cnpy.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/xlnet/cnpy.cpp -------------------------------------------------------------------------------- /examples/cpp/xlnet/cnpy.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/xlnet/cnpy.h -------------------------------------------------------------------------------- /examples/cpp/xlnet/xlnet_correctness_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/xlnet/xlnet_correctness_example.cc -------------------------------------------------------------------------------- /examples/cpp/xlnet/xlnet_example.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/cpp/xlnet/xlnet_example.cc -------------------------------------------------------------------------------- /examples/onnx/multi_gpu_gpt/onnx_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/onnx/multi_gpu_gpt/onnx_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/pytorch/bart/bart.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bart/bart.ipynb -------------------------------------------------------------------------------- /examples/pytorch/bart/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bart/requirement.txt -------------------------------------------------------------------------------- /examples/pytorch/bart/translate_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bart/translate_example.py -------------------------------------------------------------------------------- /examples/pytorch/bart/utils/ft_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bart/utils/ft_decoding.py -------------------------------------------------------------------------------- /examples/pytorch/bart/utils/ft_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bart/utils/ft_encoder.py -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/bert-quantization-sparsity/NOTICE -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/checkpoints/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/pytorch/bert/bert-quantization-sparsity/processors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/pytorch/bert/bert_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/bert_example.py -------------------------------------------------------------------------------- /examples/pytorch/bert/run_glue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/run_glue.py -------------------------------------------------------------------------------- /examples/pytorch/bert/run_squad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/run_squad.py -------------------------------------------------------------------------------- /examples/pytorch/bert/scripts/run_mrpc.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/scripts/run_mrpc.sh -------------------------------------------------------------------------------- /examples/pytorch/bert/scripts/run_squad.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/scripts/run_squad.sh -------------------------------------------------------------------------------- /examples/pytorch/bert/utils/checkpoint_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/utils/checkpoint_quantization.py -------------------------------------------------------------------------------- /examples/pytorch/bert/utils/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/utils/encoder.py -------------------------------------------------------------------------------- /examples/pytorch/bert/utils/get_mrpc_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/utils/get_mrpc_data.py -------------------------------------------------------------------------------- /examples/pytorch/bert/utils/huggingface_bert_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/utils/huggingface_bert_convert.py -------------------------------------------------------------------------------- /examples/pytorch/bert/utils/modeling_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/utils/modeling_bert.py -------------------------------------------------------------------------------- /examples/pytorch/bert/utils/update_bert_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/bert/utils/update_bert_config.py -------------------------------------------------------------------------------- /examples/pytorch/decoder/decoder_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoder/decoder_example.py -------------------------------------------------------------------------------- /examples/pytorch/decoder/utils/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoder/utils/decoder.py -------------------------------------------------------------------------------- /examples/pytorch/decoder/utils/ft_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoder/utils/ft_decoder.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/decoding_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/decoding_example.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/translate_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/translate_example.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/utils/__init__.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/bleu_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/utils/bleu_score.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/utils/decoding.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/download_model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/utils/download_model.sh -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/ft_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/utils/ft_decoding.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/recover_bpe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/utils/recover_bpe.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/translation/test.de: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/utils/translation/test.de -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/translation/test.en: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/utils/translation/test.en -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/translation_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/utils/translation_model.py -------------------------------------------------------------------------------- /examples/pytorch/decoding/utils/translator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/decoding/utils/translator.py -------------------------------------------------------------------------------- /examples/pytorch/encoder/encoder_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/encoder/encoder_example.py -------------------------------------------------------------------------------- /examples/pytorch/encoder/utils/ft_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/encoder/utils/ft_encoder.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/bloom_lambada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/bloom_lambada.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/duplicate_input_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/duplicate_input_ids.txt -------------------------------------------------------------------------------- /examples/pytorch/gpt/evaluate_zeroshot_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/evaluate_zeroshot_gpt.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/gpt_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/gpt_example.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/gpt_summarization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/gpt_summarization.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/lambada_task_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/lambada_task_example.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/multi_gpu_gpt_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/multi_gpu_gpt_example.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/opt_summarization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/opt_summarization.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/requirement.txt -------------------------------------------------------------------------------- /examples/pytorch/gpt/scripts/evaluate_zeroshot_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/scripts/evaluate_zeroshot_gpt.sh -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/bloom.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/comm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/comm.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/generate_gpt_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/generate_gpt_config.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/generate_start_ids.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/generate_start_ids.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/gpt.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/gpt_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/gpt_decoder.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/gpt_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/gpt_fp8.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/gpt_token_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/gpt_token_converter.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/gpt_token_encoder.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/huggingface_bloom_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/huggingface_bloom_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/huggingface_gpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/huggingface_gpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/huggingface_opt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/huggingface_opt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/megatron_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/megatron_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/megatron_fp8_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/megatron_fp8_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/nemo_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/nemo_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/parallel_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/parallel_gpt.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/profiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/profiler.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/tokenizer.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/update_gpt_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/update_gpt_config.py -------------------------------------------------------------------------------- /examples/pytorch/gpt/utils/word_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gpt/utils/word_list.py -------------------------------------------------------------------------------- /examples/pytorch/gptj/utils/generate_gptj_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gptj/utils/generate_gptj_config.py -------------------------------------------------------------------------------- /examples/pytorch/gptj/utils/gptj_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gptj/utils/gptj_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/gptj/utils/reference_gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gptj/utils/reference_gptj.py -------------------------------------------------------------------------------- /examples/pytorch/gptneox/gptneox_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gptneox/gptneox_example.py -------------------------------------------------------------------------------- /examples/pytorch/gptneox/utils/gptneox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gptneox/utils/gptneox.py -------------------------------------------------------------------------------- /examples/pytorch/gptneox/utils/hftokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/gptneox/utils/hftokenizer.py -------------------------------------------------------------------------------- /examples/pytorch/longformer/longformer_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/longformer/longformer_qa.py -------------------------------------------------------------------------------- /examples/pytorch/longformer/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/longformer/model.py -------------------------------------------------------------------------------- /examples/pytorch/nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/nemo.py -------------------------------------------------------------------------------- /examples/pytorch/swin/SwinTransformerINT8Weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/SwinTransformerINT8Weight.py -------------------------------------------------------------------------------- /examples/pytorch/swin/checkpoint_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/checkpoint_quantization.py -------------------------------------------------------------------------------- /examples/pytorch/swin/infer_swintransformer_acc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/infer_swintransformer_acc.py -------------------------------------------------------------------------------- /examples/pytorch/swin/infer_swintransformer_int8_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/infer_swintransformer_int8_op.py -------------------------------------------------------------------------------- /examples/pytorch/swin/infer_swintransformer_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/infer_swintransformer_op.py -------------------------------------------------------------------------------- /examples/pytorch/swin/run_test_fp16_accuracy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/run_test_fp16_accuracy.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/run_test_fp32_accuracy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/run_test_fp32_accuracy.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/run_test_v1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/run_test_v1.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/run_test_v1_int8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/run_test_v1_int8.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/run_test_v1_int8_accuracy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/run_test_v1_int8_accuracy.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/run_test_v2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/run_test_v2.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/run_test_v2_int8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/run_test_v2_int8.sh -------------------------------------------------------------------------------- /examples/pytorch/swin/run_test_v2_int8_accuracy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/swin/run_test_v2_int8_accuracy.sh -------------------------------------------------------------------------------- /examples/pytorch/t5/mnli_task_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/t5/mnli_task_example.py -------------------------------------------------------------------------------- /examples/pytorch/t5/perf_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/t5/perf_benchmark.py -------------------------------------------------------------------------------- /examples/pytorch/t5/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/t5/requirement.txt -------------------------------------------------------------------------------- /examples/pytorch/t5/summarization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/t5/summarization.py -------------------------------------------------------------------------------- /examples/pytorch/t5/translate_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/t5/translate_example.py -------------------------------------------------------------------------------- /examples/pytorch/t5/utils/ft_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/t5/utils/ft_decoding.py -------------------------------------------------------------------------------- /examples/pytorch/t5/utils/ft_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/t5/utils/ft_encoder.py -------------------------------------------------------------------------------- /examples/pytorch/t5/utils/megatron_t5_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/t5/utils/megatron_t5_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/t5/utils/nemo_t5_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/t5/utils/nemo_t5_ckpt_convert.py -------------------------------------------------------------------------------- /examples/pytorch/t5/utils/nemo_t5_ia3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/t5/utils/nemo_t5_ia3.py -------------------------------------------------------------------------------- /examples/pytorch/t5/xnli_task_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/t5/xnli_task_example.py -------------------------------------------------------------------------------- /examples/pytorch/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/tokenizer.py -------------------------------------------------------------------------------- /examples/pytorch/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/utils.py -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/README.md -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/calib.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/calib.sh -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/config.py -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/data.py -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/eval.sh -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/eval_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/eval_engine.py -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/eval_int8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/eval_int8.sh -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/main.py -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/qat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/qat.sh -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/quant_utils.py -------------------------------------------------------------------------------- /examples/pytorch/vit/ViT-quantization/vit_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/ViT-quantization/vit_int8.py -------------------------------------------------------------------------------- /examples/pytorch/vit/VisionTransformerWeightLoader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/VisionTransformerWeightLoader.py -------------------------------------------------------------------------------- /examples/pytorch/vit/checkpoint_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/checkpoint_quantization.py -------------------------------------------------------------------------------- /examples/pytorch/vit/infer_visiontransformer_int8_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/infer_visiontransformer_int8_op.py -------------------------------------------------------------------------------- /examples/pytorch/vit/infer_visiontransformer_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/infer_visiontransformer_op.py -------------------------------------------------------------------------------- /examples/pytorch/vit/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/requirement.txt -------------------------------------------------------------------------------- /examples/pytorch/vit/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/run.sh -------------------------------------------------------------------------------- /examples/pytorch/vit/run2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/run2.sh -------------------------------------------------------------------------------- /examples/pytorch/vit/run_int8_accuracy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/pytorch/vit/run_int8_accuracy.sh -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/Dockerfile -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/LICENSE -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/NOTICE -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/README.md -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/__init__.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/modeling.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert-quantization/run_squad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/bert-quantization/run_squad.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/bert_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/bert_example.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/tensorflow_bert/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/tensorflow_bert/__init__.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/tensorflow_bert/my_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/tensorflow_bert/my_modeling.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/tensorflow_bert/sample.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/tensorflow_bert/sample.md -------------------------------------------------------------------------------- /examples/tensorflow/bert/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/utils/__init__.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/utils/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/utils/bert.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/utils/common.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/utils/position.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/utils/position.py -------------------------------------------------------------------------------- /examples/tensorflow/bert/utils/reducer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/bert/utils/reducer.py -------------------------------------------------------------------------------- /examples/tensorflow/ckpt_type_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/ckpt_type_convert.py -------------------------------------------------------------------------------- /examples/tensorflow/common_utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/common_utils/common.py -------------------------------------------------------------------------------- /examples/tensorflow/common_utils/position.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/common_utils/position.py -------------------------------------------------------------------------------- /examples/tensorflow/common_utils/reducer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/common_utils/reducer.py -------------------------------------------------------------------------------- /examples/tensorflow/deberta/deberta_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/deberta/deberta_example.py -------------------------------------------------------------------------------- /examples/tensorflow/deberta/requirement.txt: -------------------------------------------------------------------------------- 1 | SentencePiece~=0.1.96 2 | transformers~=4.20.1 3 | numpy -------------------------------------------------------------------------------- /examples/tensorflow/deberta/utils/ft_deberta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/deberta/utils/ft_deberta.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/decoder_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoder/decoder_example.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/beam_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/beam_search.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/common.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/decoder.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/decoding.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/position.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/position.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/reducer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/reducer.py -------------------------------------------------------------------------------- /examples/tensorflow/decoder/utils/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoder/utils/sampling.py -------------------------------------------------------------------------------- /examples/tensorflow/decoding/decoding_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoding/decoding_example.py -------------------------------------------------------------------------------- /examples/tensorflow/decoding/translate_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoding/translate_example.py -------------------------------------------------------------------------------- /examples/tensorflow/decoding/utils/bleu_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoding/utils/bleu_score.py -------------------------------------------------------------------------------- /examples/tensorflow/decoding/utils/ft_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoding/utils/ft_decoding.py -------------------------------------------------------------------------------- /examples/tensorflow/decoding/utils/translation/test.de: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoding/utils/translation/test.de -------------------------------------------------------------------------------- /examples/tensorflow/decoding/utils/translation/test.en: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/decoding/utils/translation/test.en -------------------------------------------------------------------------------- /examples/tensorflow/encoder/encoder_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/encoder/encoder_example.py -------------------------------------------------------------------------------- /examples/tensorflow/encoder/utils/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/encoder/utils/encoder.py -------------------------------------------------------------------------------- /examples/tensorflow/gpt/gpt_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/gpt/gpt_example.py -------------------------------------------------------------------------------- /examples/tensorflow/gpt/utils/download_gpt2_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/gpt/utils/download_gpt2_model.py -------------------------------------------------------------------------------- /examples/tensorflow/gpt/utils/gpt_token_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/gpt/utils/gpt_token_converter.py -------------------------------------------------------------------------------- /examples/tensorflow/gpt/utils/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/gpt/utils/gpt_token_encoder.py -------------------------------------------------------------------------------- /examples/tensorflow/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/requirement.txt -------------------------------------------------------------------------------- /examples/tensorflow/t5/requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/t5/requirement.txt -------------------------------------------------------------------------------- /examples/tensorflow/t5/translate_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/t5/translate_example.py -------------------------------------------------------------------------------- /examples/tensorflow/t5/utils/ft_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/t5/utils/ft_decoding.py -------------------------------------------------------------------------------- /examples/tensorflow/t5/utils/ft_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/t5/utils/ft_encoder.py -------------------------------------------------------------------------------- /examples/tensorflow/t5/utils/jax_t5_ckpt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/t5/utils/jax_t5_ckpt_convert.py -------------------------------------------------------------------------------- /examples/tensorflow/t5/utils/ul2_config.template: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/t5/utils/ul2_config.template -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/convertInput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/xlnet/convertInput.py -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/convertModel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/xlnet/convertModel.py -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/downloadModel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/xlnet/downloadModel.sh -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/xlnet/modeling.py -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/runData.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/xlnet/runData.py -------------------------------------------------------------------------------- /examples/tensorflow/xlnet/verifyCorrectness.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorflow/xlnet/verifyCorrectness.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/builder.py -------------------------------------------------------------------------------- /examples/tensorrt/swin/builder_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/builder_int8.py -------------------------------------------------------------------------------- /examples/tensorrt/swin/infer_swintransformer_plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/infer_swintransformer_plugin.py -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_builder_fp16_v1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/run_builder_fp16_v1.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_builder_fp16_v2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/run_builder_fp16_v2.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_builder_fp32_v1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/run_builder_fp32_v1.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_builder_fp32_v2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/run_builder_fp32_v2.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_builder_int8_v1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/run_builder_int8_v1.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_builder_int8_v2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/run_builder_int8_v2.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_infer_fp16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/run_infer_fp16.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_infer_fp32.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/run_infer_fp32.sh -------------------------------------------------------------------------------- /examples/tensorrt/swin/run_infer_int8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/swin/run_infer_int8.sh -------------------------------------------------------------------------------- /examples/tensorrt/t5/createT5TestData.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/t5/createT5TestData.py -------------------------------------------------------------------------------- /examples/tensorrt/t5/extractT5ModelToBIN.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/t5/extractT5ModelToBIN.py -------------------------------------------------------------------------------- /examples/tensorrt/t5/testT5Plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/t5/testT5Plugin.py -------------------------------------------------------------------------------- /examples/tensorrt/vit/infer_visiontransformer_plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/vit/infer_visiontransformer_plugin.py -------------------------------------------------------------------------------- /examples/tensorrt/vit/plugin_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/vit/plugin_loader.py -------------------------------------------------------------------------------- /examples/tensorrt/vit/plugin_loader_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/tensorrt/vit/plugin_loader_int8.py -------------------------------------------------------------------------------- /examples/utils/hf_detokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/utils/hf_detokenize.py -------------------------------------------------------------------------------- /examples/utils/hf_tokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/examples/utils/hf_tokenize.py -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/kernels/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/kernels/activation_fp8_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/activation_fp8_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/activation_fp8_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/activation_fp8_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/activation_int8_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/activation_int8_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/activation_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/activation_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/activation_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/activation_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/add_residual_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/add_residual_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/add_residual_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/add_residual_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/ban_bad_words.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/ban_bad_words.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/ban_bad_words.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/ban_bad_words.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/bert_preprocess_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/bert_preprocess_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/custom_ar_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/custom_ar_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/custom_ar_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/custom_ar_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/decoding_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/decoding_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/decoding_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/decoding_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/dequantize_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/dequantize_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/dequantize_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/dequantize_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/gen_relative_pos_bias.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/gen_relative_pos_bias.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/gen_relative_pos_bias.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/gen_relative_pos_bias.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/gpt_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/gpt_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/gpt_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/gpt_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/image_merge_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/image_merge_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/image_merge_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/image_merge_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/int8_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/int8_utils.cuh -------------------------------------------------------------------------------- /src/fastertransformer/kernels/layernorm_fp8_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/layernorm_fp8_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/layernorm_fp8_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/layernorm_fp8_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/layernorm_int8_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/layernorm_int8_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/layernorm_int8_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/layernorm_int8_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/layernorm_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/layernorm_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/layernorm_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/layernorm_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/logprob_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/logprob_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/logprob_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/logprob_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/longformer_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/longformer_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/longformer_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/longformer_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/moe_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/moe_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/moe_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/moe_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/normalize_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/normalize_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/normalize_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/normalize_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/penalty_types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/penalty_types.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/quantize_weight.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/quantize_weight.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/quantize_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/quantize_weight.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/reduce_kernel_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/reduce_kernel_utils.cuh -------------------------------------------------------------------------------- /src/fastertransformer/kernels/reverse_roll_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/reverse_roll_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/reverse_roll_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/reverse_roll_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/softmax_int8_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/softmax_int8_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/kernels/vit_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/vit_kernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/kernels/vit_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/kernels/vit_kernels.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/BaseLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/BaseLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/layers/DenseWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/DenseWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/DynamicDecodeLayer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/DynamicDecodeLayer.cc -------------------------------------------------------------------------------- /src/fastertransformer/layers/DynamicDecodeLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/DynamicDecodeLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnFP8Layer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/FfnFP8Layer.cc -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnFP8Layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/FfnFP8Layer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnFP8Weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/FfnFP8Weight.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnINT8Weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/FfnINT8Weight.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnLayer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/FfnLayer.cc -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/FfnLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnLayerINT8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/FfnLayerINT8.cc -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnLayerINT8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/FfnLayerINT8.h -------------------------------------------------------------------------------- /src/fastertransformer/layers/FfnWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/layers/FfnWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/BaseWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/BaseWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/bart/BartDecoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bart/BartDecoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bart/BartDecoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bart/BartDecoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bart/BartDecoding.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bart/BartDecoding.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bart/BartDecoding.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bart/BartDecoding.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bart/BartEncoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bart/BartEncoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bart/BartEncoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bart/BartEncoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bart/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bart/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/Bert.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert/Bert.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/Bert.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert/Bert.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/BertLayerWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert/BertLayerWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/BertLayerWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert/BertLayerWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/BertWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert/BertWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/BertWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert/BertWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/bert/bert_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert/bert_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_fp8/BertFP8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert_fp8/BertFP8.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_fp8/BertFP8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert_fp8/BertFP8.h -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_fp8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert_fp8/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_fp8/serialize.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert_fp8/serialize.hpp -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_int8/BertINT8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert_int8/BertINT8.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/bert_int8/BertINT8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/bert_int8/BertINT8.h -------------------------------------------------------------------------------- /src/fastertransformer/models/deberta/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/deberta/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/deberta/Deberta.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/deberta/Deberta.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/deberta/Deberta.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/deberta/Deberta.h -------------------------------------------------------------------------------- /src/fastertransformer/models/deberta/DebertaWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/deberta/DebertaWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/decoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/decoder/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/decoder/Decoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/decoder/Decoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/decoder/Decoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/decoder/Decoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/decoding/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/decoding/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/decoding/Decoding.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/decoding/Decoding.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/decoding/Decoding.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/decoding/Decoding.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt_fp8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gpt_fp8/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt_fp8/GptFP8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gpt_fp8/GptFP8.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt_fp8/GptFP8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gpt_fp8/GptFP8.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt_fp8/GptFP8Decoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gpt_fp8/GptFP8Decoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt_fp8/GptFP8Weight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gpt_fp8/GptFP8Weight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt_fp8/GptFP8Weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gpt_fp8/GptFP8Weight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gpt_fp8/gpt_fp8_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gpt_fp8/gpt_fp8_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gptj/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJ.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJ.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJ.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJ.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJDecoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJDecoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJDecoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJDecoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gptj/GptJWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gptj/GptJWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gptneox/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gptneox/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/gptneox/GptNeoX.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gptneox/GptNeoX.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/gptneox/GptNeoX.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gptneox/GptNeoX.h -------------------------------------------------------------------------------- /src/fastertransformer/models/gptneox/GptNeoXWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/gptneox/GptNeoXWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/llama/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/llama/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/llama/Llama.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/llama/Llama.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/llama/Llama.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/llama/Llama.h -------------------------------------------------------------------------------- /src/fastertransformer/models/llama/LlamaDecoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/llama/LlamaDecoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/llama/LlamaDecoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/llama/LlamaDecoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/llama/LlamaWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/llama/LlamaWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/llama/LlamaWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/llama/LlamaWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/swin/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/Swin.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/swin/Swin.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/Swin.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/swin/Swin.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/SwinBasicLayer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/swin/SwinBasicLayer.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/SwinBasicLayer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/swin/SwinBasicLayer.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/SwinBlock.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/swin/SwinBlock.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/SwinBlock.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/swin/SwinBlock.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/SwinWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/swin/SwinWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/swin/swin_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/swin/swin_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin_int8/SwinINT8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/swin_int8/SwinINT8.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/swin_int8/SwinINT8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/swin_int8/SwinINT8.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5AdapterWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5AdapterWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5AdapterWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5AdapterWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Decoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Decoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Decoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Decoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Decoding.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Decoding.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Decoding.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Decoding.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5DecodingWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5DecodingWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5DecodingWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5DecodingWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Encoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Encoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5Encoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5Encoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5EncoderWeight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5EncoderWeight.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/T5EncoderWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/T5EncoderWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/t5/t5_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/t5/t5_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/vit/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/ViT.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/vit/ViT.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/ViT.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/vit/ViT.h -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/ViTLayerWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/vit/ViTLayerWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/ViTWeight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/vit/ViTWeight.h -------------------------------------------------------------------------------- /src/fastertransformer/models/vit/vit_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/vit/vit_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/vit_int8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/vit_int8/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/vit_int8/ViTINT8.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/vit_int8/ViTINT8.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/vit_int8/ViTINT8.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/vit_int8/ViTINT8.h -------------------------------------------------------------------------------- /src/fastertransformer/models/wenet/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/wenet/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/wenet/WenetDecoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/wenet/WenetDecoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/wenet/WenetDecoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/wenet/WenetDecoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/wenet/WenetEncoder.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/wenet/WenetEncoder.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/wenet/WenetEncoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/wenet/WenetEncoder.h -------------------------------------------------------------------------------- /src/fastertransformer/models/wenet/WenetKernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/wenet/WenetKernels.cu -------------------------------------------------------------------------------- /src/fastertransformer/models/wenet/WenetKernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/wenet/WenetKernels.h -------------------------------------------------------------------------------- /src/fastertransformer/models/wenet/wenet_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/wenet/wenet_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/xlnet/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/xlnet/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/models/xlnet/Xlnet.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/xlnet/Xlnet.cc -------------------------------------------------------------------------------- /src/fastertransformer/models/xlnet/Xlnet.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/xlnet/Xlnet.h -------------------------------------------------------------------------------- /src/fastertransformer/models/xlnet/xlnet_gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/models/xlnet/xlnet_gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/t5/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/t5/README.md -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/t5/T5Plugin.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/t5/T5Plugin.cu -------------------------------------------------------------------------------- /src/fastertransformer/tensorrt_plugin/t5/T5Plugin.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tensorrt_plugin/t5/T5Plugin.h -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/BaseOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/BaseOp.h -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/bert/BertINT8Op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/bert/BertINT8Op.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/bert/BertOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/bert/BertOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/bert/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/bert/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/deberta/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/deberta/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/deberta/DebertaOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/deberta/DebertaOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/decoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/decoder/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/decoder/DecoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/decoder/DecoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/decoding/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/decoding/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/decoding/DecodingOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/decoding/DecodingOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/encoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/encoder/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/encoder/EncoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/encoder/EncoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/gpt/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/gpt/GptOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/gpt/GptOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/t5/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/t5/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/t5/T5DecodingOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/t5/T5DecodingOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/tf_op/t5/T5EncoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/tf_op/t5/T5EncoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bart/BartDecoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bart/BartDecoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bart/BartDecoderOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bart/BartDecoderOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bart/BartDecodingOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bart/BartDecodingOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bart/BartDecodingOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bart/BartDecodingOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bart/BartEncoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bart/BartEncoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bart/BartEncoderOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bart/BartEncoderOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bart/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bart/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/BertINT8Op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/BertINT8Op.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/BertINT8Op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/BertINT8Op.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/BertOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/BertOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/BertOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/BertOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/bert/WeightQuantizeOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/bert/WeightQuantizeOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/common/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/common/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/common/DynamicDecodeOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/common/DynamicDecodeOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/common/GptOps.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/common/GptOps.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/deberta/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/deberta/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/deberta/DebertaOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/deberta/DebertaOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/deberta/DebertaOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/deberta/DebertaOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/decoder/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoder/DecoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/decoder/DecoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoder/DecoderOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/decoder/DecoderOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoding/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/decoding/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoding/DecodingOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/decoding/DecodingOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoding/DecodingOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/decoding/DecodingOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoding/GatherTreeOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/decoding/GatherTreeOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/decoding/GatherTreeOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/decoding/GatherTreeOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/encoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/encoder/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/encoder/EncoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/encoder/EncoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/encoder/EncoderOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/encoder/EncoderOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/gpt/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gpt/GptOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/gpt/GptOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gpt/GptOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/gpt/GptOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gpt_fp8/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/gpt_fp8/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gpt_fp8/GptFp8Op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/gpt_fp8/GptFp8Op.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gpt_fp8/GptFp8Op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/gpt_fp8/GptFp8Op.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gptneox/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/gptneox/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gptneox/GptNeoXOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/gptneox/GptNeoXOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/gptneox/GptNeoXOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/gptneox/GptNeoXOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/SwinINT8Op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/SwinINT8Op.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/SwinINT8Op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/SwinINT8Op.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/SwinOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/SwinOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/SwinOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/SwinOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/swin/WeightQuantizeOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/swin/WeightQuantizeOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5DecoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5DecoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5DecoderOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5DecoderOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5DecodingOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5DecodingOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5DecodingOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5DecodingOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5EncoderOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5EncoderOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/t5/T5EncoderOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/t5/T5EncoderOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/th_utils.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/th_utils.cu -------------------------------------------------------------------------------- /src/fastertransformer/th_op/th_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/th_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/ViTINT8Op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/ViTINT8Op.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/ViTINT8Op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/ViTINT8Op.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/ViTOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/ViTOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/ViTOp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/ViTOp.h -------------------------------------------------------------------------------- /src/fastertransformer/th_op/vit/WeightQuantizeOp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/th_op/vit/WeightQuantizeOp.cc -------------------------------------------------------------------------------- /src/fastertransformer/triton_backend/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/triton_backend/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/utils/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/utils/IA3.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/IA3.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/ScaleList.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/ScaleList.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/Tensor.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/Tensor.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/Tensor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/Tensor.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/activation_types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/activation_types.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/allocator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/allocator.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/conv2d.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/conv2d.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/convert_data_type.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/convert_data_type.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasAlgoMap.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cublasAlgoMap.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasAlgoMap.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cublasAlgoMap.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasFP8MMWrapper.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cublasFP8MMWrapper.cu -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasFP8MMWrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cublasFP8MMWrapper.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasINT8MMWrapper.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cublasINT8MMWrapper.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasINT8MMWrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cublasINT8MMWrapper.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasMMWrapper.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cublasMMWrapper.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/cublasMMWrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cublasMMWrapper.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cuda_bf16_fallbacks.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cuda_bf16_fallbacks.cuh -------------------------------------------------------------------------------- /src/fastertransformer/utils/cuda_bf16_wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cuda_bf16_wrapper.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cuda_fp8_utils.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cuda_fp8_utils.cu -------------------------------------------------------------------------------- /src/fastertransformer/utils/cuda_fp8_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cuda_fp8_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/cuda_type_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cuda_type_utils.cuh -------------------------------------------------------------------------------- /src/fastertransformer/utils/cuda_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cuda_utils.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/cuda_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/cuda_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/custom_ar_comm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/custom_ar_comm.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/custom_ar_comm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/custom_ar_comm.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/gemm.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/gemm.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/CMakeLists.txt -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/gemm_func.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/gemm_func.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/gemm_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/gemm_func.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gemm_test/t5_gemm_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/gemm_test/t5_gemm_func.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/gpu_buf.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/gpu_buf.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/logger.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/logger.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/logger.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/logger.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/memory_utils.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/memory_utils.cu -------------------------------------------------------------------------------- /src/fastertransformer/utils/memory_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/memory_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/mpi_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/mpi_utils.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/mpi_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/mpi_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/nccl_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/nccl_utils.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/nccl_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/nccl_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/nvtx_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/nvtx_utils.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/nvtx_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/nvtx_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/prompt_learning.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/prompt_learning.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/string_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/string_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/test_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/test_utils.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/wenet_conv2d.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/wenet_conv2d.h -------------------------------------------------------------------------------- /src/fastertransformer/utils/word_list.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/word_list.cc -------------------------------------------------------------------------------- /src/fastertransformer/utils/word_list.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/src/fastertransformer/utils/word_list.h -------------------------------------------------------------------------------- /templates/adding_a_new_model/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/templates/adding_a_new_model/README.md -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/CMakeLists.txt -------------------------------------------------------------------------------- /tests/bert/tf_bert_unit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/bert/tf_bert_unit_test.py -------------------------------------------------------------------------------- /tests/bert/th_bert_unit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/bert/th_bert_unit_test.py -------------------------------------------------------------------------------- /tests/decoding/tf_decoding_unit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/decoding/tf_decoding_unit_test.py -------------------------------------------------------------------------------- /tests/gemm_dequantize/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/gemm_dequantize/CMakeLists.txt -------------------------------------------------------------------------------- /tests/gemm_dequantize/th_gemm_dequantize.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/gemm_dequantize/th_gemm_dequantize.cc -------------------------------------------------------------------------------- /tests/gemm_dequantize/th_gemm_dequantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/gemm_dequantize/th_gemm_dequantize.py -------------------------------------------------------------------------------- /tests/int8_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/int8_gemm/CMakeLists.txt -------------------------------------------------------------------------------- /tests/int8_gemm/int8_gemm_test.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/int8_gemm/int8_gemm_test.cu -------------------------------------------------------------------------------- /tests/longformer/py_longformer_unit_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/longformer/py_longformer_unit_test.py -------------------------------------------------------------------------------- /tests/moe/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/moe/CMakeLists.txt -------------------------------------------------------------------------------- /tests/moe/th_moe_ops.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/moe/th_moe_ops.cc -------------------------------------------------------------------------------- /tests/moe/th_moe_unit_tests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/moe/th_moe_unit_tests.py -------------------------------------------------------------------------------- /tests/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/CMakeLists.txt -------------------------------------------------------------------------------- /tests/unittests/fp8_gemm_test/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/fp8_gemm_test/Makefile -------------------------------------------------------------------------------- /tests/unittests/fp8_gemm_test/cuda_utils.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/fp8_gemm_test/cuda_utils.cu -------------------------------------------------------------------------------- /tests/unittests/fp8_gemm_test/cuda_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/fp8_gemm_test/cuda_utils.h -------------------------------------------------------------------------------- /tests/unittests/fp8_gemm_test/main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/fp8_gemm_test/main.cpp -------------------------------------------------------------------------------- /tests/unittests/fp8_gemm_test/run_cublaslttest.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/fp8_gemm_test/run_cublaslttest.sh -------------------------------------------------------------------------------- /tests/unittests/fp8_gemm_test/worker.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/fp8_gemm_test/worker.cpp -------------------------------------------------------------------------------- /tests/unittests/fp8_gemm_test/worker.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/fp8_gemm_test/worker.hpp -------------------------------------------------------------------------------- /tests/unittests/gtest_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/gtest_utils.h -------------------------------------------------------------------------------- /tests/unittests/test_activation.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_activation.cu -------------------------------------------------------------------------------- /tests/unittests/test_attention_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_attention_kernels.cu -------------------------------------------------------------------------------- /tests/unittests/test_context_decoder_layer.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_context_decoder_layer.cu -------------------------------------------------------------------------------- /tests/unittests/test_gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_gemm.cu -------------------------------------------------------------------------------- /tests/unittests/test_gpt_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_gpt_kernels.cu -------------------------------------------------------------------------------- /tests/unittests/test_int8.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_int8.cu -------------------------------------------------------------------------------- /tests/unittests/test_logprob_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_logprob_kernels.cu -------------------------------------------------------------------------------- /tests/unittests/test_penalty_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_penalty_kernels.cu -------------------------------------------------------------------------------- /tests/unittests/test_sampling.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_sampling.cu -------------------------------------------------------------------------------- /tests/unittests/test_sampling_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_sampling_kernels.cu -------------------------------------------------------------------------------- /tests/unittests/test_sampling_layer.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_sampling_layer.cu -------------------------------------------------------------------------------- /tests/unittests/test_tensor.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/test_tensor.cu -------------------------------------------------------------------------------- /tests/unittests/th_op/test_th_decode_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/th_op/test_th_decode_op.py -------------------------------------------------------------------------------- /tests/unittests/unittest_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/void-main/FasterTransformer/HEAD/tests/unittests/unittest_utils.h --------------------------------------------------------------------------------