├── .clang-format
├── .dockerignore
├── .github
    ├── PULL_REQUEST_TEMPLATE.md
    ├── runs-on.yml
    └── workflows
    │   ├── build.yml
    │   ├── clang-format-check.yml
    │   ├── docker-build.yml
    │   ├── gpu-ci.yml
    │   ├── helpers
    │       ├── free_space_on_runner.sh
    │       ├── gpu_ci_helper.py
    │       ├── install_cudnn.sh
    │       ├── install_dependencies.sh
    │       ├── install_nccl.sh
    │       └── oracle_con.py
    │   ├── pip-deploy.yml
    │   ├── pip-install.yml
    │   └── shell-check.yml
├── .gitignore
├── .gitmodules
├── .readthedocs.yaml
├── CMakeLists.txt
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── INSTALL.md
├── LICENSE
├── MANIFEST.in
├── MULTI-NODE.md
├── README.md
├── SERVE.md
├── cmake
    ├── cuda.cmake
    ├── cudnn.cmake
    ├── flash_attn.cmake
    ├── hip.cmake
    ├── json.cmake
    ├── legion.cmake
    ├── nccl.cmake
    ├── optional.cmake
    ├── pip_install
    │   └── CMakeLists.txt
    ├── utils.cmake
    └── variant.cmake
├── conda
    └── flexflow.yml
├── config
    ├── config.inc
    └── config.linux
├── docker
    ├── README.md
    ├── build.sh
    ├── flexflow-environment
    │   ├── Dockerfile
    │   └── install_pytorch.sh
    ├── flexflow
    │   └── Dockerfile
    ├── publish.sh
    ├── pull.sh
    └── run.sh
├── docs
    ├── Makefile
    ├── doxygen
    │   ├── Doxyfile
    │   ├── README.md
    │   └── theme
    │   │   ├── rust_customdoxygen.css
    │   │   ├── rust_footer.html
    │   │   └── rust_header.html
    ├── make.bat
    ├── requirements.txt
    └── source
    │   ├── chatbot.rst
    │   ├── conf.py
    │   ├── cpp_api.rst
    │   ├── developers_guide
    │       ├── developers_guide.rst
    │       ├── ff_internals.rst
    │       └── internals.md
    │   ├── docker.rst
    │   ├── imgs
    │       ├── gradio_api.png
    │       └── gradio_interface.png
    │   ├── index.rst
    │   ├── installation.rst
    │   ├── keras.rst
    │   ├── mt5.rst
    │   ├── multinode.rst
    │   ├── onnx.rst
    │   ├── prompt_template.rst
    │   ├── python
    │       ├── create.rst
    │       ├── dataloader.rst
    │       ├── init.rst
    │       ├── layers.rst
    │       ├── models.rst
    │       └── train.rst
    │   ├── pytorch.rst
    │   ├── rag.rst
    │   ├── serve_api.rst
    │   ├── serve_fastapi.rst
    │   ├── serve_gradioapi.rst
    │   ├── serve_overview.rst
    │   ├── serve_usecases.rst
    │   ├── train_examples.rst
    │   ├── train_interface.rst
    │   ├── train_overview.rst
    │   ├── train_python_api.rst
    │   └── welcome.rst
├── img
    ├── overview.png
    ├── performance.png
    └── spec_infer_demo.gif
├── include
    └── flexflow
    │   ├── accessor.h
    │   ├── attention_config.h
    │   ├── basic_graph.h
    │   ├── batch_config.h
    │   ├── config.h
    │   ├── dataloader.h
    │   ├── device.h
    │   ├── dominators.h
    │   ├── ffconst.h
    │   ├── ffconst_utils.h
    │   ├── fftype.h
    │   ├── flash_api.h
    │   ├── flexflow_c.h
    │   ├── gpt_tokenizer.h
    │   ├── graph.h
    │   ├── graph_structures.h
    │   ├── inference.h
    │   ├── initializer.h
    │   ├── layer.h
    │   ├── loss_functions.h
    │   ├── machine_view.h
    │   ├── mapper.h
    │   ├── memory_optimization.h
    │   ├── metrics_functions.h
    │   ├── model.h
    │   ├── node.h
    │   ├── op_meta.h
    │   ├── operator.h
    │   ├── operator_params.h
    │   ├── ops
    │       ├── add_bias_residual_layer_norm.h
    │       ├── add_bias_residual_layer_norm_params.h
    │       ├── aggregate.h
    │       ├── aggregate_params.h
    │       ├── aggregate_spec.h
    │       ├── aggregate_spec_params.h
    │       ├── arg_topk.h
    │       ├── arg_topk_params.h
    │       ├── argmax.h
    │       ├── argmax_params.h
    │       ├── attention.h
    │       ├── attention_params.h
    │       ├── batch_matmul.h
    │       ├── batch_matmul_params.h
    │       ├── batch_norm.h
    │       ├── beam_topk.h
    │       ├── beam_topk_params.h
    │       ├── cache.h
    │       ├── cast.h
    │       ├── cast_params.h
    │       ├── concat.h
    │       ├── concat_params.h
    │       ├── conv_2d.h
    │       ├── conv_2d_params.h
    │       ├── dropout.h
    │       ├── dropout_params.h
    │       ├── element_binary.h
    │       ├── element_binary_params.h
    │       ├── element_unary.h
    │       ├── element_unary_params.h
    │       ├── embedding.h
    │       ├── embedding_params.h
    │       ├── experts.h
    │       ├── experts_params.h
    │       ├── flat.h
    │       ├── flat_params.h
    │       ├── fused.h
    │       ├── gather.h
    │       ├── gather_params.h
    │       ├── groupby.h
    │       ├── groupby_params.h
    │       ├── inc_multihead_self_attention.h
    │       ├── inc_multihead_self_attention_params.h
    │       ├── kernels
    │       │   ├── batch_matmul_kernels.h
    │       │   ├── cast_kernels.h
    │       │   ├── concat_kernels.h
    │       │   ├── conv_2d_kernels.h
    │       │   ├── decompress_kernels.h
    │       │   ├── dropout_kernels.h
    │       │   ├── element_binary_kernels.h
    │       │   ├── embedding_kernels.h
    │       │   ├── flat_kernels.h
    │       │   ├── gather_kernels.h
    │       │   ├── inc_multihead_self_attention_kernels.h
    │       │   ├── inc_multihead_self_attention_utils.cuh
    │       │   ├── linear_kernels.h
    │       │   ├── lora_linear_kernels.h
    │       │   ├── pool_2d_kernels.h
    │       │   ├── reshape_kernels.h
    │       │   ├── residual_rms_norm_kernels.h
    │       │   ├── rms_norm_kernels.h
    │       │   ├── softmax_kernels.h
    │       │   ├── split_kernels.h
    │       │   └── transpose_kernels.h
    │       ├── layer_norm.h
    │       ├── layer_norm_params.h
    │       ├── linear.h
    │       ├── linear_params.h
    │       ├── lora_linear.h
    │       ├── lora_linear_params.h
    │       ├── mean.h
    │       ├── noop.h
    │       ├── pool_2d.h
    │       ├── pool_2d_params.h
    │       ├── reduce.h
    │       ├── reduce_params.h
    │       ├── reshape.h
    │       ├── reshape_params.h
    │       ├── residual_layer_norm.h
    │       ├── residual_layer_norm_params.h
    │       ├── residual_rms_norm.h
    │       ├── residual_rms_norm_params.h
    │       ├── reverse.h
    │       ├── rms_norm.h
    │       ├── rms_norm_params.h
    │       ├── sampling.h
    │       ├── sampling_params.h
    │       ├── sigmoid_silu_multi.h
    │       ├── sigmoid_silu_multi_params.h
    │       ├── softmax.h
    │       ├── softmax_params.h
    │       ├── spec_inc_multihead_self_attention.h
    │       ├── spec_inc_multihead_self_attention_params.h
    │       ├── split.h
    │       ├── split_params.h
    │       ├── topk.h
    │       ├── topk_params.h
    │       ├── transpose.h
    │       ├── transpose_params.h
    │       ├── tree_inc_multihead_self_attention.h
    │       └── tree_inc_multihead_self_attention_params.h
    │   ├── optimizer.h
    │   ├── page_manager.h
    │   ├── parallel_ops
    │       ├── allreduce.h
    │       ├── allreduce_params.h
    │       ├── combine.h
    │       ├── combine_params.h
    │       ├── fused_parallel_op.h
    │       ├── fused_parallel_op_params.h
    │       ├── kernels
    │       │   ├── allreduce_kernels.h
    │       │   ├── combine_kernels.h
    │       │   ├── parallel_identity_kernels.h
    │       │   ├── partition_kernels.h
    │       │   ├── reduction_kernels.h
    │       │   └── replicate_kernels.h
    │       ├── parallel_identity.h
    │       ├── parallel_identity_params.h
    │       ├── parallel_op.h
    │       ├── parallel_op_info.h
    │       ├── partition.h
    │       ├── partition_params.h
    │       ├── reduction.h
    │       ├── reduction_params.h
    │       ├── replicate.h
    │       └── replicate_params.h
    │   ├── parallel_tensor.h
    │   ├── recompile.h
    │   ├── request_manager.h
    │   ├── runtime.h
    │   ├── simulator.h
    │   ├── substitution.h
    │   ├── substitution_loader.h
    │   ├── tensor.h
    │   └── utils
    │       ├── cuda_helper.h
    │       ├── disjoint_set.h
    │       ├── dot
    │           ├── dot_file.h
    │           └── record_formatter.h
    │       ├── file_loader.h
    │       ├── hash_utils.h
    │       ├── hip_helper.h
    │       ├── memory_allocator.h
    │       ├── peft_weight_allocator.h
    │       ├── random_utils.h
    │       ├── recursive_logger.h
    │       ├── test_utils.h
    │       └── tuple.h
├── inference
    ├── .gitignore
    ├── README.md
    ├── flexllm
    │   ├── CMakeLists.txt
    │   └── peft_train.cc
    ├── incr_decoding
    │   ├── CMakeLists.txt
    │   └── incr_decoding.cc
    ├── inference_wrapper.in
    ├── models
    │   ├── falcon.cc
    │   ├── falcon.h
    │   ├── llama.cc
    │   ├── llama.h
    │   ├── mpt.cc
    │   ├── mpt.h
    │   ├── opt.cc
    │   ├── opt.h
    │   ├── starcoder.cc
    │   └── starcoder.h
    ├── peft
    │   ├── CMakeLists.txt
    │   └── peft.cc
    ├── python
    │   ├── chat.py
    │   ├── entrypoint
    │   │   ├── fastapi_incr.py
    │   │   └── fastapi_specinfer.py
    │   ├── ff_peft.py
    │   ├── incr_decoding.py
    │   ├── peft_demo
    │   │   ├── INSTRUCTIONS.md
    │   │   ├── demo.ipynb
    │   │   └── demo.py
    │   ├── save_dataset.py
    │   ├── spec_infer.py
    │   ├── streamlit
    │   │   ├── README.md
    │   │   ├── app.py
    │   │   └── fastapi_incr.py
    │   └── usecases
    │   │   ├── gradio_incr.py
    │   │   ├── gradio_specinfer.py
    │   │   ├── prompt_template_incr.py
    │   │   ├── prompt_template_specinfer.py
    │   │   ├── rag_incr.py
    │   │   └── rag_specinfer.py
    ├── spec_infer
    │   ├── CMakeLists.txt
    │   └── spec_infer.cc
    └── utils
    │   ├── compress_llama_weights.py
    │   ├── download_hf_model.py
    │   ├── download_peft_model.py
    │   ├── mem_analysis.py
    │   └── upload_peft_model.py
├── pyproject.toml
├── python
    ├── flexflow
    │   ├── __init__.py
    │   ├── config.py
    │   ├── core
    │   │   ├── __init__.py
    │   │   ├── flexflow_cffi.py
    │   │   ├── flexflow_logger.py
    │   │   ├── flexflow_top.py
    │   │   └── flexflowlib.py
    │   ├── findpylib.py
    │   ├── serve
    │   │   ├── __init__.py
    │   │   ├── models
    │   │   │   ├── __init__.py
    │   │   │   ├── base.py
    │   │   │   ├── falcon.py
    │   │   │   ├── llama.py
    │   │   │   ├── mpt.py
    │   │   │   ├── opt.py
    │   │   │   └── starcoder.py
    │   │   └── serve.py
    │   ├── torch
    │   │   ├── __init__.py
    │   │   ├── model.py
    │   │   └── nn
    │   │   │   ├── __init__.py
    │   │   │   └── modules
    │   │   │       ├── __init__.py
    │   │   │       └── module.py
    │   └── type.py
    ├── flexflow_cffi_build.py
    ├── flexflow_cffi_header.py.in
    └── flexflow_python_build.py
├── requirements.txt
├── scripts
    ├── format.sh
    ├── gdb
    │   └── pretty_print.py
    ├── install_tokenizer.sh
    ├── mnist_mlp_run.sh
    └── rdelacou
    │   └── generate_trace.py
├── setup.py
├── src
    ├── c
    │   └── flexflow_c.cc
    ├── dataloader
    │   ├── dataloader.cc
    │   ├── dataloader.cpp
    │   └── dataloader.cu
    ├── loss_functions
    │   ├── loss_functions.cc
    │   ├── loss_functions.cpp
    │   └── loss_functions.cu
    ├── mapper
    │   └── mapper.cc
    ├── metrics_functions
    │   ├── metrics_functions.cc
    │   ├── metrics_functions.cpp
    │   └── metrics_functions.cu
    ├── ops
    │   ├── add_bias_residual_layer_norm.cc
    │   ├── add_bias_residual_layer_norm.cpp
    │   ├── add_bias_residual_layer_norm.cu
    │   ├── aggregate.cc
    │   ├── aggregate.cpp
    │   ├── aggregate.cu
    │   ├── aggregate_spec.cc
    │   ├── aggregate_spec.cpp
    │   ├── aggregate_spec.cu
    │   ├── arg_topk.cc
    │   ├── arg_topk.cpp
    │   ├── arg_topk.cu
    │   ├── argmax.cc
    │   ├── argmax.cpp
    │   ├── argmax.cu
    │   ├── attention.cc
    │   ├── attention.cpp
    │   ├── attention.cu
    │   ├── attention_impl.cu
    │   ├── batch_matmul.cc
    │   ├── batch_norm.cc
    │   ├── batch_norm.cpp
    │   ├── batch_norm.cu
    │   ├── beam_topk.cc
    │   ├── beam_topk.cpp
    │   ├── beam_topk.cu
    │   ├── cache.cc
    │   ├── cache.cpp
    │   ├── cache.cu
    │   ├── cast.cc
    │   ├── concat.cc
    │   ├── conv_2d.cc
    │   ├── dropout.cc
    │   ├── element_binary.cc
    │   ├── element_unary.cc
    │   ├── element_unary.cpp
    │   ├── element_unary.cu
    │   ├── embedding.cc
    │   ├── experts.cc
    │   ├── experts.cpp
    │   ├── experts.cu
    │   ├── flat.cc
    │   ├── fused.cc
    │   ├── fused.cpp
    │   ├── fused.cu
    │   ├── gather.cc
    │   ├── group_by.cc
    │   ├── group_by.cpp
    │   ├── group_by.cu
    │   ├── inc_multihead_self_attention.cc
    │   ├── inc_multihead_self_attention.cpp
    │   ├── inc_multihead_self_attention.cu
    │   ├── kernels
    │   │   ├── batch_matmul.cpp
    │   │   ├── batch_matmul.cu
    │   │   ├── cast_kernels.cpp
    │   │   ├── cast_kernels.cu
    │   │   ├── concat_kernels.cpp
    │   │   ├── concat_kernels.cu
    │   │   ├── conv_2d_kernels.cpp
    │   │   ├── conv_2d_kernels.cu
    │   │   ├── decompress_kernels.cpp
    │   │   ├── decompress_kernels.cu
    │   │   ├── dropout_kernels.cpp
    │   │   ├── dropout_kernels.cu
    │   │   ├── element_binary_kernels.cpp
    │   │   ├── element_binary_kernels.cu
    │   │   ├── embedding_kernels.cpp
    │   │   ├── embedding_kernels.cu
    │   │   ├── flat_kernels.cpp
    │   │   ├── flat_kernels.cu
    │   │   ├── gather_kernels.cpp
    │   │   ├── gather_kernels.cu
    │   │   ├── linear_kernels.cpp
    │   │   ├── linear_kernels.cu
    │   │   ├── lora_linear_kernels.cpp
    │   │   ├── lora_linear_kernels.cu
    │   │   ├── pool_2d_kernels.cpp
    │   │   ├── pool_2d_kernels.cu
    │   │   ├── reshape_kernels.cpp
    │   │   ├── reshape_kernels.cu
    │   │   ├── residual_rms_norm_kernels.cpp
    │   │   ├── residual_rms_norm_kernels.cu
    │   │   ├── rms_norm_kernels.cpp
    │   │   ├── rms_norm_kernels.cu
    │   │   ├── softmax.cpp
    │   │   ├── softmax.cu
    │   │   ├── split_kernels.cpp
    │   │   ├── split_kernels.cu
    │   │   ├── transpose_kernels.cpp
    │   │   └── transpose_kernels.cu
    │   ├── layer_norm.cc
    │   ├── layer_norm.cpp
    │   ├── layer_norm.cu
    │   ├── linear.cc
    │   ├── lora_linear.cc
    │   ├── lora_linear_params.cc
    │   ├── mean.cc
    │   ├── mean.cpp
    │   ├── mean.cu
    │   ├── moe.cc
    │   ├── noop.cc
    │   ├── pool_2d.cc
    │   ├── reduce.cc
    │   ├── reduce.cpp
    │   ├── reduce.cu
    │   ├── reshape.cc
    │   ├── residual_layer_norm.cc
    │   ├── residual_layer_norm.cpp
    │   ├── residual_layer_norm.cu
    │   ├── residual_rms_norm.cc
    │   ├── reverse.cc
    │   ├── reverse.cpp
    │   ├── reverse.cu
    │   ├── rms_norm.cc
    │   ├── sampling.cc
    │   ├── sampling.cpp
    │   ├── sampling.cu
    │   ├── sigmoid_silu_multi.cc
    │   ├── sigmoid_silu_multi.cpp
    │   ├── sigmoid_silu_multi.cu
    │   ├── softmax.cc
    │   ├── spec_inc_multihead_self_attention.cc
    │   ├── spec_inc_multihead_self_attention.cpp
    │   ├── spec_inc_multihead_self_attention.cu
    │   ├── split.cc
    │   ├── topk.cc
    │   ├── topk.cpp
    │   ├── topk.cu
    │   ├── transpose.cc
    │   ├── tree_inc_multihead_self_attention.cc
    │   ├── tree_inc_multihead_self_attention.cpp
    │   └── tree_inc_multihead_self_attention.cu
    ├── parallel_ops
    │   ├── allreduce.cc
    │   ├── combine.cc
    │   ├── fused_parallel_op.cc
    │   ├── fused_parallel_op.cpp
    │   ├── fused_parallel_op.cu
    │   ├── kernels
    │   │   ├── allreduce_kernels.cpp
    │   │   ├── allreduce_kernels.cu
    │   │   ├── combine_kernels.cpp
    │   │   ├── combine_kernels.cu
    │   │   ├── parallel_identity_kernels.cpp
    │   │   ├── parallel_identity_kernels.cu
    │   │   ├── partition_kernels.cpp
    │   │   ├── partition_kernels.cu
    │   │   ├── reduction_kernels.cpp
    │   │   ├── reduction_kernels.cu
    │   │   ├── replicate_kernels.cpp
    │   │   └── replicate_kernels.cu
    │   ├── parallel_identity.cc
    │   ├── partition.cc
    │   ├── reduction.cc
    │   └── replicate.cc
    ├── recompile
    │   └── recompile_state.cc
    ├── runtime
    │   ├── accessor.cc
    │   ├── accessor_kernel.cpp
    │   ├── accessor_kernel.cu
    │   ├── batch_config.cc
    │   ├── beam_search_batch_config.cc
    │   ├── compile.sh
    │   ├── cpp_driver.cc
    │   ├── cuda_helper.cu
    │   ├── ffconst_utils.cc
    │   ├── fftype.cc
    │   ├── file_loader.cc
    │   ├── gpt_tokenizer.cc
    │   ├── graph.cc
    │   ├── hip_helper.cpp
    │   ├── inference_manager.cc
    │   ├── initializer.cc
    │   ├── initializer_kernel.cpp
    │   ├── initializer_kernel.cu
    │   ├── layer.cc
    │   ├── machine_model.cc
    │   ├── machine_view.cc
    │   ├── memory_allocator.cc
    │   ├── memory_optimization.cc
    │   ├── model.cc
    │   ├── model.cpp
    │   ├── model.cu
    │   ├── network.cc
    │   ├── operator.cc
    │   ├── operator_params.cc
    │   ├── optimizer.cc
    │   ├── optimizer_kernel.cpp
    │   ├── optimizer_kernel.cu
    │   ├── page_manager.cc
    │   ├── parallel_op.cc
    │   ├── parallel_tensor.cc
    │   ├── peft_weight_allocator.cc
    │   ├── peft_weight_allocator.cpp
    │   ├── peft_weight_allocator.cu
    │   ├── recursive_logger.cc
    │   ├── request_manager.cc
    │   ├── request_manager.cpp
    │   ├── request_manager.cu
    │   ├── simulator.cc
    │   ├── simulator.cpp
    │   ├── simulator.cu
    │   ├── strategy.cc
    │   ├── substitution.cc
    │   ├── substitution_loader.cc
    │   ├── tensor.cpp
    │   ├── tensor.cu
    │   └── tree_verify_batch_config.cc
    └── utils
    │   └── dot
    │       └── record_formatter.cc
├── tests
    ├── align
    │   ├── README.md
    │   ├── align_create_tensor_ff.py
    │   ├── align_create_tensor_torch.py
    │   ├── align_ff_utils.py
    │   ├── align_test.py
    │   ├── align_utils.py
    │   ├── mt5_encoder
    │   │   └── align_mt5_encoder_ff.py
    │   ├── mt5_ff_utils.py
    │   ├── peft_flash_attn
    │   │   ├── align.py
    │   │   ├── align_2_tensors_from_pt.py
    │   │   ├── launch.json
    │   │   ├── peft_flash_debug_note
    │   │   └── test_bwd_2x_grad_buf.py
    │   └── test_all_operators.sh
    ├── fine_grained_alignment_test.sh
    ├── inference
    │   ├── cpp_inference_tests.sh
    │   ├── generate_inf_test_configs.py
    │   ├── huggingface_inference.py
    │   ├── huggingface_inference_simple.py
    │   ├── huggingface_pipeline.py
    │   ├── inference_alignment_test.py
    │   └── test_inference_output.py
    ├── inference_tests.sh
    ├── multinode_helpers
    │   ├── mpi_wrapper1.sh
    │   └── mpi_wrapper2.sh
    ├── peft
    │   ├── alignment
    │   │   ├── align_test_utils.py
    │   │   ├── llama_alignment_tests.ipynb
    │   │   └── opt_alignment_tests.ipynb
    │   ├── hf_finetune.py
    │   ├── hf_serve.py
    │   ├── hf_train.py
    │   ├── hf_utils.py
    │   └── peft_alignment_test.py
    ├── peft_test.sh
    └── python_interface_test.sh
└── triton
    ├── CMakeLists.txt
    ├── Dockerfile.QA
    ├── README.md
    ├── cmake
        └── TritonLegionBackendConfig.cmake.in
    ├── qa
        ├── L0_e2e
        │   ├── models
        │   │   ├── add
        │   │   │   ├── 1
        │   │   │   │   ├── model.onnx
        │   │   │   │   └── model.strategy
        │   │   │   └── config.pbtxt
        │   │   ├── cast
        │   │   │   ├── 1
        │   │   │   │   ├── model.onnx
        │   │   │   │   └── model.strategy
        │   │   │   └── config.pbtxt
        │   │   ├── identity
        │   │   │   ├── 1
        │   │   │   │   ├── model.onnx
        │   │   │   │   └── model.strategy
        │   │   │   └── config.pbtxt
        │   │   ├── mul
        │   │   │   ├── 1
        │   │   │   │   ├── model.onnx
        │   │   │   │   └── model.strategy
        │   │   │   └── config.pbtxt
        │   │   ├── reciprocal
        │   │   │   ├── 1
        │   │   │   │   ├── model.onnx
        │   │   │   │   └── model.strategy
        │   │   │   └── config.pbtxt
        │   │   ├── softmax
        │   │   │   ├── 1
        │   │   │   │   ├── model.onnx
        │   │   │   │   └── model.strategy
        │   │   │   └── config.pbtxt
        │   │   ├── softmax1
        │   │   │   ├── 1
        │   │   │   │   ├── model.onnx
        │   │   │   │   └── model.strategy
        │   │   │   └── config.pbtxt
        │   │   ├── sqrt
        │   │   │   ├── 1
        │   │   │   │   ├── model.onnx
        │   │   │   │   └── model.strategy
        │   │   │   └── config.pbtxt
        │   │   ├── sub
        │   │   │   ├── 1
        │   │   │   │   ├── model.onnx
        │   │   │   │   └── model.strategy
        │   │   │   └── config.pbtxt
        │   │   └── tanh
        │   │   │   ├── 1
        │   │   │       ├── model.onnx
        │   │   │       └── model.strategy
        │   │   │   └── config.pbtxt
        │   ├── operator_test.py
        │   ├── test.sh
        │   └── test_helpers.py
        ├── L0_parser
        │   └── test.sh
        └── common
        │   └── util.sh
    └── src
        ├── CMakeLists.txt
        ├── Makefile
        ├── accessor.h
        ├── backend.cc
        ├── common.h
        ├── config.h
        ├── cudahelp.h
        ├── instance.cc
        ├── instance.h
        ├── libtriton_legion.ldscript
        ├── model.cc
        ├── model.h
        ├── onnx
            ├── onnx-data.proto
            ├── onnx-ml.proto
            └── onnx-operators-ml.proto
        ├── onnx_parser.cc
        ├── onnx_parser.h
        ├── operator.cc
        ├── operator.h
        ├── operators
            ├── binary.cc
            ├── binary.cu
            ├── binary.h
            ├── concat.cc
            ├── concat.h
            ├── conv2d.cc
            ├── conv2d.h
            ├── flat.h
            ├── linear.h
            ├── matmul.cc
            ├── matmul.h
            ├── pool2d.cc
            ├── pool2d.h
            ├── reshape.cc
            ├── reshape.h
            ├── softmax.cc
            ├── softmax.h
            ├── unary.cc
            ├── unary.cu
            └── unary.h
        ├── runtime.cc
        ├── runtime.h
        ├── strategy.cc
        ├── strategy.h
        ├── tensor.cc
        ├── tensor.h
        ├── test
            ├── CMakeLists.txt
            ├── data
            │   ├── add.onnx
            │   ├── avg_pool.onnx
            │   ├── avg_pool_autopad.onnx
            │   ├── avg_pool_ceil.onnx
            │   ├── avg_pool_count_include_pad.onnx
            │   ├── avg_pool_pad.onnx
            │   ├── cast.onnx
            │   ├── conv2d_with_bias.onnx
            │   ├── identity.onnx
            │   ├── max_pool.onnx
            │   ├── max_pool_autopad.onnx
            │   ├── max_pool_ceil.onnx
            │   ├── max_pool_dilations.onnx
            │   ├── max_pool_order.onnx
            │   ├── mul.onnx
            │   ├── reciprocal.onnx
            │   ├── softmax.onnx
            │   ├── softmax_default_axis.onnx
            │   ├── softmax_negative_axis.onnx
            │   ├── sqrt.onnx
            │   ├── sub.onnx
            │   └── tanh.onnx
            ├── mock
            │   ├── binary.cc
            │   ├── concat.cc
            │   ├── conv2d.cc
            │   ├── legion.cc
            │   ├── matmul.cc
            │   ├── pool2d.cc
            │   ├── reshape.cc
            │   ├── softmax.cc
            │   ├── strategy.cc
            │   ├── triton_error.cc
            │   └── unary.cc
            ├── onnx_parser_test.cc
            └── scripts
            │   └── onnx_maker.py
        └── types.h


/.dockerignore:
--------------------------------------------------------------------------------
 1 | # Ignore all folders which start with "build"
 2 | /build*/
 3 | 
 4 | # Ignore compiled files
 5 | /.tools/
 6 | /python/flexflow_python
 7 | /python/flexflow/core/legion_cffi.py
 8 | python/flexflow/core/flexflow_cffi_header.py
 9 | python/flexflow/core/legion_cffi_header.py
10 | *.pb.cc
11 | *.pb.h
12 | *.o
13 | *.a
14 | 
15 | # Ignore inference assets
16 | /inference/weights/*
17 | /inference/tokenizer/*
18 | /inference/prompt/*
19 | /inference/output/*
20 | 
21 | /tests/inference/python_test_configs/*.json
22 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | **Description of changes:**
 2 | 
 3 | 
 4 | 
 5 | **Related Issues:**
 6 | 
 7 | Linked Issues:
 8 | - Issue #
 9 | 
10 | Issues closed by this PR:
11 | - Closes #
12 | 
13 | 


--------------------------------------------------------------------------------
/.github/runs-on.yml:
--------------------------------------------------------------------------------
 1 | images:
 2 |   dlami-x64:
 3 |     platform: "linux"
 4 |     arch: "x64"
 5 |     ami: "ami-04a2add47e78915e6"
 6 | 
 7 | runners:
 8 |   gpu-nvidia:
 9 |     family: ["g5.12xlarge"]
10 |     image: dlami-x64
11 |   rocm-builder:
12 |     family: ["c4.8xlarge"]
13 |     image: dlami-x64


--------------------------------------------------------------------------------
/.github/workflows/clang-format-check.yml:
--------------------------------------------------------------------------------
 1 | # Runs a clang-format style check, one matrix job per top-level directory.
 2 | name: Clang format
 3 | on: [push, pull_request, workflow_dispatch]
 4 | jobs:
 5 |   formatting-check:
 6 |     name: Formatting Check
 7 |     runs-on: ubuntu-22.04
 8 |     strategy:
 9 |       matrix:
10 |         # Each entry names a directory to check and, optionally, a regex of files to skip.
11 |         path:
12 |           - check: "src"
13 |             exclude: '\.proto$'
14 |           - check: "include"
15 |           - check: "inference"
16 |           - check: "python"
17 |           - check: "scripts"
18 |           - check: "tests"
19 |     steps:
20 |       # Same checkout major version as the Shell Check workflow.
21 |       - uses: actions/checkout@v3
22 |       - name: Run clang-format style check for C/C++/Protobuf programs.
23 |         uses: jidicula/clang-format-action@v4.8.0
24 |         with:
25 |           clang-format-version: "15"
26 |           check-path: ${{ matrix.path['check'] }}
27 |           exclude-regex: ${{ matrix.path['exclude'] }}
28 | 


--------------------------------------------------------------------------------
/.github/workflows/helpers/free_space_on_runner.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -euo pipefail
 3 | set -x
 4 | 
 5 | sudo rm -rf /usr/share/dotnet
 6 | sudo rm -rf /usr/local/lib/android
 7 | sudo rm -rf /opt/ghc
 8 | sudo rm -rf "/usr/local/share/boost"
 9 | sudo rm -rf "$AGENT_TOOLSDIRECTORY"
10 | 


--------------------------------------------------------------------------------
/.github/workflows/helpers/install_cudnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Install the cuDNN 9 packages for CUDA 12 from NVIDIA's CUDA apt repository.
 3 | set -euo pipefail
 4 | set -x
 5 | 
 6 | # Cd into directory holding this script. dirname is used because
 7 | # "${BASH_SOURCE[0]%/*}" expands to the script name itself when the script is
 8 | # invoked without a slash in its path (e.g. `bash install_cudnn.sh`).
 9 | cd "$(dirname -- "${BASH_SOURCE[0]}")"
10 | 
11 | # Ubuntu release with the dots removed, as it appears in the repository URL
12 | ubuntu_version=$(lsb_release -rs)
13 | ubuntu_version=${ubuntu_version//./}
14 | 
15 | wget -c -q "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${ubuntu_version}/x86_64/cuda-keyring_1.1-1_all.deb"
16 | sudo dpkg -i cuda-keyring_1.1-1_all.deb
17 | # apt-get instead of apt, consistent with the install commands below
18 | sudo apt-get update -y
19 | rm -f cuda-keyring_1.1-1_all.deb
20 | sudo apt-get -y install libcudnn9-cuda-12
21 | sudo apt-get -y install libcudnn9-dev-cuda-12
22 | sudo apt-get -y install libcudnn9-samples
23 | sudo ldconfig
24 | 


--------------------------------------------------------------------------------
/.github/workflows/helpers/install_nccl.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Install NCCL (libnccl2, libnccl-dev) from NVIDIA's CUDA apt repository.
 3 | set -euo pipefail
 4 | set -x
 5 | 
 6 | # Cd into directory holding this script. dirname is used because
 7 | # "${BASH_SOURCE[0]%/*}" expands to the script name itself when the script is
 8 | # invoked without a slash in its path (e.g. `bash install_nccl.sh`).
 9 | cd "$(dirname -- "${BASH_SOURCE[0]}")"
10 | 
11 | # Ubuntu release with the dots removed, as it appears in the repository URL
12 | ubuntu_version=$(lsb_release -rs)
13 | ubuntu_version=${ubuntu_version//./}
14 | wget -c -q "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${ubuntu_version}/x86_64/cuda-keyring_1.1-1_all.deb"
15 | sudo dpkg -i cuda-keyring_1.1-1_all.deb
16 | sudo apt-get update -y --allow-change-held-packages
17 | rm -f cuda-keyring_1.1-1_all.deb
18 | # apt-get instead of apt, consistent with the update command above
19 | sudo apt-get install -y --allow-change-held-packages libnccl2 libnccl-dev
20 | 


--------------------------------------------------------------------------------
/.github/workflows/helpers/oracle_con.py:
--------------------------------------------------------------------------------
 1 | import oci
 2 | import argparse
 3 | import os
 4 | 
 5 | parser = argparse.ArgumentParser(description="Program with optional flags")
 6 | group = parser.add_mutually_exclusive_group()
 7 | group.add_argument("--start", action="store_true", help="Start action")
 8 | group.add_argument("--stop", action="store_true", help="Stop action")
 9 | parser.add_argument("--instance_id", type=str, required=True, help="instance id required")
10 | args = parser.parse_args()
11 | 
12 | oci_key_content = os.getenv("OCI_CLI_KEY_CONTENT")
13 | 
14 | config = {
15 |     "user": os.getenv("OCI_CLI_USER"),
16 |     "key_content": os.getenv("OCI_CLI_KEY_CONTENT"),
17 |     "fingerprint": os.getenv("OCI_CLI_FINGERPRINT"),
18 |     "tenancy": os.getenv("OCI_CLI_TENANCY"),
19 |     "region": os.getenv("OCI_CLI_REGION")
20 | }
21 | 
22 | # Initialize the OCI configuration
23 | oci.config.validate_config(config)
24 | 
25 | # Initialize the ComputeClient to interact with VM instances
26 | compute = oci.core.ComputeClient(config)
27 | 
28 | # Replace 'your_instance_id' with the actual instance ID of your VM
29 | instance_id = args.instance_id
30 | 
31 | # Perform the action
32 | if args.start:
33 |     # Start the VM
34 |     compute.instance_action(instance_id, "START")
35 | else:
36 |     # Stop the VM
37 |     compute.instance_action(instance_id, "STOP")
38 | 


--------------------------------------------------------------------------------
/.github/workflows/shell-check.yml:
--------------------------------------------------------------------------------
 1 | name: Shell Check
 2 | on: [push, pull_request, workflow_dispatch]
 3 | jobs:
 4 |   shellcheck:
 5 |     name: Shellcheck
 6 |     runs-on: ubuntu-22.04
 7 |     steps:
 8 |       - uses: actions/checkout@v3
 9 |       - name: Run ShellCheck
10 |         uses: ludeeus/action-shellcheck@master
11 |         with:
12 |           ignore_paths: ./triton/**
13 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "deps/legion"]
 2 | 	path = deps/legion
 3 | 	url = https://github.com/flexflow/legion.git
 4 | [submodule "deps/nccl"]
 5 | 	path = deps/nccl
 6 | 	url = https://github.com/NVIDIA/nccl.git
 7 | [submodule "deps/variant"]
 8 | 	path = deps/variant
 9 | 	url = https://github.com/mpark/variant
10 | [submodule "deps/optional"]
11 | 	path = deps/optional
12 | 	url = https://github.com/TartanLlama/optional.git
13 | [submodule "deps/json"]
14 | 	path = deps/json
15 | 	url = https://github.com/nlohmann/json.git
16 | [submodule "deps/tokenizers-cpp"]
17 | 	path = deps/tokenizers-cpp
18 | 	url = https://github.com/mlc-ai/tokenizers-cpp.git
19 | 	fetchRecurseSubmodules = true
20 | [submodule "deps/flashinfer"]
21 | 	path = deps/flashinfer
22 | 	url = https://github.com/flashinfer-ai/flashinfer.git
23 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yaml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | # Set the version of Python and other tools you might need
 9 | build:
10 |   os: ubuntu-20.04
11 |   tools:
12 |     python: "3.8"
13 |     # You can also specify other tool versions:
14 |     # nodejs: "16"
15 |     # rust: "1.55"
16 |     # golang: "1.17"
17 | 
18 | # Build documentation in the docs/ directory with Sphinx
19 | sphinx:
20 |   configuration: docs/source/conf.py
21 | 
22 | # If using Sphinx, optionally build your docs in additional formats such as PDF
23 | formats:
24 |   - pdf
25 | 
26 | # Optionally declare the Python requirements required to build your docs
27 | python:
28 |   install:
29 |   - requirements: docs/requirements.txt
30 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | graft deps
2 | recursive-exclude . .git
3 | prune triton
4 | include python/flexflow/version.txt
5 | 


--------------------------------------------------------------------------------
/cmake/cudnn.cmake:
--------------------------------------------------------------------------------
 1 | # Locate cuDNN under CUDNN_PATH when it is set, otherwise under the CUDA toolkit root
 2 | if(CUDNN_PATH)
 3 |   set(CUDNN_ROOT ${CUDNN_PATH})
 4 | else()
 5 |   # if CUDNN_PATH is not set, let's try to find it in the CUDA root
 6 |   set(CUDNN_ROOT ${CUDA_TOOLKIT_ROOT_DIR})
 7 | endif()
 8 | find_library(CUDNN_LIBRARY
 9 |   NAMES libcudnn${LIBEXT}
10 |   HINTS ${CUDNN_ROOT} ${CUDA_ROOT} # HINTS, like find_path below, so library and header resolve from the same root
11 |   PATH_SUFFIXES lib lib64
12 |   DOC "CUDNN library." )
13 | 
14 | find_path(CUDNN_INCLUDE_DIR
15 |   NAMES cudnn.h
16 |   HINTS ${CUDNN_ROOT} ${CUDA_ROOT}
17 |   PATH_SUFFIXES include
18 |   DOC "CUDNN include directory." )
19 | 
20 | # cuDNN counts as found only when both the library and the header were located
21 | if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR)
22 |   set(CUDNN_FOUND ON)
23 |   set(CUDNN_LIBRARIES ${CUDNN_LIBRARY})
24 |   set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR})
25 | endif()
26 | 
27 | # add cuDNN to the FlexFlow external libraries and include directories
28 | if(CUDNN_FOUND)
29 |   list(APPEND FLEXFLOW_EXT_LIBRARIES
30 |     ${CUDNN_LIBRARIES})
31 | 
32 |   list(APPEND FLEXFLOW_INCLUDE_DIRS
33 |     ${CUDNN_INCLUDE_DIR})
34 | endif()
35 | # report what was found; a missing cuDNN aborts configuration
36 | if(CUDNN_FOUND)
37 |   message( STATUS "CUDNN include : ${CUDNN_INCLUDE_DIR}" )
38 |   message( STATUS "CUDNN libraries : ${CUDNN_LIBRARIES}" )
39 | else()
40 |   message( FATAL_ERROR "CUDNN package not found -> specify search path via CUDNN_PATH variable")
41 | endif()
42 | 


--------------------------------------------------------------------------------
/cmake/hip.cmake:
--------------------------------------------------------------------------------
 1 | if (NOT FF_HIP_ARCH STREQUAL "") # only process the architecture list when one was requested
 2 |     if (FF_HIP_ARCH STREQUAL "all") # "all" expands to the explicit gfx target list below
 3 |         set(FF_HIP_ARCH "gfx900,gfx902,gfx904,gfx906,gfx908,gfx909,gfx90a,gfx90c,gfx940,gfx1010,gfx1011,gfx1012,gfx1013,gfx1030,gfx1031,gfx1032,gfx1033,gfx1034,gfx1035,gfx1036,gfx1100,gfx1101,gfx1102,gfx1103")
 4 |     endif()
 5 |     string(REPLACE "," "," HIP_ARCH_LIST "${FF_HIP_ARCH}") # NOTE(review): replaces "," with ",", so HIP_ARCH_LIST is an unchanged copy of FF_HIP_ARCH - confirm whether another separator was intended
 6 | endif()
 7 | 
 8 | message(STATUS "FF_HIP_ARCH: ${FF_HIP_ARCH}")
 9 | if(FF_GPU_BACKEND STREQUAL "hip_rocm")
10 |     #set(HIP_CLANG_PATH ${ROCM_PATH}/llvm/bin CACHE STRING "Path to the clang compiler by ROCM" FORCE)
11 |     set(GPU_TARGETS "${FF_HIP_ARCH}" CACHE STRING "The GPU TARGETs") # cache entry without FORCE: an existing GPU_TARGETS cache value is kept
12 | endif()
13 | 


--------------------------------------------------------------------------------
/cmake/json.cmake:
--------------------------------------------------------------------------------
1 | include(FetchContent)
2 | 
3 | FetchContent_Declare(json URL https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz)
4 | FetchContent_MakeAvailable(json)


--------------------------------------------------------------------------------
/cmake/optional.cmake:
--------------------------------------------------------------------------------
1 | set(OPTIONAL_BUILD_TESTS OFF)
2 | set(OPTIONAL_BUILD_PACKAGE OFF)
3 | 
4 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/optional)
5 | 
6 | list(APPEND FLEXFLOW_EXT_LIBRARIES optional)
7 | list(APPEND FLEXFLOW_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/deps/optional/include/)
8 | 


--------------------------------------------------------------------------------
/cmake/variant.cmake:
--------------------------------------------------------------------------------
1 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/variant)
2 | 
3 | list(APPEND FLEXFLOW_EXT_LIBRARIES mpark_variant)
4 | list(APPEND FLEXFLOW_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/deps/variant/include/)


--------------------------------------------------------------------------------
/conda/flexflow.yml:
--------------------------------------------------------------------------------
 1 | name: flexflow
 2 | channels:
 3 |   - defaults
 4 |   - conda-forge
 5 | dependencies:
 6 |   - python
 7 |   - cffi
 8 |   - rust
 9 |   - cmake-build-extension
10 |   - jq
11 |   - pytest
12 |   - pip
13 |   - pip:
14 |     - numpy
15 |     - torch
16 |     - torchaudio
17 |     - torchvision
18 |     - regex
19 |     - transformers>=4.47.1
20 |     - sentencepiece
21 |     - einops
22 |     - requests
23 |     - scipy
24 |     - bitsandbytes
25 |     - datasets
26 |     - accelerate
27 |     - loralib
28 |     - triton
29 |     - peft
30 |     - pytest
31 | 


--------------------------------------------------------------------------------
/docker/flexflow/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG FF_GPU_BACKEND=cuda
 2 | ARG gpu_backend_version=12.1
 3 | FROM flexflow-environment-$FF_GPU_BACKEND$gpu_backend_version:latest
 4 | 
 5 | LABEL org.opencontainers.image.source=https://github.com/flexflow/flexflow-serve
 6 | LABEL org.opencontainers.image.description="flexflow-serve container"
 7 | 
 8 | # Copy the build context into /usr/flexflow-serve (NOTE(review): mkdir runs in the base image's working directory - confirm that is /usr)
 9 | RUN mkdir flexflow-serve
10 | WORKDIR /usr/flexflow-serve
11 | COPY . .
12 | 
13 | # Build args: BUILD_CONFIGS holds space-separated KEY=VALUE settings; N_BUILD_CORES is the job count passed to make -j
14 | ARG BUILD_CONFIGS
15 | ARG N_BUILD_CORES
16 | 
17 | # Create the directory named by an INSTALL_DIR=<path> entry in BUILD_CONFIGS, if present and non-empty
18 | RUN for pair in $BUILD_CONFIGS; do \
19 |         key=${pair%%=*}; \
20 |         value=${pair#*=}; \
21 |         if [ "$key" = "INSTALL_DIR" ] && [ -n "$value" ]; then \
22 |             mkdir -p "$value"; \
23 |         fi; \
24 |     done
25 | 
26 | # Build and install flexflow-serve: run config.linux with BUILD_CONFIGS applied as environment assignments, then make install and ldconfig
27 | RUN mkdir -p build && cd build && \
28 |     eval "$BUILD_CONFIGS" ../config/config.linux && \
29 |     make -j $N_BUILD_CORES install && \
30 |     ldconfig
31 | 
32 | ENTRYPOINT ["/bin/bash"]
33 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile clean
16 | 
17 | clean:
18 | 	rm -rf build doxygen/output doxygen/cpp_api
19 | 	@$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
20 | 
21 | # Catch-all target: route all unknown targets to Sphinx using the new
22 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
23 | %: Makefile
24 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
25 | 


--------------------------------------------------------------------------------
/docs/doxygen/README.md:
--------------------------------------------------------------------------------
 1 | # Doxygen Documentation
 2 | 
 3 | This directory holds the configuration file for building
 4 | the HTML Doxygen documentation for the C++ and Python code.
 5 | This documentation is mainly for the developers of FlexFlow for now.
 6 | 
 7 | ## Generate documentation locally
 8 | 
 9 | 1. Install [doxygen](https://www.doxygen.nl/index.html). The configuration file is based on Doxygen 1.9.3, but all recent Doxygen versions should work.
10 | 2. Define the `$FF_HOME` environment variable to be the root directory of the FlexFlow repo.
11 | 3. Run Doxygen with `doxygen $FF_HOME/docs/doxygen/Doxyfile`
12 | 4. Now, you may browse the docs by opening the index page `$FF_HOME/docs/doxygen/output/html/index.html` with your favorite web browser.
13 | 


--------------------------------------------------------------------------------
/docs/doxygen/theme/rust_footer.html:
--------------------------------------------------------------------------------
 1 | <!-- HTML footer for doxygen 1.8.16-->
 2 | <link href="https://fonts.googleapis.com/css?family=Open+Sans:400&display=swap" rel="stylesheet">
 3 | <link href="https://fonts.googleapis.com/css?family=Source+Code+Pro:500,600&display=swap" rel="stylesheet">
 4 | <link href="https://fonts.googleapis.com/css?family=Roboto&display=swap" rel="stylesheet">
 5 | <!-- start footer part -->
 6 | <!--BEGIN GENERATE_TREEVIEW-->
 7 | <div id="nav-path" class="navpath">
 8 |   <!-- id is needed for treeview function! -->
 9 |   <ul>
10 |     $navpath
11 |     <li class="footer">$generatedby
12 |       <a href="http://www.doxygen.org/index.html">
13 |         Doxygen</a> $doxygenversion </li>
14 |   </ul>
15 | </div>
16 | <!--END GENERATE_TREEVIEW-->
17 | <!--BEGIN !GENERATE_TREEVIEW-->
18 | <hr class="footer" />
19 | <address class="footer"><small>
20 |     $generatedby &#160;<a href="http://www.doxygen.org/index.html">
21 |       <img class="footer" src="$relpath^doxygen.png" alt="doxygen" />
22 |     </a> $doxygenversion
23 |   </small></address>
24 | <!--END !GENERATE_TREEVIEW-->
25 | </body>
26 | 
27 | </html>
28 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | sphinx_rtd_theme
3 | m2r2
4 | breathe
5 | exhale
6 | cffi
7 | numpy
8 | qualname
9 | 


--------------------------------------------------------------------------------
/docs/source/chatbot.rst:
--------------------------------------------------------------------------------
 1 | :tocdepth: 1
 2 | ********
 3 | Chatbot
 4 | ********
 5 | 
 6 | The chatbot use case involves setting up a conversational AI model using FlexFlow Serve, capable of engaging in interactive dialogues with users.
 7 | 
 8 | Requirements
 9 | ============
10 | 
11 | - FlexFlow Serve setup with required configurations.
12 | - Gradio or any interactive interface tool.
13 | 
14 | Implementation
15 | ==============
16 | 
17 | 1. FlexFlow Initialization
18 |    Initialize FlexFlow Serve with the desired configurations and a specific LLM.
19 | 
20 | 2. Gradio Interface Setup
21 |    Define a function for response generation based on user inputs. Set up a Gradio Chat Interface for interaction.
22 | 
23 |    .. code-block:: python
24 |       
25 |       def generate_response(user_input):
26 |          result = llm.generate(user_input)
27 |          return result.output_text.decode('utf-8')
28 |       
29 | 
30 | 3. Running the Interface
31 |    Launch the Gradio interface and interact with the model by entering text inputs.
32 | 
33 |    .. image:: /imgs/gradio_interface.png
34 |       :alt: Gradio Chatbot Interface
35 |       :align: center
36 | 
37 | 4. Shutdown
38 |    Stop the FlexFlow server after interaction.
39 | 
40 | Example
41 | =======
42 | 
43 | Complete code examples can be found here:
44 | 
45 | 1. `Chatbot Example with incremental decoding <https://github.com/flexflow/FlexFlow/blob/inference/inference/python/usecases/gradio_incr.py>`__
46 | 
47 | 2. `Chatbot Example with speculative inference <https://github.com/flexflow/FlexFlow/blob/inference/inference/python/usecases/gradio_specinfer.py>`__
48 | 
49 | 
50 | Example Implementation:
51 | 
52 |    .. code-block:: python
53 | 
54 |       import gradio as gr
55 |       import flexflow.serve as ff
56 | 
57 |       ff.init(num_gpus=2, memory_per_gpu=14000, ...)
58 | 
59 |       def generate_response(user_input):
60 |          result = llm.generate(user_input)
61 |          return result.output_text.decode('utf-8')
62 | 
63 |       iface = gr.ChatInterface(fn=generate_response)
64 |       iface.launch()


--------------------------------------------------------------------------------
/docs/source/cpp_api.rst:
--------------------------------------------------------------------------------
 1 | *************
 2 | C++ API
 3 | *************
 4 | 
 5 | The FlexFlow backend is at the core of FlexFlow Train and FlexFlow Serve. It is written entirely in C/C++ and CUDA/HIP. This section documents the API, which is generated by Doxygen and is available at the following links:
 6 | 
 7 | * `CUDA version <./cuda_api/index.html>`_ (default version)
 8 | * `HIP version <./hip_api/index.html>`_
 9 | 
10 | The two versions only differ when it comes to the GPU kernels, so the great majority of the entries are identical. If you are unsure which version to use, take a look at the CUDA version.
11 | 


--------------------------------------------------------------------------------
/docs/source/developers_guide/developers_guide.rst:
--------------------------------------------------------------------------------
1 | ******************
2 | Developers Guide
3 | ******************
4 | 
5 | .. mdinclude:: ../../../CONTRIBUTING.md
6 |    :start-line: 2
7 | 


--------------------------------------------------------------------------------
/docs/source/developers_guide/ff_internals.rst:
--------------------------------------------------------------------------------
1 | *******************
2 | FlexFlow Internals
3 | *******************
4 | 
5 | .. mdinclude:: internals.md
6 |    :start-line: 2
7 | 


--------------------------------------------------------------------------------
/docs/source/developers_guide/internals.md:
--------------------------------------------------------------------------------
 1 | # FlexFlow Internals
 2 | 
 3 | ## The Parallel Computation Graph (PCG)
 4 | 
 5 | FlexFlow uses a _Parallel Computation Graph (PCG)_ to simultaneously represent tensor operations, as well as parallelism choices and data movement across nodes. 
 6 | 
 7 | ### Tensor representations
 8 | 
 9 | There are two types of tensor representations in FlexFlow: a [Tensor](./cuda_api/de/da9/structFlexFlow_1_1TensorBase.html) and a [ParallelTensor](./cuda_api/d3/dfc/structFlexFlow_1_1ParallelTensorBase.html). The first variant is used when writing a FlexFlow DNN program, whereas the second is used by the runtime to run all the computations in a distributed fashion. `Tensor` and `ParallelTensor` are implemented as typedef-ed pointers to, respectively, the `TensorBase` (defined in `include/flexflow/tensor.h`) and `ParallelTensorBase` (defined in `include/flexflow/parallel_tensor.h`) structs. 
10 | 
11 | The `ParallelTensor` struct contains all the information that a `Tensor` also stores, but in addition, it also codifies how the tensor should be parallelized. For instance, a ParallelTensor records how each dimension is *partitioned*, how many *replicas* of the tensors have been created, and the *mapping* between the partitions of the tensors and the physical machines that will store them. 
12 | 
13 | ## Transformation generation
14 | 
15 | ## Joint optimization
16 | 


--------------------------------------------------------------------------------
/docs/source/docker.rst:
--------------------------------------------------------------------------------
1 | :tocdepth: 1
2 | *************
3 | Docker
4 | *************
5 | We provide a ready-to-use Docker container to quickly run FlexFlow with no manual installation required. To use it, follow the steps below.
6 | 
7 | .. mdinclude:: ../../docker/README.md
8 |    :start-line: 3
9 | 


--------------------------------------------------------------------------------
/docs/source/imgs/gradio_api.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/docs/source/imgs/gradio_api.png


--------------------------------------------------------------------------------
/docs/source/imgs/gradio_interface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/docs/source/imgs/gradio_interface.png


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. FlexFlow documentation master file, created by
 2 |    sphinx-quickstart on Tue Dec 15 14:16:53 2020.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to FlexFlow's documentation!
 7 | ====================================
 8 | 
 9 | .. toctree::
10 |    :caption: Getting Started
11 |    
12 |    welcome
13 |    installation
14 |    docker
15 |    multinode
16 | 
17 | .. toctree::
18 |    :caption: FlexFlow Serve
19 |    
20 |    serve_overview
21 |    serve_usecases
22 |    serve_api
23 | 
24 | .. toctree::
25 |    :caption: FlexFlow Train
26 |    
27 |    train_overview
28 |    train_interface
29 |    train_examples
30 |    
31 |    train_python_api
32 | 
33 | .. toctree::
34 |    :caption: FlexFlow Backend
35 | 
36 |    cpp_api
37 | 
38 | .. toctree::
39 |    :maxdepth: 3
40 |    :caption: Developers Guide
41 | 
42 |    developers_guide/developers_guide.rst
43 | ..   developers_guide/ff_internals.rst
44 | 
45 | 
46 | .. Indices and tables
47 | .. ==================
48 | ..
49 | .. * :ref:`genindex`
50 | .. * :ref:`modindex`
51 | .. * :ref:`search`
52 | 


--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
1 | :tocdepth: 1
2 | ********************
3 | Building from source
4 | ********************
5 | 
6 | .. mdinclude:: ../../INSTALL.md
7 |    :start-line: 2
8 | 


--------------------------------------------------------------------------------
/docs/source/keras.rst:
--------------------------------------------------------------------------------
 1 | :tocdepth: 1
 2 | ****************
 3 | Keras Interface
 4 | ****************
 5 | 
 6 | FlexFlow provides a drop-in replacement for TensorFlow Keras. Running an existing Keras program on the FlexFlow backend only requires a few lines of changes to the program. The detailed instructions are as follows:
 7 | 
 8 | 1. Replace the Keras header files
 9 | =================================
10 | 
11 | Redirect the program to import Keras functions from FlexFlow by using the following import header lines::
12 | 
13 |     from flexflow.keras.models import Model, Sequential
14 |     from flexflow.keras.layers import Input, Dense, Conv2D, ...
15 |     from flexflow.keras.callbacks import Callback, ...
16 | 
17 | 2. Modify the main Keras program
18 | ================================
19 | 
20 | FlexFlow requires a Keras program to wrap its model construction in a Python function called ``top_level_task()``. This allows FlexFlow to automatically parallelize DNN training across all GPUs on all compute nodes. For example, the following code snippet shows parallelizing AlexNet training in FlexFlow:: 
21 | 
22 |     def top_level_task():
23 |       model = Sequential()
24 |       model.add(Conv2D(filters=64, input_shape=(3,229,229), kernel_size=(11,11), strides=(4,4), padding=(2,2), activation="relu"))
25 |       model.add(MaxPooling2D(pool_size=(3,3), strides=(2,2), padding="valid"))
26 |       model.add(Conv2D(filters=192, kernel_size=(5,5), strides=(1,1), padding=(2,2), activation="relu"))
27 |       ## More lines for model construction
28 |       model.add(Activation("softmax"))
29 |       ## Model compilation
30 |       model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
31 |       ## Model training
32 |       (x_train, y_train) = cifar10.load_data()
33 |       model.fit(x_train, y_train, epochs=30)
34 | 
35 |     if __name__ == "__main__":
36 |       top_level_task()
37 | 
38 | More FlexFlow Keras examples are available on `GitHub <https://github.com/flexflow/FlexFlow/tree/master/examples/python/keras>`_.
39 | 


--------------------------------------------------------------------------------
/docs/source/mt5.rst:
--------------------------------------------------------------------------------
1 | ************************
2 | mT5 Model
3 | ************************
4 | 
5 | .. mdinclude:: ../../examples/python/pytorch/mt5/README.md
6 |    :start-line: 2
7 | 


--------------------------------------------------------------------------------
/docs/source/multinode.rst:
--------------------------------------------------------------------------------
1 | :tocdepth: 1
2 | ******************
3 | Multinode tutorial
4 | ******************
5 | 
6 | 
7 | .. mdinclude:: ../../MULTI-NODE.md
8 |    :start-line: 3
9 | 


--------------------------------------------------------------------------------
/docs/source/onnx.rst:
--------------------------------------------------------------------------------
 1 | :tocdepth: 1
 2 | *************
 3 | ONNX Support
 4 | *************
 5 | 
 6 | Similar to the PyTorch front-end, FlexFlow also supports training existing ONNX models. Since both ONNX and FlexFlow use Protocol Buffer, make sure they are linked with the Protocol Buffer of the same version. 
 7 | 
 8 | 1. Export an ONNX model to an external file
 9 | ===============================================
10 | 
11 | A PyTorch model can be exported to the ONNX format and saved into an external file::
12 | 
13 |     import onnx
14 |     import torch
15 |     import torch.nn as nn
16 |     from torch.onnx import TrainingMode
17 |     
18 |     # create a PyTorch Model
19 |     class MyPyTorchModule(nn.Module):
20 |     ...
21 | 
22 |     # export the PyTorch model to an ONNX model
23 |     model = MyPyTorchModule()
24 |     torch.onnx.export(model, (input), "filename", export_params=False, training=TrainingMode.TRAINING)
25 | 
26 | 2. Import a FlexFlow model from an external file
27 | ================================================
28 | 
29 | A FlexFlow program can directly import a previously saved ONNX model and autotune the parallelization performance for a given parallel machine::
30 | 
31 |     from flexflow.onnx.model import ONNXModel
32 | 
33 |     #create input tensors
34 |     dims_input = [ffconfig.get_batch_size(), 3, 32, 32]
35 |     input_tensor = ffmodel.create_tensor(dims_input, DataType.DT_FLOAT)
36 | 
37 |     # create a flexflow model from the file
38 |     onnx_model = ONNXModel("cifar10_cnn.onnx")
39 |     output_tensor = onnx_model.apply(ffmodel, {"input.1": input_tensor})
40 | 
41 |     # use the Python API to train the model
42 |     ffoptimizer = SGDOptimizer(ffmodel, 0.01)
43 |     ffmodel.set_sgd_optimizer(ffoptimizer)
44 |     ffmodel.compile(loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
45 |     ...
46 |     ffmodel.fit(x=dataloader_input, y=dataloader_label, epochs=epochs)
47 | 
48 | More FlexFlow ONNX examples are available on `GitHub <https://github.com/flexflow/FlexFlow/tree/master/examples/python/onnx>`_.
49 | 


--------------------------------------------------------------------------------
/docs/source/prompt_template.rst:
--------------------------------------------------------------------------------
 1 | :tocdepth: 1
 2 | ****************
 3 | Prompt Template
 4 | ****************
 5 | 
 6 | Prompt templates guide the model's response generation. This use case demonstrates setting up FlexFlow Serve to integrate with Langchain and using its prompt templates to build prompts dynamically.
 7 | 
 8 | Requirements
 9 | ============
10 | 
11 | - FlexFlow Serve setup with appropriate configurations.
12 | - Langchain integration with templates for prompt management.
13 | 
14 | Implementation
15 | ==============
16 | 
17 | 1. FlexFlow Initialization
18 |    Initialize and configure FlexFlow Serve.
19 | 
20 | 2. LLM Setup
21 |    Compile and start the server for text generation.
22 | 
23 | 3. Prompt Template Setup
24 |    Set up a prompt template to guide the model's responses.
25 | 
26 | 4. Response Generation
27 |    Use the LLM with the prompt template to generate a response.
28 | 
29 | 5. Shutdown
30 |    Stop the FlexFlow server after generating the response.
31 | 
32 | Example
33 | =======
34 | 
35 | Complete code examples can be found here:
36 | 
37 | 1. `Prompt Template Example with incremental decoding <https://github.com/flexflow/FlexFlow/blob/inference/inference/python/usecases/prompt_template_incr.py>`__
38 | 
39 | 2. `Prompt Template Example with speculative inference <https://github.com/flexflow/FlexFlow/blob/inference/inference/python/usecases/prompt_template_specinfer.py>`__
40 | 
41 | 
42 | Example Implementation:
43 | 
44 |    .. code-block:: python
45 | 
46 |       import flexflow.serve as ff
47 |       from langchain.prompts import PromptTemplate
48 | 
49 |       ff_llm = FlexFlowLLM(...)
50 |       ff_llm.compile_and_start(...)
51 | 
52 |       template = "Question: {question}\nAnswer:"
53 |       prompt = PromptTemplate(template=template, input_variables=["question"])
54 | 
55 |       response = ff_llm.generate("Who was the US president in 1997?")
56 | 


--------------------------------------------------------------------------------
/docs/source/python/create.rst:
--------------------------------------------------------------------------------
 1 | **************
 2 | Model Creation
 3 | **************
 4 | .. automodule:: flexflow.core.flexflow_cffi
 5 |    :noindex:
 6 | 
 7 | Model Creation
 8 | ==============
 9 | .. autoclass:: FFModel()
10 |    :noindex:
11 |    :members: __init__
12 |    
13 | Tensor Creation
14 | ===============
15 | .. autoclass:: FFModel()
16 |    :noindex:
17 |    :members: create_tensor
18 | 


--------------------------------------------------------------------------------
/docs/source/python/dataloader.rst:
--------------------------------------------------------------------------------
 1 | **************
 2 | Dataloader API
 3 | **************
 4 | .. automodule:: flexflow.core.flexflow_cffi
 5 |    :noindex:
 6 | 
 7 | Dataloader Creation
 8 | ===================
 9 | .. autoclass:: FFModel()
10 |    :noindex:
11 |    :members: create_data_loader
12 |    
13 | Use Dataloader for Training
14 | ===========================
15 | .. autoclass:: SingleDataLoader()
16 |    :noindex:
17 |    :members: reset, next_batch


--------------------------------------------------------------------------------
/docs/source/python/init.rst:
--------------------------------------------------------------------------------
 1 | ********************
 2 | Model Initialization
 3 | ********************
 4 | .. automodule:: flexflow.core.flexflow_cffi
 5 |    :noindex:
 6 | 
 7 | Compile
 8 | =======
 9 | .. autoclass:: FFModel()
10 |    :noindex:
11 |    :members: compile
12 |    
13 | Initialization
14 | ==============
15 | .. autoclass:: FFModel()
16 |    :noindex:
17 |    :members: init_layers
18 | 


--------------------------------------------------------------------------------
/docs/source/python/models.rst:
--------------------------------------------------------------------------------
 1 | ************
 2 | Models API
 3 | ************
 4 | 
 5 | The Models API in FlexFlow is used to create models.
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 |    
10 |    create
11 |    init
12 |    train


--------------------------------------------------------------------------------
/docs/source/python/train.rst:
--------------------------------------------------------------------------------
 1 | **************************
 2 | Model Training and Testing
 3 | **************************
 4 | .. automodule:: flexflow.core.flexflow_cffi
 5 |    :noindex:
 6 | 
 7 | Fit
 8 | =======
 9 | .. autoclass:: FFModel()
10 |    :noindex:
11 |    :members: fit
12 |    
13 | Evaluate
14 | ==============
15 | .. autoclass:: FFModel()
16 |    :noindex:
17 |    :members: eval
18 |    
19 | Customized Training
20 | ===================
21 | .. autoclass:: FFModel()
22 |    :noindex:
23 |    :members: forward, backward, zero_gradients, update, reset_metrics, compute_metrics
24 | 
25 | 


--------------------------------------------------------------------------------
/docs/source/serve_api.rst:
--------------------------------------------------------------------------------
1 | **************************
2 | FlexFlow Serve Python API
3 | **************************
4 | 
5 | .. toctree::
6 |    serve_fastapi
7 |    serve_gradioapi


--------------------------------------------------------------------------------
/docs/source/serve_gradioapi.rst:
--------------------------------------------------------------------------------
 1 | :tocdepth: 1
 2 | *************************
 3 | FlexFlow Serve Gradio API
 4 | *************************
 5 | 
 6 | Introduction
 7 | ============
 8 | 
 9 | Users can also set up the API endpoints with a Gradio Chatbot Interface.
10 | 
11 | Requirements
12 | ------------
13 | 
14 | - FlexFlow Serve setup with necessary configurations.
15 | - Running the gradio chatbot interface.
16 | 
17 | Example
18 | ========
19 | 
20 | In a running gradio chatbot interface, hit the "Use via API" button on the bottom left.
21 | 
22 |    .. image:: /imgs/gradio_interface.png
23 |       :alt: Gradio Chatbot Interface
24 |       :align: center
25 | 
26 | Users can easily access an API endpoint for sending prompts to the model.
27 | 
28 |    .. image:: /imgs/gradio_api.png
29 |       :alt: Gradio API
30 |       :align: center


--------------------------------------------------------------------------------
/docs/source/serve_overview.rst:
--------------------------------------------------------------------------------
1 | :tocdepth: 1
2 | ****************
3 | Serving Overview
4 | ****************
5 | 
6 | .. mdinclude:: ../../SERVE.md
7 |    :start-line: 3
8 | 


--------------------------------------------------------------------------------
/docs/source/serve_usecases.rst:
--------------------------------------------------------------------------------
1 | *******************
2 | Serving Usecases
3 | *******************
4 | 
5 | .. toctree::
6 |    chatbot
7 |    prompt_template
8 |    rag


--------------------------------------------------------------------------------
/docs/source/train_examples.rst:
--------------------------------------------------------------------------------
1 | *****************
2 | Training Examples
3 | *****************
4 | 
5 | .. toctree::
6 |    mt5


--------------------------------------------------------------------------------
/docs/source/train_interface.rst:
--------------------------------------------------------------------------------
1 | *******************
2 | Training Interface
3 | *******************
4 | 
5 | .. toctree::
6 |    keras
7 |    pytorch
8 |    onnx


--------------------------------------------------------------------------------
/docs/source/train_overview.rst:
--------------------------------------------------------------------------------
1 | :tocdepth: 1
2 | 
3 | *****************
4 | Training Overview
5 | *****************
6 | 
7 | .. mdinclude:: ../../TRAIN.md
8 |    :start-line: 3
9 | 


--------------------------------------------------------------------------------
/docs/source/train_python_api.rst:
--------------------------------------------------------------------------------
 1 | *******************
 2 | Python API
 3 | *******************
 4 | This section documents the Python API for FlexFlow Train.
 5 | 
 6 | .. toctree::
 7 |    :maxdepth: 3
 8 |    
 9 |    python/models
10 |    python/layers
11 |    python/dataloader


--------------------------------------------------------------------------------
/docs/source/welcome.rst:
--------------------------------------------------------------------------------
1 | :tocdepth: 1
2 | 
3 | *************
4 | Overview
5 | *************
6 | 
7 | .. mdinclude:: ../../README.md
8 |    :start-line: 3
9 | 


--------------------------------------------------------------------------------
/img/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/img/overview.png


--------------------------------------------------------------------------------
/img/performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/img/performance.png


--------------------------------------------------------------------------------
/img/spec_infer_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/img/spec_infer_demo.gif


--------------------------------------------------------------------------------
/include/flexflow/device.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_DEVICE_H_
 2 | #define _FLEXFLOW_DEVICE_H_
 3 | 
 4 | #if defined(FF_USE_CUDA) || defined(FF_USE_HIP_CUDA)
 5 | #include <cuda_runtime.h>
 6 | #include <cudnn.h>
 7 | #elif defined(FF_USE_HIP_ROCM)
 8 | #include <hip/hip_runtime.h>
 9 | #include <miopen/miopen.h>
10 | #else
11 | #error "Unknown device"
12 | #endif
13 | 
14 | namespace FlexFlow { // backend-neutral ff* aliases, chosen at compile time
15 | #if defined(FF_USE_CUDA) || defined(FF_USE_HIP_CUDA)
16 | using ffStream_t = cudaStream_t; // CUDA runtime stream handle
17 | cudaError_t get_legion_stream(cudaStream_t *stream); // declared only
18 | using ffTensorDescriptor_t = cudnnTensorDescriptor_t;
19 | using ffActivationDescriptor_t = cudnnActivationDescriptor_t;
20 | using ffPoolingDescriptor_t = cudnnPoolingDescriptor_t;
21 | #elif defined(FF_USE_HIP_ROCM)
22 | using ffStream_t = hipStream_t; // HIP runtime stream handle
23 | hipError_t get_legion_stream(hipStream_t *stream); // declared only
24 | using ffTensorDescriptor_t = miopenTensorDescriptor_t;
25 | using ffActivationDescriptor_t = miopenActivationDescriptor_t;
26 | using ffPoolingDescriptor_t = miopenPoolingDescriptor_t;
27 | #else
28 | #error "Unknown device"
29 | #endif
30 | }; // namespace FlexFlow
31 | 
32 | #endif // _FLEXFLOW_DEVICE_H_
33 | 


--------------------------------------------------------------------------------
/include/flexflow/ffconst_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_FFCONST_UTILS_H
 2 | #define _FLEXFLOW_FFCONST_UTILS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include <string>
 6 | 
 7 | namespace FlexFlow {
 8 | // Helpers over OperatorType / DataType; declared here, defined elsewhere.
 9 | std::string get_operator_type_name(OperatorType type);
10 | 
11 | size_t data_type_size(DataType type);
12 | // Macros ignore namespaces: this name is global despite its position.
13 | #define INT4_NUM_OF_ELEMENTS_PER_GROUP 32
14 | 
15 | size_t get_quantization_to_byte_size(DataType type,
16 |                                      DataType quantization_type,
17 |                                      size_t num_elements);
18 | // NOTE(review): needs std::ostream declared; no <iosfwd>/<ostream> here.
19 | std::ostream &operator<<(std::ostream &, OperatorType);
20 | 
21 | }; // namespace FlexFlow
22 | 
23 | #endif // _FLEXFLOW_FFCONST_UTILS_H
24 | 


--------------------------------------------------------------------------------
/include/flexflow/fftype.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FF_TYPE_H
 2 | #define _FF_TYPE_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include <cstddef>
 6 | #include <functional>
 7 | #include <iostream>
 8 | 
 9 | namespace FlexFlow {
10 | // Identifier value types; member functions are only declared in this header.
11 | class LayerID {
12 | public:
13 |   static const LayerID NO_ID; // sentinel; presumably "no layer" (confirm)
14 |   LayerID();
15 |   LayerID(size_t id, size_t transformer_layer_id, size_t model_id);
16 |   bool is_valid_id() const;
17 |   friend bool operator==(LayerID const &lhs, LayerID const &rhs);
18 |   // Public data, named like the three-argument constructor's parameters.
19 | public:
20 |   size_t id, transformer_layer_id, model_id;
21 | };
22 | // Single-id identifier; std::hash is specialized for it later in this file.
23 | class PEFTModelID {
24 | public:
25 |   static const PEFTModelID NO_ID; // sentinel; presumably "no model" (confirm)
26 |   PEFTModelID();
27 |   PEFTModelID(size_t id); // not explicit: size_t converts implicitly
28 |   bool is_valid_id() const;
29 |   friend bool operator==(PEFTModelID const &lhs, PEFTModelID const &rhs);
30 |   friend bool operator!=(PEFTModelID const &lhs, PEFTModelID const &rhs);
31 |   friend std::ostream &operator<<(std::ostream &os,
32 |                                   PEFTModelID const &peft_model_id);
33 | 
34 | public:
35 |   size_t id;
36 | };
37 | 
38 | }; // namespace FlexFlow
39 | 
40 | namespace std {
41 | template <> // lets PEFTModelID key std::unordered_* containers
42 | struct hash<FlexFlow::PEFTModelID> {
43 |   size_t operator()(FlexFlow::PEFTModelID const &peft_model_id) const {
44 |     return peft_model_id.id; // identity hash: id is already a size_t
45 |   }
46 | };
47 | } // namespace std
48 | 
49 | #endif // _FF_TYPE_H
50 | 


--------------------------------------------------------------------------------
/include/flexflow/node.h:
--------------------------------------------------------------------------------
 1 | #ifndef _NODE_H
 2 | #define _NODE_H
 3 | 
 4 | #include <string>
 5 | 
 6 | #include "tl/optional.hpp"
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | class Op;
11 | 
12 | namespace PCG {
13 | // Node: a guid, an Op pointer and an optional original_guid.
14 | struct Node {
15 |   Node(void);
16 |   Node(size_t _guid, Op *_ptr) : guid(_guid), ptr(_ptr) {}
17 |   inline bool operator==(Node const &b) const { // all three fields must match
18 |     if (guid != b.guid) {
19 |       return false;
20 |     }
21 |     if (ptr != b.ptr) {
22 |       return false;
23 |     }
24 |     if (original_guid != b.original_guid) {
25 |       return false;
26 |     }
27 |     return true;
28 |   }
29 |   inline bool operator!=(Node const &b) const { // exact negation of ==
30 |     if (guid != b.guid) {
31 |       return true;
32 |     }
33 |     if (ptr != b.ptr) {
34 |       return true;
35 |     }
36 |     if (original_guid != b.original_guid) {
37 |       return true; // fixed: was `false`, so != disagreed with ==
38 |     }
39 |     return false;
40 |   }
41 |   inline bool operator<(Node const &b) const { // orders by guid, then ptr
42 |     if (guid != b.guid) {
43 |       return guid < b.guid;
44 |     }
45 |     if (ptr != b.ptr) {
46 |       return ptr < b.ptr;
47 |     }
48 |     if (original_guid != b.original_guid) {
49 |       return false; // NOTE(review): original_guid never affects ordering
50 |     }
51 |     return false;
52 |   }
53 |   Node &operator=(Node const &n) { // member-wise copy of all three fields
54 |     guid = n.guid;
55 |     ptr = n.ptr;
56 |     original_guid = n.original_guid;
57 |     return *this;
58 |   }
59 |   std::string op_to_string(Op const *ptr) const; // declared only
60 |   std::string to_string(void) const { // "<op>_<guid>" or "UnmappedOp_<guid>"
61 |     if (ptr != NULL) {
62 |       return op_to_string(ptr) + "_" + std::to_string(guid);
63 |     } else {
64 |       return "UnmappedOp_" + std::to_string(guid);
65 |     }
66 |   }
67 |   static const Node INVALID_NODE;
68 |   size_t guid;
69 |   Op const *ptr;
70 |   // Defaults to nullopt; the two-argument constructor does not set it.
71 |   tl::optional<size_t> original_guid = tl::nullopt;
72 | };
73 | 
74 | }; // namespace PCG
75 | 
76 | }; // namespace FlexFlow
77 | 
78 | #endif // _NODE_H
79 | 


--------------------------------------------------------------------------------
/include/flexflow/op_meta.h:
--------------------------------------------------------------------------------
 1 | #ifndef _OP_META_H
 2 | #define _OP_META_H
 3 | 
 4 | #include "flexflow/config.h"
 5 | 
 6 | namespace FlexFlow {
 7 | // Metadata record for an operator; CastMeta and ConcatMeta derive from it.
 8 | class Op;
 9 | 
10 | class OpMeta {
11 | public:
12 |   // OpMeta(FFHandler _handle);
13 |   OpMeta(FFHandler _handle, Op const *op); // declared only
14 |   // NOTE(review): field meanings below are inferred from names; confirm.
15 | public:
16 |   FFHandler handle;
17 |   bool profiling; // Measure the run time of the task
18 |   bool inference_debugging;
19 |   bool enable_peft_finetuning;
20 |   int decoding_step;
21 |   int bwd_step;
22 |   char op_name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
23 |   LayerID layer_guid;
24 |   bool trainable_inputs[MAX_NUM_INPUTS];  // one flag per input slot
25 |   bool reset_input_grads[MAX_NUM_INPUTS]; // one flag per input slot
26 |   DataType input_type[MAX_NUM_INPUTS];    // one entry per input slot
27 |   DataType weight_type[MAX_NUM_WEIGHTS];  // one entry per weight slot
28 |   DataType output_type[MAX_NUM_OUTPUTS];  // one entry per output slot
29 | };
30 | 
31 | }; // namespace FlexFlow
32 | 
33 | #endif //_OP_META_H
34 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/add_bias_residual_layer_norm_params.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "flexflow/ffconst.h"
 4 | #include "flexflow/fftype.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | // Field bundle for AddBiasResidualLayerNorm; comparable and hashable.
 9 | struct AddBiasResidualLayerNormParams {
10 |   LayerID layer_guid;
11 |   std::vector<int> axes;
12 |   bool elementwise_affine;
13 |   float eps;
14 |   bool use_bias;
15 |   bool inplace_residual;
16 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
17 |   bool is_valid( // declared only; takes a pair of ParallelTensorShape
18 |       std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
19 | };
20 | // Declared only; the definition decides which fields are compared.
21 | bool operator==(AddBiasResidualLayerNormParams const &,
22 |                 AddBiasResidualLayerNormParams const &);
23 | 
24 | } // namespace FlexFlow
25 | // std::hash specialization (declared only) for use as a hash-container key.
26 | namespace std {
27 | template <>
28 | struct hash<FlexFlow::AddBiasResidualLayerNormParams> {
29 |   size_t operator()(FlexFlow::AddBiasResidualLayerNormParams const &) const;
30 | };
31 | } // namespace std
32 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/aggregate_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_AGGREGATE_PARAMS_H
 2 | #define _FLEXFLOW_AGGREGATE_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | // Field bundle for Aggregate; comparable and hashable.
 9 | struct AggregateParams {
10 |   int n;
11 |   float lambda_bal;
12 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
13 |   bool is_valid(std::vector<ParallelTensorShape> const &) const; // decl only
14 | };
15 | bool operator==(AggregateParams const &, AggregateParams const &);
16 | 
17 | } // namespace FlexFlow
18 | // std::hash specialization (declared only) for use as a hash-container key.
19 | namespace std {
20 | template <>
21 | struct hash<FlexFlow::AggregateParams> {
22 |   size_t operator()(FlexFlow::AggregateParams const &) const;
23 | };
24 | } // namespace std
25 | 
26 | #endif // _FLEXFLOW_AGGREGATE_PARAMS_H
27 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/aggregate_spec_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_AGGREGATE_SPEC_PARAMS_H
 2 | #define _FLEXFLOW_AGGREGATE_SPEC_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | // Field bundle for AggregateSpec; same fields as AggregateParams.
 9 | struct AggregateSpecParams {
10 |   int n;
11 |   float lambda_bal;
12 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
13 |   bool is_valid(ParallelTensorShape const &) const; // takes a single shape
14 | };
15 | bool operator==(AggregateSpecParams const &, AggregateSpecParams const &);
16 | 
17 | } // namespace FlexFlow
18 | // std::hash specialization (declared only) for use as a hash-container key.
19 | namespace std {
20 | template <>
21 | struct hash<FlexFlow::AggregateSpecParams> {
22 |   size_t operator()(FlexFlow::AggregateSpecParams const &) const;
23 | };
24 | } // namespace std
25 | 
26 | #endif // _FLEXFLOW_AGGREGATE_SPEC_PARAMS_H
27 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/arg_topk_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_ARG_TOPK_PARAMS_H
 2 | #define _FLEXFLOW_ARG_TOPK_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | // Field bundle for ArgTopK; comparable and hashable.
10 | struct ArgTopKParams {
11 |   LayerID layer_guid;
12 |   int k;
13 |   bool sorted;
14 |   bool speculative_decoding;
15 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
16 |   bool is_valid(ParallelTensorShape const &) const; // declared only
17 | };
18 | bool operator==(ArgTopKParams const &, ArgTopKParams const &);
19 | 
20 | } // namespace FlexFlow
21 | // std::hash specialization (declared only) for use as a hash-container key.
22 | namespace std {
23 | template <>
24 | struct hash<FlexFlow::ArgTopKParams> {
25 |   size_t operator()(FlexFlow::ArgTopKParams const &) const;
26 | };
27 | } // namespace std
28 | 
29 | #endif // _FLEXFLOW_ARG_TOPK_PARAMS_H
30 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/argmax_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_ARGMAX_PARAMS_H
 2 | #define _FLEXFLOW_ARGMAX_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | // Field bundle for ArgMax; comparable and hashable.
 9 | struct ArgMaxParams {
10 |   bool beam_search;
11 |   bool is_valid(ParallelTensorShape const &) const; // declared only
12 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
13 | };
14 | bool operator==(ArgMaxParams const &, ArgMaxParams const &);
15 | 
16 | } // namespace FlexFlow
17 | // std::hash specialization (declared only) for use as a hash-container key.
18 | namespace std {
19 | template <>
20 | struct hash<FlexFlow::ArgMaxParams> {
21 |   size_t operator()(FlexFlow::ArgMaxParams const &) const;
22 | };
23 | } // namespace std
24 | 
25 | #endif // _FLEXFLOW_ARGMAX_PARAMS_H


--------------------------------------------------------------------------------
/include/flexflow/ops/attention_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_ATTENTION_PARAMS_H
 2 | #define _FLEXFLOW_ATTENTION_PARAMS_H
 3 | 
 4 | #include "flexflow/fftype.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | // Field bundle for MultiHeadAttention; comparable and hashable.
 9 | struct MultiHeadAttentionParams {
10 |   LayerID layer_guid;
11 |   int embed_dim, num_heads, kdim, vdim;
12 |   float dropout;
13 |   bool bias, add_bias_kv, add_zero_attn;
14 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
15 |   // NOTE(review): std::tuple is used but <tuple> is not included here.
16 |   bool is_valid(std::tuple<ParallelTensorShape,
17 |                            ParallelTensorShape,
18 |                            ParallelTensorShape> const &) const;
19 | };
20 | // Declared only; the definition decides which fields are compared.
21 | bool operator==(MultiHeadAttentionParams const &,
22 |                 MultiHeadAttentionParams const &);
23 | 
24 | } // namespace FlexFlow
25 | // std::hash specialization (declared only) for use as a hash-container key.
26 | namespace std {
27 | template <>
28 | struct hash<FlexFlow::MultiHeadAttentionParams> {
29 |   size_t operator()(FlexFlow::MultiHeadAttentionParams const &) const;
30 | };
31 | } // namespace std
32 | 
33 | #endif // _FLEXFLOW_ATTENTION_PARAMS_H
34 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/batch_matmul_params.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "flexflow/parallel_tensor.h"
 4 | 
 5 | namespace FlexFlow {
 6 | // Field bundle for BatchMatmul; comparable and hashable.
 7 | struct BatchMatmulParams {
 8 |   int a_seq_length_dim, b_seq_length_dim;
 9 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
10 |   bool is_valid( // declared only; takes a pair of ParallelTensorShape
11 |       std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
12 | };
13 | // Declared only; the definition decides which fields are compared.
14 | bool operator==(BatchMatmulParams const &, BatchMatmulParams const &);
15 | 
16 | } // namespace FlexFlow
17 | // std::hash specialization (declared only) for use as a hash-container key.
18 | namespace std {
19 | template <>
20 | struct hash<FlexFlow::BatchMatmulParams> {
21 |   size_t operator()(FlexFlow::BatchMatmulParams const &) const;
22 | };
23 | } // namespace std
24 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/beam_topk_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_BEAM_TOPK_PARAMS_H
 2 | #define _FLEXFLOW_BEAM_TOPK_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | // Field bundle for BeamTopK; comparable and hashable.
10 | struct BeamTopKParams {
11 |   LayerID layer_guid;
12 |   bool sorted;
13 |   int max_beam_width;
14 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
15 |   bool is_valid(ParallelTensorShape const &) const; // declared only
16 | };
17 | bool operator==(BeamTopKParams const &, BeamTopKParams const &);
18 | 
19 | } // namespace FlexFlow
20 | // std::hash specialization (declared only) for use as a hash-container key.
21 | namespace std {
22 | template <>
23 | struct hash<FlexFlow::BeamTopKParams> {
24 |   size_t operator()(FlexFlow::BeamTopKParams const &) const;
25 | };
26 | } // namespace std
27 | 
28 | #endif // _FLEXFLOW_BEAM_TOPK_PARAMS_H
29 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/cast_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_CAST_PARAMS_H
 2 | #define _FLEXFLOW_CAST_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | // Field bundle for Cast; comparable and hashable.
 9 | struct CastParams {
10 |   DataType dtype; // presumably the target type of the cast (confirm)
11 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
12 |   bool is_valid(ParallelTensorShape const &) const; // declared only
13 | };
14 | bool operator==(CastParams const &, CastParams const &);
15 | 
16 | } // namespace FlexFlow
17 | // std::hash specialization (declared only) for use as a hash-container key.
18 | namespace std {
19 | template <>
20 | struct hash<FlexFlow::CastParams> {
21 |   size_t operator()(FlexFlow::CastParams const &) const;
22 | };
23 | } // namespace std
24 | 
25 | #endif // _FLEXFLOW_CAST_PARAMS_H
26 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/concat_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_CONCAT_PARAMS_H
 2 | #define _FLEXFLOW_CONCAT_PARAMS_H
 3 | 
 4 | #include "flexflow/parallel_tensor.h"
 5 | 
 6 | namespace FlexFlow {
 7 | // Field bundle for Concat; comparable and hashable.
 8 | struct ConcatParams {
 9 |   int axis;
10 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
11 |   bool is_valid(std::vector<ParallelTensorShape> const &) const; // decl only
12 | };
13 | // Declared only; the definition decides which fields are compared.
14 | bool operator==(ConcatParams const &, ConcatParams const &);
15 | 
16 | } // namespace FlexFlow
17 | // std::hash specialization (declared only) for use as a hash-container key.
18 | namespace std {
19 | template <>
20 | struct hash<FlexFlow::ConcatParams> {
21 |   size_t operator()(FlexFlow::ConcatParams const &) const;
22 | };
23 | } // namespace std
24 | 
25 | #endif // _FLEXFLOW_CONCAT_PARAMS_H
26 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/conv_2d_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_CONV_2D_PARAMS_H
 2 | #define _FLEXFLOW_CONV_2D_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | // Field bundle for Conv2D plus dimension helpers; comparable and hashable.
10 | struct Conv2DParams {
11 |   LayerID layer_guid;
12 |   int out_channels, kernel_h, kernel_w, stride_h, stride_w, padding_h,
13 |       padding_w, groups;
14 |   ActiMode activation;
15 |   bool use_bias;
16 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
17 |   // Declared only. solve_dims receives caller-provided dim buffers and counts.
18 |   bool is_valid(ParallelTensorShape const &input) const;
19 |   void solve_dims(ParallelTensorShape const &input,
20 |                   ParallelDim output_dims[MAX_TENSOR_DIM],
21 |                   int *output_ndims,
22 |                   ParallelDim kernel_dims[MAX_TENSOR_DIM],
23 |                   int *kernel_ndims,
24 |                   ParallelDim bias_dims[MAX_TENSOR_DIM],
25 |                   int *bias_ndims) const;
26 |   // Friend, so the comparison may also reach the private section below.
27 |   friend bool operator==(Conv2DParams const &lhs, Conv2DParams const &rhs);
28 |   // Private helpers, declared only; presumably used by solve_dims (confirm).
29 | private:
30 |   void mark_replica_dims(ParallelTensorShape const &input,
31 |                          ParallelDim output_dims[MAX_TENSOR_DIM],
32 |                          ParallelDim kernel_dims[MAX_TENSOR_DIM],
33 |                          ParallelDim bias_dims[MAX_TENSOR_DIM]) const;
34 |   int output_size(ParallelTensorShape const &input,
35 |                   ParallelDim output_dims[MAX_TENSOR_DIM]) const;
36 |   int kernel_size(ParallelTensorShape const &input_shape,
37 |                   ParallelDim kernel_dims[MAX_TENSOR_DIM]) const;
38 |   int bias_size(ParallelTensorShape const &input,
39 |                 ParallelDim bias_dims[MAX_TENSOR_DIM]) const;
40 | };
41 | 
42 | } // namespace FlexFlow
43 | // std::hash specialization (declared only) for use as a hash-container key.
44 | namespace std {
45 | template <>
46 | struct hash<FlexFlow::Conv2DParams> {
47 |   size_t operator()(FlexFlow::Conv2DParams const &) const;
48 | };
49 | } // namespace std
50 | 
51 | #endif // _FLEXFLOW_CONV_2D_PARAMS_H
52 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/dropout_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_DROPOUT_PARAMS_H
 2 | #define _FLEXFLOW_DROPOUT_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | // Field bundle for Dropout; comparable and hashable.
 9 | struct DropoutParams {
10 |   float rate;
11 |   unsigned long long seed;
12 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
13 |   bool is_valid(ParallelTensorShape const &) const; // declared only
14 | };
15 | bool operator==(DropoutParams const &, DropoutParams const &);
16 | 
17 | } // namespace FlexFlow
18 | // std::hash specialization (declared only) for use as a hash-container key.
19 | namespace std {
20 | template <>
21 | struct hash<FlexFlow::DropoutParams> {
22 |   size_t operator()(FlexFlow::DropoutParams const &) const;
23 | };
24 | } // namespace std
25 | 
26 | #endif // _FLEXFLOW_DROPOUT_PARAMS_H
27 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/element_binary_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_ELEMENT_BINARY_PARAMS_H
 2 | #define _FLEXFLOW_ELEMENT_BINARY_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | // Field bundle for ElementBinary; comparable and hashable.
10 | struct ElementBinaryParams {
11 |   LayerID layer_guid;
12 |   OperatorType type; // presumably selects the binary operation (confirm)
13 |   bool inplace_a;
14 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
15 |   // Declared only; takes a pair of ParallelTensorShape.
16 |   bool is_valid(
17 |       std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
18 | };
19 | // Declared only; the definition decides which fields are compared.
20 | bool operator==(ElementBinaryParams const &, ElementBinaryParams const &);
21 | 
22 | } // namespace FlexFlow
23 | // std::hash specialization (declared only) for use as a hash-container key.
24 | namespace std {
25 | template <>
26 | struct hash<FlexFlow::ElementBinaryParams> {
27 |   size_t operator()(FlexFlow::ElementBinaryParams const &) const;
28 | };
29 | } // namespace std
30 | 
31 | #endif // _FLEXFLOW_ELEMENT_BINARY_PARAMS_H
32 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/element_unary_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_ELEMENTARY_UNARY_PARAMS_H
 2 | #define _FLEXFLOW_ELEMENTARY_UNARY_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | // Field bundle for ElementUnary; comparable and hashable.
10 | struct ElementUnaryParams {
11 |   OperatorType op_type; // presumably selects the unary operation (confirm)
12 |   bool inplace;
13 |   float scalar = 0.0; // the only field here with a default initializer
14 |   LayerID layer_guid;
15 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
16 |   // Declared only; takes a single ParallelTensorShape.
17 |   bool is_valid(ParallelTensorShape const &) const;
18 | };
19 | // Declared only; the definition decides which fields are compared.
20 | bool operator==(ElementUnaryParams const &, ElementUnaryParams const &);
21 | 
22 | } // namespace FlexFlow
23 | // std::hash specialization (declared only) for use as a hash-container key.
24 | namespace std {
25 | template <>
26 | struct hash<FlexFlow::ElementUnaryParams> {
27 |   size_t operator()(FlexFlow::ElementUnaryParams const &) const;
28 | };
29 | } // namespace std
30 | 
31 | #endif // _FLEXFLOW_ELEMENTARY_UNARY_PARAMS_H
32 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/embedding_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_EMBEDDING_PARAMS_H
 2 | #define _FLEXFLOW_EMBEDDING_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | // Field bundle for Embedding; comparable and hashable.
10 | struct EmbeddingParams {
11 |   int num_entries, out_channels;
12 |   LayerID layer_guid;
13 |   AggrMode aggr;
14 |   DataType data_type;
15 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
16 |   // Declared only; takes a single ParallelTensorShape.
17 |   bool is_valid(ParallelTensorShape const &) const;
18 | };
19 | bool operator==(EmbeddingParams const &, EmbeddingParams const &);
20 | 
21 | } // namespace FlexFlow
22 | // std::hash specialization (declared only) for use as a hash-container key.
23 | namespace std {
24 | template <>
25 | struct hash<FlexFlow::EmbeddingParams> {
26 |   size_t operator()(FlexFlow::EmbeddingParams const &) const;
27 | };
28 | } // namespace std
29 | 
30 | #endif // _FLEXFLOW_EMBEDDING_PARAMS_H
31 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/experts_params.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "flexflow/ffconst.h"
 4 | #include "flexflow/fftype.h"
 5 | #include "flexflow/operator.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | // Field bundle for Experts; comparable and hashable.
10 | struct ExpertsParams {
11 |   LayerID layer_guid;
12 |   int num_experts;
13 |   int experts_start_idx;
14 |   int experts_output_dim_size;
15 |   float alpha;
16 |   int experts_num_layers;
17 |   int experts_internal_dim_size;
18 |   bool use_bias;
19 |   ActiMode activation;
20 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
21 |   // Declared only; takes a vector of ParallelTensorShape.
22 |   bool is_valid(std::vector<ParallelTensorShape> const &) const;
23 | };
24 | // Declared only; the definition decides which fields are compared.
25 | bool operator==(ExpertsParams const &, ExpertsParams const &);
26 | 
27 | } // namespace FlexFlow
28 | // std::hash specialization (declared only) for use as a hash-container key.
29 | namespace std {
30 | template <>
31 | struct hash<FlexFlow::ExpertsParams> {
32 |   size_t operator()(FlexFlow::ExpertsParams const &) const;
33 | };
34 | } // namespace std
35 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/flat_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_FLAT_PARAMS_H
 2 | #define _FLEXFLOW_FLAT_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | // Field bundle for Flat: only a name, plus dimension helpers.
 9 | struct FlatParams {
10 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
11 |   bool is_valid(ParallelTensorShape const &) const; // declared only
12 |   void solve_dims(ParallelTensorShape const &input, // declared only
13 |                   ParallelDim output_dims[MAX_TENSOR_DIM],
14 |                   int *output_ndims) const;
15 |   // Private helper, declared only; presumably used by solve_dims (confirm).
16 | private:
17 |   int output_size(ParallelTensorShape const &input,
18 |                   ParallelDim output_dims[MAX_TENSOR_DIM]) const;
19 | };
20 | // Declared only; the definition decides what is compared.
21 | bool operator==(FlatParams const &, FlatParams const &);
22 | 
23 | } // namespace FlexFlow
24 | // std::hash specialization (declared only) for use as a hash-container key.
25 | namespace std {
26 | template <>
27 | struct hash<FlexFlow::FlatParams> {
28 |   size_t operator()(FlexFlow::FlatParams const &) const;
29 | };
30 | } // namespace std
31 | 
32 | #endif // _FLEXFLOW_FLAT_PARAMS_H
33 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/gather_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_GATHER_PARAMS_H
 2 | #define _FLEXFLOW_GATHER_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | // Field bundle for Gather; comparable and hashable.
10 | struct GatherParams {
11 |   int legion_dim;
12 |   LayerID layer_guid;
13 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
14 |   bool is_valid( // declared only; takes a pair of ParallelTensorShape
15 |       std::pair<ParallelTensorShape, ParallelTensorShape> const &input) const;
16 | };
17 | // Declared only; the definition decides which fields are compared.
18 | bool operator==(GatherParams const &, GatherParams const &);
19 | 
20 | } // namespace FlexFlow
21 | // std::hash specialization (declared only) for use as a hash-container key.
22 | namespace std {
23 | template <>
24 | struct hash<FlexFlow::GatherParams> {
25 |   size_t operator()(FlexFlow::GatherParams const &) const;
26 | };
27 | } // namespace std
28 | 
29 | #endif // _FLEXFLOW_GATHER_PARAMS_H
30 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/groupby_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_GROUPBY_PARAMS_H
 2 | #define _FLEXFLOW_GROUPBY_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | // Field bundle for Group_by; comparable and hashable.
 9 | struct Group_byParams {
10 |   int n;
11 |   float alpha;
12 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
13 |   bool is_valid( // declared only; takes a pair of ParallelTensorShape
14 |       std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
15 | };
16 | bool operator==(Group_byParams const &, Group_byParams const &);
17 | 
18 | } // namespace FlexFlow
19 | // std::hash specialization (declared only) for use as a hash-container key.
20 | namespace std {
21 | template <>
22 | struct hash<FlexFlow::Group_byParams> {
23 |   size_t operator()(FlexFlow::Group_byParams const &) const;
24 | };
25 | } // namespace std
26 | 
27 | #endif // _FLEXFLOW_GROUPBY_PARAMS_H
28 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/inc_multihead_self_attention_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_INC_MULTIHEAD_SELF_ATTENTION_PARAMS_H
 2 | #define _FLEXFLOW_INC_MULTIHEAD_SELF_ATTENTION_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/inference.h"
 7 | #include "flexflow/parallel_tensor.h"
 8 | 
 9 | namespace FlexFlow {
10 | // Field bundle for IncMultiHeadSelfAttention; comparable and hashable.
11 | struct IncMultiHeadSelfAttentionParams {
12 |   LayerID layer_guid;
13 |   int embed_dim, num_q_heads, kdim, vdim, num_kv_heads,
14 |       tensor_parallelism_degree, num_kv_cache_pages;
15 |   float dropout, scaling_factor;
16 |   bool add_zero_attn, scaling_query, qk_prod_scaling, position_bias;
17 |   RotaryEmbeddingMeta rotary_embedding_meta;
18 |   DataType quantization_type;
19 |   bool offload;
20 |   char name[MAX_OPNAME]; // fixed-size buffer of MAX_OPNAME chars
21 |   bool is_valid(ParallelTensorShape const &) const; // declared only
22 | };
23 | // Declared only; the definition decides which fields are compared.
24 | bool operator==(IncMultiHeadSelfAttentionParams const &,
25 |                 IncMultiHeadSelfAttentionParams const &);
26 | 
27 | } // namespace FlexFlow
28 | // std::hash specialization (declared only) for use as a hash-container key.
29 | namespace std {
30 | template <>
31 | struct hash<FlexFlow::IncMultiHeadSelfAttentionParams> {
32 |   size_t operator()(FlexFlow::IncMultiHeadSelfAttentionParams const &) const;
33 | };
34 | } // namespace std
35 | 
36 | #endif // _FLEXFLOW_INC_MULTIHEAD_SELF_ATTENTION_PARAMS_H
37 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/kernels/cast_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_CAST_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_CAST_KERNELS_H
 3 | 
 4 | #include "flexflow/device.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/op_meta.h"
 7 | 
 8 | namespace FlexFlow {
 9 | // Forward declaration of the operator class; distinct from Kernels::Cast.
10 | class Cast;
11 | // OpMeta extended with the input and output element data types.
12 | class CastMeta : public OpMeta {
13 | public:
14 |   CastMeta(FFHandler handle, Cast const *cast); // declared only
15 |   DataType input_data_type, output_data_type;
16 | };
17 | // Kernel entry points; IDT / ODT are the source / destination element types.
18 | namespace Kernels {
19 | namespace Cast {
20 | template <typename IDT, typename ODT>
21 | void forward_kernel_wrapper(CastMeta const *m,
22 |                             IDT const *input_ptr,
23 |                             ODT *output_ptr,
24 |                             size_t volume);
25 | // Unlike the forward wrapper, this one takes no CastMeta argument.
26 | template <typename IDT, typename ODT>
27 | void backward_kernel_wrapper(IDT const *src_ptr, ODT *dst_ptr, size_t volume);
28 | // Variants that take the stream explicitly as their last argument.
29 | namespace Internal {
30 | 
31 | template <typename IDT, typename ODT>
32 | void forward_kernel(IDT const *input_ptr,
33 |                     ODT *output_ptr,
34 |                     size_t volume,
35 |                     ffStream_t stream);
36 | template <typename IDT, typename ODT>
37 | void backward_kernel(IDT const *src_ptr,
38 |                      ODT *dst_ptr,
39 |                      size_t volume,
40 |                      ffStream_t stream);
41 | } // namespace Internal
42 | } // namespace Cast
43 | } // namespace Kernels
44 | } // namespace FlexFlow
45 | 
46 | #endif // _FLEXFLOW_OPS_KERNELS_CAST_KERNELS_H
47 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/kernels/concat_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_CONCAT_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_CONCAT_KERNELS_H
 3 | 
 4 | #include "flexflow/accessor.h"
 5 | #include "flexflow/device.h"
 6 | #include "flexflow/fftype.h"
 7 | #include "flexflow/op_meta.h"
 8 | 
 9 | namespace FlexFlow {
10 | 
11 | class Concat; // forward declaration; only a pointer to Concat is used in this header
12 | 
13 | class ConcatMeta : public OpMeta { // OpMeta subclass that stores a single int, legion_axis
14 | public:
15 |   ConcatMeta(FFHandler handle, Concat const *cc);
16 |   int legion_axis;
17 | };
18 | 
19 | namespace Kernels {
20 | namespace Concat {
21 | 
22 | void init_meta(ConcatMeta *meta, int legion_axis); // takes a mutable meta plus an axis; presumably stores the axis into meta -- confirm in the definition
23 | void forward_kernel_wrapper(ConcatMeta const *m, // one writable output accessor, an array of num_inputs read-only input accessors
24 |                             GenericTensorAccessorW const &output,
25 |                             GenericTensorAccessorR const *inputs,
26 |                             int num_inputs,
27 |                             int axis);
28 | void backward_kernel_wrapper(ConcatMeta const *m, // mirror of forward: one read-only output gradient, an array of writable input gradients
29 |                              GenericTensorAccessorR const &output_grad,
30 |                              GenericTensorAccessorW const *input_grads,
31 |                              int num_inputs,
32 |                              int axis);
33 | 
34 | namespace Internal { // variants without the ConcatMeta argument that take an explicit ffStream_t
35 | 
36 | void forward_kernel(GenericTensorAccessorW const &output,
37 |                     GenericTensorAccessorR const *inputs,
38 |                     int num_inputs,
39 |                     int axis,
40 |                     ffStream_t stream);
41 | 
42 | void backward_kernel(GenericTensorAccessorR const &output_grad,
43 |                      GenericTensorAccessorW const *input_grads,
44 |                      int num_inputs,
45 |                      int axis,
46 |                      ffStream_t stream);
47 | } // namespace Internal
48 | } // namespace Concat
49 | } // namespace Kernels
50 | } // namespace FlexFlow
51 | 
52 | #endif // _FLEXFLOW_OPS_KERNELS_CONCAT_KERNELS_H
53 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/kernels/decompress_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_DECOMPRESS_KERNELS_H
 2 | #define _FLEXFLOW_DECOMPRESS_KERNELS_H
 3 | 
 4 | #include "flexflow/device.h"
 5 | 
 6 | namespace FlexFlow {
 7 | namespace Kernels {
 8 | 
 9 | template <typename DT> // DT is the element type of the output weight buffer
10 | __global__ void decompress_int4_general_weights(char const *input_weight_ptr, // declared __global__, i.e. a device kernel entry point; reads a const byte buffer
11 |                                                 DT *weight_ptr,
12 |                                                 int in_dim,
13 |                                                 int valueSize);
14 | template <typename DT>
15 | __global__ void decompress_int8_general_weights(char const *input_weight_ptr, // same signature as the int4 general variant
16 |                                                 DT *weight_ptr,
17 |                                                 int in_dim,
18 |                                                 int valueSize);
19 | 
20 | template <typename DT>
21 | __global__ void decompress_int4_attention_weights(char *input_weight_ptr, // NOTE(review): input is non-const here, unlike the general variants -- confirm whether the kernel writes to it
22 |                                                   DT *weight_ptr,
23 |                                                   int qProjSize,
24 |                                                   int qSize,
25 |                                                   int num_heads);
26 | 
27 | template <typename DT>
28 | __global__ void decompress_int8_attention_weights(char *input_weight_ptr, // same signature as the int4 attention variant
29 |                                                   DT *weight_ptr,
30 |                                                   int qProjSize,
31 |                                                   int qSize,
32 |                                                   int num_heads);
33 | // template <typename T1, typename T2>
34 | // void decompress_weight_bias(T1 *input_weight_ptr,
35 | //                             T2 *weight_ptr,
36 | //                             T2 *params,
37 | //                             int group_size,
38 | //                             int tensor_size);
39 | 
40 | } // namespace Kernels
41 | } // namespace FlexFlow
42 | 
43 | #endif // _FLEXFLOW_DECOMPRESS_KERNELS_H
44 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/kernels/dropout_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_DROPOUT_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_DROPOUT_KERNELS_H
 3 | 
 4 | #include "flexflow/device.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/op_meta.h"
 7 | #include "flexflow/ops/dropout.h"
 8 | 
 9 | namespace FlexFlow {
10 | 
11 | class DropoutMeta : public OpMeta { // OpMeta subclass holding backend descriptors and buffers for dropout
12 | public:
13 |   DropoutMeta(FFHandler handle,
14 |               Dropout const *dropout,
15 |               Legion::Memory gpu_mem,
16 |               Legion::Domain const &output_domain);
17 |   ~DropoutMeta(void); // user-declared destructor; what it releases is defined in the implementation file
18 |   Realm::RegionInstance reserveInst;
19 | #if defined(FF_USE_CUDA) || defined(FF_USE_HIP_CUDA)
20 |   cudnnTensorDescriptor_t inputTensor, outputTensor; // cuDNN descriptor types when either macro above is defined
21 |   cudnnDropoutDescriptor_t dropoutDesc;
22 | #else
23 |   miopenTensorDescriptor_t inputTensor, outputTensor; // otherwise the MIOpen descriptor types, under the same member names
24 |   miopenDropoutDescriptor_t dropoutDesc;
25 | #endif
26 |   void *reserveSpace, *dropoutStates; // untyped buffers; presumably sized by the two size_t members below -- TODO confirm
27 |   size_t reserveSpaceSize, dropoutStateSize;
28 | };
29 | 
30 | namespace Kernels {
31 | namespace Dropout {
32 | void forward_kernel_wrapper(DropoutMeta *m, // takes a non-const meta, so the call may modify it; float-only interface
33 |                             float const *input_ptr,
34 |                             float *output_ptr);
35 | void backward_kernel_wrapper(DropoutMeta *m,
36 |                              float const *output_grad_ptr,
37 |                              float *input_grad_ptr);
38 | 
39 | namespace Internal { // same signatures plus an explicit ffStream_t
40 | void forward_kernel(DropoutMeta *m,
41 |                     float const *input_ptr,
42 |                     float *output_ptr,
43 |                     ffStream_t stream);
44 | void backward_kernel(DropoutMeta *m,
45 |                      float const *output_grad_ptr,
46 |                      float *input_grad_ptr,
47 |                      ffStream_t stream);
48 | } // namespace Internal
49 | } // namespace Dropout
50 | } // namespace Kernels
51 | } // namespace FlexFlow
52 | 
53 | #endif // _FLEXFLOW_OPS_KERNELS_DROPOUT_KERNELS_H
54 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/kernels/embedding_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_EMBEDDING_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_EMBEDDING_KERNELS_H
 3 | 
 4 | #include "flexflow/accessor.h"
 5 | #include "flexflow/device.h"
 6 | #include "flexflow/fftype.h"
 7 | #include "flexflow/op_meta.h"
 8 | 
 9 | namespace FlexFlow {
10 | 
11 | class EmbeddingMeta : public OpMeta { // OpMeta subclass carrying the input DataType tag and the aggregation mode
12 | public:
13 |   EmbeddingMeta(FFHandler handle, Op const *op);
14 |   DataType input_data_type;
15 |   AggrMode aggr;
16 | };
17 | 
18 | namespace Kernels {
19 | namespace Embedding {
20 | void forward_kernel_wrapper(EmbeddingMeta const *m, // reads input and weight accessors, writes the output accessor
21 |                             GenericTensorAccessorR const &input,
22 |                             GenericTensorAccessorW const &output,
23 |                             GenericTensorAccessorR const &weight,
24 |                             int in_dim,
25 |                             int out_dim,
26 |                             int batch_size);
27 | void backward_kernel_wrapper(EmbeddingMeta const *m, // reads input and output accessors, writes the weight gradient accessor
28 |                              GenericTensorAccessorR const &input,
29 |                              GenericTensorAccessorR const &output,
30 |                              GenericTensorAccessorW const &weight_grad,
31 |                              int in_dim,
32 |                              int out_dim,
33 |                              int batch_size);
34 | 
35 | namespace Internal { // only the forward kernel is declared here; no backward_kernel declaration exists in this header
36 | template <typename TI, typename TD> // TI: pointee type of the input pointer, TD: pointee type of the output and weight pointers
37 | void forward_kernel(TI const *input_ptr,
38 |                     TD *output_ptr,
39 |                     TD const *weight_ptr,
40 |                     int in_dim,
41 |                     int out_dim,
42 |                     int batch_size,
43 |                     AggrMode aggr,
44 |                     int outputSize,
45 |                     ffStream_t stream);
46 | 
47 | 
48 | } // namespace Internal
49 | } // namespace Embedding
50 | } // namespace Kernels
51 | } // namespace FlexFlow
52 | 
53 | #endif // _FLEXFLOW_OPS_KERNELS_EMBEDDING_KERNELS_H


--------------------------------------------------------------------------------
/include/flexflow/ops/kernels/flat_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_FLAT_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_FLAT_KERNELS_H
 3 | 
 4 | #include "flexflow/device.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/op_meta.h"
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | class Flat; // forward declaration; only a pointer to Flat is used in this header
11 | 
12 | class FlatMeta : public OpMeta { // OpMeta subclass that adds no members of its own
13 | public:
14 |   FlatMeta(FFHandler handle, Flat const *flat);
15 | };
16 | 
17 | namespace Kernels {
18 | namespace Flat {
19 | 
20 | void forward_kernel_wrapper(float const *input_ptr, // float-only; reads input_ptr, writes output_ptr
21 |                             float *output_ptr,
22 |                             size_t num_elements);
23 | void backward_kernel_wrapper(float *input_grad_ptr, // note the argument order: writable input gradient first, then read-only output gradient
24 |                              float const *output_grad_ptr,
25 |                              size_t num_elements);
26 | 
27 | namespace Internal { // same signatures plus an explicit ffStream_t
28 | 
29 | void forward_kernel(float const *input_ptr,
30 |                     float *output_ptr,
31 |                     size_t num_elements,
32 |                     ffStream_t stream);
33 | void backward_kernel(float *input_grad_ptr,
34 |                      float const *output_grad_ptr,
35 |                      size_t num_elements,
36 |                      ffStream_t stream);
37 | 
38 | } // namespace Internal
39 | } // namespace Flat
40 | } // namespace Kernels
41 | } // namespace FlexFlow
42 | 
43 | #endif // _FLEXFLOW_OPS_KERNELS_FLAT_KERNELS_H
44 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/kernels/gather_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_GATHER_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_GATHER_KERNELS_H
 3 | 
 4 | #include "flexflow/accessor.h"
 5 | #include "flexflow/device.h"
 6 | #include "flexflow/fftype.h"
 7 | #include "flexflow/op_meta.h"
 8 | 
 9 | namespace FlexFlow {
10 | 
11 | class Gather; // forward declaration; only a pointer to Gather is used in this header
12 | 
13 | class GatherMeta : public OpMeta { // OpMeta subclass that stores a single int, legion_dim
14 | public:
15 |   GatherMeta(FFHandler handler, Gather const *gather);
16 | 
17 | public: // repeated access specifier; the member below is public either way
18 |   int legion_dim;
19 | };
20 | 
21 | namespace Kernels {
22 | namespace Gather {
23 | void forward_kernel_wrapper(GatherMeta const *m, // reads input and index accessors, writes the output accessor
24 |                             GenericTensorAccessorR const &input,
25 |                             GenericTensorAccessorR const &index,
26 |                             GenericTensorAccessorW const &output);
27 | void backward_kernel_wrapper(GatherMeta const *m, // reads output gradient and index accessors, writes the input gradient accessor
28 |                              GenericTensorAccessorR const &output_grad,
29 |                              GenericTensorAccessorR const &index,
30 |                              GenericTensorAccessorW const &input_grad);
31 | namespace Internal { // raw-pointer variants: float data, templated index type, explicit ffStream_t
32 | template <typename IndexType> // IndexType is the pointee type of the index pointer
33 | void forward_kernel(float const *input_ptr,
34 |                     IndexType const *index_ptr,
35 |                     float *output_ptr,
36 |                     Legion::coord_t output_size,
37 |                     Legion::coord_t stride,
38 |                     Legion::coord_t input_dim_size,
39 |                     Legion::coord_t output_dim_size,
40 |                     ffStream_t stream);
41 | template <typename IndexType>
42 | void backward_kernel(float const *output_grad_ptr,
43 |                      IndexType const *index_ptr,
44 |                      float *input_grad_ptr,
45 |                      Legion::coord_t output_size,
46 |                      Legion::coord_t stride,
47 |                      Legion::coord_t input_dim_size,
48 |                      Legion::coord_t output_dim_size,
49 |                      ffStream_t stream);
50 | } // namespace Internal
51 | } // namespace Gather
52 | } // namespace Kernels
53 | } // namespace FlexFlow
54 | 
55 | #endif // _FLEXFLOW_OPS_KERNELS_GATHER_KERNELS_H
56 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/kernels/reshape_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_RESHAPE_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_RESHAPE_KERNELS_H
 3 | 
 4 | #include "flexflow/device.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/op_meta.h"
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | class Reshape; // forward declaration; only a pointer to Reshape is used in this header
11 | 
12 | class ReshapeMeta : public OpMeta { // OpMeta subclass that stores a single DataType tag
13 | public:
14 |   ReshapeMeta(FFHandler handler, Reshape const *reshape);
15 |   DataType data_type;
16 | };
17 | 
18 | namespace Kernels {
19 | namespace Reshape {
20 | 
21 | template <typename T> // T is the element type shared by the input and output pointers
22 | void forward_kernel_wrapper(T const *input_ptr,
23 |                             T *output_ptr,
24 |                             size_t num_elements);
25 | 
26 | template <typename T>
27 | void backward_kernel_wrapper(T *input_grad_ptr, // note the argument order: writable input gradient first, then read-only output gradient
28 |                              T const *output_grad_ptr,
29 |                              size_t num_elements);
30 | 
31 | namespace Internal { // same signatures plus an explicit ffStream_t
32 | 
33 | template <typename T>
34 | void forward_kernel(T const *input_ptr,
35 |                     T *output_ptr,
36 |                     size_t num_elements,
37 |                     ffStream_t stream);
38 | template <typename T>
39 | void backward_kernel(T *input_grad_ptr,
40 |                      T const *output_grad_ptr,
41 |                      size_t num_elements,
42 |                      ffStream_t stream);
43 | 
44 | } // namespace Internal
45 | } // namespace Reshape
46 | } // namespace Kernels
47 | } // namespace FlexFlow
48 | 
49 | #endif // _FLEXFLOW_OPS_KERNELS_RESHAPE_KERNELS_H
50 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/kernels/split_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_SPLIT_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_SPLIT_KERNELS_H
 3 | 
 4 | #include "flexflow/device.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/op_meta.h"
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | namespace Kernels {
11 | namespace Split { // no meta class is declared in this header; the kernels take raw pointers only
12 | void forward_kernel_wrapper(float **out_ptrs, // array of writable output pointers; presumably numOutputs entries -- TODO confirm
13 |                             float const *in_ptr,
14 |                             Legion::coord_t const *out_blk_sizes,
15 |                             Legion::coord_t in_blk_size,
16 |                             Legion::coord_t num_blks,
17 |                             int numOutputs);
18 | 
19 | void backward_kernel_wrapper(float *in_grad_ptr, // single writable input gradient; array of read-only output gradient pointers
20 |                              float const **out_grad_ptr,
21 |                              Legion::coord_t const *out_blk_sizes,
22 |                              Legion::coord_t in_blk_size,
23 |                              Legion::coord_t num_blks,
24 |                              int numOutputs);
25 | 
26 | namespace Internal { // same signatures plus an explicit ffStream_t
27 | void forward_kernel(float **out_ptrs,
28 |                     float const *in_ptr,
29 |                     Legion::coord_t const *out_blk_sizes,
30 |                     Legion::coord_t in_blk_size,
31 |                     Legion::coord_t num_blks,
32 |                     int numOutputs,
33 |                     ffStream_t stream);
34 | void backward_kernel(float *in_grad_ptr,
35 |                      float const **out_grad_ptr,
36 |                      Legion::coord_t const *out_blk_sizes,
37 |                      Legion::coord_t in_blk_size,
38 |                      Legion::coord_t num_blks,
39 |                      int numOutputs,
40 |                      ffStream_t stream);
41 | } // namespace Internal
42 | } // namespace Split
43 | } // namespace Kernels
44 | } // namespace FlexFlow
45 | 
46 | #endif // _FLEXFLOW_OPS_KERNELS_SPLIT_KERNELS_H
47 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/kernels/transpose_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_TRANSPOSE_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_TRANSPOSE_KERNELS_H
 3 | 
 4 | #include "flexflow/device.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/op_meta.h"
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | class Transpose; // forward declaration; only a pointer to Transpose is used in this header
11 | 
12 | class TransposeMeta : public OpMeta { // OpMeta subclass storing a dimension count and a permutation array
13 | public:
14 |   TransposeMeta(FFHandler handler, Transpose const *transpose);
15 |   int num_dim;
16 |   int perm[MAX_TENSOR_DIM]; // fixed capacity of MAX_TENSOR_DIM ints; presumably only the first num_dim are meaningful -- TODO confirm
17 | };
18 | 
19 | namespace Kernels {
20 | namespace Transpose {
21 | 
22 | void forward_kernel_wrapper(TransposeMeta const *m, // float-only; the Legion::Domain arguments are passed by value
23 |                             float const *input_ptr,
24 |                             float *output_ptr,
25 |                             Legion::Domain in_domain,
26 |                             Legion::Domain out_domain);
27 | void backward_kernel_wrapper(TransposeMeta const *m, // note the argument order: writable input gradient first, then read-only output gradient
28 |                              float *input_grad_ptr,
29 |                              float const *output_grad_ptr,
30 |                              Legion::Domain in_grad_domain,
31 |                              Legion::Domain out_grad_domain);
32 | 
33 | namespace Internal { // same signatures plus an explicit ffStream_t
34 | 
35 | void forward_kernel(TransposeMeta const *m,
36 |                     float const *input_ptr,
37 |                     float *output_ptr,
38 |                     Legion::Domain in_domain,
39 |                     Legion::Domain out_domain,
40 |                     ffStream_t stream);
41 | void backward_kernel(TransposeMeta const *m,
42 |                      float *input_grad_ptr,
43 |                      float const *output_grad_ptr,
44 |                      Legion::Domain in_grad_domain,
45 |                      Legion::Domain out_grad_domain,
46 |                      ffStream_t stream);
47 | 
48 | } // namespace Internal
49 | } // namespace Transpose
50 | } // namespace Kernels
51 | } // namespace FlexFlow
52 | 
53 | #endif // _FLEXFLOW_OPS_KERNELS_TRANSPOSE_KERNELS_H
54 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/layer_norm_params.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "flexflow/ffconst.h"
 4 | #include "flexflow/fftype.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | 
 9 | struct LayerNormParams { // Aggregate of configuration fields; no constructors are declared in this header.
10 |   LayerID layer_guid;
11 |   std::vector<int> axes; // NOTE(review): <vector> is not included directly here; it must arrive through the flexflow headers above
12 |   bool elementwise_affine;
13 |   float eps;
14 |   bool use_bias;
15 |   char name[MAX_OPNAME]; // fixed-size character buffer of MAX_OPNAME elements
16 |   bool is_valid(ParallelTensorShape const &) const; // const predicate over a ParallelTensorShape; declared only, defined elsewhere
17 | };
18 | 
19 | bool operator==(LayerNormParams const &, LayerNormParams const &); // declared only, defined elsewhere
20 | 
21 | } // namespace FlexFlow
22 | 
23 | namespace std {
24 | template <> // explicit specialization of std::hash for the params struct
25 | struct hash<FlexFlow::LayerNormParams> {
26 |   size_t operator()(FlexFlow::LayerNormParams const &) const; // declared only; the hash computation is defined elsewhere
27 | };
28 | } // namespace std
29 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/mean.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "flexflow/model.h"
 4 | 
 5 | namespace FlexFlow {
 6 | 
 7 | class Mean : public Op { // Op subclass constructed from an input tensor, a list of dims and a keepdims flag
 8 | public:
 9 |   Mean(FFModel &model,
10 |        const ParallelTensor input,
11 |        std::vector<int> const &dims,
12 |        bool keepdims,
13 |        char const *name);
14 |   void init(FFModel const &) override;
15 |   void forward(FFModel const &) override;
16 |   void backward(FFModel const &) override;
17 |   void print_layer(FFModel const &model) override { // not implemented: trips assert(0) whenever assertions are enabled
18 |     assert(0);
19 |   }
20 | 
21 |   static OpMeta *init_task(Legion::Task const *task, // static task entry points share the same four-argument Legion signature
22 |                            std::vector<Legion::PhysicalRegion> const &regions,
23 |                            Legion::Context ctx,
24 |                            Legion::Runtime *runtime);
25 |   static void forward_task(Legion::Task const *task,
26 |                            std::vector<Legion::PhysicalRegion> const &regions,
27 |                            Legion::Context ctx,
28 |                            Legion::Runtime *runtime);
29 |   static void backward_task(Legion::Task const *task,
30 |                             std::vector<Legion::PhysicalRegion> const &regions,
31 |                             Legion::Context ctx,
32 |                             Legion::Runtime *runtime);
33 |   bool measure_operator_cost(Simulator *sim, // returns bool and fills cost_metrics through the non-const reference
34 |                              MachineView const &pc,
35 |                              CostMetrics &cost_metrics) const override;
36 | };
37 | 
38 | } // namespace FlexFlow
39 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/noop.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_NOOP_H
 2 | #define _FLEXFLOW_NOOP_H
 3 | 
 4 | #include "flexflow/inference.h"
 5 | #include "flexflow/model.h"
 6 | 
 7 | namespace FlexFlow {
 8 | 
 9 | class NoOp : public Op { // Op subclass with two constructors; the second also takes an input tensor guid
10 | public:
11 |   NoOp(FFModel &model,
12 |        OperatorType type,
13 |        const ParallelTensor output,
14 |        char const *name = NULL);
15 |   NoOp(FFModel &model,
16 |        OperatorType type,
17 |        size_t input_tensor_guid,
18 |        const ParallelTensor output,
19 |        char const *name = NULL);
20 |   void init(FFModel const &) override;
21 |   void init_inference(FFModel const &,
22 |                       std::vector<ParallelTensor> const &,
23 |                       std::vector<ParallelTensor> const &,
24 |                       MachineView const *mv = nullptr) override;
25 |   void forward(FFModel const &) override;
26 |   Legion::FutureMap inference(FFModel const &,
27 |                               BatchConfigFuture const &,
28 |                               std::vector<ParallelTensor> const &,
29 |                               std::vector<ParallelTensor> const &,
30 |                               MachineView const *mv = nullptr) override;
31 |   void backward(FFModel const &) override;
32 |   void print_layer(FFModel const &model) override { // not implemented: trips assert(0) whenever assertions are enabled
33 |     assert(0);
34 |   }
35 |   bool measure_operator_cost(Simulator *sim, // returns bool and fills cost_metrics through the non-const reference
36 |                              MachineView const &pc,
37 |                              CostMetrics &cost_metrics) const override;
38 |   static OpMeta *init_task(Legion::Task const *task,
39 |                            std::vector<Legion::PhysicalRegion> const &regions,
40 |                            Legion::Context ctx,
41 |                            Legion::Runtime *runtime);
42 | 
43 |   size_t get_params_hash() const override;
44 |   tl::optional<RecordFormatter> as_dot() const override; // optional result: may hold no RecordFormatter
45 | 
46 | public:
47 |   size_t input_tensor_guid; // public data member; same name as the second constructor's parameter
48 | };
49 | 
50 | } // namespace FlexFlow
51 | 
52 | #endif // _FLEXFLOW_NOOP_H
53 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/pool_2d_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_POOL_2D_PARAMS_H
 2 | #define _FLEXFLOW_POOL_2D_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | 
 9 | struct Pool2DParams { // Configuration fields for 2D pooling plus shape helpers; no constructors are declared in this header.
10 |   int kernel_h, kernel_w, stride_h, stride_w, padding_h, padding_w; // per-axis (h, w) kernel, stride and padding values
11 |   PoolType pool_type;
12 |   ActiMode activation;
13 |   char name[MAX_OPNAME]; // fixed-size character buffer of MAX_OPNAME elements
14 | 
15 |   bool is_valid(ParallelTensorShape const &input) const; // const predicate over the input shape; declared only, defined elsewhere
16 |   void solve_dims(ParallelTensorShape const &input, // reports results through the two out-parameters below
17 |                   ParallelDim output_dims[MAX_TENSOR_DIM],
18 |                   int *output_ndims) const;
19 | 
20 | private: // the helper below is the only private member of this struct
21 |   int output_size(ParallelTensorShape const &input,
22 |                   ParallelDim output_dims[MAX_TENSOR_DIM]) const;
23 | };
24 | 
25 | bool operator==(Pool2DParams const &, Pool2DParams const &); // declared only, defined elsewhere
26 | 
27 | } // namespace FlexFlow
28 | 
29 | namespace std {
30 | template <> // explicit specialization of std::hash for the params struct
31 | struct hash<FlexFlow::Pool2DParams> {
32 |   size_t operator()(FlexFlow::Pool2DParams const &) const; // declared only; the hash computation is defined elsewhere
33 | };
34 | } // namespace std
35 | 
36 | #endif // _FLEXFLOW_POOL_2D_PARAMS_H
37 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/reduce_params.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "flexflow/ffconst.h"
 4 | #include "flexflow/fftype.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | 
 9 | struct ReduceParams { // Aggregate of configuration fields; no constructors are declared in this header.
10 |   std::vector<int> axes; // NOTE(review): <vector> is not included directly here; it must arrive through the flexflow headers above
11 |   bool keepdims;
12 |   LayerID layer_guid;
13 |   char name[MAX_OPNAME]; // fixed-size character buffer of MAX_OPNAME elements
14 | 
15 |   bool is_valid(ParallelTensorShape const &) const; // const predicate over a ParallelTensorShape; declared only, defined elsewhere
16 | };
17 | 
18 | bool operator==(ReduceParams const &, ReduceParams const &); // declared only, defined elsewhere
19 | 
20 | } // namespace FlexFlow
21 | 
22 | namespace std {
23 | template <> // explicit specialization of std::hash for the params struct
24 | struct hash<FlexFlow::ReduceParams> {
25 |   size_t operator()(FlexFlow::ReduceParams const &) const; // declared only; the hash computation is defined elsewhere
26 | };
27 | } // namespace std
28 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/reshape_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_RESHAPE_PARAMS_H
 2 | #define _FLEXFLOW_RESHAPE_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | struct ReshapeParams { // Aggregate of configuration fields; no constructors are declared in this header.
11 |   std::vector<int> shape; // NOTE(review): <vector> is not included directly here; it must arrive through the flexflow headers above
12 |   LayerID layer_guid;
13 |   char name[MAX_OPNAME]; // fixed-size character buffer of MAX_OPNAME elements
14 | 
15 |   bool is_valid(ParallelTensorShape const &) const; // const predicate over a ParallelTensorShape; declared only, defined elsewhere
16 | };
17 | bool operator==(ReshapeParams const &, ReshapeParams const &); // declared only, defined elsewhere
18 | 
19 | } // namespace FlexFlow
20 | 
21 | namespace std {
22 | template <> // explicit specialization of std::hash for the params struct
23 | struct hash<FlexFlow::ReshapeParams> {
24 |   size_t operator()(FlexFlow::ReshapeParams const &) const; // declared only; the hash computation is defined elsewhere
25 | };
26 | } // namespace std
27 | 
28 | #endif // _FLEXFLOW_RESHAPE_PARAMS_H
29 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/residual_layer_norm_params.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "flexflow/ffconst.h"
 4 | #include "flexflow/fftype.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | 
 9 | struct ResidualLayerNormParams { // Aggregate of configuration fields; no constructors are declared in this header.
10 |   LayerID layer_guid;
11 |   std::vector<int> axes;
12 |   bool elementwise_affine;
13 |   float eps;
14 |   bool use_bias;
15 |   bool use_two_residuals;
16 |   bool inplace_residual;
17 |   char name[MAX_OPNAME]; // fixed-size character buffer of MAX_OPNAME elements
18 |   bool is_valid(std::tuple<ParallelTensorShape, // validates a tuple of three shapes; which element plays which role is not shown here
19 |                            ParallelTensorShape,
20 |                            ParallelTensorShape> const &) const;
21 | };
22 | 
23 | bool operator==(ResidualLayerNormParams const &, // declared only, defined elsewhere
24 |                 ResidualLayerNormParams const &);
25 | 
26 | } // namespace FlexFlow
27 | 
28 | namespace std {
29 | template <> // explicit specialization of std::hash for the params struct
30 | struct hash<FlexFlow::ResidualLayerNormParams> {
31 |   size_t operator()(FlexFlow::ResidualLayerNormParams const &) const; // declared only; the hash computation is defined elsewhere
32 | };
33 | } // namespace std
34 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/residual_rms_norm_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_RESIDUAL_RMSNORM_PARAMS_H
 2 | #define _FLEXFLOW_RESIDUAL_RMSNORM_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | struct ResidualRMSNormParams { // Aggregate of configuration fields; no constructors are declared in this header.
11 |   LayerID layer_guid;
12 |   float eps;
13 |   int dim;
14 |   bool inplace_residual;
15 |   char name[MAX_OPNAME]; // fixed-size character buffer of MAX_OPNAME elements
16 |   bool is_valid( // validates a pair of shapes; which element plays which role is not shown here
17 |       std::pair<ParallelTensorShape, ParallelTensorShape> const &input) const;
18 | };
19 | 
20 | bool operator==(ResidualRMSNormParams const &, ResidualRMSNormParams const &); // declared only, defined elsewhere
21 | 
22 | } // namespace FlexFlow
23 | 
24 | namespace std {
25 | template <> // explicit specialization of std::hash for the params struct
26 | struct hash<FlexFlow::ResidualRMSNormParams> {
27 |   size_t operator()(FlexFlow::ResidualRMSNormParams const &) const; // declared only; the hash computation is defined elsewhere
28 | };
29 | } // namespace std
30 | 
31 | #endif // _FLEXFLOW_RESIDUAL_RMSNORM_PARAMS_H


--------------------------------------------------------------------------------
/include/flexflow/ops/rms_norm_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_RMSNORM_PARAMS_H
 2 | #define _FLEXFLOW_RMSNORM_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | struct RMSNormParams { // Aggregate of configuration fields; no constructors are declared in this header.
11 |   LayerID layer_guid;
12 |   float eps;
13 |   int dim;
14 |   char name[MAX_OPNAME]; // fixed-size character buffer of MAX_OPNAME elements
15 |   bool is_valid(ParallelTensorShape const &) const; // const predicate over a ParallelTensorShape; declared only, defined elsewhere
16 | };
17 | 
18 | bool operator==(RMSNormParams const &, RMSNormParams const &); // declared only, defined elsewhere
19 | 
20 | } // namespace FlexFlow
21 | 
22 | namespace std {
23 | template <> // explicit specialization of std::hash for the params struct
24 | struct hash<FlexFlow::RMSNormParams> {
25 |   size_t operator()(FlexFlow::RMSNormParams const &) const; // declared only; the hash computation is defined elsewhere
26 | };
27 | } // namespace std
28 | 
29 | #endif // _FLEXFLOW_RMSNORM_PARAMS_H


--------------------------------------------------------------------------------
/include/flexflow/ops/sampling_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_SAMPLING_PARAMS_H
 2 | #define _FLEXFLOW_SAMPLING_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | 
 9 | struct SamplingParams { // Aggregate of configuration fields; no constructors are declared in this header.
10 |   float top_p; // presumably a sampling threshold -- TODO confirm range and meaning in the op implementation
11 |   char name[MAX_OPNAME]; // fixed-size character buffer of MAX_OPNAME elements
12 |   bool is_valid(ParallelTensorShape const &) const; // const predicate over a ParallelTensorShape; declared only, defined elsewhere
13 | };
14 | bool operator==(SamplingParams const &, SamplingParams const &); // declared only, defined elsewhere
15 | 
16 | } // namespace FlexFlow
17 | 
18 | namespace std {
19 | template <> // explicit specialization of std::hash for the params struct
20 | struct hash<FlexFlow::SamplingParams> {
21 |   size_t operator()(FlexFlow::SamplingParams const &) const; // declared only; the hash computation is defined elsewhere
22 | };
23 | } // namespace std
24 | 
25 | #endif // _FLEXFLOW_SAMPLING_PARAMS_H


--------------------------------------------------------------------------------
/include/flexflow/ops/sigmoid_silu_multi_params.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "flexflow/ffconst.h"
 4 | #include "flexflow/fftype.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | 
 9 | struct SigmoidSiluMultiParams { // parameter record for the SigmoidSiluMulti operator (per the file path)
10 |   LayerID layer_guid; // presumably the id of the owning layer — confirm against LayerID
11 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
12 |   bool is_valid( // unlike single-input ops, validity is checked against a pair of shapes; declared only
13 |       std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
14 | };
15 | 
16 | bool operator==(SigmoidSiluMultiParams const &, SigmoidSiluMultiParams const &); // equality; declared only
17 | 
18 | } // namespace FlexFlow
19 | 
20 | namespace std {
21 | template <>
22 | struct hash<FlexFlow::SigmoidSiluMultiParams> { // std::hash specialization so the record can key unordered containers
23 |   size_t operator()(FlexFlow::SigmoidSiluMultiParams const &) const; // declared only
24 | };
25 | } // namespace std
26 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/softmax_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_SOFTMAX_PARAMS_H
 2 | #define _FLEXFLOW_SOFTMAX_PARAMS_H
 3 | 
 4 | #include "flexflow/parallel_tensor.h"
 5 | 
 6 | namespace FlexFlow {
 7 | 
 8 | struct SoftmaxParams { // parameter record for the Softmax operator (per the file path)
 9 |   LayerID layer_guid; // presumably the id of the owning layer — confirm against LayerID
10 |   int dim; // presumably the axis softmax is taken over — confirm index convention in the operator
11 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
12 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
13 | };
14 | bool operator==(SoftmaxParams const &, SoftmaxParams const &); // equality; declared only
15 | 
16 | } // namespace FlexFlow
17 | 
18 | namespace std {
19 | template <>
20 | struct hash<FlexFlow::SoftmaxParams> { // std::hash specialization so the record can key unordered containers
21 |   size_t operator()(FlexFlow::SoftmaxParams const &) const; // declared only
22 | };
23 | } // namespace std
24 | 
25 | #endif // _FLEXFLOW_SOFTMAX_PARAMS_H
26 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/spec_inc_multihead_self_attention_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_SPEC_INC_MULTIHEAD_SELF_ATTENTION_PARAMS_H
 2 | #define _FLEXFLOW_SPEC_INC_MULTIHEAD_SELF_ATTENTION_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | struct SpecIncMultiHeadSelfAttentionParams { // parameter record for the speculative incremental multi-head self-attention op (per the name)
11 |   LayerID layer_guid; // presumably the id of the owning layer — confirm against LayerID
12 |   int embed_dim, num_q_heads, num_kv_heads, kdim, vdim; // attention sizes; exact units/semantics live in the operator — confirm
13 |   float dropout, scaling_factor; // NOTE(review): valid ranges are not visible here — confirm
14 |   bool add_zero_attn, scaling_query, qk_prod_scaling, position_bias; // feature switches; semantics defined by the operator
15 |   int num_kv_cache_pages; // presumably the number of KV-cache pages — confirm
16 |   RotaryEmbeddingMeta rotary_embedding_meta; // rotary-embedding settings; type declared outside this header
17 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
18 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
19 | };
20 | 
21 | bool operator==(SpecIncMultiHeadSelfAttentionParams const &, // equality; declared only
22 |                 SpecIncMultiHeadSelfAttentionParams const &);
23 | 
24 | } // namespace FlexFlow
25 | 
26 | namespace std {
27 | template <>
28 | struct hash<FlexFlow::SpecIncMultiHeadSelfAttentionParams> { // std::hash specialization so the record can key unordered containers
29 |   size_t // declared only
30 |       operator()(FlexFlow::SpecIncMultiHeadSelfAttentionParams const &) const;
31 | };
32 | } // namespace std
33 | 
34 | #endif // _FLEXFLOW_SPEC_INC_MULTIHEAD_SELF_ATTENTION_PARAMS_H
35 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/split_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_SPLIT_PARAMS_H
 2 | #define _FLEXFLOW_SPLIT_PARAMS_H
 3 | 
 4 | #include "flexflow/parallel_tensor.h"
 5 | 
 6 | namespace FlexFlow {
 7 | 
 8 | struct SplitParams { // parameter record for the Split operator (per the file path)
 9 |   std::vector<int> splits; // presumably the size of each output piece along the split axis — confirm
10 |   int legion_axis; // axis index; the name suggests Legion dimension ordering — confirm
11 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
12 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
13 | };
14 | 
15 | bool operator==(SplitParams const &, SplitParams const &); // equality; declared only
16 | 
17 | } // namespace FlexFlow
18 | 
19 | namespace std {
20 | template <>
21 | struct hash<FlexFlow::SplitParams> { // std::hash specialization so the record can key unordered containers
22 |   size_t operator()(FlexFlow::SplitParams const &) const; // declared only
23 | };
24 | } // namespace std
25 | 
26 | #endif // _FLEXFLOW_SPLIT_PARAMS_H
27 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/topk_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_TOPK_PARAMS_H
 2 | #define _FLEXFLOW_TOPK_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/parallel_tensor.h"
 6 | 
 7 | namespace FlexFlow {
 8 | 
 9 | struct TopKParams { // parameter record for the TopK operator (per the file path)
10 |   int k; // presumably the number of top entries to keep — confirm in the operator
11 |   bool sorted; // presumably whether the selected entries are returned in sorted order — confirm
12 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
13 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
14 | };
15 | bool operator==(TopKParams const &, TopKParams const &); // equality; declared only
16 | 
17 | } // namespace FlexFlow
18 | 
19 | namespace std {
20 | template <>
21 | struct hash<FlexFlow::TopKParams> { // std::hash specialization so the record can key unordered containers
22 |   size_t operator()(FlexFlow::TopKParams const &) const; // declared only
23 | };
24 | } // namespace std
25 | 
26 | #endif // _FLEXFLOW_TOPK_PARAMS_H
27 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/transpose_params.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "flexflow/parallel_tensor.h"
 4 | 
 5 | namespace FlexFlow {
 6 | 
 7 | struct TransposeParams { // parameter record for the Transpose operator (per the file path)
 8 |   std::vector<int> perm; // presumably the axis permutation — confirm ordering convention in the operator
 9 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
10 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
11 | };
12 | 
13 | bool operator==(TransposeParams const &, TransposeParams const &); // equality; declared only
14 | 
15 | } // namespace FlexFlow
16 | 
17 | namespace std {
18 | template <>
19 | struct hash<FlexFlow::TransposeParams> { // std::hash specialization so the record can key unordered containers
20 |   size_t operator()(FlexFlow::TransposeParams const &) const; // declared only
21 | };
22 | } // namespace std
23 | 


--------------------------------------------------------------------------------
/include/flexflow/ops/tree_inc_multihead_self_attention_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_INC_MULTIHEAD_SELF_ATTENTION_VERIFY_PARAMS_H
 2 | #define _FLEXFLOW_INC_MULTIHEAD_SELF_ATTENTION_VERIFY_PARAMS_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/parallel_tensor.h"
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | struct TreeIncMultiHeadSelfAttentionParams { // parameter record for the tree-based incremental multi-head self-attention op (per the name)
11 |   LayerID layer_guid; // presumably the id of the owning layer — confirm against LayerID
12 |   int embed_dim, num_q_heads, kdim, vdim, num_kv_heads, // attention sizes; exact semantics live in the operator — confirm
13 |       tensor_parallelism_degree, num_kv_cache_pages;
14 |   float dropout, scaling_factor; // NOTE(review): valid ranges are not visible here — confirm
15 |   bool add_zero_attn, scaling_query, qk_prod_scaling, position_bias; // feature switches; semantics defined by the operator
16 |   RotaryEmbeddingMeta rotary_embedding_meta; // rotary-embedding settings; type declared outside this header
17 |   DataType quantization_type; // presumably the weight quantization format — confirm
18 |   bool offload; // NOTE(review): what is offloaded (and where) is not visible here — confirm
19 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
20 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
21 | };
22 | 
23 | bool operator==(TreeIncMultiHeadSelfAttentionParams const &, // equality; declared only
24 |                 TreeIncMultiHeadSelfAttentionParams const &);
25 | 
26 | } // namespace FlexFlow
27 | 
28 | namespace std {
29 | template <>
30 | struct hash<FlexFlow::TreeIncMultiHeadSelfAttentionParams> { // std::hash specialization so the record can key unordered containers
31 |   size_t // declared only
32 |       operator()(FlexFlow::TreeIncMultiHeadSelfAttentionParams const &) const;
33 | };
34 | } // namespace std
35 | 
36 | #endif // _FLEXFLOW_INC_MULTIHEAD_SELF_ATTENTION_VERIFY_PARAMS_H
37 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/allreduce_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_ALLREDUCE_PARAMS_H
 2 | #define _FLEXFLOW_ALLREDUCE_PARAMS_H
 3 | 
 4 | namespace FlexFlow { // NOTE(review): this header has no #include lines; LayerID, MAX_OPNAME, ParallelTensorShape and std::hash must already be visible to the includer
 5 | 
 6 | struct AllReduceParams { // parameter record for the AllReduce parallel operator (per the file path)
 7 |   LayerID layer_guid; // presumably the id of the owning layer — confirm against LayerID
 8 |   int allreduce_legion_dim; // dimension the all-reduce applies to; name suggests Legion ordering — confirm
 9 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
10 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
11 | };
12 | bool operator==(AllReduceParams const &, AllReduceParams const &); // equality; declared only
13 | 
14 | } // namespace FlexFlow
15 | 
16 | namespace std {
17 | template <>
18 | struct hash<FlexFlow::AllReduceParams> { // std::hash specialization so the record can key unordered containers
19 |   size_t operator()(FlexFlow::AllReduceParams const &) const; // declared only
20 | };
21 | } // namespace std
22 | 
23 | #endif // _FLEXFLOW_ALLREDUCE_PARAMS_H
24 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/combine_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_COMBINE_PARAMS_H
 2 | #define _FLEXFLOW_COMBINE_PARAMS_H
 3 | 
 4 | namespace FlexFlow { // NOTE(review): this header has no #include lines; MAX_OPNAME, ParallelTensorShape and std::hash must already be visible to the includer
 5 | 
 6 | struct CombineParams { // parameter record for the Combine parallel operator (per the file path)
 7 |   int combine_legion_dim; // dimension being combined; name suggests Legion ordering — confirm
 8 |   int combine_degree; // presumably the factor by which partitions are merged — confirm
 9 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
10 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
11 | };
12 | bool operator==(CombineParams const &, CombineParams const &); // equality; declared only
13 | 
14 | } // namespace FlexFlow
15 | 
16 | namespace std {
17 | template <>
18 | struct hash<FlexFlow::CombineParams> { // std::hash specialization so the record can key unordered containers
19 |   size_t operator()(FlexFlow::CombineParams const &) const; // declared only
20 | };
21 | } // namespace std
22 | 
23 | #endif // _FLEXFLOW_COMBINE_PARAMS_H
24 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/fused_parallel_op_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_FUSED_PARALLEL_OP_PARAMS_H
 2 | #define _FLEXFLOW_FUSED_PARALLEL_OP_PARAMS_H
 3 | 
 4 | #include "parallel_op_info.h"
 5 | 
 6 | namespace FlexFlow { // NOTE(review): only parallel_op_info.h is included; std::vector, MAX_OPNAME and ParallelTensorShape must come from the includer or transitively
 7 | 
 8 | struct FusedParallelOpParams { // parameter record for the fused parallel operator (per the file path)
 9 |   std::vector<ParallelOpInfo> parallel_ops; // the sequence of parallel ops being fused — presumably in application order; confirm
10 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
11 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
12 | };
13 | bool operator==(FusedParallelOpParams const &, FusedParallelOpParams const &); // equality; declared only
14 | 
15 | } // namespace FlexFlow
16 | 
17 | namespace std {
18 | template <>
19 | struct hash<FlexFlow::FusedParallelOpParams> { // std::hash specialization so the record can key unordered containers
20 |   size_t operator()(FlexFlow::FusedParallelOpParams const &) const; // declared only
21 | };
22 | } // namespace std
23 | 
24 | #endif // _FLEXFLOW_FUSED_PARALLEL_OP_PARAMS_H
25 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/kernels/allreduce_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_ALLREDUCE_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_ALLREDUCE_KERNELS_H
 3 | 
 4 | #include "flexflow/batch_config.h"
 5 | #include "flexflow/device.h"
 6 | #include "flexflow/fftype.h"
 7 | #include "flexflow/op_meta.h"
 8 | #include "flexflow/parallel_ops/allreduce.h"
 9 | 
10 | namespace FlexFlow {
11 | 
12 | class AllReduceMeta : public OpMeta { // per-operator metadata for AllReduce; adds no members beyond OpMeta
13 | public:
14 |   AllReduceMeta(FFHandler handle, AllReduce const *reduct); // declared only; built from a handler and the owning operator
15 | };
16 | 
17 | namespace Kernels {
18 | namespace AllReduce { // kernel entry points for AllReduce; all declared only, defined outside this header
19 | 
20 | void forward_kernel_wrapper(AllReduceMeta const *m, // forward pass: takes an R accessor `input` and a W accessor `output`
21 |                             GenericTensorAccessorR const &input,
22 |                             GenericTensorAccessorW const &output);
23 | 
24 | void backward_kernel_wrapper(AllReduceMeta const *m, // backward pass: input_grad is the W accessor, output_grad the R accessor
25 |                              GenericTensorAccessorW const &input_grad,
26 |                              GenericTensorAccessorR const &output_grad);
27 | 
28 | void inference_kernel_wrapper(AllReduceMeta const *m, // inference variant: same accessors as forward plus a BatchConfig
29 |                               BatchConfig const *bc,
30 |                               GenericTensorAccessorR const &input,
31 |                               GenericTensorAccessorW const &output);
32 | 
33 | void peft_bwd_kernel_wrapper(AllReduceMeta const *m, // PEFT backward variant: same accessors as backward plus a BatchConfig
34 |                              BatchConfig const *bc,
35 |                              GenericTensorAccessorW const &input_grad,
36 |                              GenericTensorAccessorR const &output_grad);
37 | } // namespace AllReduce
38 | } // namespace Kernels
39 | } // namespace FlexFlow
40 | 
41 | #endif // _FLEXFLOW_OPS_KERNELS_ALLREDUCE_KERNELS_H
42 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/kernels/combine_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_H
 3 | 
 4 | #include "flexflow/device.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/op_meta.h"
 7 | #include "flexflow/parallel_ops/combine.h"
 8 | 
 9 | namespace FlexFlow {
10 | 
11 | class Combine; // forward declaration of the operator class (distinct from namespace Kernels::Combine below)
12 | 
13 | class CombineMeta : public OpMeta { // per-operator metadata for Combine
14 | public:
15 |   CombineMeta(FFHandler handle, Combine const *comb); // declared only
16 |   DataType data_type; // element type — presumably used to pick the kernel instantiation; confirm
17 | };
18 | 
19 | namespace Kernels {
20 | namespace Combine { // kernel templates for Combine; declared only, defined outside this header
21 | 
22 | template <typename T>
23 | void forward_kernel(T const *input_ptr, T *output_ptr, size_t num_elements); // forward over num_elements values of T
24 | 
25 | template <typename T>
26 | void backward_kernel(T const *output_grad_ptr, // backward over num_elements values of T; reads output_grad_ptr, writes input_grad_ptr (by constness)
27 |                      T *input_grad_ptr,
28 |                      size_t num_elements);
29 | 
30 | } // namespace Combine
31 | } // namespace Kernels
32 | } // namespace FlexFlow
33 | 
34 | #endif // _FLEXFLOW_OPS_KERNELS_COMBINE_KERNELS_H
35 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/kernels/parallel_identity_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_PARALLEL_IDENTITY_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_PARALLEL_IDENTITY_KERNELS_H
 3 | 
 4 | #include "flexflow/batch_config.h"
 5 | #include "flexflow/device.h"
 6 | #include "flexflow/fftype.h"
 7 | #include "flexflow/op_meta.h"
 8 | #include "flexflow/parallel_ops/parallel_identity.h"
 9 | 
10 | namespace FlexFlow {
11 | 
12 | class ParallelIdentityMeta : public OpMeta { // per-operator metadata for ParallelIdentity; adds no members beyond OpMeta
13 | public:
14 |   ParallelIdentityMeta(FFHandler handle, ParallelIdentity const *reduct); // declared only
15 | };
16 | 
17 | namespace Kernels {
18 | namespace ParallelIdentity { // kernel entry points for ParallelIdentity; all declared only
19 | 
20 | void forward_kernel_wrapper(ParallelIdentityMeta const *m, // forward pass: takes an R accessor `input` and a W accessor `output`
21 |                             GenericTensorAccessorR const &input,
22 |                             GenericTensorAccessorW const &output);
23 | 
24 | void backward_kernel_wrapper(ParallelIdentityMeta const *m, // backward pass: input_grad is the W accessor, output_grad the R accessor
25 |                              GenericTensorAccessorW const &input_grad,
26 |                              GenericTensorAccessorR const &output_grad);
27 | 
28 | void inference_kernel_wrapper(ParallelIdentityMeta const *m, // inference variant: same accessors as forward plus a BatchConfig
29 |                               BatchConfig const *bc,
30 |                               GenericTensorAccessorR const &input,
31 |                               GenericTensorAccessorW const &output);
32 | 
33 | void peft_bwd_kernel_wrapper(ParallelIdentityMeta const *m, // PEFT backward variant: same accessors as backward plus a BatchConfig
34 |                              BatchConfig const *bc,
35 |                              GenericTensorAccessorW const &input_grad,
36 |                              GenericTensorAccessorR const &output_grad);
37 | } // namespace ParallelIdentity
38 | } // namespace Kernels
39 | } // namespace FlexFlow
40 | 
41 | #endif // _FLEXFLOW_OPS_KERNELS_PARALLEL_IDENTITY_KERNELS_H
42 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/kernels/partition_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_PARTITION_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_PARTITION_KERNELS_H
 3 | 
 4 | #include "flexflow/device.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/op_meta.h"
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | class Repartition; // forward declaration of the operator class (distinct from namespace Kernels::Repartition below)
11 | 
12 | class RepartitionMeta : public OpMeta { // per-operator metadata for Repartition
13 | public:
14 |   RepartitionMeta(FFHandler handle, Repartition const *repart); // declared only
15 |   DataType data_type; // element type — presumably used to pick the kernel instantiation; confirm
16 | };
17 | 
18 | namespace Kernels {
19 | namespace Repartition { // kernel templates for Repartition; declared only, defined outside this header
20 | 
21 | template <typename T>
22 | void forward_kernel(T const *input_ptr, T *output_ptr, size_t num_elements); // forward over num_elements values of T
23 | 
24 | template <typename T>
25 | void backward_kernel(T const *output_grad_ptr, // backward over num_elements values of T; reads output_grad_ptr, writes input_grad_ptr (by constness)
26 |                      T *input_grad_ptr,
27 |                      size_t num_elements);
28 | 
29 | } // namespace Repartition
30 | } // namespace Kernels
31 | } // namespace FlexFlow
32 | 
33 | #endif // _FLEXFLOW_OPS_KERNELS_PARTITION_KERNELS_H
34 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/kernels/reduction_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_REDUCTION_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_REDUCTION_KERNELS_H
 3 | 
 4 | #include "flexflow/device.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/op_meta.h"
 7 | #include "flexflow/parallel_ops/reduction.h"
 8 | 
 9 | namespace FlexFlow {
10 | 
11 | class ReductionMeta : public OpMeta { // per-operator metadata for Reduction; adds no members beyond OpMeta
12 | public:
13 |   ReductionMeta(FFHandler handle, Reduction const *reduct); // declared only
14 | };
15 | 
16 | namespace Kernels {
17 | namespace Reduction { // kernel templates for Reduction; declared only, defined outside this header
18 | 
19 | template <typename T>
20 | void forward_kernel(T const *input_ptr, // forward takes num_replicas in addition to num_elements (backward does not)
21 |                     T *output_ptr,
22 |                     size_t num_elements,
23 |                     size_t num_replicas); // presumably the number of input copies folded into one output — confirm in the kernel
24 | 
25 | template <typename T>
26 | void backward_kernel(T const *output_grad_ptr, // backward over num_elements values of T; reads output_grad_ptr, writes input_grad_ptr (by constness)
27 |                      T *input_grad_ptr,
28 |                      size_t num_elements);
29 | 
30 | } // namespace Reduction
31 | } // namespace Kernels
32 | } // namespace FlexFlow
33 | 
34 | #endif // _FLEXFLOW_OPS_KERNELS_REDUCTION_KERNELS_H
35 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/kernels/replicate_kernels.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_OPS_KERNELS_REPLICATE_KERNELS_H
 2 | #define _FLEXFLOW_OPS_KERNELS_REPLICATE_KERNELS_H
 3 | 
 4 | #include "flexflow/device.h"
 5 | #include "flexflow/fftype.h"
 6 | #include "flexflow/op_meta.h"
 7 | #include "flexflow/parallel_ops/replicate.h"
 8 | 
 9 | namespace FlexFlow {
10 | 
11 | class ReplicateMeta : public OpMeta { // per-operator metadata for Replicate; adds no members beyond OpMeta
12 | public:
13 |   ReplicateMeta(FFHandler handle, Replicate const *repl); // declared only
14 | };
15 | 
16 | namespace Kernels {
17 | namespace Replicate { // kernel templates for Replicate; declared only, defined outside this header
18 | 
19 | template <typename T>
20 | void forward_kernel(T const *input_ptr, T *output_ptr, size_t num_elements); // forward over num_elements values of T
21 | 
22 | template <typename T>
23 | void backward_kernel(T const *output_grad_ptr, // backward takes num_replicas in addition to num_elements (forward does not)
24 |                      T *input_grad_ptr,
25 |                      size_t num_elements,
26 |                      size_t num_replicas); // presumably the number of replica gradients folded together — confirm in the kernel
27 | 
28 | } // namespace Replicate
29 | } // namespace Kernels
30 | } // namespace FlexFlow
31 | 
32 | #endif // _FLEXFLOW_OPS_KERNELS_REPLICATE_KERNELS_H
33 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/parallel_identity_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_PARALLEL_IDENTITY_PARAMS_H
 2 | #define _FLEXFLOW_PARALLEL_IDENTITY_PARAMS_H
 3 | 
 4 | namespace FlexFlow { // NOTE(review): this header has no #include lines; LayerID, MAX_OPNAME, ParallelTensorShape and std::hash must already be visible to the includer
 5 | 
 6 | struct ParallelIdentityParams { // parameter record for the ParallelIdentity parallel operator (per the file path)
 7 |   LayerID layer_guid; // presumably the id of the owning layer — confirm against LayerID
 8 |   int parallel_identity_legion_dim; // dimension the op applies to; name suggests Legion ordering — confirm
 9 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
10 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
11 | };
12 | bool operator==(ParallelIdentityParams const &, ParallelIdentityParams const &); // equality; declared only
13 | 
14 | } // namespace FlexFlow
15 | 
16 | namespace std {
17 | template <>
18 | struct hash<FlexFlow::ParallelIdentityParams> { // std::hash specialization so the record can key unordered containers
19 |   size_t operator()(FlexFlow::ParallelIdentityParams const &) const; // declared only
20 | };
21 | } // namespace std
22 | 
23 | #endif // _FLEXFLOW_PARALLEL_IDENTITY_PARAMS_H
24 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/parallel_op.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_PARALLEL_OP_H
 2 | #define _FLEXFLOW_PARALLEL_OP_H
 3 | 
 4 | #include "flexflow/model.h"
 5 | #include "tl/optional.hpp"
 6 | 
 7 | namespace FlexFlow {
 8 | 
 9 | struct ParallelOpJoinResult { // result of try_join_parallel_ops
10 |   tl::optional<ParallelOpInfo> op = tl::nullopt; // empty by default; presumably the merged op when one remains — confirm in the .cc
11 |   bool join_did_succeed = false; // defaults to "no join happened"
12 | };
13 | 
14 | ParallelOpJoinResult try_join_parallel_ops(ParallelOpInfo const &, // declared only; attempts to join two parallel-op descriptors
15 |                                            ParallelOpInfo const &);
16 | 
17 | class ParallelOp : public Op { // abstract base (pure virtuals below) for parallelization operators
18 | public:
19 |   ParallelOp(FFModel &model, // declared only
20 |              OperatorType type,
21 |              char const *_name,
22 |              const ParallelTensor input);
23 |   virtual void init(FFModel const &) = 0; // pure virtual: subclasses must implement
24 |   virtual void forward(FFModel const &) = 0; // pure virtual
25 |   virtual void backward(FFModel const &) = 0; // pure virtual
26 |   virtual void create_input_partition(FFModel &model) = 0; // pure virtual
27 |   virtual void create_input_partition_inference( // overridable; the default below only asserts
28 |       FFModel &model,
29 |       std::vector<ParallelTensor> const &batch_inputs,
30 |       std::vector<ParallelTensor> const &batch_outputs) {
31 |     assert(false); // aborts when asserts are enabled; compiles to nothing under NDEBUG, making the default a silent no-op
32 |   }
33 |   void print_layer(FFModel const &model){}; // intentionally empty; NOTE(review): the ';' after the body is redundant
34 |   virtual bool measure_operator_cost(Simulator *sim, // pure virtual cost hook; results go through cost_metrics (non-const ref)
35 |                                      MachineView const &pc,
36 |                                      CostMetrics &cost_metrics) const = 0;
37 |   virtual bool append_parallel_op_info( // pure virtual; receives a mutable vector, presumably to append this op's ParallelOpInfo — confirm
38 |       std::vector<ParallelOpInfo> &parallel_ops) const = 0;
39 |   virtual bool is_parallel_op() const; // declared only; presumably returns true for this hierarchy — confirm in the .cc
40 | 
41 | public:
42 |   Legion::LogicalPartition input_lp, output_grad_lp; // partitions for the input and the output gradient
43 |   std::unordered_map<ParallelTensor, Legion::LogicalPartition> // per-tensor partitions for the inference path
44 |       inference_input_lps, inference_output_grad_lps;
45 | };
46 | 
47 | }; // namespace FlexFlow (NOTE(review): the ';' after the closing brace is redundant)
48 | 
49 | #endif // _FLEXFLOW_PARALLEL_OP_H
50 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/parallel_op_info.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_PARALLEL_OPS_PARALLEL_OP_INFO_H
 2 | #define _FLEXFLOW_PARALLEL_OPS_PARALLEL_OP_INFO_H
 3 | 
 4 | #include "flexflow/ffconst.h"
 5 | 
 6 | namespace FlexFlow {
 7 | 
 8 | struct ParallelOpInfo { // compact descriptor of one parallel operation
 9 |   friend void swap(ParallelOpInfo &, ParallelOpInfo &); // friend swap, declared only; found via ADL
10 | 
11 |   OperatorType op_type; // which operator this entry describes
12 |   int parallel_dim; // dimension the op acts on — index convention not visible here; confirm
13 |   int parallel_degree; // presumably the parallelism degree applied along parallel_dim — confirm
14 | };
15 | bool operator==(ParallelOpInfo const &, ParallelOpInfo const &); // equality; declared only
16 | 
17 | } // namespace FlexFlow
18 | 
19 | #endif /* _FLEXFLOW_PARALLEL_OPS_PARALLEL_OP_INFO_H */
20 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/partition_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_PARTITION_PARAMS_H
 2 | #define _FLEXFLOW_PARTITION_PARAMS_H
 3 | 
 4 | namespace FlexFlow { // NOTE(review): this header has no #include lines; MAX_OPNAME, ParallelTensorShape and std::hash must already be visible to the includer
 5 | 
 6 | struct RepartitionParams { // parameter record for the Repartition parallel operator
 7 |   int repartition_legion_dim; // dimension being repartitioned; name suggests Legion ordering — confirm
 8 |   int repartition_degree; // presumably the number of partitions created along that dimension — confirm
 9 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
10 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
11 | };
12 | bool operator==(RepartitionParams const &, RepartitionParams const &); // equality; declared only
13 | 
14 | } // namespace FlexFlow
15 | 
16 | namespace std {
17 | template <>
18 | struct hash<FlexFlow::RepartitionParams> { // std::hash specialization so the record can key unordered containers
19 |   size_t operator()(FlexFlow::RepartitionParams const &) const; // declared only
20 | };
21 | } // namespace std
22 | 
23 | #endif // _FLEXFLOW_PARTITION_PARAMS_H
24 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/reduction_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_REDUCTION_PARAMS_H
 2 | #define _FLEXFLOW_REDUCTION_PARAMS_H
 3 | 
 4 | namespace FlexFlow { // NOTE(review): this header has no #include lines; MAX_OPNAME, ParallelTensorShape and std::hash must already be visible to the includer
 5 | 
 6 | struct ReductionParams { // parameter record for the Reduction parallel operator
 7 |   int reduction_legion_dim; // dimension being reduced; name suggests Legion ordering — confirm
 8 |   int reduction_degree; // presumably the number of replicas folded together — confirm
 9 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
10 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
11 | };
12 | bool operator==(ReductionParams const &, ReductionParams const &); // equality; declared only
13 | 
14 | } // namespace FlexFlow
15 | 
16 | namespace std {
17 | template <>
18 | struct hash<FlexFlow::ReductionParams> { // std::hash specialization so the record can key unordered containers
19 |   size_t operator()(FlexFlow::ReductionParams const &) const; // declared only
20 | };
21 | } // namespace std
22 | 
23 | #endif // _FLEXFLOW_REDUCTION_PARAMS_H
24 | 


--------------------------------------------------------------------------------
/include/flexflow/parallel_ops/replicate_params.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_REPLICATE_PARAMS_H
 2 | #define _FLEXFLOW_REPLICATE_PARAMS_H
 3 | 
 4 | namespace FlexFlow { // NOTE(review): this header has no #include lines; MAX_OPNAME, ParallelTensorShape and std::hash must already be visible to the includer
 5 | 
 6 | struct ReplicateParams { // parameter record for the Replicate parallel operator
 7 |   int replicate_legion_dim; // dimension the replication applies to; name suggests Legion ordering — confirm
 8 |   int replicate_degree; // presumably the number of replicas produced — confirm
 9 |   char name[MAX_OPNAME]; // fixed-capacity name buffer of MAX_OPNAME chars
10 |   bool is_valid(ParallelTensorShape const &) const; // validity check against a tensor shape; declared only
11 | };
12 | bool operator==(ReplicateParams const &, ReplicateParams const &); // equality; declared only
13 | 
14 | } // namespace FlexFlow
15 | 
16 | namespace std {
17 | template <>
18 | struct hash<FlexFlow::ReplicateParams> { // std::hash specialization so the record can key unordered containers
19 |   size_t operator()(FlexFlow::ReplicateParams const &) const; // declared only
20 | };
21 | } // namespace std
22 | 
23 | #endif // _FLEXFLOW_REPLICATE_PARAMS_H
24 | 


--------------------------------------------------------------------------------
/include/flexflow/recompile.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #ifndef _FLEXFLOW_RECOMPILE_H_
17 | #define _FLEXFLOW_RECOMPILE_H_
18 | 
19 | #include "legion.h"
20 | #include <functional>
21 | 
22 | namespace FlexFlow {
23 | 
24 | class FFModel; // forward declaration; only pointers to FFModel are used here
25 | 
26 | class RecompileState { // bundles a trigger predicate, an alter callback and the model they operate on
27 | public:
28 |   RecompileState(std::function<bool(FFModel *)> _trigger_func, // declared only; takes both callbacks by value plus the model pointer
29 |                  std::function<void(FFModel *)> _alter_func,
30 |                  FFModel *_ff);
31 |   bool trigger(); // declared only; presumably evaluates trigger_func on ff — confirm in the .cc
32 |   void alter(); // declared only; presumably runs alter_func on ff — confirm in the .cc
33 | 
34 | public:
35 |   int recompilations; // public counter; NOTE(review): who initializes/increments it is not visible here — confirm
36 | 
37 | private:
38 |   std::function<bool(FFModel *)> trigger_func; // predicate over the model
39 |   std::function<void(FFModel *)> alter_func; // callback that receives the model
40 |   FFModel *ff; // raw pointer; ownership is not expressed here — presumably non-owning, confirm
41 | };
42 | 
43 | }; // namespace FlexFlow
44 | #endif
45 | 


--------------------------------------------------------------------------------
/include/flexflow/runtime.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #ifndef _FLEXFLOW_RUNTIME_H_
17 | #define _FLEXFLOW_RUNTIME_H_
18 | 
19 | #include "config.h"
20 | 
21 | namespace FlexFlow {
22 | 
23 | class FFRuntime { // holds one FFHandler slot per possible worker
24 | public:
25 |   FFRuntime(FFConfig &config); // declared only; takes the config by non-const reference
26 |   FFHandler handlers[MAX_NUM_WORKERS]; // fixed-size array; how many slots are actually populated is decided in the constructor — confirm
27 | };
28 | 
29 | } // namespace FlexFlow
30 | 
31 | #endif // _FLEXFLOW_RUNTIME_H_
32 | 


--------------------------------------------------------------------------------
/include/flexflow/utils/disjoint_set.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_DISJOINT_SET_H
 2 | #define _FLEXFLOW_DISJOINT_SET_H
 3 | 
 4 | #include <cassert>
 5 | #include <map>
 6 | #include <set>
 7 | #include <unordered_map>
 8 | 
 9 | template <typename T>
10 | class m_disjoint_set {
11 | public:
12 |   void m_union(T const *l, T const *r) {
13 |     this->add_node_if_missing(l);
14 |     this->add_node_if_missing(r);
15 |     T const *ll = this->find(l);
16 |     T const *rr = this->find(r);
17 |     if (ll != rr) {
18 |       this->mapping[ll] = rr;
19 |     }
20 |   }
21 |   T const *find(T const *t) {
22 |     this->add_node_if_missing(t);
23 |     T const *parent = this->mapping.at(t);
24 |     if (parent == nullptr) {
25 |       return t;
26 |     } else {
27 |       return this->find(parent);
28 |     }
29 |   }
30 | 
31 | private:
32 |   void add_node_if_missing(T const *t) {
33 |     if (mapping.find(t) == mapping.end()) {
34 |       mapping[t] = nullptr;
35 |     }
36 |   }
37 |   std::unordered_map<T const *, T const *> mapping;
38 | };
39 | 
40 | template <typename T, typename Compare = std::less<T>>
41 | class disjoint_set {
42 | public:
43 |   void m_union(T const &l, T const &r) {
44 |     this->nodes.insert(l);
45 |     this->nodes.insert(r);
46 |     this->ds.m_union(this->get_node(l), this->get_node(r));
47 |   }
48 |   T const &find(T const &t) {
49 |     this->nodes.insert(t);
50 |     return *this->ds.find(this->get_node(t));
51 |   }
52 |   std::map<T, T, Compare> get_mapping() const {
53 |     std::map<T, T, Compare> mapping;
54 |     for (T const &t : this->nodes) {
55 |       mapping[t] = this->ds.find(&t);
56 |     }
57 |     return mapping;
58 |   }
59 | 
60 | private:
61 |   T const *get_node(T const &t) {
62 |     auto it = this->nodes.find(t);
63 |     assert(it != this->nodes.end());
64 |     return &*it;
65 |   }
66 | 
67 |   m_disjoint_set<T> ds;
68 |   std::set<T, Compare> nodes;
69 | };
70 | 
71 | #endif // _FLEXFLOW_DISJOINT_SET_H
72 | 


--------------------------------------------------------------------------------
/include/flexflow/utils/dot/record_formatter.h:
--------------------------------------------------------------------------------
 1 | #ifndef _RECORD_FORMATTER_H
 2 | #define _RECORD_FORMATTER_H
 3 | 
 4 | #include <sstream>
 5 | #include <vector>
 6 | 
 7 | class RecordFormatter { // accumulates string pieces; all access goes through the friend stream operators below
 8 |   friend RecordFormatter &operator<<(RecordFormatter &r, // string token overload; declared only
 9 |                                      std::string const &tok);
10 |   friend RecordFormatter &operator<<(RecordFormatter &r, int tok); // int token overload; declared only
11 |   friend RecordFormatter &operator<<(RecordFormatter &r, float tok); // float token overload; declared only
12 |   friend RecordFormatter &operator<<(RecordFormatter &r, // nests another formatter; declared only
13 |                                      RecordFormatter const &sub_r);
14 |   friend RecordFormatter &operator<<(RecordFormatter &r, // takes the stream by non-const ref — NOTE(review): whether it is cleared after use is not visible here
15 |                                      std::ostringstream &oss);
16 |   friend std::ostream &operator<<(std::ostream &s, RecordFormatter const &r); // writes the formatter to an ostream; declared only
17 | 
18 | private:
19 |   std::vector<std::string> pieces; // collected tokens, in insertion order presumably — confirm in the .cc
20 | };
21 | 
22 | #endif // _RECORD_FORMATTER_H


--------------------------------------------------------------------------------
/include/flexflow/utils/random_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef _RANDOM_UTILS_H
 2 | #define _RANDOM_UTILS_H
 3 | 
 4 | #include <cstdlib>
 5 | #include <stdexcept>
 6 | #include <vector>
 7 | 
 8 | float randf();
 9 | 
/**
 * Pick one element of `values` using std::rand().
 *
 * The index is `std::rand() % values.size()`, so the choice carries the
 * usual modulo bias and depends on the global std::rand() state.
 *
 * @param values candidate elements; must not be empty
 * @return a copy of the chosen element
 * @throws std::invalid_argument if `values` is empty (previously this
 *         evaluated `rand() % 0`, which is undefined behaviour)
 */
template <typename T>
T select_random(std::vector<T> const &values) {
  if (values.empty()) {
    throw std::invalid_argument("Values list must not be empty.");
  }
  return values[std::rand() % values.size()];
}
14 | 
/**
 * Weighted selection driven by a caller-supplied number instead of a RNG.
 *
 * The threshold is `value * sum(weights)`. Elements are scanned in order
 * while the running weight sum is <= the threshold; the element at which
 * the running sum first exceeds it is returned, and the last element is
 * returned if it never does.
 *
 * @param values  candidate elements; must not be empty
 * @param weights non-negative weights; weights[i] belongs to values[i], so
 *                at least values.size() entries are required. Extra entries
 *                are tolerated as before: they count towards the total but
 *                can never be selected.
 * @param value   selection point; callers in this file pass randf(). A
 *                negative or NaN value now yields the first element
 *                (previously it indexed values[-1]).
 * @return a copy of the selected element
 * @throws std::invalid_argument if `values` is empty, if `weights` has
 *         fewer entries than `values` (previously an out-of-bounds read),
 *         or if any weight is negative
 */
template <typename T>
T select_random_determistic(std::vector<T> const &values,
                            std::vector<float> const &weights,
                            float value) {
  if (values.empty()) {
    throw std::invalid_argument("Values list must not be empty.");
  }
  if (weights.size() < values.size()) {
    throw std::invalid_argument("Weights list must cover every value.");
  }
  float total = 0.0f;
  for (float const w : weights) {
    if (w < 0) {
      throw std::invalid_argument("Weights must not be negative");
    }
    total += w;
  }

  float const threshold = value * total;
  // Start on the first element so the index can never go below zero.
  std::size_t idx = 0;
  float cumulative = weights[0];
  while (cumulative <= threshold && idx + 1 < values.size()) {
    ++idx;
    cumulative += weights[idx];
  }
  return values[idx];
}
39 | 
40 | template <typename T>
41 | T select_random(std::vector<T> const &values,
42 |                 std::vector<float> const &weights) {
43 |   return select_random_determistic<T>(values, weights, randf());
44 | }
45 | 
46 | #endif // _RANDOM_UTILS_H
47 | 


--------------------------------------------------------------------------------
/include/flexflow/utils/recursive_logger.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_RECURSIVE_LOGGER_H
 2 | #define _FLEXFLOW_RECURSIVE_LOGGER_H
 3 | 
 4 | #include "legion/legion_utilities.h"
 5 | #include <memory>
 6 | 
 7 | #define CONCAT(a, b) CONCAT_INNER(a, b)
 8 | #define CONCAT_INNER(a, b) a##b
 9 | #define UNIQUE_TAG() CONCAT(tag, __COUNTER__)
10 | #define TAG_ENTER(mlogger) auto UNIQUE_TAG() = mlogger->enter_tag()
11 | 
12 | namespace FlexFlow {
13 | 
14 | class RecursiveLogger;
15 | 
// Object bound to a RecursiveLogger for its whole lifetime.
// It cannot be default-constructed or copy-constructed; the reference member
// also means it cannot be re-seated after construction.
// NOTE(review): presumably the constructor calls RecursiveLogger::enter() and
// the destructor calls leave() (scope-guard style) -- the definitions are not
// in this header, confirm there.
class DepthTag {
public:
  DepthTag() = delete;
  DepthTag(RecursiveLogger &);
  DepthTag(DepthTag const &) = delete;
  ~DepthTag();

private:
  // The logger this tag was created for.
  RecursiveLogger &logger;
};
26 | 
// Wrapper around a Legion::Logger that additionally tracks an integer depth.
// Only declarations are visible here; behaviour notes below that go beyond
// the signatures are marked for review.
class RecursiveLogger {
public:
  /* RecursiveLogger(Legion::Logger const &); */
  // Constructs the logger from a category name.
  RecursiveLogger(std::string const &category_name);

  // Each of these returns a Realm::LoggerMessage.
  // NOTE(review): presumably one per log level, with the depth prefix
  // already written via print_prefix -- confirm in the .cc file.
  Realm::LoggerMessage info();
  Realm::LoggerMessage debug();
  Realm::LoggerMessage spew();
  // NOTE(review): presumably increment / decrement `depth` -- confirm.
  void enter();
  void leave();

  // Returns a heap-allocated DepthTag owned by the caller (see the TAG_ENTER
  // macro, which stores the result in a uniquely named local).
  std::unique_ptr<DepthTag> enter_tag();

private:
  // Current nesting depth; starts at 0.
  int depth = 0;

  // Writes a prefix into the given message; const, so it does not change
  // this object's members.
  void print_prefix(Realm::LoggerMessage &) const;

  // Underlying Legion logger.
  Legion::Logger logger;
};
47 | 
48 | };     // namespace FlexFlow
49 | #endif // _FLEXFLOW_RECURSIVE_LOGGER_H
50 | 


--------------------------------------------------------------------------------
/include/flexflow/utils/test_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_UTILS_H_
 2 | #define _FLEXFLOW_UTILS_H_
 3 | #include "flexflow/model.h"
 4 | #include <fstream>
 5 | #include <iomanip>
 6 | #include <iostream>
 7 | #include <sstream>
 8 | 
 9 | namespace FlexFlow {
10 | 
11 | struct ArgsConfig;
12 | 
13 | void initialize_tensor_from_file(const std::string file_path,
14 |                                  Tensor label,
15 |                                  FFModel const &ff,
16 |                                  std::string data_type = "float",
17 |                                  int num_dim = 3);
18 | 
19 | void initialize_tensor_gradient_from_file(const std::string file_path,
20 |                                           Tensor label,
21 |                                           FFModel const &ff,
22 |                                           std::string data_type,
23 |                                           int num_dim);
24 | 
25 | void initialize_tensor_from_file(const std::string file_path,
26 |                                  Tensor label,
27 |                                  FFModel const &ff,
28 |                                  std::string data_type,
29 |                                  int num_dim);
30 | 
31 | template <int DIM>
32 | void initialize_tensor_from_file_task(
33 |     Legion::Task const *task,
34 |     std::vector<Legion::PhysicalRegion> const &regions,
35 |     Legion::Context ctx,
36 |     Legion::Runtime *runtime);
37 | 
38 | void dump_region_to_file(FFModel &ff,
39 |                          Legion::LogicalRegion &region,
40 |                          std::string file_path,
41 |                          int dims = 4);
42 | 
43 | template <int DIM>
44 | void dump_tensor_task(Legion::Task const *task,
45 |                       std::vector<Legion::PhysicalRegion> const &regions,
46 |                       Legion::Context ctx,
47 |                       Legion::Runtime *runtime);
48 | 
49 | void register_custom_tasks();
50 | 
51 | }; // namespace FlexFlow
52 | #endif
53 | 


--------------------------------------------------------------------------------
/include/flexflow/utils/tuple.h:
--------------------------------------------------------------------------------
 1 | #ifndef _FLEXFLOW_UTILS_TUPLE_H
 2 | #define _FLEXFLOW_UTILS_TUPLE_H
 3 | 
 4 | #include <cstddef>
 5 | #include <tuple>
 6 | #include <type_traits>
 7 | 
 8 | // Adapted from
 9 | // https://github.com/bitwizeshift/BackportCpp/blob/4f33a7f9b219f169e60d8ed2fd5731a3a23288e4/include/bpstd/tuple.hpp
10 | 
namespace FlexFlow {

namespace TupleUtils {

// Primary template, intentionally left undefined: if T does not occur in
// Types..., the recursion below runs out of types and compilation fails.
template <typename T, std::size_t Index, typename... Types>
struct index_of_impl;

// The head of the list is not T: drop it and continue with Index + 1.
template <typename T, std::size_t Index, typename Type0, typename... Types>
struct index_of_impl<T, Index, Type0, Types...>
    : index_of_impl<T, Index + 1, Types...> {};

// The head of the list is T: Index is the result.
template <typename T, std::size_t Index, typename... Types>
struct index_of_impl<T, Index, T, Types...>
    : std::integral_constant<std::size_t, Index> {};

// index_of<T, Types...>::value is the zero-based position of the first
// occurrence of T in Types....
template <typename T, typename... Types>
struct index_of : index_of_impl<T, 0, Types...> {};

}; // namespace TupleUtils

// Type-based tuple access: returns the element of `t` whose type is T (the
// first one, if T occurs more than once). One overload per value category,
// mirroring the index-based std::get overloads.

template <typename T, typename... Types>
T &get(std::tuple<Types...> &t) noexcept {
  return std::get<TupleUtils::index_of<T, Types...>::value>(t);
}

// The rvalue overloads cast explicitly instead of calling an unqualified
// `move`: that call only resolved through argument-dependent lookup, so it
// compiled for std:: element types but not for e.g. `int`.
template <typename T, typename... Types>
T &&get(std::tuple<Types...> &&t) noexcept {
  return static_cast<T &&>(
      std::get<TupleUtils::index_of<T, Types...>::value>(t));
}

template <typename T, typename... Types>
T const &get(std::tuple<Types...> const &t) noexcept {
  return std::get<TupleUtils::index_of<T, Types...>::value>(t);
}

template <typename T, typename... Types>
T const &&get(std::tuple<Types...> const &&t) noexcept {
  return static_cast<T const &&>(
      std::get<TupleUtils::index_of<T, Types...>::value>(t));
}

}; // namespace FlexFlow
52 | 
53 | #endif // _FLEXFLOW_UTILS_TUPLE_H


--------------------------------------------------------------------------------
/inference/.gitignore:
--------------------------------------------------------------------------------
1 | configs
2 | weights
3 | tokenizers
4 | prompt
5 | output
6 | .env


--------------------------------------------------------------------------------
/inference/README.md:
--------------------------------------------------------------------------------
 1 | # Inference Examples
This folder contains the code for running the FlexFlow inference examples.
 3 | 
 4 | To create a sample prompt, call (from the `build` folder):
 5 | 
 6 | ```bash
 7 | mkdir -p ../inference/prompt
 8 | echo '["San Francisco is a "]' > ../inference/prompt/test.json
 9 | ```
10 | 
11 | To download a model for use in C++, call:
12 | ```bash
13 | huggingface-cli login # if needed
14 | python ../inference/utils/download_hf_model.py meta-llama/Llama-2-7b-hf --half-precision-only
15 | ```
16 | 
17 | To run the incremental decoding example in C++, call:
18 | 
19 | ```bash
20 | ./inference/incr_decoding/incr_decoding -ll:cpu 4 -ll:gpu 4 -ll:fsize 14000 -ll:zsize 30000 --fusion -llm-model meta-llama/Llama-2-7b-hf -prompt ../inference/prompt/test.json -tensor-parallelism-degree 4
21 | ```
22 | 
23 | To run the speculative inference example in C++, call:
24 | 
25 | ```bash
26 | ./inference/spec_infer/spec_infer -ll:cpu 4 -ll:gpu 4 -ll:fsize 14000 -ll:zsize 30000 --fusion -llm-model meta-llama/Llama-2-7b-hf -ssm-model JackFram/llama-160m -prompt ../inference/prompt/test.json -tensor-parallelism-degree 4
27 | ```
28 | 
29 | To run a PEFT model example in C++, call:
30 | 
31 | ```bash
32 | ./inference/peft/peft \
33 |     -ll:gpu 4 -ll:cpu 4 -ll:util 4 \
34 |     -tensor-parallelism-degree 4 \
35 |     -ll:fsize 8192 -ll:zsize 12000 \
36 |     -llm-model JackFram/llama-160m \
37 |     -finetuning-dataset ../inference/prompt/peft_dataset.json \
38 |     -peft-model goliaro/llama-160m-lora \
39 |     -enable-peft \
40 |     --use-full-precision \
41 |     --inference-debugging
42 | ```


--------------------------------------------------------------------------------
/inference/inference_wrapper.in:
--------------------------------------------------------------------------------
#!/bin/bash
# Wrapper template: the @...@ placeholders are substituted when this file is
# configured by the build.
#
# Prepend LIBTORCH_PYTHON_DIR (from CMake) to LD_LIBRARY_PATH. The ':'
# separator is only emitted when LD_LIBRARY_PATH is already set and non-empty;
# otherwise the value would end in ':' and the resulting empty entry makes the
# dynamic loader search the current working directory.
export LD_LIBRARY_PATH="@LIBTORCH_PYTHON_DIR@${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
# Launch the executable (located relative to this script) using the specified
# launcher, forwarding all command-line arguments unchanged.
@LAUNCHER@ "${BASH_SOURCE[0]%/*}/@TARGET_PATH@" "$@"


--------------------------------------------------------------------------------
/inference/python/peft_demo/INSTRUCTIONS.md:
--------------------------------------------------------------------------------
## PEFT Demo
 2 | * `git clone -b peft --recursive https://github.com/flexflow/FlexFlow.git`
 3 | * `cd FlexFlow/`
 4 | 
 5 | * If you wish to run the demo by installing FlexFlow
 6 |     * `conda env create -f conda/flexflow.yml`
 7 |     * `conda activate flexflow`
 8 | 
 9 | * If you wish to run the demo using a Docker container
10 |     * `export FF_CUDA_ARCH=all && export cuda_version=12.0 && ./docker/build.sh flexflow && ./docker/run.sh flexflow`
11 | 
12 | * Then, install the Llama2 model (the `meta-llama/Llama-2-7b-hf` model is gated, so make sure to add your HF access token)
13 | 
14 |     * `export HUGGINGFACE_TOKEN="[Your token]"`
15 |     * `huggingface-cli login --token "$HUGGINGFACE_TOKEN"`
16 |     * `python3 inference/utils/download_peft_model.py "goliaro/llama-2-7b-lora-full"`
17 | 
18 | * Run the demo
19 |     ```
    mkdir -p inference/output
21 |     cd inference/python/peft_demo/
22 |     python3 demo.py -config-file demo_config.json
23 |     ```
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/inference/python/streamlit/README.md:
--------------------------------------------------------------------------------
 1 | # Streamlit demo
 2 | 
 3 | ## Instructions
 4 | 
 5 | 1. Build and install FlexFlow, or build and run `source ./set_python_envs.sh` from the build folder
2. Edit `flexflow-serve/inference/python/streamlit/fastapi_incr.py` to configure the model to run and the system settings (number of GPUs, amount of memory, etc.)
3. In one terminal, launch the LLM engine with the commands below, and wait until the model's weights have finished loading
 8 | ```
 9 | cd flexflow-serve/inference/python/streamlit
10 | export PORT_NUMBER=8080
11 | uvicorn fastapi_incr:app --reload --port $PORT_NUMBER
12 | ```
13 | 4. In another terminal, launch the streamlit app:
14 | ```
15 | cd flexflow-serve/inference/python/streamlit
16 | streamlit run app.py --server.port 8501 --server.address 0.0.0.0
17 | ```
18 | 5. Open the URL printed to the terminal, e.g. `http://localhost:8501` and interact with the app via browser
19 | 
20 | 


--------------------------------------------------------------------------------
/inference/utils/download_hf_model.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import flexflow.serve as ff
 3 | import argparse, os
 4 | 
 5 | 
 6 | def parse_args():
 7 |     parser = argparse.ArgumentParser()
 8 |     parser.add_argument(
 9 |         "model_names", type=str, nargs="+", help="Name of the model(s) to download"
10 |     )
11 |     parser.add_argument(
12 |         "--cache-folder",
13 |         type=str,
14 |         help="Folder to use to store the model(s) assets in FlexFlow format",
15 |         default=os.environ.get("FF_CACHE_PATH", ""),
16 |     )
17 |     parser.add_argument(
18 |         "--refresh-cache",
19 |         action="store_true",
20 |         help="Use this flag to force the refresh of the model(s) weights/tokenizer cache",
21 |     )
22 |     group = parser.add_mutually_exclusive_group()
23 |     group.add_argument(
24 |         "--full-precision-only",
25 |         action="store_true",
26 |         help="Only download the full precision version of the weights",
27 |     )
28 |     group.add_argument(
29 |         "--half-precision-only",
30 |         action="store_true",
31 |         help="Only download the half precision version of the weights",
32 |     )
33 |     args = parser.parse_args()
34 |     return args
35 | 
36 | 
def main(args):
    """Run the download steps for every requested model and precision.

    For each (model name, data type) pair an ``ff.LLM`` is created with the
    cache folder and refresh flag taken from ``args``; then its
    ``download_hf_weights_if_needed``, ``download_hf_tokenizer_if_needed``
    and ``download_hf_config`` methods are called, in that order.

    Args:
        args: namespace produced by ``parse_args``.
    """
    # Both precisions by default; a precision switch narrows it to one.
    precisions = (ff.DataType.DT_FLOAT, ff.DataType.DT_HALF)
    if args.full_precision_only:
        precisions = precisions[:1]
    elif args.half_precision_only:
        precisions = precisions[1:]

    jobs = ((name, dtype) for name in args.model_names for dtype in precisions)
    for model_name, data_type in jobs:
        llm = ff.LLM(
            model_name,
            data_type=data_type,
            cache_path=args.cache_folder,
            refresh_cache=args.refresh_cache,
        )
        llm.download_hf_weights_if_needed()
        llm.download_hf_tokenizer_if_needed()
        llm.download_hf_config()


if __name__ == "__main__":
    main(parse_args())
61 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = [
 3 |     "wheel",
 4 |     "setuptools>=45",
 5 |     "setuptools_scm[toml]>=6.0",
 6 |     "cmake-build-extension",
 7 |     "ninja",
 8 |     "requests",
 9 |     "pip",
10 | ]
11 | build-backend = "setuptools.build_meta"
12 | 


--------------------------------------------------------------------------------
/python/flexflow/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | 
16 | from .config import flexflow_dir
17 | 


--------------------------------------------------------------------------------
/python/flexflow/config.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | 
16 | import os
17 | 
# Name of the Python binding in use.
_FF_PYTHON_BINDING = "cffi"


def flexflow_python_binding():
    """Return the module-level binding name (``"cffi"``)."""
    binding = _FF_PYTHON_BINDING
    return binding
24 | 
25 | 
# Process-wide flag; flipped to True exactly once by set_flexflow_initialized().
_FF_ALREADY_INITIALIZED = False


def flexflow_already_initialized():
    """Return the current value of the module-level initialization flag."""
    # Reading a module global needs no `global` declaration.
    return _FF_ALREADY_INITIALIZED


def set_flexflow_initialized():
    """Set the module-level initialization flag to True.

    Raises:
        RuntimeError: if the flag is already True.
    """
    global _FF_ALREADY_INITIALIZED
    if _FF_ALREADY_INITIALIZED:
        raise RuntimeError(
            "Attempting to set _FF_ALREADY_INITIALIZED=True, but _FF_ALREADY_INITIALIZED is already True"
        )
    _FF_ALREADY_INITIALIZED = True
41 | 
42 | 
# Directory that contains this module, with symlinks resolved.
_FF_DIR = os.path.split(os.path.realpath(__file__))[0]


def flexflow_dir():
    """Return the directory of this module, as computed at import time."""
    return _FF_DIR
49 | 
# Get runtime configs from the command line
def get_configs():
    """Load the JSON config file named by the ``-config-file`` option.

    Only ``-config-file`` is recognised; every other command-line argument
    is ignored, so the function can be used alongside other argument parsers.

    Returns:
        The parsed JSON content when ``-config-file`` is given, else ``None``.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    import argparse
    import json

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-config-file",
        help="The path to a JSON file with the configs. If omitted, a sample model and configs will be used instead.",
        type=str,
        default=None,
    )
    # parse_known_args leaves unrelated flags alone instead of erroring out.
    args, _ = parser.parse_known_args()
    if args.config_file is None:
        return None
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(args.config_file, encoding="utf-8") as f:
        return json.load(f)
66 | 


--------------------------------------------------------------------------------
/python/flexflow/serve/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .llama import FlexFlowLLAMA, LLAMAConfig
16 | from .opt import FlexFlowOPT, OPTConfig
17 | from .falcon import FlexFlowFalcon, FalconConfig
18 | from .starcoder import FlexFlowSTARCODER, STARCODERConfig
19 | from .mpt import FlexFlowMPT, MPTConfig
20 | 


--------------------------------------------------------------------------------
/python/flexflow/serve/models/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | from flexflow.core import *
15 | 
16 | 
class FlexFlowModel:
    """Base class whose methods are placeholders to be overridden.

    The constructor only calls ``build_model``; none of its arguments are
    stored by this class. Every method here raises ``NotImplementedError``
    (previously ``assert False``, which is stripped when Python runs with
    ``-O`` and would then silently return ``None``).
    """

    def __init__(
        self,
        ffmodel: FFModel,
        mode: InferenceMode,
        generation_config: GenerationConfig,
        ffconfig: FFConfig,
        # NOTE(review): `any` is the builtin function, not `typing.Any`;
        # harmless at runtime, but type checkers will not treat it as "any type".
        hf_config: any,
        data_type: DataType,
    ):
        self.build_model()

    def build_model(self):
        """Placeholder for model construction.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Not implemented yet")

    # The two converters take no `self`; marking them static keeps the
    # class-level call form working and also makes instance-level calls valid.
    @staticmethod
    def convert_hf_weight_name(name):
        """Placeholder for converting a weight name.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Not implemented yet")

    @staticmethod
    def convert_hf_model(model, dst_folder):
        """Placeholder for converting ``model`` into ``dst_folder``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Not implemented yet")
37 | 


--------------------------------------------------------------------------------
/python/flexflow/torch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/python/flexflow/torch/__init__.py


--------------------------------------------------------------------------------
/python/flexflow/torch/nn/__init__.py:
--------------------------------------------------------------------------------
1 | from .modules import *
2 | 


--------------------------------------------------------------------------------
/python/flexflow/torch/nn/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .module import Module
2 | 
3 | from torch.nn import Conv2d as Conv2d
4 | from torch.nn import MaxPool2d as MaxPool2d
5 | from torch.nn import Linear as Linear
6 | from torch.nn import Dropout as Dropout
7 | from torch.nn import Flatten as Flatten
8 | from torch.nn import ReLU as ReLU
9 | 


--------------------------------------------------------------------------------
/python/flexflow/torch/nn/modules/module.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import flexflow.core as ff
 3 | import flexflow.torch.fx as fx
 4 | 
 5 | class Module(nn.Module):
 6 |   def __init__(self):
 7 |     super(Module, self).__init__()
 8 |     self._ffconfig = ff.FFConfig()
 9 |     self._ffconfig.parse_args()
10 |     self._ffmodel = ff.FFModel(self._ffconfig)
11 |     self._graph = None
12 |   
13 |   def __call__(self, input):
14 |     print("forward");
15 |   
16 |   # TODO: automatically call this function  
17 |   def symbolic_trace(self):
18 |     self._graph = fx.symbolic_trace(self)
19 |     for node in self._graph:
20 |       if type(node) == fx.ModuleNode:
21 |         print(node.name, node.module)


--------------------------------------------------------------------------------
/python/flexflow_cffi_header.py.in:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | # Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
# IMPORTANT:
#   * this template (flexflow_cffi_header.py.in) is used as an input to string.format()
#   * the file generated from it should not be modified by hand
21 | 
22 | from __future__ import absolute_import, division, print_function, unicode_literals
23 | 
24 | flexflow_header = {header}
25 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | cffi
 2 | numpy
 3 | cmake-build-extension
 4 | ninja
 5 | requests
 6 | regex
 7 | torch
 8 | torchaudio
 9 | torchvision
10 | flash-attn
11 | transformers>=4.47.1
12 | sentencepiece
13 | einops
14 | pip
15 | # peft-related
16 | scipy
17 | bitsandbytes 
18 | datasets 
19 | accelerate 
20 | loralib
21 | triton
22 | peft
23 | 


--------------------------------------------------------------------------------
/scripts/format.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env bash
 2 | 
 3 | set -euo pipefail
 4 | 
 5 | GIT_ROOT="$(git rev-parse --show-toplevel)"
 6 | cd "$GIT_ROOT"
 7 | 
 8 | TOOLS_PATH="$GIT_ROOT/.tools"
 9 | RELEASE="master-1d7ec53d"
10 | CLANG_FORMAT_VERSION="15"
11 | CLANG_FORMAT_PATH="$TOOLS_PATH/clang-format-$CLANG_FORMAT_VERSION-$RELEASE"
12 | 
13 | mkdir -p "$TOOLS_PATH"
14 | 
# Print all arguments to stderr and terminate the current shell with status 1.
error() {
  echo "$@" >&2
  exit 1
}
19 | 
# Map the output of `uname -s` to "Linux" or "Mac" and print it on stdout.
# Any other system name is reported through error(), which exits.
get_os() {
  UNAME_OUTPUT="$(uname -s)"
  if [[ $UNAME_OUTPUT == Linux* ]]; then
    OS=Linux
  elif [[ $UNAME_OUTPUT == Darwin* ]]; then
    OS=Mac
  else
    error "Unknown OS $UNAME_OUTPUT. Exiting..."
  fi

  echo "$OS"
}
35 | 
# Download a static clang tool binary for the current OS.
#
# Arguments:
#   $1  tool name as used in the release asset (e.g. "format")
#   $2  tool version (e.g. "15")
#   $3  path the binary is written to
#
# The file is fetched to "<target>.part" and renamed only after the download
# succeeded, so an interrupted or failed download never leaves a file at the
# target path that a later run would mistake for a valid binary.
download_clang_tool() {
  local tool="$1"
  local version="$2"
  local target_path="$3"
  local tmp_path="$target_path.part"

  # No trailing slash: the separator is added when the URL is assembled
  # (the previous value produced a double slash).
  local base_url="https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/$RELEASE"

  local os url_os url
  os="$(get_os)"
  case "$os" in
    Linux)
      url_os="linux"
      ;;
    Mac)
      url_os="macosx"
      ;;
    *)
      error "Unknown return value from get_os: $os. Exiting..."
  esac
  url="$base_url/clang-${tool}-${version}_${url_os}-amd64"
  echo "Downloading from $url..."

  if command -v wget &> /dev/null; then
    if ! wget "$url" -O "$tmp_path"; then
      rm -f "$tmp_path"
      error "Download of $url failed. Exiting..."
    fi
  elif command -v curl &> /dev/null; then
    # --fail: treat HTTP errors as failures instead of saving the error page.
    if ! curl --fail -L "$url" -o "$tmp_path"; then
      rm -f "$tmp_path"
      error "Download of $url failed. Exiting..."
    fi
  else
    error "Could not find either wget or curl. Exiting..."
  fi

  mv "$tmp_path" "$target_path"
}
65 | 
# Fetch clang-format on first use and make it executable.
if [[ ! -e $CLANG_FORMAT_PATH ]]; then
  download_clang_tool format "$CLANG_FORMAT_VERSION" "$CLANG_FORMAT_PATH"
  chmod u+x "$CLANG_FORMAT_PATH"
fi

# Format every tracked C/C++/CUDA source file in place, excluding triton/.
mapfile -t FILES < <(git ls-files ':!:triton/**' '*.h' '*.cc' '*.cpp' '*.cu' '*.c')
# Skip the call when nothing matched: expanding an empty array trips `set -u`
# on older bash versions, and clang-format without file arguments has nothing
# to format in place.
if (( ${#FILES[@]} > 0 )); then
  "$CLANG_FORMAT_PATH" -i "${FILES[@]}"
fi
73 | 


--------------------------------------------------------------------------------
/scripts/install_tokenizer.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env bash
 2 | set -x
 3 | set -e
 4 | 
 5 | # Cd into directory holding this script
 6 | cd "${BASH_SOURCE[0]%/*}"
 7 | cd ../deps/tokenizers-cpp/example
 8 | cmake -D CMAKE_CXX_FLAGS=-fPIC
 9 | make -j
10 | 


--------------------------------------------------------------------------------
/scripts/mnist_mlp_run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | eval "$(conda shell.bash hook)"
 3 | conda activate flexflow
 4 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib
 5 | 
 6 | # Path to your FlexFlow build
 7 | FLEXFLOW_DIR=/home/ubuntu/FlexFlow/build
 8 | 
 9 | # Path to your UCX installation
10 | UCX_DIR=/home/ubuntu/ucx-1.15.0/install
11 | 
12 | export REALM_UCP_BOOTSTRAP_PLUGIN=$FLEXFLOW_DIR/deps/legion/lib/realm_ucp_bootstrap_mpi.so
13 | export LD_LIBRARY_PATH=$FLEXFLOW_DIR/deps/legion/lib:$LD_LIBRARY_PATH
14 | export LD_LIBRARY_PATH=$FLEXFLOW_DIR:$LD_LIBRARY_PATH
15 | export LD_LIBRARY_PATH=$UCX_DIR/lib:$LD_LIBRARY_PATH
16 | export LD_LIBRARY_PATH=/opt/conda/envs/flexflow/lib:$LD_LIBRARY_PATH
17 | 
18 | mpiexec -x REALM_UCP_BOOTSTRAP_PLUGIN -x PATH -x LD_LIBRARY_PATH --hostfile ~/hostfile --mca btl_tcp_if_include ens5 -np 2 "$FLEXFLOW_DIR"/flexflow_python "$FLEXFLOW_DIR"/../examples/python/native/mnist_mlp.py -ll:py 1 -ll:gpu 1 -ll:fsize 8000 -ll:zsize 8000
19 | 


--------------------------------------------------------------------------------
/src/ops/mean.cpp:
--------------------------------------------------------------------------------
 1 | /* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #include "flexflow/ops/mean.h"
17 | #include "flexflow/utils/hip_helper.h"
18 | #include <hip/hip_runtime.h>
19 | 
20 | namespace FlexFlow {}; // namespace FlexFlow
21 | 


--------------------------------------------------------------------------------
/src/ops/mean.cu:
--------------------------------------------------------------------------------
 1 | /* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #include "flexflow/ops/mean.h"
17 | #include "flexflow/utils/cuda_helper.h"
18 | 
19 | namespace FlexFlow {}; // namespace FlexFlow
20 | 


--------------------------------------------------------------------------------
/src/ops/moe.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright 2023 CMU, Facebook, Stanford
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
#include "flexflow/model.h"

#include <vector>
17 | 
18 | using namespace FlexFlow;
19 | 
20 | Tensor FFModel::moe(const Tensor input,
21 |                     int num_exp,
22 |                     int num_select,
23 |                     int expert_hidden_size,
24 |                     float alpha,
25 |                     float lambda) {
26 |   // MoE model
27 |   Tensor gate_preds = dense(input, num_exp, AC_MODE_RELU);
28 |   Tensor topK_output[2];
29 |   top_k(gate_preds, topK_output, num_select, false);
30 |   Tensor exp_tensors[num_exp];
31 |   group_by(input, topK_output[1], exp_tensors, num_exp, alpha);
32 |   Tensor agg_inputs[num_exp + 4];
33 |   agg_inputs[0] = softmax(topK_output[0]); // gate preds
34 |   agg_inputs[1] = topK_output[1];          // gate assign
35 |   agg_inputs[2] = topK_output[1];          // gate assign TopK (for cache)
36 |   agg_inputs[3] = gate_preds;              // full gate preds
37 |   for (int i = 0; i < num_exp; i++) {
38 |     Tensor exp_pred = dense(exp_tensors[i], expert_hidden_size, AC_MODE_RELU);
39 |     agg_inputs[i + 4] = softmax(exp_pred);
40 |   }
41 |   Tensor coop_output = aggregate(agg_inputs, num_exp, lambda);
42 |   // get_metrics();
43 |   return coop_output;
44 | }
45 | 


--------------------------------------------------------------------------------
/src/parallel_ops/fused_parallel_op.cpp:
--------------------------------------------------------------------------------
 1 | /* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #include "flexflow/parallel_ops/fused_parallel_op.h"
17 | #include "flexflow/utils/hip_helper.h"
18 | #include <hip/hip_runtime.h>
19 | 
20 | namespace FlexFlow {}; // namespace FlexFlow
21 | 


--------------------------------------------------------------------------------
/src/parallel_ops/fused_parallel_op.cu:
--------------------------------------------------------------------------------
 1 | /* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #include "flexflow/parallel_ops/fused_parallel_op.h"
17 | #include "flexflow/utils/cuda_helper.h"
18 | 
19 | namespace FlexFlow {}; // namespace FlexFlow
20 | 


--------------------------------------------------------------------------------
/src/recompile/recompile_state.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #include "flexflow/model.h"
17 | #include "flexflow/recompile.h"
18 | #include "legion.h"
19 | 
20 | namespace FlexFlow {
21 | 
22 | // Stores the trigger predicate, the alteration callback and the model both
23 | // are invoked on; the recompilation counter starts at zero.
24 | RecompileState::RecompileState(std::function<bool(FFModel *)> _trigger_func,
25 |                                std::function<void(FFModel *)> _alter_func,
26 |                                FFModel *_ff)
27 |     : trigger_func(_trigger_func), alter_func(_alter_func), ff(_ff) {
28 |   recompilations = 0;
29 | }
30 | 
31 | // Evaluates the trigger predicate on the stored model.
32 | bool RecompileState::trigger() {
33 |   bool const fired = trigger_func(ff);
34 |   return fired;
35 | }
36 | 
37 | // Runs the alteration callback on the first call only; every call, including
38 | // the first, bumps the recompilation counter afterwards.
39 | void RecompileState::alter() {
40 |   bool const first_call = (recompilations == 0);
41 |   if (first_call) {
42 |     alter_func(ff);
43 |   }
44 |   ++recompilations;
45 | }
46 | 
47 | }; // namespace FlexFlow
48 | 


--------------------------------------------------------------------------------
/src/runtime/accessor_kernel.cpp:
--------------------------------------------------------------------------------
 1 | #include "flexflow/utils/hip_helper.h"
 2 | #include <hip/hip_runtime.h>
 3 | 
 4 | namespace FlexFlow {
 5 | 
 6 | using namespace Legion;
 7 | // Kernel: writes 0 to ptr[i] for each i yielded by CUDA_KERNEL_LOOP(i, size).
 8 | template <typename DT>
 9 | __global__ void zero_array(DT *ptr, coord_t size) {
10 |   CUDA_KERNEL_LOOP(i, size) {
11 |     ptr[i] = 0;
12 |   }
13 | }
14 | 
15 | }; // namespace FlexFlow
16 | 


--------------------------------------------------------------------------------
/src/runtime/accessor_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "flexflow/utils/cuda_helper.h"
 2 | 
 3 | namespace FlexFlow {
 4 | 
 5 | using namespace Legion;
 6 | // Kernel: writes 0 to ptr[i] for each i yielded by CUDA_KERNEL_LOOP(i, size).
 7 | template <typename DT>
 8 | __global__ void zero_array(DT *ptr, coord_t size) {
 9 |   CUDA_KERNEL_LOOP(i, size) {
10 |     ptr[i] = 0;
11 |   }
12 | }
13 | 
14 | }; // namespace FlexFlow
15 | 


--------------------------------------------------------------------------------
/src/runtime/compile.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e # stop at the first failed compile instead of running the next one
3 | g++ dlrm_strategy.cc strategy.pb.cc -o generator -std=c++11 -lprotobuf -L/usr/local/lib -I/usr/local/include -I"${PROTOBUF}"/src -pthread -O2
4 | g++ dlrm_strategy_hetero.cc strategy.pb.cc -o generator_hetero -std=c++11 -lprotobuf -L/usr/local/lib -I/usr/local/include -I"${PROTOBUF}"/src -pthread -O2
5 | 


--------------------------------------------------------------------------------
/src/runtime/cpp_driver.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #include "dirent.h"
17 | #include "flexflow/mapper.h"
18 | #include "flexflow/model.h"
19 | 
20 | using namespace Legion;
21 | using namespace FlexFlow;
22 | 
23 | // ========================================================
24 | // Task and mapper registrations
25 | // ========================================================
26 | int main(int argc, char **argv) {
27 |   // This needs to be set, otherwise NCCL will try to use group kernel launches,
28 |   // which are not compatible with the Realm CUDA hijack.
29 |   setenv("NCCL_LAUNCH_MODE", "PARALLEL", true);
30 |   // Declare the top-level task id and preregister its LOC_PROC variant.
31 |   Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID);
32 |   {
33 |     TaskVariantRegistrar registrar(TOP_LEVEL_TASK_ID, "top_level");
34 |     registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
35 |     registrar.set_replicable();
36 |     Runtime::preregister_task_variant<top_level_task>(registrar, "top_level");
37 |   }
38 | 
39 |   register_flexflow_internal_tasks();
40 | 
41 |   // Register custom tasks
42 |   register_custom_tasks();
43 |   // Add FFMapper::update_mappers as a registration callback, then start.
44 |   Runtime::add_registration_callback(FFMapper::update_mappers);
45 |   return Runtime::start(argc, argv);
46 | }
47 | 


--------------------------------------------------------------------------------
/src/runtime/fftype.cc:
--------------------------------------------------------------------------------
 1 | #include "flexflow/fftype.h"
 2 | #include "flexflow/config.h"
 3 | #include <cassert>
 4 | 
 5 | namespace FlexFlow {
 6 | 
 7 | const LayerID LayerID::NO_ID = LayerID();
 8 | 
 9 | // Default layer id: id and model_id are 0 and transformer_layer_id is set to
10 | // MAX_NUM_TRANSFORMER_LAYERS. No validity check is performed here.
11 | LayerID::LayerID()
12 |     : id(0), transformer_layer_id(MAX_NUM_TRANSFORMER_LAYERS), model_id(0) {}
13 | 
14 | // Builds a layer id from explicit components and asserts that it is valid.
15 | LayerID::LayerID(size_t _id, size_t _transformer_layer_id, size_t _model_id)
16 |     : id(_id), transformer_layer_id(_transformer_layer_id),
17 |       model_id(_model_id) {
18 |   assert(is_valid_id());
19 | }
20 | 
21 | // True when id lies in [LAYER_GUID_FIRST_VALID, LAYER_GUID_LAST_VALID],
22 | // transformer_layer_id lies in [0, MAX_NUM_TRANSFORMER_LAYERS) and model_id
23 | // is non-negative.
24 | bool LayerID::is_valid_id() const {
25 |   bool const id_in_range =
26 |       id >= LAYER_GUID_FIRST_VALID && id <= LAYER_GUID_LAST_VALID;
27 |   bool const transformer_layer_in_range =
28 |       transformer_layer_id >= 0 &&
29 |       transformer_layer_id < MAX_NUM_TRANSFORMER_LAYERS;
30 |   return id_in_range && transformer_layer_in_range && model_id >= 0;
31 | }
32 | 
33 | // Two layer ids are equal when their `id` fields match; in that case the
34 | // remaining fields are asserted to match as well.
35 | bool operator==(LayerID const &lhs, LayerID const &rhs) {
36 |   bool const same_id = (lhs.id == rhs.id);
37 |   if (same_id) {
38 |     assert(lhs.transformer_layer_id == rhs.transformer_layer_id);
39 |     assert(lhs.model_id == rhs.model_id);
40 |   }
41 |   return same_id;
42 | }
43 | 
44 | const PEFTModelID PEFTModelID::NO_ID = PEFTModelID();
45 | 
46 | // Default PEFT model id: id is 0, the value NO_ID is built from.
47 | PEFTModelID::PEFTModelID() : id(0) {}
48 | 
49 | // Wraps an explicit id and asserts that it is valid.
50 | PEFTModelID::PEFTModelID(size_t _id) : id(_id) {
51 |   assert(is_valid_id());
52 | }
53 | 
54 | // True when id lies in [PEFT_MODEL_ID_FIRST_VALID, PEFT_MODEL_ID_LAST_VALID].
55 | bool PEFTModelID::is_valid_id() const {
56 |   bool const not_below = id >= PEFT_MODEL_ID_FIRST_VALID;
57 |   bool const not_above = id <= PEFT_MODEL_ID_LAST_VALID;
58 |   return not_below && not_above;
59 | }
60 | 
61 | bool operator==(PEFTModelID const &lhs, PEFTModelID const &rhs) {
62 |   return lhs.id == rhs.id;
63 | }
64 | 
65 | bool operator!=(PEFTModelID const &lhs, PEFTModelID const &rhs) {
66 |   return !(lhs == rhs);
67 | }
68 | 
69 | // Prints "NO_ID" for PEFTModelID::NO_ID and the numeric id otherwise.
70 | std::ostream &operator<<(std::ostream &os, PEFTModelID const &peft_model_id) {
71 |   if (peft_model_id != PEFTModelID::NO_ID) {
72 |     os << peft_model_id.id;
73 |     return os;
74 |   }
75 |   os << "NO_ID";
76 |   return os;
77 | }
78 | 
79 | }; // namespace FlexFlow
80 | 


--------------------------------------------------------------------------------
/src/runtime/memory_optimization.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #include "flexflow/memory_optimization.h"
17 | 
18 | namespace FlexFlow {
19 | 
20 | namespace PCG {
21 | 
22 | // Formats the usage as "(MemoryUsageType:<type>, Usage:<num>)".
23 | std::string MemoryUsage::to_string() const {
24 |   std::string label;
25 |   switch (usage_type) {
26 |     case MemoryUsageType::GLOBAL:
27 |       label = "GLOBAL";
28 |       break;
29 |     case MemoryUsageType::PER_DEVICE_MAX:
30 |       label = "PER_DEVICE_MAX";
31 |       break;
32 |   }
33 |   std::string text = "(MemoryUsageType:";
34 |   text += label;
35 |   text += ", Usage:";
36 |   text += std::to_string(num);
37 |   text += ")";
38 |   return text;
39 | }
40 | 
41 | // Merges rhs into this usage; both must have the same usage type (asserted).
42 | // GLOBAL usages are summed, PER_DEVICE_MAX usages keep the larger value.
43 | MemoryUsage &MemoryUsage::operator+=(MemoryUsage const &rhs) {
44 |   assert(usage_type == rhs.usage_type);
45 | 
46 |   if (usage_type == MemoryUsageType::GLOBAL) {
47 |     num += rhs.num;
48 |   } else if (usage_type == MemoryUsageType::PER_DEVICE_MAX) {
49 |     if (num < rhs.num) {
50 |       num = rhs.num;
51 |     }
52 |   }
53 | 
54 |   return *this;
55 | }
56 | 
57 | // Value-returning form of operator+=: merges rhs into a copy of lhs.
58 | MemoryUsage operator+(MemoryUsage lhs, MemoryUsage const &rhs) {
59 |   lhs += rhs;
60 |   return lhs;
61 | }
62 | 
63 | // Streams the text produced by MemoryUsage::to_string().
64 | std::ostream &operator<<(std::ostream &s, MemoryUsage const &usage) {
65 |   std::string const text = usage.to_string();
66 |   s << text;
67 |   return s;
68 | }
69 | 
70 | } // namespace PCG
71 | 
72 | } // namespace FlexFlow
73 | 


--------------------------------------------------------------------------------
/src/runtime/operator.cc:
--------------------------------------------------------------------------------
 1 | #include "flexflow/operator.h"
 2 | #include "flexflow/ffconst_utils.h"
 3 | #include "flexflow/simulator.h"
 4 | #include <stdexcept>
 5 | #include <unistd.h>
 6 | #include <wordexp.h>
 7 | 
 8 | namespace FlexFlow {
 9 | 
10 | // Hash of this operator's parameters combined with its operator type.
11 | size_t Op::get_untyped_params_hash() const {
12 |   size_t hash = this->get_params_hash();
13 |   hash_combine(hash, this->op_type);
14 |   return hash;
15 | }
16 | 
17 | // Base-class fallback: always throws std::runtime_error naming the op type.
18 | size_t Op::get_params_hash() const {
19 |   throw std::runtime_error(
20 |       "No overload of get_params_hash defined for op type " +
21 |       get_operator_type_name(this->op_type));
22 | }
23 | 
24 | // Returns (creating it if needed) the folder
25 | //   <debug_dir>/<subdir>/step_<step_idx>[_pre]/shard_<shard_idx>
26 | // where <debug_dir> is $FF_CACHE_PATH/debug/flexflow, or
27 | // ~/.cache/flexflow/debug/flexflow when FF_CACHE_PATH is unset.
28 | //
29 | //   subdir        one of "fwd", "bwd", "optim", "weights" (asserted)
30 | //   step_idx      number used in the "step_<step_idx>" path component
31 | //   shard_idx     number used in the "shard_<shard_idx>" path component
32 | //   before_kernel appends "_pre" to the step component when true
33 | //
34 | // Throws std::runtime_error if the debug directory path cannot be expanded.
35 | // The debug directory itself must already exist (asserted).
36 | fs::path get_dst_folder(std::string const &subdir,
37 |                         int step_idx,
38 |                         int shard_idx,
39 |                         bool before_kernel) {
40 |   std::vector<std::string> debug_subdirs = {"fwd", "bwd", "optim", "weights"};
41 |   assert(std::find(debug_subdirs.begin(), debug_subdirs.end(), subdir) !=
42 |          debug_subdirs.end());
43 |   std::string step_substr = "step_" + std::to_string(step_idx);
44 |   if (before_kernel) {
45 |     step_substr += "_pre";
46 |   }
47 |   // cwd is only referenced by the commented-out FF_DEBUG_PATH logic below.
48 |   char cwd[PATH_MAX];
49 |   char *result = getcwd(cwd, sizeof(cwd));
50 |   assert(result && "getcwd failed");
51 | 
52 |   // char const *ff_cache_path = std::string(std::getenv("FF_DEBUG_PATH")) ==
53 |   // "." ?
54 |   //     cwd : std::getenv("FF_DEBUG_PATH");
55 | 
56 |   char const *ff_cache_path = std::getenv("FF_CACHE_PATH");
57 | 
58 |   std::string debug_dir_ =
59 |       ff_cache_path ? std::string(ff_cache_path) + "/debug/flexflow"
60 |                     : std::string("~/.cache/flexflow/debug/flexflow");
61 |   // NOTE(review): wordexp performs shell-style expansion (including command
62 |   // substitution) on the FF_CACHE_PATH value; consider WRDE_NOCMD if that
63 |   // variable can come from an untrusted source.
64 |   wordexp_t p;
65 |   int const expand_status = wordexp(debug_dir_.c_str(), &p, 0);
66 |   if (expand_status != 0) {
67 |     // Only WRDE_NOSPACE can leave partially allocated results behind; for
68 |     // every other error `p` was not filled in and must not be freed or read.
69 |     if (expand_status == WRDE_NOSPACE) {
70 |       wordfree(&p);
71 |     }
72 |     throw std::runtime_error("failed to expand debug directory path: " +
73 |                              debug_dir_);
74 |   }
75 |   if (p.we_wordc == 0) {
76 |     wordfree(&p);
77 |     throw std::runtime_error("debug directory path expanded to nothing: " +
78 |                              debug_dir_);
79 |   }
80 |   debug_dir_ = p.we_wordv[0];
81 |   wordfree(&p);
82 |   fs::path debug_dir = debug_dir_;
83 |   if (!fs::is_directory(debug_dir)) {
84 |     printf("invalid debug directory: %s\n", debug_dir.c_str());
85 |   }
86 |   assert(fs::is_directory(debug_dir));
87 |   fs::path dst_folder =
88 |       debug_dir / subdir / step_substr / ("shard_" + std::to_string(shard_idx));
89 |   fs::create_directories(dst_folder);
90 |   return dst_folder;
91 | }
92 | 
93 | }; // namespace FlexFlow


--------------------------------------------------------------------------------
/src/runtime/recursive_logger.cc:
--------------------------------------------------------------------------------
 1 | #include "flexflow/utils/recursive_logger.h"
 2 | 
 3 | namespace FlexFlow {
 4 | 
 5 | // Creates the underlying logger for the given category name.
 6 | RecursiveLogger::RecursiveLogger(std::string const &category_name)
 7 |     : logger(category_name) {}
 8 | 
 9 | // Starts an info-level message that already carries the depth prefix.
10 | Realm::LoggerMessage RecursiveLogger::info() {
11 |   Realm::LoggerMessage message = this->logger.info();
12 |   this->print_prefix(message);
13 |   return message;
14 | }
15 | 
16 | // Starts a debug-level message that already carries the depth prefix.
17 | Realm::LoggerMessage RecursiveLogger::debug() {
18 |   Realm::LoggerMessage message = this->logger.debug();
19 |   this->print_prefix(message);
20 |   return message;
21 | }
22 | 
23 | // Starts a spew-level message that already carries the depth prefix.
24 | Realm::LoggerMessage RecursiveLogger::spew() {
25 |   Realm::LoggerMessage message = this->logger.spew();
26 |   this->print_prefix(message);
27 |   return message;
28 | }
29 | 
30 | // Writes the current depth, one space, then `depth` further spaces.
31 | void RecursiveLogger::print_prefix(Realm::LoggerMessage &msg) const {
32 |   msg << this->depth << " ";
33 |   for (int pad = this->depth; pad > 0; --pad) {
34 |     msg << " ";
35 |   }
36 | }
37 | 
38 | // Goes one nesting level deeper.
39 | void RecursiveLogger::enter() {
40 |   ++this->depth;
41 | }
42 | 
43 | // Goes one nesting level up; the depth must not become negative (asserted).
44 | void RecursiveLogger::leave() {
45 |   --this->depth;
46 |   assert(this->depth >= 0);
47 | }
48 | 
49 | // Returns a DepthTag bound to this logger: constructing it calls enter(),
50 | // destroying it calls leave().
51 | std::unique_ptr<DepthTag> RecursiveLogger::enter_tag() {
52 |   std::unique_ptr<DepthTag> tag(new DepthTag(*this));
53 |   return tag;
54 | }
55 | 
56 | DepthTag::DepthTag(RecursiveLogger &_logger) : logger(_logger) {
57 |   this->logger.enter();
58 | }
59 | 
60 | DepthTag::~DepthTag() {
61 |   this->logger.leave();
62 | }
63 | 
64 | }; // namespace FlexFlow
65 | 


--------------------------------------------------------------------------------
/src/runtime/tensor.cpp:
--------------------------------------------------------------------------------
1 | #include "flexflow/accessor.h"
2 | #include "flexflow/config.h"
3 | #include "flexflow/model.h"
4 | #include "flexflow/parallel_tensor.h"
5 | #include "flexflow/utils/hip_helper.h"
6 | #include <hip/hip_runtime.h>
7 | 
8 | namespace FlexFlow {} // namespace FlexFlow
9 | 


--------------------------------------------------------------------------------
/src/runtime/tensor.cu:
--------------------------------------------------------------------------------
1 | #include "flexflow/accessor.h"
2 | #include "flexflow/config.h"
3 | #include "flexflow/model.h"
4 | #include "flexflow/parallel_tensor.h"
5 | #include "flexflow/utils/cuda_helper.h"
6 | 
7 | namespace FlexFlow {} // namespace FlexFlow
8 | 


--------------------------------------------------------------------------------
/src/utils/dot/record_formatter.cc:
--------------------------------------------------------------------------------
 1 | #include "flexflow/utils/dot/record_formatter.h"
 2 | 
 3 | // Appends `tok` verbatim as the next piece of the record.
 4 | RecordFormatter &operator<<(RecordFormatter &r, std::string const &tok) {
 5 |   r.pieces.push_back(tok);
 6 |   return r;
 7 | }
 8 | 
 9 | // Appends the text form of `tok` as a new piece.
10 | RecordFormatter &operator<<(RecordFormatter &r, int tok) {
11 |   std::ostringstream text;
12 |   text << tok;
13 |   // Delegates to the std::ostringstream overload.
14 |   r << text;
15 |   return r;
16 | }
17 | 
18 | // Appends `tok` rendered in scientific notation as a new piece.
19 | RecordFormatter &operator<<(RecordFormatter &r, float tok) {
20 |   std::ostringstream text;
21 |   text << std::scientific << tok;
22 |   // Delegates to the std::ostringstream overload.
23 |   r << text;
24 | 
25 |   return r;
26 | }
27 | 
28 | // Renders `sub_r` to text and appends that text as a single piece of `r`.
29 | RecordFormatter &operator<<(RecordFormatter &r, RecordFormatter const &sub_r) {
30 |   std::ostringstream rendered;
31 |   rendered << sub_r;
32 |   r << rendered.str();
33 |   return r;
34 | }
35 | 
36 | // Appends the current contents of `oss` as a new piece.
37 | RecordFormatter &operator<<(RecordFormatter &r, std::ostringstream &oss) {
38 |   r << oss.str();
39 |   return r;
40 | }
41 | 
42 | // Writes the pieces as "{ p0 | p1 | ... }"; an empty record prints "{  }".
43 | std::ostream &operator<<(std::ostream &s, RecordFormatter const &r) {
44 |   s << "{ ";
45 |   for (size_t idx = 0, count = r.pieces.size(); idx < count; ++idx) {
46 |     if (idx > 0) {
47 |       s << " | ";
48 |     }
49 |     s << r.pieces[idx];
50 |   }
51 |   s << " }";
52 |   return s;
53 | }


--------------------------------------------------------------------------------
/tests/align/README.md:
--------------------------------------------------------------------------------
 1 | # FlexFlow-PyTorch Alignment
 2 | 
 3 | This is an ongoing effort to align FlexFlow with PyTorch as a means to verify
 4 | the correctness of FlexFlow. Support for additional operators will be coming
 5 | soon, and all alignment files here are subject to change.
 6 | ## Install the Python dependencies
 7 | Install the `pytest` module in the `flexflow` conda environment.
 8 | 
 9 | ## Running the Alignment Tests
10 | Note that FlexFlow requires a CPU installation of PyTorch, so we recommend a
11 | separate `conda` environment for each (e.g. named `flexflow` and `pytorch`,
12 | respectively).
13 | 
14 | Assuming those two `conda` environments, we may run
15 | ```
16 | cd FlexFlow
17 | conda activate flexflow
18 | ./tests/align/test_all_operators.sh
19 | ```
20 | 
21 | 


--------------------------------------------------------------------------------
/tests/align/mt5_encoder/align_mt5_encoder_ff.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
 4 | from flexflow.core import *
 5 | 
 6 | sys.path.append("./align/")
 7 | from align_ff_utils import run_fwd_bwd
 8 | from mt5_ff_utils import init_ff_mt5_encoder
 9 | 
10 | # NOTE: We use the PyTorch mT5 encoder output as the labels
11 | ENCODER_LABELS_PATH = os.path.join(
12 |     "align", "mt5_encoder", "out", "hidden_states.pt",
13 | )
14 | 
15 | 
16 | def run():
17 |     assert os.path.exists(ENCODER_LABELS_PATH), \
18 |         "Make sure to generate the encoder labels file (e.g. by modifying " \
19 |         "the transformers library source code)"
20 |     ffmodel, input_dls, label_dl = init_ff_mt5_encoder(
21 |         ENCODER_LABELS_PATH,
22 |     )
23 |     run_fwd_bwd(ffmodel, ffmodel._ffconfig, input_dls, label_dl)
24 | 
25 | 
26 | if __name__ == "__main__":
27 |     run()
28 | 


--------------------------------------------------------------------------------
/tests/align/peft_flash_attn/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "0.2.0",
 3 |     "configurations": [
 4 |       {
 5 |         "name": "Debug FlexFlow PEFT",
 6 |         "type": "cppdbg",
 7 |         "request": "launch",
 8 |         "program": "${workspaceFolder}/build/inference/peft/peft",
 9 |         "args": [
10 |           "-ll:gpu", "1",
11 |           "-ll:cpu", "4",
12 |           "-ll:util", "4",
13 |           "-tensor-parallelism-degree", "1",
14 |           "-ll:fsize", "8192",
15 |           "-ll:zsize", "12000",
16 |           "--max-requests-per-batch", "1",
17 |           "--max-sequence-length", "128",
18 |           "--max-tokens-per-batch", "128",
19 |           "-llm-model", "JackFram/llama-160m",
20 |           "-finetuning-dataset", "./inference/prompt/peft_dataset.json",
21 |           "-peft-model", "goliaro/llama-160m-lora",
22 |           "-enable-peft",
23 |           "--use-full-precision",
24 |           "--inference-debugging"
25 |         ],
26 |         "stopAtEntry": false,
27 |         "cwd": "${workspaceFolder}",
28 |         "environment": [
29 |           {
30 |             "name": "LD_LIBRARY_PATH",
31 |             "value": "/root/flexflow-serve/build:/root/flexflow-serve/build/deps/legion/lib:/opt/conda/lib/python3.12/site-packages/torch/lib:/opt/conda/lib/python3.12/site-packages:/opt/conda/lib"
32 |           }
33 |         ],
34 |         "externalConsole": false,
35 |         "MIMode": "gdb",
36 |         "miDebuggerPath": "/usr/bin/gdb",
37 |         "setupCommands": [
38 |           {
39 |             "description": "Enable pretty-printing for gdb",
40 |             "text": "-enable-pretty-printing",
41 |             "ignoreFailures": true
42 |           }
43 |         ]
44 |       }
45 |     ]
46 |   }


--------------------------------------------------------------------------------
/tests/align/peft_flash_attn/peft_flash_debug_note:
--------------------------------------------------------------------------------
 1 | # Memo for peft debug steps
 2 | 
 3 | #export libtorch as env var (add to docker build later):
 4 | export LD_LIBRARY_PATH=/opt/conda/lib/python3.12/site-packages/torch/lib:$LD_LIBRARY_PATH
 5 | 
 6 | #before running the tests, run:
 7 | source ./build/set_python_envs.sh
 8 | 
 9 | # perform peft test:
10 | ./tests/peft_test.sh
11 | 
12 | # build debug:
13 | export BUILD_TYPE=Debug
14 | 
15 | # build:
16 | ../config/config.linux
17 | make -j
18 | 
19 | # install build
20 | make -j install
21 | 
22 | # Steps to debug:
23 | # 1. install gdb-python3
24 | # apt-get update && apt-get install -y python3-dbg
25 | # 2. do not do env setup before this, run in gdb and set env in gdb:
26 | gdb -ex run --args ./build/inference/peft/peft -ll:gpu 1 -ll:cpu 4 -ll:util 4 -tensor-parallelism-degree 1 -ll:fsize 8192 -ll:zsize 12000 --max-requests-per-batch 1 --max-sequence-length 128 --max-tokens-per-batch 128 -llm-model JackFram/llama-160m -finetuning-dataset ./inference/prompt/peft_dataset.json -peft-model goliaro/llama-160m-lora -enable-peft  --inference-debugging
27 | # --use-full-precision: ignore this case for now
28 | # 3. in gdb, export all variables set in ./build/set_python_envs.sh
29 | set environment LD_LIBRARY_PATH=/root/flexflow-serve/build:/root/flexflow-serve/build/deps/legion/lib:$(dirname $(/root/flexflow-serve/python/flexflow/findpylib.py)):/opt/conda/lib/python3.12/site-packages/torch/lib:/opt/conda/lib/python3.12/site-packages:$LD_LIBRARY_PATH
30 | # 4. and then run
31 | run
32 | 
33 | # single C++ tests:
34 | ./build/inference/peft/peft -ll:gpu 2 -ll:cpu 4 -ll:util 4 -tensor-parallelism-degree 2 -ll:fsize 8192 -ll:zsize 12000 --max-requests-per-batch 1 --max-sequence-length 128 --max-tokens-per-batch 128 -llm-model JackFram/llama-160m -finetuning-dataset ./inference/prompt/peft_dataset.json -peft-model goliaro/llama-160m-lora -enable-peft  --inference-debugging


--------------------------------------------------------------------------------
/tests/align/test_all_operators.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env bash
 2 | eval "$(conda shell.bash hook)"
 3 | 
 4 | rm -rf align/out
 5 | 
 6 | function generate_ff_tensor(){
 7 |     python tests/align/align_create_tensor_ff.py -ll:gpu 1 -ll:fsize 5000 -ll:zsize 4096 -b 16 -o "$1"
 8 | }
 9 | 
10 | function generate_torch_tensor(){
11 |     python tests/align/align_create_tensor_torch.py -o "$1"
12 | }
13 | 
14 | ops=(add concat conv2d cos embedding exp flat getitem identity multiply pool2d reducesum relu reshape scalar_add scalar_multiply scalar_sub scalar_truediv sigmoid sin subtract tanh transpose view_embedding max min linear layernorm gather)
15 | 
16 | #create flexflow tensors
17 | conda activate flexflow
18 | conda info --envs
19 | for(( i=0;i<${#ops[@]};i++)) 
20 | do
21 |     generate_ff_tensor "${ops[i]}";
22 | done;
23 | 
24 | #create torch tensors
25 | conda activate pytorch
26 | for(( i=0;i<${#ops[@]};i++)) 
27 | do
28 |     generate_torch_tensor "${ops[i]}";
29 | done;
30 | 
31 | conda activate flexflow
32 | python -m pytest tests/align/align_test.py
33 | 


--------------------------------------------------------------------------------
/tests/inference/huggingface_inference_simple.py:
--------------------------------------------------------------------------------
 1 | from transformers import (
 2 |     AutoModelForCausalLM,
 3 |     AutoTokenizer,
 4 |     AutoConfig,
 5 |     GenerationConfig,
 6 | )
 7 | 
 8 | model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 9 | do_sample = False
10 | max_length = 128
11 | model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="auto",)
12 | hf_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
13 | tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
14 | generation_config = GenerationConfig.from_pretrained(model_name)
15 | print(generation_config.do_sample)
16 | generation_config.do_sample = do_sample
17 | generation_config.num_beams=1
18 | generation_config.temperature = None
19 | generation_config.top_p = None
20 | 
21 | 
22 | def run_text_completion():
23 |     prompt = "Help me plan a 1-week trip to Dubai"
24 |     batch = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
25 |     # Generate at most max_length new tokens using the module-level generation_config.
26 |     generated = model.generate(
27 |         batch["input_ids"],
28 |         max_new_tokens=max_length,
29 |         generation_config=generation_config,
30 |     )
31 |     out = tokenizer.decode(generated[0])
32 |     print(out)
33 | 
34 | def run_chat_completion():
35 |     messages=[
36 |         {"role": "system", "content": "You are a helpful an honest programming assistant."},
37 |         {"role": "user", "content": "Is Rust better than Python?"},
38 |     ]
39 |     tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
40 |     batch = tokenizer(tokenized_chat, return_tensors="pt")
41 |     # Generate, decode, then drop the prompt by slicing off the decoded prompt's character length.
42 |     generated = model.generate(
43 |         batch["input_ids"],
44 |         max_new_tokens=max_length,
45 |         generation_config=generation_config,
46 |     )
47 |     out = tokenizer.decode(generated[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
48 |     prompt_length = len(tokenizer.decode(batch["input_ids"][0], skip_special_tokens=True, clean_up_tokenization_spaces=True))
49 |     all_text = out[prompt_length:]
50 |     print(all_text)
51 | run_chat_completion()


--------------------------------------------------------------------------------
/tests/inference/huggingface_pipeline.py:
--------------------------------------------------------------------------------
 1 | import transformers
 2 | from transformers import GenerationConfig
 3 | 
 4 | model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 5 | do_sample = False
 6 | 
 7 | generation_config = GenerationConfig.from_pretrained(model_id)
 8 | generation_config.do_sample = do_sample
 9 | generation_config.num_beams=1
10 | # generation_config.max_length = 128
11 | generation_config.temperature = None
12 | generation_config.top_p = None
13 | print(generation_config)
14 | 
15 | pipeline = transformers.pipeline(
16 |     "text-generation",
17 |     model=model_id,
18 |     # model_kwargs={"torch_dtype": torch.bfloat16},
19 |     device_map="auto",
20 | )
21 | 
22 | messages=[
23 |         {"role": "system", "content": "You are a helpful an honest programming assistant."},
24 |         {"role": "user", "content": "Is Rust better than Python?"},
25 |     ]
26 |     
27 | # messages="Help me plan a 1-week trip to Dubai"
28 | outputs = pipeline(
29 |     messages,
30 |     max_new_tokens=128,
31 |     generation_config=generation_config,
32 | )
33 | print(outputs[0]["generated_text"][-1]['content'])


--------------------------------------------------------------------------------
/tests/multinode_helpers/mpi_wrapper1.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env bash
 2 | set -x
 3 | set -e
 4 | # A missing variable is an error: exit non-zero so callers see the abort.
 5 | if [ -z "$FF_HOME" ]; then echo "FF_HOME variable is not defined, aborting tests"; exit 1; fi
 6 | if [ -z "$NUM_NODES" ]; then echo "NUM_NODES variable is not defined, aborting tests"; exit 1; fi
 7 | if [ -z "$GPUS" ]; then echo "GPUS variable is not defined, aborting tests"; exit 1; fi
 8 | 
 9 | # We need to wrap the instruction below in its own script because MPI throws an error if we try
10 | # to run "mpirun" more than once in the same script. Hence, we cannot simply call "mpirun" in the
11 | # training_tests.sh script
12 | mpirun -np "$NUM_NODES" "$FF_HOME"/tests/multinode_helpers/mpi_wrapper2.sh "$@"
13 | 


--------------------------------------------------------------------------------
/tests/multinode_helpers/mpi_wrapper2.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env bash
 2 | set -x
 3 | set -e
 4 | # A missing variable is an error: exit non-zero so callers see the abort.
 5 | if [ -z "$NUM_NODES" ]; then echo "NUM_NODES variable is not defined, aborting tests"; exit 1; fi
 6 | if [ -z "$GPUS" ]; then echo "GPUS variable is not defined, aborting tests"; exit 1; fi
 7 | 
 8 | # We need to wrap the instruction below in its own script because the CUDA_VISIBLE_DEVICES environment
 9 | # variable will need to be set differently for each node, but the "mpirun" command should take a single
10 | # executable as its first argument. Rank r gets the GPUS consecutive ids r*GPUS .. r*GPUS + GPUS - 1.
11 | CUDA_VISIBLE_DEVICES=$(seq -s, $((OMPI_COMM_WORLD_RANK * GPUS ))  $(( OMPI_COMM_WORLD_RANK * GPUS + GPUS - 1 )) )
12 | export CUDA_VISIBLE_DEVICES
13 | 
14 | python "$@"
15 | 


--------------------------------------------------------------------------------
/tests/python_interface_test.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env bash
 2 | # Smoke test for the FlexFlow Python interface: checks that the flexflow modules
 3 | # can be imported, then runs the incremental decoding example script.
 4 | #
 5 | # Usage: python_interface_test.sh [before-installation|after-installation]
 6 | #   before-installation (default): point python at the sources in $FF_HOME and at
 7 | #                                  the libraries in $BUILD_FOLDER.
 8 | #   after-installation:            rely on whatever flexflow python already finds.
 9 | set -e
10 | 
11 | # Token to access private huggingface models (e.g. LLAMA-2).
12 | # Handled BEFORE command tracing (set -x) is switched on, so that the secret is
13 | # not echoed into the log.
14 | HUGGINGFACE_TOKEN=${HUGGINGFACE_TOKEN:-none}
15 | if [[ "$HUGGINGFACE_TOKEN" != "none" ]]; then
16 |     huggingface-cli login --token "$HUGGINGFACE_TOKEN"
17 | fi
18 | 
19 | set -x
20 | 
21 | FF_HOME="$(realpath "${BASH_SOURCE[0]%/*}/..")"
22 | export FF_HOME
23 | # Edit the folder below if you did not build FlexFlow in $FF_HOME/build
24 | BUILD_FOLDER="${FF_HOME}/build"
25 | export BUILD_FOLDER
26 | 
27 | installation_status=${1:-"before-installation"}
28 | echo "Running Python interface tests (installation status: ${installation_status})"
29 | if [[ "$installation_status" == "before-installation" ]]; then
30 | 	# Check availability of flexflow modules in Python.
31 | 	# ${VAR:+:${VAR}} appends the caller's value only when it is non-empty; a bare
32 | 	# trailing ":" would add an empty entry, which the dynamic loader treats as
33 | 	# the current directory.
34 | 	export PYTHONPATH="${FF_HOME}/python:${BUILD_FOLDER}/deps/legion/bindings/python${PYTHONPATH:+:${PYTHONPATH}}"
35 | 	export LD_LIBRARY_PATH="${BUILD_FOLDER}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
36 | 	python -c "import flexflow.core; import flexflow.serve as ff; exit()"
37 | 	unset PYTHONPATH
38 | 	unset LD_LIBRARY_PATH
39 | 	# Run simple python inference test. Both variables were just unset, so there
40 | 	# is nothing left to append to the new values.
41 | 	export LD_LIBRARY_PATH="${BUILD_FOLDER}:${BUILD_FOLDER}/deps/legion/lib"
42 | 	export PYTHONPATH="${FF_HOME}/python:${BUILD_FOLDER}/deps/legion/bindings/python"
43 | 	python "$FF_HOME"/inference/python/incr_decoding.py
44 | 	unset PYTHONPATH
45 | 	unset LD_LIBRARY_PATH
46 | elif [[ "$installation_status" == "after-installation" ]]; then
47 | 	# Check availability of flexflow modules in Python
48 | 	python -c "import flexflow.core; import flexflow.serve as ff; exit()"
49 | 	# Run simple python inference test
50 | 	python "$FF_HOME"/inference/python/incr_decoding.py
51 | else
52 | 	echo "Invalid installation status!"
53 | 	echo "Usage: $0 {before-installation, after-installation}"
54 | 	exit 1
55 | fi
56 | 


--------------------------------------------------------------------------------
/triton/README.md:
--------------------------------------------------------------------------------
 1 | # Legion Triton Backend
 2 | 
 3 | This directory contains an incomplete prototype for a new 
 4 | [backend for Triton](https://github.com/triton-inference-server/backend) built on top of the 
 5 | [Legion runtime](https://legion.stanford.edu) for handling multi-node multi-GPU inference
 6 | requests. While Legion is the primary runtime carrying out multi-node inference jobs, users
 7 | do not need to understand Legion at all to use this backend.  
 8 | 
 9 | ## Build instructions
10 | 
11 | ### CMake
12 | 
13 | A simple CMake build is provided to build the Legion backend and to resolve its dependencies.
14 | Note that the build installs protobuf with customized settings, so please make sure
15 | that the system does not already have protobuf installed to avoid conflicts.
16 | 
17 | ```
18 | $ mkdir build
19 | $ cd build
20 | $ cmake  ..
21 | $ make
22 | ```
23 | 
24 | After the build, the backend shared library can be found at `/PATH/TO/BUILDDIR/triton-legion/backends/legion`.
25 | 
26 | By default, the unit tests and test data are installed at `/PATH/TO/BUILDDIR/triton-legion/test`,
27 | which can be run after switching the current directory to the installed location.
28 | 
29 | ### Make
30 | 
31 | Protobuf is required for the backend and it must be installed from source with the following command
32 | to build the static protobuf library that can be linked with the backend shared library
33 | 
34 | ```
35 | git clone https://github.com/protocolbuffers/protobuf.git
36 | cd protobuf && git checkout v3.17.1
37 | cd cmake
38 | cmake -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -Dprotobuf_BUILD_TESTS:BOOL=OFF -Dprotobuf_WITH_ZLIB:BOOL=OFF -Dprotobuf_MSVC_STATIC_RUNTIME:BOOL=OFF -DCMAKE_BUILD_TYPE:STRING=RELEASE -DBUILD_SHARED_LIBS:STRING=no .
39 | make install
40 | ```
41 | 
42 | Set the `LG_RT_DIR` environment variable to point to the `legion/runtime` directory in a Legion repo
43 | 
44 | Set the `TRITON_DIR` to point to an installation of the Triton server
45 | 
46 | Go into the `src` directory and type `make`
47 | 
48 | Copy the `libtriton_flexflow.so` shared object to a triton model repository
49 | 


--------------------------------------------------------------------------------
/triton/cmake/TritonLegionBackendConfig.cmake.in:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | # Package configuration file: loads the exported TritonLegionBackend targets and sets TRITONLEGIONBACKEND_LIBRARIES.
17 | include(CMakeFindDependencyMacro)
18 | # Record the directory that contains this file.
19 | get_filename_component(
20 |   TRITONLEGIONBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
21 | )
22 | # Add that directory to the module search path used by include() and find_package().
23 | list(APPEND CMAKE_MODULE_PATH ${TRITONLEGIONBACKEND_CMAKE_DIR})
24 | # Load the exported targets file only if the imported target does not exist yet.
25 | if(NOT TARGET TritonLegionBackend::triton-legion-backend)
26 |   include("${TRITONLEGIONBACKEND_CMAKE_DIR}/TritonLegionBackendTargets.cmake")
27 | endif()
28 | # Variable holding the name of the imported target.
29 | set(TRITONLEGIONBACKEND_LIBRARIES TritonLegionBackend::triton-legion-backend)
30 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/add/1/model.onnx:
--------------------------------------------------------------------------------
 1 | model:y
 2 | 
 3 | input0
 4 | input1output"Add
 5 | test_graphZ
 6 | input0
 7 | 
 8 | 
 9 | Z
10 | input1
11 | 
12 | 
13 | b
14 | output
15 | 
16 | 
17 | B


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/add/1/model.strategy:
--------------------------------------------------------------------------------
1 | 1 Add 0 2 1 1 1 0


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/add/config.pbtxt:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | 
17 | name: "add"
18 | backend: "legion"
19 | max_batch_size: 0
20 | input [
21 |   {
22 |     name: "input0"
23 |     data_type: TYPE_FP32
24 |     dims: [ 4, 2 ]
25 |   },
26 |   {
27 |     name: "input1"
28 |     data_type: TYPE_FP32
29 |     dims: [ 4, 2 ]
30 |   }
31 | ]
32 | output [
33 |   {
34 |     name: "output"
35 |     data_type: TYPE_FP32
36 |     dims: [ 4, 2 ]
37 |   }
38 | ]
39 | instance_group [ { kind : KIND_MODEL }]
40 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/cast/1/model.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/qa/L0_e2e/models/cast/1/model.onnx


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/cast/1/model.strategy:
--------------------------------------------------------------------------------
1 | 1 Cast 0 2 1 1 1 0


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/cast/config.pbtxt:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | 
17 | name: "cast"
18 | backend: "legion"
19 | max_batch_size: 0
20 | input [
21 |   {
22 |     name: "input"
23 |     data_type: TYPE_FP32
24 |     dims: [ 1, 3 ]
25 |   }
26 | ]
27 | output [
28 |   {
29 |     name: "output"
30 |     data_type: TYPE_FP64
31 |     dims: [ 1, 3 ]
32 |   }
33 | ]
34 | instance_group [ { kind : KIND_MODEL }]
35 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/identity/1/model.onnx:
--------------------------------------------------------------------------------
 1 | model:j
 2 | 
 3 | inputoutput"Identity
 4 | test_graphZ
 5 | input
 6 | 
 7 | 
 8 | 
 9 | 
10 | b 
11 | output
12 | 
13 | 
14 | 
15 | 
16 | B


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/identity/1/model.strategy:
--------------------------------------------------------------------------------
1 | 1 Identity_0 0 4 1 1 1 1 1 0


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/identity/config.pbtxt:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | 
17 | name: "identity"
18 | backend: "legion"
19 | max_batch_size: 0
20 | input [
21 |   {
22 |     name: "input"
23 |     data_type: TYPE_FP32
24 |     dims: [ 4, 1, 5, 5 ]
25 |   }
26 | ]
27 | output [
28 |   {
29 |     name: "output"
30 |     data_type: TYPE_FP32
31 |     dims: [ 4, 1, 5, 5 ]
32 |   }
33 | ]
34 | instance_group [ { kind : KIND_MODEL }]
35 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/mul/1/model.onnx:
--------------------------------------------------------------------------------
 1 | model:y
 2 | 
 3 | input0
 4 | input1output"Mul
 5 | test_graphZ
 6 | input0
 7 | 
 8 | 
 9 | Z
10 | input1
11 | 
12 | 
13 | b
14 | output
15 | 
16 | 
17 | B


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/mul/1/model.strategy:
--------------------------------------------------------------------------------
1 | 1 Mul 0 2 1 1 1 0


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/mul/config.pbtxt:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | 
17 | name: "mul"
18 | backend: "legion"
19 | max_batch_size: 0
20 | input [
21 |   {
22 |     name: "input0"
23 |     data_type: TYPE_FP32
24 |     dims: [ 4, 2 ]
25 |   },
26 |   {
27 |     name: "input1"
28 |     data_type: TYPE_FP32
29 |     dims: [ 4, 2 ]
30 |   }
31 | ]
32 | output [
33 |   {
34 |     name: "output"
35 |     data_type: TYPE_FP32
36 |     dims: [ 4, 2 ]
37 |   }
38 | ]
39 | instance_group [ { kind : KIND_MODEL }]
40 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/reciprocal/1/model.onnx:
--------------------------------------------------------------------------------
 1 | model:\
 2 | 
 3 | inputoutput"
 4 | Reciprocal
 5 | test_graphZ
 6 | input
 7 | 
 8 | 
 9 | b
10 | output
11 | 
12 | 
13 | B


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/reciprocal/1/model.strategy:
--------------------------------------------------------------------------------
1 | 1 Reciprocal 0 2 1 1 1 0


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/reciprocal/config.pbtxt:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | 
17 | name: "reciprocal"
18 | backend: "legion"
19 | max_batch_size: 0
20 | input [
21 |   {
22 |     name: "input"
23 |     data_type: TYPE_FP32
24 |     dims: [ 1, 3 ]
25 |   }
26 | ]
27 | output [
28 |   {
29 |     name: "output"
30 |     data_type: TYPE_FP32
31 |     dims: [ 1, 3 ]
32 |   }
33 | ]
34 | instance_group [ { kind : KIND_MODEL }]
35 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/softmax/1/model.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/qa/L0_e2e/models/softmax/1/model.onnx


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/softmax/1/model.strategy:
--------------------------------------------------------------------------------
1 | 1 Softmax 0 2 1 1 1 0


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/softmax/config.pbtxt:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | 
17 | name: "softmax"
18 | backend: "legion"
19 | max_batch_size: 0
20 | input [
21 |   {
22 |     name: "input"
23 |     data_type: TYPE_FP32
24 |     dims: [ 3, 1 ]
25 |   }
26 | ]
27 | output [
28 |   {
29 |     name: "output"
30 |     data_type: TYPE_FP32
31 |     dims: [ 3, 1 ]
32 |   }
33 | ]
34 | instance_group [ { kind : KIND_MODEL }]
35 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/softmax1/1/model.onnx:
--------------------------------------------------------------------------------
 1 | model:Y
 2 | 
 3 | inputoutput"Softmax
 4 | test_graphZ
 5 | input
 6 | 
 7 | 
 8 | b
 9 | output
10 | 
11 | 
12 | B


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/softmax1/1/model.strategy:
--------------------------------------------------------------------------------
1 | 1 Softmax_1 0 2 1 1 1 0


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/softmax1/config.pbtxt:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | 
17 | name: "softmax1"
18 | backend: "legion"
19 | max_batch_size: 0
20 | input [
21 |   {
22 |     name: "input"
23 |     data_type: TYPE_FP32
24 |     dims: [ 3, 1 ]
25 |   }
26 | ]
27 | output [
28 |   {
29 |     name: "output"
30 |     data_type: TYPE_FP32
31 |     dims: [ 3, 1 ]
32 |   }
33 | ]
34 | instance_group [ { kind : KIND_MODEL }]
35 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/sqrt/1/model.onnx:
--------------------------------------------------------------------------------
 1 | model:V
 2 | 
 3 | inputoutput"Sqrt
 4 | test_graphZ
 5 | input
 6 | 
 7 | 
 8 | b
 9 | output
10 | 
11 | 
12 | B


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/sqrt/1/model.strategy:
--------------------------------------------------------------------------------
1 | 1 Sqrt 0 2 1 1 1 0


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/sqrt/config.pbtxt:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | 
17 | name: "sqrt"
18 | backend: "legion"
19 | max_batch_size: 0
20 | input [
21 |   {
22 |     name: "input"
23 |     data_type: TYPE_FP32
24 |     dims: [ 3, 1 ]
25 |   }
26 | ]
27 | output [
28 |   {
29 |     name: "output"
30 |     data_type: TYPE_FP32
31 |     dims: [ 3, 1 ]
32 |   }
33 | ]
34 | instance_group [ { kind : KIND_MODEL }]
35 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/sub/1/model.onnx:
--------------------------------------------------------------------------------
 1 | model:y
 2 | 
 3 | input0
 4 | input1output"Sub
 5 | test_graphZ
 6 | input0
 7 | 
 8 | 
 9 | Z
10 | input1
11 | 
12 | 
13 | b
14 | output
15 | 
16 | 
17 | B


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/sub/1/model.strategy:
--------------------------------------------------------------------------------
1 | 1 Sub 0 2 1 1 1 0


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/sub/config.pbtxt:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | 
17 | name: "sub"
18 | backend: "legion"
19 | max_batch_size: 0
20 | input [
21 |   {
22 |     name: "input0"
23 |     data_type: TYPE_FP32
24 |     dims: [ 4, 2 ]
25 |   },
26 |   {
27 |     name: "input1"
28 |     data_type: TYPE_FP32
29 |     dims: [ 4, 2 ]
30 |   }
31 | ]
32 | output [
33 |   {
34 |     name: "output"
35 |     data_type: TYPE_FP32
36 |     dims: [ 4, 2 ]
37 |   }
38 | ]
39 | instance_group [ { kind : KIND_MODEL }]
40 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/tanh/1/model.onnx:
--------------------------------------------------------------------------------
 1 | model:V
 2 | 
 3 | inputoutput"Tanh
 4 | test_graphZ
 5 | input
 6 | 
 7 | 
 8 | b
 9 | output
10 | 
11 | 
12 | B


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/tanh/1/model.strategy:
--------------------------------------------------------------------------------
1 | 1 Tanh 0 2 1 1 1 0


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/models/tanh/config.pbtxt:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | 
17 | name: "tanh"
18 | backend: "legion"
19 | max_batch_size: 0
20 | input [
21 |   {
22 |     name: "input"
23 |     data_type: TYPE_FP32
24 |     dims: [ 3, 1 ]
25 |   }
26 | ]
27 | output [
28 |   {
29 |     name: "output"
30 |     data_type: TYPE_FP32
31 |     dims: [ 3, 1 ]
32 |   }
33 | ]
34 | instance_group [ { kind : KIND_MODEL }]
35 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #------------------------------------------------------------------------------#
 3 | # Copyright 2022 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #------------------------------------------------------------------------------#
17 | 
18 | TEST_PY=operator_test.py
19 | DATADIR="./models"
20 | SERVER=/opt/tritonserver/bin/tritonserver
21 | SERVER_ARGS="--model-repository=$DATADIR"
22 | # NOTE(review): run_server, kill_server, SERVER_PID and SERVER_LOG are not set in
23 | # this script; presumably util.sh provides them and reads SERVER/SERVER_ARGS --
24 | # confirm against that file.
25 | source ../common/util.sh
26 | 
27 | # Remove logs of previous runs. The "./" prefix keeps a file name that starts
28 | # with "-" from being parsed as an option of rm.
29 | rm -f ./*.log*
30 | 
31 | RET=0
32 | 
33 | # 1 GPU 1 node
34 | export REALM_DEFAULT_ARGS="-ll:gpu 1"
35 | TEST_LOG="./single_device_single_node.log"
36 | 
37 | run_server
38 | if [ "$SERVER_PID" == "0" ]; then
39 |     echo -e "\n***\n*** Failed to start $SERVER\n***"
40 |     # Quoted so that an empty or unset SERVER_LOG makes cat fail right away
41 |     # instead of waiting on stdin.
42 |     cat "$SERVER_LOG"
43 |     exit 1
44 | fi
45 | 
46 | # A failing test is recorded in RET (not fatal) so that the server is still shut down below.
47 | set +e
48 | if ! python "$TEST_PY" >>"$TEST_LOG" 2>&1; then
49 |     echo -e "\n***\n*** Test Failed\n***"
50 |     cat "$TEST_LOG"
51 |     RET=1
52 | fi
53 | set -e
54 | 
55 | # [issue #7] WAR to ignore core dump on server exit
56 | set +e
57 | kill_server
58 | set -e
59 | 
60 | # [gluo FIXME] add test for multi-GPU / multi-node
61 | 
62 | if [ "$RET" -eq 0 ]; then
63 |   echo -e "\n***\n*** Test Passed\n***"
64 | else
65 |   echo -e "\n***\n*** Test Failed\n***"
66 | fi
67 | 
68 | exit "$RET"
69 | 


--------------------------------------------------------------------------------
/triton/qa/L0_e2e/test_helpers.py:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | 
17 | import numpy as np
18 | 
19 | def softmax(input, axis):
20 |     output = np.exp(input - np.max(input, axis, keepdims=True))
21 |     return output / np.sum(output, axis, keepdims=True)
22 | 


--------------------------------------------------------------------------------
/triton/qa/L0_parser/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #------------------------------------------------------------------------------#
 3 | # Copyright 2022 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #------------------------------------------------------------------------------#
17 | 
18 | TEST_BIN=./onnx_parser_test
19 | 
20 | # Remove logs of previous runs ("./" keeps a leading "-" in a file name from being read as an rm option).
21 | rm -f ./*.log*
22 | 
23 | RET=0
24 | 
25 | TEST_LOG="./parser.log"
26 | 
27 | # Run the parser test binary; a failure is recorded in RET and its log is printed.
28 | set +e
29 | if ! "$TEST_BIN" >>"$TEST_LOG" 2>&1; then
30 |     echo -e "\n***\n*** Test Failed\n***"
31 |     cat "$TEST_LOG"
32 |     RET=1
33 | fi
34 | set -e
35 | 
36 | if [ "$RET" -eq 0 ]; then
37 |   echo -e "\n***\n*** Test Passed\n***"
38 | else
39 |   echo -e "\n***\n*** Test Failed\n***"
40 | fi
41 | 
42 | exit "$RET"
43 | 


--------------------------------------------------------------------------------
/triton/src/config.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022 NVIDIA CORPORATION
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #ifndef __LEGION_TRITON_CONFIG_H__
17 | #define __LEGION_TRITON_CONFIG_H__
18 | // NOTE(review): names containing a double underscore are reserved for the implementation in C++; consider a guard name without them.
19 | // Configuration constants: compile-time upper bounds for some static properties
20 | // NOTE(review): the users of these limits are not visible in this header -- check them before changing a value.
21 | // Maximum number of instances per model that we expect to see
22 | #define MAX_NUM_INSTANCES 8
23 | 
24 | // Maximum number of local processors that we need to handle in this process
25 | #define MAX_LOCAL_PROCS 16
26 | 
27 | #endif  // __LEGION_TRITON_CONFIG_H__
28 | 


--------------------------------------------------------------------------------
/triton/src/libtriton_legion.ldscript:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | # Copyright 2022 NVIDIA CORPORATION
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | # http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #------------------------------------------------------------------------------#
16 | {  # anonymous version node (no version tag)
17 |   global:  # symbols matching the patterns below remain exported
18 |     TRITONBACKEND_*;
19 |     extern "C++" {  # patterns in this block are matched against demangled C++ names
20 |         triton::backend::legion::*;
21 |     };
22 |   local: *;  # every other symbol is made local to the library
23 | };
24 | 


--------------------------------------------------------------------------------
/triton/src/operator.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022 NVIDIA CORPORATION
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #include "operator.h"
17 | #include "operators/binary.h"
18 | #include "operators/concat.h"
19 | #include "operators/conv2d.h"
20 | #include "operators/matmul.h"
21 | #include "operators/reshape.h"
22 | #include "operators/softmax.h"
23 | #include "operators/unary.h"
24 | #include "tensor.h"
25 | 
26 | namespace triton { namespace backend { namespace legion {
27 | 
28 | Operator::Operator(  // Stores each argument in the matching member; does no other work
29 |     LegionModelState* m, const LayerStrategy* s, OperatorType t,
30 |     const char* name, unsigned in, unsigned wts, unsigned out)
31 |     : op_type(t), op_name(name), model(m), strategy(s), num_inputs(in),
32 |       num_weights(wts), num_outputs(out)  // in/wts/out are counts only; no tensors are created
33 | {  // NOTE(review): if op_name is a raw const char*, `name` must outlive this object - confirm
34 | }
35 | 
36 | Operator::~Operator(void)  // Deletes every pointer held in `weights` and `outputs`
37 | {
38 |   // Delete all the weight and output tensors; nothing else is released here
39 |   for (auto wts : weights) delete wts;
40 |   for (auto tensor : outputs) delete tensor;
41 | }
42 | 
43 | /*static*/ void
44 | Operator::PreregisterTaskVariants(void)
45 | {  // Calls the static PreregisterTaskVariants() of each operator class listed below, in order
46 |   BinaryOperator::PreregisterTaskVariants();
47 |   Concat::PreregisterTaskVariants();
48 |   Conv2D::PreregisterTaskVariants();
49 |   MatMul::PreregisterTaskVariants();
50 |   Reshape::PreregisterTaskVariants();
51 |   Softmax::PreregisterTaskVariants();
52 |   UnaryOperator::PreregisterTaskVariants();
53 | }  // NOTE(review): Flat (operators/flat.h) is not called here - confirm that is intended
54 | 
55 | }}}  // namespace triton::backend::legion
56 | 


--------------------------------------------------------------------------------
/triton/src/operators/flat.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022 NVIDIA CORPORATION
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #ifndef __LEGION_TRITON_FLAT_H__
17 | #define __LEGION_TRITON_FLAT_H__
18 | 
19 | #include "operator.h"
20 | #include "tensor.h"
21 | 
22 | namespace triton { namespace backend { namespace legion {
23 | 
24 | struct FlatArgs : public OperatorArgs {  // Argument type for Flat; adds nothing to OperatorArgs
25 |  public:
26 | };
27 | 
28 | class Flat : public Operator {  // Declares the Flat operator; no definitions live in this header
29 |  public:
30 |   Flat(LegionModelState* state, const char* name);
31 |   // Takes one input tensor and one output tensor for this operator
32 |   void configure(Tensor* input, Tensor* output);
33 |   // Per-instance hooks: each is handed the LegionModelInstance to operate on
34 |   virtual void initialize(LegionModelInstance* instance);
35 |   virtual void forward(LegionModelInstance* instance);
36 |   virtual void finalize(LegionModelInstance* instance);
37 |   // Static functions taking (task, regions, ctx, runtime) - presumably Legion task bodies
38 |   static FlatArgs initalize_gpu(  // NOTE(review): spelled "initalize"; renaming would break callers
39 |       const Legion::Task* task,
40 |       const std::vector<Legion::PhysicalRegion>& regions, Legion::Context ctx,
41 |       Legion::Runtime* runtime);
42 |   static void forward_gpu(
43 |       const Legion::Task* task,
44 |       const std::vector<Legion::PhysicalRegion>& regions, Legion::Context ctx,
45 |       Legion::Runtime* runtime);
46 |   static void forward_kernel(  // untyped input/output buffers plus an element count
47 |       const FlatArgs* args, const void* input_ptr, void* output_ptr,
48 |       size_t num_elements);
49 |   // NOTE(review): Operator already has a `model` member (see operator.cc); this one hides it
50 |  public:
51 |   LegionModelState* const model;
52 | };
53 | 
54 | }}}  // namespace triton::backend::legion
55 | 
56 | #endif  // __LEGION_TRITON_FLAT_H__
57 | 


--------------------------------------------------------------------------------
/triton/src/tensor.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2022 NVIDIA CORPORATION
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  *     http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | #ifndef __LEGION_TRITON_TENSOR_H__
17 | #define __LEGION_TRITON_TENSOR_H__
18 | 
19 | #include "config.h"
20 | #include "legion.h"
21 | #include "types.h"
22 | 
23 | namespace triton { namespace backend { namespace legion {
24 | 
25 | class Tensor {  // Tensor record: owner operator, data type, bounds, and Legion handle arrays
26 |  public:
27 |   Tensor(Operator* op, DataType type, const size_t* dims, size_t num_dims);
28 |   Tensor(Operator* op, DataType type, const std::vector<size_t>& dims);
29 |   virtual ~Tensor(void);  // virtual, so deleting a derived Weights via Tensor* is well-defined
30 |   // All three members below are const, so they can only be set while constructing
31 |  public:
32 |   Operator* const owner;
33 |   const DataType type;
34 |   const std::vector<size_t> bounds;
35 |   // Fixed-size arrays of MAX_NUM_INSTANCES handles - presumably indexed by instance; confirm
36 |  public:
37 |   Legion::LogicalRegion region[MAX_NUM_INSTANCES];
38 |   Legion::LogicalPartition partition[MAX_NUM_INSTANCES];
39 | };
40 | 
41 | class Weights : public Tensor {  // Tensor extended with arrays of MAX_LOCAL_PROCS entries
42 |  public:
43 |   Weights(Operator* op, DataType type, const size_t* dims, size_t num_dims);
44 |   Weights(Operator* op, DataType type, const std::vector<size_t>& dims);
45 |   virtual ~Weights(void);
46 |   // One slot per local processor in each array - presumably the same index across all four
47 |  public:
48 |   Legion::Domain local_bounds[MAX_LOCAL_PROCS];
49 |   Legion::Memory local_memory[MAX_LOCAL_PROCS];
50 |   void* local_allocation[MAX_LOCAL_PROCS];  // raw pointers; who frees them is not visible here
51 |   size_t local_strides[MAX_LOCAL_PROCS][LEGION_MAX_DIM];  // LEGION_MAX_DIM strides per slot
52 | };
53 | 
54 | }}}  // namespace triton::backend::legion
55 | 
56 | #endif  // __LEGION_TRITON_TENSOR_H__
57 | 


--------------------------------------------------------------------------------
/triton/src/test/data/add.onnx:
--------------------------------------------------------------------------------
 1 | model:y
 2 | 
 3 | input0
 4 | input1output"Add
 5 | test_graphZ
 6 | input0
 7 | 
 8 | 
 9 | Z
10 | input1
11 | 
12 | 
13 | b
14 | output
15 | 
16 | 
17 | B


--------------------------------------------------------------------------------
/triton/src/test/data/avg_pool.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/avg_pool.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/avg_pool_autopad.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/avg_pool_autopad.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/avg_pool_ceil.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/avg_pool_ceil.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/avg_pool_count_include_pad.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/avg_pool_count_include_pad.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/avg_pool_pad.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/avg_pool_pad.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/cast.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/cast.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/conv2d_with_bias.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/conv2d_with_bias.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/identity.onnx:
--------------------------------------------------------------------------------
 1 | model:j
 2 | 
 3 | inputoutput"Identity
 4 | test_graphZ
 5 | input
 6 | 
 7 | 
 8 | 
 9 | 
10 | b 
11 | output
12 | 
13 | 
14 | 
15 | 
16 | B


--------------------------------------------------------------------------------
/triton/src/test/data/max_pool.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/max_pool.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/max_pool_autopad.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/max_pool_autopad.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/max_pool_ceil.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/max_pool_ceil.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/max_pool_dilations.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/max_pool_dilations.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/max_pool_order.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/max_pool_order.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/mul.onnx:
--------------------------------------------------------------------------------
 1 | model:y
 2 | 
 3 | input0
 4 | input1output"Mul
 5 | test_graphZ
 6 | input0
 7 | 
 8 | 
 9 | Z
10 | input1
11 | 
12 | 
13 | b
14 | output
15 | 
16 | 
17 | B


--------------------------------------------------------------------------------
/triton/src/test/data/reciprocal.onnx:
--------------------------------------------------------------------------------
 1 | model:\
 2 | 
 3 | inputoutput"
 4 | Reciprocal
 5 | test_graphZ
 6 | input
 7 | 
 8 | 
 9 | b
10 | output
11 | 
12 | 
13 | B


--------------------------------------------------------------------------------
/triton/src/test/data/softmax.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/softmax.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/softmax_default_axis.onnx:
--------------------------------------------------------------------------------
 1 | model:Y
 2 | 
 3 | inputoutput"Softmax
 4 | test_graphZ
 5 | input
 6 | 
 7 | 
 8 | b
 9 | output
10 | 
11 | 
12 | B


--------------------------------------------------------------------------------
/triton/src/test/data/softmax_negative_axis.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flexflow/flexflow-serve/dd7bee0e9d6e43b1aaf9355a4d2091fdae2b6942/triton/src/test/data/softmax_negative_axis.onnx


--------------------------------------------------------------------------------
/triton/src/test/data/sqrt.onnx:
--------------------------------------------------------------------------------
 1 | model:V
 2 | 
 3 | inputoutput"Sqrt
 4 | test_graphZ
 5 | input
 6 | 
 7 | 
 8 | b
 9 | output
10 | 
11 | 
12 | B


--------------------------------------------------------------------------------
/triton/src/test/data/sub.onnx:
--------------------------------------------------------------------------------
 1 | model:y
 2 | 
 3 | input0
 4 | input1output"Sub
 5 | test_graphZ
 6 | input0
 7 | 
 8 | 
 9 | Z
10 | input1
11 | 
12 | 
13 | b
14 | output
15 | 
16 | 
17 | B


--------------------------------------------------------------------------------
/triton/src/test/data/tanh.onnx:
--------------------------------------------------------------------------------
 1 | model:V
 2 | 
 3 | inputoutput"Tanh
 4 | test_graphZ
 5 | input
 6 | 
 7 | 
 8 | b
 9 | output
10 | 
11 | 
12 | B


--------------------------------------------------------------------------------