├── .gitignore ├── .gitmodules ├── README.md ├── benchmark ├── longbench_config │ ├── dataset2maxlen.json │ ├── dataset2prompt.json │ ├── model2maxlen.json │ └── model2path.json ├── longbench_eval.py ├── longbench_metrics.py ├── longbench_pred.py ├── passkey_pred.py └── performance.py ├── media ├── arkvale-nips24-paper.pdf ├── arkvale-nips24-poster.pdf └── arkvale-nips24-talk.pdf ├── requirements.txt └── source ├── arkvale ├── __init__.py ├── adapter │ ├── __init__.py │ ├── generate.py │ └── modeling.py ├── infer_state.py ├── kernels.py ├── kv_cache.py └── utils.py ├── arkvale_cpp ├── CMakeLists.txt └── src │ ├── api.cu │ ├── append.cu │ ├── batch_decode.cu │ ├── batch_prefill.cu │ ├── estimate.cu │ ├── flashinfer_ops.h │ ├── gen_dispatch.py │ ├── generated │ └── dispatch.inc │ ├── pytorch_extension_utils.h │ ├── rms_norm.cu │ ├── rotary.cu │ └── select.cu └── setup.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/.gitmodules -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/README.md -------------------------------------------------------------------------------- /benchmark/longbench_config/dataset2maxlen.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/benchmark/longbench_config/dataset2maxlen.json -------------------------------------------------------------------------------- /benchmark/longbench_config/dataset2prompt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/benchmark/longbench_config/dataset2prompt.json -------------------------------------------------------------------------------- /benchmark/longbench_config/model2maxlen.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/benchmark/longbench_config/model2maxlen.json -------------------------------------------------------------------------------- /benchmark/longbench_config/model2path.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/benchmark/longbench_config/model2path.json -------------------------------------------------------------------------------- /benchmark/longbench_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/benchmark/longbench_eval.py -------------------------------------------------------------------------------- /benchmark/longbench_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/benchmark/longbench_metrics.py -------------------------------------------------------------------------------- /benchmark/longbench_pred.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/benchmark/longbench_pred.py -------------------------------------------------------------------------------- /benchmark/passkey_pred.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/benchmark/passkey_pred.py -------------------------------------------------------------------------------- /benchmark/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/benchmark/performance.py -------------------------------------------------------------------------------- /media/arkvale-nips24-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/media/arkvale-nips24-paper.pdf -------------------------------------------------------------------------------- /media/arkvale-nips24-poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/media/arkvale-nips24-poster.pdf -------------------------------------------------------------------------------- /media/arkvale-nips24-talk.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/media/arkvale-nips24-talk.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/requirements.txt -------------------------------------------------------------------------------- /source/arkvale/__init__.py: -------------------------------------------------------------------------------- 1 | from . import adapter 2 | -------------------------------------------------------------------------------- /source/arkvale/adapter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale/adapter/__init__.py -------------------------------------------------------------------------------- /source/arkvale/adapter/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale/adapter/generate.py -------------------------------------------------------------------------------- /source/arkvale/adapter/modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale/adapter/modeling.py -------------------------------------------------------------------------------- /source/arkvale/infer_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale/infer_state.py -------------------------------------------------------------------------------- /source/arkvale/kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale/kernels.py -------------------------------------------------------------------------------- /source/arkvale/kv_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale/kv_cache.py -------------------------------------------------------------------------------- /source/arkvale/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale/utils.py -------------------------------------------------------------------------------- /source/arkvale_cpp/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/CMakeLists.txt -------------------------------------------------------------------------------- /source/arkvale_cpp/src/api.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/api.cu -------------------------------------------------------------------------------- /source/arkvale_cpp/src/append.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/append.cu -------------------------------------------------------------------------------- /source/arkvale_cpp/src/batch_decode.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/batch_decode.cu -------------------------------------------------------------------------------- /source/arkvale_cpp/src/batch_prefill.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/batch_prefill.cu -------------------------------------------------------------------------------- /source/arkvale_cpp/src/estimate.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/estimate.cu -------------------------------------------------------------------------------- /source/arkvale_cpp/src/flashinfer_ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/flashinfer_ops.h -------------------------------------------------------------------------------- /source/arkvale_cpp/src/gen_dispatch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/gen_dispatch.py -------------------------------------------------------------------------------- /source/arkvale_cpp/src/generated/dispatch.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/generated/dispatch.inc -------------------------------------------------------------------------------- /source/arkvale_cpp/src/pytorch_extension_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/pytorch_extension_utils.h -------------------------------------------------------------------------------- /source/arkvale_cpp/src/rms_norm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/rms_norm.cu -------------------------------------------------------------------------------- /source/arkvale_cpp/src/rotary.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/rotary.cu -------------------------------------------------------------------------------- /source/arkvale_cpp/src/select.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/arkvale_cpp/src/select.cu -------------------------------------------------------------------------------- /source/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pku-liang/ArkVale/HEAD/source/setup.py --------------------------------------------------------------------------------