├── .coveragerc ├── .gitattributes ├── .github ├── CODEOWNERS └── workflows │ ├── add_label.yaml │ ├── e2e_tests.yaml │ ├── release.yaml │ ├── run_maxtext_jetstream_tests.yaml │ ├── scripts │ └── create_release.js │ ├── test_llama_benchmarks.sh │ ├── test_moe_benchmarks.sh │ ├── unit_tests.yaml │ └── utils │ └── setup_runner.sh ├── .gitignore ├── AUTHORS ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── benchmarks ├── README.md ├── __init__.py ├── benchmark_prefix_cache.sh ├── benchmark_serving.py ├── eval_accuracy.py ├── eval_accuracy_longcontext.py ├── eval_accuracy_mmlu.py ├── huggingfaceh4_math500.json ├── math_utils.py ├── metrics.py ├── mlperf │ ├── README.md │ ├── backend.py │ ├── dataset.py │ ├── evaluate-accuracy.py │ ├── main.py │ ├── mlperf.conf │ ├── scripts │ │ ├── config_utils.sh │ │ ├── config_w-b16-kv-b16.sh │ │ ├── config_w-i8-kv-b16.sh │ │ ├── config_w-i8-kv-i8.sh │ │ ├── config_w-i8w-kv-b16.sh │ │ ├── config_w-i8w-kv-i8.sh │ │ ├── download_loadgen_data.sh │ │ ├── generate_server_accuracy_run.sh │ │ ├── generate_server_audit_run.sh │ │ ├── generate_server_performance_run.sh │ │ ├── init.sh │ │ ├── init_loadgen.sh │ │ ├── init_xprof.sh │ │ ├── launch_microbenchmark.sh │ │ ├── launch_server.sh │ │ ├── run_utils.sh │ │ └── tpu_script.sh │ ├── user.conf │ ├── user100.conf │ └── user2000.conf ├── mmlu_test_dataset │ ├── abstract_algebra_test.csv │ └── world_religions_test.csv ├── open_orca_gpt4_tokenized_llama.calibration_1000.pkl ├── open_orca_gpt4_tokenized_llama.sampled_24576.pkl ├── requirements.in └── tests │ ├── __init__.py │ ├── test_benchmark_serving.py │ ├── test_eval_accuracy.py │ └── test_metrics.py ├── docs ├── observability-prometheus-metrics-in-jetstream-server.md ├── online-inference-with-maxtext-engine.md └── profiling-with-jax-profiler-and-tensorboard.md ├── experimental ├── jax │ ├── README.md │ ├── inference │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── config.py │ │ ├── entrypoint │ │ │ ├── __init__.py │ │ │ ├── mini_offline_benchmarking.py │ │ │ ├── open_orca_gpt4_tokenized_llama.calibration_1000.pkl │ │ │ ├── open_orca_gpt4_tokenized_llama.sampled_24576.pkl │ │ │ └── run_simple_server.py │ │ ├── kernel │ │ │ ├── __init__.py │ │ │ ├── attention │ │ │ │ └── tpu │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── chunked_prefill_attention.py │ │ │ │ │ ├── chunked_prefill_attention_test.py │ │ │ │ │ ├── paged_attention.py │ │ │ │ │ ├── paged_attention_test.py │ │ │ │ │ └── quantization_utils.py │ │ │ ├── attention_ops.py │ │ │ ├── collective_matmul_ops.py │ │ │ └── linear │ │ │ │ └── tpu │ │ │ │ ├── collective_matmul.py │ │ │ │ └── collective_matmul_test.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── llama.py │ │ │ ├── llama_test.py │ │ │ ├── management │ │ │ │ ├── hf_llama_ckpt_conversion.py │ │ │ │ ├── registry.py │ │ │ │ └── util.py │ │ │ ├── postprocess.py │ │ │ └── sampling │ │ │ │ └── sampler.py │ │ ├── nn │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── attention_test.py │ │ │ ├── embedding.py │ │ │ ├── embedding_test.py │ │ │ ├── linear.py │ │ │ ├── module.py │ │ │ ├── module_test.py │ │ │ ├── norm.py │ │ │ ├── norm_test.py │ │ │ └── parameter.py │ │ ├── parallel │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── device.py │ │ │ ├── mesh.py │ │ │ ├── operations.py │ │ │ ├── operations_test.py │ │ │ └── util.py │ │ ├── runtime │ │ │ ├── __init__.py │ │ │ ├── batch_scheduler.py │ │ │ ├── engine.py │ │ │ ├── kv_cache.py │ │ │ ├── model_executor.py │ │ │ ├── offline_inference.py │ │ │ └── request_type.py │ │ ├── server │ │ │ ├── __init__.py │ │ │ └── simple_server.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── pytree_utils.py │ └── requirements.txt └── jetstream-maxtext-stable-stack │ ├── Dockerfile │ ├── README.md │ ├── build.sh │ ├── generate_manifest.sh │ ├── pipeline.sh │ ├── test.sh │ └── test_script │ ├── benchmark_chunked_prefill_example.sh │ ├── benchmark_serving_example.sh │ └── true.sh ├── jetstream ├── __init__.py ├── core │ ├── README.md │ ├── __init__.py │ ├── config_lib.py │ ├── implementations │ │ ├── __init__.py │ │ └── mock │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ └── server.py │ ├── lora │ │ ├── __init__.py │ │ ├── adapter_tensorstore.py │ │ └── multi_lora_inference_api.py │ ├── metrics │ │ ├── __init__.py │ │ └── prometheus.py │ ├── orchestrator.py │ ├── prefix_cache.py │ ├── proto │ │ ├── __init__.py │ │ ├── jetstream.proto │ │ ├── jetstream_pb2.py │ │ ├── jetstream_pb2_grpc.py │ │ ├── multi_lora_decoding.proto │ │ ├── multi_lora_decoding_pb2.py │ │ └── multi_lora_decoding_pb2_grpc.py │ ├── server_lib.py │ └── utils │ │ ├── __init__.py │ │ ├── async_multifuture.py │ │ ├── proxy_util.py │ │ └── return_sample.py ├── engine │ ├── README.md │ ├── __init__.py │ ├── chunked_prefill.py │ ├── engine_api.py │ ├── mock_engine.py │ ├── mock_utils.py │ ├── sampling_utils.py │ ├── token_utils.py │ ├── tokenizer.proto │ ├── tokenizer_api.py │ ├── tokenizer_pb2.py │ ├── tokenizer_pb2_grpc.py │ └── warmup_utils.py ├── entrypoints │ ├── __init__.py │ ├── config.py │ └── http │ │ ├── __init__.py │ │ ├── api_server.py │ │ ├── protocol.py │ │ └── utils.py ├── external_tokenizers │ ├── __init__.py │ └── llama3 │ │ ├── __init__.py │ │ └── llama3_tokenizer.py ├── tests │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── lora │ │ │ ├── __init__.py │ │ │ ├── test_adapter_tensorstore.py │ │ │ └── test_multi_lora_manager.py │ │ ├── test_config_lib.py │ │ ├── test_orchestrator.py │ │ ├── test_prefix_cache.py │ │ └── test_server.py │ ├── engine │ │ ├── __init__.py │ │ ├── external_tokenizers │ │ │ ├── gpt2 │ │ │ │ ├── blobs │ │ │ │ │ ├── 10c66461e4c109db5a2196bff4bb59be30396ed8 │ │ │ │ │ ├── 1f1d9aaca301414e7f6c9396df506798ff4eb9a6 │ │ │ │ │ ├── 226b0752cac7789c48f0cb3ec53eda48b7be36cc │ │ │ │ │ ├── 4b988bccc9dc5adacd403c00b4704976196548f8 │ │ │ │ │ └── be4d21d94f3b4687e5a54d84bf6ab46ed0f8defd │ │ │ │ ├── refs │ │ │ │ │ └── main │ │ │ │ └── snapshots │ │ │ │ │ └── 607a30d783dfa663caf39e06633721c8d4cfcd7e │ │ │ │ │ ├── config.json │ │ │ │ │ ├── merges.txt │ │ │ │ │ ├── tokenizer.json │ │ │ │ │ ├── tokenizer_config.json │ │ │ │ │ └── vocab.json │ │ │ ├── llama2 │ │ │ │ └── tokenizer.model │ │ │ └── llama3 │ │ │ │ └── tokenizer.model │ │ ├── test_init.py │ │ ├── test_mock_engine.py │ │ ├── test_sampling_utils.py │ │ ├── test_token_utils.py │ │ └── test_utils.py │ └── entrypoints │ │ ├── __init__.py │ │ └── http │ │ ├── __init__.py │ │ └── test_api_server.py └── tools │ ├── load_tester.py │ ├── maxtext │ ├── model_ckpt_conversion.sh │ └── model_ckpt_finetune_with_aqt.sh │ ├── multi_adapter_service_client.py │ ├── multi_lora_decode_requester.py │ ├── proxy_dev │ ├── base.Dockerfile │ └── dev.Dockerfile │ └── requester.py ├── license_preamble.txt ├── pylintrc ├── requirements.txt └── setup.py /.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.coveragerc -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.gitattributes -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @mailvijayasingh 2 | * @yuyanpeng-google 3 | * @vipannalla 4 | -------------------------------------------------------------------------------- /.github/workflows/add_label.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.github/workflows/add_label.yaml -------------------------------------------------------------------------------- /.github/workflows/e2e_tests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.github/workflows/e2e_tests.yaml -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.github/workflows/release.yaml -------------------------------------------------------------------------------- /.github/workflows/run_maxtext_jetstream_tests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.github/workflows/run_maxtext_jetstream_tests.yaml -------------------------------------------------------------------------------- /.github/workflows/scripts/create_release.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.github/workflows/scripts/create_release.js -------------------------------------------------------------------------------- /.github/workflows/test_llama_benchmarks.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.github/workflows/test_llama_benchmarks.sh -------------------------------------------------------------------------------- /.github/workflows/test_moe_benchmarks.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.github/workflows/test_moe_benchmarks.sh -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.github/workflows/unit_tests.yaml -------------------------------------------------------------------------------- /.github/workflows/utils/setup_runner.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.github/workflows/utils/setup_runner.sh -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/.gitignore -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Google LLC -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/README.md -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/README.md -------------------------------------------------------------------------------- /benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/benchmark_prefix_cache.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/benchmark_prefix_cache.sh -------------------------------------------------------------------------------- /benchmarks/benchmark_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/benchmark_serving.py -------------------------------------------------------------------------------- /benchmarks/eval_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/eval_accuracy.py -------------------------------------------------------------------------------- /benchmarks/eval_accuracy_longcontext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/eval_accuracy_longcontext.py -------------------------------------------------------------------------------- /benchmarks/eval_accuracy_mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/eval_accuracy_mmlu.py -------------------------------------------------------------------------------- /benchmarks/huggingfaceh4_math500.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/huggingfaceh4_math500.json -------------------------------------------------------------------------------- /benchmarks/math_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/math_utils.py -------------------------------------------------------------------------------- /benchmarks/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/metrics.py -------------------------------------------------------------------------------- /benchmarks/mlperf/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/README.md -------------------------------------------------------------------------------- /benchmarks/mlperf/backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/backend.py -------------------------------------------------------------------------------- /benchmarks/mlperf/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/dataset.py -------------------------------------------------------------------------------- /benchmarks/mlperf/evaluate-accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/evaluate-accuracy.py -------------------------------------------------------------------------------- /benchmarks/mlperf/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/main.py -------------------------------------------------------------------------------- /benchmarks/mlperf/mlperf.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/mlperf.conf -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/config_utils.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/config_utils.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/config_w-b16-kv-b16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/config_w-b16-kv-b16.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/config_w-i8-kv-b16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/config_w-i8-kv-b16.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/config_w-i8-kv-i8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/config_w-i8-kv-i8.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/config_w-i8w-kv-b16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/config_w-i8w-kv-b16.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/config_w-i8w-kv-i8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/config_w-i8w-kv-i8.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/download_loadgen_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/download_loadgen_data.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/generate_server_accuracy_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/generate_server_accuracy_run.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/generate_server_audit_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/generate_server_audit_run.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/generate_server_performance_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/generate_server_performance_run.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/init.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/init.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/init_loadgen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/init_loadgen.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/init_xprof.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/init_xprof.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/launch_microbenchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/launch_microbenchmark.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/launch_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/launch_server.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/run_utils.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/run_utils.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/scripts/tpu_script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/scripts/tpu_script.sh -------------------------------------------------------------------------------- /benchmarks/mlperf/user.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/user.conf -------------------------------------------------------------------------------- /benchmarks/mlperf/user100.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/user100.conf -------------------------------------------------------------------------------- /benchmarks/mlperf/user2000.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mlperf/user2000.conf -------------------------------------------------------------------------------- /benchmarks/mmlu_test_dataset/abstract_algebra_test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mmlu_test_dataset/abstract_algebra_test.csv -------------------------------------------------------------------------------- /benchmarks/mmlu_test_dataset/world_religions_test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/mmlu_test_dataset/world_religions_test.csv -------------------------------------------------------------------------------- /benchmarks/open_orca_gpt4_tokenized_llama.calibration_1000.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/open_orca_gpt4_tokenized_llama.calibration_1000.pkl -------------------------------------------------------------------------------- /benchmarks/open_orca_gpt4_tokenized_llama.sampled_24576.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/open_orca_gpt4_tokenized_llama.sampled_24576.pkl -------------------------------------------------------------------------------- /benchmarks/requirements.in: -------------------------------------------------------------------------------- 1 | nltk==3.8.1 2 | evaluate 3 | rouge-score 4 | transformers 5 | tqdm 6 | scikit-learn 7 | -------------------------------------------------------------------------------- /benchmarks/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/tests/test_benchmark_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/tests/test_benchmark_serving.py -------------------------------------------------------------------------------- /benchmarks/tests/test_eval_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/tests/test_eval_accuracy.py -------------------------------------------------------------------------------- /benchmarks/tests/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/benchmarks/tests/test_metrics.py -------------------------------------------------------------------------------- /docs/observability-prometheus-metrics-in-jetstream-server.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/docs/observability-prometheus-metrics-in-jetstream-server.md -------------------------------------------------------------------------------- /docs/online-inference-with-maxtext-engine.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/docs/online-inference-with-maxtext-engine.md -------------------------------------------------------------------------------- /docs/profiling-with-jax-profiler-and-tensorboard.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/docs/profiling-with-jax-profiler-and-tensorboard.md -------------------------------------------------------------------------------- /experimental/jax/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/README.md -------------------------------------------------------------------------------- /experimental/jax/inference/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/config/__init__.py -------------------------------------------------------------------------------- /experimental/jax/inference/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/config/config.py -------------------------------------------------------------------------------- /experimental/jax/inference/entrypoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/entrypoint/__init__.py -------------------------------------------------------------------------------- /experimental/jax/inference/entrypoint/mini_offline_benchmarking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/entrypoint/mini_offline_benchmarking.py -------------------------------------------------------------------------------- /experimental/jax/inference/entrypoint/open_orca_gpt4_tokenized_llama.calibration_1000.pkl: -------------------------------------------------------------------------------- 1 | ../../../../benchmarks/open_orca_gpt4_tokenized_llama.calibration_1000.pkl -------------------------------------------------------------------------------- /experimental/jax/inference/entrypoint/open_orca_gpt4_tokenized_llama.sampled_24576.pkl: -------------------------------------------------------------------------------- 1 | ../../../../benchmarks/open_orca_gpt4_tokenized_llama.sampled_24576.pkl -------------------------------------------------------------------------------- /experimental/jax/inference/entrypoint/run_simple_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/entrypoint/run_simple_server.py -------------------------------------------------------------------------------- /experimental/jax/inference/kernel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/kernel/__init__.py -------------------------------------------------------------------------------- /experimental/jax/inference/kernel/attention/tpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/kernel/attention/tpu/__init__.py -------------------------------------------------------------------------------- /experimental/jax/inference/kernel/attention/tpu/chunked_prefill_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/kernel/attention/tpu/chunked_prefill_attention.py -------------------------------------------------------------------------------- /experimental/jax/inference/kernel/attention/tpu/chunked_prefill_attention_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/kernel/attention/tpu/chunked_prefill_attention_test.py -------------------------------------------------------------------------------- /experimental/jax/inference/kernel/attention/tpu/paged_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/kernel/attention/tpu/paged_attention.py -------------------------------------------------------------------------------- /experimental/jax/inference/kernel/attention/tpu/paged_attention_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/kernel/attention/tpu/paged_attention_test.py -------------------------------------------------------------------------------- /experimental/jax/inference/kernel/attention/tpu/quantization_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/kernel/attention/tpu/quantization_utils.py -------------------------------------------------------------------------------- /experimental/jax/inference/kernel/attention_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/kernel/attention_ops.py -------------------------------------------------------------------------------- /experimental/jax/inference/kernel/collective_matmul_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/kernel/collective_matmul_ops.py -------------------------------------------------------------------------------- /experimental/jax/inference/kernel/linear/tpu/collective_matmul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/kernel/linear/tpu/collective_matmul.py -------------------------------------------------------------------------------- /experimental/jax/inference/kernel/linear/tpu/collective_matmul_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/kernel/linear/tpu/collective_matmul_test.py -------------------------------------------------------------------------------- /experimental/jax/inference/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/model/__init__.py -------------------------------------------------------------------------------- /experimental/jax/inference/model/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/model/llama.py -------------------------------------------------------------------------------- /experimental/jax/inference/model/llama_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/model/llama_test.py -------------------------------------------------------------------------------- /experimental/jax/inference/model/management/hf_llama_ckpt_conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/model/management/hf_llama_ckpt_conversion.py -------------------------------------------------------------------------------- /experimental/jax/inference/model/management/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/model/management/registry.py -------------------------------------------------------------------------------- /experimental/jax/inference/model/management/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/model/management/util.py -------------------------------------------------------------------------------- /experimental/jax/inference/model/postprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/model/postprocess.py -------------------------------------------------------------------------------- /experimental/jax/inference/model/sampling/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/model/sampling/sampler.py -------------------------------------------------------------------------------- /experimental/jax/inference/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/nn/__init__.py -------------------------------------------------------------------------------- /experimental/jax/inference/nn/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/nn/attention.py -------------------------------------------------------------------------------- /experimental/jax/inference/nn/attention_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/nn/attention_test.py -------------------------------------------------------------------------------- /experimental/jax/inference/nn/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/nn/embedding.py -------------------------------------------------------------------------------- /experimental/jax/inference/nn/embedding_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/nn/embedding_test.py -------------------------------------------------------------------------------- /experimental/jax/inference/nn/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/nn/linear.py -------------------------------------------------------------------------------- /experimental/jax/inference/nn/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/nn/module.py -------------------------------------------------------------------------------- /experimental/jax/inference/nn/module_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/nn/module_test.py -------------------------------------------------------------------------------- /experimental/jax/inference/nn/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/nn/norm.py -------------------------------------------------------------------------------- /experimental/jax/inference/nn/norm_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/nn/norm_test.py -------------------------------------------------------------------------------- /experimental/jax/inference/nn/parameter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/nn/parameter.py -------------------------------------------------------------------------------- /experimental/jax/inference/parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/parallel/__init__.py -------------------------------------------------------------------------------- /experimental/jax/inference/parallel/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/parallel/config.py -------------------------------------------------------------------------------- /experimental/jax/inference/parallel/device.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/parallel/device.py -------------------------------------------------------------------------------- /experimental/jax/inference/parallel/mesh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/parallel/mesh.py -------------------------------------------------------------------------------- /experimental/jax/inference/parallel/operations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/parallel/operations.py -------------------------------------------------------------------------------- /experimental/jax/inference/parallel/operations_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/parallel/operations_test.py -------------------------------------------------------------------------------- /experimental/jax/inference/parallel/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/parallel/util.py -------------------------------------------------------------------------------- /experimental/jax/inference/runtime/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/runtime/__init__.py -------------------------------------------------------------------------------- /experimental/jax/inference/runtime/batch_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/runtime/batch_scheduler.py -------------------------------------------------------------------------------- /experimental/jax/inference/runtime/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/runtime/engine.py -------------------------------------------------------------------------------- /experimental/jax/inference/runtime/kv_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/runtime/kv_cache.py -------------------------------------------------------------------------------- /experimental/jax/inference/runtime/model_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/runtime/model_executor.py -------------------------------------------------------------------------------- /experimental/jax/inference/runtime/offline_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/runtime/offline_inference.py -------------------------------------------------------------------------------- /experimental/jax/inference/runtime/request_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/runtime/request_type.py -------------------------------------------------------------------------------- /experimental/jax/inference/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/server/__init__.py -------------------------------------------------------------------------------- /experimental/jax/inference/server/simple_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/server/simple_server.py -------------------------------------------------------------------------------- /experimental/jax/inference/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/utils/__init__.py -------------------------------------------------------------------------------- /experimental/jax/inference/utils/pytree_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/inference/utils/pytree_utils.py -------------------------------------------------------------------------------- /experimental/jax/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jax/requirements.txt -------------------------------------------------------------------------------- /experimental/jetstream-maxtext-stable-stack/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jetstream-maxtext-stable-stack/Dockerfile -------------------------------------------------------------------------------- /experimental/jetstream-maxtext-stable-stack/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jetstream-maxtext-stable-stack/README.md -------------------------------------------------------------------------------- /experimental/jetstream-maxtext-stable-stack/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jetstream-maxtext-stable-stack/build.sh -------------------------------------------------------------------------------- /experimental/jetstream-maxtext-stable-stack/generate_manifest.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jetstream-maxtext-stable-stack/generate_manifest.sh -------------------------------------------------------------------------------- /experimental/jetstream-maxtext-stable-stack/pipeline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jetstream-maxtext-stable-stack/pipeline.sh -------------------------------------------------------------------------------- /experimental/jetstream-maxtext-stable-stack/test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jetstream-maxtext-stable-stack/test.sh -------------------------------------------------------------------------------- /experimental/jetstream-maxtext-stable-stack/test_script/benchmark_chunked_prefill_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jetstream-maxtext-stable-stack/test_script/benchmark_chunked_prefill_example.sh -------------------------------------------------------------------------------- /experimental/jetstream-maxtext-stable-stack/test_script/benchmark_serving_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/experimental/jetstream-maxtext-stable-stack/test_script/benchmark_serving_example.sh -------------------------------------------------------------------------------- /experimental/jetstream-maxtext-stable-stack/test_script/true.sh: -------------------------------------------------------------------------------- 1 | true 2 | -------------------------------------------------------------------------------- /jetstream/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/__init__.py -------------------------------------------------------------------------------- /jetstream/core/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/README.md -------------------------------------------------------------------------------- /jetstream/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/__init__.py -------------------------------------------------------------------------------- /jetstream/core/config_lib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/config_lib.py -------------------------------------------------------------------------------- /jetstream/core/implementations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/implementations/__init__.py -------------------------------------------------------------------------------- /jetstream/core/implementations/mock/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/implementations/mock/README.md -------------------------------------------------------------------------------- /jetstream/core/implementations/mock/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/implementations/mock/__init__.py -------------------------------------------------------------------------------- /jetstream/core/implementations/mock/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/implementations/mock/config.py -------------------------------------------------------------------------------- /jetstream/core/implementations/mock/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/implementations/mock/server.py -------------------------------------------------------------------------------- /jetstream/core/lora/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/lora/__init__.py -------------------------------------------------------------------------------- /jetstream/core/lora/adapter_tensorstore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/lora/adapter_tensorstore.py -------------------------------------------------------------------------------- /jetstream/core/lora/multi_lora_inference_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/lora/multi_lora_inference_api.py -------------------------------------------------------------------------------- /jetstream/core/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/metrics/__init__.py -------------------------------------------------------------------------------- /jetstream/core/metrics/prometheus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/metrics/prometheus.py -------------------------------------------------------------------------------- /jetstream/core/orchestrator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/orchestrator.py -------------------------------------------------------------------------------- /jetstream/core/prefix_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/prefix_cache.py -------------------------------------------------------------------------------- /jetstream/core/proto/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/proto/__init__.py -------------------------------------------------------------------------------- /jetstream/core/proto/jetstream.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/proto/jetstream.proto -------------------------------------------------------------------------------- /jetstream/core/proto/jetstream_pb2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/proto/jetstream_pb2.py -------------------------------------------------------------------------------- /jetstream/core/proto/jetstream_pb2_grpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/proto/jetstream_pb2_grpc.py -------------------------------------------------------------------------------- /jetstream/core/proto/multi_lora_decoding.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/proto/multi_lora_decoding.proto -------------------------------------------------------------------------------- /jetstream/core/proto/multi_lora_decoding_pb2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/proto/multi_lora_decoding_pb2.py -------------------------------------------------------------------------------- /jetstream/core/proto/multi_lora_decoding_pb2_grpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/proto/multi_lora_decoding_pb2_grpc.py -------------------------------------------------------------------------------- /jetstream/core/server_lib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/server_lib.py -------------------------------------------------------------------------------- /jetstream/core/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/utils/__init__.py -------------------------------------------------------------------------------- /jetstream/core/utils/async_multifuture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/utils/async_multifuture.py -------------------------------------------------------------------------------- /jetstream/core/utils/proxy_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/utils/proxy_util.py -------------------------------------------------------------------------------- /jetstream/core/utils/return_sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/core/utils/return_sample.py -------------------------------------------------------------------------------- /jetstream/engine/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/README.md -------------------------------------------------------------------------------- /jetstream/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/__init__.py -------------------------------------------------------------------------------- /jetstream/engine/chunked_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/chunked_prefill.py -------------------------------------------------------------------------------- /jetstream/engine/engine_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/engine_api.py -------------------------------------------------------------------------------- /jetstream/engine/mock_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/mock_engine.py -------------------------------------------------------------------------------- /jetstream/engine/mock_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/mock_utils.py -------------------------------------------------------------------------------- /jetstream/engine/sampling_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/sampling_utils.py -------------------------------------------------------------------------------- /jetstream/engine/token_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/token_utils.py -------------------------------------------------------------------------------- /jetstream/engine/tokenizer.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/tokenizer.proto -------------------------------------------------------------------------------- /jetstream/engine/tokenizer_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/tokenizer_api.py -------------------------------------------------------------------------------- /jetstream/engine/tokenizer_pb2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/tokenizer_pb2.py -------------------------------------------------------------------------------- /jetstream/engine/tokenizer_pb2_grpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/tokenizer_pb2_grpc.py -------------------------------------------------------------------------------- /jetstream/engine/warmup_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/engine/warmup_utils.py -------------------------------------------------------------------------------- /jetstream/entrypoints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/entrypoints/__init__.py -------------------------------------------------------------------------------- /jetstream/entrypoints/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/entrypoints/config.py -------------------------------------------------------------------------------- /jetstream/entrypoints/http/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/entrypoints/http/__init__.py -------------------------------------------------------------------------------- /jetstream/entrypoints/http/api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/entrypoints/http/api_server.py -------------------------------------------------------------------------------- /jetstream/entrypoints/http/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/entrypoints/http/protocol.py -------------------------------------------------------------------------------- /jetstream/entrypoints/http/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/entrypoints/http/utils.py -------------------------------------------------------------------------------- /jetstream/external_tokenizers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /jetstream/external_tokenizers/llama3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /jetstream/external_tokenizers/llama3/llama3_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/external_tokenizers/llama3/llama3_tokenizer.py -------------------------------------------------------------------------------- /jetstream/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/__init__.py -------------------------------------------------------------------------------- /jetstream/tests/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/core/__init__.py -------------------------------------------------------------------------------- /jetstream/tests/core/lora/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/core/lora/__init__.py -------------------------------------------------------------------------------- /jetstream/tests/core/lora/test_adapter_tensorstore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/core/lora/test_adapter_tensorstore.py -------------------------------------------------------------------------------- /jetstream/tests/core/lora/test_multi_lora_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/core/lora/test_multi_lora_manager.py -------------------------------------------------------------------------------- /jetstream/tests/core/test_config_lib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/core/test_config_lib.py -------------------------------------------------------------------------------- /jetstream/tests/core/test_orchestrator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/core/test_orchestrator.py -------------------------------------------------------------------------------- /jetstream/tests/core/test_prefix_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/core/test_prefix_cache.py -------------------------------------------------------------------------------- /jetstream/tests/core/test_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/core/test_server.py -------------------------------------------------------------------------------- /jetstream/tests/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/__init__.py -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/gpt2/blobs/10c66461e4c109db5a2196bff4bb59be30396ed8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/external_tokenizers/gpt2/blobs/10c66461e4c109db5a2196bff4bb59be30396ed8 -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/gpt2/blobs/1f1d9aaca301414e7f6c9396df506798ff4eb9a6: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/external_tokenizers/gpt2/blobs/1f1d9aaca301414e7f6c9396df506798ff4eb9a6 -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/gpt2/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/external_tokenizers/gpt2/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/gpt2/blobs/4b988bccc9dc5adacd403c00b4704976196548f8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/external_tokenizers/gpt2/blobs/4b988bccc9dc5adacd403c00b4704976196548f8 -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/gpt2/blobs/be4d21d94f3b4687e5a54d84bf6ab46ed0f8defd: -------------------------------------------------------------------------------- 1 | {"model_max_length": 1024} -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/gpt2/refs/main: -------------------------------------------------------------------------------- 1 | 607a30d783dfa663caf39e06633721c8d4cfcd7e -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e/config.json: -------------------------------------------------------------------------------- 1 | ../../blobs/10c66461e4c109db5a2196bff4bb59be30396ed8 -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e/merges.txt: -------------------------------------------------------------------------------- 1 | ../../blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e/tokenizer.json: -------------------------------------------------------------------------------- 1 | ../../blobs/4b988bccc9dc5adacd403c00b4704976196548f8 -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | ../../blobs/be4d21d94f3b4687e5a54d84bf6ab46ed0f8defd -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e/vocab.json: -------------------------------------------------------------------------------- 1 | ../../blobs/1f1d9aaca301414e7f6c9396df506798ff4eb9a6 -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/llama2/tokenizer.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/external_tokenizers/llama2/tokenizer.model -------------------------------------------------------------------------------- /jetstream/tests/engine/external_tokenizers/llama3/tokenizer.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/external_tokenizers/llama3/tokenizer.model -------------------------------------------------------------------------------- /jetstream/tests/engine/test_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/test_init.py -------------------------------------------------------------------------------- /jetstream/tests/engine/test_mock_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/test_mock_engine.py -------------------------------------------------------------------------------- /jetstream/tests/engine/test_sampling_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/test_sampling_utils.py -------------------------------------------------------------------------------- /jetstream/tests/engine/test_token_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/test_token_utils.py -------------------------------------------------------------------------------- /jetstream/tests/engine/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/engine/test_utils.py -------------------------------------------------------------------------------- /jetstream/tests/entrypoints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/entrypoints/__init__.py -------------------------------------------------------------------------------- /jetstream/tests/entrypoints/http/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/entrypoints/http/__init__.py -------------------------------------------------------------------------------- /jetstream/tests/entrypoints/http/test_api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tests/entrypoints/http/test_api_server.py -------------------------------------------------------------------------------- /jetstream/tools/load_tester.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tools/load_tester.py -------------------------------------------------------------------------------- /jetstream/tools/maxtext/model_ckpt_conversion.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tools/maxtext/model_ckpt_conversion.sh -------------------------------------------------------------------------------- /jetstream/tools/maxtext/model_ckpt_finetune_with_aqt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tools/maxtext/model_ckpt_finetune_with_aqt.sh -------------------------------------------------------------------------------- /jetstream/tools/multi_adapter_service_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tools/multi_adapter_service_client.py -------------------------------------------------------------------------------- /jetstream/tools/multi_lora_decode_requester.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tools/multi_lora_decode_requester.py -------------------------------------------------------------------------------- /jetstream/tools/proxy_dev/base.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tools/proxy_dev/base.Dockerfile -------------------------------------------------------------------------------- /jetstream/tools/proxy_dev/dev.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tools/proxy_dev/dev.Dockerfile -------------------------------------------------------------------------------- /jetstream/tools/requester.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/jetstream/tools/requester.py -------------------------------------------------------------------------------- /license_preamble.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/license_preamble.txt -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/pylintrc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Hypercomputer/JetStream/HEAD/setup.py --------------------------------------------------------------------------------