├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── question.md ├── pull_request_template.md └── workflows │ ├── pre-commit.yaml │ └── run-tests.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── conf ├── archive │ ├── old-benchmarking │ │ ├── gpt2-benchmark-config.yaml │ │ ├── gpt2-intensive-config.yaml │ │ └── gpt2-toy-config.yaml │ ├── partial-checkpointing │ │ └── gpt2-mistral-medium-gcheck-config.yaml │ └── v1 │ │ ├── gpt2-debug-config.yaml │ │ ├── gpt2-mistral-medium-config.yaml │ │ ├── gpt2-mistral-medium-gcp-config.yaml │ │ ├── gpt2-mistral-mini-config.yaml │ │ ├── gpt2-mistral-small-gcp-config.yaml │ │ ├── gpt2-scaling-config.yaml │ │ └── tutorial-gpt2-micro.yaml ├── datasets │ ├── openwebtext.yaml │ ├── shakespeare.yaml │ ├── wikitext103.yaml │ └── wikitext2.yaml ├── deepspeed │ ├── debug-conf.json │ ├── hostfile │ ├── z1-conf.json │ ├── z1-offload-conf.json │ ├── z2-debug-conf.json │ ├── z2-medium-conf.json │ ├── z2-offload-conf.json │ ├── z2-small-conf.json │ ├── z3-conf.json │ └── z3-offload-conf.json ├── mistral-medium.yaml ├── mistral-micro.yaml ├── mistral-small.yaml ├── models │ ├── mistral-medium.yaml │ ├── mistral-micro.json │ ├── mistral-micro.yaml │ └── mistral-small.yaml ├── train_schema.py ├── trainers │ ├── benchmark.yaml │ ├── gpt2-medium.yaml │ ├── gpt2-small-short.yaml │ ├── gpt2-small.yaml │ └── intensive.yaml └── tutorial-shakespeare-gpt2-micro.yaml ├── docs ├── LICENSE ├── Makefile ├── README.md ├── _static │ ├── pydata-custom.css │ └── readthedocs-custom.css ├── _templates │ ├── custom-class-template.rst │ ├── custom-module-template.rst │ └── layout.html ├── api.rst ├── conf.py ├── contributing.rst ├── fork.png ├── getting_started.rst ├── getting_started │ ├── config.rst │ ├── download.rst │ ├── evaluate.rst │ ├── install.rst │ ├── train-output.txt │ ├── train.rst │ └── wandb_example.png ├── hugging_face_differences.rst ├── index.rst ├── mistral_components.png ├── scripts │ └── build_download_tables.py └── tutorials │ ├── cluster_basics.png │ ├── deepspeed.rst │ ├── gcp_plus_kubernetes.rst │ ├── generate.rst │ ├── gke_standard.png │ ├── kubernetes_menu.png │ ├── multi-gpu.rst │ ├── node_pool.png │ ├── node_pool_gpu.png │ ├── resume.rst │ └── tutorial_cluster.png ├── environments ├── Dockerfile ├── environment-cpu.yaml ├── environment-gpu.yaml ├── environment-m1.yaml └── export.py ├── gcp ├── Dockerfile ├── job-gpt2-micro.yaml ├── pod-gpu.yaml ├── pod.yaml └── run-demo-job.sh ├── generate_text.ipynb ├── mistral_models.json ├── mypy.ini ├── pyproject.toml ├── scripts ├── README.md ├── benchmarking │ ├── dial-in │ │ ├── mistral-gpt2-medium.sh │ │ └── mistral-gpt2-small.sh │ ├── intensive-benchmarking │ │ ├── ddp-multi.sh │ │ ├── deepspeed-multi.sh │ │ └── fairscale-multi.sh │ └── standard-benchmarking │ │ ├── README.md │ │ ├── ddp-multi.sh │ │ ├── ddp-single.sh │ │ ├── deepspeed-multi.sh │ │ ├── deepspeed-single.sh │ │ ├── ds-evaluation-bsz.sh │ │ ├── fairscale-multi.sh │ │ ├── fairscale-single.sh │ │ └── vanilla.sh ├── debugging │ ├── resuming │ │ └── resume-single-node.sh │ └── sanity │ │ └── mistral-sanity-gpt2-small.sh ├── forget-me-not.sh ├── mistral-gcp-gpt2-medium.sh ├── mistral-gcp-gpt2-small.sh ├── mistral-gpt2-medium.sh ├── mistral-gpt2-small.sh └── run │ ├── ddp.sh │ ├── deepspeed.sh │ ├── fairscale.sh │ ├── multi-node.sh │ └── single-node.sh ├── setup ├── conda-requirements.txt ├── pip-requirements.txt ├── setup.sh └── test-requirements.txt ├── src ├── __init__.py ├── args │ ├── __init__.py │ └── training_args.py ├── core │ ├── __init__.py │ ├── callbacks.py │ └── trainer.py ├── corpora │ ├── __init__.py │ ├── auto.py │ ├── detokenization.py │ ├── indexer.py │ └── tokenization_utils.py ├── models │ ├── __init__.py │ └── auto_clm.py ├── overwatch │ ├── __init__.py │ └── overwatch.py └── util │ ├── __init__.py │ ├── paths.py │ └── registry.py ├── tests ├── README.md ├── __init__.py ├── conf │ ├── datasets │ │ ├── wikitext103.yaml │ │ ├── wikitext2-detokenized.yaml │ │ └── wikitext2.yaml │ ├── deepspeed │ │ ├── z1-conf.json │ │ └── z2-small-conf.json │ ├── models │ │ ├── gpt2-micro.json │ │ ├── gpt2-micro.yaml │ │ └── gpt2-small.yaml │ ├── train-diff.yaml │ ├── train.yaml │ └── trainers │ │ ├── gpt2-small-diff.yaml │ │ └── gpt2-small.yaml ├── run_deepspeed_tests.py ├── setup │ └── pip-requirements.txt ├── test_args.py ├── test_checkpoint.py ├── test_eval_loss_is_defined.py ├── test_fp.py ├── test_indexed_dataset.py ├── test_online_benchmark_trainer.py ├── test_seed.py └── test_valid_configs.py ├── train.py └── tutorials ├── custom-dataset ├── README.md └── shakespeare │ ├── shakespeare.train.jsonl │ └── shakespeare.validation.jsonl └── gcp-on-demand └── README.md /.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/.flake8 -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/.github/ISSUE_TEMPLATE/bug_report.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/.github/ISSUE_TEMPLATE/feature_request.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/.github/ISSUE_TEMPLATE/question.md -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/.github/pull_request_template.md -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/.github/workflows/pre-commit.yaml -------------------------------------------------------------------------------- /.github/workflows/run-tests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/.github/workflows/run-tests.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/README.md -------------------------------------------------------------------------------- /conf/archive/old-benchmarking/gpt2-benchmark-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/archive/old-benchmarking/gpt2-benchmark-config.yaml -------------------------------------------------------------------------------- /conf/archive/old-benchmarking/gpt2-intensive-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/archive/old-benchmarking/gpt2-intensive-config.yaml -------------------------------------------------------------------------------- /conf/archive/old-benchmarking/gpt2-toy-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/archive/old-benchmarking/gpt2-toy-config.yaml -------------------------------------------------------------------------------- /conf/archive/partial-checkpointing/gpt2-mistral-medium-gcheck-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/archive/partial-checkpointing/gpt2-mistral-medium-gcheck-config.yaml -------------------------------------------------------------------------------- /conf/archive/v1/gpt2-debug-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/archive/v1/gpt2-debug-config.yaml -------------------------------------------------------------------------------- /conf/archive/v1/gpt2-mistral-medium-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/archive/v1/gpt2-mistral-medium-config.yaml -------------------------------------------------------------------------------- /conf/archive/v1/gpt2-mistral-medium-gcp-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/archive/v1/gpt2-mistral-medium-gcp-config.yaml -------------------------------------------------------------------------------- /conf/archive/v1/gpt2-mistral-mini-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/archive/v1/gpt2-mistral-mini-config.yaml -------------------------------------------------------------------------------- /conf/archive/v1/gpt2-mistral-small-gcp-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/archive/v1/gpt2-mistral-small-gcp-config.yaml -------------------------------------------------------------------------------- /conf/archive/v1/gpt2-scaling-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/archive/v1/gpt2-scaling-config.yaml -------------------------------------------------------------------------------- /conf/archive/v1/tutorial-gpt2-micro.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/archive/v1/tutorial-gpt2-micro.yaml -------------------------------------------------------------------------------- /conf/datasets/openwebtext.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/datasets/openwebtext.yaml -------------------------------------------------------------------------------- /conf/datasets/shakespeare.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/datasets/shakespeare.yaml -------------------------------------------------------------------------------- /conf/datasets/wikitext103.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/datasets/wikitext103.yaml -------------------------------------------------------------------------------- /conf/datasets/wikitext2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/datasets/wikitext2.yaml -------------------------------------------------------------------------------- /conf/deepspeed/debug-conf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/deepspeed/debug-conf.json -------------------------------------------------------------------------------- /conf/deepspeed/hostfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/deepspeed/hostfile -------------------------------------------------------------------------------- /conf/deepspeed/z1-conf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/deepspeed/z1-conf.json -------------------------------------------------------------------------------- /conf/deepspeed/z1-offload-conf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/deepspeed/z1-offload-conf.json -------------------------------------------------------------------------------- /conf/deepspeed/z2-debug-conf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/deepspeed/z2-debug-conf.json -------------------------------------------------------------------------------- /conf/deepspeed/z2-medium-conf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/deepspeed/z2-medium-conf.json -------------------------------------------------------------------------------- /conf/deepspeed/z2-offload-conf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/deepspeed/z2-offload-conf.json -------------------------------------------------------------------------------- /conf/deepspeed/z2-small-conf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/deepspeed/z2-small-conf.json -------------------------------------------------------------------------------- /conf/deepspeed/z3-conf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/deepspeed/z3-conf.json -------------------------------------------------------------------------------- /conf/deepspeed/z3-offload-conf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/deepspeed/z3-offload-conf.json -------------------------------------------------------------------------------- /conf/mistral-medium.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/mistral-medium.yaml -------------------------------------------------------------------------------- /conf/mistral-micro.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/mistral-micro.yaml -------------------------------------------------------------------------------- /conf/mistral-small.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/mistral-small.yaml -------------------------------------------------------------------------------- /conf/models/mistral-medium.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/models/mistral-medium.yaml -------------------------------------------------------------------------------- /conf/models/mistral-micro.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/models/mistral-micro.json -------------------------------------------------------------------------------- /conf/models/mistral-micro.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/models/mistral-micro.yaml -------------------------------------------------------------------------------- /conf/models/mistral-small.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/models/mistral-small.yaml -------------------------------------------------------------------------------- /conf/train_schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/train_schema.py -------------------------------------------------------------------------------- /conf/trainers/benchmark.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/trainers/benchmark.yaml -------------------------------------------------------------------------------- /conf/trainers/gpt2-medium.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/trainers/gpt2-medium.yaml -------------------------------------------------------------------------------- /conf/trainers/gpt2-small-short.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/trainers/gpt2-small-short.yaml -------------------------------------------------------------------------------- /conf/trainers/gpt2-small.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/trainers/gpt2-small.yaml -------------------------------------------------------------------------------- /conf/trainers/intensive.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/trainers/intensive.yaml -------------------------------------------------------------------------------- /conf/tutorial-shakespeare-gpt2-micro.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/conf/tutorial-shakespeare-gpt2-micro.yaml -------------------------------------------------------------------------------- /docs/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/LICENSE -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/_static/pydata-custom.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/_static/pydata-custom.css -------------------------------------------------------------------------------- /docs/_static/readthedocs-custom.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/_static/readthedocs-custom.css -------------------------------------------------------------------------------- /docs/_templates/custom-class-template.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/_templates/custom-class-template.rst -------------------------------------------------------------------------------- /docs/_templates/custom-module-template.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/_templates/custom-module-template.rst -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/_templates/layout.html -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/api.rst -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/contributing.rst -------------------------------------------------------------------------------- /docs/fork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/fork.png -------------------------------------------------------------------------------- /docs/getting_started.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/getting_started.rst -------------------------------------------------------------------------------- /docs/getting_started/config.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/getting_started/config.rst -------------------------------------------------------------------------------- /docs/getting_started/download.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/getting_started/download.rst -------------------------------------------------------------------------------- /docs/getting_started/evaluate.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/getting_started/evaluate.rst -------------------------------------------------------------------------------- /docs/getting_started/install.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/getting_started/install.rst -------------------------------------------------------------------------------- /docs/getting_started/train-output.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/getting_started/train-output.txt -------------------------------------------------------------------------------- /docs/getting_started/train.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/getting_started/train.rst -------------------------------------------------------------------------------- /docs/getting_started/wandb_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/getting_started/wandb_example.png -------------------------------------------------------------------------------- /docs/hugging_face_differences.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/hugging_face_differences.rst -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/mistral_components.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/mistral_components.png -------------------------------------------------------------------------------- /docs/scripts/build_download_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/scripts/build_download_tables.py -------------------------------------------------------------------------------- /docs/tutorials/cluster_basics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/tutorials/cluster_basics.png -------------------------------------------------------------------------------- /docs/tutorials/deepspeed.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/tutorials/deepspeed.rst -------------------------------------------------------------------------------- /docs/tutorials/gcp_plus_kubernetes.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/tutorials/gcp_plus_kubernetes.rst -------------------------------------------------------------------------------- /docs/tutorials/generate.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/tutorials/generate.rst -------------------------------------------------------------------------------- /docs/tutorials/gke_standard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/tutorials/gke_standard.png -------------------------------------------------------------------------------- /docs/tutorials/kubernetes_menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/tutorials/kubernetes_menu.png -------------------------------------------------------------------------------- /docs/tutorials/multi-gpu.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/tutorials/multi-gpu.rst -------------------------------------------------------------------------------- /docs/tutorials/node_pool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/tutorials/node_pool.png -------------------------------------------------------------------------------- /docs/tutorials/node_pool_gpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/tutorials/node_pool_gpu.png -------------------------------------------------------------------------------- /docs/tutorials/resume.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/tutorials/resume.rst -------------------------------------------------------------------------------- /docs/tutorials/tutorial_cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/docs/tutorials/tutorial_cluster.png -------------------------------------------------------------------------------- /environments/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/environments/Dockerfile -------------------------------------------------------------------------------- /environments/environment-cpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/environments/environment-cpu.yaml -------------------------------------------------------------------------------- /environments/environment-gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/environments/environment-gpu.yaml -------------------------------------------------------------------------------- /environments/environment-m1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/environments/environment-m1.yaml -------------------------------------------------------------------------------- /environments/export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/environments/export.py -------------------------------------------------------------------------------- /gcp/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/gcp/Dockerfile -------------------------------------------------------------------------------- /gcp/job-gpt2-micro.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/gcp/job-gpt2-micro.yaml -------------------------------------------------------------------------------- /gcp/pod-gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/gcp/pod-gpu.yaml -------------------------------------------------------------------------------- /gcp/pod.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/gcp/pod.yaml -------------------------------------------------------------------------------- /gcp/run-demo-job.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/gcp/run-demo-job.sh -------------------------------------------------------------------------------- /generate_text.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/generate_text.ipynb -------------------------------------------------------------------------------- /mistral_models.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/mistral_models.json -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/mypy.ini -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/README.md -------------------------------------------------------------------------------- /scripts/benchmarking/dial-in/mistral-gpt2-medium.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/dial-in/mistral-gpt2-medium.sh -------------------------------------------------------------------------------- /scripts/benchmarking/dial-in/mistral-gpt2-small.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/dial-in/mistral-gpt2-small.sh -------------------------------------------------------------------------------- /scripts/benchmarking/intensive-benchmarking/ddp-multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/intensive-benchmarking/ddp-multi.sh -------------------------------------------------------------------------------- /scripts/benchmarking/intensive-benchmarking/deepspeed-multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/intensive-benchmarking/deepspeed-multi.sh -------------------------------------------------------------------------------- /scripts/benchmarking/intensive-benchmarking/fairscale-multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/intensive-benchmarking/fairscale-multi.sh -------------------------------------------------------------------------------- /scripts/benchmarking/standard-benchmarking/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/standard-benchmarking/README.md -------------------------------------------------------------------------------- /scripts/benchmarking/standard-benchmarking/ddp-multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/standard-benchmarking/ddp-multi.sh -------------------------------------------------------------------------------- /scripts/benchmarking/standard-benchmarking/ddp-single.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/standard-benchmarking/ddp-single.sh -------------------------------------------------------------------------------- /scripts/benchmarking/standard-benchmarking/deepspeed-multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/standard-benchmarking/deepspeed-multi.sh -------------------------------------------------------------------------------- /scripts/benchmarking/standard-benchmarking/deepspeed-single.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/standard-benchmarking/deepspeed-single.sh -------------------------------------------------------------------------------- /scripts/benchmarking/standard-benchmarking/ds-evaluation-bsz.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/standard-benchmarking/ds-evaluation-bsz.sh -------------------------------------------------------------------------------- /scripts/benchmarking/standard-benchmarking/fairscale-multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/standard-benchmarking/fairscale-multi.sh -------------------------------------------------------------------------------- /scripts/benchmarking/standard-benchmarking/fairscale-single.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/standard-benchmarking/fairscale-single.sh -------------------------------------------------------------------------------- /scripts/benchmarking/standard-benchmarking/vanilla.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/benchmarking/standard-benchmarking/vanilla.sh -------------------------------------------------------------------------------- /scripts/debugging/resuming/resume-single-node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/debugging/resuming/resume-single-node.sh -------------------------------------------------------------------------------- /scripts/debugging/sanity/mistral-sanity-gpt2-small.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/debugging/sanity/mistral-sanity-gpt2-small.sh -------------------------------------------------------------------------------- /scripts/forget-me-not.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | sleep 6h 3 | pkill -f "train.py" 4 | -------------------------------------------------------------------------------- /scripts/mistral-gcp-gpt2-medium.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/mistral-gcp-gpt2-medium.sh -------------------------------------------------------------------------------- /scripts/mistral-gcp-gpt2-small.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/mistral-gcp-gpt2-small.sh -------------------------------------------------------------------------------- /scripts/mistral-gpt2-medium.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/mistral-gpt2-medium.sh -------------------------------------------------------------------------------- /scripts/mistral-gpt2-small.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/mistral-gpt2-small.sh -------------------------------------------------------------------------------- /scripts/run/ddp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/run/ddp.sh -------------------------------------------------------------------------------- /scripts/run/deepspeed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/run/deepspeed.sh -------------------------------------------------------------------------------- /scripts/run/fairscale.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/run/fairscale.sh -------------------------------------------------------------------------------- /scripts/run/multi-node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/run/multi-node.sh -------------------------------------------------------------------------------- /scripts/run/single-node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/scripts/run/single-node.sh -------------------------------------------------------------------------------- /setup/conda-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/setup/conda-requirements.txt -------------------------------------------------------------------------------- /setup/pip-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/setup/pip-requirements.txt -------------------------------------------------------------------------------- /setup/setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/setup/setup.sh -------------------------------------------------------------------------------- /setup/test-requirements.txt: -------------------------------------------------------------------------------- 1 | pytest>=7.1.0 2 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/args/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/args/__init__.py -------------------------------------------------------------------------------- /src/args/training_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/args/training_args.py -------------------------------------------------------------------------------- /src/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/core/__init__.py -------------------------------------------------------------------------------- /src/core/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/core/callbacks.py -------------------------------------------------------------------------------- /src/core/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/core/trainer.py -------------------------------------------------------------------------------- /src/corpora/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/corpora/__init__.py -------------------------------------------------------------------------------- /src/corpora/auto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/corpora/auto.py -------------------------------------------------------------------------------- /src/corpora/detokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/corpora/detokenization.py -------------------------------------------------------------------------------- /src/corpora/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/corpora/indexer.py -------------------------------------------------------------------------------- /src/corpora/tokenization_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/corpora/tokenization_utils.py -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/auto_clm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/models/auto_clm.py -------------------------------------------------------------------------------- /src/overwatch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/overwatch/__init__.py -------------------------------------------------------------------------------- /src/overwatch/overwatch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/overwatch/overwatch.py -------------------------------------------------------------------------------- /src/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/util/__init__.py -------------------------------------------------------------------------------- /src/util/paths.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/util/paths.py -------------------------------------------------------------------------------- /src/util/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/src/util/registry.py -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/README.md -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/__init__.py -------------------------------------------------------------------------------- /tests/conf/datasets/wikitext103.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/datasets/wikitext103.yaml -------------------------------------------------------------------------------- /tests/conf/datasets/wikitext2-detokenized.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/datasets/wikitext2-detokenized.yaml -------------------------------------------------------------------------------- /tests/conf/datasets/wikitext2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/datasets/wikitext2.yaml -------------------------------------------------------------------------------- /tests/conf/deepspeed/z1-conf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/deepspeed/z1-conf.json -------------------------------------------------------------------------------- /tests/conf/deepspeed/z2-small-conf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/deepspeed/z2-small-conf.json -------------------------------------------------------------------------------- /tests/conf/models/gpt2-micro.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/models/gpt2-micro.json -------------------------------------------------------------------------------- /tests/conf/models/gpt2-micro.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/models/gpt2-micro.yaml -------------------------------------------------------------------------------- /tests/conf/models/gpt2-small.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/models/gpt2-small.yaml -------------------------------------------------------------------------------- /tests/conf/train-diff.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/train-diff.yaml -------------------------------------------------------------------------------- /tests/conf/train.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/train.yaml -------------------------------------------------------------------------------- /tests/conf/trainers/gpt2-small-diff.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/trainers/gpt2-small-diff.yaml -------------------------------------------------------------------------------- /tests/conf/trainers/gpt2-small.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/conf/trainers/gpt2-small.yaml -------------------------------------------------------------------------------- /tests/run_deepspeed_tests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/run_deepspeed_tests.py -------------------------------------------------------------------------------- /tests/setup/pip-requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/test_args.py -------------------------------------------------------------------------------- /tests/test_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/test_checkpoint.py -------------------------------------------------------------------------------- /tests/test_eval_loss_is_defined.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/test_eval_loss_is_defined.py -------------------------------------------------------------------------------- /tests/test_fp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/test_fp.py -------------------------------------------------------------------------------- /tests/test_indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/test_indexed_dataset.py -------------------------------------------------------------------------------- /tests/test_online_benchmark_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/test_online_benchmark_trainer.py -------------------------------------------------------------------------------- /tests/test_seed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/test_seed.py -------------------------------------------------------------------------------- /tests/test_valid_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tests/test_valid_configs.py -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/train.py -------------------------------------------------------------------------------- /tutorials/custom-dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tutorials/custom-dataset/README.md -------------------------------------------------------------------------------- /tutorials/custom-dataset/shakespeare/shakespeare.train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tutorials/custom-dataset/shakespeare/shakespeare.train.jsonl -------------------------------------------------------------------------------- /tutorials/custom-dataset/shakespeare/shakespeare.validation.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tutorials/custom-dataset/shakespeare/shakespeare.validation.jsonl -------------------------------------------------------------------------------- /tutorials/gcp-on-demand/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-crfm/mistral/HEAD/tutorials/gcp-on-demand/README.md --------------------------------------------------------------------------------