├── .dvc ├── .gitignore └── config ├── .dvcignore ├── .gitignore ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── banner_image.png ├── data ├── .gitignore ├── final │ └── .gitignore ├── processed │ └── .gitignore ├── raw │ ├── .gitignore │ ├── 1M-GPT4-Augmented.parquet.dvc │ └── open_platypus.parquet.dvc └── split │ ├── .gitignore │ ├── train_identity_subset.jsonl │ ├── train_platypus_processed_subset.jsonl │ ├── val_identity_subset.jsonl │ └── val_platypus_processed_subset.jsonl ├── dvc.lock ├── dvc.yaml ├── dvclive ├── .gitignore ├── artifacts.dvc ├── dvc.yaml ├── metrics.json ├── params.yaml ├── plots │ └── metrics │ │ ├── epoch.tsv │ │ ├── eval │ │ ├── loss.tsv │ │ ├── runtime.tsv │ │ ├── samples_per_second.tsv │ │ └── steps_per_second.tsv │ │ ├── learning_rate.tsv │ │ ├── loss.tsv │ │ └── train │ │ ├── loss.tsv │ │ ├── runtime.tsv │ │ ├── samples_per_second.tsv │ │ └── steps_per_second.tsv └── report.html ├── models ├── .gitignore ├── Llama-2-13b-chat-hf.dvc └── Llama-2-7b-chat-hf.dvc ├── params.yaml ├── requirements.txt ├── sanity_check_result └── result.csv ├── sky-training.yaml ├── sky-vscode.yaml └── src ├── data_split.py ├── generate_identity_data.py ├── merge_data_splits.py ├── merge_model.py ├── process_orca_data.py ├── process_platypus_data.py ├── prompt_formatters.py ├── sanity_check.py ├── train.py └── utils.py /.dvc/.gitignore: -------------------------------------------------------------------------------- 1 | /config.local 2 | /tmp 3 | /cache 4 | -------------------------------------------------------------------------------- /.dvc/config: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/.dvc/config -------------------------------------------------------------------------------- /.dvcignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/.dvcignore -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/.gitignore -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/README.md -------------------------------------------------------------------------------- /banner_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/banner_image.png -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/final/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/data/final/.gitignore -------------------------------------------------------------------------------- /data/processed/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/data/processed/.gitignore -------------------------------------------------------------------------------- /data/raw/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/data/raw/.gitignore -------------------------------------------------------------------------------- /data/raw/1M-GPT4-Augmented.parquet.dvc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/data/raw/1M-GPT4-Augmented.parquet.dvc -------------------------------------------------------------------------------- /data/raw/open_platypus.parquet.dvc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/data/raw/open_platypus.parquet.dvc -------------------------------------------------------------------------------- /data/split/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/data/split/.gitignore -------------------------------------------------------------------------------- /data/split/train_identity_subset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/data/split/train_identity_subset.jsonl -------------------------------------------------------------------------------- /data/split/train_platypus_processed_subset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/data/split/train_platypus_processed_subset.jsonl -------------------------------------------------------------------------------- /data/split/val_identity_subset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/data/split/val_identity_subset.jsonl -------------------------------------------------------------------------------- /data/split/val_platypus_processed_subset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/data/split/val_platypus_processed_subset.jsonl -------------------------------------------------------------------------------- /dvc.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvc.lock -------------------------------------------------------------------------------- /dvc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvc.yaml -------------------------------------------------------------------------------- /dvclive/.gitignore: -------------------------------------------------------------------------------- 1 | /artifacts 2 | -------------------------------------------------------------------------------- /dvclive/artifacts.dvc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/artifacts.dvc -------------------------------------------------------------------------------- /dvclive/dvc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/dvc.yaml -------------------------------------------------------------------------------- /dvclive/metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/metrics.json -------------------------------------------------------------------------------- /dvclive/params.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/params.yaml -------------------------------------------------------------------------------- /dvclive/plots/metrics/epoch.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/plots/metrics/epoch.tsv -------------------------------------------------------------------------------- /dvclive/plots/metrics/eval/loss.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/plots/metrics/eval/loss.tsv -------------------------------------------------------------------------------- /dvclive/plots/metrics/eval/runtime.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/plots/metrics/eval/runtime.tsv -------------------------------------------------------------------------------- /dvclive/plots/metrics/eval/samples_per_second.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/plots/metrics/eval/samples_per_second.tsv -------------------------------------------------------------------------------- /dvclive/plots/metrics/eval/steps_per_second.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/plots/metrics/eval/steps_per_second.tsv -------------------------------------------------------------------------------- /dvclive/plots/metrics/learning_rate.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/plots/metrics/learning_rate.tsv -------------------------------------------------------------------------------- /dvclive/plots/metrics/loss.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/plots/metrics/loss.tsv -------------------------------------------------------------------------------- /dvclive/plots/metrics/train/loss.tsv: -------------------------------------------------------------------------------- 1 | step loss 2 | 214 0.8070883238426992 3 | -------------------------------------------------------------------------------- /dvclive/plots/metrics/train/runtime.tsv: -------------------------------------------------------------------------------- 1 | step runtime 2 | 214 13147.1757 3 | -------------------------------------------------------------------------------- /dvclive/plots/metrics/train/samples_per_second.tsv: -------------------------------------------------------------------------------- 1 | step samples_per_second 2 | 214 0.65 3 | -------------------------------------------------------------------------------- /dvclive/plots/metrics/train/steps_per_second.tsv: -------------------------------------------------------------------------------- 1 | step steps_per_second 2 | 214 0.081 3 | -------------------------------------------------------------------------------- /dvclive/report.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/dvclive/report.html -------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/models/.gitignore -------------------------------------------------------------------------------- /models/Llama-2-13b-chat-hf.dvc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/models/Llama-2-13b-chat-hf.dvc -------------------------------------------------------------------------------- /models/Llama-2-7b-chat-hf.dvc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/models/Llama-2-7b-chat-hf.dvc -------------------------------------------------------------------------------- /params.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/params.yaml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/requirements.txt -------------------------------------------------------------------------------- /sanity_check_result/result.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/sanity_check_result/result.csv -------------------------------------------------------------------------------- /sky-training.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/sky-training.yaml -------------------------------------------------------------------------------- /sky-vscode.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/sky-vscode.yaml -------------------------------------------------------------------------------- /src/data_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/src/data_split.py -------------------------------------------------------------------------------- /src/generate_identity_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/src/generate_identity_data.py -------------------------------------------------------------------------------- /src/merge_data_splits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/src/merge_data_splits.py -------------------------------------------------------------------------------- /src/merge_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/src/merge_model.py -------------------------------------------------------------------------------- /src/process_orca_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/src/process_orca_data.py -------------------------------------------------------------------------------- /src/process_platypus_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/src/process_platypus_data.py -------------------------------------------------------------------------------- /src/prompt_formatters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/src/prompt_formatters.py -------------------------------------------------------------------------------- /src/sanity_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/src/sanity_check.py -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/src/train.py -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex000kim/ML-Pipeline-With-DVC-SkyPilot-HuggingFace/HEAD/src/utils.py --------------------------------------------------------------------------------