├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── team_recommend.md ├── actionlint.yaml ├── actions │ └── free-disk-space │ │ └── action.yml ├── dependabot.yml ├── mergify.yml └── workflows │ ├── actionlint.dockerfile │ ├── actionlint.yml │ ├── constraints-update.yml │ ├── e2e-nvidia-l4-x1.yml │ ├── e2e-nvidia-l40s-x4-release.yml │ ├── e2e-nvidia-l40s-x4.yml │ ├── functional-gpu-nvidia-t4-x1.yml │ ├── lint.yml │ ├── matchers │ ├── actionlint.json │ └── pylint.json │ ├── pypi.yaml │ ├── stale_bot.yml │ └── test.yml ├── .gitignore ├── .isort.cfg ├── .markdownlint-cli2.yaml ├── .pre-commit-config.yaml ├── .pylintrc ├── .spellcheck-en-custom.txt ├── .spellcheck.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── DCO.txt ├── LICENSE ├── Makefile ├── README.md ├── assets └── imgs │ └── overview.png ├── constraints-dev.txt ├── constraints-dev.txt.in ├── docs ├── FAQ.md ├── README.md ├── ci.md ├── data_mixing.md ├── dataset_formats.md ├── examples │ ├── README.md │ ├── annotation │ │ ├── annotation_example.ipynb │ │ ├── annotation_pipeline.yaml │ │ ├── annotation_results.jsonl │ │ ├── detailed_annotation_config.yaml │ │ └── simple_annotation_config.yaml │ ├── blocks │ │ ├── README.md │ │ └── iterblock │ │ │ ├── README.md │ │ │ ├── input.jsonl │ │ │ └── pipeline.yaml │ ├── mix_datasets │ │ ├── README.md │ │ ├── concatenate_recipe.yaml │ │ ├── dataset_1.jsonl │ │ ├── dataset_2.jsonl │ │ ├── example_mixing.py │ │ └── weighted_recipe.yaml │ └── multiple_llms │ │ ├── README.md │ │ └── pipeline.yaml ├── pipeline_config.md ├── release-strategy.md ├── subset_selection.md └── teacher_model_validation.md ├── pyproject.toml ├── requirements-dev.txt ├── requirements-files.in ├── requirements.txt ├── scripts ├── ruff.sh ├── test_freeform_skills.py ├── test_grounded_skills.py └── validate_pipelines.py ├── src └── instructlab │ ├── __init__.py │ └── sdg │ ├── __init__.py │ ├── blocks │ ├── __init__.py │ ├── block.py │ ├── filterblock.py │ ├── iterblock.py │ ├── llmblock.py │ └── utilblocks.py │ ├── checkpointing.py │ ├── cli │ └── run_pipeline.py │ ├── configs │ ├── __init__.py │ ├── knowledge │ │ ├── __init__.py │ │ ├── atomic_facts.yaml │ │ ├── detailed_summary.yaml │ │ ├── evaluate_faithfulness.yaml │ │ ├── evaluate_question.yaml │ │ ├── evaluate_relevancy.yaml │ │ ├── extractive_summary.yaml │ │ ├── generate_questions_responses.yaml │ │ ├── mcq_generation.yaml │ │ ├── simple_generate_qa.yaml │ │ └── spellcheck.yaml │ └── skills │ │ ├── __init__.py │ │ ├── contexts.yaml │ │ ├── evaluate_freeform_pair.yaml │ │ ├── evaluate_freeform_questions.yaml │ │ ├── evaluate_grounded_pair.yaml │ │ ├── evaluate_grounded_questions.yaml │ │ ├── freeform_questions.yaml │ │ ├── freeform_responses.yaml │ │ ├── grounded_questions.yaml │ │ ├── grounded_responses.yaml │ │ ├── simple_generate_qa_freeform.yaml │ │ └── simple_generate_qa_grounded.yaml │ ├── datamixing.py │ ├── encoders │ ├── __init__.py │ └── arctic_encoder.py │ ├── eval_data.py │ ├── generate_data.py │ ├── pipeline.py │ ├── pipelines │ ├── __init__.py │ ├── eval │ │ └── mmlu_bench.yaml │ ├── full │ │ ├── __init__.py │ │ ├── freeform_skills.yaml │ │ ├── grounded_skills.yaml │ │ └── knowledge.yaml │ ├── llama │ │ ├── __init__.py │ │ ├── freeform_skills.yaml │ │ ├── grounded_skills.yaml │ │ └── knowledge.yaml │ ├── schema │ │ ├── __init__.py │ │ └── v1.json │ └── simple │ │ ├── __init__.py │ │ ├── freeform_skills.yaml │ │ ├── grounded_skills.yaml │ │ └── knowledge.yaml │ ├── prompts.py │ ├── py.typed │ ├── registry.py │ ├── subset_select.py │ ├── subset_selection.py │ └── utils │ ├── __init__.py │ ├── chunkers.py │ ├── json.py │ ├── logging.py │ ├── model_formats.py │ ├── models.py │ ├── pandas.py │ ├── subset_selection_utils.py │ └── taxonomy.py ├── tests ├── __init__.py ├── conftest.py ├── functional │ ├── __init__.py │ ├── conftest.py │ ├── llama_cpp_helpers.py │ ├── test_chunkers.py │ ├── test_custom_block.py │ ├── test_examples.py │ ├── test_full_pipeline.py │ ├── test_granular_api.py │ ├── test_imports.py │ ├── test_simple_pipeline.py │ └── test_subset_selection.py ├── mockllmblock.py ├── taxonomy.py ├── test_subset_selection.py ├── testdata │ ├── custom_block.py │ ├── custom_block_pipeline.yaml │ ├── custom_prompt.py │ ├── datasets │ │ ├── auxiliary.jsonl │ │ ├── knowledge.jsonl │ │ ├── knowledge_skills.jsonl │ │ ├── precomputed_skills_07x.jsonl │ │ └── samples.jsonl │ ├── default_data_recipes │ │ ├── knowledge.yaml │ │ └── skills.yaml │ ├── leanimports.py │ ├── mock_pipelines │ │ ├── freeform_skills.yaml │ │ ├── grounded_skills.yaml │ │ └── knowledge.yaml │ ├── mock_xdg_data_dir │ │ └── instructlab │ │ │ └── sdg │ │ │ └── models │ │ │ └── config.yaml │ ├── models │ │ ├── instructlab │ │ │ └── granite-7b-lab │ │ │ │ ├── added_tokens.json │ │ │ │ ├── config.json │ │ │ │ ├── empty.safetensors │ │ │ │ ├── special_tokens_map.json │ │ │ │ ├── tokenizer.json │ │ │ │ ├── tokenizer.model │ │ │ │ └── tokenizer_config.json │ │ ├── invalid_gguf.gguf │ │ └── invalid_safetensors_dir │ │ │ └── .gitkeep │ ├── relative_path_recipe.yaml │ ├── sample_documents │ │ ├── moo_deng.md │ │ ├── moo_deng.pdf │ │ ├── phoenix.md │ │ ├── phoenix.pdf │ │ └── qna.yaml │ ├── test_valid_compositional_skill.yaml │ ├── test_valid_knowledge_skill.yaml │ └── testdata.py └── unit │ ├── __init__.py │ ├── test_checkpointing.py │ ├── test_chunkers.py │ ├── test_datamixing.py │ ├── test_default_pipeline_configs.py │ ├── test_filterblock.py │ ├── test_generate_data.py │ ├── test_iterblock.py │ ├── test_llmblock.py │ ├── test_models.py │ ├── test_pipeline.py │ ├── test_registry.py │ ├── test_sample_populator_block.py │ ├── test_taxonomy.py │ └── test_utilblocks.py └── tox.ini /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/ISSUE_TEMPLATE/bug_report.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/ISSUE_TEMPLATE/feature_request.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/team_recommend.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/ISSUE_TEMPLATE/team_recommend.md -------------------------------------------------------------------------------- /.github/actionlint.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/actionlint.yaml -------------------------------------------------------------------------------- /.github/actions/free-disk-space/action.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/actions/free-disk-space/action.yml -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/mergify.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/mergify.yml -------------------------------------------------------------------------------- /.github/workflows/actionlint.dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/actionlint.dockerfile -------------------------------------------------------------------------------- /.github/workflows/actionlint.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/actionlint.yml -------------------------------------------------------------------------------- /.github/workflows/constraints-update.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/constraints-update.yml -------------------------------------------------------------------------------- /.github/workflows/e2e-nvidia-l4-x1.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/e2e-nvidia-l4-x1.yml -------------------------------------------------------------------------------- /.github/workflows/e2e-nvidia-l40s-x4-release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/e2e-nvidia-l40s-x4-release.yml -------------------------------------------------------------------------------- /.github/workflows/e2e-nvidia-l40s-x4.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/e2e-nvidia-l40s-x4.yml -------------------------------------------------------------------------------- /.github/workflows/functional-gpu-nvidia-t4-x1.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/functional-gpu-nvidia-t4-x1.yml -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/lint.yml -------------------------------------------------------------------------------- /.github/workflows/matchers/actionlint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/matchers/actionlint.json -------------------------------------------------------------------------------- /.github/workflows/matchers/pylint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/matchers/pylint.json -------------------------------------------------------------------------------- /.github/workflows/pypi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/pypi.yaml -------------------------------------------------------------------------------- /.github/workflows/stale_bot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/stale_bot.yml -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.github/workflows/test.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.gitignore -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.isort.cfg -------------------------------------------------------------------------------- /.markdownlint-cli2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.markdownlint-cli2.yaml -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.pylintrc -------------------------------------------------------------------------------- /.spellcheck-en-custom.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.spellcheck-en-custom.txt -------------------------------------------------------------------------------- /.spellcheck.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/.spellcheck.yml -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /DCO.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/DCO.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/README.md -------------------------------------------------------------------------------- /assets/imgs/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/assets/imgs/overview.png -------------------------------------------------------------------------------- /constraints-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/constraints-dev.txt -------------------------------------------------------------------------------- /constraints-dev.txt.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/constraints-dev.txt.in -------------------------------------------------------------------------------- /docs/FAQ.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/FAQ.md -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/ci.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/ci.md -------------------------------------------------------------------------------- /docs/data_mixing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/data_mixing.md -------------------------------------------------------------------------------- /docs/dataset_formats.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/dataset_formats.md -------------------------------------------------------------------------------- /docs/examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/README.md -------------------------------------------------------------------------------- /docs/examples/annotation/annotation_example.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/annotation/annotation_example.ipynb -------------------------------------------------------------------------------- /docs/examples/annotation/annotation_pipeline.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/annotation/annotation_pipeline.yaml -------------------------------------------------------------------------------- /docs/examples/annotation/annotation_results.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/annotation/annotation_results.jsonl -------------------------------------------------------------------------------- /docs/examples/annotation/detailed_annotation_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/annotation/detailed_annotation_config.yaml -------------------------------------------------------------------------------- /docs/examples/annotation/simple_annotation_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/annotation/simple_annotation_config.yaml -------------------------------------------------------------------------------- /docs/examples/blocks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/blocks/README.md -------------------------------------------------------------------------------- /docs/examples/blocks/iterblock/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/blocks/iterblock/README.md -------------------------------------------------------------------------------- /docs/examples/blocks/iterblock/input.jsonl: -------------------------------------------------------------------------------- 1 | {"foo": "bar"} 2 | -------------------------------------------------------------------------------- /docs/examples/blocks/iterblock/pipeline.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/blocks/iterblock/pipeline.yaml -------------------------------------------------------------------------------- /docs/examples/mix_datasets/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/mix_datasets/README.md -------------------------------------------------------------------------------- /docs/examples/mix_datasets/concatenate_recipe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/mix_datasets/concatenate_recipe.yaml -------------------------------------------------------------------------------- /docs/examples/mix_datasets/dataset_1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/mix_datasets/dataset_1.jsonl -------------------------------------------------------------------------------- /docs/examples/mix_datasets/dataset_2.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/mix_datasets/dataset_2.jsonl -------------------------------------------------------------------------------- /docs/examples/mix_datasets/example_mixing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/mix_datasets/example_mixing.py -------------------------------------------------------------------------------- /docs/examples/mix_datasets/weighted_recipe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/mix_datasets/weighted_recipe.yaml -------------------------------------------------------------------------------- /docs/examples/multiple_llms/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/multiple_llms/README.md -------------------------------------------------------------------------------- /docs/examples/multiple_llms/pipeline.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/examples/multiple_llms/pipeline.yaml -------------------------------------------------------------------------------- /docs/pipeline_config.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/pipeline_config.md -------------------------------------------------------------------------------- /docs/release-strategy.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/release-strategy.md -------------------------------------------------------------------------------- /docs/subset_selection.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/subset_selection.md -------------------------------------------------------------------------------- /docs/teacher_model_validation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/docs/teacher_model_validation.md -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/requirements-dev.txt -------------------------------------------------------------------------------- /requirements-files.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/requirements-files.in -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/ruff.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/scripts/ruff.sh -------------------------------------------------------------------------------- /scripts/test_freeform_skills.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/scripts/test_freeform_skills.py -------------------------------------------------------------------------------- /scripts/test_grounded_skills.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/scripts/test_grounded_skills.py -------------------------------------------------------------------------------- /scripts/validate_pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/scripts/validate_pipelines.py -------------------------------------------------------------------------------- /src/instructlab/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/__init__.py -------------------------------------------------------------------------------- /src/instructlab/sdg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/__init__.py -------------------------------------------------------------------------------- /src/instructlab/sdg/blocks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/instructlab/sdg/blocks/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/blocks/block.py -------------------------------------------------------------------------------- /src/instructlab/sdg/blocks/filterblock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/blocks/filterblock.py -------------------------------------------------------------------------------- /src/instructlab/sdg/blocks/iterblock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/blocks/iterblock.py -------------------------------------------------------------------------------- /src/instructlab/sdg/blocks/llmblock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/blocks/llmblock.py -------------------------------------------------------------------------------- /src/instructlab/sdg/blocks/utilblocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/blocks/utilblocks.py -------------------------------------------------------------------------------- /src/instructlab/sdg/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/checkpointing.py -------------------------------------------------------------------------------- /src/instructlab/sdg/cli/run_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/cli/run_pipeline.py -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/knowledge/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/knowledge/atomic_facts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/knowledge/atomic_facts.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/knowledge/detailed_summary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/knowledge/detailed_summary.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/knowledge/evaluate_faithfulness.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/knowledge/evaluate_faithfulness.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/knowledge/evaluate_question.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/knowledge/evaluate_question.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/knowledge/evaluate_relevancy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/knowledge/evaluate_relevancy.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/knowledge/extractive_summary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/knowledge/extractive_summary.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/knowledge/generate_questions_responses.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/knowledge/generate_questions_responses.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/knowledge/mcq_generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/knowledge/mcq_generation.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/knowledge/simple_generate_qa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/knowledge/simple_generate_qa.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/knowledge/spellcheck.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/knowledge/spellcheck.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/contexts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/skills/contexts.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/evaluate_freeform_pair.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/skills/evaluate_freeform_pair.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/evaluate_freeform_questions.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/skills/evaluate_freeform_questions.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/evaluate_grounded_pair.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/skills/evaluate_grounded_pair.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/evaluate_grounded_questions.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/skills/evaluate_grounded_questions.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/freeform_questions.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/skills/freeform_questions.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/freeform_responses.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/skills/freeform_responses.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/grounded_questions.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/skills/grounded_questions.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/grounded_responses.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/skills/grounded_responses.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/simple_generate_qa_freeform.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/skills/simple_generate_qa_freeform.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/configs/skills/simple_generate_qa_grounded.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/configs/skills/simple_generate_qa_grounded.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/datamixing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/datamixing.py -------------------------------------------------------------------------------- /src/instructlab/sdg/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/encoders/__init__.py -------------------------------------------------------------------------------- /src/instructlab/sdg/encoders/arctic_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/encoders/arctic_encoder.py -------------------------------------------------------------------------------- /src/instructlab/sdg/eval_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/eval_data.py -------------------------------------------------------------------------------- /src/instructlab/sdg/generate_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/generate_data.py -------------------------------------------------------------------------------- /src/instructlab/sdg/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipeline.py -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/eval/mmlu_bench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipelines/eval/mmlu_bench.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/full/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/full/freeform_skills.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipelines/full/freeform_skills.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/full/grounded_skills.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipelines/full/grounded_skills.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/full/knowledge.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipelines/full/knowledge.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/llama/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/llama/freeform_skills.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipelines/llama/freeform_skills.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/llama/grounded_skills.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipelines/llama/grounded_skills.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/llama/knowledge.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipelines/llama/knowledge.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/schema/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/schema/v1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipelines/schema/v1.json -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/simple/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/simple/freeform_skills.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipelines/simple/freeform_skills.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/simple/grounded_skills.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipelines/simple/grounded_skills.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/pipelines/simple/knowledge.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/pipelines/simple/knowledge.yaml -------------------------------------------------------------------------------- /src/instructlab/sdg/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/prompts.py -------------------------------------------------------------------------------- /src/instructlab/sdg/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/instructlab/sdg/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/registry.py -------------------------------------------------------------------------------- /src/instructlab/sdg/subset_select.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/subset_select.py -------------------------------------------------------------------------------- /src/instructlab/sdg/subset_selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/subset_selection.py -------------------------------------------------------------------------------- /src/instructlab/sdg/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/utils/__init__.py -------------------------------------------------------------------------------- /src/instructlab/sdg/utils/chunkers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/utils/chunkers.py -------------------------------------------------------------------------------- /src/instructlab/sdg/utils/json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/utils/json.py -------------------------------------------------------------------------------- /src/instructlab/sdg/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/utils/logging.py -------------------------------------------------------------------------------- /src/instructlab/sdg/utils/model_formats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/utils/model_formats.py -------------------------------------------------------------------------------- /src/instructlab/sdg/utils/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/utils/models.py -------------------------------------------------------------------------------- /src/instructlab/sdg/utils/pandas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/utils/pandas.py -------------------------------------------------------------------------------- /src/instructlab/sdg/utils/subset_selection_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/utils/subset_selection_utils.py -------------------------------------------------------------------------------- /src/instructlab/sdg/utils/taxonomy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/src/instructlab/sdg/utils/taxonomy.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/functional/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/functional/conftest.py -------------------------------------------------------------------------------- /tests/functional/llama_cpp_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/functional/llama_cpp_helpers.py -------------------------------------------------------------------------------- /tests/functional/test_chunkers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/functional/test_chunkers.py -------------------------------------------------------------------------------- /tests/functional/test_custom_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/functional/test_custom_block.py -------------------------------------------------------------------------------- /tests/functional/test_examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/functional/test_examples.py -------------------------------------------------------------------------------- /tests/functional/test_full_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/functional/test_full_pipeline.py -------------------------------------------------------------------------------- /tests/functional/test_granular_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/functional/test_granular_api.py -------------------------------------------------------------------------------- /tests/functional/test_imports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/functional/test_imports.py -------------------------------------------------------------------------------- /tests/functional/test_simple_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/functional/test_simple_pipeline.py -------------------------------------------------------------------------------- /tests/functional/test_subset_selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/functional/test_subset_selection.py -------------------------------------------------------------------------------- /tests/mockllmblock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/mockllmblock.py -------------------------------------------------------------------------------- /tests/taxonomy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/taxonomy.py -------------------------------------------------------------------------------- /tests/test_subset_selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/test_subset_selection.py -------------------------------------------------------------------------------- /tests/testdata/custom_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/custom_block.py -------------------------------------------------------------------------------- /tests/testdata/custom_block_pipeline.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/custom_block_pipeline.yaml -------------------------------------------------------------------------------- /tests/testdata/custom_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/custom_prompt.py -------------------------------------------------------------------------------- /tests/testdata/datasets/auxiliary.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/datasets/auxiliary.jsonl -------------------------------------------------------------------------------- /tests/testdata/datasets/knowledge.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/datasets/knowledge.jsonl -------------------------------------------------------------------------------- /tests/testdata/datasets/knowledge_skills.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/datasets/knowledge_skills.jsonl -------------------------------------------------------------------------------- /tests/testdata/datasets/precomputed_skills_07x.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/datasets/precomputed_skills_07x.jsonl -------------------------------------------------------------------------------- /tests/testdata/datasets/samples.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/datasets/samples.jsonl -------------------------------------------------------------------------------- /tests/testdata/default_data_recipes/knowledge.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | - path: test/knowledge.jsonl 3 | sampling_size: 1.0 4 | -------------------------------------------------------------------------------- /tests/testdata/default_data_recipes/skills.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | - path: test/skills.jsonl 3 | sampling_size: 1.0 4 | -------------------------------------------------------------------------------- /tests/testdata/leanimports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/leanimports.py -------------------------------------------------------------------------------- /tests/testdata/mock_pipelines/freeform_skills.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/mock_pipelines/freeform_skills.yaml -------------------------------------------------------------------------------- /tests/testdata/mock_pipelines/grounded_skills.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/mock_pipelines/grounded_skills.yaml -------------------------------------------------------------------------------- /tests/testdata/mock_pipelines/knowledge.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/mock_pipelines/knowledge.yaml -------------------------------------------------------------------------------- /tests/testdata/mock_xdg_data_dir/instructlab/sdg/models/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/mock_xdg_data_dir/instructlab/sdg/models/config.yaml -------------------------------------------------------------------------------- /tests/testdata/models/instructlab/granite-7b-lab/added_tokens.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/models/instructlab/granite-7b-lab/added_tokens.json -------------------------------------------------------------------------------- /tests/testdata/models/instructlab/granite-7b-lab/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/models/instructlab/granite-7b-lab/config.json -------------------------------------------------------------------------------- /tests/testdata/models/instructlab/granite-7b-lab/empty.safetensors: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/testdata/models/instructlab/granite-7b-lab/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/models/instructlab/granite-7b-lab/special_tokens_map.json -------------------------------------------------------------------------------- /tests/testdata/models/instructlab/granite-7b-lab/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/models/instructlab/granite-7b-lab/tokenizer.json -------------------------------------------------------------------------------- /tests/testdata/models/instructlab/granite-7b-lab/tokenizer.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/models/instructlab/granite-7b-lab/tokenizer.model -------------------------------------------------------------------------------- /tests/testdata/models/instructlab/granite-7b-lab/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/models/instructlab/granite-7b-lab/tokenizer_config.json -------------------------------------------------------------------------------- /tests/testdata/models/invalid_gguf.gguf: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/testdata/models/invalid_safetensors_dir/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/testdata/relative_path_recipe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/relative_path_recipe.yaml -------------------------------------------------------------------------------- /tests/testdata/sample_documents/moo_deng.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/sample_documents/moo_deng.md -------------------------------------------------------------------------------- /tests/testdata/sample_documents/moo_deng.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/sample_documents/moo_deng.pdf -------------------------------------------------------------------------------- /tests/testdata/sample_documents/phoenix.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/sample_documents/phoenix.md -------------------------------------------------------------------------------- /tests/testdata/sample_documents/phoenix.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/sample_documents/phoenix.pdf -------------------------------------------------------------------------------- /tests/testdata/sample_documents/qna.yaml: -------------------------------------------------------------------------------- 1 | version: 3 2 | domain: pop_culture -------------------------------------------------------------------------------- /tests/testdata/test_valid_compositional_skill.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/test_valid_compositional_skill.yaml -------------------------------------------------------------------------------- /tests/testdata/test_valid_knowledge_skill.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/test_valid_knowledge_skill.yaml -------------------------------------------------------------------------------- /tests/testdata/testdata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/testdata/testdata.py -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/test_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_checkpointing.py -------------------------------------------------------------------------------- /tests/unit/test_chunkers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_chunkers.py -------------------------------------------------------------------------------- /tests/unit/test_datamixing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_datamixing.py -------------------------------------------------------------------------------- /tests/unit/test_default_pipeline_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_default_pipeline_configs.py -------------------------------------------------------------------------------- /tests/unit/test_filterblock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_filterblock.py -------------------------------------------------------------------------------- /tests/unit/test_generate_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_generate_data.py -------------------------------------------------------------------------------- /tests/unit/test_iterblock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_iterblock.py -------------------------------------------------------------------------------- /tests/unit/test_llmblock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_llmblock.py -------------------------------------------------------------------------------- /tests/unit/test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_models.py -------------------------------------------------------------------------------- /tests/unit/test_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_pipeline.py -------------------------------------------------------------------------------- /tests/unit/test_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_registry.py -------------------------------------------------------------------------------- /tests/unit/test_sample_populator_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_sample_populator_block.py -------------------------------------------------------------------------------- /tests/unit/test_taxonomy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_taxonomy.py -------------------------------------------------------------------------------- /tests/unit/test_utilblocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tests/unit/test_utilblocks.py -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructlab/sdg/HEAD/tox.ini --------------------------------------------------------------------------------