├── .gitignore ├── .gitmodules ├── LICENSE ├── README-SC2INST.md ├── README.md ├── evaluation ├── README.md ├── ds_1000.py ├── evalplus_results │ ├── evalplus-starcoder2-15b-instruct-v0.1-humaneval.jsonl │ ├── evalplus-starcoder2-15b-instruct-v0.1-humaneval_eval_results.json │ ├── evalplus-starcoder2-15b-instruct-v0.1-mbpp.jsonl │ └── evalplus-starcoder2-15b-instruct-v0.1-mbpp_eval_results.json ├── text2code.py └── text2code_vllm.py ├── prompts └── self-ossinstruct-fewshot.txt ├── pyproject.toml ├── requirements.txt ├── sanitize.sh ├── seed_gathering ├── README.md ├── benchmark_data.py ├── filter_dataset.py ├── generate_from_the_stack.py ├── high_quality_subset.py ├── requirements.txt └── tree_sitter_parser.py ├── self_ossinstruct_sc2.sh ├── self_ossinstruct_sc2_parallel.sh └── src └── star_align ├── __init__.py ├── clean_data.py ├── collect_snippets.py ├── decontamination ├── benchmark_data.py ├── find_substrings.py └── utils.py ├── execution_filter.py ├── llm_wrapper.py ├── minhash_dedup.py ├── prompt_template.py ├── sanitize_data.py ├── self_ossinstruct.py ├── train.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/.gitmodules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/LICENSE -------------------------------------------------------------------------------- /README-SC2INST.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/README-SC2INST.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/README.md -------------------------------------------------------------------------------- /evaluation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/evaluation/README.md -------------------------------------------------------------------------------- /evaluation/ds_1000.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/evaluation/ds_1000.py -------------------------------------------------------------------------------- /evaluation/evalplus_results/evalplus-starcoder2-15b-instruct-v0.1-humaneval.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/evaluation/evalplus_results/evalplus-starcoder2-15b-instruct-v0.1-humaneval.jsonl -------------------------------------------------------------------------------- /evaluation/evalplus_results/evalplus-starcoder2-15b-instruct-v0.1-humaneval_eval_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/evaluation/evalplus_results/evalplus-starcoder2-15b-instruct-v0.1-humaneval_eval_results.json -------------------------------------------------------------------------------- /evaluation/evalplus_results/evalplus-starcoder2-15b-instruct-v0.1-mbpp.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/evaluation/evalplus_results/evalplus-starcoder2-15b-instruct-v0.1-mbpp.jsonl -------------------------------------------------------------------------------- /evaluation/evalplus_results/evalplus-starcoder2-15b-instruct-v0.1-mbpp_eval_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/evaluation/evalplus_results/evalplus-starcoder2-15b-instruct-v0.1-mbpp_eval_results.json -------------------------------------------------------------------------------- /evaluation/text2code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/evaluation/text2code.py -------------------------------------------------------------------------------- /evaluation/text2code_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/evaluation/text2code_vllm.py -------------------------------------------------------------------------------- /prompts/self-ossinstruct-fewshot.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/prompts/self-ossinstruct-fewshot.txt -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/requirements.txt -------------------------------------------------------------------------------- /sanitize.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/sanitize.sh -------------------------------------------------------------------------------- /seed_gathering/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/seed_gathering/README.md -------------------------------------------------------------------------------- /seed_gathering/benchmark_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/seed_gathering/benchmark_data.py -------------------------------------------------------------------------------- /seed_gathering/filter_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/seed_gathering/filter_dataset.py -------------------------------------------------------------------------------- /seed_gathering/generate_from_the_stack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/seed_gathering/generate_from_the_stack.py -------------------------------------------------------------------------------- /seed_gathering/high_quality_subset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/seed_gathering/high_quality_subset.py -------------------------------------------------------------------------------- /seed_gathering/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/seed_gathering/requirements.txt -------------------------------------------------------------------------------- /seed_gathering/tree_sitter_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/seed_gathering/tree_sitter_parser.py -------------------------------------------------------------------------------- /self_ossinstruct_sc2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/self_ossinstruct_sc2.sh -------------------------------------------------------------------------------- /self_ossinstruct_sc2_parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/self_ossinstruct_sc2_parallel.sh -------------------------------------------------------------------------------- /src/star_align/__init__.py: -------------------------------------------------------------------------------- 1 | from . import utils 2 | -------------------------------------------------------------------------------- /src/star_align/clean_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/clean_data.py -------------------------------------------------------------------------------- /src/star_align/collect_snippets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/collect_snippets.py -------------------------------------------------------------------------------- /src/star_align/decontamination/benchmark_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/decontamination/benchmark_data.py -------------------------------------------------------------------------------- /src/star_align/decontamination/find_substrings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/decontamination/find_substrings.py -------------------------------------------------------------------------------- /src/star_align/decontamination/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/decontamination/utils.py -------------------------------------------------------------------------------- /src/star_align/execution_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/execution_filter.py -------------------------------------------------------------------------------- /src/star_align/llm_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/llm_wrapper.py -------------------------------------------------------------------------------- /src/star_align/minhash_dedup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/minhash_dedup.py -------------------------------------------------------------------------------- /src/star_align/prompt_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/prompt_template.py -------------------------------------------------------------------------------- /src/star_align/sanitize_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/sanitize_data.py -------------------------------------------------------------------------------- /src/star_align/self_ossinstruct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/self_ossinstruct.py -------------------------------------------------------------------------------- /src/star_align/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/train.py -------------------------------------------------------------------------------- /src/star_align/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/selfcodealign/HEAD/src/star_align/utils.py --------------------------------------------------------------------------------