├── .gitignore ├── LICENSE ├── README.md ├── dataset ├── commitpack │ ├── licenses_optout_langs.py │ ├── programming_languages.json │ ├── renamedirs.sh │ ├── scrape_github.py │ ├── shard.py │ ├── shard.sh │ └── sql │ │ ├── sql_1_commits_table_base.sql │ │ ├── sql_2_commits_table_dedup.sql │ │ ├── sql_3_commits_table_dedup_diff.sql │ │ ├── sql_4_commits_table_dedup_files.sql │ │ └── sql_5.sql ├── commitpackft │ ├── commitpackft_filters1.py │ ├── commitpackft_filters2.py │ └── search_examples.py ├── oasst │ ├── add_commitpackft.py │ ├── count_languages.py │ ├── filter_oasst.py │ ├── oasstcommitpackft.jsonl │ ├── oasstcommitpackftmanual.jsonl │ ├── raw.jsonl │ └── rawin.jsonl ├── other │ ├── add_proba.py │ ├── add_proba_missing.py │ ├── check_diffs.py │ ├── commits_to_jsonl.py │ ├── filter_diff_old.py │ ├── filter_old.py │ ├── get_messages.py │ ├── instruct_bottom_500.json │ ├── instruct_top_500.json │ └── misc_scripts.py └── xp3x │ └── filter_xp3x.py ├── evaluation ├── create │ ├── create_instructions.py │ ├── humaneval-x │ │ ├── .gitattributes │ │ ├── HumanEval_original.jsonl │ │ ├── README.md │ │ ├── data │ │ │ ├── cpp │ │ │ │ └── data │ │ │ │ │ ├── humaneval.jsonl │ │ │ │ │ ├── humanevalpack.json │ │ │ │ │ └── humanevalpack.jsonl │ │ │ ├── go │ │ │ │ └── data │ │ │ │ │ ├── humaneval.jsonl │ │ │ │ │ ├── humanevalpack.json │ │ │ │ │ └── humanevalpack.jsonl │ │ │ ├── java │ │ │ │ └── data │ │ │ │ │ ├── humaneval.jsonl │ │ │ │ │ ├── humanevalpack.json │ │ │ │ │ └── humanevalpack.jsonl │ │ │ ├── js │ │ │ │ └── data │ │ │ │ │ ├── humaneval.jsonl │ │ │ │ │ ├── humanevalpack.json │ │ │ │ │ └── humanevalpack.jsonl │ │ │ ├── python │ │ │ │ └── data │ │ │ │ │ ├── humaneval.jsonl │ │ │ │ │ ├── humanevalpack.json │ │ │ │ │ └── humanevalpack.jsonl │ │ │ └── rust │ │ │ │ └── data │ │ │ │ ├── humaneval.jsonl │ │ │ │ ├── humanevalpack.json │ │ │ │ └── humanevalpack.jsonl │ │ └── humaneval-x.py │ └── prepare_humaneval.py ├── other │ ├── QuixBugs │ │ ├── .gitignore │ │ ├── JavaDeserialization.java │ │ ├── LICENSE │ │ ├── README.md │ │ ├── _config.yml │ │ ├── build.gradle │ │ ├── conftest.py │ │ ├── correct_java_programs │ │ │ ├── BITCOUNT.java │ │ │ ├── BREADTH_FIRST_SEARCH.java │ │ │ ├── BUCKETSORT.java │ │ │ ├── DEPTH_FIRST_SEARCH.java │ │ │ ├── DETECT_CYCLE.java │ │ │ ├── FIND_FIRST_IN_SORTED.java │ │ │ ├── FIND_IN_SORTED.java │ │ │ ├── FLATTEN.java │ │ │ ├── GCD.java │ │ │ ├── GET_FACTORS.java │ │ │ ├── HANOI.java │ │ │ ├── IS_VALID_PARENTHESIZATION.java │ │ │ ├── KHEAPSORT.java │ │ │ ├── KNAPSACK.java │ │ │ ├── KTH.java │ │ │ ├── LCS_LENGTH.java │ │ │ ├── LEVENSHTEIN.java │ │ │ ├── LIS.java │ │ │ ├── LONGEST_COMMON_SUBSEQUENCE.java │ │ │ ├── MAX_SUBLIST_SUM.java │ │ │ ├── MERGESORT.java │ │ │ ├── MINIMUM_SPANNING_TREE.java │ │ │ ├── NEXT_PALINDROME.java │ │ │ ├── NEXT_PERMUTATION.java │ │ │ ├── PASCAL.java │ │ │ ├── POSSIBLE_CHANGE.java │ │ │ ├── POWERSET.java │ │ │ ├── QUICKSORT.java │ │ │ ├── REVERSE_LINKED_LIST.java │ │ │ ├── RPN_EVAL.java │ │ │ ├── SHORTEST_PATHS.java │ │ │ ├── SHORTEST_PATH_LENGTH.java │ │ │ ├── SHORTEST_PATH_LENGTHS.java │ │ │ ├── SHUNTING_YARD.java │ │ │ ├── SIEVE.java │ │ │ ├── SQRT.java │ │ │ ├── SUBSEQUENCES.java │ │ │ ├── TOPOLOGICAL_ORDERING.java │ │ │ ├── TO_BASE.java │ │ │ └── WRAP.java │ │ ├── correct_python_programs │ │ │ ├── bitcount.py │ │ │ ├── breadth_first_search.py │ │ │ ├── breadth_first_search_test.py │ │ │ ├── bucketsort.py │ │ │ ├── depth_first_search.py │ │ │ ├── depth_first_search_test.py │ │ │ ├── detect_cycle.py │ │ │ ├── detect_cycle_test.py │ │ │ ├── find_first_in_sorted.py │ │ │ ├── find_in_sorted.py │ │ │ ├── flatten.py │ │ │ ├── gcd.py │ │ │ ├── get_factors.py │ │ │ ├── hanoi.py │ │ │ ├── is_valid_parenthesization.py │ │ │ ├── kheapsort.py │ │ │ ├── knapsack.py │ │ │ ├── kth.py │ │ │ ├── lcs_length.py │ │ │ ├── levenshtein.py │ │ │ ├── lis.py │ │ │ ├── longest_common_subsequence.py │ │ │ ├── max_sublist_sum.py │ │ │ ├── mergesort.py │ │ │ ├── minimum_spanning_tree.py │ │ │ ├── minimum_spanning_tree_test.py │ │ │ ├── next_palindrome.py │ │ │ ├── next_permutation.py │ │ │ ├── node.py │ │ │ ├── pascal.py │ │ │ ├── possible_change.py │ │ │ ├── powerset.py │ │ │ ├── quicksort.py │ │ │ ├── reverse_linked_list.py │ │ │ ├── reverse_linked_list_test.py │ │ │ ├── rpn_eval.py │ │ │ ├── shortest_path_length.py │ │ │ ├── shortest_path_length_test.py │ │ │ ├── shortest_path_lengths.py │ │ │ ├── shortest_path_lengths_test.py │ │ │ ├── shortest_paths.py │ │ │ ├── shortest_paths_test.py │ │ │ ├── shunting_yard.py │ │ │ ├── sieve.py │ │ │ ├── sqrt.py │ │ │ ├── subsequences.py │ │ │ ├── to_base.py │ │ │ ├── topological_ordering.py │ │ │ ├── topological_ordering_test.py │ │ │ └── wrap.py │ │ ├── final_progress.txt │ │ ├── format.py │ │ ├── java_programs │ │ │ ├── BITCOUNT.java │ │ │ ├── BREADTH_FIRST_SEARCH.java │ │ │ ├── BUCKETSORT.java │ │ │ ├── DEPTH_FIRST_SEARCH.java │ │ │ ├── DETECT_CYCLE.java │ │ │ ├── FIND_FIRST_IN_SORTED.java │ │ │ ├── FIND_IN_SORTED.java │ │ │ ├── FLATTEN.java │ │ │ ├── GCD.java │ │ │ ├── GET_FACTORS.java │ │ │ ├── HANOI.java │ │ │ ├── IS_VALID_PARENTHESIZATION.java │ │ │ ├── KHEAPSORT.java │ │ │ ├── KNAPSACK.java │ │ │ ├── KTH.java │ │ │ ├── LCS_LENGTH.java │ │ │ ├── LEVENSHTEIN.java │ │ │ ├── LIS.java │ │ │ ├── LONGEST_COMMON_SUBSEQUENCE.java │ │ │ ├── MAX_SUBLIST_SUM.java │ │ │ ├── MERGESORT.java │ │ │ ├── MINIMUM_SPANNING_TREE.java │ │ │ ├── NEXT_PALINDROME.java │ │ │ ├── NEXT_PERMUTATION.java │ │ │ ├── Node.java │ │ │ ├── PASCAL.java │ │ │ ├── POSSIBLE_CHANGE.java │ │ │ ├── POWERSET.java │ │ │ ├── QUICKSORT.java │ │ │ ├── REVERSE_LINKED_LIST.java │ │ │ ├── RPN_EVAL.java │ │ │ ├── SHORTEST_PATHS.java │ │ │ ├── SHORTEST_PATH_LENGTH.java │ │ │ ├── SHORTEST_PATH_LENGTHS.java │ │ │ ├── SHUNTING_YARD.java │ │ │ ├── SIEVE.java │ │ │ ├── SQRT.java │ │ │ ├── SUBSEQUENCES.java │ │ │ ├── TOPOLOGICAL_ORDERING.java │ │ │ ├── TO_BASE.java │ │ │ ├── WRAP.java │ │ │ ├── WeightedEdge.java │ │ │ └── extra │ │ │ │ ├── BINARY_SEARCH.java │ │ │ │ ├── MAXIMUM_WEIGHTED_SUBSET.java │ │ │ │ ├── MODULO_INVERSE.java │ │ │ │ ├── NESTED_PARENS.java │ │ │ │ ├── NESTED_PARENS_orig.java │ │ │ │ └── QC_LEVENSHTEIN.java │ │ ├── java_testcases │ │ │ ├── BREADTH_FIRST_SEARCH_TEST.java │ │ │ ├── DEPTH_FIRST_SEARCH_TEST.java │ │ │ ├── DETECT_CYCLE_TEST.java │ │ │ ├── JavaTest.java │ │ │ ├── MINIMUM_SPANNING_TREE_TEST.java │ │ │ ├── REVERSE_LINKED_LIST_TEST.java │ │ │ ├── SHORTEST_PATHS_TEST.java │ │ │ ├── SHORTEST_PATH_LENGTHS_TEST.java │ │ │ ├── SHORTEST_PATH_LENGTH_TEST.java │ │ │ ├── TOPOLOGICAL_ORDERING_TEST.java │ │ │ ├── TestsGenerator.java │ │ │ └── junit │ │ │ │ ├── BITCOUNT_TEST.java │ │ │ │ ├── BREADTH_FIRST_SEARCH_TEST.java │ │ │ │ ├── BUCKETSORT_TEST.java │ │ │ │ ├── DEPTH_FIRST_SEARCH_TEST.java │ │ │ │ ├── DETECT_CYCLE_TEST.java │ │ │ │ ├── FIND_FIRST_IN_SORTED_TEST.java │ │ │ │ ├── FIND_IN_SORTED_TEST.java │ │ │ │ ├── FLATTEN_TEST.java │ │ │ │ ├── GCD_TEST.java │ │ │ │ ├── GET_FACTORS_TEST.java │ │ │ │ ├── HANOI_TEST.java │ │ │ │ ├── IS_VALID_PARENTHESIZATION_TEST.java │ │ │ │ ├── KHEAPSORT_TEST.java │ │ │ │ ├── KNAPSACK_TEST.java │ │ │ │ ├── KTH_TEST.java │ │ │ │ ├── LCS_LENGTH_TEST.java │ │ │ │ ├── LEVENSHTEIN_TEST.java │ │ │ │ ├── LIS_TEST.java │ │ │ │ ├── LONGEST_COMMON_SUBSEQUENCE_TEST.java │ │ │ │ ├── MAX_SUBLIST_SUM_TEST.java │ │ │ │ ├── MERGESORT_TEST.java │ │ │ │ ├── MINIMUM_SPANNING_TREE_TEST.java │ │ │ │ ├── NEXT_PALINDROME_TEST.java │ │ │ │ ├── NEXT_PERMUTATION_TEST.java │ │ │ │ ├── PASCAL_TEST.java │ │ │ │ ├── POSSIBLE_CHANGE_TEST.java │ │ │ │ ├── POWERSET_TEST.java │ │ │ │ ├── QUICKSORT_TEST.java │ │ │ │ ├── QuixFixOracleHelper.java │ │ │ │ ├── REVERSE_LINKED_LIST_TEST.java │ │ │ │ ├── RPN_EVAL_TEST.java │ │ │ │ ├── SHORTEST_PATHS_TEST.java │ │ │ │ ├── SHORTEST_PATH_LENGTHS_TEST.java │ │ │ │ ├── SHORTEST_PATH_LENGTH_TEST.java │ │ │ │ ├── SHUNTING_YARD_TEST.java │ │ │ │ ├── SIEVE_TEST.java │ │ │ │ ├── SQRT_TEST.java │ │ │ │ ├── SUBSEQUENCES_TEST.java │ │ │ │ ├── TOPOLOGICAL_ORDERING_TEST.java │ │ │ │ ├── TO_BASE_TEST.java │ │ │ │ ├── WRAP_TEST.java │ │ │ │ └── crt_program │ │ │ │ ├── BITCOUNT_TEST.java │ │ │ │ ├── BREADTH_FIRST_SEARCH_TEST.java │ │ │ │ ├── BUCKETSORT_TEST.java │ │ │ │ ├── DEPTH_FIRST_SEARCH_TEST.java │ │ │ │ ├── DETECT_CYCLE_TEST.java │ │ │ │ ├── FIND_FIRST_IN_SORTED_TEST.java │ │ │ │ ├── FIND_IN_SORTED_TEST.java │ │ │ │ ├── FLATTEN_TEST.java │ │ │ │ ├── GCD_TEST.java │ │ │ │ ├── GET_FACTORS_TEST.java │ │ │ │ ├── HANOI_TEST.java │ │ │ │ ├── IS_VALID_PARENTHESIZATION_TEST.java │ │ │ │ ├── KHEAPSORT_TEST.java │ │ │ │ ├── KNAPSACK_TEST.java │ │ │ │ ├── KTH_TEST.java │ │ │ │ ├── LCS_LENGTH_TEST.java │ │ │ │ ├── LEVENSHTEIN_TEST.java │ │ │ │ ├── LIS_TEST.java │ │ │ │ ├── LONGEST_COMMON_SUBSEQUENCE_TEST.java │ │ │ │ ├── MAX_SUBLIST_SUM_TEST.java │ │ │ │ ├── MERGESORT_TEST.java │ │ │ │ ├── MINIMUM_SPANNING_TREE_TEST.java │ │ │ │ ├── NEXT_PALINDROME_TEST.java │ │ │ │ ├── NEXT_PERMUTATION_TEST.java │ │ │ │ ├── PASCAL_TEST.java │ │ │ │ ├── POSSIBLE_CHANGE_TEST.java │ │ │ │ ├── POWERSET_TEST.java │ │ │ │ ├── QUICKSORT_TEST.java │ │ │ │ ├── QuixFixOracleHelper.java │ │ │ │ ├── REVERSE_LINKED_LIST_TEST.java │ │ │ │ ├── RPN_EVAL_TEST.java │ │ │ │ ├── SHORTEST_PATHS_TEST.java │ │ │ │ ├── SHORTEST_PATH_LENGTHS_TEST.java │ │ │ │ ├── SHORTEST_PATH_LENGTH_TEST.java │ │ │ │ ├── SHUNTING_YARD_TEST.java │ │ │ │ ├── SIEVE_TEST.java │ │ │ │ ├── SQRT_TEST.java │ │ │ │ ├── SUBSEQUENCES_TEST.java │ │ │ │ ├── TOPOLOGICAL_ORDERING_TEST.java │ │ │ │ ├── TO_BASE_TEST.java │ │ │ │ └── WRAP_TEST.java │ │ ├── json_testcases │ │ │ ├── bitcount.json │ │ │ ├── bucketsort.json │ │ │ ├── find_first_in_sorted.json │ │ │ ├── find_in_sorted.json │ │ │ ├── flatten.json │ │ │ ├── gcd.json │ │ │ ├── get_factors.json │ │ │ ├── hanoi.json │ │ │ ├── is_valid_parenthesization.json │ │ │ ├── kheapsort.json │ │ │ ├── knapsack.json │ │ │ ├── kth.json │ │ │ ├── lcs_length.json │ │ │ ├── levenshtein.json │ │ │ ├── lis.json │ │ │ ├── longest_common_subsequence.json │ │ │ ├── max_sublist_sum.json │ │ │ ├── mergesort.json │ │ │ ├── next_palindrome.json │ │ │ ├── next_permutation.json │ │ │ ├── pascal.json │ │ │ ├── possible_change.json │ │ │ ├── powerset.json │ │ │ ├── quicksort.json │ │ │ ├── rpn_eval.json │ │ │ ├── shunting_yard.json │ │ │ ├── sieve.json │ │ │ ├── sqrt.json │ │ │ ├── subsequences.json │ │ │ ├── to_base.json │ │ │ └── wrap.json │ │ ├── legal_notes.txt │ │ ├── python_programs │ │ │ ├── bitcount.py │ │ │ ├── breadth_first_search.py │ │ │ ├── breadth_first_search_test.py │ │ │ ├── bucketsort.py │ │ │ ├── depth_first_search.py │ │ │ ├── depth_first_search_test.py │ │ │ ├── detect_cycle.py │ │ │ ├── detect_cycle_test.py │ │ │ ├── find_first_in_sorted.py │ │ │ ├── find_in_sorted.py │ │ │ ├── flatten.py │ │ │ ├── gcd.py │ │ │ ├── get_factors.py │ │ │ ├── hanoi.py │ │ │ ├── is_valid_parenthesization.py │ │ │ ├── kheapsort.py │ │ │ ├── knapsack.py │ │ │ ├── kth.py │ │ │ ├── lcs_length.py │ │ │ ├── levenshtein.py │ │ │ ├── lis.py │ │ │ ├── longest_common_subsequence.py │ │ │ ├── max_sublist_sum.py │ │ │ ├── mergesort.py │ │ │ ├── minimum_spanning_tree.py │ │ │ ├── minimum_spanning_tree_test.py │ │ │ ├── next_palindrome.py │ │ │ ├── next_permutation.py │ │ │ ├── node.py │ │ │ ├── pascal.py │ │ │ ├── possible_change.py │ │ │ ├── powerset.py │ │ │ ├── quicksort.py │ │ │ ├── reverse_linked_list.py │ │ │ ├── reverse_linked_list_test.py │ │ │ ├── rpn_eval.py │ │ │ ├── shortest_path_length.py │ │ │ ├── shortest_path_length_test.py │ │ │ ├── shortest_path_lengths.py │ │ │ ├── shortest_path_lengths_test.py │ │ │ ├── shortest_paths.py │ │ │ ├── shortest_paths_test.py │ │ │ ├── shunting_yard.py │ │ │ ├── sieve.py │ │ │ ├── sqrt.py │ │ │ ├── subsequences.py │ │ │ ├── to_base.py │ │ │ ├── topological_ordering.py │ │ │ ├── topological_ordering_test.py │ │ │ └── wrap.py │ │ ├── python_testcases │ │ │ ├── load_testdata.py │ │ │ ├── node.py │ │ │ ├── test_bitcount.py │ │ │ ├── test_breadth_first_search.py │ │ │ ├── test_bucketsort.py │ │ │ ├── test_depth_first_search.py │ │ │ ├── test_detect_cycle.py │ │ │ ├── test_find_first_in_sorted.py │ │ │ ├── test_find_in_sorted.py │ │ │ ├── test_flatten.py │ │ │ ├── test_gcd.py │ │ │ ├── test_get_factors.py │ │ │ ├── test_hanoi.py │ │ │ ├── test_is_valid_parenthesization.py │ │ │ ├── test_kheapsort.py │ │ │ ├── test_knapsack.py │ │ │ ├── test_kth.py │ │ │ ├── test_lcs_length.py │ │ │ ├── test_levenshtein.py │ │ │ ├── test_lis.py │ │ │ ├── test_longest_common_subsequence.py │ │ │ ├── test_max_sublist_sum.py │ │ │ ├── test_mergesort.py │ │ │ ├── test_minimum_spanning_tree.py │ │ │ ├── test_next_palindrome.py │ │ │ ├── test_next_permutation.py │ │ │ ├── test_pascal.py │ │ │ ├── test_possible_change.py │ │ │ ├── test_powerset.py │ │ │ ├── test_quicksort.py │ │ │ ├── test_reverse_linked_list.py │ │ │ ├── test_rpn_eval.py │ │ │ ├── test_shortest_path_length.py │ │ │ ├── test_shortest_path_lengths.py │ │ │ ├── test_shortest_paths.py │ │ │ ├── test_shunting_yard.py │ │ │ ├── test_sieve.py │ │ │ ├── test_sqrt.py │ │ │ ├── test_subsequences.py │ │ │ ├── test_to_base.py │ │ │ ├── test_topological_ordering.py │ │ │ └── test_wrap.py │ │ ├── quixbugs.pdf │ │ ├── quixbugs_python.jsonl │ │ └── tester.py │ ├── examples_santacoderpjj.py │ ├── humanevalpack_stats.py │ ├── load_model.py │ ├── nlg_eval.py │ ├── nlg_eval_avg.py │ ├── rename.py │ ├── utils_hack.py │ └── val.sh └── run │ ├── check_missing.py │ ├── clean_explanations.py │ ├── eval_scripts │ ├── bloomz │ │ └── eval_bloomz_humanevalfix.sh │ ├── diffcodegen2b │ │ └── eval_diffcodegen2b_humanevalfix.sh │ ├── eval_codegeex2.sh │ ├── eval_humanevalexplaindescribe_range.sh │ ├── eval_humanevalexplainsynthesize_range.sh │ ├── eval_humanevalfix_range.sh │ ├── eval_humanevalsynthesize_range.sh │ ├── instructcodet5p │ │ ├── eval_instructcodet5p_humanevalexplain.sh │ │ ├── eval_instructcodet5p_humanevalfix.sh │ │ └── eval_instructcodet5p_humanevalsynthesize.sh │ ├── octocoder │ │ ├── eval_octocoder_humanevalexplain.sh │ │ ├── eval_octocoder_humanevalfix.sh │ │ └── eval_octocoder_humanevalsynthesize.sh │ ├── starchatbeta │ │ ├── eval_starchatbeta_humanevalexplain.sh │ │ ├── eval_starchatbeta_humanevalfix.sh │ │ └── eval_starchatbeta_humanevalsynthesize.sh │ ├── starcoder │ │ └── eval_starcoder_humanevalfix.sh │ └── wizardcoder │ │ ├── eval_wizardcoder_humanevalexplaindescribe_range.sh │ │ ├── eval_wizardcoder_humanevalexplainsynthesize_range.sh │ │ ├── eval_wizardcoder_humanevalfix_range.sh │ │ ├── eval_wizardcoder_humanevalsynthesize.sh │ │ └── eval_wizardcoder_humanevalsynthesize_range.sh │ ├── humanevalpack_evaluation.ipynb │ ├── humanevalpack_setup.sh │ ├── humanevalpack_setup_java.sh │ ├── merge_generations.py │ └── postprocess_humanevalpack_gpt4.py ├── finetuning ├── santacoder │ ├── finetune.py │ ├── finetune.sh │ └── zero_stage1_config.json └── starcoder │ ├── README.md │ ├── finetune.py │ ├── finetune.sh │ ├── languages.py │ └── merge-peft-adapters.py ├── training ├── check_ckpt_equivalence.py ├── conv_lumi.sh ├── convert_large.sh ├── finetune_santacoder.sh ├── finetune_santacoder_constant.sh ├── finetune_santacoder_ql.sh ├── finetune_santacoderlong.sh ├── finetune_santacoderlong64bs.sh ├── finetune_santacoderlongfullloss.sh ├── finetune_santacoderlonglr.sh ├── finetune_santacoderlonglrinput.sh ├── finetune_starcoderbase.sh ├── preprocess.sh ├── preprocess_santacoderpack.sh ├── pretraining_santacoderpack.sh ├── sbatch_xp3commits.sh └── to_meg.sh └── visuals ├── OCTOPACK_5MIN_SLIDES.pdf ├── OCTOPACK_POSTER.png ├── OCTOPACK_POSTER.pptx ├── OCTOPACK_THUMB.png ├── ablations.pdf ├── banner.png ├── count_lines.sh ├── distribution.pdf ├── distribution_languages.py ├── distribution_tasks.py ├── humanevalpack.pdf ├── kilobytes.txt ├── kilobytes_filtered.txt ├── line_counts.txt ├── line_counts_filtered.txt ├── logo ├── logo_old │ ├── octobadpack_v2.png │ ├── octopack_v1.png │ ├── octopack_v1_transp.png │ ├── octopack_v2.png │ └── octopack_v3.png ├── octobadpack.png └── octopack.png ├── main.pdf ├── misc ├── message_category.json └── python.png ├── plots.ipynb ├── tasks.pdf └── visuals.drawio /.gitignore: -------------------------------------------------------------------------------- 1 | *DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Muennighoff 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dataset/commitpack/renamedirs.sh: -------------------------------------------------------------------------------- 1 | for i in */; do mv -v "$i" "$(echo "$i" | tr '[:upper:]' '[:lower:]' | tr ' ' '-')"; done 2 | mv "cap'n-proto" capn-proto 3 | mv 'graphviz-(dot)' graphviz-dot 4 | mv "ren'py" renpy -------------------------------------------------------------------------------- /dataset/commitpack/shard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | input_file="diffs_33554432_41943040.jsonl" 4 | input_file="diffs_58720256_67108864.jsonl" 5 | input_file="diffs_0_8388608.jsonl" 6 | total_lines=8388608 7 | num_shards=16 8 | 9 | lines_per_shard=$((total_lines / num_shards)) 10 | start_line=1 11 | 12 | for i in $(seq 1 $num_shards); do 13 | end_line=$((start_line + lines_per_shard - 1)) 14 | start_range=$((start_line + 0)) 15 | end_range=$((end_line + 0)) 16 | output_file="diffs_${start_range}_${end_range}.jsonl" 17 | sed -n "${start_line},${end_line}p" $input_file > $output_file 18 | start_line=$((end_line + 1)) 19 | done 20 | 21 | -------------------------------------------------------------------------------- /dataset/commitpack/sql/sql_2_commits_table_dedup.sql: -------------------------------------------------------------------------------- 1 | SELECT commit, subject, message, STRING_AGG(unnested_repo_name), license AS repos 2 | FROM `commits_table_base` 3 | GROUP BY commit, subject, message, license 4 | -------------------------------------------------------------------------------- /dataset/commitpack/sql/sql_3_commits_table_dedup_diff.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM ( 3 | SELECT 4 | commit,subject,message,repos,difference,license 5 | FROM 6 | `commits_table_dedup` AS commits_table_dedup 7 | JOIN ( 8 | SELECT 9 | commit AS commit_base,difference 10 | FROM 11 | `bigquery-public-data.github_repos.commits` AS commits_table_base 12 | ) commits_table_base 13 | ON 14 | commits_table_base.commit_base = commits_table_dedup.commit 15 | ) 16 | -------------------------------------------------------------------------------- /dataset/commitpack/sql/sql_4_commits_table_dedup_files.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | commit,subject,message,repos,license,d.old_path as old_file,d.new_path as new_file 3 | FROM 4 | `commits_table_dedup_difference` AS commits_table, 5 | UNNEST(difference) AS d 6 | WHERE (d.old_path = d.new_path) AND (d.old_path IS NOT NULL) AND (d.new_path IS NOT NULL) 7 | -------------------------------------------------------------------------------- /dataset/commitpack/sql/sql_5.sql: -------------------------------------------------------------------------------- 1 | SELECT commit,repos,licenses 2 | FROM ( 3 | ( 4 | SELECT commit AS commit_base 5 | FROM `commits_table_dedup_files` 6 | GROUP BY commit 7 | HAVING COUNT(*) = 1 8 | ) 9 | JOIN ( 10 | SELECT 11 | commit,subject,message,repos,old_file,new_file 12 | FROM 13 | `commits_table_dedup_files` AS commits_table_base 14 | ) commits_table_base 15 | ON commits_table_base.commit = commit_base 16 | ) 17 | -------------------------------------------------------------------------------- /dataset/commitpackft/search_examples.py: -------------------------------------------------------------------------------- 1 | 2 | from datasets import load_dataset 3 | import os 4 | import glob 5 | 6 | paths = list(glob.glob("*.jsonl")) 7 | ds = load_dataset("json", data_files=paths)["train"] 8 | 9 | #dsf = ds.filter(lambda x: "Fix" in x["subject"]) 10 | ds = ds.filter(lambda x: any([y in x["new_contents"] for y in ["torch", "numpy", "tensorflow", "chainer"]])) # "tensorflow", "jax" 11 | ds = ds.filter(lambda x: len(x["old_contents"]) > 0) # "tensorflow", "jax" 12 | ds = ds.filter(lambda x: len(x["subject"]) < 50) # "tensorflow", "jax" 13 | ds = ds.filter(lambda x: "version" not in x["subject"]) # "tensorflow", "jax" 14 | ds = ds.filter(lambda x: 20 < (len(x["old_contents"]) + len(x["new_contents"])) < 1000) 15 | -------------------------------------------------------------------------------- /dataset/other/add_proba.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import os 4 | import datasets 5 | 6 | NUM_PROC = 16 7 | if __name__ == "__main__": 8 | for lang in ["python", "java", "javascript"]: 9 | 10 | paths = os.listdir(f"tasky-commits/{lang}") 11 | paths = [f"tasky-commits/{lang}/{path}" for path in paths] 12 | ds = datasets.load_dataset("json", data_files=paths)["train"] 13 | 14 | COMMIT_TO_PROBA = {} 15 | for i in range(len(ds)): 16 | COMMIT_TO_PROBA[ds[i]["commit"]] = ds[i]["proba"] 17 | 18 | def map_col(example): 19 | example["proba"] = COMMIT_TO_PROBA[example["commit"]] 20 | return example 21 | 22 | paths = os.listdir(f"{lang}") 23 | for i in range(len(paths)): 24 | ds = datasets.load_dataset("json", data_files=[f"{lang}/{paths[i]}"])["train"] 25 | ds.map(map_col).to_json(f"{lang}/{paths[i]}") -------------------------------------------------------------------------------- /dataset/other/add_proba_missing.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import os 4 | import datasets 5 | 6 | NUM_PROC = 16 7 | if __name__ == "__main__": 8 | for lang in ["python", "java", "javascript"]: 9 | 10 | paths = os.listdir(f"tasky-commits/{lang}_add") 11 | paths = [f"tasky-commits/{lang}_add/{path}" for path in paths] 12 | ds = datasets.load_dataset("json", data_files=paths)["train"] 13 | 14 | COMMIT_TO_PROBA = {} 15 | for i in range(len(ds)): 16 | COMMIT_TO_PROBA[ds[i]["commit"]] = ds[i]["proba"] 17 | 18 | def map_col(example): 19 | if example["proba"] == -1: 20 | example["proba"] = COMMIT_TO_PROBA.get(example["commit"], -1) 21 | return example 22 | 23 | paths = os.listdir(f"data/{lang}") 24 | for i in range(len(paths)): 25 | ds = datasets.load_dataset("json", data_files=[f"data/{lang}/{paths[i]}"])["train"] 26 | ds.map(map_col).to_json(f"data/{lang}/{paths[i]}", num_proc=NUM_PROC) -------------------------------------------------------------------------------- /dataset/other/check_diffs.py: -------------------------------------------------------------------------------- 1 | import difflib 2 | comms_neg = {'d6a51edc3e1cc7e7890b551c4f85d996e208153a', 'a5335eb51e6f26be07617599aa100fa18e5c3bb3', '7626b811492867af0eb76972135fd9e57f89badf', '4f38cab0095951af83ea628611c27363b3038c93', 'ac5035cb0c469261b27bbc1b290deb2d211bf0eb'} 3 | neg = ds.filter(lambda x: x["commit"] in comms_neg) 4 | diff = difflib.ndiff(neg[1]["old_contents"], neg[1]["new_contents"]) 5 | for i,s in enumerate(diff): 6 | if s[0]==' ': continue 7 | elif s[0]=='-': 8 | print(u'Delete "{}" from position {}'.format(s[-1],i)) 9 | elif s[0]=='+': 10 | print(u'Add "{}" to position {}'.format(s[-1],i)) -------------------------------------------------------------------------------- /dataset/other/commits_to_jsonl.py: -------------------------------------------------------------------------------- 1 | import datasets 2 | import random 3 | 4 | NUM_PROC = 32 5 | ds = datasets.load_dataset("commits-8192")["train"] 6 | 7 | def prepare(example): 8 | example["inputs"] = f"{example['old_contents']}" 9 | example["targets"] = f"{example['subject']}{example['new_contents']}<|endoftext|>" 10 | return example 11 | 12 | def prepare_code(example): 13 | example["inputs"] = f"```\n{example['old_contents']}\n```\n" 14 | example["targets"] = f"{example['subject']}\n```\n{example['new_contents']}\n```<|endoftext|>" 15 | return example 16 | 17 | def prepare_bigcode(example): 18 | # With 50% probability add filename 19 | if random.random() < 0.5: 20 | example["inputs"] = f"{example['old_file'].split('/')[-1]}{example['old_contents']}" 21 | else: 22 | example["inputs"] = f"{example['old_contents']}" 23 | example["targets"] = f"{example['subject']}{example['new_contents']}<|endoftext|>" 24 | return example 25 | 26 | ds = ds.map(prepare_bigcode, num_proc=NUM_PROC).select_columns(["inputs", "targets"]) 27 | ds.to_json("out.jsonl", orient="records", lines=True, force_ascii=False, num_proc=NUM_PROC) 28 | -------------------------------------------------------------------------------- /dataset/other/get_messages.py: -------------------------------------------------------------------------------- 1 | import datasets 2 | path = "javascript_new.jsonl" 3 | ds = datasets.load_dataset("json", data_files=[path])["train"] 4 | # Remove all columns that are not "commit" or "message" 5 | ds = ds.filter(lambda x: x["returncode"] == 0).select_columns(["commit", "message"]).to_json("javascript_messages.jsonl", num_proc=8) -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.class 3 | 4 | .gradle/ 5 | build/ 6 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017-2019 James Koppel 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | def pytest_addoption(parser): 5 | parser.addoption( 6 | "--correct", action="store_true", help="run tests on the correct version" 7 | ) 8 | parser.addoption("--runslow", action="store_true", help="run slow tests") 9 | 10 | 11 | def pytest_configure(config): 12 | pytest.use_correct = config.getoption("--correct") 13 | pytest.run_slow = config.getoption("--runslow") 14 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/BITCOUNT.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class BITCOUNT { 13 | public static int bitcount(int n) { 14 | int count = 0; 15 | while (n != 0) { 16 | n = (n & (n - 1)); 17 | count++; 18 | } 19 | return count; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/BUCKETSORT.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class BUCKETSORT { 14 | public static ArrayList bucketsort(ArrayList arr, int k) { 15 | ArrayList counts = new ArrayList(Collections.nCopies(k,0)); 16 | for (Integer x : arr) { 17 | counts.set(x,counts.get(x)+1); 18 | } 19 | 20 | ArrayList sorted_arr = new ArrayList(100); 21 | int i = 0; 22 | for (Integer count : counts) { 23 | sorted_arr.addAll(Collections.nCopies(count, i)); 24 | i++; 25 | } 26 | 27 | return sorted_arr; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/DEPTH_FIRST_SEARCH.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | import java_programs.Node; 9 | 10 | /** 11 | * 12 | * @author derricklin 13 | */ 14 | public class DEPTH_FIRST_SEARCH { 15 | public static boolean depth_first_search(Node startnode, Node goalnode) { 16 | Set nodesvisited = new HashSet<>(); 17 | class Search { 18 | boolean search(Node node) { 19 | if (nodesvisited.contains(node)) { 20 | return false; 21 | } else if (node == goalnode) { 22 | return true; 23 | } else { 24 | nodesvisited.add(node); 25 | for (Node successornodes : node.getSuccessors()) { 26 | if (search(successornodes)) { return true; } 27 | } 28 | } 29 | return false; 30 | } 31 | }; 32 | 33 | Search s = new Search(); 34 | return s.search(startnode); 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/DETECT_CYCLE.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | import java_programs.Node; 9 | 10 | /** 11 | * 12 | * @author derricklin 13 | */ 14 | public class DETECT_CYCLE { 15 | public static boolean detect_cycle(Node node) { 16 | Node hare = node; 17 | Node tortoise = node; 18 | 19 | while (true) { 20 | if (null==hare ||hare.getSuccessor() == null) 21 | return false; 22 | 23 | tortoise = tortoise.getSuccessor(); 24 | hare = hare.getSuccessor().getSuccessor(); 25 | 26 | if (hare == tortoise) 27 | return true; 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/FIND_FIRST_IN_SORTED.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class FIND_FIRST_IN_SORTED { 14 | 15 | public static int find_first_in_sorted(int[] arr, int x) { 16 | int lo = 0; 17 | int hi = arr.length; 18 | 19 | while (lo < hi) { 20 | int mid = (lo + hi) / 2; // check if this is floor division 21 | 22 | if (x == arr[mid] && (mid == 0 || x != arr[mid-1])) { 23 | return mid; 24 | } else if (x <= arr[mid]) { 25 | hi = mid; 26 | } else { 27 | lo = mid + 1; 28 | } 29 | } 30 | 31 | return -1; 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/FIND_IN_SORTED.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class FIND_IN_SORTED { 13 | public static int binsearch(int[] arr, int x, int start, int end) { 14 | if (start == end) { 15 | return -1; 16 | } 17 | int mid = start + (end - start) / 2; // check this is floor division 18 | if (x < arr[mid]) { 19 | return binsearch(arr, x, start, mid); 20 | } else if (x > arr[mid]) { 21 | return binsearch(arr, x, mid+1, end); 22 | } else { 23 | return mid; 24 | } 25 | } 26 | 27 | public static int find_in_sorted(int[] arr, int x) { 28 | return binsearch(arr, x, 0, arr.length); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/FLATTEN.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class FLATTEN { 14 | public static Object flatten(Object arr) { 15 | if (arr instanceof ArrayList) { 16 | ArrayList narr = (ArrayList) arr; 17 | ArrayList result = new ArrayList(50); 18 | for (Object x : narr) { 19 | if (x instanceof ArrayList) { 20 | result.addAll((ArrayList) flatten(x)); 21 | } else { 22 | result.add((x)); 23 | } 24 | } 25 | return result; 26 | } else { 27 | return arr; 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/GCD.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class GCD { 14 | 15 | public static int gcd(int a, int b) { 16 | if (b == 0) { 17 | return a; 18 | } else { 19 | return gcd(b, a%b); 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/GET_FACTORS.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class GET_FACTORS { 14 | public static ArrayList get_factors(int n) { 15 | if (n == 1) { 16 | return new ArrayList(); 17 | } 18 | int max = (int)(Math.sqrt(n) + 1.0); 19 | for (int i=2; i < max; i++) { 20 | if (n % i == 0) { 21 | ArrayList prepend = new ArrayList(0); 22 | prepend.add(i); 23 | prepend.addAll(get_factors(n / i)); 24 | return prepend; 25 | } 26 | } 27 | 28 | return new ArrayList(Arrays.asList(n)); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/IS_VALID_PARENTHESIZATION.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class IS_VALID_PARENTHESIZATION { 14 | public static Boolean is_valid_parenthesization(String parens) { 15 | int depth = 0; 16 | for (int i = 0; i < parens.length(); i++) { 17 | Character paren = parens.charAt(i); 18 | if (paren.equals('(')) { 19 | depth++; 20 | } else { 21 | depth--; 22 | if (depth < 0) { return false; } 23 | } 24 | } 25 | return depth==0; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/KHEAPSORT.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class KHEAPSORT { 14 | // import heapq 15 | // heap is data structure used for priority queue 16 | // pq O(log n) to pull off lowest priority item 17 | // heap is a type of binary tree 18 | // every node its value smaller than everything below it 19 | // priority queue in java is least-value first (at head) 20 | 21 | public static ArrayList kheapsort(ArrayList arr, int k) { 22 | PriorityQueue heap = new PriorityQueue(); 23 | for (Integer v : arr.subList(0,k)) { 24 | heap.add(v); 25 | } 26 | 27 | ArrayList output = new ArrayList(); 28 | for (Integer x : arr.subList(k, arr.size())) { 29 | heap.add(x); 30 | Integer popped = heap.poll(); 31 | output.add(popped); 32 | } 33 | 34 | while (!heap.isEmpty()) { 35 | output.add(heap.poll()); 36 | } 37 | 38 | return output; 39 | 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/KNAPSACK.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | import java.lang.*; 4 | 5 | /* 6 | * To change this template, choose Tools | Templates 7 | * and open the template in the editor. 8 | */ 9 | 10 | /** 11 | * 12 | * @author derricklin 13 | */ 14 | public class KNAPSACK { 15 | public static int knapsack(int capacity, int [][] items) { 16 | int weight = 0, value = 0; 17 | int n = items.length; 18 | int memo[][] = new int[n + 1][capacity + 1]; 19 | 20 | for (int i = 0; i <= n ; i++) 21 | { 22 | if (i - 1 >= 0) { 23 | weight = items[i - 1][0]; 24 | value = items[i - 1][1]; 25 | } 26 | for (int j = 0; j <= capacity; j++) 27 | { 28 | if (i == 0 || j == 0) { 29 | memo[i][j] = 0; 30 | } 31 | else if (weight <= j) { 32 | memo[i][j] = Math.max(memo[i - 1][j], value + memo[i - 1][j - weight]); 33 | } 34 | else { 35 | memo[i][j] = memo [i-1][j]; 36 | } 37 | 38 | } 39 | } 40 | return memo[n][capacity]; 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/KTH.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class KTH { 14 | public static Integer kth(ArrayList arr, int k) { 15 | int pivot = arr.get(0); 16 | ArrayList below, above; 17 | below = new ArrayList(arr.size()); 18 | above = new ArrayList(arr.size()); 19 | for (Integer x : arr) { 20 | if (x < pivot) { 21 | below.add(x); 22 | } else if (x > pivot) { 23 | above.add(x); 24 | } 25 | } 26 | 27 | int num_less = below.size(); 28 | int num_lessoreq = arr.size() - above.size(); 29 | if (k < num_less) { 30 | return kth(below, k); 31 | } else if (k >= num_lessoreq) { 32 | return kth(above, k-num_lessoreq); 33 | } else { 34 | return pivot; 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/LEVENSHTEIN.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class LEVENSHTEIN { 13 | public static int levenshtein(String source, String target) { 14 | if (source.isEmpty() || target.isEmpty()) { 15 | return source.isEmpty() ? target.length() : source.length(); 16 | } else if (source.charAt(0) == target.charAt(0)) { 17 | return levenshtein(source.substring(1), target.substring(1)); 18 | } else { 19 | return 1 + Math.min(Math.min( 20 | levenshtein(source, target.substring(1)), 21 | levenshtein(source.substring(1), target.substring(1))), 22 | levenshtein(source.substring(1), target) 23 | ); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/LIS.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class LIS { 13 | public static int lis(int[] arr) { 14 | Map ends = new HashMap(100); 15 | int longest = 0; 16 | 17 | int i = 0; 18 | for (int val : arr) { 19 | 20 | ArrayList prefix_lengths = new ArrayList(100); 21 | for (int j=1; j < longest+1; j++) { 22 | if (arr[ends.get(j)] < val) { 23 | prefix_lengths.add(j); 24 | } 25 | } 26 | 27 | int length = !prefix_lengths.isEmpty() ? Collections.max(prefix_lengths) : 0; 28 | 29 | if (length == longest || val < arr[ends.get(length+1)]) { 30 | ends.put(length+1, i); 31 | longest = Math.max(longest,length + 1); 32 | } 33 | 34 | i++; 35 | } 36 | return longest; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/LONGEST_COMMON_SUBSEQUENCE.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class LONGEST_COMMON_SUBSEQUENCE { 14 | public static String longest_common_subsequence(String a, String b) { 15 | if (a.isEmpty() || b.isEmpty()) { 16 | return ""; 17 | } else if (a.charAt(0) == b.charAt(0)) { 18 | return a.charAt(0) + longest_common_subsequence(a.substring(1), b.substring(1)); 19 | } else { 20 | String fst = longest_common_subsequence(a, b.substring(1)); 21 | String snd = longest_common_subsequence(a.substring(1), b); 22 | return fst.length() >= snd.length() ? fst : snd; 23 | } 24 | 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/MAX_SUBLIST_SUM.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class MAX_SUBLIST_SUM { 14 | public static int max_sublist_sum(int[] arr) { 15 | int max_ending_here = 0; 16 | int max_so_far = 0; 17 | 18 | for (int x : arr) { 19 | max_ending_here = Math.max(0,max_ending_here + x); 20 | max_so_far = Math.max(max_so_far, max_ending_here); 21 | } 22 | 23 | return max_so_far; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/NEXT_PALINDROME.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | import java.lang.Math.*; 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class NEXT_PALINDROME { 14 | public static String next_palindrome(int[] digit_list) { 15 | int high_mid = Math.floorDiv(digit_list.length, 2); 16 | int low_mid = Math.floorDiv(digit_list.length - 1, 2); 17 | 18 | while (high_mid < digit_list.length && low_mid >= 0) { 19 | if (digit_list[high_mid] == 9) { 20 | digit_list[high_mid] = 0; 21 | digit_list[low_mid] = 0; 22 | high_mid += 1; 23 | low_mid -= 1; 24 | } else { 25 | digit_list[high_mid] += 1; 26 | if (low_mid != high_mid) { 27 | digit_list[low_mid] += 1; 28 | } 29 | return Arrays.toString(digit_list); 30 | } 31 | } 32 | 33 | ArrayList otherwise = new ArrayList(); 34 | otherwise.add(1); 35 | otherwise.addAll(Collections.nCopies(digit_list.length-1, 0)); 36 | otherwise.add(1); 37 | 38 | return String.valueOf(otherwise); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/PASCAL.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class PASCAL { 14 | public static ArrayList> pascal(int n) { 15 | ArrayList> rows = new ArrayList>(); 16 | ArrayList init = new ArrayList(); 17 | init.add(1); 18 | rows.add(init); 19 | 20 | for (int r=1; r row = new ArrayList(); 22 | for (int c=0; c 0) { 25 | upleft = rows.get(r-1).get(c-1); 26 | } else { 27 | upleft = 0; 28 | } 29 | if (c < r) { 30 | upright = rows.get(r-1).get(c); 31 | } else { 32 | upright = 0; 33 | } 34 | row.add(upleft+upright); 35 | } 36 | rows.add(row); 37 | } 38 | 39 | return rows; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/POSSIBLE_CHANGE.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class POSSIBLE_CHANGE { 13 | public static int possible_change(int[] coins, int total) { 14 | if (total == 0) { 15 | return 1; 16 | } 17 | if (total < 0 ||coins.length==0) { 18 | return 0; 19 | } 20 | 21 | int first = coins[0]; 22 | int[] rest = Arrays.copyOfRange(coins, 1, coins.length); 23 | return possible_change(coins, total-first) + possible_change(rest, total); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/POWERSET.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | 3 | import java.util.*; 4 | 5 | /* 6 | * To change this template, choose Tools | Templates 7 | * and open the template in the editor. 8 | */ 9 | 10 | /** 11 | * 12 | * @author derricklin 13 | */ 14 | public class POWERSET { 15 | public static ArrayList powerset(ArrayList arr) { 16 | if (!arr.isEmpty()) { 17 | Object first = arr.get(0); 18 | arr.remove(0); 19 | ArrayList rest = arr; 20 | ArrayList rest_subsets = powerset(rest); 21 | 22 | ArrayList output = new ArrayList(100); 23 | ArrayList to_add = new ArrayList(100); 24 | 25 | for (ArrayList subset : rest_subsets) { 26 | ArrayList r = new ArrayList(); 27 | r.add(first); 28 | r.addAll(subset); 29 | to_add.add(r); 30 | } 31 | 32 | output.addAll(to_add); 33 | rest_subsets.addAll(output); 34 | 35 | return rest_subsets; 36 | } else { 37 | ArrayList empty_set = new ArrayList(); 38 | empty_set.add(new ArrayList()); 39 | return empty_set; 40 | } 41 | } 42 | } 43 | 44 | 45 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/QUICKSORT.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class QUICKSORT { 14 | public static ArrayList quicksort(ArrayList arr) { 15 | if (arr.isEmpty()) { 16 | return new ArrayList(); 17 | } 18 | 19 | Integer pivot = arr.get(0); 20 | ArrayList lesser = new ArrayList(); 21 | ArrayList greater = new ArrayList(); 22 | 23 | for (Integer x : arr.subList(1, arr.size())) { 24 | if (x < pivot) { 25 | lesser.add(x); 26 | } else if (x >= pivot) { 27 | greater.add(x); 28 | } 29 | } 30 | ArrayList middle = new ArrayList(); 31 | middle.add(pivot); 32 | lesser = quicksort(lesser); 33 | greater = quicksort(greater); 34 | middle.addAll(greater); 35 | lesser.addAll(middle); 36 | return lesser; 37 | 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/REVERSE_LINKED_LIST.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | import java_programs.Node; 5 | 6 | /* 7 | * To change this template, choose Tools | Templates 8 | * and open the template in the editor. 9 | */ 10 | 11 | /** 12 | * 13 | * @author derricklin 14 | */ 15 | public class REVERSE_LINKED_LIST { 16 | public static Node reverse_linked_list(Node node) { 17 | Node prevnode = null; 18 | Node nextnode; 19 | while (node != null) { 20 | nextnode = node.getSuccessor(); 21 | node.setSuccessor(prevnode); 22 | prevnode = node; 23 | node = nextnode; 24 | } 25 | return prevnode; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/RPN_EVAL.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | import java.util.function.BinaryOperator; 4 | 5 | /* 6 | * To change this template, choose Tools | Templates 7 | * and open the template in the editor. 8 | */ 9 | 10 | /** 11 | * 12 | * @author derricklin 13 | */ 14 | public class RPN_EVAL { 15 | public static Double rpn_eval(ArrayList tokens) { 16 | Map> op = new HashMap>(); 17 | op.put("+", (a, b) -> a + b); 18 | op.put("-", (a, b) -> a - b); 19 | op.put("*", (a, b) -> a * b); 20 | op.put("/", (a, b) -> a / b); 21 | 22 | 23 | Stack stack = new Stack(); 24 | 25 | for (Object token : tokens) { 26 | if (Double.class.isInstance(token)) { 27 | stack.push((Double) token); 28 | } else { 29 | token = (String) token; 30 | Double a = (Double) stack.pop(); 31 | Double b = (Double) stack.pop(); 32 | Double c = 0.0; 33 | BinaryOperator bin_op = op.get(token); 34 | c = bin_op.apply(b,a); 35 | stack.push(c); 36 | } 37 | } 38 | 39 | return (Double) stack.pop(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/SIEVE.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class SIEVE { 14 | 15 | public static boolean all(ArrayList arr) { 16 | for (boolean value : arr) { 17 | if (!value) { return false; } 18 | } 19 | return true; 20 | } 21 | 22 | public static boolean any(ArrayList arr) { 23 | for (boolean value: arr) { 24 | if (value) { return true; } 25 | } 26 | return false; 27 | } 28 | 29 | public static ArrayList list_comp(int n, ArrayList primes) { 30 | ArrayList built_comprehension = new ArrayList(); 31 | for (Integer p : primes) { 32 | built_comprehension.add(n % p > 0); 33 | } 34 | return built_comprehension; 35 | } 36 | 37 | 38 | public static ArrayList sieve(Integer max) { 39 | ArrayList primes = new ArrayList(); 40 | for (int n=2; n epsilon) { 17 | approx = 0.5d * (approx + x / approx); 18 | } 19 | return approx; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/SUBSEQUENCES.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class SUBSEQUENCES { 14 | public static ArrayList subsequences(int a, int b, int k) { 15 | if (k == 0) { 16 | ArrayList empty_set = new ArrayList(); 17 | empty_set.add(new ArrayList()); 18 | return empty_set; 19 | } 20 | 21 | ArrayList ret = new ArrayList(50); 22 | for (int i=a; i topological_ordering (List directedGraph) { 8 | ArrayList orderedNodes = new ArrayList(); 9 | for (Node node : directedGraph) { 10 | if (node.getPredecessors().isEmpty()) { 11 | orderedNodes.add(node); 12 | } 13 | } 14 | 15 | int listSize = orderedNodes.size(); 16 | for (int i = 0; i < listSize; i++) { 17 | Node node = orderedNodes.get(i); 18 | for (Node nextNode : node.getSuccessors()) { 19 | if (orderedNodes.containsAll(nextNode.getPredecessors()) && !orderedNodes.contains(nextNode)) { 20 | orderedNodes.add(nextNode); 21 | listSize++; 22 | } 23 | } 24 | } 25 | return orderedNodes; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/TO_BASE.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class TO_BASE { 14 | public static String to_base(int num, int b) { 15 | String result = ""; 16 | String alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 17 | int i; 18 | while (num > 0) { 19 | i = num % b; 20 | num = num / b; // floor division? 21 | result = String.valueOf(alphabet.charAt(i))+result; 22 | } 23 | 24 | return result; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_java_programs/WRAP.java: -------------------------------------------------------------------------------- 1 | package correct_java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class WRAP { 14 | public static void main(String[] args) { 15 | System.out.println("abc".lastIndexOf("c",30)); 16 | } 17 | 18 | public static ArrayList wrap(String text, int cols) { 19 | ArrayList lines = new ArrayList(); 20 | 21 | String line; 22 | while (text.length() > cols) { 23 | int end = text.lastIndexOf(" ", cols); // off by one? 24 | if (end == -1) { 25 | end = cols; 26 | } 27 | line = text.substring(0,end); 28 | text = text.substring(end); 29 | lines.add(line); 30 | } 31 | lines.add(text); 32 | return lines; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/bitcount.py: -------------------------------------------------------------------------------- 1 | 2 | def bitcount(n): 3 | count = 0 4 | while n: 5 | n &= n - 1 6 | count += 1 7 | return count 8 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/bucketsort.py: -------------------------------------------------------------------------------- 1 | 2 | def bucketsort(arr, k): 3 | counts = [0] * k 4 | for x in arr: 5 | counts[x] += 1 6 | 7 | sorted_arr = [] 8 | for i, count in enumerate(counts): 9 | sorted_arr.extend([i] * count) 10 | 11 | return sorted_arr 12 | 13 | """ 14 | def bucketsort(arr, k): 15 | counts = [0] * k 16 | for x in arr: 17 | counts[x] += 1 18 | 19 | sorted_arr = [] 20 | for i, count in enumerate(arr): 21 | sorted_arr.extend([i] * counts[i]) 22 | 23 | return sorted_arr 24 | """ 25 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/depth_first_search.py: -------------------------------------------------------------------------------- 1 | 2 | def depth_first_search(startnode, goalnode): 3 | nodesvisited = set() 4 | 5 | def search_from(node): 6 | if node in nodesvisited: 7 | return False 8 | elif node is goalnode: 9 | return True 10 | else: 11 | nodesvisited.add(node) 12 | return any( 13 | search_from(nextnode) for nextnode in node.successors 14 | ) 15 | 16 | return search_from(startnode) 17 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/detect_cycle.py: -------------------------------------------------------------------------------- 1 | def detect_cycle(node): 2 | hare = tortoise = node 3 | 4 | while True: 5 | if hare is None or hare.successor is None: 6 | return False 7 | 8 | tortoise = tortoise.successor 9 | hare = hare.successor.successor 10 | 11 | if hare is tortoise: 12 | return True 13 | 14 | 15 | """ 16 | def detect_cycle(node): 17 | hare = tortoise = node 18 | 19 | while True: 20 | if hare.successor is None or hare.successor.successor is None: 21 | return False 22 | 23 | tortoise = tortoise.successor 24 | hare = hare.successor.successor 25 | 26 | if hare is tortoise: 27 | return True 28 | """ 29 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/find_first_in_sorted.py: -------------------------------------------------------------------------------- 1 | 2 | def find_first_in_sorted(arr, x): 3 | lo = 0 4 | hi = len(arr) 5 | 6 | while lo < hi: 7 | mid = (lo + hi) // 2 8 | 9 | if x == arr[mid] and (mid == 0 or x != arr[mid - 1]): 10 | return mid 11 | 12 | elif x <= arr[mid]: 13 | hi = mid 14 | 15 | else: 16 | lo = mid + 1 17 | 18 | return -1 19 | 20 | """ 21 | def find_first_in_sorted(arr, x): 22 | lo = 0 23 | hi = len(arr) 24 | 25 | while lo <= hi - 1: 26 | mid = (lo + hi) // 2 27 | 28 | if x == arr[mid] and (mid == 0 or x != arr[mid - 1]): 29 | return mid 30 | 31 | elif x <= arr[mid]: 32 | hi = mid 33 | 34 | else: 35 | lo = mid + 1 36 | 37 | return -1 38 | 39 | def find_first_in_sorted(arr, x): 40 | lo = 0 41 | hi = len(arr) 42 | 43 | while lo + 1 <= hi: 44 | mid = (lo + hi) // 2 45 | 46 | if x == arr[mid] and (mid == 0 or x != arr[mid - 1]): 47 | return mid 48 | 49 | elif x <= arr[mid]: 50 | hi = mid 51 | 52 | else: 53 | lo = mid + 1 54 | 55 | return -1 56 | 57 | """ 58 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/find_in_sorted.py: -------------------------------------------------------------------------------- 1 | 2 | def find_in_sorted(arr, x): 3 | def binsearch(start, end): 4 | if start == end: 5 | return -1 6 | mid = start + (end - start) // 2 7 | if x < arr[mid]: 8 | return binsearch(start, mid) 9 | elif x > arr[mid]: 10 | return binsearch(mid + 1, end) 11 | else: 12 | return mid 13 | 14 | return binsearch(0, len(arr)) 15 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/flatten.py: -------------------------------------------------------------------------------- 1 | 2 | def flatten(arr): 3 | for x in arr: 4 | if isinstance(x, list): 5 | for y in flatten(x): 6 | yield y 7 | else: 8 | yield x 9 | 10 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/gcd.py: -------------------------------------------------------------------------------- 1 | 2 | def gcd(a, b): 3 | if b == 0: 4 | return a 5 | else: 6 | return gcd(b, a % b) 7 | 8 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/get_factors.py: -------------------------------------------------------------------------------- 1 | 2 | def get_factors(n): 3 | if n == 1: 4 | return [] 5 | 6 | for i in range(2, int(n ** 0.5) + 1): 7 | if n % i == 0: 8 | return [i] + get_factors(n // i) 9 | 10 | return [n] 11 | 12 | """ 13 | def get_factors(n): 14 | if n == 1: 15 | return [] 16 | 17 | for i in range(2, n + 1): 18 | if n % i == 0: 19 | return [i] + get_factors(n // i) 20 | 21 | return [] 22 | """ 23 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/hanoi.py: -------------------------------------------------------------------------------- 1 | 2 | def hanoi(height, start=1, end=3): 3 | steps = [] 4 | if height > 0: 5 | helper = ({1, 2, 3} - {start} - {end}).pop() 6 | steps.extend(hanoi(height - 1, start, helper)) 7 | steps.append((start, end)) 8 | steps.extend(hanoi(height - 1, helper, end)) 9 | 10 | return steps 11 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/kheapsort.py: -------------------------------------------------------------------------------- 1 | 2 | def kheapsort(arr, k): 3 | import heapq 4 | 5 | heap = arr[:k] 6 | heapq.heapify(heap) 7 | 8 | for x in arr[k:]: 9 | yield heapq.heappushpop(heap, x) 10 | 11 | while heap: 12 | yield heapq.heappop(heap) 13 | 14 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/knapsack.py: -------------------------------------------------------------------------------- 1 | 2 | def knapsack(capacity, items): 3 | from collections import defaultdict 4 | memo = defaultdict(int) 5 | 6 | for i in range(1, len(items) + 1): 7 | weight, value = items[i - 1] 8 | 9 | for j in range(1, capacity + 1): 10 | memo[i, j] = memo[i - 1, j] 11 | 12 | if weight <= j: 13 | memo[i, j] = max( 14 | memo[i, j], 15 | value + memo[i - 1, j - weight] 16 | ) 17 | 18 | return memo[len(items), capacity] 19 | 20 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/kth.py: -------------------------------------------------------------------------------- 1 | 2 | def kth(arr, k): 3 | pivot = arr[0] 4 | below = [x for x in arr if x < pivot] 5 | above = [x for x in arr if x > pivot] 6 | 7 | num_less = len(below) 8 | num_lessoreq = len(arr) - len(above) 9 | 10 | if k < num_less: 11 | return kth(below, k) 12 | elif k >= num_lessoreq: 13 | return kth(above, k - num_lessoreq) 14 | else: 15 | return pivot 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/lcs_length.py: -------------------------------------------------------------------------------- 1 | 2 | def lcs_length(s, t): 3 | from collections import Counter 4 | 5 | dp = Counter() 6 | 7 | for i in range(len(s)): 8 | for j in range(len(t)): 9 | if s[i] == t[j]: 10 | dp[i, j] = dp[i - 1, j - 1] + 1 11 | 12 | return max(dp.values()) if dp else 0 13 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/levenshtein.py: -------------------------------------------------------------------------------- 1 | 2 | def levenshtein(source, target): 3 | if source == '' or target == '': 4 | return len(source) or len(target) 5 | 6 | elif source[0] == target[0]: 7 | return levenshtein(source[1:], target[1:]) 8 | 9 | else: 10 | return 1 + min( 11 | levenshtein(source, target[1:]), 12 | levenshtein(source[1:], target[1:]), 13 | levenshtein(source[1:], target) 14 | ) 15 | 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/lis.py: -------------------------------------------------------------------------------- 1 | 2 | def lis(arr): 3 | ends = {} 4 | longest = 0 5 | 6 | for i, val in enumerate(arr): 7 | 8 | prefix_lengths = [j for j in range(1, longest + 1) if arr[ends[j]] < val] 9 | 10 | length = max(prefix_lengths) if prefix_lengths else 0 11 | 12 | if length == longest or val < arr[ends[length + 1]]: 13 | ends[length + 1] = i 14 | longest = max(longest, length + 1) 15 | 16 | return longest 17 | 18 | """ 19 | def lis(arr): 20 | ends = {} 21 | longest = 0 22 | 23 | for i, val in enumerate(arr): 24 | 25 | prefix_lengths = [j for j in range(1, longest + 1) if arr[ends[j]] < val] 26 | 27 | length = max(prefix_lengths) if prefix_lengths else 0 28 | 29 | if length == longest or val < arr[ends[length + 1]]: 30 | ends[length + 1] = i 31 | longest = max(length + 1, longest) 32 | 33 | return longest 34 | """ 35 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/longest_common_subsequence.py: -------------------------------------------------------------------------------- 1 | 2 | def longest_common_subsequence(a, b): 3 | if not a or not b: 4 | return '' 5 | 6 | elif a[0] == b[0]: 7 | return a[0] + longest_common_subsequence(a[1:], b[1:]) 8 | 9 | else: 10 | return max( 11 | longest_common_subsequence(a, b[1:]), 12 | longest_common_subsequence(a[1:], b), 13 | key=len 14 | ) 15 | 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/max_sublist_sum.py: -------------------------------------------------------------------------------- 1 | 2 | def max_sublist_sum(arr): 3 | max_ending_here = 0 4 | max_so_far = 0 5 | 6 | for x in arr: 7 | max_ending_here = max(0, max_ending_here + x) 8 | max_so_far = max(max_so_far, max_ending_here) 9 | 10 | return max_so_far 11 | 12 | """ 13 | def max_sublist_sum(arr): 14 | max_ending_here = 0 15 | max_so_far = 0 16 | 17 | for x in arr: 18 | max_ending_here = max(max_ending_here + x, 0) 19 | max_so_far = max(max_so_far, max_ending_here) 20 | 21 | return max_so_far 22 | 23 | def max_sublist_sum(arr): 24 | max_ending_here = 0 25 | max_so_far = 0 26 | 27 | for x in arr: 28 | max_ending_here = max(x, max_ending_here + x) 29 | max_so_far = max(max_so_far, max_ending_here) 30 | 31 | return max_so_far 32 | 33 | 34 | def max_sublist_sum(arr): 35 | max_ending_here = 0 36 | max_so_far = 0 37 | 38 | for x in arr: 39 | max_ending_here = max(max_ending_here + x, x) 40 | max_so_far = max(max_so_far, max_ending_here) 41 | 42 | return max_so_far 43 | 44 | """ 45 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/minimum_spanning_tree.py: -------------------------------------------------------------------------------- 1 | 2 | def minimum_spanning_tree(weight_by_edge): 3 | group_by_node = {} 4 | mst_edges = set() 5 | 6 | for edge in sorted(weight_by_edge, key=weight_by_edge.__getitem__): 7 | u, v = edge 8 | if group_by_node.setdefault(u, {u}) != group_by_node.setdefault(v, {v}): 9 | mst_edges.add(edge) 10 | group_by_node[u].update(group_by_node[v]) 11 | for node in group_by_node[v]: 12 | group_by_node[node] = group_by_node[u] 13 | 14 | return mst_edges 15 | 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/minimum_spanning_tree_test.py: -------------------------------------------------------------------------------- 1 | from .minimum_spanning_tree import minimum_spanning_tree 2 | 3 | 4 | """ 5 | Driver to test minimum spanning tree 6 | """ 7 | def main(): 8 | # Case 1: Simple tree input. 9 | # Output: (1, 2) (3, 4) (1, 4) 10 | result = minimum_spanning_tree({ 11 | (1, 2): 10, 12 | (2, 3): 15, 13 | (3, 4): 10, 14 | (1, 4): 10}) 15 | for edge in result: 16 | print(edge), 17 | print() 18 | 19 | # Case 2: Strongly connected tree input. 20 | # Output: (2, 5) (1, 3) (2, 3) (4, 6) (3, 6) 21 | result = minimum_spanning_tree({ 22 | (1, 2): 6, 23 | (1, 3): 1, 24 | (1, 4): 5, 25 | (2, 3): 5, 26 | (2, 5): 3, 27 | (3, 4): 5, 28 | (3, 5): 6, 29 | (3, 6): 4, 30 | (4, 6): 2, 31 | (5, 6): 6}) 32 | for edge in result: 33 | print(edge), 34 | print() 35 | 36 | # Case 3: Minimum spanning tree input. 37 | # Output: (1, 2) (1, 3) (2, 4) 38 | result = minimum_spanning_tree({ 39 | (1, 2): 6, 40 | (1, 3): 1, 41 | (2, 4): 2}) 42 | for edge in result: 43 | print(edge), 44 | print() 45 | 46 | 47 | if __name__ == "__main__": 48 | main() 49 | 50 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/next_palindrome.py: -------------------------------------------------------------------------------- 1 | def next_palindrome(digit_list): 2 | high_mid = len(digit_list) // 2 3 | low_mid = (len(digit_list) - 1) // 2 4 | while high_mid < len(digit_list) and low_mid >= 0: 5 | if digit_list[high_mid] == 9: 6 | digit_list[high_mid] = 0 7 | digit_list[low_mid] = 0 8 | high_mid += 1 9 | low_mid -= 1 10 | else: 11 | digit_list[high_mid] += 1 12 | if low_mid != high_mid: 13 | digit_list[low_mid] += 1 14 | return digit_list 15 | return [1] + (len(digit_list) - 1) * [0] + [1] 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/next_permutation.py: -------------------------------------------------------------------------------- 1 | 2 | def next_permutation(perm): 3 | for i in range(len(perm) - 2, -1, -1): 4 | if perm[i] < perm[i + 1]: 5 | for j in range(len(perm) - 1, i, -1): 6 | if perm[i] < perm[j]: 7 | next_perm = list(perm) 8 | next_perm[i], next_perm[j] = perm[j], perm[i] 9 | next_perm[i + 1:] = reversed(next_perm[i + 1:]) 10 | return next_perm 11 | 12 | """ 13 | def next_permutation(perm): 14 | for i in range(len(perm) - 2, -1, -1): 15 | if perm[i] < perm[i + 1]: 16 | for j in range(len(perm) - 1, i, -1): 17 | if perm[j] > perm[i]: 18 | next_perm = list(perm) 19 | next_perm[i], next_perm[j] = perm[j], perm[i] 20 | next_perm[i + 1:] = reversed(next_perm[i + 1:]) 21 | return next_perm 22 | """ 23 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/node.py: -------------------------------------------------------------------------------- 1 | class Node: 2 | def __init__(self, value=None, successor=None, successors=[], predecessors=[], incoming_nodes=[], outgoing_nodes=[]): 3 | self.value = value 4 | self.successor = successor 5 | self.successors = successors 6 | self.predecessors = predecessors 7 | self.incoming_nodes = incoming_nodes 8 | self.outgoing_nodes = outgoing_nodes 9 | 10 | def successor(self): 11 | return self.successor 12 | 13 | def successors(self): 14 | return self.successors 15 | 16 | def predecessors(self): 17 | return self.predecessors 18 | 19 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/pascal.py: -------------------------------------------------------------------------------- 1 | 2 | def pascal(n): 3 | rows = [[1]] 4 | for r in range(1, n): 5 | row = [] 6 | for c in range(0, r + 1): 7 | upleft = rows[r - 1][c - 1] if c > 0 else 0 8 | upright = rows[r - 1][c] if c < r else 0 9 | row.append(upleft + upright) 10 | rows.append(row) 11 | 12 | return rows 13 | 14 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/powerset.py: -------------------------------------------------------------------------------- 1 | 2 | def powerset(arr): 3 | if arr: 4 | first, *rest = arr 5 | rest_subsets = powerset(rest) 6 | return rest_subsets + [[first] + subset for subset in rest_subsets] 7 | else: 8 | return [[]] 9 | 10 | """ 11 | def powerset(arr): 12 | if arr: 13 | first, *rest = arr 14 | rest_subsets = powerset(rest) 15 | return [[first] + subset for subset in rest_subsets] + rest_subsets 16 | else: 17 | return [[]] 18 | """ 19 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/quicksort.py: -------------------------------------------------------------------------------- 1 | 2 | def quicksort(arr): 3 | if not arr: 4 | return [] 5 | 6 | pivot = arr[0] 7 | lesser = quicksort([x for x in arr[1:] if x < pivot]) 8 | greater = quicksort([x for x in arr[1:] if x >= pivot]) 9 | return lesser + [pivot] + greater 10 | 11 | """ 12 | def quicksort(arr): 13 | if not arr: 14 | return [] 15 | 16 | pivot = arr[0] 17 | lesser = quicksort([x for x in arr[1:] if x <= pivot]) 18 | greater = quicksort([x for x in arr[1:] if x > pivot]) 19 | return lesser + [pivot] + greater 20 | """ 21 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/reverse_linked_list.py: -------------------------------------------------------------------------------- 1 | 2 | def reverse_linked_list(node): 3 | prevnode = None 4 | while node: 5 | nextnode = node.successor 6 | node.successor = prevnode 7 | prevnode = node 8 | node = nextnode 9 | return prevnode 10 | 11 | """ 12 | def reverse_linked_list(node): 13 | prevnode = None 14 | while node: 15 | nextnode = node.successor 16 | node.successor = prevnode 17 | prevnode, node = node, nextnode 18 | return prevnode 19 | 20 | def reverse_linked_list(node): 21 | prevnode = None 22 | while node: 23 | nextnode = node.successor 24 | node.successor = prevnode 25 | node, prevnode = nextnode, node 26 | return prevnode 27 | 28 | """ 29 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/rpn_eval.py: -------------------------------------------------------------------------------- 1 | 2 | def rpn_eval(tokens): 3 | def op(symbol, a, b): 4 | return { 5 | '+': lambda a, b: a + b, 6 | '-': lambda a, b: a - b, 7 | '*': lambda a, b: a * b, 8 | '/': lambda a, b: a / b 9 | }[symbol](a, b) 10 | 11 | stack = [] 12 | 13 | for token in tokens: 14 | if isinstance(token, float): 15 | stack.append(token) 16 | else: 17 | a = stack.pop() 18 | b = stack.pop() 19 | stack.append( 20 | op(token, b, a) 21 | ) 22 | 23 | return stack.pop() 24 | 25 | """ 26 | def rpn_eval(tokens): 27 | def op(symbol, a, b): 28 | return { 29 | '+': lambda a, b: a + b, 30 | '-': lambda a, b: a - b, 31 | '*': lambda a, b: a * b, 32 | '/': lambda a, b: a / b 33 | }[symbol](b, a) 34 | 35 | stack = Stack() 36 | 37 | for token in tokens: 38 | if isinstance(token, float): 39 | stack.push(token) 40 | else: 41 | a = stack.pop() 42 | b = stack.pop() 43 | stack.push( 44 | op(token, a, b) 45 | ) 46 | 47 | return stack.pop() 48 | """ 49 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/shortest_path_lengths.py: -------------------------------------------------------------------------------- 1 | 2 | from collections import defaultdict 3 | 4 | def shortest_path_lengths(n, length_by_edge): 5 | length_by_path = defaultdict(lambda: float('inf')) 6 | length_by_path.update({(i, i): 0 for i in range(n)}) 7 | length_by_path.update(length_by_edge) 8 | 9 | for k in range(n): 10 | for i in range(n): 11 | for j in range(n): 12 | length_by_path[i, j] = min( 13 | length_by_path[i, j], 14 | length_by_path[i, k] + length_by_path[k, j] 15 | ) 16 | 17 | return length_by_path 18 | 19 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/shortest_paths.py: -------------------------------------------------------------------------------- 1 | 2 | def shortest_paths(source, weight_by_edge): 3 | weight_by_node = { 4 | v: float('inf') for u, v in weight_by_edge 5 | } 6 | weight_by_node[source] = 0 7 | 8 | for i in range(len(weight_by_node) - 1): 9 | for (u, v), weight in weight_by_edge.items(): 10 | weight_by_node[v] = min( 11 | weight_by_node[u] + weight, 12 | weight_by_node[v] 13 | ) 14 | 15 | return weight_by_node 16 | 17 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/shunting_yard.py: -------------------------------------------------------------------------------- 1 | 2 | def shunting_yard(tokens): 3 | precedence = { 4 | '+': 1, 5 | '-': 1, 6 | '*': 2, 7 | '/': 2 8 | } 9 | 10 | rpntokens = [] 11 | opstack = [] 12 | for token in tokens: 13 | if isinstance(token, int): 14 | rpntokens.append(token) 15 | else: 16 | while opstack and precedence[token] <= precedence[opstack[-1]]: 17 | rpntokens.append(opstack.pop()) 18 | opstack.append(token) 19 | 20 | while opstack: 21 | rpntokens.append(opstack.pop()) 22 | 23 | return rpntokens 24 | 25 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/sieve.py: -------------------------------------------------------------------------------- 1 | 2 | def sieve(max): 3 | primes = [] 4 | for n in range(2, max + 1): 5 | if all(n % p > 0 for p in primes): 6 | primes.append(n) 7 | return primes 8 | 9 | """ 10 | def sieve(max): 11 | primes = [] 12 | for n in range(2, max + 1): 13 | if not any(n % p == 0 for p in primes): 14 | primes.append(n) 15 | return primes 16 | 17 | def sieve(max): 18 | primes = [] 19 | for n in range(2, max + 1): 20 | if all(n % p for p in primes): 21 | primes.append(n) 22 | return primes 23 | 24 | def sieve(max): 25 | primes = [] 26 | for n in range(2, max + 1): 27 | if not any(n % p for p in primes): 28 | primes.append(n) 29 | return primes 30 | 31 | """ 32 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/sqrt.py: -------------------------------------------------------------------------------- 1 | 2 | def sqrt(x, epsilon): 3 | approx = x / 2 4 | while abs(x - approx ** 2) > epsilon: 5 | approx = 0.5 * (approx + x / approx) 6 | return approx 7 | 8 | """ 9 | def sqrt(x, epsilon): 10 | approx = x / 2 11 | while abs(x - approx * approx) > epsilon: 12 | approx = 0.5 * (approx + x / approx) 13 | return approx 14 | """ 15 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/subsequences.py: -------------------------------------------------------------------------------- 1 | 2 | def subsequences(a, b, k): 3 | if k == 0: 4 | return [[]] 5 | 6 | ret = [] 7 | for i in range(a, b + 1 - k): 8 | ret.extend( 9 | [i] + rest for rest in subsequences(i + 1, b, k - 1) 10 | ) 11 | 12 | return ret 13 | 14 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/to_base.py: -------------------------------------------------------------------------------- 1 | 2 | import string 3 | def to_base(num, b): 4 | result = '' 5 | alphabet = string.digits + string.ascii_uppercase 6 | while num > 0: 7 | i = num % b 8 | num = num // b 9 | result = alphabet[i] + result 10 | return result 11 | 12 | """ 13 | import string 14 | def to_base(num, b): 15 | result = '' 16 | alphabet = string.digits + string.ascii_uppercase 17 | while num > 0: 18 | i = num % b 19 | num = num // b 20 | result = result + alphabet[i] 21 | return result[::-1] 22 | """ 23 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/topological_ordering.py: -------------------------------------------------------------------------------- 1 | def topological_ordering(nodes): 2 | ordered_nodes = [node for node in nodes if not node.incoming_nodes] 3 | 4 | for node in ordered_nodes: 5 | for nextnode in node.outgoing_nodes: 6 | if set(ordered_nodes).issuperset(nextnode.incoming_nodes) and nextnode not in ordered_nodes: 7 | ordered_nodes.append(nextnode) 8 | 9 | return ordered_nodes 10 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/correct_python_programs/wrap.py: -------------------------------------------------------------------------------- 1 | 2 | def wrap(text, cols): 3 | lines = [] 4 | while len(text) > cols: 5 | end = text.rfind(' ', 0, cols + 1) 6 | if end == -1: 7 | end = cols 8 | line, text = text[:end], text[end:] 9 | lines.append(line) 10 | 11 | lines.append(text) 12 | return lines 13 | 14 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/final_progress.txt: -------------------------------------------------------------------------------- 1 | bitcount.py 2 | bucketsort.py 3 | find_first_in_sorted.py 4 | find_in_sorted.py 5 | flatten.py 6 | gcd.py 7 | get_factors.py 8 | hanoi.py 9 | is_valid_parenthesization.py 10 | kheapsort.py 11 | knapsack.py 12 | kth.py 13 | lcs_length.py 14 | levenshtein.py 15 | lis.py 16 | longest_common_subsequence.py 17 | max_sublist_sum.py 18 | mergesort.py 19 | next_palindrome.py 20 | next_permutation.py 21 | pascal.py 22 | possible_change.py 23 | powerset.py 24 | quicksort.py 25 | rpn_eval.py 26 | shunting_yard.py 27 | sieve.py 28 | sqrt.py 29 | subsequences.py 30 | to_base.py 31 | wrap.py 32 | 33 | check for explicit relative imports 34 | check Java version is in fact buggy 35 | check tester works for each 36 | check py3 prints well 37 | check that the hardcoded tests are correct in each folder 38 | 39 | breadth_first_search.py * python, java 40 | depth_first_search.py * python, java 41 | detect_cycle.py * python, java 42 | minimum_spanning_tree.py * python, java (Java is not buggy) 43 | reverse_linked_list.py * python, java 44 | shortest_path_length.py * java (no Python yet) 45 | shortest_path_lengths.py * python, java 46 | shortest_paths.py * python, java 47 | topological_ordering.py * python, java 48 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/BITCOUNT.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | /* 3 | * To change this template, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /** 8 | * 9 | * @author derricklin 10 | */ 11 | public class BITCOUNT { 12 | public static int bitcount(int n) { 13 | int count = 0; 14 | while (n != 0) { 15 | n = (n ^ (n - 1)); 16 | count++; 17 | } 18 | return count; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/BUCKETSORT.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class BUCKETSORT { 14 | public static ArrayList bucketsort(ArrayList arr, int k) { 15 | ArrayList counts = new ArrayList(Collections.nCopies(k,0)); 16 | for (Integer x : arr) { 17 | counts.set(x,counts.get(x)+1); 18 | } 19 | 20 | ArrayList sorted_arr = new ArrayList(100); 21 | int i = 0; 22 | for (Integer count : arr) { // arr is counts in fixed version 23 | sorted_arr.addAll(Collections.nCopies(count, i)); 24 | i++; 25 | } 26 | 27 | return sorted_arr; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/DEPTH_FIRST_SEARCH.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class DEPTH_FIRST_SEARCH { 13 | public static boolean depth_first_search(Node startnode, Node goalnode) { 14 | Set nodesvisited = new HashSet<>(); 15 | class Search { 16 | boolean search(Node node) { 17 | if (nodesvisited.contains(node)) { 18 | return false; 19 | } else if (node == goalnode) { 20 | return true; 21 | } else { 22 | for (Node successornodes : node.getSuccessors()) { 23 | if (search(successornodes)) { return true; } 24 | } 25 | } 26 | return false; 27 | } 28 | }; 29 | 30 | Search s = new Search(); 31 | return s.search(startnode); 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/DETECT_CYCLE.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class DETECT_CYCLE { 13 | public static boolean detect_cycle(Node node) { 14 | Node hare = node; 15 | Node tortoise = node; 16 | 17 | while (true) { 18 | if (hare.getSuccessor() == null) 19 | return false; 20 | 21 | tortoise = tortoise.getSuccessor(); 22 | hare = hare.getSuccessor().getSuccessor(); 23 | 24 | if (hare == tortoise) 25 | return true; 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/FIND_FIRST_IN_SORTED.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class FIND_FIRST_IN_SORTED { 14 | 15 | public static int find_first_in_sorted(int[] arr, int x) { 16 | int lo = 0; 17 | int hi = arr.length; 18 | 19 | while (lo <= hi) { 20 | int mid = (lo + hi) / 2; // check if this is floor division 21 | 22 | if (x == arr[mid] && (mid == 0 || x != arr[mid-1])) { 23 | return mid; 24 | } else if (x <= arr[mid]) { 25 | hi = mid; 26 | } else { 27 | lo = mid + 1; 28 | } 29 | } 30 | 31 | return -1; 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/FIND_IN_SORTED.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | /* 3 | * To change this template, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /** 8 | * 9 | * @author derricklin 10 | */ 11 | public class FIND_IN_SORTED { 12 | public static int binsearch(int[] arr, int x, int start, int end) { 13 | if (start == end) { 14 | return -1; 15 | } 16 | int mid = start + (end - start) / 2; // check this is floor division 17 | if (x < arr[mid]) { 18 | return binsearch(arr, x, start, mid); 19 | } else if (x > arr[mid]) { 20 | return binsearch(arr, x, mid, end); 21 | } else { 22 | return mid; 23 | } 24 | } 25 | 26 | public static int find_in_sorted(int[] arr, int x) { 27 | return binsearch(arr, x, 0, arr.length); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/FLATTEN.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class FLATTEN { 13 | public static Object flatten(Object arr) { 14 | if (arr instanceof ArrayList) { 15 | ArrayList narr = (ArrayList) arr; 16 | ArrayList result = new ArrayList(50); 17 | for (Object x : narr) { 18 | if (x instanceof ArrayList) { 19 | result.addAll((ArrayList) flatten(x)); 20 | } else { 21 | result.add(flatten(x)); 22 | } 23 | } 24 | return result; 25 | } else { 26 | return flatten(arr); 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/GCD.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class GCD { 14 | 15 | public static int gcd(int a, int b) { 16 | if (b == 0) { 17 | return a; 18 | } else { 19 | return gcd(a % b, b); 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/GET_FACTORS.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class GET_FACTORS { 14 | public static ArrayList get_factors(int n) { 15 | if (n == 1) { 16 | return new ArrayList(); 17 | } 18 | int max = (int)(Math.sqrt(n) + 1.0); 19 | for (int i=2; i < max; i++) { 20 | if (n % i == 0) { 21 | ArrayList prepend = new ArrayList(0); 22 | prepend.add(i); 23 | prepend.addAll(get_factors(n / i)); 24 | return prepend; 25 | } 26 | } 27 | return new ArrayList(); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/IS_VALID_PARENTHESIZATION.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class IS_VALID_PARENTHESIZATION { 13 | public static Boolean is_valid_parenthesization(String parens) { 14 | int depth = 0; 15 | for (int i = 0; i < parens.length(); i++) { 16 | Character paren = parens.charAt(i); 17 | if (paren.equals('(')) { 18 | depth++; 19 | } else { 20 | depth--; 21 | if (depth < 0) { return false; } 22 | } 23 | } 24 | return true; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/KHEAPSORT.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class KHEAPSORT { 13 | // import heapq 14 | // heap is data structure used for priority queue 15 | // pq O(log n) to pull off lowest priority item 16 | // heap is a type of binary tree 17 | // every node its value smaller than everything below it 18 | // priority queue in java is least-value first (at head) 19 | 20 | public static ArrayList kheapsort(ArrayList arr, int k) { 21 | PriorityQueue heap = new PriorityQueue(); 22 | for (Integer v : arr.subList(0,k)) { 23 | heap.add(v); 24 | } 25 | 26 | ArrayList output = new ArrayList(); 27 | for (Integer x : arr) { 28 | heap.add(x); 29 | Integer popped = heap.poll(); 30 | output.add(popped); 31 | } 32 | 33 | while (!heap.isEmpty()) { 34 | output.add(heap.poll()); 35 | } 36 | 37 | return output; 38 | 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/KNAPSACK.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | import java.lang.*; 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class KNAPSACK { 14 | public static int knapsack(int capacity, int [][] items) { 15 | int weight = 0, value = 0; 16 | int n = items.length; 17 | int memo[][] = new int[n + 1][capacity + 1]; 18 | 19 | for (int i = 0; i <= n ; i++) 20 | { 21 | if (i - 1 >= 0) { 22 | weight = items[i - 1][0]; 23 | value = items[i - 1][1]; 24 | } 25 | for (int j = 0; j <= capacity; j++) 26 | { 27 | if (i == 0 || j == 0) { 28 | memo[i][j] = 0; 29 | } 30 | else if (weight < j) { 31 | memo[i][j] = Math.max(memo[i - 1][j], value + memo[i - 1][j - weight]); 32 | } 33 | else { 34 | memo[i][j] = memo [i-1][j]; 35 | } 36 | 37 | } 38 | } 39 | return memo[n][capacity]; 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/KTH.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | /* 4 | 5 | */ 6 | public class KTH { 7 | public static Integer kth(ArrayList arr, int k) { 8 | int pivot = arr.get(0); 9 | ArrayList below, above; 10 | below = new ArrayList(arr.size()); 11 | above = new ArrayList(arr.size()); 12 | for (Integer x : arr) { 13 | if (x < pivot) { 14 | below.add(x); 15 | } else if (x > pivot) { 16 | above.add(x); 17 | } 18 | } 19 | 20 | int num_less = below.size(); 21 | int num_lessoreq = arr.size() - above.size(); 22 | if (k < num_less) { 23 | return kth(below, k); 24 | } else if (k >= num_lessoreq) { 25 | return kth(above, k); 26 | } else { 27 | return pivot; 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/LEVENSHTEIN.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class LEVENSHTEIN { 13 | public static int levenshtein(String source, String target) { 14 | if (source.isEmpty() || target.isEmpty()) { 15 | return source.isEmpty() ? target.length() : source.length(); 16 | } else if (source.charAt(0) == target.charAt(0)) { 17 | return 1 + levenshtein(source.substring(1), target.substring(1)); 18 | } else { 19 | return 1 + Math.min(Math.min( 20 | levenshtein(source, target.substring(1)), 21 | levenshtein(source.substring(1), target.substring(1))), 22 | levenshtein(source.substring(1), target) 23 | ); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/LIS.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class LIS { 13 | public static int lis(int[] arr) { 14 | Map ends = new HashMap(100); 15 | int longest = 0; 16 | 17 | int i = 0; 18 | for (int val : arr) { 19 | 20 | ArrayList prefix_lengths = new ArrayList(100); 21 | for (int j=1; j < longest+1; j++) { 22 | if (arr[ends.get(j)] < val) { 23 | prefix_lengths.add(j); 24 | } 25 | } 26 | 27 | int length = !prefix_lengths.isEmpty() ? Collections.max(prefix_lengths) : 0; 28 | 29 | if (length == longest || val < arr[ends.get(length+1)]) { 30 | ends.put(length+1, i); 31 | longest = length + 1; 32 | } 33 | 34 | i++; 35 | } 36 | return longest; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/LONGEST_COMMON_SUBSEQUENCE.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class LONGEST_COMMON_SUBSEQUENCE { 14 | public static String longest_common_subsequence(String a, String b) { 15 | if (a.isEmpty() || b.isEmpty()) { 16 | return ""; 17 | } else if (a.charAt(0) == b.charAt(0)) { 18 | return a.charAt(0) + longest_common_subsequence(a.substring(1), b); 19 | } else { 20 | String fst = longest_common_subsequence(a, b.substring(1)); 21 | String snd = longest_common_subsequence(a.substring(1), b); 22 | return fst.length() >= snd.length() ? fst : snd; 23 | } 24 | 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/MAX_SUBLIST_SUM.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class MAX_SUBLIST_SUM { 14 | public static int max_sublist_sum(int[] arr) { 15 | int max_ending_here = 0; 16 | int max_so_far = 0; 17 | 18 | for (int x : arr) { 19 | max_ending_here = max_ending_here + x; 20 | max_so_far = Math.max(max_so_far, max_ending_here); 21 | } 22 | 23 | return max_so_far; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/NEXT_PALINDROME.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | import java.lang.Math.*; 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class NEXT_PALINDROME { 14 | public static String next_palindrome(int[] digit_list) { 15 | int high_mid = Math.floorDiv(digit_list.length, 2); 16 | int low_mid = Math.floorDiv(digit_list.length - 1, 2); 17 | 18 | while (high_mid < digit_list.length && low_mid >= 0) { 19 | if (digit_list[high_mid] == 9) { 20 | digit_list[high_mid] = 0; 21 | digit_list[low_mid] = 0; 22 | high_mid += 1; 23 | low_mid -= 1; 24 | } else { 25 | digit_list[high_mid] += 1; 26 | if (low_mid != high_mid) { 27 | digit_list[low_mid] += 1; 28 | } 29 | return Arrays.toString(digit_list); 30 | } 31 | } 32 | 33 | ArrayList otherwise = new ArrayList(); 34 | otherwise.add(1); 35 | otherwise.addAll(Collections.nCopies(digit_list.length, 0)); 36 | otherwise.add(1); 37 | 38 | return String.valueOf(otherwise); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/PASCAL.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class PASCAL { 14 | public static ArrayList> pascal(int n) { 15 | ArrayList> rows = new ArrayList>(); 16 | ArrayList init = new ArrayList(); 17 | init.add(1); 18 | rows.add(init); 19 | 20 | for (int r=1; r row = new ArrayList(); 22 | for (int c=0; c 0) { 25 | upleft = rows.get(r-1).get(c-1); 26 | } else { 27 | upleft = 0; 28 | } 29 | if (c < r) { 30 | upright = rows.get(r-1).get(c); 31 | } else { 32 | upright = 0; 33 | } 34 | row.add(upleft+upright); 35 | } 36 | rows.add(row); 37 | } 38 | 39 | return rows; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/POSSIBLE_CHANGE.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | /* 4 | * To change this template, choose Tools | Templates 5 | * and open the template in the editor. 6 | */ 7 | 8 | /** 9 | * 10 | * @author derricklin 11 | */ 12 | public class POSSIBLE_CHANGE { 13 | public static int possible_change(int[] coins, int total) { 14 | if (total == 0) { 15 | return 1; 16 | } 17 | if (total < 0) { 18 | return 0; 19 | } 20 | 21 | int first = coins[0]; 22 | int[] rest = Arrays.copyOfRange(coins, 1, coins.length); 23 | return possible_change(coins, total-first) + possible_change(rest, total); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/POWERSET.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class POWERSET { 14 | public static ArrayList powerset(ArrayList arr) { 15 | if (!arr.isEmpty()) { 16 | Object first = arr.get(0); 17 | arr.remove(0); 18 | ArrayList rest = arr; 19 | ArrayList rest_subsets = powerset(rest); 20 | 21 | ArrayList output = new ArrayList(100); 22 | ArrayList to_add = new ArrayList(100); 23 | to_add.add(first); 24 | for (ArrayList subset : rest_subsets) { 25 | to_add.addAll(subset); 26 | } 27 | output.add(to_add); 28 | 29 | return output; 30 | } else { 31 | ArrayList empty_set = new ArrayList(); 32 | empty_set.add(new ArrayList()); 33 | return empty_set; 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/QUICKSORT.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class QUICKSORT { 14 | public static ArrayList quicksort(ArrayList arr) { 15 | if (arr.isEmpty()) { 16 | return new ArrayList(); 17 | } 18 | 19 | Integer pivot = arr.get(0); 20 | ArrayList lesser = new ArrayList(); 21 | ArrayList greater = new ArrayList(); 22 | 23 | for (Integer x : arr.subList(1, arr.size())) { 24 | if (x < pivot) { 25 | lesser.add(x); 26 | } else if (x > pivot) { 27 | greater.add(x); 28 | } 29 | } 30 | ArrayList middle = new ArrayList(); 31 | middle.add(pivot); 32 | lesser = quicksort(lesser); 33 | greater = quicksort(greater); 34 | middle.addAll(greater); 35 | lesser.addAll(middle); 36 | return lesser; 37 | 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/REVERSE_LINKED_LIST.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class REVERSE_LINKED_LIST { 14 | public static Node reverse_linked_list(Node node) { 15 | Node prevnode = null; 16 | Node nextnode; 17 | while (node != null) { 18 | nextnode = node.getSuccessor(); 19 | node.setSuccessor(prevnode); 20 | node = nextnode; 21 | } 22 | return prevnode; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/RPN_EVAL.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | import java.util.function.BinaryOperator; 4 | 5 | /* 6 | * To change this template, choose Tools | Templates 7 | * and open the template in the editor. 8 | */ 9 | 10 | /** 11 | * 12 | * @author derricklin 13 | */ 14 | public class RPN_EVAL { 15 | public static Double rpn_eval(ArrayList tokens) { 16 | Map> op = new HashMap>(); 17 | op.put("+", (a, b) -> a + b); 18 | op.put("-", (a, b) -> a - b); 19 | op.put("*", (a, b) -> a * b); 20 | op.put("/", (a, b) -> a / b); 21 | 22 | 23 | Stack stack = new Stack(); 24 | 25 | for (Object token : tokens) { 26 | if (Double.class.isInstance(token)) { 27 | stack.push((Double) token); 28 | } else { 29 | token = (String) token; 30 | Double a = (Double) stack.pop(); 31 | Double b = (Double) stack.pop(); 32 | Double c = 0.0; 33 | BinaryOperator bin_op = op.get(token); 34 | c = bin_op.apply(a,b); 35 | stack.push(c); 36 | } 37 | } 38 | 39 | return (Double) stack.pop(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/SHUNTING_YARD.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class SHUNTING_YARD { 14 | public static List shunting_yard(ArrayList tokens) { 15 | Map precedence = new HashMap(); 16 | precedence.put("+",1); 17 | precedence.put("-",1); 18 | precedence.put("*",2); 19 | precedence.put("/",2); 20 | 21 | ArrayList rpntokens = new ArrayList(100); 22 | ArrayDeque opstack = new ArrayDeque(); 23 | 24 | for (Object token: tokens) { 25 | if (Integer.class.isInstance(token)) { 26 | // cover also Double case I guess? 27 | rpntokens.add((Integer) token); 28 | } else { 29 | String operator = (String) token; 30 | while (!opstack.isEmpty() && precedence.get(operator) <= precedence.get(opstack.getLast())) { 31 | rpntokens.add(opstack.pop()); 32 | } 33 | } 34 | } 35 | 36 | while (!opstack.isEmpty()) { 37 | rpntokens.add(opstack.pop()); 38 | } 39 | 40 | return rpntokens; 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/SIEVE.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class SIEVE { 14 | 15 | public static boolean all(ArrayList arr) { 16 | for (boolean value : arr) { 17 | if (!value) { return false; } 18 | } 19 | return true; 20 | } 21 | 22 | public static boolean any(ArrayList arr) { 23 | for (boolean value: arr) { 24 | if (value) { return true; } 25 | } 26 | return false; 27 | } 28 | 29 | public static ArrayList list_comp(int n, ArrayList primes) { 30 | ArrayList built_comprehension = new ArrayList(); 31 | for (Integer p : primes) { 32 | built_comprehension.add(n % p > 0); 33 | } 34 | return built_comprehension; 35 | } 36 | 37 | 38 | public static ArrayList sieve(Integer max) { 39 | ArrayList primes = new ArrayList(); 40 | for (int n=2; n epsilon) { 17 | approx = 0.5d * (approx + x / approx); 18 | } 19 | return approx; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/SUBSEQUENCES.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class SUBSEQUENCES { 14 | public static ArrayList subsequences(int a, int b, int k) { 15 | if (k == 0) { 16 | return new ArrayList(); 17 | } 18 | 19 | ArrayList ret = new ArrayList(50); 20 | for (int i=a; i topological_ordering (List directedGraph) { 6 | ArrayList orderedNodes = new ArrayList(); 7 | for (Node node : directedGraph) { 8 | if (node.getPredecessors().isEmpty()) { 9 | orderedNodes.add(node); 10 | } 11 | } 12 | 13 | int listSize = orderedNodes.size(); 14 | for (int i = 0; i < listSize; i++) { 15 | Node node = orderedNodes.get(i); 16 | for (Node nextNode : node.getSuccessors()) { 17 | if (orderedNodes.containsAll(nextNode.getSuccessors()) && !orderedNodes.contains(nextNode)) { 18 | orderedNodes.add(nextNode); 19 | listSize++; 20 | } 21 | } 22 | } 23 | return orderedNodes; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/TO_BASE.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class TO_BASE { 14 | public static String to_base(int num, int b) { 15 | String result = ""; 16 | String alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 17 | int i; 18 | while (num > 0) { 19 | i = num % b; 20 | num = num / b; // floor division? 21 | result = result + String.valueOf(alphabet.charAt(i)); 22 | } 23 | 24 | return result; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/WRAP.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class WRAP { 14 | public static void main(String[] args) { 15 | System.out.println("abc".lastIndexOf("c",30)); 16 | } 17 | 18 | public static ArrayList wrap(String text, int cols) { 19 | ArrayList lines = new ArrayList(); 20 | 21 | String line; 22 | while (text.length() > cols) { 23 | int end = text.lastIndexOf(" ", cols); // off by one? 24 | if (end == -1) { 25 | end = cols; 26 | } 27 | line = text.substring(0,end); 28 | text = text.substring(end); 29 | lines.add(line); 30 | } 31 | 32 | return lines; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/WeightedEdge.java: -------------------------------------------------------------------------------- 1 | package java_programs; 2 | import java.util.*; 3 | 4 | public class WeightedEdge implements Comparable{ 5 | public Node node1; 6 | public Node node2; 7 | public int weight; 8 | 9 | public WeightedEdge () { 10 | node1 = null; 11 | node2 = null; 12 | weight = 0; 13 | } 14 | public WeightedEdge (Node node1, Node node2, int weight) { 15 | this.node1 = node1; 16 | this.node2 = node2; 17 | this.weight = weight; 18 | } 19 | public int compareTo(WeightedEdge compareNode) { 20 | int compareWeight= ((WeightedEdge) compareNode).weight; 21 | 22 | //ascending order 23 | return this.weight - compareWeight; 24 | 25 | //descending order 26 | //return compareWeight - this.weight; 27 | } 28 | } -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/extra/BINARY_SEARCH.java: -------------------------------------------------------------------------------- 1 | //Corrected package name from quixey to java_programs.extra; 2 | //package quixey; 3 | package java_programs.extra; 4 | 5 | import java.util.*; 6 | 7 | public class BINARY_SEARCH { 8 | public static int findFirstInSorted(int[] arr, int x) { 9 | int lo = 0; 10 | int hi = arr.length; 11 | 12 | while (lo <= hi) { 13 | int mid = (lo + hi) / 2; 14 | 15 | if (x == arr[mid] && (mid == 0 || x != arr[mid - 1])) { 16 | return mid; 17 | } else if (x <= arr[mid]) { 18 | hi = mid; 19 | } else { 20 | lo = mid + 1; 21 | } 22 | } 23 | return -1; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/extra/MAXIMUM_WEIGHTED_SUBSET.java: -------------------------------------------------------------------------------- 1 | //Corrected package name from quixey to java_programs.extra; 2 | //package quixey; 3 | package java_programs.extra; 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class MAXIMUM_WEIGHTED_SUBSET { 14 | public static int maxSubsetWeight(int[] weights, int bound) { 15 | if (weights.length == 0) { 16 | return 0; 17 | } 18 | int[][] maxWeight = new int[weights.length][bound]; 19 | for (int w = 0; w <= bound; w++) 20 | maxWeight[0][w] = weights[0] <= w ? weights[0] : 0; 21 | 22 | for (int i = 1; i < weights.length; i++) { 23 | for (int w = 0; w <= bound; w++) { 24 | if (weights[i] > w) { 25 | maxWeight[i][w] = maxWeight[i-1][w]; 26 | } else { 27 | int include = weights[i] + maxWeight[i-1][w - weights[i]]; 28 | int exclude = maxWeight[i-1][w]; 29 | maxWeight[i][w] = Math.max(include, exclude); 30 | } 31 | } 32 | } 33 | return maxWeight[weights.length-1][bound]; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/extra/MODULO_INVERSE.java: -------------------------------------------------------------------------------- 1 | //Corrected package name from quixey to java_programs.extra; 2 | //package quixey; 3 | package java_programs.extra; 4 | /* 5 | * To change this template, choose Tools | Templates 6 | * and open the template in the editor. 7 | */ 8 | 9 | /** 10 | * 11 | * @author derricklin 12 | */ 13 | public class MODULO_INVERSE { 14 | public static int inverse(int base, int mod) { 15 | if(base == 1) { 16 | return base; 17 | } else { 18 | int coeff = base - inverse(mod % base, base); 19 | return (coeff*mod) / base; 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/java_programs/extra/NESTED_PARENS.java: -------------------------------------------------------------------------------- 1 | //Corrected package name from quixey to java_programs.extra; 2 | //package quixey; 3 | package java_programs.extra; 4 | import java.util.*; 5 | 6 | public class NESTED_PARENS { 7 | public static void main(String[] args) { 8 | Scanner in = new Scanner(System.in); 9 | String S = in.next(); 10 | int[] num = new int[S.length()]; 11 | for(int i=0; i>> bitcount(127) 23 | 7 24 | >>> bitcount(128) 25 | 1 26 | """ 27 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/breadth_first_search.py: -------------------------------------------------------------------------------- 1 | 2 | from collections import deque as Queue 3 | 4 | def breadth_first_search(startnode, goalnode): 5 | queue = Queue() 6 | queue.append(startnode) 7 | 8 | nodesseen = set() 9 | nodesseen.add(startnode) 10 | 11 | while True: 12 | node = queue.popleft() 13 | 14 | if node is goalnode: 15 | return True 16 | else: 17 | queue.extend(node for node in node.successors if node not in nodesseen) 18 | nodesseen.update(node.successors) 19 | 20 | return False 21 | 22 | 23 | 24 | """ 25 | Breadth-First Search 26 | 27 | 28 | Input: 29 | startnode: A digraph node 30 | goalnode: A digraph node 31 | 32 | Output: 33 | Whether goalnode is reachable from startnode 34 | """ 35 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/bucketsort.py: -------------------------------------------------------------------------------- 1 | def bucketsort(arr, k): 2 | counts = [0] * k 3 | for x in arr: 4 | counts[x] += 1 5 | 6 | sorted_arr = [] 7 | for i, count in enumerate(arr): 8 | sorted_arr.extend([i] * count) 9 | 10 | return sorted_arr 11 | 12 | 13 | 14 | """ 15 | Bucket Sort 16 | 17 | 18 | Input: 19 | arr: A list of small ints 20 | k: Upper bound of the size of the ints in arr (not inclusive) 21 | 22 | Precondition: 23 | all(isinstance(x, int) and 0 <= x < k for x in arr) 24 | 25 | Output: 26 | The elements of arr in sorted order 27 | """ 28 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/depth_first_search.py: -------------------------------------------------------------------------------- 1 | def depth_first_search(startnode, goalnode): 2 | nodesvisited = set() 3 | 4 | def search_from(node): 5 | if node in nodesvisited: 6 | return False 7 | elif node is goalnode: 8 | return True 9 | else: 10 | return any( 11 | search_from(nextnode) for nextnode in node.successors 12 | ) 13 | 14 | return search_from(startnode) 15 | 16 | 17 | 18 | """ 19 | Depth-first Search 20 | 21 | 22 | Input: 23 | startnode: A digraph node 24 | goalnode: A digraph node 25 | 26 | Output: 27 | Whether goalnode is reachable from startnode 28 | """ 29 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/detect_cycle.py: -------------------------------------------------------------------------------- 1 | def detect_cycle(node): 2 | hare = tortoise = node 3 | 4 | while True: 5 | if hare.successor is None: 6 | return False 7 | 8 | tortoise = tortoise.successor 9 | hare = hare.successor.successor 10 | 11 | if hare is tortoise: 12 | return True 13 | 14 | 15 | 16 | """ 17 | Linked List Cycle Detection 18 | tortoise-hare 19 | 20 | Implements the tortoise-and-hare method of cycle detection. 21 | 22 | Input: 23 | node: The head node of a linked list 24 | 25 | Output: 26 | Whether the linked list is cyclic 27 | """ 28 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/find_first_in_sorted.py: -------------------------------------------------------------------------------- 1 | def find_first_in_sorted(arr, x): 2 | lo = 0 3 | hi = len(arr) 4 | 5 | while lo <= hi: 6 | mid = (lo + hi) // 2 7 | 8 | if x == arr[mid] and (mid == 0 or x != arr[mid - 1]): 9 | return mid 10 | 11 | elif x <= arr[mid]: 12 | hi = mid 13 | 14 | else: 15 | lo = mid + 1 16 | 17 | return -1 18 | 19 | 20 | """ 21 | Fancy Binary Search 22 | fancy-binsearch 23 | 24 | 25 | Input: 26 | arr: A sorted list of ints 27 | x: A value to find 28 | 29 | Output: 30 | The lowest index i such that arr[i] == x, or -1 if x not in arr 31 | 32 | Example: 33 | >>> find_first_in_sorted([3, 4, 5, 5, 5, 5, 6], 5) 34 | 2 35 | """ 36 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/find_in_sorted.py: -------------------------------------------------------------------------------- 1 | def find_in_sorted(arr, x): 2 | def binsearch(start, end): 3 | if start == end: 4 | return -1 5 | mid = start + (end - start) // 2 6 | if x < arr[mid]: 7 | return binsearch(start, mid) 8 | elif x > arr[mid]: 9 | return binsearch(mid, end) 10 | else: 11 | return mid 12 | 13 | return binsearch(0, len(arr)) 14 | 15 | 16 | 17 | 18 | """ 19 | Binary Search 20 | 21 | Input: 22 | arr: A sorted list of ints 23 | x: A value to find 24 | 25 | Output: 26 | An index i such that arr[i] == x, or -1 if x not in arr 27 | 28 | Example: 29 | >>> find_in_sorted([3, 4, 5, 5, 5, 5, 6], 5) 30 | 3 31 | """ 32 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/flatten.py: -------------------------------------------------------------------------------- 1 | def flatten(arr): 2 | for x in arr: 3 | if isinstance(x, list): 4 | for y in flatten(x): 5 | yield y 6 | else: 7 | yield flatten(x) 8 | 9 | 10 | 11 | """ 12 | Flatten 13 | 14 | Flattens a nested list data structure into a single list. 15 | 16 | 17 | Input: 18 | arr: A list 19 | 20 | Precondition: 21 | The input has no list containment cycles 22 | 23 | Output: 24 | A generator for the input's non-list objects 25 | 26 | Example: 27 | >>> list(flatten([[1, [], [2, 3]], [[4]], 5])) 28 | [1, 2, 3, 4, 5] 29 | """ 30 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/gcd.py: -------------------------------------------------------------------------------- 1 | def gcd(a, b): 2 | if b == 0: 3 | return a 4 | else: 5 | return gcd(a % b, b) 6 | 7 | 8 | """ 9 | Input: 10 | a: A nonnegative int 11 | b: A nonnegative int 12 | 13 | 14 | Greatest Common Divisor 15 | 16 | Precondition: 17 | isinstance(a, int) and isinstance(b, int) 18 | 19 | Output: 20 | The greatest int that divides evenly into a and b 21 | 22 | Example: 23 | >>> gcd(35, 21) 24 | 7 25 | 26 | """ 27 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/get_factors.py: -------------------------------------------------------------------------------- 1 | 2 | def get_factors(n): 3 | if n == 1: 4 | return [] 5 | 6 | for i in range(2, int(n ** 0.5) + 1): 7 | if n % i == 0: 8 | return [i] + get_factors(n // i) 9 | 10 | return [] 11 | 12 | 13 | """ 14 | Prime Factorization 15 | 16 | 17 | Factors an int using naive trial division. 18 | 19 | Input: 20 | n: An int to factor 21 | 22 | Output: 23 | A list of the prime factors of n in sorted order with repetition 24 | 25 | Precondition: 26 | n >= 1 27 | 28 | Examples: 29 | >>> get_factors(1) 30 | [] 31 | >>> get_factors(100) 32 | [2, 2, 5, 5] 33 | >>> get_factors(101) 34 | [101] 35 | """ 36 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/hanoi.py: -------------------------------------------------------------------------------- 1 | def hanoi(height, start=1, end=3): 2 | steps = [] 3 | if height > 0: 4 | helper = ({1, 2, 3} - {start} - {end}).pop() 5 | steps.extend(hanoi(height - 1, start, helper)) 6 | steps.append((start, helper)) 7 | steps.extend(hanoi(height - 1, helper, end)) 8 | 9 | return steps 10 | 11 | 12 | """ 13 | Towers of Hanoi 14 | hanoi 15 | 16 | 17 | An algorithm for solving the Towers of Hanoi puzzle. Three pegs exist, with a stack of differently-sized 18 | disks beginning on one peg, ordered from smallest on top to largest on bottom. The goal is to move the 19 | entire stack to a different peg via a series of steps. Each step must move a single disk from one peg to 20 | another. At no point may a disk be placed on top of another smaller disk. 21 | 22 | Input: 23 | height: The height of the initial stack of disks. 24 | start: The numbered peg where the initial stack resides. 25 | end: The numbered peg which the stack must be moved onto. 26 | 27 | Preconditions: 28 | height >= 0 29 | start in (1, 2, 3) 30 | end in (1, 2, 3) 31 | 32 | Output: 33 | An ordered list of pairs (a, b) representing the shortest series of steps (each step moving 34 | the top disk from peg a to peg b) that solves the puzzle. 35 | """ 36 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/is_valid_parenthesization.py: -------------------------------------------------------------------------------- 1 | 2 | def is_valid_parenthesization(parens): 3 | depth = 0 4 | for paren in parens: 5 | if paren == '(': 6 | depth += 1 7 | else: 8 | depth -= 1 9 | if depth < 0: 10 | return False 11 | 12 | return True 13 | 14 | 15 | """ 16 | Nested Parens 17 | Input: 18 | parens: A string of parentheses 19 | 20 | Precondition: 21 | all(paren in '()' for paren in parens) 22 | 23 | Output: 24 | Whether the parentheses are properly nested 25 | 26 | Examples: 27 | >>> is_valid_parenthesization('((()()))()') 28 | True 29 | >>> is_valid_parenthesization(')()(') 30 | False 31 | """ 32 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/kheapsort.py: -------------------------------------------------------------------------------- 1 | def kheapsort(arr, k): 2 | import heapq 3 | 4 | heap = arr[:k] 5 | heapq.heapify(heap) 6 | 7 | for x in arr: 8 | yield heapq.heappushpop(heap, x) 9 | 10 | while heap: 11 | yield heapq.heappop(heap) 12 | 13 | 14 | """ 15 | K-Heapsort 16 | k-heapsort 17 | 18 | Sorts an almost-sorted array, wherein every element is no more than k units from its sorted position, in O(n log k) time. 19 | 20 | Input: 21 | arr: A list of ints 22 | k: an int indicating the maximum displacement of an element in arr from its final sorted location 23 | 24 | Preconditions: 25 | The elements of arr are unique. 26 | Each element in arr is at most k places from its sorted position. 27 | 28 | Output: 29 | A generator that yields the elements of arr in sorted order 30 | 31 | Example: 32 | >>> list(kheapsort([3, 2, 1, 5, 4], 2)) 33 | [1, 2, 3, 4, 5] 34 | >>> list(kheapsort([5, 4, 3, 2, 1], 4)) 35 | [1, 2, 3, 4, 5] 36 | >>> list(kheapsort([1, 2, 3, 4, 5], 0)) 37 | [1, 2, 3, 4, 5] 38 | """ 39 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/knapsack.py: -------------------------------------------------------------------------------- 1 | 2 | def knapsack(capacity, items): 3 | from collections import defaultdict 4 | memo = defaultdict(int) 5 | 6 | for i in range(1, len(items) + 1): 7 | weight, value = items[i - 1] 8 | 9 | for j in range(1, capacity + 1): 10 | memo[i, j] = memo[i - 1, j] 11 | 12 | if weight < j: 13 | memo[i, j] = max( 14 | memo[i, j], 15 | value + memo[i - 1, j - weight] 16 | ) 17 | 18 | return memo[len(items), capacity] 19 | 20 | """ 21 | Knapsack 22 | knapsack 23 | 24 | You have a knapsack that can hold a maximum weight. You are given a selection of items, each with a weight and a value. You may 25 | choose to take or leave each item, but you must choose items whose total weight does not exceed the capacity of your knapsack. 26 | 27 | Input: 28 | capacity: Max weight the knapsack can hold, an int 29 | items: The items to choose from, a list of (weight, value) pairs 30 | 31 | Output: 32 | The maximum total value of any combination of items that the knapsack can hold 33 | 34 | Example: 35 | >>> knapsack(100, [(60, 10), (50, 8), (20, 4), (20, 4), (8, 3), (3, 2)]) 36 | 19 37 | """ 38 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/kth.py: -------------------------------------------------------------------------------- 1 | def kth(arr, k): 2 | pivot = arr[0] 3 | below = [x for x in arr if x < pivot] 4 | above = [x for x in arr if x > pivot] 5 | 6 | num_less = len(below) 7 | num_lessoreq = len(arr) - len(above) 8 | 9 | if k < num_less: 10 | return kth(below, k) 11 | elif k >= num_lessoreq: 12 | return kth(above, k) 13 | else: 14 | return pivot 15 | 16 | 17 | 18 | """ 19 | QuickSelect 20 | 21 | This is an efficient equivalent to sorted(arr)[k]. 22 | 23 | Input: 24 | arr: A list of ints 25 | k: An int 26 | 27 | Precondition: 28 | 0 <= k < len(arr) 29 | 30 | Output: 31 | The kth-lowest element of arr (0-based) 32 | """ 33 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/lcs_length.py: -------------------------------------------------------------------------------- 1 | def lcs_length(s, t): 2 | from collections import Counter 3 | 4 | dp = Counter() 5 | 6 | for i in range(len(s)): 7 | for j in range(len(t)): 8 | if s[i] == t[j]: 9 | dp[i, j] = dp[i - 1, j] + 1 10 | 11 | return max(dp.values()) if dp else 0 12 | 13 | 14 | 15 | """ 16 | Longest Common Substring 17 | longest-common-substring 18 | 19 | Input: 20 | s: a string 21 | t: a string 22 | 23 | Output: 24 | Length of the longest substring common to s and t 25 | 26 | Example: 27 | >>> lcs_length('witch', 'sandwich') 28 | 2 29 | >>> lcs_length('meow', 'homeowner') 30 | 4 31 | """ 32 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/levenshtein.py: -------------------------------------------------------------------------------- 1 | def levenshtein(source, target): 2 | if source == '' or target == '': 3 | return len(source) or len(target) 4 | 5 | elif source[0] == target[0]: 6 | return 1 + levenshtein(source[1:], target[1:]) 7 | 8 | else: 9 | return 1 + min( 10 | levenshtein(source, target[1:]), 11 | levenshtein(source[1:], target[1:]), 12 | levenshtein(source[1:], target) 13 | ) 14 | 15 | """ 16 | Levenshtein Distance 17 | 18 | 19 | Calculates the Levenshtein distance between two strings. The Levenshtein distance is defined as the minimum amount of single-character edits (either removing a character, adding a character, or changing a character) necessary to transform a source string into a target string. 20 | 21 | Input: 22 | source: The string you begin with. 23 | target: The string to transform into. 24 | 25 | Output: 26 | The Levenshtein distance between the source and target. 27 | 28 | Example: 29 | electron can be transformed into neutron by removing the e, turning the l into n, and turning the c into u. 30 | >>> levenshtein(electron, neutron) 31 | 3 32 | """ 33 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/lis.py: -------------------------------------------------------------------------------- 1 | 2 | def lis(arr): 3 | ends = {} 4 | longest = 0 5 | 6 | for i, val in enumerate(arr): 7 | 8 | prefix_lengths = [j for j in range(1, longest + 1) if arr[ends[j]] < val] 9 | 10 | length = max(prefix_lengths) if prefix_lengths else 0 11 | 12 | if length == longest or val < arr[ends[length + 1]]: 13 | ends[length + 1] = i 14 | longest = length + 1 15 | 16 | return longest 17 | 18 | 19 | 20 | """ 21 | Longest Increasing Subsequence 22 | longest-increasing-subsequence 23 | 24 | 25 | Input: 26 | arr: A sequence of ints 27 | 28 | Precondition: 29 | The ints in arr are unique 30 | 31 | Output: 32 | The length of the longest monotonically increasing subsequence of arr 33 | 34 | Example: 35 | >>> lis([4, 1, 5, 3, 7, 6, 2]) 36 | 3 37 | """ 38 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/longest_common_subsequence.py: -------------------------------------------------------------------------------- 1 | def longest_common_subsequence(a, b): 2 | if not a or not b: 3 | return '' 4 | 5 | elif a[0] == b[0]: 6 | return a[0] + longest_common_subsequence(a[1:], b) 7 | 8 | else: 9 | return max( 10 | longest_common_subsequence(a, b[1:]), 11 | longest_common_subsequence(a[1:], b), 12 | key=len 13 | ) 14 | 15 | 16 | 17 | """ 18 | Longest Common Subsequence 19 | 20 | 21 | Calculates the longest subsequence common to the two input strings. (A subsequence is any sequence of letters in the same order 22 | they appear in the string, possibly skipping letters in between.) 23 | 24 | Input: 25 | a: The first string to consider. 26 | b: The second string to consider. 27 | 28 | Output: 29 | The longest string which is a subsequence of both strings. (If multiple subsequences of equal length exist, either is OK.) 30 | 31 | Example: 32 | >>> longest_common_subsequence('headache', 'pentadactyl') 33 | 'eadac' 34 | """ 35 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/max_sublist_sum.py: -------------------------------------------------------------------------------- 1 | 2 | def max_sublist_sum(arr): 3 | max_ending_here = 0 4 | max_so_far = 0 5 | 6 | for x in arr: 7 | max_ending_here = max_ending_here + x 8 | max_so_far = max(max_so_far, max_ending_here) 9 | 10 | return max_so_far 11 | 12 | 13 | """ 14 | Max Sublist Sum 15 | max-sublist-sum 16 | 17 | Efficient equivalent to max(sum(arr[i:j]) for 0 <= i <= j <= len(arr)) 18 | 19 | Algorithm source: WordAligned.org by Thomas Guest 20 | 21 | 22 | Input: 23 | arr: A list of ints 24 | 25 | Output: 26 | The maximum sublist sum 27 | 28 | Example: 29 | >>> max_sublist_sum([4, -5, 2, 1, -1, 3]) 30 | 5 31 | """ 32 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/mergesort.py: -------------------------------------------------------------------------------- 1 | 2 | def mergesort(arr): 3 | def merge(left, right): 4 | result = [] 5 | i = 0 6 | j = 0 7 | while i < len(left) and j < len(right): 8 | if left[i] <= right[j]: 9 | result.append(left[i]) 10 | i += 1 11 | else: 12 | result.append(right[j]) 13 | j += 1 14 | result.extend(left[i:] or right[j:]) 15 | return result 16 | 17 | if len(arr) == 0: 18 | return arr 19 | else: 20 | middle = len(arr) // 2 21 | left = mergesort(arr[:middle]) 22 | right = mergesort(arr[middle:]) 23 | return merge(left, right) 24 | 25 | 26 | 27 | """ 28 | Merge Sort 29 | 30 | 31 | Input: 32 | arr: A list of ints 33 | 34 | Output: 35 | The elements of arr in sorted order 36 | """ 37 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/minimum_spanning_tree.py: -------------------------------------------------------------------------------- 1 | 2 | def minimum_spanning_tree(weight_by_edge): 3 | group_by_node = {} 4 | mst_edges = set() 5 | 6 | for edge in sorted(weight_by_edge, key=weight_by_edge.__getitem__): 7 | u, v = edge 8 | if group_by_node.setdefault(u, {u}) != group_by_node.setdefault(v, {v}): 9 | mst_edges.add(edge) 10 | group_by_node[u].update(group_by_node[v]) 11 | for node in group_by_node[v]: 12 | group_by_node[node].update(group_by_node[u]) 13 | 14 | return mst_edges 15 | 16 | 17 | 18 | 19 | """ 20 | Minimum Spanning Tree 21 | 22 | 23 | Kruskal's algorithm implementation. 24 | 25 | Input: 26 | weight_by_edge: A dict of the form {(u, v): weight} for every undirected graph edge {u, v} 27 | 28 | Precondition: 29 | The input graph is connected 30 | 31 | Output: 32 | A set of edges that connects all the vertices of the input graph and has the least possible total weight. 33 | 34 | Example: 35 | >>> minimum_spanning_tree({ 36 | ... (1, 2): 10, 37 | ... (2, 3): 15, 38 | ... (3, 4): 10, 39 | ... (1, 4): 10 40 | ... }) 41 | {(1, 2), (3, 4), (1, 4)} 42 | """ 43 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/minimum_spanning_tree_test.py: -------------------------------------------------------------------------------- 1 | from .minimum_spanning_tree import minimum_spanning_tree 2 | 3 | 4 | """ 5 | Driver to test minimum spanning tree 6 | """ 7 | def main(): 8 | # Case 1: Simple tree input. 9 | # Output: (1, 2) (3, 4) (1, 4) 10 | result = minimum_spanning_tree({ 11 | (1, 2): 10, 12 | (2, 3): 15, 13 | (3, 4): 10, 14 | (1, 4): 10}) 15 | for edge in result: 16 | print(edge), 17 | print() 18 | 19 | # Case 2: Strongly connected tree input. 20 | # Output: (2, 5) (1, 3) (2, 3) (4, 6) (3, 6) 21 | result = minimum_spanning_tree({ 22 | (1, 2): 6, 23 | (1, 3): 1, 24 | (1, 4): 5, 25 | (2, 3): 5, 26 | (2, 5): 3, 27 | (3, 4): 5, 28 | (3, 5): 6, 29 | (3, 6): 4, 30 | (4, 6): 2, 31 | (5, 6): 6}) 32 | for edge in result: 33 | print(edge), 34 | print() 35 | 36 | # Case 3: Minimum spanning tree input. 37 | # Output: (1, 2) (1, 3) (2, 4) 38 | result = minimum_spanning_tree({ 39 | (1, 2): 6, 40 | (1, 3): 1, 41 | (2, 4): 2}) 42 | for edge in result: 43 | print(edge), 44 | print() 45 | 46 | 47 | if __name__ == "__main__": 48 | main() 49 | 50 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/next_palindrome.py: -------------------------------------------------------------------------------- 1 | def next_palindrome(digit_list): 2 | high_mid = len(digit_list) // 2 3 | low_mid = (len(digit_list) - 1) // 2 4 | while high_mid < len(digit_list) and low_mid >= 0: 5 | if digit_list[high_mid] == 9: 6 | digit_list[high_mid] = 0 7 | digit_list[low_mid] = 0 8 | high_mid += 1 9 | low_mid -= 1 10 | else: 11 | digit_list[high_mid] += 1 12 | if low_mid != high_mid: 13 | digit_list[low_mid] += 1 14 | return digit_list 15 | return [1] + (len(digit_list)) * [0] + [1] 16 | 17 | """ 18 | Finds the next palindromic integer when given the current integer 19 | Integers are stored as arrays of base 10 digits from most significant to least significant 20 | 21 | Input: 22 | digit_list: An array representing the current palindrome 23 | 24 | Output: 25 | An array which represents the next palindrome 26 | 27 | Preconditions: 28 | The initial input array represents a palindrome 29 | 30 | Example 31 | >>> next_palindrome([1,4,9,4,1]) 32 | [1,5,0,5,1] 33 | """ 34 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/next_permutation.py: -------------------------------------------------------------------------------- 1 | 2 | def next_permutation(perm): 3 | for i in range(len(perm) - 2, -1, -1): 4 | if perm[i] < perm[i + 1]: 5 | for j in range(len(perm) - 1, i, -1): 6 | if perm[j] < perm[i]: 7 | next_perm = list(perm) 8 | next_perm[i], next_perm[j] = perm[j], perm[i] 9 | next_perm[i + 1:] = reversed(next_perm[i + 1:]) 10 | return next_perm 11 | 12 | 13 | 14 | """ 15 | Next Permutation 16 | next-perm 17 | 18 | 19 | Input: 20 | perm: A list of unique ints 21 | 22 | Precondition: 23 | perm is not sorted in reverse order 24 | 25 | Output: 26 | The lexicographically next permutation of the elements of perm 27 | 28 | Example: 29 | >>> next_permutation([3, 2, 4, 1]) 30 | [3, 4, 1, 2] 31 | """ 32 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/node.py: -------------------------------------------------------------------------------- 1 | class Node: 2 | def __init__(self, value=None, successor=None, successors=[], predecessors=[], incoming_nodes=[], outgoing_nodes=[]): 3 | self.value = value 4 | self.successor = successor 5 | self.successors = successors 6 | self.predecessors = predecessors 7 | self.incoming_nodes = incoming_nodes 8 | self.outgoing_nodes = outgoing_nodes 9 | 10 | def successor(self): 11 | return self.successor 12 | 13 | def successors(self): 14 | return self.successors 15 | 16 | def predecessors(self): 17 | return self.predecessors 18 | 19 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/pascal.py: -------------------------------------------------------------------------------- 1 | 2 | def pascal(n): 3 | rows = [[1]] 4 | for r in range(1, n): 5 | row = [] 6 | for c in range(0, r): 7 | upleft = rows[r - 1][c - 1] if c > 0 else 0 8 | upright = rows[r - 1][c] if c < r else 0 9 | row.append(upleft + upright) 10 | rows.append(row) 11 | 12 | return rows 13 | 14 | 15 | """ 16 | Pascal's Triangle 17 | pascal 18 | 19 | 20 | 21 | Input: 22 | n: The number of rows to return 23 | 24 | Precondition: 25 | n >= 1 26 | 27 | Output: 28 | The first n rows of Pascal's triangle as a list of n lists 29 | 30 | Example: 31 | >>> pascal(5) 32 | [[1], [1, 1], [1, 2, 1], [1, 3, 3, 1], [1, 4, 6, 4, 1]] 33 | """ 34 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/possible_change.py: -------------------------------------------------------------------------------- 1 | # Python 3 2 | def possible_change(coins, total): 3 | if total == 0: 4 | return 1 5 | if total < 0: 6 | return 0 7 | 8 | first, *rest = coins 9 | return possible_change(coins, total - first) + possible_change(rest, total) 10 | 11 | 12 | 13 | """ 14 | Making Change 15 | change 16 | 17 | 18 | Input: 19 | coins: A list of positive ints representing coin denominations 20 | total: An int value to make change for 21 | 22 | Output: 23 | The number of distinct ways to make change adding up to total using only coins of the given values. 24 | For example, there are exactly four distinct ways to make change for the value 11 using coins [1, 5, 10, 25]: 25 | 1. {1: 11, 5: 0, 10: 0, 25: 0} 26 | 2. {1: 6, 5: 1, 10: 0, 25: 0} 27 | 3. {1: 1, 5: 2, 10: 0, 25: 0} 28 | 4. {1: 1, 5: 0, 10: 1, 25: 0} 29 | 30 | Example: 31 | >>> possible_change([1, 5, 10, 25], 11) 32 | 4 33 | """ 34 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/powerset.py: -------------------------------------------------------------------------------- 1 | 2 | def powerset(arr): 3 | if arr: 4 | first, *rest = arr #python3 just like car and cdr (in this case anyway..) 5 | rest_subsets = powerset(rest) 6 | return [[first] + subset for subset in rest_subsets] 7 | else: 8 | return [[]] 9 | 10 | 11 | """ 12 | Power Set 13 | 14 | Input: 15 | arr: A list 16 | 17 | Precondition: 18 | arr has no duplicate elements 19 | 20 | Output: 21 | A list of lists, each representing a different subset of arr. The empty set is always a subset of arr, and arr is always a subset of arr. 22 | 23 | Example: 24 | >>> powerset(['a', 'b', 'c']) 25 | [[], ['c'], ['b'], ['b', 'c'], ['a'], ['a', 'c'], ['a', 'b'], ['a', 'b', 'c']] 26 | """ 27 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/quicksort.py: -------------------------------------------------------------------------------- 1 | def quicksort(arr): 2 | if not arr: 3 | return [] 4 | 5 | pivot = arr[0] 6 | lesser = quicksort([x for x in arr[1:] if x < pivot]) 7 | greater = quicksort([x for x in arr[1:] if x > pivot]) 8 | return lesser + [pivot] + greater 9 | 10 | """ 11 | QuickSort 12 | 13 | 14 | Input: 15 | arr: A list of ints 16 | 17 | Output: 18 | The elements of arr in sorted order 19 | """ 20 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/reverse_linked_list.py: -------------------------------------------------------------------------------- 1 | def reverse_linked_list(node): 2 | prevnode = None 3 | while node: 4 | nextnode = node.successor 5 | node.successor = prevnode 6 | node = nextnode 7 | return prevnode 8 | 9 | 10 | """ 11 | Reverse Linked List 12 | 13 | Reverses a linked list and returns the new head. 14 | 15 | Input: 16 | node: The head of a singly-linked list 17 | 18 | Precondition: 19 | The input is acyclic 20 | 21 | Side effect: 22 | Mutates the list nodes' successor pointers 23 | 24 | Output: 25 | The head of the reversed linked list 26 | """ 27 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/rpn_eval.py: -------------------------------------------------------------------------------- 1 | 2 | def rpn_eval(tokens): 3 | def op(symbol, a, b): 4 | return { 5 | '+': lambda a, b: a + b, 6 | '-': lambda a, b: a - b, 7 | '*': lambda a, b: a * b, 8 | '/': lambda a, b: a / b 9 | }[symbol](a, b) 10 | 11 | stack = [] 12 | 13 | for token in tokens: 14 | if isinstance(token, float): 15 | stack.append(token) 16 | else: 17 | a = stack.pop() 18 | b = stack.pop() 19 | stack.append( 20 | op(token, a, b) 21 | ) 22 | 23 | return stack.pop() 24 | 25 | 26 | 27 | """ 28 | Reverse Polish Notation 29 | 30 | Four-function calculator with input given in Reverse Polish Notation (RPN). 31 | 32 | Input: 33 | A list of values and operators encoded as floats and strings 34 | 35 | Precondition: 36 | all( 37 | isinstance(token, float) or token in ('+', '-', '*', '/') for token in tokens 38 | ) 39 | 40 | Example: 41 | >>> rpn_eval([3.0, 5.0, '+', 2.0, '/']) 42 | 4.0 43 | """ 44 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/shortest_path_length_test.py: -------------------------------------------------------------------------------- 1 | from .node import Node 2 | from .shortest_path_length import shortest_path_length 3 | 4 | 5 | """ 6 | Test shortest path length 7 | """ 8 | def main(): 9 | 10 | node1 = Node("1") 11 | node5 = Node("5") 12 | node4 = Node("4", None, [node5]) 13 | node3 = Node("3", None, [node4]) 14 | node2 = Node("2", None, [node1, node3, node4]) 15 | node0 = Node("0", None, [node2, node5]) 16 | 17 | length_by_edge = { 18 | (node0, node2): 3, 19 | (node0, node5): 10, 20 | (node2, node1): 1, 21 | (node2, node3): 2, 22 | (node2, node4): 4, 23 | (node3, node4): 1, 24 | (node4, node5): 1 25 | } 26 | 27 | # Case 1: One path 28 | # Output: 4 29 | result = shortest_path_length(length_by_edge, node0, node1) 30 | print(result) 31 | 32 | # Case 2: Multiple path 33 | # Output: 7 34 | result = shortest_path_length(length_by_edge, node0, node5) 35 | print(result) 36 | 37 | # Case 3: Start point is same as end point 38 | # Output: 0 39 | result = shortest_path_length(length_by_edge, node2, node2) 40 | print(result) 41 | 42 | # Case 4: Unreachable path 43 | # Output: INT_MAX 44 | result = shortest_path_length(length_by_edge, node1, node5) 45 | print(result) 46 | 47 | if __name__ == "__main__": 48 | main() 49 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/shortest_path_lengths.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | def shortest_path_lengths(n, length_by_edge): 4 | length_by_path = defaultdict(lambda: float('inf')) 5 | length_by_path.update({(i, i): 0 for i in range(n)}) 6 | length_by_path.update(length_by_edge) 7 | 8 | for k in range(n): 9 | for i in range(n): 10 | for j in range(n): 11 | length_by_path[i, j] = min( 12 | length_by_path[i, j], 13 | length_by_path[i, k] + length_by_path[j, k] 14 | ) 15 | 16 | return length_by_path 17 | 18 | 19 | """ 20 | All Shortest Paths 21 | floyd-warshall 22 | 23 | Floyd-Warshall algorithm implementation. 24 | 25 | Calculates the length of the shortest path connecting every ordered pair of nodes in a directed graph. 26 | 27 | 28 | 29 | Input: 30 | n: The number of nodes in the graph. The nodes are assumed to have ids 0..n-1 31 | length_by_edge: A dict containing edge length keyed by an ordered pair of node ids 32 | 33 | Precondition: 34 | There are no negative-length cycles in the input graph 35 | 36 | Output: 37 | A dict containing shortest path length keyed by an ordered pair of node ids 38 | """ 39 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/shunting_yard.py: -------------------------------------------------------------------------------- 1 | 2 | def shunting_yard(tokens): 3 | precedence = { 4 | '+': 1, 5 | '-': 1, 6 | '*': 2, 7 | '/': 2 8 | } 9 | 10 | rpntokens = [] 11 | opstack = [] 12 | for token in tokens: 13 | if isinstance(token, int): 14 | rpntokens.append(token) 15 | else: 16 | while opstack and precedence[token] <= precedence[opstack[-1]]: 17 | rpntokens.append(opstack.pop()) 18 | 19 | while opstack: 20 | rpntokens.append(opstack.pop()) 21 | 22 | return rpntokens 23 | 24 | 25 | """ 26 | Infix to RPN Conversion 27 | shunting-yard 28 | 29 | 30 | Uses Dijkstra's shunting-yard algorithm to transform infix notation into equivalent Reverse Polish Notation. 31 | 32 | Input: 33 | tokens: A list of tokens in infix notation 34 | 35 | Precondition: 36 | all(isinstance(token, int) or token in '+-*/' for token in tokens) 37 | 38 | Output: 39 | The input tokens reordered into Reverse Polish Notation 40 | 41 | Examples: 42 | >>> shunting_yard([10, '-', 5, '-', 2]) 43 | [10, 5, '-', 2, '-'] 44 | >>> shunting_yard([34, '-', 12, '/', 5]) 45 | [34, 12, 5, '/' ,'-'] 46 | >>> shunting_yard([4, '+', 9, '*', 9, '-', 10, '+', 13]) 47 | [4, 9, 9, '*', '+', 10, '-', 13, '+'] 48 | """ 49 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/sieve.py: -------------------------------------------------------------------------------- 1 | def sieve(max): 2 | primes = [] 3 | for n in range(2, max + 1): 4 | if any(n % p > 0 for p in primes): 5 | primes.append(n) 6 | return primes 7 | 8 | """ 9 | Sieve of Eratosthenes 10 | prime-sieve 11 | 12 | Input: 13 | max: A positive int representing an upper bound. 14 | 15 | Output: 16 | A list containing all primes up to and including max 17 | """ 18 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/sqrt.py: -------------------------------------------------------------------------------- 1 | 2 | def sqrt(x, epsilon): 3 | approx = x / 2 4 | while abs(x - approx) > epsilon: 5 | approx = 0.5 * (approx + x / approx) 6 | return approx 7 | 8 | """ 9 | Square Root 10 | 11 | Newton-Raphson method implementation. 12 | 13 | 14 | Input: 15 | x: A float 16 | epsilon: A float 17 | 18 | Precondition: 19 | x >= 1 and epsilon > 0 20 | 21 | Output: 22 | A float in the interval [sqrt(x) - epsilon, sqrt(x) + epsilon] 23 | 24 | Example: 25 | >>> sqrt(2, 0.01) 26 | 1.4166666666666665 27 | """ 28 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/subsequences.py: -------------------------------------------------------------------------------- 1 | def subsequences(a, b, k): 2 | if k == 0: 3 | return [] 4 | 5 | ret = [] 6 | for i in range(a, b + 1 - k): 7 | ret.extend( 8 | [i] + rest for rest in subsequences(i + 1, b, k - 1) 9 | ) 10 | 11 | return ret 12 | 13 | 14 | """ 15 | Subsequences 16 | 17 | 18 | Input: 19 | a: An int 20 | b: An int 21 | k: A positive int 22 | 23 | Output: 24 | A list of all length-k ascending sequences of ints in range(a, b) 25 | 26 | Example: 27 | >>> subsequences(a=1, b=5, k=3) 28 | [[1, 2, 3], [1, 2, 4], [1, 3, 4], [2, 3, 4]] 29 | """ 30 | 31 | 32 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/to_base.py: -------------------------------------------------------------------------------- 1 | 2 | import string 3 | def to_base(num, b): 4 | result = '' 5 | alphabet = string.digits + string.ascii_uppercase 6 | while num > 0: 7 | i = num % b 8 | num = num // b 9 | result = result + alphabet[i] 10 | return result 11 | 12 | 13 | 14 | """ 15 | Integer Base Conversion 16 | base-conversion 17 | 18 | 19 | Input: 20 | num: A base-10 integer to convert. 21 | b: The target base to convert it to. 22 | 23 | Precondition: 24 | num > 0, 2 <= b <= 36. 25 | 26 | Output: 27 | A string representing the value of num in base b. 28 | 29 | Example: 30 | >>> to_base(31, 16) 31 | '1F' 32 | """ 33 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/topological_ordering.py: -------------------------------------------------------------------------------- 1 | def topological_ordering(nodes): 2 | ordered_nodes = [node for node in nodes if not node.incoming_nodes] 3 | 4 | for node in ordered_nodes: 5 | for nextnode in node.outgoing_nodes: 6 | if set(ordered_nodes).issuperset(nextnode.outgoing_nodes) and nextnode not in ordered_nodes: 7 | ordered_nodes.append(nextnode) 8 | 9 | return ordered_nodes 10 | 11 | """ 12 | Topological Sort 13 | 14 | Input: 15 | nodes: A list of directed graph nodes 16 | 17 | Precondition: 18 | The input graph is acyclic 19 | 20 | Output: 21 | An OrderedSet containing the elements of nodes in an order that puts each node before all the nodes it has edges to 22 | """ 23 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_programs/wrap.py: -------------------------------------------------------------------------------- 1 | def wrap(text, cols): 2 | lines = [] 3 | while len(text) > cols: 4 | end = text.rfind(' ', 0, cols + 1) 5 | if end == -1: 6 | end = cols 7 | line, text = text[:end], text[end:] 8 | lines.append(line) 9 | 10 | return lines 11 | 12 | """ 13 | Wrap Text 14 | 15 | Given a long string and a column width, break the string on spaces into a list of lines such that each line is no longer than the column width. 16 | 17 | Input: 18 | text: The starting text. 19 | cols: The target column width, i.e. the maximum length of any single line after wrapping. 20 | 21 | Precondition: 22 | cols > 0. 23 | 24 | Output: 25 | An ordered list of strings, each no longer than the column width, such that the concatenation of the strings returns the original text, 26 | and such that no word in the original text is broken into two parts unless necessary. The original amount of spaces are preserved (e.g. spaces 27 | at the start or end of each line aren't trimmed.),Wrapping Text 28 | """ 29 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/load_testdata.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | 5 | def load_json_testcases(algorithm): 6 | 7 | quixbugs_root = Path(__file__).parent / ".." 8 | testdata_path = quixbugs_root / f"json_testcases/{algorithm}.json" 9 | with open(testdata_path) as data_file: 10 | testdata = [json.loads(line) for line in data_file] 11 | 12 | return testdata 13 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/node.py: -------------------------------------------------------------------------------- 1 | class Node: 2 | def __init__( 3 | self, 4 | value=None, 5 | successor=None, 6 | successors=[], 7 | predecessors=[], 8 | incoming_nodes=[], 9 | outgoing_nodes=[], 10 | ): 11 | self.value = value 12 | self.successor = successor 13 | self.successors = successors 14 | self.predecessors = predecessors 15 | self.incoming_nodes = incoming_nodes 16 | self.outgoing_nodes = outgoing_nodes 17 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_bitcount.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.bitcount import bitcount 6 | else: 7 | from python_programs.bitcount import bitcount 8 | 9 | 10 | testdata = load_json_testcases(bitcount.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_bitcount(input_data, expected): 15 | assert bitcount(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_bucketsort.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.bucketsort import bucketsort 6 | else: 7 | from python_programs.bucketsort import bucketsort 8 | 9 | 10 | testdata = load_json_testcases(bucketsort.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_bucketsort(input_data, expected): 15 | assert bucketsort(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_find_first_in_sorted.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.find_first_in_sorted import find_first_in_sorted 6 | else: 7 | from python_programs.find_first_in_sorted import find_first_in_sorted 8 | 9 | 10 | testdata = load_json_testcases(find_first_in_sorted.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_find_first_in_sorted(input_data, expected): 15 | assert find_first_in_sorted(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_find_in_sorted.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.find_in_sorted import find_in_sorted 6 | else: 7 | from python_programs.find_in_sorted import find_in_sorted 8 | 9 | 10 | testdata = load_json_testcases(find_in_sorted.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_find_in_sorted(input_data, expected): 15 | assert find_in_sorted(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_flatten.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.flatten import flatten 6 | else: 7 | from python_programs.flatten import flatten 8 | 9 | 10 | testdata = load_json_testcases(flatten.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_flatten(input_data, expected): 15 | assert list(flatten(*input_data)) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_gcd.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.gcd import gcd 6 | else: 7 | from python_programs.gcd import gcd 8 | 9 | 10 | testdata = load_json_testcases(gcd.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_gcd(input_data, expected): 15 | assert gcd(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_get_factors.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.get_factors import get_factors 6 | else: 7 | from python_programs.get_factors import get_factors 8 | 9 | 10 | testdata = load_json_testcases(get_factors.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_get_factors(input_data, expected): 15 | assert get_factors(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_hanoi.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.hanoi import hanoi 6 | else: 7 | from python_programs.hanoi import hanoi 8 | 9 | 10 | testdata = load_json_testcases(hanoi.__name__) 11 | testdata = [[inp, [tuple(x) for x in out]] for inp, out in testdata] 12 | 13 | 14 | @pytest.mark.parametrize("input_data,expected", testdata) 15 | def test_hanoi(input_data, expected): 16 | assert hanoi(*input_data) == expected 17 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_is_valid_parenthesization.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.is_valid_parenthesization import is_valid_parenthesization 6 | else: 7 | from python_programs.is_valid_parenthesization import is_valid_parenthesization 8 | 9 | 10 | testdata = load_json_testcases(is_valid_parenthesization.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_is_valid_parenthesization(input_data, expected): 15 | assert is_valid_parenthesization(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_kheapsort.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.kheapsort import kheapsort 6 | else: 7 | from python_programs.kheapsort import kheapsort 8 | 9 | 10 | testdata = load_json_testcases(kheapsort.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_kheapsort(input_data, expected): 15 | assert list(kheapsort(*input_data)) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_kth.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.kth import kth 6 | else: 7 | from python_programs.kth import kth 8 | 9 | 10 | testdata = load_json_testcases(kth.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_kth(input_data, expected): 15 | assert kth(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_lcs_length.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.lcs_length import lcs_length 6 | else: 7 | from python_programs.lcs_length import lcs_length 8 | 9 | 10 | testdata = load_json_testcases(lcs_length.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_lcs_length(input_data, expected): 15 | assert lcs_length(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_levenshtein.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.levenshtein import levenshtein 6 | else: 7 | from python_programs.levenshtein import levenshtein 8 | 9 | 10 | testdata = load_json_testcases(levenshtein.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_levenshtein(input_data, expected): 15 | if input_data == [ 16 | "amanaplanacanalpanama", 17 | "docnoteidissentafastneverpreventsafatnessidietoncod", 18 | ]: 19 | pytest.skip("Takes too long to pass!") 20 | 21 | assert levenshtein(*input_data) == expected 22 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_lis.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.lis import lis 6 | else: 7 | from python_programs.lis import lis 8 | 9 | 10 | testdata = load_json_testcases(lis.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_lis(input_data, expected): 15 | assert lis(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_longest_common_subsequence.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.longest_common_subsequence import longest_common_subsequence 6 | else: 7 | from python_programs.longest_common_subsequence import longest_common_subsequence 8 | 9 | 10 | testdata = load_json_testcases(longest_common_subsequence.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_longest_common_subsequence(input_data, expected): 15 | assert longest_common_subsequence(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_max_sublist_sum.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.max_sublist_sum import max_sublist_sum 6 | else: 7 | from python_programs.max_sublist_sum import max_sublist_sum 8 | 9 | 10 | testdata = load_json_testcases(max_sublist_sum.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_max_sublist_sum(input_data, expected): 15 | assert max_sublist_sum(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_mergesort.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.mergesort import mergesort 6 | else: 7 | from python_programs.mergesort import mergesort 8 | 9 | 10 | testdata = load_json_testcases(mergesort.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_mergesort(input_data, expected): 15 | assert mergesort(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_minimum_spanning_tree.py: -------------------------------------------------------------------------------- 1 | def test1(): 2 | """Case 1: Simple tree input. 3 | Output: (1, 2) (3, 4) (1, 4) 4 | """ 5 | 6 | result = minimum_spanning_tree( 7 | { 8 | (1, 2): 10, 9 | (2, 3): 15, 10 | (3, 4): 10, 11 | (1, 4): 10, 12 | } 13 | ) 14 | 15 | assert result == {(1, 2), (3, 4), (1, 4)} 16 | 17 | 18 | def test2(): 19 | """Case 2: Strongly connected tree input. 20 | Output: (2, 5) (1, 3) (2, 3) (4, 6) (3, 6) 21 | """ 22 | 23 | result = minimum_spanning_tree( 24 | { 25 | (1, 2): 6, 26 | (1, 3): 1, 27 | (1, 4): 5, 28 | (2, 3): 5, 29 | (2, 5): 3, 30 | (3, 4): 5, 31 | (3, 5): 6, 32 | (3, 6): 4, 33 | (4, 6): 2, 34 | (5, 6): 6, 35 | } 36 | ) 37 | 38 | assert result == {(2, 5), (1, 3), (2, 3), (4, 6), (3, 6)} 39 | 40 | 41 | def test3(): 42 | """Case 3: Minimum spanning tree input. 43 | Output: (1, 2) (1, 3) (2, 4) 44 | """ 45 | 46 | result = minimum_spanning_tree( 47 | { 48 | (1, 2): 6, 49 | (1, 3): 1, 50 | (2, 4): 2, 51 | } 52 | ) 53 | 54 | assert result == {(1, 2), (1, 3), (2, 4)} 55 | 56 | 57 | test1() 58 | test2() 59 | test3() -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_next_palindrome.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.next_palindrome import next_palindrome 6 | else: 7 | from python_programs.next_palindrome import next_palindrome 8 | 9 | 10 | testdata = load_json_testcases(next_palindrome.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_next_palindrome(input_data, expected): 15 | assert next_palindrome(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_next_permutation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.next_permutation import next_permutation 6 | else: 7 | from python_programs.next_permutation import next_permutation 8 | 9 | 10 | testdata = load_json_testcases(next_permutation.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_next_permutation(input_data, expected): 15 | assert next_permutation(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_pascal.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.pascal import pascal 6 | else: 7 | from python_programs.pascal import pascal 8 | 9 | 10 | testdata = load_json_testcases(pascal.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_pascal(input_data, expected): 15 | assert pascal(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_possible_change.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.possible_change import possible_change 6 | else: 7 | from python_programs.possible_change import possible_change 8 | 9 | 10 | testdata = load_json_testcases(possible_change.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_possible_change(input_data, expected): 15 | assert possible_change(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_powerset.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.powerset import powerset 6 | else: 7 | from python_programs.powerset import powerset 8 | 9 | 10 | testdata = load_json_testcases(powerset.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_powerset(input_data, expected): 15 | assert powerset(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_quicksort.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.quicksort import quicksort 6 | else: 7 | from python_programs.quicksort import quicksort 8 | 9 | 10 | testdata = load_json_testcases(quicksort.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_quicksort(input_data, expected): 15 | assert quicksort(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_rpn_eval.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.rpn_eval import rpn_eval 6 | else: 7 | from python_programs.rpn_eval import rpn_eval 8 | 9 | 10 | testdata = load_json_testcases(rpn_eval.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_rpn_eval(input_data, expected): 15 | assert rpn_eval(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_shunting_yard.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.shunting_yard import shunting_yard 6 | else: 7 | from python_programs.shunting_yard import shunting_yard 8 | 9 | 10 | testdata = load_json_testcases(shunting_yard.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_shunting_yard(input_data, expected): 15 | assert shunting_yard(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_sieve.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.sieve import sieve 6 | else: 7 | from python_programs.sieve import sieve 8 | 9 | 10 | testdata = load_json_testcases(sieve.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_sieve(input_data, expected): 15 | assert sieve(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_sqrt.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.sqrt import sqrt 6 | else: 7 | from python_programs.sqrt import sqrt 8 | 9 | 10 | testdata = load_json_testcases(sqrt.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_sqrt(input_data, expected): 15 | # assert sqrt(*input_data) == pytest.approx(expected, abs=input_data[-1]) 16 | # Can also be written w/o pytest dependency: 17 | assert abs(sqrt(*input_data) - expected) <= input_data[-1] 18 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_subsequences.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.subsequences import subsequences 6 | else: 7 | from python_programs.subsequences import subsequences 8 | 9 | 10 | testdata = load_json_testcases(subsequences.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_subsequences(input_data, expected): 15 | assert subsequences(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_to_base.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.to_base import to_base 6 | else: 7 | from python_programs.to_base import to_base 8 | 9 | 10 | testdata = load_json_testcases(to_base.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_to_base(input_data, expected): 15 | assert to_base(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/python_testcases/test_wrap.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from load_testdata import load_json_testcases 3 | 4 | if pytest.use_correct: 5 | from correct_python_programs.wrap import wrap 6 | else: 7 | from python_programs.wrap import wrap 8 | 9 | 10 | testdata = load_json_testcases(wrap.__name__) 11 | 12 | 13 | @pytest.mark.parametrize("input_data,expected", testdata) 14 | def test_wrap(input_data, expected): 15 | assert wrap(*input_data) == expected 16 | -------------------------------------------------------------------------------- /evaluation/other/QuixBugs/quixbugs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/evaluation/other/QuixBugs/quixbugs.pdf -------------------------------------------------------------------------------- /evaluation/other/humanevalpack_stats.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | 4 | 5 | for lang in ['python', 'js', 'cpp', 'java', 'go', 'rust']: 6 | print(f'Language: {lang}') 7 | ds = load_dataset('bigcode/humanevalpack', lang, split="test") 8 | # Average docstring length 9 | print(f'Average docstring length: {sum([len(d) for d in ds["docstring"]]) / len(ds["docstring"])}') 10 | # Min docstring length 11 | print(f'Min docstring length: {min([len(d) for d in ds["docstring"]])}') 12 | # Max docstring length 13 | print(f'Max docstring length: {max([len(d) for d in ds["docstring"]])}') 14 | # Average solution length 15 | print(f'Average solution length: {sum([len(d) for d in ds["canonical_solution"]]) / len(ds["canonical_solution"])}') 16 | # Min solution length 17 | print(f'Min solution length: {min([len(d) for d in ds["canonical_solution"]])}') 18 | # Max solution length 19 | print(f'Max solution length: {max([len(d) for d in ds["canonical_solution"]])}') -------------------------------------------------------------------------------- /evaluation/other/nlg_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | import nlgeval 3 | from tqdm import tqdm 4 | from nlgeval import NLGEval 5 | 6 | def get_ref(file_path): 7 | ref = [] 8 | with open(file_path) as f: 9 | for line in f: 10 | ref.append(json.loads(line)['docstring']) 11 | return ref 12 | 13 | def get_hyp(file_path): 14 | with open(file_path) as f: 15 | hyp = json.load(f) 16 | return hyp 17 | 18 | 19 | n = NLGEval(no_skipthoughts=True, no_glove=True, metrics_to_omit=['SPICE', 'CIDEr', 'ROUGE_L']) 20 | 21 | for lang in ['cpp', 'java', 'go', 'js', 'python', 'rust']: 22 | metrics_dicts = [] 23 | print(f'Language: {lang}') 24 | ref = get_ref(f'data/{lang}/data/humanevalpack.jsonl') 25 | hyp = get_hyp(f'octocoder/humanevalexplain/generations_humanevalexplaindescribe{lang}_starcoderguanacocommits.json') 26 | for i in tqdm(range(len(hyp))): 27 | metrics_dicts.append({}) 28 | for j in range(len(hyp[i])): 29 | metrics_dict = n.compute_individual_metrics(ref[i], hyp[i][j]) 30 | for k in metrics_dict: 31 | metrics_dicts[i][k] = max(metrics_dicts[i].get(k, 0), metrics_dict[k]) 32 | with open(f'octocoder/humanevalexplain/metrics_humanevalexplaindescribe{lang}_starcoderguanacocommits.json', 'w') as f: 33 | json.dump(metrics_dicts, f, indent=4) -------------------------------------------------------------------------------- /evaluation/other/nlg_eval_avg.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | for lang in ['cpp', 'java', 'go', 'js', 'python', 'rust']: 4 | print(f'Language: {lang}') 5 | with open(f'evaluation/octocoder/humanevalexplain/metrics_humanevalexplaindescribe{lang}_starcoderguanacocommits.json', 'r') as f: 6 | data = json.load(f) 7 | 8 | bleu1 = [d['Bleu_1'] * 100 for d in data] 9 | bleu2 = [d['Bleu_2'] * 100 for d in data] 10 | bleu3 = [d['Bleu_3'] * 100 for d in data] 11 | bleu4 = [d['Bleu_4'] * 100 for d in data] 12 | meteor = [d['METEOR'] * 100 for d in data] 13 | 14 | # Average 15 | print(f'BLEU-1: {sum(bleu1) / len(bleu1)}') 16 | print(f'BLEU-2: {sum(bleu2) / len(bleu2)}') 17 | print(f'BLEU-3: {sum(bleu3) / len(bleu3)}') 18 | print(f'BLEU-4: {sum(bleu4) / len(bleu4)}') 19 | print(f'METEOR: {sum(meteor) / len(meteor)}') 20 | -------------------------------------------------------------------------------- /evaluation/other/rename.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | RENAME_MAP = { 5 | "humanevalxexpgen": "humanevalexplainsynthesize", 6 | "hexexpgenerate": "humanevalexplainsynthesize", 7 | "hexexplaingen": "humanevalexplainsynthesize", 8 | "hexexpdesc": "humanevalexplaindescribe", 9 | "humanevalxexpdescribe": "humanevalexplaindescribe", 10 | "humanevalxexpdesc": "humanevalexplaindescribe", 11 | "hexexplaindesc": "humanevalexplaindescribe", 12 | "hexexpdescribe": "humanevalexplaindescribe", 13 | "hexexplaindescribe": "humanevalexplaindescribe", 14 | "hexbugs": "humanevalfix", 15 | "humanevalxbugs": "humanevalfix", 16 | "humanevalxgenerate": "humanevalsynthesize", 17 | "hexgenerate": "humanevalsynthesize", 18 | } 19 | 20 | DIR = "evaluation" 21 | 22 | # Rename all file paths in the directory & subdirectories 23 | for root, dirs, files in os.walk(DIR): 24 | for file in files: 25 | if file.endswith(".json") and any(x in file for x in RENAME_MAP): 26 | # Rename file 27 | for k, v in RENAME_MAP.items(): 28 | if k in file: 29 | new_file_name = file.replace(k, v) 30 | os.rename(os.path.join(root, file), os.path.join(root, new_file_name)) 31 | print(f"Renamed {file} to {new_file_name}") 32 | break 33 | -------------------------------------------------------------------------------- /evaluation/run/check_missing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Check if there are missing files when generating using the range scripts. 3 | Usage: `python check_missing.py "generations_humanevalfixpython_wizardcoder*.json"` 4 | """ 5 | import sys 6 | import glob 7 | 8 | def find_missing_file(file_pattern): 9 | file_list = glob.glob(file_pattern) 10 | file_numbers = set() 11 | 12 | for file_name in file_list: 13 | try: 14 | file_number = int(file_name.split("_")[-1].split(".")[0]) 15 | file_numbers.add(file_number) 16 | except ValueError: 17 | pass 18 | 19 | missing_numbers = set(range(0, 164)) - file_numbers 20 | 21 | print("Missing files: ", missing_numbers) 22 | 23 | if __name__ == "__main__": 24 | if len(sys.argv) < 2: 25 | print("Usage: python check_missing ") 26 | sys.exit(1) 27 | 28 | file_pattern = sys.argv[1] 29 | 30 | find_missing_file(file_pattern) 31 | -------------------------------------------------------------------------------- /evaluation/run/eval_scripts/bloomz/eval_bloomz_humanevalfix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks-per-node=1 4 | #SBATCH -p small-g 5 | #SBATCH -t 48:00:00 6 | #SBATCH --gpus-per-node=mi250:8 7 | #SBATCH --exclusive=user 8 | #SBATCH --hint=nomultithread 9 | #SBATCH --account=project_462000241 10 | #SBATCH -o logs/%j.out 11 | #SBATCH -e logs/%j.er 12 | 13 | source $ajs_ALL_CCFRWORK/start-tr13f-6B3-ml-t0 14 | conda activate bigcode 15 | 16 | cd /gpfswork/rech/ajs/commun/code/bigcode/bigcode-evaluation-harness 17 | 18 | accelerate launch --config_file config_8gpus_bf16.yaml main.py \ 19 | --model bloomz \ 20 | --tasks humanevalfixtests-python \ 21 | --do_sample True \ 22 | --temperature 0.2 \ 23 | --n_samples 20 \ 24 | --batch_size 2 \ 25 | --allow_code_execution \ 26 | --save_generations \ 27 | --trust_remote_code \ 28 | --prompt instruct \ 29 | --save_generations_path generations_humanevalfixpython_bloomz.json \ 30 | --metric_output_path evaluation_humanevalfixpython_bloomz.json \ 31 | --max_length_generation 2048 \ 32 | --precision bf16 \ 33 | --max_memory_per_gpu 50GB 34 | -------------------------------------------------------------------------------- /evaluation/run/eval_scripts/diffcodegen2b/eval_diffcodegen2b_humanevalfix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=eval 3 | #SBATCH --ntasks=1 # number of MP tasks 4 | #SBATCH --nodes=1 5 | #SBATCH --cpus-per-task=8 # number of cores per tasks 6 | #SBATCH --hint=nomultithread # we get physical cores not logical 7 | #SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) 8 | #SBATCH --output=%x-%j.out # output file name 9 | #SBATCH --account=ajs@a100 10 | #SBATCH --constraint=a100 11 | #SBATCH --gres=gpu:1 # number of gpus 12 | 13 | source $ajs_ALL_CCFRWORK/start-tr13f-6B3-ml-t0 14 | conda activate bigcode 15 | 16 | cd /gpfswork/rech/ajs/commun/code/bigcode/bigcode-evaluation-harness 17 | 18 | accelerate launch --config_file config_1a100_fp16.yaml main.py \ 19 | --model diff-codegen-2b-v2 \ 20 | --tasks humanevalfixtests-python \ 21 | --do_sample True \ 22 | --temperature 0.2 \ 23 | --n_samples 20 \ 24 | --batch_size 20 \ 25 | --allow_code_execution \ 26 | --save_generations \ 27 | --trust_remote_code \ 28 | --prompt diff-carper \ 29 | --save_generations_path generations_humanevalfixpython_diffcodegen2b.json \ 30 | --metric_output_path evaluation_humanevalfixpython_diffcodegen2b.json \ 31 | --max_length_generation 1024 \ 32 | --precision fp16 33 | -------------------------------------------------------------------------------- /evaluation/run/eval_scripts/eval_codegeex2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks-per-node=1 4 | #SBATCH -p small-g 5 | #SBATCH -t 48:00:00 6 | #SBATCH --cpus-per-task=8 7 | #SBATCH --gpus-per-node=mi250:1 8 | #SBATCH --exclusive=user 9 | #SBATCH --hint=nomultithread 10 | #SBATCH --account=project_462000241 11 | #SBATCH -o logs/%j.out 12 | #SBATCH -e logs/%j.err 13 | 14 | source /pfs/lustrep2/scratch/project_462000241/muennighoff/venv/bin/activate 15 | cd /pfs/lustrep2/scratch/project_462000185/muennighoff/bigcode-evaluation-harness 16 | 17 | accelerate launch --config_file config_1gpus_bf16.yaml --main_process_port 25900 main.py \ 18 | --model codegeex2-6b \ 19 | --tasks humanevalsynthesize-python \ 20 | --do_sample True \ 21 | --temperature 0.2 \ 22 | --n_samples 20 \ 23 | --batch_size 5 \ 24 | --allow_code_execution \ 25 | --save_generations \ 26 | --trust_remote_code \ 27 | --prompt continue \ 28 | --save_generations_path generations_humanevalsynthesizepython_codegeex2_temp02.json \ 29 | --metric_output_path evaluation_humanevalsynthesizepython_temp02.json \ 30 | --max_length_generation 2048 \ 31 | --precision bf16 32 | -------------------------------------------------------------------------------- /evaluation/run/eval_scripts/instructcodet5p/eval_instructcodet5p_humanevalfix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=eval 3 | #SBATCH --ntasks=1 # number of MP tasks 4 | #SBATCH --nodes=1 5 | #SBATCH --cpus-per-task=8 # number of cores per tasks 6 | #SBATCH --hint=nomultithread # we get physical cores not logical 7 | #SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) 8 | #SBATCH --output=%x-%j.out # output file name 9 | #SBATCH --account=ajs@a100 10 | #SBATCH --constraint=a100 11 | #SBATCH --gres=gpu:1 # number of gpus 12 | 13 | source $ajs_ALL_CCFRWORK/start-tr13f-6B3-ml-t0 14 | conda activate bigcode 15 | 16 | cd /gpfswork/rech/ajs/commun/code/bigcode/bigcode-evaluation-harness 17 | 18 | accelerate launch --config_file config_1a100_fp16.yaml main.py \ 19 | --model instructcodet5p-16b \ 20 | --tasks humanevalfixtests-python \ 21 | --do_sample True \ 22 | --temperature 0.2 \ 23 | --n_samples 20 \ 24 | --batch_size 5 \ 25 | --allow_code_execution \ 26 | --save_generations \ 27 | --trust_remote_code \ 28 | --prompt instructcodet5p \ 29 | --save_generations_path generations_humanevalfixpython_instructcodet5p.json \ 30 | --metric_output_path evaluation_humanevalfixpython_instructcodet5p.json \ 31 | --modeltype seq2seq \ 32 | --max_length_generation 2048 \ 33 | --precision fp16 34 | -------------------------------------------------------------------------------- /evaluation/run/eval_scripts/instructcodet5p/eval_instructcodet5p_humanevalsynthesize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -p g40x 3 | #SBATCH -t 24:00:00 4 | #SBATCH --gpus=1 5 | #SBATCH --exclusive=user 6 | #SBATCH --hint=nomultithread 7 | #SBATCH --account=your_account 8 | #SBATCH -o logs/%j.out 9 | #SBATCH -e logs/%j.err 10 | 11 | source /fsx/muennighoff/env/bin/activate 12 | cd /fsx/muennighoff/bigcode-evaluation-harness 13 | 14 | accelerate launch --config_file config_1gpus_fp16.yaml main.py \ 15 | --model instructcodet5p-16b \ 16 | --tasks humanevalsynthesize-python \ 17 | --do_sample True \ 18 | --temperature 0.2 \ 19 | --n_samples 1 \ 20 | --batch_size 1 \ 21 | --limit 1 \ 22 | --allow_code_execution \ 23 | --save_generations \ 24 | --trust_remote_code \ 25 | --prompt instructcodet5p \ 26 | --save_generations_path generations_humanevalsynthesizepython_instructcodet5p.json \ 27 | --metric_output_path evaluation_humanevalsynthesizepython_instructcodet5p.json \ 28 | --modeltype seq2seq \ 29 | --max_length_generation 2048 \ 30 | --precision fp16 31 | -------------------------------------------------------------------------------- /evaluation/run/eval_scripts/octocoder/eval_octocoder_humanevalfix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks-per-node=1 4 | #SBATCH -p small-g 5 | #SBATCH -t 48:00:00 6 | #SBATCH --gpus-per-node=mi250:1 7 | #SBATCH --exclusive=user 8 | #SBATCH --hint=nomultithread 9 | #SBATCH --account=project_462000241 10 | #SBATCH -o logs/%j.out 11 | #SBATCH -e logs/%j.err 12 | 13 | source /pfs/lustrep2/scratch/project_462000241/muennighoff/venv/bin/activate 14 | cd /pfs/lustrep2/scratch/project_462000185/muennighoff/bigcode-evaluation-harness 15 | 16 | accelerate launch --config_file config_1gpus_bf16.yaml --main_process_port 25903 main.py \ 17 | --model bigcode/octocoder \ 18 | --tasks humanevalfixtests-python \ 19 | --do_sample True \ 20 | --temperature 0.2 \ 21 | --n_samples 20 \ 22 | --batch_size 5 \ 23 | --allow_code_execution \ 24 | --save_generations \ 25 | --trust_remote_code \ 26 | --prompt octocoder \ 27 | --save_generations_path generations_humanevalfixpython_octocoder.json \ 28 | --metric_output_path evaluation_humanevalfixpython_octocoder.json \ 29 | --max_length_generation 2048 \ 30 | --precision bf16 -------------------------------------------------------------------------------- /evaluation/run/eval_scripts/octocoder/eval_octocoder_humanevalsynthesize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks-per-node=1 4 | #SBATCH -p small-g 5 | #SBATCH -t 48:00:00 6 | #SBATCH --gpus-per-node=mi250:1 7 | #SBATCH --exclusive=user 8 | #SBATCH --hint=nomultithread 9 | #SBATCH --account=project_462000241 10 | #SBATCH -o logs/%j.out 11 | #SBATCH -e logs/%j.err 12 | 13 | source /pfs/lustrep2/scratch/project_462000241/muennighoff/venv/bin/activate 14 | cd /pfs/lustrep2/scratch/project_462000185/muennighoff/bigcode-evaluation-harness 15 | 16 | accelerate launch --config_file config_1gpus_bf16.yaml --main_process_port 25903 main.py \ 17 | --model bigcode/octocoder \ 18 | --tasks humanevalsynthesize-python \ 19 | --do_sample True \ 20 | --temperature 0.2 \ 21 | --n_samples 20 \ 22 | --batch_size 5 \ 23 | --allow_code_execution \ 24 | --save_generations \ 25 | --trust_remote_code \ 26 | --prompt octocoder \ 27 | --save_generations_path generations_humanevalsynthesizepython_octocoder.json \ 28 | --metric_output_path evaluation_humanevalsynthesizepython_octocoder.json \ 29 | --max_length_generation 2048 \ 30 | --precision bf16 -------------------------------------------------------------------------------- /evaluation/run/eval_scripts/starchatbeta/eval_starchatbeta_humanevalfix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks-per-node=1 4 | #SBATCH -p small-g 5 | #SBATCH -t 48:00:00 6 | #SBATCH --gpus-per-node=mi250:1 7 | #SBATCH --exclusive=user 8 | #SBATCH --hint=nomultithread 9 | #SBATCH --account=project_462000241 10 | #SBATCH -o logs/%j.out 11 | #SBATCH -e logs/%j.err 12 | 13 | source /pfs/lustrep2/scratch/project_462000241/muennighoff/venv/bin/activate 14 | cd /pfs/lustrep2/scratch/project_462000185/muennighoff/bigcode-evaluation-harness 15 | 16 | accelerate launch --config_file config_1gpus_bf16.yaml --main_process_port 20888 main.py \ 17 | --model starchat-beta \ 18 | --tasks humanevalfixtests-python \ 19 | --do_sample True \ 20 | --temperature 0.2 \ 21 | --n_samples 20 \ 22 | --batch_size 5 \ 23 | --allow_code_execution \ 24 | --save_generations \ 25 | --trust_remote_code \ 26 | --prompt starchat \ 27 | --save_generations_path generations_humanevalfixpython_starchatbeta_temp02.json \ 28 | --metric_output_path evaluation_humanevalfixpython_starchatbeta_temp02.json \ 29 | --max_length_generation 2048 \ 30 | --generation_only \ 31 | --precision bf16 -------------------------------------------------------------------------------- /evaluation/run/eval_scripts/starchatbeta/eval_starchatbeta_humanevalsynthesize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks-per-node=1 4 | #SBATCH -p small-g 5 | #SBATCH -t 48:00:00 6 | #SBATCH --gpus-per-node=mi250:1 7 | #SBATCH --exclusive=user 8 | #SBATCH --hint=nomultithread 9 | #SBATCH --account=project_462000241 10 | #SBATCH -o logs/%j.out 11 | #SBATCH -e logs/%j.err 12 | 13 | source /pfs/lustrep2/scratch/project_462000241/muennighoff/venv/bin/activate 14 | cd /pfs/lustrep2/scratch/project_462000185/muennighoff/bigcode-evaluation-harness 15 | 16 | accelerate launch --config_file config_1gpus_bf16.yaml --main_process_port 20889 main.py \ 17 | --model starchat-beta \ 18 | --tasks humanevalsynthesize-python \ 19 | --do_sample True \ 20 | --temperature 0.2 \ 21 | --n_samples 20 \ 22 | --batch_size 5 \ 23 | --allow_code_execution \ 24 | --save_generations \ 25 | --trust_remote_code \ 26 | --prompt starchat \ 27 | --save_generations_path generations_humanevalsynthesizepython_starchatbeta.json \ 28 | --metric_output_path evaluation_humanevalsynthesizepython_starchatbeta.json \ 29 | --max_length_generation 2048 \ 30 | --precision bf16 -------------------------------------------------------------------------------- /evaluation/run/eval_scripts/starcoder/eval_starcoder_humanevalfix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks-per-node=1 4 | #SBATCH -p small-g 5 | #SBATCH -t 48:00:00 6 | #SBATCH --cpus-per-task=8 7 | #SBATCH --gpus-per-node=mi250:1 8 | #SBATCH --exclusive=user 9 | #SBATCH --hint=nomultithread 10 | #SBATCH --account=project_462000241 11 | #SBATCH -o logs/%j.out 12 | #SBATCH -e logs/%j.err 13 | 14 | source /pfs/lustrep2/scratch/project_462000241/muennighoff/venv/bin/activate 15 | cd /pfs/lustrep2/scratch/project_462000185/muennighoff/bigcode-evaluation-harness 16 | 17 | accelerate launch --config_file config_1gpus_bf16.yaml --main_process_port 25900 main.py \ 18 | --model starcoder \ 19 | --tasks humanevalfixtests-python \ 20 | --do_sample True \ 21 | --temperature 0.2 \ 22 | --n_samples 20 \ 23 | --batch_size 5 \ 24 | --allow_code_execution \ 25 | --save_generations \ 26 | --trust_remote_code \ 27 | --prompt instruct \ 28 | --save_generations_path generations_humanevalfixpython_starcoder.json \ 29 | --metric_output_path evaluation_humanevalfixpython_starcoder.json \ 30 | --max_length_generation 1800 \ 31 | --precision bf16 32 | -------------------------------------------------------------------------------- /evaluation/run/eval_scripts/wizardcoder/eval_wizardcoder_humanevalsynthesize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --nodes=1 3 | #SBATCH --ntasks-per-node=1 4 | #SBATCH -p small-g 5 | #SBATCH -t 48:00:00 6 | #SBATCH --cpus-per-task=8 7 | #SBATCH --gpus-per-node=mi250:1 8 | #SBATCH --exclusive=user 9 | #SBATCH --hint=nomultithread 10 | #SBATCH --account=project_462000241 11 | #SBATCH -o logs/%j.out 12 | #SBATCH -e logs/%j.err 13 | 14 | source /pfs/lustrep2/scratch/project_462000241/muennighoff/venv/bin/activate 15 | cd /pfs/lustrep2/scratch/project_462000185/muennighoff/bigcode-evaluation-harness 16 | 17 | accelerate launch --config_file config_1gpus_bf16.yaml --main_process_port 20749 main.py \ 18 | --model WizardCoder-15B-V1.0 \ 19 | --tasks humanevalsynthesize-python \ 20 | --do_sample True \ 21 | --temperature 0.2 \ 22 | --n_samples 20 \ 23 | --batch_size 5 \ 24 | --allow_code_execution \ 25 | --save_generations \ 26 | --trust_remote_code \ 27 | --prompt wizardcoder \ 28 | --save_generations_path generations_humanevalsynthesizepython_wizardcoder.json \ 29 | --metric_output_path evaluation_humanevalsynthesizepython_wizardcoder.json \ 30 | --max_length_generation 2048 \ 31 | --precision bf16 32 | -------------------------------------------------------------------------------- /evaluation/run/humanevalpack_setup_java.sh: -------------------------------------------------------------------------------- 1 | # https://github.com/THUDM/CodeGeeX/blob/61529ba61de8e51c520dc67a3ce4bd62278770df/codegeex/docker/Dockerfile#L31 2 | mkdir -p /workspace/download/ 3 | curl -o /workspace/download/jdk.tar.gz -SL https://download.oracle.com/java/18/archive/jdk-18_linux-x64_bin.tar.gz \ 4 | && mkdir /usr/java && tar -zxf /workspace/download/jdk.tar.gz -C /usr/java && rm /workspace/download/jdk.tar.gz \ 5 | && java_path=`ls /usr/java/${path}` && echo "export JAVA_HOME=/usr/java/${java_path}" >> ~/.profile 6 | -------------------------------------------------------------------------------- /evaluation/run/merge_generations.py: -------------------------------------------------------------------------------- 1 | """ 2 | Merges generations when using the range eval scripts. 3 | Usage: `python merge_generations.py "generations_humanevalfixpython_wizardcoder*.json"` 4 | """ 5 | 6 | import json 7 | import os 8 | import glob 9 | import sys 10 | 11 | pattern = sys.argv[1] 12 | out_name = pattern.replace("_*", "") 13 | 14 | if not ".json" in out_name: 15 | out_name += ".json" 16 | 17 | print("Saving to ", out_name) 18 | 19 | assert "0" not in pattern 20 | 21 | 22 | files = sorted(glob.glob(pattern), key=lambda x: int(x.split("_")[-1].split(".")[0])) 23 | 24 | all_data = [] 25 | for fname in files: 26 | with open(fname, "r") as f: 27 | data = json.load(f) 28 | # It's of form [[x], [y]..] 29 | if (len(data) > 1) and (len(data[0]) == 1): 30 | all_data.extend([[x[0] for x in data]]) 31 | # It's of form [[x, y..]] 32 | else: 33 | all_data.extend(data) 34 | 35 | with open(out_name, "w") as f: 36 | json.dump(all_data, f) 37 | -------------------------------------------------------------------------------- /finetuning/santacoder/finetune.sh: -------------------------------------------------------------------------------- 1 | deepspeed finetune.py \ 2 | --max_input_length 2000 \ 3 | --dataset_name bigcode/commits-pjj-2048 \ 4 | --max_steps 250000 \ 5 | --batch_size 2 \ 6 | --gradient_accumulation_steps 4 \ 7 | --learning_rate 5e-5 \ 8 | --num_warmup_steps 1000 \ 9 | --eval_freq 10000 \ 10 | --save_freq 10000 \ 11 | --log_freq 10 \ 12 | --num_workers 8 \ 13 | --bf16 \ 14 | --data_packing \ 15 | --compute_loss_on_input \ 16 | --line_diff \ 17 | --add_file_name \ 18 | --deepspeed zero_stage1_config.json \ 19 | --cache_dir .cache \ 20 | --output_dir santacoder_line_diff_format -------------------------------------------------------------------------------- /finetuning/santacoder/zero_stage1_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "bf16": { 3 | "enabled": true 4 | }, 5 | "optimizer": { 6 | "type": "AdamW", 7 | "params": { 8 | "lr": "auto", 9 | "weight_decay": "auto", 10 | "torch_adam": true, 11 | "adam_w_mode": true 12 | } 13 | }, 14 | "scheduler": { 15 | "type": "WarmupDecayLR", 16 | "params": { 17 | "warmup_min_lr": "auto", 18 | "warmup_max_lr": "auto", 19 | "warmup_num_steps": "auto", 20 | "total_num_steps": "auto" 21 | } 22 | }, 23 | "zero_optimization": { 24 | "stage": 1, 25 | "allgather_partitions": true, 26 | "allgather_bucket_size": 2e8, 27 | "overlap_comm": true, 28 | "reduce_scatter": true, 29 | "reduce_bucket_size": "auto", 30 | "contiguous_gradients": true 31 | }, 32 | "gradient_accumulation_steps": "auto", 33 | "gradient_clipping": "auto", 34 | "steps_per_print": 2000, 35 | "train_batch_size": "auto", 36 | "train_micro_batch_size_per_gpu": "auto", 37 | "wall_clock_breakdown": false 38 | } -------------------------------------------------------------------------------- /finetuning/starcoder/finetune.sh: -------------------------------------------------------------------------------- 1 | CMD=" \ 2 | finetune.py \ 3 | --model_path="bigcode/starcoder" \ 4 | --dataset_name="ArmelR/guanaco-commits" \ 5 | --seq_length 2048 \ 6 | --max_steps 1000 \ 7 | --batch_size 1 \ 8 | --input_column_name="prompt" \ 9 | --output_column_name="completion" \ 10 | --gradient_accumulation_steps 4 \ 11 | --learning_rate 5e-4 \ 12 | --lr_scheduler_type="cosine"\ 13 | --log_freq 1 \ 14 | --eval_freq 1 \ 15 | --num_warmup_steps 5 \ 16 | --save_freq 5 \ 17 | --weight_decay 0.05 \ 18 | --output_dir="./checkpoints-guanaco-commits" \ 19 | " 20 | 21 | 22 | export LAUNCHER="python3 -u -m torch.distributed.run \ 23 | --nproc_per_node $GPUS_PER_NODE \ 24 | --nnodes $NNODES \ 25 | --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \ 26 | --rdzv_backend c10d \ 27 | --max_restarts 0 \ 28 | --tee 3 \ 29 | " -------------------------------------------------------------------------------- /training/check_ckpt_equivalence.py: -------------------------------------------------------------------------------- 1 | import torch 2 | for idx in ["00001", "00002", "00003", "00004", "00005", "00006", "00007"]: 3 | x = torch.load(f"/gpfsscratch/rech/ajs/commun/Bigcode-large-megatron_conv/base/shard2/pytorch_model-{idx}-of-00007.bin") 4 | y = torch.load(f"/gpfsscratch/rech/ajs/commun/starcoderbase/pytorch_model-{idx}-of-00007.bin") 5 | assert x.keys() == y.keys() 6 | for k in x.keys(): 7 | if not((x[k] == y[k]).all()): 8 | print(k) 9 | print(x[k].shape) 10 | print(y[k].shape) 11 | break -------------------------------------------------------------------------------- /training/preprocess_santacoderpack.sh: -------------------------------------------------------------------------------- 1 | INPUT=commitpack_cf.jsonl # merge datasets jsonl from commitpack-subset-cf 2 | NAME=commitpack_cf # you want data name 3 | TOKENIZER_FILE=starcoder-tokenizer/tokenizer.json 4 | VOCAD=starcoder-tokenizer/vocab.json 5 | 6 | python tools/preprocess_data.py \ 7 | --input $INPUT \ 8 | --output-prefix $NAME \ 9 | --dataset-impl mmap \ 10 | --tokenizer-type TokenizerFromFile \ 11 | --tokenizer-file $TOKENIZER_FILE \ 12 | --workers 30 \ 13 | --chunk-size 1000 -------------------------------------------------------------------------------- /visuals/OCTOPACK_5MIN_SLIDES.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/OCTOPACK_5MIN_SLIDES.pdf -------------------------------------------------------------------------------- /visuals/OCTOPACK_POSTER.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/OCTOPACK_POSTER.png -------------------------------------------------------------------------------- /visuals/OCTOPACK_POSTER.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/OCTOPACK_POSTER.pptx -------------------------------------------------------------------------------- /visuals/OCTOPACK_THUMB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/OCTOPACK_THUMB.png -------------------------------------------------------------------------------- /visuals/ablations.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/ablations.pdf -------------------------------------------------------------------------------- /visuals/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/banner.png -------------------------------------------------------------------------------- /visuals/count_lines.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Iterate through directories in the ./data/ directory 4 | for dir in ./data/*; do 5 | # Extract the directory name 6 | dirname=$(basename "$dir") 7 | # Count the total number of lines in jsonl files within the directory 8 | count=$(find "$dir" -name "*.jsonl" | xargs cat | wc -l) 9 | # Print the directory name and line count 10 | echo "data/$dirname: $count" 11 | done > line_count.txt 12 | -------------------------------------------------------------------------------- /visuals/distribution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/distribution.pdf -------------------------------------------------------------------------------- /visuals/humanevalpack.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/humanevalpack.pdf -------------------------------------------------------------------------------- /visuals/logo/logo_old/octobadpack_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/logo/logo_old/octobadpack_v2.png -------------------------------------------------------------------------------- /visuals/logo/logo_old/octopack_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/logo/logo_old/octopack_v1.png -------------------------------------------------------------------------------- /visuals/logo/logo_old/octopack_v1_transp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/logo/logo_old/octopack_v1_transp.png -------------------------------------------------------------------------------- /visuals/logo/logo_old/octopack_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/logo/logo_old/octopack_v2.png -------------------------------------------------------------------------------- /visuals/logo/logo_old/octopack_v3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/logo/logo_old/octopack_v3.png -------------------------------------------------------------------------------- /visuals/logo/octobadpack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/logo/octobadpack.png -------------------------------------------------------------------------------- /visuals/logo/octopack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/logo/octopack.png -------------------------------------------------------------------------------- /visuals/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/main.pdf -------------------------------------------------------------------------------- /visuals/misc/python.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/misc/python.png -------------------------------------------------------------------------------- /visuals/tasks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigcode-project/octopack/e17a8f6470264286bc6a52eb8263582083bf3bf6/visuals/tasks.pdf --------------------------------------------------------------------------------