├── .github ├── ISSUE_TEMPLATE │ ├── 1-bug-report.yml │ ├── 2-feature_request.yml │ ├── 3-documentation.yml │ └── config.yml └── pull_request_template.md ├── .gitignore ├── LICENSE ├── README.md ├── README_ko.md ├── app.py ├── assets ├── flagged_words.json └── stopwords.json ├── configs ├── README.md ├── README_ko.md ├── analyser.yaml ├── config_all.yaml ├── ko_process.yaml └── process.yaml ├── data ├── test.json └── test_data.json ├── data_modori ├── __init__.py ├── analysis │ ├── __init__.py │ ├── column_wise_analysis.py │ ├── diversity_analysis.py │ └── overall_analysis.py ├── config │ ├── __init__.py │ └── config.py ├── core │ ├── __init__.py │ ├── analyser.py │ ├── data.py │ ├── executor.py │ ├── exporter.py │ ├── ray_executor.py │ └── tracer.py ├── format │ ├── __init__.py │ ├── csv_formatter.py │ ├── formatter.py │ ├── json_formatter.py │ ├── load.py │ ├── mixture_formatter.py │ ├── parquet_formatter.py │ ├── text_formatter.py │ └── tsv_formatter.py ├── ops │ ├── __init__.py │ ├── base_op.py │ ├── common │ │ ├── __init__.py │ │ ├── aws_s3_downloader.py │ │ ├── helper_func.py │ │ └── special_characters.py │ ├── deduplicator │ │ ├── __init__.py │ │ ├── document_deduplicator.py │ │ ├── document_minhash_deduplicator.py │ │ └── document_simhash_deduplicator.py │ ├── filter │ │ ├── __init__.py │ │ ├── alphanumeric_filter.py │ │ ├── average_line_length_filter.py │ │ ├── character_repetition_filter.py │ │ ├── flagged_words_filter.py │ │ ├── language_id_score_filter.py │ │ ├── maximum_line_length_filter.py │ │ ├── perplexity_filter.py │ │ ├── special_characters_filter.py │ │ ├── specified_field_filter.py │ │ ├── specified_numeric_field_filter.py │ │ ├── stopwords_filter.py │ │ ├── suffix_filter.py │ │ ├── text_length_filter.py │ │ ├── token_num_filter.py │ │ ├── word_num_filter.py │ │ └── word_repetition_filter.py │ ├── load.py │ ├── mapper │ │ ├── __init__.py │ │ ├── clean_copyright_mapper.py │ │ ├── clean_email_mapper.py │ │ ├── clean_html_mapper.py │ │ ├── clean_ip_mapper.py │ │ ├── clean_links_mapper.py │ │ ├── expand_macro_mapper.py │ │ ├── fix_unicode_mapper.py │ │ ├── nlpaug_en_mapper.py │ │ ├── punctuation_normalization_mapper.py │ │ ├── remove_bibliography_mapper.py │ │ ├── remove_comments_mapper.py │ │ ├── remove_header_mapper.py │ │ ├── remove_long_words_mapper.py │ │ ├── remove_specific_chars_mapper.py │ │ ├── remove_table_text_mapper.py │ │ ├── remove_words_with_incorrect_substrings_mapper.py │ │ ├── sentence_split_mapper.py │ │ └── whitespace_normalization_mapper.py │ ├── op_fusion.py │ └── selector │ │ ├── __init__.py │ │ ├── frequency_specified_field_selector.py │ │ └── topk_specified_field_selector.py ├── tools │ └── __init__.py └── utils │ ├── __init__.py │ ├── asset_utils.py │ ├── cache_utils.py │ ├── ckpt_utils.py │ ├── compress.py │ ├── constant.py │ ├── file_utils.py │ ├── fingerprint_utils.py │ ├── logger_utils.py │ ├── mm_utils.py │ ├── model_utils.py │ └── registry.py ├── docs ├── DeveloperGuide.md ├── DeveloperGuide_ko.md ├── Operators.md ├── Operators_ko.md └── imgs │ ├── buri_heart.png │ ├── eval-01.png │ ├── eval-02.png │ └── streamlit_ex.png ├── environments ├── combined_requirements.txt ├── dev_requires.txt ├── minimal_requires.txt ├── preprocess_requires.txt ├── quality_classifier_requires.txt └── science_requires.txt ├── setup.cfg ├── setup.py ├── thirdparty ├── README.md ├── README_ZH.md ├── patch │ ├── helm.diff │ └── megatron.diff ├── setup_helm.sh └── setup_megatron.sh └── tools ├── __init__.py ├── analyze_data.py ├── converter ├── batch_convert.sh ├── convert_gpt_to_transformers.py └── modeling_megatron_llama.py ├── evaluator ├── README.md ├── lm_eval │ ├── __init__.py │ ├── base.py │ ├── datasets │ │ ├── README.md │ │ ├── __init__.py │ │ ├── asdiv │ │ │ ├── __init__.py │ │ │ ├── asdiv.py │ │ │ └── dataset_infos.json │ │ ├── bigbench_resources │ │ │ ├── causal_judgement.json │ │ │ ├── date_understanding.json │ │ │ ├── disambiguation_qa.json │ │ │ ├── dyck_languages.json │ │ │ ├── formal_fallacies_syllogisms_negation.json │ │ │ ├── geometric_shapes.json │ │ │ ├── hyperbaton.json │ │ │ ├── logical_deduction_five_objects.json │ │ │ ├── logical_deduction_seven_objects.json │ │ │ ├── logical_deduction_three_objects.json │ │ │ ├── movie_recommendation.json │ │ │ ├── navigate.json │ │ │ ├── reasoning_about_colored_objects.json │ │ │ ├── ruin_names.json │ │ │ ├── salient_translation_error_detection.json │ │ │ ├── snarks.json │ │ │ ├── sports_understanding.json │ │ │ ├── temporal_sequences.json │ │ │ ├── tracking_shuffled_objects_five_objects.json │ │ │ ├── tracking_shuffled_objects_seven_objects.json │ │ │ └── tracking_shuffled_objects_three_objects.json │ │ ├── coqa │ │ │ ├── __init__.py │ │ │ ├── coqa.py │ │ │ └── dataset_infos.json │ │ ├── drop │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── drop.py │ │ ├── headqa │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── headqa.py │ │ ├── hendrycks_ethics │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── hendrycks_ethics.py │ │ ├── hendrycks_math │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── hendrycks_math.py │ │ ├── kohatespeech │ │ │ └── kohatespeech.py │ │ ├── kold │ │ │ └── kold.py │ │ ├── kosbi │ │ │ └── kosbi.py │ │ ├── logiqa │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── logiqa.py │ │ ├── mutual │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── mutual.py │ │ ├── nsmc │ │ │ └── nsmc.py │ │ ├── pile │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── pile.py │ │ ├── quac │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── quac.py │ │ ├── sat_analogies │ │ │ ├── __init__.py │ │ │ └── sat_analogies.py │ │ ├── triviaqa │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── triviaqa.py │ │ └── unscramble │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── unscramble.py │ ├── decontamination │ │ ├── __init__.py │ │ ├── archiver.py │ │ ├── decontaminate.py │ │ └── janitor.py │ ├── evaluator.py │ ├── metrics.py │ ├── models │ │ ├── __init__.py │ │ ├── dummy.py │ │ ├── gpt2.py │ │ ├── gpt3.py │ │ ├── huggingface.py │ │ └── textsynth.py │ ├── tasks │ │ ├── __init__.py │ │ ├── anli.py │ │ ├── arc.py │ │ ├── arithmetic.py │ │ ├── asdiv.py │ │ ├── bigbench.py │ │ ├── blimp.py │ │ ├── cbt.py │ │ ├── coqa.py │ │ ├── crowspairs.py │ │ ├── drop.py │ │ ├── glue.py │ │ ├── gsm8k.py │ │ ├── headqa.py │ │ ├── hellaswag.py │ │ ├── hendrycks_ethics.py │ │ ├── hendrycks_math.py │ │ ├── hendrycks_test.py │ │ ├── json.py │ │ ├── klue.py │ │ ├── ko_translation.py │ │ ├── kobest.py │ │ ├── kohatespeech.py │ │ ├── kold.py │ │ ├── korquad.py │ │ ├── korunsmile.py │ │ ├── kosbi.py │ │ ├── kowikitable.py │ │ ├── lambada.py │ │ ├── lambada_cloze.py │ │ ├── lambada_multilingual.py │ │ ├── logiqa.py │ │ ├── mathqa.py │ │ ├── mc_taco.py │ │ ├── mgsm.py │ │ ├── mutual.py │ │ ├── naturalqs.py │ │ ├── nsmc.py │ │ ├── openbookqa.py │ │ ├── pawsx.py │ │ ├── pile.py │ │ ├── piqa.py │ │ ├── prost.py │ │ ├── pubmedqa.py │ │ ├── qa4mre.py │ │ ├── qasper.py │ │ ├── quac.py │ │ ├── race.py │ │ ├── sat.py │ │ ├── sciq.py │ │ ├── squad.py │ │ ├── storycloze.py │ │ ├── superglue.py │ │ ├── swag.py │ │ ├── toxigen.py │ │ ├── translation.py │ │ ├── triviaqa.py │ │ ├── truthfulqa.py │ │ ├── unscramble.py │ │ ├── webqs.py │ │ ├── wikitext.py │ │ ├── winogrande.py │ │ ├── wsc273.py │ │ ├── xcopa.py │ │ ├── xnli.py │ │ ├── xstorycloze.py │ │ └── xwinograd.py │ └── utils.py ├── main.py └── run.sh ├── finetuning ├── README.md ├── config │ ├── mistral_7b_lora_dpo.yaml │ └── mistral_7b_lora_sft.yaml ├── data_loaders │ ├── __init__.py │ ├── dpo.py │ ├── load_hf_instruction_data.py │ ├── sft.py │ └── utils.py ├── dpo_lora.py ├── models │ └── load_model.py ├── requirements.txt ├── scripts │ ├── run_mistral_lora_dpo.sh │ └── run_mistral_lora_sft.sh ├── sft_lora.py └── trainer │ ├── __init__.py │ ├── base.py │ ├── dpo.py │ └── lora.py ├── postprocess ├── README.md ├── README_ko.md ├── count_token.py ├── data_mixture.py └── deserialize_meta.py ├── preprocess ├── README.md ├── README_ko.md ├── dataset_split_by_language.py ├── raw_alpaca_cot_merge_add_meta.py ├── raw_arxiv_to_jsonl.py ├── raw_stackexchange_to_jsonl.py ├── reformat_csv_nan_value.py ├── reformat_jsonl_nan_value.py └── serialize_meta.py ├── process_data.py └── quality_classifier ├── README.md ├── README_ko.md ├── eval.py ├── predict.py ├── qc_utils.py └── train.py /.github/ISSUE_TEMPLATE/1-bug-report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/.github/ISSUE_TEMPLATE/1-bug-report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/2-feature_request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/.github/ISSUE_TEMPLATE/2-feature_request.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/3-documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/.github/ISSUE_TEMPLATE/3-documentation.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/.github/ISSUE_TEMPLATE/config.yml -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/.github/pull_request_template.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/README.md -------------------------------------------------------------------------------- /README_ko.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/README_ko.md -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/app.py -------------------------------------------------------------------------------- /assets/flagged_words.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/assets/flagged_words.json -------------------------------------------------------------------------------- /assets/stopwords.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/assets/stopwords.json -------------------------------------------------------------------------------- /configs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/configs/README.md -------------------------------------------------------------------------------- /configs/README_ko.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/configs/README_ko.md -------------------------------------------------------------------------------- /configs/analyser.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/configs/analyser.yaml -------------------------------------------------------------------------------- /configs/config_all.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/configs/config_all.yaml -------------------------------------------------------------------------------- /configs/ko_process.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/configs/ko_process.yaml -------------------------------------------------------------------------------- /configs/process.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/configs/process.yaml -------------------------------------------------------------------------------- /data/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data/test.json -------------------------------------------------------------------------------- /data/test_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data/test_data.json -------------------------------------------------------------------------------- /data_modori/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.5' -------------------------------------------------------------------------------- /data_modori/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/analysis/__init__.py -------------------------------------------------------------------------------- /data_modori/analysis/column_wise_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/analysis/column_wise_analysis.py -------------------------------------------------------------------------------- /data_modori/analysis/diversity_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/analysis/diversity_analysis.py -------------------------------------------------------------------------------- /data_modori/analysis/overall_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/analysis/overall_analysis.py -------------------------------------------------------------------------------- /data_modori/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import * # noqa: F401,F403 2 | -------------------------------------------------------------------------------- /data_modori/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/config/config.py -------------------------------------------------------------------------------- /data_modori/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/core/__init__.py -------------------------------------------------------------------------------- /data_modori/core/analyser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/core/analyser.py -------------------------------------------------------------------------------- /data_modori/core/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/core/data.py -------------------------------------------------------------------------------- /data_modori/core/executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/core/executor.py -------------------------------------------------------------------------------- /data_modori/core/exporter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/core/exporter.py -------------------------------------------------------------------------------- /data_modori/core/ray_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/core/ray_executor.py -------------------------------------------------------------------------------- /data_modori/core/tracer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/core/tracer.py -------------------------------------------------------------------------------- /data_modori/format/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/format/__init__.py -------------------------------------------------------------------------------- /data_modori/format/csv_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/format/csv_formatter.py -------------------------------------------------------------------------------- /data_modori/format/formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/format/formatter.py -------------------------------------------------------------------------------- /data_modori/format/json_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/format/json_formatter.py -------------------------------------------------------------------------------- /data_modori/format/load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/format/load.py -------------------------------------------------------------------------------- /data_modori/format/mixture_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/format/mixture_formatter.py -------------------------------------------------------------------------------- /data_modori/format/parquet_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/format/parquet_formatter.py -------------------------------------------------------------------------------- /data_modori/format/text_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/format/text_formatter.py -------------------------------------------------------------------------------- /data_modori/format/tsv_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/format/tsv_formatter.py -------------------------------------------------------------------------------- /data_modori/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/__init__.py -------------------------------------------------------------------------------- /data_modori/ops/base_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/base_op.py -------------------------------------------------------------------------------- /data_modori/ops/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/common/__init__.py -------------------------------------------------------------------------------- /data_modori/ops/common/aws_s3_downloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/common/aws_s3_downloader.py -------------------------------------------------------------------------------- /data_modori/ops/common/helper_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/common/helper_func.py -------------------------------------------------------------------------------- /data_modori/ops/common/special_characters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/common/special_characters.py -------------------------------------------------------------------------------- /data_modori/ops/deduplicator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/deduplicator/__init__.py -------------------------------------------------------------------------------- /data_modori/ops/deduplicator/document_deduplicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/deduplicator/document_deduplicator.py -------------------------------------------------------------------------------- /data_modori/ops/deduplicator/document_minhash_deduplicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/deduplicator/document_minhash_deduplicator.py -------------------------------------------------------------------------------- /data_modori/ops/deduplicator/document_simhash_deduplicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/deduplicator/document_simhash_deduplicator.py -------------------------------------------------------------------------------- /data_modori/ops/filter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/__init__.py -------------------------------------------------------------------------------- /data_modori/ops/filter/alphanumeric_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/alphanumeric_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/average_line_length_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/average_line_length_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/character_repetition_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/character_repetition_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/flagged_words_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/flagged_words_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/language_id_score_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/language_id_score_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/maximum_line_length_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/maximum_line_length_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/perplexity_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/perplexity_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/special_characters_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/special_characters_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/specified_field_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/specified_field_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/specified_numeric_field_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/specified_numeric_field_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/stopwords_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/stopwords_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/suffix_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/suffix_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/text_length_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/text_length_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/token_num_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/token_num_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/word_num_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/word_num_filter.py -------------------------------------------------------------------------------- /data_modori/ops/filter/word_repetition_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/filter/word_repetition_filter.py -------------------------------------------------------------------------------- /data_modori/ops/load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/load.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/__init__.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/clean_copyright_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/clean_copyright_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/clean_email_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/clean_email_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/clean_html_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/clean_html_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/clean_ip_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/clean_ip_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/clean_links_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/clean_links_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/expand_macro_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/expand_macro_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/fix_unicode_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/fix_unicode_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/nlpaug_en_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/nlpaug_en_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/punctuation_normalization_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/punctuation_normalization_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/remove_bibliography_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/remove_bibliography_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/remove_comments_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/remove_comments_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/remove_header_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/remove_header_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/remove_long_words_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/remove_long_words_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/remove_specific_chars_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/remove_specific_chars_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/remove_table_text_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/remove_table_text_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/remove_words_with_incorrect_substrings_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/remove_words_with_incorrect_substrings_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/sentence_split_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/sentence_split_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/mapper/whitespace_normalization_mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/mapper/whitespace_normalization_mapper.py -------------------------------------------------------------------------------- /data_modori/ops/op_fusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/op_fusion.py -------------------------------------------------------------------------------- /data_modori/ops/selector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/selector/__init__.py -------------------------------------------------------------------------------- /data_modori/ops/selector/frequency_specified_field_selector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/selector/frequency_specified_field_selector.py -------------------------------------------------------------------------------- /data_modori/ops/selector/topk_specified_field_selector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/ops/selector/topk_specified_field_selector.py -------------------------------------------------------------------------------- /data_modori/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/tools/__init__.py -------------------------------------------------------------------------------- /data_modori/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_modori/utils/asset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/utils/asset_utils.py -------------------------------------------------------------------------------- /data_modori/utils/cache_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/utils/cache_utils.py -------------------------------------------------------------------------------- /data_modori/utils/ckpt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/utils/ckpt_utils.py -------------------------------------------------------------------------------- /data_modori/utils/compress.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/utils/compress.py -------------------------------------------------------------------------------- /data_modori/utils/constant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/utils/constant.py -------------------------------------------------------------------------------- /data_modori/utils/file_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/utils/file_utils.py -------------------------------------------------------------------------------- /data_modori/utils/fingerprint_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/utils/fingerprint_utils.py -------------------------------------------------------------------------------- /data_modori/utils/logger_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/utils/logger_utils.py -------------------------------------------------------------------------------- /data_modori/utils/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/utils/mm_utils.py -------------------------------------------------------------------------------- /data_modori/utils/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/utils/model_utils.py -------------------------------------------------------------------------------- /data_modori/utils/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/data_modori/utils/registry.py -------------------------------------------------------------------------------- /docs/DeveloperGuide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/docs/DeveloperGuide.md -------------------------------------------------------------------------------- /docs/DeveloperGuide_ko.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/docs/DeveloperGuide_ko.md -------------------------------------------------------------------------------- /docs/Operators.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/docs/Operators.md -------------------------------------------------------------------------------- /docs/Operators_ko.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/docs/Operators_ko.md -------------------------------------------------------------------------------- /docs/imgs/buri_heart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/docs/imgs/buri_heart.png -------------------------------------------------------------------------------- /docs/imgs/eval-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/docs/imgs/eval-01.png -------------------------------------------------------------------------------- /docs/imgs/eval-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/docs/imgs/eval-02.png -------------------------------------------------------------------------------- /docs/imgs/streamlit_ex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/docs/imgs/streamlit_ex.png -------------------------------------------------------------------------------- /environments/combined_requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/environments/combined_requirements.txt -------------------------------------------------------------------------------- /environments/dev_requires.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/environments/dev_requires.txt -------------------------------------------------------------------------------- /environments/minimal_requires.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/environments/minimal_requires.txt -------------------------------------------------------------------------------- /environments/preprocess_requires.txt: -------------------------------------------------------------------------------- 1 | fire 2 | jsonlines 3 | -------------------------------------------------------------------------------- /environments/quality_classifier_requires.txt: -------------------------------------------------------------------------------- 1 | pyspark 2 | fire 3 | wget 4 | -------------------------------------------------------------------------------- /environments/science_requires.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/environments/science_requires.txt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | per-file-ignores = 3 | */__init__.py: F401 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/setup.py -------------------------------------------------------------------------------- /thirdparty/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/thirdparty/README.md -------------------------------------------------------------------------------- /thirdparty/README_ZH.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/thirdparty/README_ZH.md -------------------------------------------------------------------------------- /thirdparty/patch/helm.diff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/thirdparty/patch/helm.diff -------------------------------------------------------------------------------- /thirdparty/patch/megatron.diff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/thirdparty/patch/megatron.diff -------------------------------------------------------------------------------- /thirdparty/setup_helm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/thirdparty/setup_helm.sh -------------------------------------------------------------------------------- /thirdparty/setup_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/thirdparty/setup_megatron.sh -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/analyze_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/analyze_data.py -------------------------------------------------------------------------------- /tools/converter/batch_convert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/converter/batch_convert.sh -------------------------------------------------------------------------------- /tools/converter/convert_gpt_to_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/converter/convert_gpt_to_transformers.py -------------------------------------------------------------------------------- /tools/converter/modeling_megatron_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/converter/modeling_megatron_llama.py -------------------------------------------------------------------------------- /tools/evaluator/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/README.md -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/base.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/README.md -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/asdiv/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/asdiv/asdiv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/asdiv/asdiv.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/asdiv/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/asdiv/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/causal_judgement.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/causal_judgement.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/date_understanding.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/date_understanding.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/disambiguation_qa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/disambiguation_qa.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/dyck_languages.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/dyck_languages.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/formal_fallacies_syllogisms_negation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/formal_fallacies_syllogisms_negation.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/geometric_shapes.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/geometric_shapes.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/hyperbaton.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/hyperbaton.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/logical_deduction_five_objects.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/logical_deduction_five_objects.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/logical_deduction_seven_objects.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/logical_deduction_seven_objects.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/logical_deduction_three_objects.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/logical_deduction_three_objects.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/movie_recommendation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/movie_recommendation.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/navigate.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/navigate.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/reasoning_about_colored_objects.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/reasoning_about_colored_objects.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/ruin_names.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/ruin_names.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/salient_translation_error_detection.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/salient_translation_error_detection.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/snarks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/snarks.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/sports_understanding.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/sports_understanding.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/temporal_sequences.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/temporal_sequences.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_five_objects.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_five_objects.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_seven_objects.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_seven_objects.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_three_objects.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_three_objects.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/coqa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/coqa/coqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/coqa/coqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/coqa/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/coqa/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/drop/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/drop/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/drop/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/drop/drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/drop/drop.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/headqa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/headqa/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/headqa/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/headqa/headqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/headqa/headqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/hendrycks_ethics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/hendrycks_ethics/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/hendrycks_ethics/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/hendrycks_ethics/hendrycks_ethics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/hendrycks_ethics/hendrycks_ethics.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/hendrycks_math/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/hendrycks_math/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/hendrycks_math/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/hendrycks_math/hendrycks_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/hendrycks_math/hendrycks_math.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/kohatespeech/kohatespeech.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/kohatespeech/kohatespeech.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/kold/kold.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/kold/kold.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/kosbi/kosbi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/kosbi/kosbi.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/logiqa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/logiqa/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/logiqa/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/logiqa/logiqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/logiqa/logiqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/mutual/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/mutual/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/mutual/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/mutual/mutual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/mutual/mutual.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/nsmc/nsmc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/nsmc/nsmc.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/pile/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/pile/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/pile/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/pile/pile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/pile/pile.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/quac/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/quac/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/quac/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/quac/quac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/quac/quac.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/sat_analogies/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/sat_analogies/sat_analogies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/sat_analogies/sat_analogies.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/triviaqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/triviaqa/README.md -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/triviaqa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/triviaqa/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/triviaqa/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/triviaqa/triviaqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/triviaqa/triviaqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/unscramble/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/unscramble/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/unscramble/dataset_infos.json -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/datasets/unscramble/unscramble.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/datasets/unscramble/unscramble.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/decontamination/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/decontamination/archiver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/decontamination/archiver.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/decontamination/decontaminate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/decontamination/decontaminate.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/decontamination/janitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/decontamination/janitor.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/evaluator.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/metrics.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/models/__init__.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/models/dummy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/models/dummy.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/models/gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/models/gpt2.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/models/gpt3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/models/gpt3.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/models/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/models/huggingface.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/models/textsynth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/models/textsynth.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/__init__.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/anli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/anli.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/arc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/arc.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/arithmetic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/arithmetic.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/asdiv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/asdiv.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/bigbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/bigbench.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/blimp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/blimp.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/cbt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/cbt.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/coqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/coqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/crowspairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/crowspairs.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/drop.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/glue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/glue.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/gsm8k.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/headqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/headqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/hellaswag.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/hendrycks_ethics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/hendrycks_ethics.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/hendrycks_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/hendrycks_math.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/hendrycks_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/hendrycks_test.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/json.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/klue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/klue.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/ko_translation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/ko_translation.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/kobest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/kobest.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/kohatespeech.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/kohatespeech.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/kold.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/kold.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/korquad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/korquad.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/korunsmile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/korunsmile.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/kosbi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/kosbi.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/kowikitable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/kowikitable.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/lambada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/lambada.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/lambada_cloze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/lambada_cloze.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/lambada_multilingual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/lambada_multilingual.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/logiqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/logiqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/mathqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/mathqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/mc_taco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/mc_taco.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/mgsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/mgsm.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/mutual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/mutual.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/naturalqs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/naturalqs.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/nsmc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/nsmc.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/openbookqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/openbookqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/pawsx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/pawsx.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/pile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/pile.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/piqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/piqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/prost.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/prost.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/pubmedqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/pubmedqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/qa4mre.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/qa4mre.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/qasper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/qasper.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/quac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/quac.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/race.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/race.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/sat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/sat.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/sciq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/sciq.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/squad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/squad.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/storycloze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/storycloze.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/superglue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/superglue.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/swag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/swag.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/toxigen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/toxigen.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/translation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/translation.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/triviaqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/triviaqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/truthfulqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/truthfulqa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/unscramble.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/unscramble.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/webqs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/webqs.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/wikitext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/wikitext.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/winogrande.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/winogrande.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/wsc273.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/wsc273.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/xcopa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/xcopa.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/xnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/xnli.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/xstorycloze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/xstorycloze.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/tasks/xwinograd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/tasks/xwinograd.py -------------------------------------------------------------------------------- /tools/evaluator/lm_eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/lm_eval/utils.py -------------------------------------------------------------------------------- /tools/evaluator/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/main.py -------------------------------------------------------------------------------- /tools/evaluator/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/evaluator/run.sh -------------------------------------------------------------------------------- /tools/finetuning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/README.md -------------------------------------------------------------------------------- /tools/finetuning/config/mistral_7b_lora_dpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/config/mistral_7b_lora_dpo.yaml -------------------------------------------------------------------------------- /tools/finetuning/config/mistral_7b_lora_sft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/config/mistral_7b_lora_sft.yaml -------------------------------------------------------------------------------- /tools/finetuning/data_loaders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/data_loaders/__init__.py -------------------------------------------------------------------------------- /tools/finetuning/data_loaders/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/data_loaders/dpo.py -------------------------------------------------------------------------------- /tools/finetuning/data_loaders/load_hf_instruction_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/data_loaders/load_hf_instruction_data.py -------------------------------------------------------------------------------- /tools/finetuning/data_loaders/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/data_loaders/sft.py -------------------------------------------------------------------------------- /tools/finetuning/data_loaders/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/data_loaders/utils.py -------------------------------------------------------------------------------- /tools/finetuning/dpo_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/dpo_lora.py -------------------------------------------------------------------------------- /tools/finetuning/models/load_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/models/load_model.py -------------------------------------------------------------------------------- /tools/finetuning/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/requirements.txt -------------------------------------------------------------------------------- /tools/finetuning/scripts/run_mistral_lora_dpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/scripts/run_mistral_lora_dpo.sh -------------------------------------------------------------------------------- /tools/finetuning/scripts/run_mistral_lora_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/scripts/run_mistral_lora_sft.sh -------------------------------------------------------------------------------- /tools/finetuning/sft_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/sft_lora.py -------------------------------------------------------------------------------- /tools/finetuning/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/trainer/__init__.py -------------------------------------------------------------------------------- /tools/finetuning/trainer/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/trainer/base.py -------------------------------------------------------------------------------- /tools/finetuning/trainer/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/trainer/dpo.py -------------------------------------------------------------------------------- /tools/finetuning/trainer/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/finetuning/trainer/lora.py -------------------------------------------------------------------------------- /tools/postprocess/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/postprocess/README.md -------------------------------------------------------------------------------- /tools/postprocess/README_ko.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/postprocess/README_ko.md -------------------------------------------------------------------------------- /tools/postprocess/count_token.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/postprocess/count_token.py -------------------------------------------------------------------------------- /tools/postprocess/data_mixture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/postprocess/data_mixture.py -------------------------------------------------------------------------------- /tools/postprocess/deserialize_meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/postprocess/deserialize_meta.py -------------------------------------------------------------------------------- /tools/preprocess/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/preprocess/README.md -------------------------------------------------------------------------------- /tools/preprocess/README_ko.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/preprocess/README_ko.md -------------------------------------------------------------------------------- /tools/preprocess/dataset_split_by_language.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/preprocess/dataset_split_by_language.py -------------------------------------------------------------------------------- /tools/preprocess/raw_alpaca_cot_merge_add_meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/preprocess/raw_alpaca_cot_merge_add_meta.py -------------------------------------------------------------------------------- /tools/preprocess/raw_arxiv_to_jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/preprocess/raw_arxiv_to_jsonl.py -------------------------------------------------------------------------------- /tools/preprocess/raw_stackexchange_to_jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/preprocess/raw_stackexchange_to_jsonl.py -------------------------------------------------------------------------------- /tools/preprocess/reformat_csv_nan_value.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/preprocess/reformat_csv_nan_value.py -------------------------------------------------------------------------------- /tools/preprocess/reformat_jsonl_nan_value.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/preprocess/reformat_jsonl_nan_value.py -------------------------------------------------------------------------------- /tools/preprocess/serialize_meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/preprocess/serialize_meta.py -------------------------------------------------------------------------------- /tools/process_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/process_data.py -------------------------------------------------------------------------------- /tools/quality_classifier/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/quality_classifier/README.md -------------------------------------------------------------------------------- /tools/quality_classifier/README_ko.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/quality_classifier/README_ko.md -------------------------------------------------------------------------------- /tools/quality_classifier/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/quality_classifier/eval.py -------------------------------------------------------------------------------- /tools/quality_classifier/predict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/quality_classifier/predict.py -------------------------------------------------------------------------------- /tools/quality_classifier/qc_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/quality_classifier/qc_utils.py -------------------------------------------------------------------------------- /tools/quality_classifier/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teamreboott/data-modori/HEAD/tools/quality_classifier/train.py --------------------------------------------------------------------------------