├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── __init__.py ├── configs ├── coh_metrix_misc.json ├── default.json ├── llm_pretrain_filters.json ├── winobias.json └── word_based.json ├── data ├── README.md └── connectives │ ├── all_additive.txt │ ├── all_causal.txt │ ├── all_connective.txt │ ├── all_logical.txt │ ├── all_negative.txt │ ├── all_positive.txt │ ├── all_temporal.txt │ ├── positive_causal.txt │ ├── positive_intentional.txt │ └── positive_logical.txt ├── examples ├── coref_bias │ ├── Demo.ipynb │ ├── README.md │ ├── winobias_text_features_anti.jsonl │ └── winobias_text_features_pro.jsonl ├── hellaswag_sample_hardness │ ├── Demo.ipynb │ ├── hellaswag_opt6.7B_fs0_eval.outcomes.jsonl │ ├── hellaswag_opt6.7B_fs0_eval.texts.jsonl │ └── hellaswag_prompt_characteristics.tsv └── translation │ ├── Demo.ipynb │ ├── README.md │ ├── config.yaml │ ├── data │ ├── nllb_no_segmentation │ │ ├── .hydra │ │ │ └── config.yaml │ │ ├── translation_outcomes.jsonl │ │ └── translation_text_features.jsonl │ ├── nllb_segmentation │ │ ├── .hydra │ │ │ └── config.yaml │ │ ├── translation_outcomes.jsonl │ │ └── translation_text_features.jsonl │ └── translation_text_characteristics.tsv │ └── generate_data_and_metrics.py ├── scripts ├── convert_xlsx_resources.py ├── download_resources.sh └── parse_mrc_dct.py ├── setup.py ├── test ├── check_for_metric_changes.sh ├── ni_instructions.jsonl ├── ni_instructions_characteristics.csv └── repetition_factor │ ├── __init__.py │ ├── sample_results.txt │ ├── samples.txt │ └── test_repetition_fraction_calc.py ├── text_characterization ├── __init__.py ├── analysis.py ├── metrics.py ├── parser_backends.py └── utils.py └── tools ├── __init__.py ├── compute.py ├── diff_metrics.py ├── print_metric_descriptions.py └── text_extraction ├── natural_instructions.py └── winobias.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/.gitignore -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/README.md -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configs/coh_metrix_misc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/configs/coh_metrix_misc.json -------------------------------------------------------------------------------- /configs/default.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/configs/default.json -------------------------------------------------------------------------------- /configs/llm_pretrain_filters.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/configs/llm_pretrain_filters.json -------------------------------------------------------------------------------- /configs/winobias.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/configs/winobias.json -------------------------------------------------------------------------------- /configs/word_based.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/configs/word_based.json -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/data/README.md -------------------------------------------------------------------------------- /data/connectives/all_additive.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/data/connectives/all_additive.txt -------------------------------------------------------------------------------- /data/connectives/all_causal.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/data/connectives/all_causal.txt -------------------------------------------------------------------------------- /data/connectives/all_connective.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/data/connectives/all_connective.txt -------------------------------------------------------------------------------- /data/connectives/all_logical.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/data/connectives/all_logical.txt -------------------------------------------------------------------------------- /data/connectives/all_negative.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/data/connectives/all_negative.txt -------------------------------------------------------------------------------- /data/connectives/all_positive.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/data/connectives/all_positive.txt -------------------------------------------------------------------------------- /data/connectives/all_temporal.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/data/connectives/all_temporal.txt -------------------------------------------------------------------------------- /data/connectives/positive_causal.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/data/connectives/positive_causal.txt -------------------------------------------------------------------------------- /data/connectives/positive_intentional.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/data/connectives/positive_intentional.txt -------------------------------------------------------------------------------- /data/connectives/positive_logical.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/data/connectives/positive_logical.txt -------------------------------------------------------------------------------- /examples/coref_bias/Demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/coref_bias/Demo.ipynb -------------------------------------------------------------------------------- /examples/coref_bias/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/coref_bias/README.md -------------------------------------------------------------------------------- /examples/coref_bias/winobias_text_features_anti.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/coref_bias/winobias_text_features_anti.jsonl -------------------------------------------------------------------------------- /examples/coref_bias/winobias_text_features_pro.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/coref_bias/winobias_text_features_pro.jsonl -------------------------------------------------------------------------------- /examples/hellaswag_sample_hardness/Demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/hellaswag_sample_hardness/Demo.ipynb -------------------------------------------------------------------------------- /examples/hellaswag_sample_hardness/hellaswag_opt6.7B_fs0_eval.outcomes.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/hellaswag_sample_hardness/hellaswag_opt6.7B_fs0_eval.outcomes.jsonl -------------------------------------------------------------------------------- /examples/hellaswag_sample_hardness/hellaswag_opt6.7B_fs0_eval.texts.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/hellaswag_sample_hardness/hellaswag_opt6.7B_fs0_eval.texts.jsonl -------------------------------------------------------------------------------- /examples/hellaswag_sample_hardness/hellaswag_prompt_characteristics.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/hellaswag_sample_hardness/hellaswag_prompt_characteristics.tsv -------------------------------------------------------------------------------- /examples/translation/Demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/translation/Demo.ipynb -------------------------------------------------------------------------------- /examples/translation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/translation/README.md -------------------------------------------------------------------------------- /examples/translation/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/translation/config.yaml -------------------------------------------------------------------------------- /examples/translation/data/nllb_no_segmentation/.hydra/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/translation/data/nllb_no_segmentation/.hydra/config.yaml -------------------------------------------------------------------------------- /examples/translation/data/nllb_no_segmentation/translation_outcomes.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/translation/data/nllb_no_segmentation/translation_outcomes.jsonl -------------------------------------------------------------------------------- /examples/translation/data/nllb_no_segmentation/translation_text_features.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/translation/data/nllb_no_segmentation/translation_text_features.jsonl -------------------------------------------------------------------------------- /examples/translation/data/nllb_segmentation/.hydra/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/translation/data/nllb_segmentation/.hydra/config.yaml -------------------------------------------------------------------------------- /examples/translation/data/nllb_segmentation/translation_outcomes.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/translation/data/nllb_segmentation/translation_outcomes.jsonl -------------------------------------------------------------------------------- /examples/translation/data/nllb_segmentation/translation_text_features.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/translation/data/nllb_segmentation/translation_text_features.jsonl -------------------------------------------------------------------------------- /examples/translation/data/translation_text_characteristics.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/translation/data/translation_text_characteristics.tsv -------------------------------------------------------------------------------- /examples/translation/generate_data_and_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/examples/translation/generate_data_and_metrics.py -------------------------------------------------------------------------------- /scripts/convert_xlsx_resources.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/scripts/convert_xlsx_resources.py -------------------------------------------------------------------------------- /scripts/download_resources.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/scripts/download_resources.sh -------------------------------------------------------------------------------- /scripts/parse_mrc_dct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/scripts/parse_mrc_dct.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/setup.py -------------------------------------------------------------------------------- /test/check_for_metric_changes.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/test/check_for_metric_changes.sh -------------------------------------------------------------------------------- /test/ni_instructions.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/test/ni_instructions.jsonl -------------------------------------------------------------------------------- /test/ni_instructions_characteristics.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/test/ni_instructions_characteristics.csv -------------------------------------------------------------------------------- /test/repetition_factor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/repetition_factor/sample_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/test/repetition_factor/sample_results.txt -------------------------------------------------------------------------------- /test/repetition_factor/samples.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/test/repetition_factor/samples.txt -------------------------------------------------------------------------------- /test/repetition_factor/test_repetition_fraction_calc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/test/repetition_factor/test_repetition_fraction_calc.py -------------------------------------------------------------------------------- /text_characterization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /text_characterization/analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/text_characterization/analysis.py -------------------------------------------------------------------------------- /text_characterization/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/text_characterization/metrics.py -------------------------------------------------------------------------------- /text_characterization/parser_backends.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/text_characterization/parser_backends.py -------------------------------------------------------------------------------- /text_characterization/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/text_characterization/utils.py -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/compute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/tools/compute.py -------------------------------------------------------------------------------- /tools/diff_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/tools/diff_metrics.py -------------------------------------------------------------------------------- /tools/print_metric_descriptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/tools/print_metric_descriptions.py -------------------------------------------------------------------------------- /tools/text_extraction/natural_instructions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/tools/text_extraction/natural_instructions.py -------------------------------------------------------------------------------- /tools/text_extraction/winobias.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/text_characterization_toolkit/HEAD/tools/text_extraction/winobias.py --------------------------------------------------------------------------------