├── .gitattributes ├── .github └── workflows │ ├── ci.yml │ └── pypi.yml ├── .gitignore ├── LICENSE ├── README.md ├── benchmarking ├── benchmark-analysis.ipynb ├── config.yaml ├── loss_function_benchmark.py └── runtime_sanity_check.py ├── config ├── config.yaml ├── dataset_args │ ├── arithmetic.yaml │ ├── debug.yaml │ ├── gsm8k.yaml │ ├── mathematics_dataset.yaml │ ├── multiplication.yaml │ ├── multirc.yaml │ └── rjokes.yaml ├── hydra_config │ └── config.yaml ├── model_args │ ├── rt.yaml │ ├── rt_ntl.yaml │ ├── vanilla_t5.yaml │ ├── vanilla_t5_custom_tokenizer.yaml │ ├── vanilla_t5_ntl.yaml │ ├── vanilla_t5_ntl_default_tokenizer.yaml │ ├── vanilla_t5_regression_head.yaml │ └── xval.yaml ├── run_specific_config │ ├── config.yaml │ ├── debug_config.yaml │ ├── gm8k_runs.yaml │ ├── mathematics_dataset_run.yaml │ ├── t5_big.yaml │ └── training_with_warmup.yaml └── training_args │ ├── eval.yaml │ └── train.yaml ├── data ├── cnn_dataset │ └── generate_data.py ├── grade-school-math │ ├── LICENSE │ ├── README.md │ ├── grade_school_math │ │ ├── calculator.py │ │ ├── dataset.py │ │ ├── img │ │ │ └── example_problems.png │ │ ├── prepocess_data.py │ │ ├── sample.py │ │ ├── train.py │ │ └── view_model_solutions.py │ └── setup.py ├── mathematics_dataset-v1.0 │ ├── arith_create_splits.py │ └── create_data_splits.py ├── multirc │ ├── data │ │ └── preprocessed │ │ │ └── .gitkeep │ └── preprocess_data.py └── rjokes-dataset │ ├── data │ └── .gitkeep │ └── generate_dataset.py ├── docs ├── assets │ ├── ICML-head.svg │ ├── ICML-logo.svg │ ├── loss_comparison_v4.svg │ ├── ntl_v5.svg │ ├── pypi.png │ └── streamlit.png ├── index.html └── styles.css ├── pyproject.toml ├── requirements.txt ├── resources ├── avg_number_token_distribution.png ├── figure_sample_efficiency.pdf ├── last_digit_vs_distance_histogram.png ├── multiplication_label.pdf ├── neurips_mathai_poster.pdf └── ntl-image.jpg ├── scripts ├── llama.py ├── loss_integration.ipynb └── test_dist_loss.ipynb ├── src └── ntl │ ├── __init__.py │ ├── args.py │ ├── collators │ ├── __init__.py │ ├── question_answer_clm │ │ ├── __init__.py │ │ ├── vanilla_question_answer_collator.py │ │ └── xval_question_answer_collator.py │ └── question_answer_mlm │ │ ├── __init__.py │ │ ├── regression_head_question_answer_collator.py │ │ ├── vanilla_mlm_question_answer_collator.py │ │ └── xval_mask_question_collator.py │ ├── data │ ├── __init__.py │ ├── data.py │ └── regression_transformer_number_tokens.txt │ ├── encoding_decoding │ ├── __init__.py │ └── numerical_encodings.py │ ├── evaluation.py │ ├── loss_functions │ ├── __init__.py │ ├── abs_diff_number_token_loss.py │ ├── base_number_token_loss.py │ ├── number_token_loss.py │ └── wasserstein_distance_number_token_loss.py │ ├── metrics │ ├── rouge.py │ └── sacrebleu.py │ ├── results_sorting.py │ ├── run_language_modeling.py │ ├── tokenizer │ ├── __init__.py │ ├── abstract_tokenizer.py │ ├── auto_number_tokenizer.py │ ├── rt_tokenizer.py │ ├── t5custom_tokenizer.py │ └── xval_tokenizer.py │ ├── trainer.py │ ├── transformer_backbone │ ├── __init__.py │ └── t5 │ │ ├── __init__.py │ │ ├── t5_rt.py │ │ ├── t5_vanilla_for_number_token_loss.py │ │ └── t5_xval.py │ ├── utils │ ├── __init__.py │ ├── helper_functionality.py │ ├── label_smoother.py │ ├── number_token_selector.py │ └── numerical_operations.py │ └── xval │ ├── __init__.py │ ├── eval.py │ ├── numformer.py │ ├── train.py │ └── xval_mask_question_collator.py └── tests ├── encoding_decoding ├── __init__.py └── test_numerical_encodings.py ├── loss_functions ├── __init__.py ├── test_abs_diff_number_token_loss.py ├── test_number_token_loss.py └── test_wasserstein_number_token_loss.py ├── test_evaluation.py ├── test_number_head.py ├── test_run_language_modeling.py └── tokenizer ├── __init__.py ├── test_rt_tokenizer.py ├── test_t5custom_tokenizer.py └── test_xval_tokenizer.py /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/.gitattributes -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/.github/workflows/pypi.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/README.md -------------------------------------------------------------------------------- /benchmarking/benchmark-analysis.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/benchmarking/benchmark-analysis.ipynb -------------------------------------------------------------------------------- /benchmarking/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/benchmarking/config.yaml -------------------------------------------------------------------------------- /benchmarking/loss_function_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/benchmarking/loss_function_benchmark.py -------------------------------------------------------------------------------- /benchmarking/runtime_sanity_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/benchmarking/runtime_sanity_check.py -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/config.yaml -------------------------------------------------------------------------------- /config/dataset_args/arithmetic.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: arithmetic -------------------------------------------------------------------------------- /config/dataset_args/debug.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: debug -------------------------------------------------------------------------------- /config/dataset_args/gsm8k.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: gsm8k 2 | train_with_augmented_data: false -------------------------------------------------------------------------------- /config/dataset_args/mathematics_dataset.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/dataset_args/mathematics_dataset.yaml -------------------------------------------------------------------------------- /config/dataset_args/multiplication.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: multiplication -------------------------------------------------------------------------------- /config/dataset_args/multirc.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: multirc 2 | compute_number_metrics: false -------------------------------------------------------------------------------- /config/dataset_args/rjokes.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: rjokes -------------------------------------------------------------------------------- /config/hydra_config/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/hydra_config/config.yaml -------------------------------------------------------------------------------- /config/model_args/rt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/model_args/rt.yaml -------------------------------------------------------------------------------- /config/model_args/rt_ntl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/model_args/rt_ntl.yaml -------------------------------------------------------------------------------- /config/model_args/vanilla_t5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/model_args/vanilla_t5.yaml -------------------------------------------------------------------------------- /config/model_args/vanilla_t5_custom_tokenizer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/model_args/vanilla_t5_custom_tokenizer.yaml -------------------------------------------------------------------------------- /config/model_args/vanilla_t5_ntl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/model_args/vanilla_t5_ntl.yaml -------------------------------------------------------------------------------- /config/model_args/vanilla_t5_ntl_default_tokenizer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/model_args/vanilla_t5_ntl_default_tokenizer.yaml -------------------------------------------------------------------------------- /config/model_args/vanilla_t5_regression_head.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/model_args/vanilla_t5_regression_head.yaml -------------------------------------------------------------------------------- /config/model_args/xval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/model_args/xval.yaml -------------------------------------------------------------------------------- /config/run_specific_config/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/run_specific_config/config.yaml -------------------------------------------------------------------------------- /config/run_specific_config/debug_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/run_specific_config/debug_config.yaml -------------------------------------------------------------------------------- /config/run_specific_config/gm8k_runs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/run_specific_config/gm8k_runs.yaml -------------------------------------------------------------------------------- /config/run_specific_config/mathematics_dataset_run.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/run_specific_config/mathematics_dataset_run.yaml -------------------------------------------------------------------------------- /config/run_specific_config/t5_big.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/run_specific_config/t5_big.yaml -------------------------------------------------------------------------------- /config/run_specific_config/training_with_warmup.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/run_specific_config/training_with_warmup.yaml -------------------------------------------------------------------------------- /config/training_args/eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/training_args/eval.yaml -------------------------------------------------------------------------------- /config/training_args/train.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/config/training_args/train.yaml -------------------------------------------------------------------------------- /data/cnn_dataset/generate_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/cnn_dataset/generate_data.py -------------------------------------------------------------------------------- /data/grade-school-math/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/grade-school-math/LICENSE -------------------------------------------------------------------------------- /data/grade-school-math/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/grade-school-math/README.md -------------------------------------------------------------------------------- /data/grade-school-math/grade_school_math/calculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/grade-school-math/grade_school_math/calculator.py -------------------------------------------------------------------------------- /data/grade-school-math/grade_school_math/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/grade-school-math/grade_school_math/dataset.py -------------------------------------------------------------------------------- /data/grade-school-math/grade_school_math/img/example_problems.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/grade-school-math/grade_school_math/img/example_problems.png -------------------------------------------------------------------------------- /data/grade-school-math/grade_school_math/prepocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/grade-school-math/grade_school_math/prepocess_data.py -------------------------------------------------------------------------------- /data/grade-school-math/grade_school_math/sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/grade-school-math/grade_school_math/sample.py -------------------------------------------------------------------------------- /data/grade-school-math/grade_school_math/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/grade-school-math/grade_school_math/train.py -------------------------------------------------------------------------------- /data/grade-school-math/grade_school_math/view_model_solutions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/grade-school-math/grade_school_math/view_model_solutions.py -------------------------------------------------------------------------------- /data/grade-school-math/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/grade-school-math/setup.py -------------------------------------------------------------------------------- /data/mathematics_dataset-v1.0/arith_create_splits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/mathematics_dataset-v1.0/arith_create_splits.py -------------------------------------------------------------------------------- /data/mathematics_dataset-v1.0/create_data_splits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/mathematics_dataset-v1.0/create_data_splits.py -------------------------------------------------------------------------------- /data/multirc/data/preprocessed/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/multirc/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/multirc/preprocess_data.py -------------------------------------------------------------------------------- /data/rjokes-dataset/data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/rjokes-dataset/generate_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/data/rjokes-dataset/generate_dataset.py -------------------------------------------------------------------------------- /docs/assets/ICML-head.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/docs/assets/ICML-head.svg -------------------------------------------------------------------------------- /docs/assets/ICML-logo.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/docs/assets/ICML-logo.svg -------------------------------------------------------------------------------- /docs/assets/loss_comparison_v4.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/docs/assets/loss_comparison_v4.svg -------------------------------------------------------------------------------- /docs/assets/ntl_v5.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/docs/assets/ntl_v5.svg -------------------------------------------------------------------------------- /docs/assets/pypi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/docs/assets/pypi.png -------------------------------------------------------------------------------- /docs/assets/streamlit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/docs/assets/streamlit.png -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/docs/index.html -------------------------------------------------------------------------------- /docs/styles.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/docs/styles.css -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/requirements.txt -------------------------------------------------------------------------------- /resources/avg_number_token_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/resources/avg_number_token_distribution.png -------------------------------------------------------------------------------- /resources/figure_sample_efficiency.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/resources/figure_sample_efficiency.pdf -------------------------------------------------------------------------------- /resources/last_digit_vs_distance_histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/resources/last_digit_vs_distance_histogram.png -------------------------------------------------------------------------------- /resources/multiplication_label.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/resources/multiplication_label.pdf -------------------------------------------------------------------------------- /resources/neurips_mathai_poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/resources/neurips_mathai_poster.pdf -------------------------------------------------------------------------------- /resources/ntl-image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/resources/ntl-image.jpg -------------------------------------------------------------------------------- /scripts/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/scripts/llama.py -------------------------------------------------------------------------------- /scripts/loss_integration.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/scripts/loss_integration.ipynb -------------------------------------------------------------------------------- /scripts/test_dist_loss.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/scripts/test_dist_loss.ipynb -------------------------------------------------------------------------------- /src/ntl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/args.py -------------------------------------------------------------------------------- /src/ntl/collators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/collators/question_answer_clm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/collators/question_answer_clm/vanilla_question_answer_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/collators/question_answer_clm/vanilla_question_answer_collator.py -------------------------------------------------------------------------------- /src/ntl/collators/question_answer_clm/xval_question_answer_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/collators/question_answer_clm/xval_question_answer_collator.py -------------------------------------------------------------------------------- /src/ntl/collators/question_answer_mlm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/collators/question_answer_mlm/regression_head_question_answer_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/collators/question_answer_mlm/regression_head_question_answer_collator.py -------------------------------------------------------------------------------- /src/ntl/collators/question_answer_mlm/vanilla_mlm_question_answer_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/collators/question_answer_mlm/vanilla_mlm_question_answer_collator.py -------------------------------------------------------------------------------- /src/ntl/collators/question_answer_mlm/xval_mask_question_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/collators/question_answer_mlm/xval_mask_question_collator.py -------------------------------------------------------------------------------- /src/ntl/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/data/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/data/data.py -------------------------------------------------------------------------------- /src/ntl/data/regression_transformer_number_tokens.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/data/regression_transformer_number_tokens.txt -------------------------------------------------------------------------------- /src/ntl/encoding_decoding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/encoding_decoding/numerical_encodings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/encoding_decoding/numerical_encodings.py -------------------------------------------------------------------------------- /src/ntl/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/evaluation.py -------------------------------------------------------------------------------- /src/ntl/loss_functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/loss_functions/abs_diff_number_token_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/loss_functions/abs_diff_number_token_loss.py -------------------------------------------------------------------------------- /src/ntl/loss_functions/base_number_token_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/loss_functions/base_number_token_loss.py -------------------------------------------------------------------------------- /src/ntl/loss_functions/number_token_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/loss_functions/number_token_loss.py -------------------------------------------------------------------------------- /src/ntl/loss_functions/wasserstein_distance_number_token_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/loss_functions/wasserstein_distance_number_token_loss.py -------------------------------------------------------------------------------- /src/ntl/metrics/rouge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/metrics/rouge.py -------------------------------------------------------------------------------- /src/ntl/metrics/sacrebleu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/metrics/sacrebleu.py -------------------------------------------------------------------------------- /src/ntl/results_sorting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/results_sorting.py -------------------------------------------------------------------------------- /src/ntl/run_language_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/run_language_modeling.py -------------------------------------------------------------------------------- /src/ntl/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/tokenizer/abstract_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/tokenizer/abstract_tokenizer.py -------------------------------------------------------------------------------- /src/ntl/tokenizer/auto_number_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/tokenizer/auto_number_tokenizer.py -------------------------------------------------------------------------------- /src/ntl/tokenizer/rt_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/tokenizer/rt_tokenizer.py -------------------------------------------------------------------------------- /src/ntl/tokenizer/t5custom_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/tokenizer/t5custom_tokenizer.py -------------------------------------------------------------------------------- /src/ntl/tokenizer/xval_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/tokenizer/xval_tokenizer.py -------------------------------------------------------------------------------- /src/ntl/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/trainer.py -------------------------------------------------------------------------------- /src/ntl/transformer_backbone/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/transformer_backbone/t5/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/transformer_backbone/t5/t5_rt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/transformer_backbone/t5/t5_rt.py -------------------------------------------------------------------------------- /src/ntl/transformer_backbone/t5/t5_vanilla_for_number_token_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/transformer_backbone/t5/t5_vanilla_for_number_token_loss.py -------------------------------------------------------------------------------- /src/ntl/transformer_backbone/t5/t5_xval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/transformer_backbone/t5/t5_xval.py -------------------------------------------------------------------------------- /src/ntl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/utils/helper_functionality.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/utils/helper_functionality.py -------------------------------------------------------------------------------- /src/ntl/utils/label_smoother.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/utils/label_smoother.py -------------------------------------------------------------------------------- /src/ntl/utils/number_token_selector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/utils/number_token_selector.py -------------------------------------------------------------------------------- /src/ntl/utils/numerical_operations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/utils/numerical_operations.py -------------------------------------------------------------------------------- /src/ntl/xval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ntl/xval/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/xval/eval.py -------------------------------------------------------------------------------- /src/ntl/xval/numformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/xval/numformer.py -------------------------------------------------------------------------------- /src/ntl/xval/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/xval/train.py -------------------------------------------------------------------------------- /src/ntl/xval/xval_mask_question_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/src/ntl/xval/xval_mask_question_collator.py -------------------------------------------------------------------------------- /tests/encoding_decoding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/encoding_decoding/test_numerical_encodings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/tests/encoding_decoding/test_numerical_encodings.py -------------------------------------------------------------------------------- /tests/loss_functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/loss_functions/test_abs_diff_number_token_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/tests/loss_functions/test_abs_diff_number_token_loss.py -------------------------------------------------------------------------------- /tests/loss_functions/test_number_token_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/tests/loss_functions/test_number_token_loss.py -------------------------------------------------------------------------------- /tests/loss_functions/test_wasserstein_number_token_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/tests/loss_functions/test_wasserstein_number_token_loss.py -------------------------------------------------------------------------------- /tests/test_evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/tests/test_evaluation.py -------------------------------------------------------------------------------- /tests/test_number_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/tests/test_number_head.py -------------------------------------------------------------------------------- /tests/test_run_language_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/tests/test_run_language_modeling.py -------------------------------------------------------------------------------- /tests/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tokenizer/test_rt_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/tests/tokenizer/test_rt_tokenizer.py -------------------------------------------------------------------------------- /tests/tokenizer/test_t5custom_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/tests/tokenizer/test_t5custom_tokenizer.py -------------------------------------------------------------------------------- /tests/tokenizer/test_xval_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-ai/number-token-loss/HEAD/tests/tokenizer/test_xval_tokenizer.py --------------------------------------------------------------------------------