├── .circleci ├── VERSIONS ├── assets │ └── .safety-policy.yml └── config.yml ├── .github ├── CODEOWNERS ├── CONTRIBUTING.rst ├── pull_request_template.md ├── release-drafter.yml └── workflows │ ├── release-drafter.yml │ └── stale.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.md ├── advanced_usage.md ├── docs ├── Makefile ├── make.bat └── source │ ├── _templates │ ├── custom-class-template.rst │ └── custom-module-template.rst │ ├── conf.py │ ├── contributing_link.rst │ ├── generate_autosummary.rst │ ├── index.rst │ └── readme_link.rst ├── generative_data_prep ├── __init__.py ├── __main__.py ├── data_buffers │ ├── __init__.py │ ├── file_buffer.py │ └── hdf5_file_buffer.py ├── data_prep │ ├── __init__.py │ ├── data_prep.py │ └── pipeline.py ├── processors │ ├── __init__.py │ ├── article_tokenizer.py │ ├── metrics.py │ └── sequence_packer.py ├── tokenized_line │ ├── __init__.py │ ├── token.py │ └── tokenized_line.py └── utils │ ├── __init__.py │ ├── add_metadata_to_dataset.py │ ├── arg_configs.py │ ├── arg_parser.py │ ├── balance_hdf5_files.py │ ├── configs │ └── logger.conf │ ├── constants.py │ ├── convert_chat_template_to_prompt_completion.py │ ├── decode_hdf5.py │ ├── large_file_shuffle.py │ ├── logger.py │ ├── metadata_generation.py │ ├── path_verify.py │ ├── studio_integrations.py │ └── utils.py ├── img ├── SambaNova-dark-logo-1.png └── SambaNova-light-logo-1.png ├── pyproject.toml ├── requirements ├── all-requirements.txt ├── build-requirements.txt ├── dev-requirements.txt ├── docs-requirements.txt ├── requirements.txt └── tests-requirements.txt ├── setup.cfg └── tests ├── __init__.py ├── component └── test_running_command.py ├── conftest.py ├── examples ├── apply_chat_template │ ├── data_prepped_apply_chat_template.hdf5 │ ├── decoded_data_prepped_apply_chat_template.txt │ ├── example_apply_chat_template_data.jsonl │ └── pipelined_apply_chat_template │ │ ├── metadata.yaml │ │ ├── sha256 │ │ └── files_metadata.json │ │ ├── tokenizer │ │ ├── config.json │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ └── tokenizer_config.json │ │ ├── train_10_of_16.hdf5 │ │ ├── train_11_of_16.hdf5 │ │ ├── train_12_of_16.hdf5 │ │ ├── train_13_of_16.hdf5 │ │ ├── train_14_of_16.hdf5 │ │ ├── train_15_of_16.hdf5 │ │ ├── train_16_of_16.hdf5 │ │ ├── train_1_of_16.hdf5 │ │ ├── train_2_of_16.hdf5 │ │ ├── train_3_of_16.hdf5 │ │ ├── train_4_of_16.hdf5 │ │ ├── train_5_of_16.hdf5 │ │ ├── train_6_of_16.hdf5 │ │ ├── train_7_of_16.hdf5 │ │ ├── train_8_of_16.hdf5 │ │ └── train_9_of_16.hdf5 ├── category_ids │ ├── data_prepped_category_ids.hdf5 │ ├── example_category_ids_data.jsonl │ └── pipelined_category_ids │ │ ├── category_to_id.json │ │ ├── tokenizer │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ │ ├── train_10_of_32.hdf5 │ │ ├── train_11_of_32.hdf5 │ │ ├── train_12_of_32.hdf5 │ │ ├── train_13_of_32.hdf5 │ │ ├── train_14_of_32.hdf5 │ │ ├── train_15_of_32.hdf5 │ │ ├── train_16_of_32.hdf5 │ │ ├── train_17_of_32.hdf5 │ │ ├── train_18_of_32.hdf5 │ │ ├── train_19_of_32.hdf5 │ │ ├── train_1_of_32.hdf5 │ │ ├── train_20_of_32.hdf5 │ │ ├── train_21_of_32.hdf5 │ │ ├── train_22_of_32.hdf5 │ │ ├── train_23_of_32.hdf5 │ │ ├── train_24_of_32.hdf5 │ │ ├── train_25_of_32.hdf5 │ │ ├── train_26_of_32.hdf5 │ │ ├── train_27_of_32.hdf5 │ │ ├── train_28_of_32.hdf5 │ │ ├── train_29_of_32.hdf5 │ │ ├── train_2_of_32.hdf5 │ │ ├── train_30_of_32.hdf5 │ │ ├── train_31_of_32.hdf5 │ │ ├── train_32_of_32.hdf5 │ │ ├── train_3_of_32.hdf5 │ │ ├── train_4_of_32.hdf5 │ │ ├── train_5_of_32.hdf5 │ │ ├── train_6_of_32.hdf5 │ │ ├── train_7_of_32.hdf5 │ │ ├── train_8_of_32.hdf5 │ │ └── train_9_of_32.hdf5 ├── data_prep_from_main │ ├── example_data_prep_from_main_data.jsonl │ └── pipelined_data_prep_from_main │ │ ├── metadata.yaml │ │ ├── sha256 │ │ └── files_metadata.json │ │ ├── tokenizer │ │ ├── added_tokens.json │ │ ├── config.json │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ │ ├── train_1_of_2.hdf5 │ │ └── train_2_of_2.hdf5 ├── data_prep_test │ ├── data_prepped_data_prep_test.hdf5 │ └── example_data_prep_test_data.jsonl ├── dialogue │ ├── data_prepped_dialogue.hdf5 │ ├── decoded_data_prepped_dialogue.txt │ ├── example_dialogue_data.jsonl │ └── pipelined_dialogue │ │ ├── splits │ │ ├── train_10_of_32.jsonl │ │ ├── train_11_of_32.jsonl │ │ ├── train_12_of_32.jsonl │ │ ├── train_13_of_32.jsonl │ │ ├── train_14_of_32.jsonl │ │ ├── train_15_of_32.jsonl │ │ ├── train_16_of_32.jsonl │ │ ├── train_17_of_32.jsonl │ │ ├── train_18_of_32.jsonl │ │ ├── train_19_of_32.jsonl │ │ ├── train_1_of_32.jsonl │ │ ├── train_20_of_32.jsonl │ │ ├── train_21_of_32.jsonl │ │ ├── train_22_of_32.jsonl │ │ ├── train_23_of_32.jsonl │ │ ├── train_24_of_32.jsonl │ │ ├── train_25_of_32.jsonl │ │ ├── train_26_of_32.jsonl │ │ ├── train_27_of_32.jsonl │ │ ├── train_28_of_32.jsonl │ │ ├── train_29_of_32.jsonl │ │ ├── train_2_of_32.jsonl │ │ ├── train_30_of_32.jsonl │ │ ├── train_31_of_32.jsonl │ │ ├── train_32_of_32.jsonl │ │ ├── train_3_of_32.jsonl │ │ ├── train_4_of_32.jsonl │ │ ├── train_5_of_32.jsonl │ │ ├── train_6_of_32.jsonl │ │ ├── train_7_of_32.jsonl │ │ ├── train_8_of_32.jsonl │ │ └── train_9_of_32.jsonl │ │ ├── tokenizer │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ │ ├── train_10_of_32.hdf5 │ │ ├── train_11_of_32.hdf5 │ │ ├── train_12_of_32.hdf5 │ │ ├── train_13_of_32.hdf5 │ │ ├── train_14_of_32.hdf5 │ │ ├── train_15_of_32.hdf5 │ │ ├── train_16_of_32.hdf5 │ │ ├── train_17_of_32.hdf5 │ │ ├── train_18_of_32.hdf5 │ │ ├── train_19_of_32.hdf5 │ │ ├── train_1_of_32.hdf5 │ │ ├── train_20_of_32.hdf5 │ │ ├── train_21_of_32.hdf5 │ │ ├── train_22_of_32.hdf5 │ │ ├── train_23_of_32.hdf5 │ │ ├── train_24_of_32.hdf5 │ │ ├── train_25_of_32.hdf5 │ │ ├── train_26_of_32.hdf5 │ │ ├── train_27_of_32.hdf5 │ │ ├── train_28_of_32.hdf5 │ │ ├── train_29_of_32.hdf5 │ │ ├── train_2_of_32.hdf5 │ │ ├── train_30_of_32.hdf5 │ │ ├── train_31_of_32.hdf5 │ │ ├── train_32_of_32.hdf5 │ │ ├── train_3_of_32.hdf5 │ │ ├── train_4_of_32.hdf5 │ │ ├── train_5_of_32.hdf5 │ │ ├── train_6_of_32.hdf5 │ │ ├── train_7_of_32.hdf5 │ │ ├── train_8_of_32.hdf5 │ │ └── train_9_of_32.hdf5 ├── directory_input │ ├── example_directory_input_data │ │ ├── example_pretraining_data copy 2.jsonl │ │ ├── example_pretraining_data copy.jsonl │ │ └── example_pretraining_data.jsonl │ └── pipelined_directory_input │ │ ├── metadata.yaml │ │ ├── sha256 │ │ └── files_metadata.json │ │ ├── tokenizer │ │ ├── config.json │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ │ ├── train_10_of_32.hdf5 │ │ ├── train_11_of_32.hdf5 │ │ ├── train_12_of_32.hdf5 │ │ ├── train_13_of_32.hdf5 │ │ ├── train_14_of_32.hdf5 │ │ ├── train_15_of_32.hdf5 │ │ ├── train_16_of_32.hdf5 │ │ ├── train_17_of_32.hdf5 │ │ ├── train_18_of_32.hdf5 │ │ ├── train_19_of_32.hdf5 │ │ ├── train_1_of_32.hdf5 │ │ ├── train_20_of_32.hdf5 │ │ ├── train_21_of_32.hdf5 │ │ ├── train_22_of_32.hdf5 │ │ ├── train_23_of_32.hdf5 │ │ ├── train_24_of_32.hdf5 │ │ ├── train_25_of_32.hdf5 │ │ ├── train_26_of_32.hdf5 │ │ ├── train_27_of_32.hdf5 │ │ ├── train_28_of_32.hdf5 │ │ ├── train_29_of_32.hdf5 │ │ ├── train_2_of_32.hdf5 │ │ ├── train_30_of_32.hdf5 │ │ ├── train_31_of_32.hdf5 │ │ ├── train_32_of_32.hdf5 │ │ ├── train_3_of_32.hdf5 │ │ ├── train_4_of_32.hdf5 │ │ ├── train_5_of_32.hdf5 │ │ ├── train_6_of_32.hdf5 │ │ ├── train_7_of_32.hdf5 │ │ ├── train_8_of_32.hdf5 │ │ └── train_9_of_32.hdf5 ├── generative_tuning │ ├── data_prepped_generative_tuning.hdf5 │ ├── decoded_data_prepped_generative_tuning.txt │ ├── example_generative_tuning_data.jsonl │ └── pipelined_generative_tuning │ │ ├── splits │ │ ├── train_10_of_32.jsonl │ │ ├── train_11_of_32.jsonl │ │ ├── train_12_of_32.jsonl │ │ ├── train_13_of_32.jsonl │ │ ├── train_14_of_32.jsonl │ │ ├── train_15_of_32.jsonl │ │ ├── train_16_of_32.jsonl │ │ ├── train_17_of_32.jsonl │ │ ├── train_18_of_32.jsonl │ │ ├── train_19_of_32.jsonl │ │ ├── train_1_of_32.jsonl │ │ ├── train_20_of_32.jsonl │ │ ├── train_21_of_32.jsonl │ │ ├── train_22_of_32.jsonl │ │ ├── train_23_of_32.jsonl │ │ ├── train_24_of_32.jsonl │ │ ├── train_25_of_32.jsonl │ │ ├── train_26_of_32.jsonl │ │ ├── train_27_of_32.jsonl │ │ ├── train_28_of_32.jsonl │ │ ├── train_29_of_32.jsonl │ │ ├── train_2_of_32.jsonl │ │ ├── train_30_of_32.jsonl │ │ ├── train_31_of_32.jsonl │ │ ├── train_32_of_32.jsonl │ │ ├── train_3_of_32.jsonl │ │ ├── train_4_of_32.jsonl │ │ ├── train_5_of_32.jsonl │ │ ├── train_6_of_32.jsonl │ │ ├── train_7_of_32.jsonl │ │ ├── train_8_of_32.jsonl │ │ └── train_9_of_32.jsonl │ │ ├── tokenizer │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ │ ├── train_10_of_32.hdf5 │ │ ├── train_11_of_32.hdf5 │ │ ├── train_12_of_32.hdf5 │ │ ├── train_13_of_32.hdf5 │ │ ├── train_14_of_32.hdf5 │ │ ├── train_15_of_32.hdf5 │ │ ├── train_16_of_32.hdf5 │ │ ├── train_17_of_32.hdf5 │ │ ├── train_18_of_32.hdf5 │ │ ├── train_19_of_32.hdf5 │ │ ├── train_1_of_32.hdf5 │ │ ├── train_20_of_32.hdf5 │ │ ├── train_21_of_32.hdf5 │ │ ├── train_22_of_32.hdf5 │ │ ├── train_23_of_32.hdf5 │ │ ├── train_24_of_32.hdf5 │ │ ├── train_25_of_32.hdf5 │ │ ├── train_26_of_32.hdf5 │ │ ├── train_27_of_32.hdf5 │ │ ├── train_28_of_32.hdf5 │ │ ├── train_29_of_32.hdf5 │ │ ├── train_2_of_32.hdf5 │ │ ├── train_30_of_32.hdf5 │ │ ├── train_31_of_32.hdf5 │ │ ├── train_32_of_32.hdf5 │ │ ├── train_3_of_32.hdf5 │ │ ├── train_4_of_32.hdf5 │ │ ├── train_5_of_32.hdf5 │ │ ├── train_6_of_32.hdf5 │ │ ├── train_7_of_32.hdf5 │ │ ├── train_8_of_32.hdf5 │ │ └── train_9_of_32.hdf5 ├── json_load_error_test │ ├── example_json_load_error_test_data.jsonl │ └── pipelined_json_load_error_test │ │ ├── metadata.yaml │ │ ├── sha256 │ │ └── files_metadata.json │ │ ├── splits │ │ ├── train_10_of_32.jsonl │ │ ├── train_11_of_32.jsonl │ │ ├── train_12_of_32.jsonl │ │ ├── train_13_of_32.jsonl │ │ ├── train_14_of_32.jsonl │ │ ├── train_15_of_32.jsonl │ │ ├── train_16_of_32.jsonl │ │ ├── train_17_of_32.jsonl │ │ ├── train_18_of_32.jsonl │ │ ├── train_19_of_32.jsonl │ │ ├── train_1_of_32.jsonl │ │ ├── train_20_of_32.jsonl │ │ ├── train_21_of_32.jsonl │ │ ├── train_22_of_32.jsonl │ │ ├── train_23_of_32.jsonl │ │ ├── train_24_of_32.jsonl │ │ ├── train_25_of_32.jsonl │ │ ├── train_26_of_32.jsonl │ │ ├── train_27_of_32.jsonl │ │ ├── train_28_of_32.jsonl │ │ ├── train_29_of_32.jsonl │ │ ├── train_2_of_32.jsonl │ │ ├── train_30_of_32.jsonl │ │ ├── train_31_of_32.jsonl │ │ ├── train_32_of_32.jsonl │ │ ├── train_3_of_32.jsonl │ │ ├── train_4_of_32.jsonl │ │ ├── train_5_of_32.jsonl │ │ ├── train_6_of_32.jsonl │ │ ├── train_7_of_32.jsonl │ │ ├── train_8_of_32.jsonl │ │ └── train_9_of_32.jsonl │ │ ├── tokenizer │ │ ├── config.json │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ │ ├── train_10_of_32.hdf5 │ │ ├── train_11_of_32.hdf5 │ │ ├── train_12_of_32.hdf5 │ │ ├── train_13_of_32.hdf5 │ │ ├── train_14_of_32.hdf5 │ │ ├── train_15_of_32.hdf5 │ │ ├── train_16_of_32.hdf5 │ │ ├── train_17_of_32.hdf5 │ │ ├── train_18_of_32.hdf5 │ │ ├── train_19_of_32.hdf5 │ │ ├── train_1_of_32.hdf5 │ │ ├── train_20_of_32.hdf5 │ │ ├── train_21_of_32.hdf5 │ │ ├── train_22_of_32.hdf5 │ │ ├── train_23_of_32.hdf5 │ │ ├── train_24_of_32.hdf5 │ │ ├── train_25_of_32.hdf5 │ │ ├── train_26_of_32.hdf5 │ │ ├── train_27_of_32.hdf5 │ │ ├── train_28_of_32.hdf5 │ │ ├── train_29_of_32.hdf5 │ │ ├── train_2_of_32.hdf5 │ │ ├── train_30_of_32.hdf5 │ │ ├── train_31_of_32.hdf5 │ │ ├── train_32_of_32.hdf5 │ │ ├── train_3_of_32.hdf5 │ │ ├── train_4_of_32.hdf5 │ │ ├── train_5_of_32.hdf5 │ │ ├── train_6_of_32.hdf5 │ │ ├── train_7_of_32.hdf5 │ │ ├── train_8_of_32.hdf5 │ │ └── train_9_of_32.hdf5 ├── metaICL │ ├── data_prepped_metaICL.hdf5 │ ├── decoded_data_prepped_metaICL.txt │ ├── example_metaICL_data.jsonl │ └── pipelined_metaICL │ │ ├── splits │ │ ├── train_10_of_32.jsonl │ │ ├── train_11_of_32.jsonl │ │ ├── train_12_of_32.jsonl │ │ ├── train_13_of_32.jsonl │ │ ├── train_14_of_32.jsonl │ │ ├── train_15_of_32.jsonl │ │ ├── train_16_of_32.jsonl │ │ ├── train_17_of_32.jsonl │ │ ├── train_18_of_32.jsonl │ │ ├── train_19_of_32.jsonl │ │ ├── train_1_of_32.jsonl │ │ ├── train_20_of_32.jsonl │ │ ├── train_21_of_32.jsonl │ │ ├── train_22_of_32.jsonl │ │ ├── train_23_of_32.jsonl │ │ ├── train_24_of_32.jsonl │ │ ├── train_25_of_32.jsonl │ │ ├── train_26_of_32.jsonl │ │ ├── train_27_of_32.jsonl │ │ ├── train_28_of_32.jsonl │ │ ├── train_29_of_32.jsonl │ │ ├── train_2_of_32.jsonl │ │ ├── train_30_of_32.jsonl │ │ ├── train_31_of_32.jsonl │ │ ├── train_32_of_32.jsonl │ │ ├── train_3_of_32.jsonl │ │ ├── train_4_of_32.jsonl │ │ ├── train_5_of_32.jsonl │ │ ├── train_6_of_32.jsonl │ │ ├── train_7_of_32.jsonl │ │ ├── train_8_of_32.jsonl │ │ └── train_9_of_32.jsonl │ │ ├── train_10_of_32.hdf5 │ │ ├── train_11_of_32.hdf5 │ │ ├── train_12_of_32.hdf5 │ │ ├── train_13_of_32.hdf5 │ │ ├── train_14_of_32.hdf5 │ │ ├── train_15_of_32.hdf5 │ │ ├── train_16_of_32.hdf5 │ │ ├── train_17_of_32.hdf5 │ │ ├── train_18_of_32.hdf5 │ │ ├── train_19_of_32.hdf5 │ │ ├── train_1_of_32.hdf5 │ │ ├── train_20_of_32.hdf5 │ │ ├── train_21_of_32.hdf5 │ │ ├── train_22_of_32.hdf5 │ │ ├── train_23_of_32.hdf5 │ │ ├── train_24_of_32.hdf5 │ │ ├── train_25_of_32.hdf5 │ │ ├── train_26_of_32.hdf5 │ │ ├── train_27_of_32.hdf5 │ │ ├── train_28_of_32.hdf5 │ │ ├── train_29_of_32.hdf5 │ │ ├── train_2_of_32.hdf5 │ │ ├── train_30_of_32.hdf5 │ │ ├── train_31_of_32.hdf5 │ │ ├── train_32_of_32.hdf5 │ │ ├── train_3_of_32.hdf5 │ │ ├── train_4_of_32.hdf5 │ │ ├── train_5_of_32.hdf5 │ │ ├── train_6_of_32.hdf5 │ │ ├── train_7_of_32.hdf5 │ │ ├── train_8_of_32.hdf5 │ │ └── train_9_of_32.hdf5 ├── no_split_dir │ ├── data_prepped_no_split_dir.hdf5 │ ├── decoded_data_prepped_no_split_dir.txt │ ├── example_no_split_dir_data.jsonl │ └── pipelined_no_split_dir │ │ ├── tokenizer │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ │ ├── train_10_of_32.hdf5 │ │ ├── train_11_of_32.hdf5 │ │ ├── train_12_of_32.hdf5 │ │ ├── train_13_of_32.hdf5 │ │ ├── train_14_of_32.hdf5 │ │ ├── train_15_of_32.hdf5 │ │ ├── train_16_of_32.hdf5 │ │ ├── train_17_of_32.hdf5 │ │ ├── train_18_of_32.hdf5 │ │ ├── train_19_of_32.hdf5 │ │ ├── train_1_of_32.hdf5 │ │ ├── train_20_of_32.hdf5 │ │ ├── train_21_of_32.hdf5 │ │ ├── train_22_of_32.hdf5 │ │ ├── train_23_of_32.hdf5 │ │ ├── train_24_of_32.hdf5 │ │ ├── train_25_of_32.hdf5 │ │ ├── train_26_of_32.hdf5 │ │ ├── train_27_of_32.hdf5 │ │ ├── train_28_of_32.hdf5 │ │ ├── train_29_of_32.hdf5 │ │ ├── train_2_of_32.hdf5 │ │ ├── train_30_of_32.hdf5 │ │ ├── train_31_of_32.hdf5 │ │ ├── train_32_of_32.hdf5 │ │ ├── train_3_of_32.hdf5 │ │ ├── train_4_of_32.hdf5 │ │ ├── train_5_of_32.hdf5 │ │ ├── train_6_of_32.hdf5 │ │ ├── train_7_of_32.hdf5 │ │ ├── train_8_of_32.hdf5 │ │ └── train_9_of_32.hdf5 ├── pipeline_test │ ├── example_pipeline_test_data.jsonl │ └── pipelined_pipeline_test │ │ ├── dev_1_of_9.hdf5 │ │ ├── dev_2_of_9.hdf5 │ │ ├── dev_3_of_9.hdf5 │ │ ├── dev_4_of_9.hdf5 │ │ ├── dev_5_of_9.hdf5 │ │ ├── dev_6_of_9.hdf5 │ │ ├── dev_7_of_9.hdf5 │ │ ├── dev_8_of_9.hdf5 │ │ ├── dev_9_of_9.hdf5 │ │ ├── splits │ │ ├── dev_1_of_9.jsonl │ │ ├── dev_2_of_9.jsonl │ │ ├── dev_3_of_9.jsonl │ │ ├── dev_4_of_9.jsonl │ │ ├── dev_5_of_9.jsonl │ │ ├── dev_6_of_9.jsonl │ │ ├── dev_7_of_9.jsonl │ │ ├── dev_8_of_9.jsonl │ │ ├── dev_9_of_9.jsonl │ │ ├── train_10_of_32.jsonl │ │ ├── train_11_of_32.jsonl │ │ ├── train_12_of_32.jsonl │ │ ├── train_13_of_32.jsonl │ │ ├── train_14_of_32.jsonl │ │ ├── train_15_of_32.jsonl │ │ ├── train_16_of_32.jsonl │ │ ├── train_17_of_32.jsonl │ │ ├── train_18_of_32.jsonl │ │ ├── train_19_of_32.jsonl │ │ ├── train_1_of_32.jsonl │ │ ├── train_20_of_32.jsonl │ │ ├── train_21_of_32.jsonl │ │ ├── train_22_of_32.jsonl │ │ ├── train_23_of_32.jsonl │ │ ├── train_24_of_32.jsonl │ │ ├── train_25_of_32.jsonl │ │ ├── train_26_of_32.jsonl │ │ ├── train_27_of_32.jsonl │ │ ├── train_28_of_32.jsonl │ │ ├── train_29_of_32.jsonl │ │ ├── train_2_of_32.jsonl │ │ ├── train_30_of_32.jsonl │ │ ├── train_31_of_32.jsonl │ │ ├── train_32_of_32.jsonl │ │ ├── train_3_of_32.jsonl │ │ ├── train_4_of_32.jsonl │ │ ├── train_5_of_32.jsonl │ │ ├── train_6_of_32.jsonl │ │ ├── train_7_of_32.jsonl │ │ ├── train_8_of_32.jsonl │ │ └── train_9_of_32.jsonl │ │ ├── test_files │ │ ├── test_1_of_4.jsonl │ │ ├── test_2_of_4.jsonl │ │ ├── test_3_of_4.jsonl │ │ └── test_4_of_4.jsonl │ │ ├── tokenizer │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ │ ├── train_10_of_32.hdf5 │ │ ├── train_11_of_32.hdf5 │ │ ├── train_12_of_32.hdf5 │ │ ├── train_13_of_32.hdf5 │ │ ├── train_14_of_32.hdf5 │ │ ├── train_15_of_32.hdf5 │ │ ├── train_16_of_32.hdf5 │ │ ├── train_17_of_32.hdf5 │ │ ├── train_18_of_32.hdf5 │ │ ├── train_19_of_32.hdf5 │ │ ├── train_1_of_32.hdf5 │ │ ├── train_20_of_32.hdf5 │ │ ├── train_21_of_32.hdf5 │ │ ├── train_22_of_32.hdf5 │ │ ├── train_23_of_32.hdf5 │ │ ├── train_24_of_32.hdf5 │ │ ├── train_25_of_32.hdf5 │ │ ├── train_26_of_32.hdf5 │ │ ├── train_27_of_32.hdf5 │ │ ├── train_28_of_32.hdf5 │ │ ├── train_29_of_32.hdf5 │ │ ├── train_2_of_32.hdf5 │ │ ├── train_30_of_32.hdf5 │ │ ├── train_31_of_32.hdf5 │ │ ├── train_32_of_32.hdf5 │ │ ├── train_3_of_32.hdf5 │ │ ├── train_4_of_32.hdf5 │ │ ├── train_5_of_32.hdf5 │ │ ├── train_6_of_32.hdf5 │ │ ├── train_7_of_32.hdf5 │ │ ├── train_8_of_32.hdf5 │ │ └── train_9_of_32.hdf5 ├── pretraining │ ├── data_prepped_pretraining.hdf5 │ ├── decoded_data_prepped_pretraining.txt │ ├── example_pretraining_data.jsonl │ └── pipelined_pretraining │ │ ├── splits │ │ ├── train_10_of_32.jsonl │ │ ├── train_11_of_32.jsonl │ │ ├── train_12_of_32.jsonl │ │ ├── train_13_of_32.jsonl │ │ ├── train_14_of_32.jsonl │ │ ├── train_15_of_32.jsonl │ │ ├── train_16_of_32.jsonl │ │ ├── train_17_of_32.jsonl │ │ ├── train_18_of_32.jsonl │ │ ├── train_19_of_32.jsonl │ │ ├── train_1_of_32.jsonl │ │ ├── train_20_of_32.jsonl │ │ ├── train_21_of_32.jsonl │ │ ├── train_22_of_32.jsonl │ │ ├── train_23_of_32.jsonl │ │ ├── train_24_of_32.jsonl │ │ ├── train_25_of_32.jsonl │ │ ├── train_26_of_32.jsonl │ │ ├── train_27_of_32.jsonl │ │ ├── train_28_of_32.jsonl │ │ ├── train_29_of_32.jsonl │ │ ├── train_2_of_32.jsonl │ │ ├── train_30_of_32.jsonl │ │ ├── train_31_of_32.jsonl │ │ ├── train_32_of_32.jsonl │ │ ├── train_3_of_32.jsonl │ │ ├── train_4_of_32.jsonl │ │ ├── train_5_of_32.jsonl │ │ ├── train_6_of_32.jsonl │ │ ├── train_7_of_32.jsonl │ │ ├── train_8_of_32.jsonl │ │ └── train_9_of_32.jsonl │ │ ├── tokenizer │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ │ ├── train_10_of_32.hdf5 │ │ ├── train_11_of_32.hdf5 │ │ ├── train_12_of_32.hdf5 │ │ ├── train_13_of_32.hdf5 │ │ ├── train_14_of_32.hdf5 │ │ ├── train_15_of_32.hdf5 │ │ ├── train_16_of_32.hdf5 │ │ ├── train_17_of_32.hdf5 │ │ ├── train_18_of_32.hdf5 │ │ ├── train_19_of_32.hdf5 │ │ ├── train_1_of_32.hdf5 │ │ ├── train_20_of_32.hdf5 │ │ ├── train_21_of_32.hdf5 │ │ ├── train_22_of_32.hdf5 │ │ ├── train_23_of_32.hdf5 │ │ ├── train_24_of_32.hdf5 │ │ ├── train_25_of_32.hdf5 │ │ ├── train_26_of_32.hdf5 │ │ ├── train_27_of_32.hdf5 │ │ ├── train_28_of_32.hdf5 │ │ ├── train_29_of_32.hdf5 │ │ ├── train_2_of_32.hdf5 │ │ ├── train_30_of_32.hdf5 │ │ ├── train_31_of_32.hdf5 │ │ ├── train_32_of_32.hdf5 │ │ ├── train_3_of_32.hdf5 │ │ ├── train_4_of_32.hdf5 │ │ ├── train_5_of_32.hdf5 │ │ ├── train_6_of_32.hdf5 │ │ ├── train_7_of_32.hdf5 │ │ ├── train_8_of_32.hdf5 │ │ └── train_9_of_32.hdf5 ├── pretraining_split_with_new_metadata_and_sha256 │ ├── example_pretraining_data.jsonl │ ├── files_metadata.json │ └── metadata.yaml └── pretraining_txt │ ├── data_prepped_pretraining_txt.hdf5 │ ├── decoded_data_prepped_pretraining_txt.txt │ ├── example_pretraining_txt_data.txt │ └── pipelined_pretraining_txt │ ├── splits │ ├── train_10_of_32.txt │ ├── train_11_of_32.txt │ ├── train_12_of_32.txt │ ├── train_13_of_32.txt │ ├── train_14_of_32.txt │ ├── train_15_of_32.txt │ ├── train_16_of_32.txt │ ├── train_17_of_32.txt │ ├── train_18_of_32.txt │ ├── train_19_of_32.txt │ ├── train_1_of_32.txt │ ├── train_20_of_32.txt │ ├── train_21_of_32.txt │ ├── train_22_of_32.txt │ ├── train_23_of_32.txt │ ├── train_24_of_32.txt │ ├── train_25_of_32.txt │ ├── train_26_of_32.txt │ ├── train_27_of_32.txt │ ├── train_28_of_32.txt │ ├── train_29_of_32.txt │ ├── train_2_of_32.txt │ ├── train_30_of_32.txt │ ├── train_31_of_32.txt │ ├── train_32_of_32.txt │ ├── train_3_of_32.txt │ ├── train_4_of_32.txt │ ├── train_5_of_32.txt │ ├── train_6_of_32.txt │ ├── train_7_of_32.txt │ ├── train_8_of_32.txt │ └── train_9_of_32.txt │ ├── tokenizer │ ├── merges.txt │ ├── special_tokens_map.json │ ├── tokenizer.json │ ├── tokenizer_config.json │ └── vocab.json │ ├── train_10_of_32.hdf5 │ ├── train_11_of_32.hdf5 │ ├── train_12_of_32.hdf5 │ ├── train_13_of_32.hdf5 │ ├── train_14_of_32.hdf5 │ ├── train_15_of_32.hdf5 │ ├── train_16_of_32.hdf5 │ ├── train_17_of_32.hdf5 │ ├── train_18_of_32.hdf5 │ ├── train_19_of_32.hdf5 │ ├── train_1_of_32.hdf5 │ ├── train_20_of_32.hdf5 │ ├── train_21_of_32.hdf5 │ ├── train_22_of_32.hdf5 │ ├── train_23_of_32.hdf5 │ ├── train_24_of_32.hdf5 │ ├── train_25_of_32.hdf5 │ ├── train_26_of_32.hdf5 │ ├── train_27_of_32.hdf5 │ ├── train_28_of_32.hdf5 │ ├── train_29_of_32.hdf5 │ ├── train_2_of_32.hdf5 │ ├── train_30_of_32.hdf5 │ ├── train_31_of_32.hdf5 │ ├── train_32_of_32.hdf5 │ ├── train_3_of_32.hdf5 │ ├── train_4_of_32.hdf5 │ ├── train_5_of_32.hdf5 │ ├── train_6_of_32.hdf5 │ ├── train_7_of_32.hdf5 │ ├── train_8_of_32.hdf5 │ └── train_9_of_32.hdf5 ├── gpt2_vocab_and_merge_files ├── merges.txt └── vocab.json ├── test_add_sequence_metadata_to_dataset.py ├── test_arg_configs.py ├── test_article_tokenizer.py ├── test_e2e.py ├── test_hdf5_file_buffer.py ├── test_metadata.py ├── test_multiprocessing.py ├── test_sequence_packer.py ├── test_sha256.py ├── test_split.py ├── test_studio_integrations.py ├── test_token.py ├── test_tokenized_line.py ├── test_tokenizer.py ├── test_utils.py └── unit ├── configs └── test_logger.py └── utils └── test_utils.py /.circleci/VERSIONS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/.circleci/VERSIONS -------------------------------------------------------------------------------- /.circleci/assets/.safety-policy.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/.circleci/assets/.safety-policy.yml -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/.circleci/config.yml -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/.github/CODEOWNERS -------------------------------------------------------------------------------- /.github/CONTRIBUTING.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/.github/CONTRIBUTING.rst -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/.github/pull_request_template.md -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/.github/release-drafter.yml -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/.github/workflows/release-drafter.yml -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/.github/workflows/stale.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/LICENSE -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/Pipfile -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/Pipfile.lock -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/README.md -------------------------------------------------------------------------------- /advanced_usage.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/advanced_usage.md -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/source/_templates/custom-class-template.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/docs/source/_templates/custom-class-template.rst -------------------------------------------------------------------------------- /docs/source/_templates/custom-module-template.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/docs/source/_templates/custom-module-template.rst -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/docs/source/conf.py -------------------------------------------------------------------------------- /docs/source/contributing_link.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/docs/source/contributing_link.rst -------------------------------------------------------------------------------- /docs/source/generate_autosummary.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/docs/source/generate_autosummary.rst -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/readme_link.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../README.md 2 | -------------------------------------------------------------------------------- /generative_data_prep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/__init__.py -------------------------------------------------------------------------------- /generative_data_prep/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/__main__.py -------------------------------------------------------------------------------- /generative_data_prep/data_buffers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/data_buffers/__init__.py -------------------------------------------------------------------------------- /generative_data_prep/data_buffers/file_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/data_buffers/file_buffer.py -------------------------------------------------------------------------------- /generative_data_prep/data_buffers/hdf5_file_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/data_buffers/hdf5_file_buffer.py -------------------------------------------------------------------------------- /generative_data_prep/data_prep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/data_prep/__init__.py -------------------------------------------------------------------------------- /generative_data_prep/data_prep/data_prep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/data_prep/data_prep.py -------------------------------------------------------------------------------- /generative_data_prep/data_prep/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/data_prep/pipeline.py -------------------------------------------------------------------------------- /generative_data_prep/processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/processors/__init__.py -------------------------------------------------------------------------------- /generative_data_prep/processors/article_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/processors/article_tokenizer.py -------------------------------------------------------------------------------- /generative_data_prep/processors/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/processors/metrics.py -------------------------------------------------------------------------------- /generative_data_prep/processors/sequence_packer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/processors/sequence_packer.py -------------------------------------------------------------------------------- /generative_data_prep/tokenized_line/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/tokenized_line/__init__.py -------------------------------------------------------------------------------- /generative_data_prep/tokenized_line/token.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/tokenized_line/token.py -------------------------------------------------------------------------------- /generative_data_prep/tokenized_line/tokenized_line.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/tokenized_line/tokenized_line.py -------------------------------------------------------------------------------- /generative_data_prep/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/__init__.py -------------------------------------------------------------------------------- /generative_data_prep/utils/add_metadata_to_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/add_metadata_to_dataset.py -------------------------------------------------------------------------------- /generative_data_prep/utils/arg_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/arg_configs.py -------------------------------------------------------------------------------- /generative_data_prep/utils/arg_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/arg_parser.py -------------------------------------------------------------------------------- /generative_data_prep/utils/balance_hdf5_files.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/balance_hdf5_files.py -------------------------------------------------------------------------------- /generative_data_prep/utils/configs/logger.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/configs/logger.conf -------------------------------------------------------------------------------- /generative_data_prep/utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/constants.py -------------------------------------------------------------------------------- /generative_data_prep/utils/convert_chat_template_to_prompt_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/convert_chat_template_to_prompt_completion.py -------------------------------------------------------------------------------- /generative_data_prep/utils/decode_hdf5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/decode_hdf5.py -------------------------------------------------------------------------------- /generative_data_prep/utils/large_file_shuffle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/large_file_shuffle.py -------------------------------------------------------------------------------- /generative_data_prep/utils/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/logger.py -------------------------------------------------------------------------------- /generative_data_prep/utils/metadata_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/metadata_generation.py -------------------------------------------------------------------------------- /generative_data_prep/utils/path_verify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/path_verify.py -------------------------------------------------------------------------------- /generative_data_prep/utils/studio_integrations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/studio_integrations.py -------------------------------------------------------------------------------- /generative_data_prep/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/generative_data_prep/utils/utils.py -------------------------------------------------------------------------------- /img/SambaNova-dark-logo-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/img/SambaNova-dark-logo-1.png -------------------------------------------------------------------------------- /img/SambaNova-light-logo-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/img/SambaNova-light-logo-1.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements/all-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/requirements/all-requirements.txt -------------------------------------------------------------------------------- /requirements/build-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/requirements/build-requirements.txt -------------------------------------------------------------------------------- /requirements/dev-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/requirements/dev-requirements.txt -------------------------------------------------------------------------------- /requirements/docs-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/requirements/docs-requirements.txt -------------------------------------------------------------------------------- /requirements/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/requirements/requirements.txt -------------------------------------------------------------------------------- /requirements/tests-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/requirements/tests-requirements.txt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/setup.cfg -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/__init__.py -------------------------------------------------------------------------------- /tests/component/test_running_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/component/test_running_command.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/data_prepped_apply_chat_template.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/data_prepped_apply_chat_template.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/decoded_data_prepped_apply_chat_template.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/decoded_data_prepped_apply_chat_template.txt -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/example_apply_chat_template_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/example_apply_chat_template_data.jsonl -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/metadata.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/metadata.yaml -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/tokenizer/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/tokenizer/config.json -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_10_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_10_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_11_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_11_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_12_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_12_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_13_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_13_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_14_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_14_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_15_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_15_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_16_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_16_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_1_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_1_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_2_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_2_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_3_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_3_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_4_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_4_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_5_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_5_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_6_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_6_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_7_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_7_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_8_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_8_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/apply_chat_template/pipelined_apply_chat_template/train_9_of_16.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/apply_chat_template/pipelined_apply_chat_template/train_9_of_16.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/data_prepped_category_ids.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/data_prepped_category_ids.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/example_category_ids_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/example_category_ids_data.jsonl -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/category_to_id.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/category_to_id.json -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/tokenizer/merges.txt -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/tokenizer/special_tokens_map.json -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/tokenizer/vocab.json -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_10_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_10_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_11_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_11_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_12_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_12_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_13_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_13_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_14_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_14_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_15_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_15_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_16_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_16_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_17_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_17_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_18_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_18_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_19_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_19_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_1_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_1_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_20_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_20_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_21_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_21_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_22_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_22_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_23_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_23_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_24_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_24_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_25_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_25_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_26_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_26_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_27_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_27_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_28_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_28_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_29_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_29_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_2_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_2_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_30_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_30_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_31_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_31_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_32_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_32_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_3_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_3_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_4_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_4_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_5_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_5_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_6_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_6_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_7_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_7_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_8_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_8_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/category_ids/pipelined_category_ids/train_9_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/category_ids/pipelined_category_ids/train_9_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/data_prep_from_main/example_data_prep_from_main_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/data_prep_from_main/example_data_prep_from_main_data.jsonl -------------------------------------------------------------------------------- /tests/examples/data_prep_from_main/pipelined_data_prep_from_main/metadata.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/data_prep_from_main/pipelined_data_prep_from_main/metadata.yaml -------------------------------------------------------------------------------- /tests/examples/data_prep_from_main/pipelined_data_prep_from_main/tokenizer/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/data_prep_from_main/pipelined_data_prep_from_main/tokenizer/config.json -------------------------------------------------------------------------------- /tests/examples/data_prep_from_main/pipelined_data_prep_from_main/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/data_prep_from_main/pipelined_data_prep_from_main/tokenizer/merges.txt -------------------------------------------------------------------------------- /tests/examples/data_prep_from_main/pipelined_data_prep_from_main/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/data_prep_from_main/pipelined_data_prep_from_main/tokenizer/vocab.json -------------------------------------------------------------------------------- /tests/examples/data_prep_from_main/pipelined_data_prep_from_main/train_1_of_2.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/data_prep_from_main/pipelined_data_prep_from_main/train_1_of_2.hdf5 -------------------------------------------------------------------------------- /tests/examples/data_prep_from_main/pipelined_data_prep_from_main/train_2_of_2.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/data_prep_from_main/pipelined_data_prep_from_main/train_2_of_2.hdf5 -------------------------------------------------------------------------------- /tests/examples/data_prep_test/data_prepped_data_prep_test.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/data_prep_test/data_prepped_data_prep_test.hdf5 -------------------------------------------------------------------------------- /tests/examples/data_prep_test/example_data_prep_test_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/data_prep_test/example_data_prep_test_data.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/data_prepped_dialogue.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/data_prepped_dialogue.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/decoded_data_prepped_dialogue.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/decoded_data_prepped_dialogue.txt -------------------------------------------------------------------------------- /tests/examples/dialogue/example_dialogue_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/example_dialogue_data.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_10_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_10_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_11_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_11_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_12_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_12_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_13_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_13_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_14_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_14_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_15_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_15_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_16_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_16_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_17_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_17_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_18_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_18_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_19_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_19_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_1_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_1_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_20_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_20_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_21_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_21_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_22_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_22_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_23_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_23_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_24_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_24_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_25_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_25_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_26_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_26_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_27_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_27_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_28_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_28_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_29_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_29_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_2_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_2_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_30_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_30_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_31_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_31_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_32_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_32_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_3_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_3_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_4_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_4_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_5_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_5_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_6_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_6_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_7_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_7_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_8_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_8_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/splits/train_9_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/splits/train_9_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/tokenizer/merges.txt -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/tokenizer/special_tokens_map.json -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/tokenizer/vocab.json -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_10_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_10_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_11_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_11_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_12_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_12_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_13_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_13_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_14_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_14_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_15_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_15_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_16_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_16_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_17_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_17_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_18_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_18_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_19_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_19_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_1_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_1_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_20_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_20_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_21_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_21_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_22_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_22_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_23_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_23_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_24_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_24_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_25_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_25_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_26_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_26_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_27_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_27_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_28_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_28_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_29_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_29_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_2_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_2_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_30_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_30_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_31_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_31_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_32_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_32_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_3_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_3_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_4_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_4_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_5_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_5_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_6_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_6_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_7_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_7_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_8_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_8_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/dialogue/pipelined_dialogue/train_9_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/dialogue/pipelined_dialogue/train_9_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/metadata.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/metadata.yaml -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/sha256/files_metadata.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/sha256/files_metadata.json -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/tokenizer/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/tokenizer/config.json -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/tokenizer/merges.txt -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/tokenizer/vocab.json -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_10_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_10_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_11_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_11_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_12_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_12_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_13_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_13_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_14_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_14_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_15_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_15_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_16_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_16_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_17_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_17_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_18_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_18_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_19_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_19_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_1_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_1_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_20_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_20_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_21_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_21_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_22_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_22_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_23_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_23_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_24_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_24_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_25_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_25_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_26_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_26_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_27_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_27_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_28_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_28_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_29_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_29_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_2_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_2_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_30_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_30_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_31_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_31_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_32_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_32_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_3_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_3_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_4_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_4_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_5_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_5_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_6_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_6_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_7_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_7_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_8_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_8_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/directory_input/pipelined_directory_input/train_9_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/directory_input/pipelined_directory_input/train_9_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/data_prepped_generative_tuning.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/data_prepped_generative_tuning.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/decoded_data_prepped_generative_tuning.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/decoded_data_prepped_generative_tuning.txt -------------------------------------------------------------------------------- /tests/examples/generative_tuning/example_generative_tuning_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/example_generative_tuning_data.jsonl -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/tokenizer/merges.txt -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/tokenizer/vocab.json -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_10_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_10_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_11_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_11_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_12_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_12_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_13_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_13_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_14_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_14_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_15_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_15_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_16_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_16_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_17_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_17_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_18_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_18_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_19_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_19_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_1_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_1_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_20_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_20_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_21_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_21_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_22_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_22_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_23_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_23_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_24_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_24_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_25_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_25_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_26_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_26_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_27_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_27_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_28_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_28_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_29_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_29_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_2_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_2_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_30_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_30_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_31_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_31_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_32_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_32_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_3_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_3_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_4_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_4_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_5_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_5_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_6_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_6_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_7_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_7_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_8_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_8_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/generative_tuning/pipelined_generative_tuning/train_9_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/generative_tuning/pipelined_generative_tuning/train_9_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/example_json_load_error_test_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/example_json_load_error_test_data.jsonl -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/metadata.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/metadata.yaml -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_10_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_10_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_11_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_11_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_12_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_12_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_13_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_13_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_14_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_14_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_15_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_15_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_16_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_16_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_17_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_17_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_18_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_18_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_19_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_19_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_1_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_1_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_20_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_20_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_21_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_21_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_22_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_22_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_23_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_23_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_24_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_24_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_25_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_25_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_26_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_26_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_27_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_27_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_28_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_28_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/json_load_error_test/pipelined_json_load_error_test/train_29_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/json_load_error_test/pipelined_json_load_error_test/train_29_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/data_prepped_metaICL.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/data_prepped_metaICL.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/decoded_data_prepped_metaICL.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/decoded_data_prepped_metaICL.txt -------------------------------------------------------------------------------- /tests/examples/metaICL/example_metaICL_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/example_metaICL_data.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_10_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_10_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_11_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_11_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_12_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_12_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_13_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_13_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_14_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_14_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_15_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_15_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_16_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_16_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_17_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_17_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_18_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_18_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_19_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_19_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_1_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_1_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_20_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_20_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_21_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_21_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_22_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_22_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_23_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_23_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_24_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_24_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_25_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_25_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_26_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_26_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_27_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_27_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_28_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_28_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_29_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_29_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_2_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_2_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_30_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_30_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_31_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_31_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_32_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_32_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_3_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_3_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_4_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_4_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_5_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_5_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_6_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_6_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_7_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_7_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_8_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_8_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/splits/train_9_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/splits/train_9_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_10_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_10_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_11_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_11_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_12_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_12_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_13_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_13_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_14_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_14_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_15_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_15_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_16_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_16_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_17_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_17_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_18_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_18_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_19_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_19_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_1_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_1_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_20_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_20_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_21_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_21_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_22_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_22_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_23_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_23_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_24_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_24_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_25_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_25_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_26_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_26_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_27_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_27_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_28_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_28_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_29_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_29_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_2_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_2_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_30_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_30_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_31_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_31_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_32_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_32_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_3_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_3_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_4_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_4_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_5_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_5_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_6_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_6_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_7_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_7_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_8_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_8_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/metaICL/pipelined_metaICL/train_9_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/metaICL/pipelined_metaICL/train_9_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/data_prepped_no_split_dir.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/data_prepped_no_split_dir.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/decoded_data_prepped_no_split_dir.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/decoded_data_prepped_no_split_dir.txt -------------------------------------------------------------------------------- /tests/examples/no_split_dir/example_no_split_dir_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/example_no_split_dir_data.jsonl -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/tokenizer/merges.txt -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/tokenizer/vocab.json -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_10_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_10_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_11_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_11_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_12_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_12_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_13_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_13_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_14_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_14_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_15_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_15_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_16_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_16_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_17_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_17_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_18_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_18_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_19_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_19_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_1_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_1_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_20_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_20_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_21_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_21_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_22_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_22_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_23_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_23_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_24_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_24_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_25_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_25_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_26_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_26_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_27_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_27_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_28_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_28_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_29_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_29_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_2_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_2_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_30_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_30_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_31_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_31_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_32_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_32_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_3_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_3_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_4_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_4_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_5_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_5_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_6_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_6_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_7_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_7_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_8_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_8_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/no_split_dir/pipelined_no_split_dir/train_9_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/no_split_dir/pipelined_no_split_dir/train_9_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/example_pipeline_test_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/example_pipeline_test_data.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/dev_1_of_9.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/dev_1_of_9.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/dev_2_of_9.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/dev_2_of_9.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/dev_3_of_9.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/dev_3_of_9.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/dev_4_of_9.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/dev_4_of_9.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/dev_5_of_9.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/dev_5_of_9.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/dev_6_of_9.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/dev_6_of_9.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/dev_7_of_9.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/dev_7_of_9.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/dev_8_of_9.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/dev_8_of_9.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/dev_9_of_9.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/dev_9_of_9.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_1_of_9.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_1_of_9.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_2_of_9.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_2_of_9.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_3_of_9.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_3_of_9.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_4_of_9.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_4_of_9.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_5_of_9.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_5_of_9.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_6_of_9.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_6_of_9.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_7_of_9.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_7_of_9.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_8_of_9.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_8_of_9.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_9_of_9.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/dev_9_of_9.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_10_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_10_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_11_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_11_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_12_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_12_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_13_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_13_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_14_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_14_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_15_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_15_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_16_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_16_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_17_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_17_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_18_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_18_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_19_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_19_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_1_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_1_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_20_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_20_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_21_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_21_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_22_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_22_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_23_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_23_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_24_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_24_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_25_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_25_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_26_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_26_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_27_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_27_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_28_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_28_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_29_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_29_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_2_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_2_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_30_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_30_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_31_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_31_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_32_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_32_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_3_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_3_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_4_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_4_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_5_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_5_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_6_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_6_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_7_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_7_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_8_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_8_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_9_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/splits/train_9_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/test_files/test_1_of_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/test_files/test_1_of_4.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/test_files/test_2_of_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/test_files/test_2_of_4.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/test_files/test_3_of_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/test_files/test_3_of_4.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/test_files/test_4_of_4.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/test_files/test_4_of_4.jsonl -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/tokenizer/merges.txt -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/tokenizer/vocab.json -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_10_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_10_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_11_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_11_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_12_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_12_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_13_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_13_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_14_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_14_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_15_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_15_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_16_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_16_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_17_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_17_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_18_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_18_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_19_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_19_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_1_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_1_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_20_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_20_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_21_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_21_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_22_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_22_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_23_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_23_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_24_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_24_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_25_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_25_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_26_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_26_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_27_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_27_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_28_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_28_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_29_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_29_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_2_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_2_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_30_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_30_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_31_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_31_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_32_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_32_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_3_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_3_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_4_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_4_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_5_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_5_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_6_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_6_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_7_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_7_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_8_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_8_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pipeline_test/pipelined_pipeline_test/train_9_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pipeline_test/pipelined_pipeline_test/train_9_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/data_prepped_pretraining.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/data_prepped_pretraining.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/decoded_data_prepped_pretraining.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/decoded_data_prepped_pretraining.txt -------------------------------------------------------------------------------- /tests/examples/pretraining/example_pretraining_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/example_pretraining_data.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_10_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_10_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_11_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_11_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_12_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_12_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_13_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_13_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_14_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_14_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_15_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_15_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_16_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_16_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_17_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_17_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_18_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_18_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_19_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_19_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_1_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_1_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_20_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_20_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_21_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_21_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_22_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_22_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_23_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_23_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_24_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_24_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_25_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_25_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_26_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_26_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_27_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_27_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_28_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_28_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_29_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_29_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_2_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_2_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_30_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_30_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_31_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_31_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_32_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_32_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_3_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_3_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_4_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_4_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_5_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_5_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_6_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_6_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_7_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_7_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_8_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_8_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/splits/train_9_of_32.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/splits/train_9_of_32.jsonl -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/tokenizer/merges.txt -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/tokenizer/special_tokens_map.json -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/tokenizer/vocab.json -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_10_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_10_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_11_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_11_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_12_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_12_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_13_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_13_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_14_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_14_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_15_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_15_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_16_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_16_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_17_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_17_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_18_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_18_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_19_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_19_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_1_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_1_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_20_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_20_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_21_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_21_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_22_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_22_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_23_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_23_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_24_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_24_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_25_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_25_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_26_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_26_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_27_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_27_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_28_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_28_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_29_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_29_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_2_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_2_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_30_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_30_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_31_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_31_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_32_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_32_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_3_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_3_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_4_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_4_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_5_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_5_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_6_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_6_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_7_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_7_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_8_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_8_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining/pipelined_pretraining/train_9_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining/pipelined_pretraining/train_9_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_split_with_new_metadata_and_sha256/files_metadata.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_split_with_new_metadata_and_sha256/files_metadata.json -------------------------------------------------------------------------------- /tests/examples/pretraining_split_with_new_metadata_and_sha256/metadata.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_split_with_new_metadata_and_sha256/metadata.yaml -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/data_prepped_pretraining_txt.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/data_prepped_pretraining_txt.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/decoded_data_prepped_pretraining_txt.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/decoded_data_prepped_pretraining_txt.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/example_pretraining_txt_data.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/example_pretraining_txt_data.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_10_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_10_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_11_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_11_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_12_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_12_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_13_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_13_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_14_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_14_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_15_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_15_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_16_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_16_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_17_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_17_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_18_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_18_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_19_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_19_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_1_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_1_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_20_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_20_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_21_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_21_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_22_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_22_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_23_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_23_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_24_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_24_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_25_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_25_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_26_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_26_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_27_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_27_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_28_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_28_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_29_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_29_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_2_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_2_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_30_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_30_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_31_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_31_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_32_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_32_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_3_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_3_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_4_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_4_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_5_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_5_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_6_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_6_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_7_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_7_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_8_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_8_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_9_of_32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/splits/train_9_of_32.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/tokenizer/merges.txt -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/tokenizer/vocab.json -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_10_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_10_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_11_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_11_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_12_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_12_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_13_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_13_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_14_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_14_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_15_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_15_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_16_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_16_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_17_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_17_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_18_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_18_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_19_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_19_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_1_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_1_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_20_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_20_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_21_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_21_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_22_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_22_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_23_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_23_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_24_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_24_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_25_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_25_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_26_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_26_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_27_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_27_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_28_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_28_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_29_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_29_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_2_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_2_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_30_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_30_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_31_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_31_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_32_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_32_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_3_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_3_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_4_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_4_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_5_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_5_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_6_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_6_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_7_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_7_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_8_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_8_of_32.hdf5 -------------------------------------------------------------------------------- /tests/examples/pretraining_txt/pipelined_pretraining_txt/train_9_of_32.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/examples/pretraining_txt/pipelined_pretraining_txt/train_9_of_32.hdf5 -------------------------------------------------------------------------------- /tests/gpt2_vocab_and_merge_files/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/gpt2_vocab_and_merge_files/merges.txt -------------------------------------------------------------------------------- /tests/gpt2_vocab_and_merge_files/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/gpt2_vocab_and_merge_files/vocab.json -------------------------------------------------------------------------------- /tests/test_add_sequence_metadata_to_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_add_sequence_metadata_to_dataset.py -------------------------------------------------------------------------------- /tests/test_arg_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_arg_configs.py -------------------------------------------------------------------------------- /tests/test_article_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_article_tokenizer.py -------------------------------------------------------------------------------- /tests/test_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_e2e.py -------------------------------------------------------------------------------- /tests/test_hdf5_file_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_hdf5_file_buffer.py -------------------------------------------------------------------------------- /tests/test_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_metadata.py -------------------------------------------------------------------------------- /tests/test_multiprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_multiprocessing.py -------------------------------------------------------------------------------- /tests/test_sequence_packer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_sequence_packer.py -------------------------------------------------------------------------------- /tests/test_sha256.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_sha256.py -------------------------------------------------------------------------------- /tests/test_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_split.py -------------------------------------------------------------------------------- /tests/test_studio_integrations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_studio_integrations.py -------------------------------------------------------------------------------- /tests/test_token.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_token.py -------------------------------------------------------------------------------- /tests/test_tokenized_line.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_tokenized_line.py -------------------------------------------------------------------------------- /tests/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_tokenizer.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/test_utils.py -------------------------------------------------------------------------------- /tests/unit/configs/test_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/unit/configs/test_logger.py -------------------------------------------------------------------------------- /tests/unit/utils/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sambanova/generative_data_prep/HEAD/tests/unit/utils/test_utils.py --------------------------------------------------------------------------------