├── .github
│   ├── CODE_OF_CONDUCT.md
│   └── workflows
│       ├── docs.yml
│       └── tests.yml
├── .gitignore
├── LICENSE
├── README.md
├── conf
│   ├── archived_experiments
│   │   ├── archived
│   │   │   ├── finetune100k
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── finetune100k_2
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── finetune100kpermute
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── finetune300k
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── finetune300kpermute
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── finetune30k
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── finetune30k_agai_old
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── finetune30kpermute
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── finetune_100k_atoms
│   │   │   │   ├── atoms.yaml
│   │   │   │   └── atoms_params.yaml
│   │   │   ├── finetune_300k_atoms
│   │   │   │   ├── atoms.yaml
│   │   │   │   └── atoms_params.yaml
│   │   │   ├── finetune_30k_again
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── finetune_30k_atoms
│   │   │   │   ├── atoms.yaml
│   │   │   │   └── atoms_params.yaml
│   │   │   ├── finetune_test
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── finetune_zmatrix
│   │   │   │   ├── zmatrix_100k.yaml
│   │   │   │   ├── zmatrix_300k.yaml
│   │   │   │   └── zmatrix_30k.yaml
│   │   │   ├── pretrain100k_atoms
│   │   │   │   ├── atoms.yaml
│   │   │   │   └── atoms_params.yaml
│   │   │   ├── pretrain2m
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── pretrain300k_atoms
│   │   │   │   ├── atoms.yaml
│   │   │   │   └── atoms_params.yaml
│   │   │   ├── pretrain30k
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   ├── pretrain30k_atoms
│   │   │   │   ├── atoms.yaml
│   │   │   │   └── atoms_params.yaml
│   │   │   ├── pretrainzmatrix
│   │   │   │   ├── zmatrix_100k.yaml
│   │   │   │   ├── zmatrix_300k.yaml
│   │   │   │   └── zmatrix_30k.yaml
│   │   │   ├── testing_100k
│   │   │   │   ├── cifp1.yaml
│   │   │   │   ├── cifsymmetrized.yaml
│   │   │   │   ├── composition.yaml
│   │   │   │   ├── crystal_llm.yaml
│   │   │   │   └── slice.yaml
│   │   │   └── testing_30k
│   │   │       ├── cifp1.yaml
│   │   │       ├── cifsymmetrized.yaml
│   │   │       ├── composition.yaml
│   │   │       ├── crystal_llm.yaml
│   │   │       └── slice.yaml
│   │   ├── config-hydra.yaml
│   │   ├── config-potential.yaml
│   │   ├── config-sft.yaml
│   │   ├── config-smiles.yaml
│   │   ├── config.yaml
│   │   ├── finetune2m
│   │   │   ├── atoms_params.yaml
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── finetune30k_rt
│   │   │   ├── atoms_params.yaml
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── finetune30k_rt_2
│   │   │   ├── atoms.yaml
│   │   │   ├── atoms_params.yaml
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── finetune_30k_spl_token
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   └── slice.yaml
│   │   ├── llama_ft
│   │   │   ├── cifp1.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── llama_sft
│   │   │   ├── cifp1.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── llama_sft_10
│   │   │   ├── cifp1.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   └── slice.yaml
│   │   ├── llama_sft_2
│   │   │   ├── cifp1.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── llama_sft_3
│   │   │   ├── cifp1.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── llama_sft_4
│   │   │   ├── cifp1.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── llama_sft_5
│   │   │   ├── cifp1.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── potential
│   │   │   ├── cryst_0.yaml
│   │   │   ├── cryst_0_2.yaml
│   │   │   ├── cryst_0_4.yaml
│   │   │   ├── cryst_0_5.yaml
│   │   │   ├── cryst_0_6.yaml
│   │   │   ├── cryst_0_8.yaml
│   │   │   └── cryst_1.yaml
│   │   ├── potential_2
│   │   │   ├── cryst_0.yaml
│   │   │   ├── cryst_0_2.yaml
│   │   │   ├── cryst_0_4.yaml
│   │   │   ├── cryst_0_5.yaml
│   │   │   ├── cryst_0_6.yaml
│   │   │   ├── cryst_0_8.yaml
│   │   │   └── cryst_1.yaml
│   │   ├── potential_3
│   │   │   ├── cryst_0.yaml
│   │   │   ├── cryst_0_2.yaml
│   │   │   ├── cryst_0_4.yaml
│   │   │   ├── cryst_0_5.yaml
│   │   │   ├── cryst_0_6.yaml
│   │   │   ├── cryst_0_8.yaml
│   │   │   └── cryst_1.yaml
│   │   ├── potential_4
│   │   │   ├── cryst_0.yaml
│   │   │   ├── cryst_0_2.yaml
│   │   │   ├── cryst_0_4.yaml
│   │   │   ├── cryst_0_5.yaml
│   │   │   ├── cryst_0_6.yaml
│   │   │   ├── cryst_0_8.yaml
│   │   │   └── cryst_1.yaml
│   │   ├── potential_5
│   │   │   ├── cryst_0.yaml
│   │   │   ├── cryst_0_2.yaml
│   │   │   ├── cryst_0_4.yaml
│   │   │   ├── cryst_0_5.yaml
│   │   │   ├── cryst_0_6.yaml
│   │   │   ├── cryst_0_8.yaml
│   │   │   └── cryst_1.yaml
│   │   ├── potential_6
│   │   │   ├── cryst_0.yaml
│   │   │   ├── cryst_0_2.yaml
│   │   │   ├── cryst_0_4.yaml
│   │   │   ├── cryst_0_5.yaml
│   │   │   ├── cryst_0_6.yaml
│   │   │   ├── cryst_0_8.yaml
│   │   │   └── cryst_1.yaml
│   │   ├── potential_7
│   │   │   ├── cryst_0.yaml
│   │   │   ├── cryst_0_2.yaml
│   │   │   ├── cryst_0_4.yaml
│   │   │   ├── cryst_0_5.yaml
│   │   │   ├── cryst_0_6.yaml
│   │   │   ├── cryst_0_8.yaml
│   │   │   └── cryst_1.yaml
│   │   ├── pretrain-rt
│   │   │   ├── atoms.yaml
│   │   │   ├── atoms_params.yaml
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── pretrain-test
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   └── slice.yaml
│   │   ├── pretrain100k_spl_token
│   │   │   ├── atoms_params.yaml
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── pretrain30k_spl_token
│   │   │   ├── atoms_params.yaml
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── pretrain_2m
│   │   │   ├── atoms.yaml
│   │   │   ├── atoms_params.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── pretrain_rt_100
│   │   │   ├── atoms.yaml
│   │   │   ├── atoms_params.yaml
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── pretrain_rt_30
│   │   │   ├── atoms.yaml
│   │   │   ├── atoms_params.yaml
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── pretrain_rt_300
│   │   │   ├── atoms.yaml
│   │   │   ├── atoms_params.yaml
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── qmof_ft
│   │   │   ├── atoms_params.yaml
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   ├── slice.yaml
│   │   │   └── zmatrix.yaml
│   │   ├── santiago
│   │   │   └── cifp1.yaml
│   │   ├── santiago_100k
│   │   │   └── cifp1.yaml
│   │   ├── santiago_2m
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   ├── composition.yaml
│   │   │   ├── crystal_llm.yaml
│   │   │   └── slice.yaml
│   │   ├── smiles
│   │   │   ├── slice_100.yaml
│   │   │   ├── slice_2m.yaml
│   │   │   ├── slice_30.yaml
│   │   │   └── slice_300.yaml
│   │   ├── testing_perturb
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   └── crystal_llm.yaml
│   │   ├── testing_perturb_100
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   └── crystal_llm.yaml
│   │   ├── testing_perturb_300
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   └── crystal_llm.yaml
│   │   ├── testing_translate
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   └── crystal_llm.yaml
│   │   ├── testing_translate_100
│   │   │   ├── cifp1.yaml
│   │   │   ├── cifsymmetrized.yaml
│   │   │   └── crystal_llm.yaml
│   │   └── testing_translate_300
│   │       ├── cifp1.yaml
│   │       ├── cifsymmetrized.yaml
│   │       └── crystal_llm.yaml
│   ├── bandgap.yaml
│   ├── benchmark.yaml
│   ├── bg
│   │   ├── atoms.yaml
│   │   ├── atoms_params.yaml
│   │   ├── cifp1.yaml
│   │   ├── cifpsym.yaml
│   │   ├── composition.yaml
│   │   ├── crystal_llm.yaml
│   │   ├── local_env.yaml
│   │   ├── slices.yaml
│   │   └── zmatrix.yaml
│   ├── bg2m
│   │   ├── atoms.yaml
│   │   ├── atoms_params.yaml
│   │   ├── cifp1.yaml
│   │   ├── cifsymmetrized.yaml
│   │   ├── composition.yaml
│   │   ├── crystal_llm.yaml
│   │   ├── local_env.yaml
│   │   ├── slice.yaml
│   │   └── zmatrix.yaml
│   ├── classification.yaml
│   ├── form
│   │   ├── atoms.yaml
│   │   ├── atoms_params.yaml
│   │   ├── cifp1.yaml
│   │   ├── cifpsym.yaml
│   │   ├── composition.yaml
│   │   ├── crystal_llm.yaml
│   │   ├── local_env.yaml
│   │   ├── slices.yaml
│   │   └── zmatrix.yaml
│   ├── form_energy.yaml
│   ├── group-test
│   │   ├── composition.yaml
│   │   └── slices.yaml
│   ├── is_metal
│   │   ├── atoms.yaml
│   │   ├── atoms_params.yaml
│   │   ├── cifp1.yaml
│   │   ├── cifpsym.yaml
│   │   ├── composition.yaml
│   │   ├── crystal_llm.yaml
│   │   ├── local_env.yaml
│   │   ├── slices.yaml
│   │   └── zmatrix.yaml
│   ├── llama_8b_bg
│   │   ├── atoms.yaml
│   │   ├── atoms_params.yaml
│   │   ├── cifp1.yaml
│   │   ├── cifpsym.yaml
│   │   ├── composition.yaml
│   │   ├── crystal_llm.yaml
│   │   ├── local_env.yaml
│   │   ├── slices.yaml
│   │   └── zmatrix.yaml
│   ├── llm_sft.yaml
│   ├── model
│   │   ├── archived_base
│   │   │   ├── archived
│   │   │   │   ├── finetune_template.yaml
│   │   │   │   ├── finetune_template_dielectric.yaml
│   │   │   │   ├── finetune_template_dielectric_atoms.yaml
│   │   │   │   ├── finetune_template_dielectric_permuted.yaml
│   │   │   │   ├── finetune_template_dielectric_test.yaml
│   │   │   │   ├── finetune_template_dielectric_unfiltered.yaml
│   │   │   │   ├── finetune_template_dielectric_zmatrix.yaml
│   │   │   │   ├── finetune_template_gvrh.yaml
│   │   │   │   ├── finetune_template_gvrh_atoms.yaml
│   │   │   │   ├── finetune_template_gvrh_permuted copy.yaml
│   │   │   │   ├── finetune_template_gvrh_permuted.yaml
│   │   │   │   ├── finetune_template_gvrh_unfiltered.yaml
│   │   │   │   ├── finetune_template_gvrh_zmatrix.yaml
│   │   │   │   ├── finetune_template_jdft2d.yaml
│   │   │   │   ├── finetune_template_kvrh.yaml
│   │   │   │   ├── finetune_template_kvrh_atoms.yaml
│   │   │   │   ├── finetune_template_kvrh_permuted copy.yaml
│   │   │   │   ├── finetune_template_kvrh_permuted.yaml
│   │   │   │   ├── finetune_template_kvrh_unfiltered.yaml
│   │   │   │   ├── finetune_template_kvrh_zmatrix.yaml
│   │   │   │   ├── finetune_template_perovskites.yaml
│   │   │   │   ├── finetune_template_perovskites_atoms.yaml
│   │   │   │   ├── finetune_template_perovskites_permuted copy.yaml
│   │   │   │   ├── finetune_template_perovskites_permuted.yaml
│   │   │   │   ├── finetune_template_perovskites_unfiltered.yaml
│   │   │   │   ├── finetune_template_perovskites_zmatrix.yaml
│   │   │   │   └── finetune_template_phonons.yaml
│   │   │   ├── finetune_qmof.yaml
│   │   │   ├── finetune_template_dielectric.yaml
│   │   │   ├── finetune_template_dielectric_perturb.yaml
│   │   │   ├── finetune_template_dielectric_potential.yaml
│   │   │   ├── finetune_template_dielectric_smiles.yaml
│   │   │   ├── finetune_template_dielectric_translate.yaml
│   │   │   ├── finetune_template_gvrh.yaml
│   │   │   ├── finetune_template_gvrh_perturb.yaml
│   │   │   ├── finetune_template_gvrh_smiles.yaml
│   │   │   ├── finetune_template_gvrh_translate.yaml
│   │   │   ├── finetune_template_kvrh.yaml
│   │   │   ├── finetune_template_kvrh_perturb.yaml
│   │   │   ├── finetune_template_kvrh_potential.yaml
│   │   │   ├── finetune_template_kvrh_smiles.yaml
│   │   │   ├── finetune_template_kvrh_translate.yaml
│   │   │   ├── finetune_template_perovskite_potential.yaml
│   │   │   ├── finetune_template_perovskites.yaml
│   │   │   ├── finetune_template_perovskites_perturb.yaml
│   │   │   ├── finetune_template_perovskites_smiles.yaml
│   │   │   ├── finetune_template_perovskites_translate.yaml
│   │   │   ├── llama.yaml
│   │   │   ├── llama3_gvrh_sft.yaml
│   │   │   ├── llama_archived
│   │   │   │   ├── llama_dielec.yaml
│   │   │   │   ├── llama_gvrh.yaml
│   │   │   │   ├── llama_kvrh.yaml
│   │   │   │   └── llama_perov.yaml
│   │   │   ├── llama_dielec.yaml
│   │   │   ├── llama_dielectric_sft.yaml
│   │   │   ├── llama_gvrh.yaml
│   │   │   ├── llama_gvrh_sft.yaml
│   │   │   ├── llama_gvrh_sft_nb_fold_2.yaml
│   │   │   ├── llama_kvrh.yaml
│   │   │   ├── llama_kvrh_sft.yaml
│   │   │   ├── llama_perov.yaml
│   │   │   ├── llama_perov_sft.yaml
│   │   │   ├── pretrain_template.yaml
│   │   │   └── pretrain_template_rt_token.yaml
│   │   ├── benchmark_example.yaml
│   │   ├── classification_example.yaml
│   │   ├── formation_energy.yaml
│   │   ├── llama_8b.yaml
│   │   ├── llama_example.yaml
│   │   ├── pretrain_example.yaml
│   │   ├── pretrain_other_models.yaml
│   │   └── pretrain_own_data_example.yaml
│   └── pretrain.yaml
├── docs
│   ├── api.md
│   ├── benchmarking.md
│   ├── getting_started.md
│   ├── index.md
│   ├── representations.md
│   ├── static
│   │   ├── logo.ai
│   │   └── logo.png
│   └── tokenizers.md
├── mkdocs.yml
├── notebooks
│   ├── dataprep.ipynb
│   ├── example_data
│   │   ├── InCuS2_p1.cif
│   │   ├── InCuS2_symmetrized.cif
│   │   ├── N2_p1.cif
│   │   ├── N2_symmetrized.cif
│   │   ├── SrTiO3_p1.cif
│   │   ├── SrTiO3_symmetrized.cif
│   │   ├── TlCr5Se8_p1.cif
│   │   └── TlCr5Se8_symmetrized.cif
│   ├── example_mattext_representations.ipynb
│   ├── linear_potential_dev.ipynb
│   ├── tokens.ipynb
│   └── tutorial.ipynb
├── pyproject.toml
├── revision-scripts
│   ├── 5fold_split.py
│   ├── matbench_is_metal.py
│   ├── mp_classification.py
│   ├── prep_json.py
│   ├── prep_rep.py
│   └── text_rep.py
├── scripts
│   ├── filterdataset.py
│   ├── linear_potential.py
│   ├── llama-eval-responses
│   │   ├── llama_evals_matbench_dielectric_cif_p1.json
│   │   ├── llama_evals_matbench_dielectric_composition.json
│   │   ├── llama_evals_matbench_dielectric_crystal_llm_rep.json
│   │   ├── llama_evals_matbench_dielectric_slice.json
│   │   ├── llama_evals_matbench_log_gvrh_cif_p1.json
│   │   ├── llama_evals_matbench_log_gvrh_composition.json
│   │   ├── llama_evals_matbench_log_gvrh_crystal_llm_rep.json
│   │   ├── llama_evals_matbench_log_kvrh_cif_p1.json
│   │   ├── llama_evals_matbench_log_kvrh_composition.json
│   │   ├── llama_evals_matbench_log_kvrh_composition_old.json
│   │   ├── llama_evals_matbench_log_kvrh_crystal_llm_rep.json
│   │   ├── llama_evals_matbench_log_kvrh_slice.json
│   │   ├── llama_evals_matbench_perovskites_cif_p1.json
│   │   ├── llama_evals_matbench_perovskites_composition.json
│   │   ├── llama_evals_matbench_perovskites_crystal_llm_rep.json
│   │   └── llama_evals_matbench_perovskites_slice.json
│   ├── llama_sft_evals.py
│   ├── modify_mb_json.py
│   ├── nomad_postprocess_parallel.py
│   ├── qmof_prepare_data.py
│   └── query_qmof.py
├── src
│   └── mattext
│       ├── __init__.py
│       ├── analysis
│       │   ├── __init__.py
│       │   ├── attention.py
│       │   └── xtal2pot.py
│       ├── dataprep
│       │   ├── __init__.py
│       │   ├── download_matbench.py
│       │   ├── matbench_prepare_data.py
│       │   └── nomad_prepare_data.py
│       ├── main.py
│       ├── models
│       │   ├── __init__.py
│       │   ├── benchmark.py
│       │   ├── finetune.py
│       │   ├── helper.py
│       │   ├── inference.py
│       │   ├── llama.py
│       │   ├── llama_sft.py
│       │   ├── potential.py
│       │   ├── predict.py
│       │   ├── pretrain.py
│       │   ├── score.py
│       │   └── utils.py
│       ├── representations
│       │   ├── __init__.py
│       │   ├── analysis.py
│       │   ├── decoder.py
│       │   └── transformations.py
│       ├── tokenizer
│       │   └── __init__.py
│       └── utils.py
└── tests
    ├── test_imports.py
    └── test_xtal2pot.py
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
name: Docs

on:
  push:
    branches: [main]
  pull_request:
  workflow_dispatch:

jobs:
  docs:
    runs-on: ubuntu-latest
    permissions: "write-all"

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: pip
          cache-dependency-path: pyproject.toml

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install wheel setuptools
          pip install -e .
          pip install mkdocs mkdocs-material "mkdocstrings[python]" mkdocs-autorefs

      - name: Build
        run: mkdocs build
      - run: mkdocs gh-deploy --force

--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
name: Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:

jobs:
  tests:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: true
      matrix:
        python-version: ["3.9"]
    timeout-minutes: 30
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      # - uses: pdm-project/setup-pdm@v3
      #   name: Set up PDM
      #   with:
      #     python-version: ${{ matrix.python-version }}
      #     cache: true

      - name: Setup Mambaforge
        uses: conda-incubator/setup-miniconda@v3
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          python-version: ${{ matrix.python-version }}
          conda-channels: anaconda, conda-forge
          activate-environment: test

      - name: Install dependencies
        run: |
          mamba install -c conda-forge openbabel fftw -y
          pip install -e ".[dev]"
          pip install pyxtal
          pip install "numpy<2.0"

      - name: Test
        run: pytest tests
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 LamaLab

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100k/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_p1
  logging:
    wandb_project: ft_100k_mb_small
  finetune:
    model_name: ft_100k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/pretrain100k/cif_p1/checkpoint-39000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100k/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_symmetrized
  logging:
    wandb_project: ft_100k_mb_small

  finetune:
    model_name: ft_100k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100k/composition.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: composition
  logging:
    wandb_project: ft_100k_mb_small

  finetune:
    model_name: ft_100k_mb_small
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 512
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/pretrain100k/composition/checkpoint-2000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100k/crystal_llm.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: crystal_llm_rep
  logging:
    wandb_project: ft_100k_mb_small

  finetune:
    model_name: ft_100k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/pretrain100k/crystal_llm/checkpoint-156000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100k/slice.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: slice
  logging:
    wandb_project: ft_100k_mb_small

  finetune:
    model_name: ft_100k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/pretrain100k/slice/checkpoint-39000"
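The five files above are Hydra config-group overrides: the `# @package _global_` directive lifts their keys to the root of the composed configuration, so `model.representation`, `model.finetune.*`, and the checkpoint path land directly on the main config. A minimal sketch of that composition using Hydra's compose API follows; the config name ("config") and the override group path are illustrative assumptions, not MatText's actual entry point (which is src/mattext/main.py).

# Sketch only: compose one archived experiment group with Hydra.
# Config name and group path below are assumptions for illustration.
from hydra import compose, initialize

with initialize(version_base=None, config_path="conf"):
    cfg = compose(
        config_name="config",
        overrides=["+archived_experiments/archived/finetune100k=cifp1"],
    )
    # `# @package _global_` merged the file at the root, so its keys are global:
    print(cfg.model.representation)           # -> cif_p1
    print(cfg.model.finetune.context_length)  # -> 1024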
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100k_2/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_p1
  logging:
    wandb_project: ft_100k_mb_small_test
  finetune:
    model_name: ft_100k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100k_2/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_symmetrized
  logging:
    wandb_project: ft_100k_mb_small_test

  finetune:
    model_name: ft_100k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100k_2/composition.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: composition
  logging:
    wandb_project: ft_100k_mb_small_test

  finetune:
    model_name: ft_100k_mb_small
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 512
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/composition_pt_30k_wes/checkpoint-1000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100k_2/crystal_llm.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: crystal_llm_rep
  logging:
    wandb_project: ft_100k_mb_small_test

  finetune:
    model_name: ft_100k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100k_2/slice.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: slice
  logging:
    wandb_project: ft_100k_mb_small_test

  finetune:
    model_name: ft_100k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100kpermute/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_p1
  logging:
    wandb_project: permute_100k_7seed
  finetune:
    model_name: ft_100k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100kpermute/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_symmetrized
  logging:
    wandb_project: permute_100k_7seed

  finetune:
    model_name: ft_100k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100kpermute/composition.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: composition
  logging:
    wandb_project: permute_100k_7seed

  finetune:
    model_name: ft_100k_mb_small
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 512
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/composition_pt_30k_wes/checkpoint-1000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100kpermute/crystal_llm.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: crystal_llm_rep
  logging:
    wandb_project: permute_100k_7seed

  finetune:
    model_name: ft_100k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune100kpermute/slice.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: slice
  logging:
    wandb_project: permute_100k_7seed

  finetune:
    model_name: ft_100k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune300k/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_p1
  logging:
    wandb_project: ft_300k_mb_small
  finetune:
    model_name: ft_300k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/pretrain300k/cif_p1_pt_300k_wes_2/checkpoint-58000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune300k/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_symmetrized
  logging:
    wandb_project: ft_300k_mb_small

  finetune:
    model_name: ft_300k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/pretrain300k/cif_symmetrized_pt_300k_wes_2/checkpoint-58000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune300k/composition.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: composition
  logging:
    wandb_project: ft_300k_mb_small

  finetune:
    model_name: ft_300k_mb_small
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 512
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/pretrain300k/composition_pt_300/checkpoint-7000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune300k/crystal_llm.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: crystal_llm_rep
  logging:
    wandb_project: ft_300k_mb_small

  finetune:
    model_name: ft_300k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/pretrain300k/crystal_llm_rep_pt_300/checkpoint-57000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune300k/slice.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: slice
  logging:
    wandb_project: ft_300k_mb_small

  finetune:
    model_name: ft_300k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/pretrain300k/slice_pt_300/checkpoint-117000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune300kpermute/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_p1
  logging:
    wandb_project: permute_300k_7seed
  finetune:
    model_name: ft_300k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/pretrain300k/cif_p1_pt_300k_wes_2/checkpoint-58000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune300kpermute/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_symmetrized
  logging:
    wandb_project: permute_300k_7seed
  finetune:
    model_name: ft_300k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/pretrain300k/cif_symmetrized_pt_300k_wes_2/checkpoint-58000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune300kpermute/composition.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: composition
  logging:
    wandb_project: permute_300k_7seed
  finetune:
    model_name: ft_300k_mb_small
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 512
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/pretrain300k/composition_pt_300/checkpoint-7000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune300kpermute/crystal_llm.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: crystal_llm_rep
  logging:
    wandb_project: permute_300k_7seed
  finetune:
    model_name: ft_300k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/pretrain300k/crystal_llm_rep_pt_300/checkpoint-57000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune300kpermute/slice.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: slice
  logging:
    wandb_project: permute_300k_7seed
  finetune:
    model_name: ft_300k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/pretrain300k/slice_pt_300/checkpoint-117000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30k/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_p1
  logging:
    wandb_project: pt_30k_mb_small
  finetune:
    model_name: pt_30k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30k/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_symmetrized
  logging:
    wandb_project: pt_30k_mb_small

  finetune:
    model_name: pt_30k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30k/composition.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: composition
  logging:
    wandb_project: pt_30k_mb_small

  finetune:
    model_name: pt_30k_mb_small
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 512
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/composition_pt_30k_wes/checkpoint-1000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30k/crystal_llm.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: crystal_llm_rep
  logging:
    wandb_project: pt_30k_mb_small

  finetune:
    model_name: pt_30k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30k/slice.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: slice
  logging:
    wandb_project: pt_30k_mb_small

  finetune:
    model_name: pt_30k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30k_agai_old/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_p1
  logging:
    wandb_project: ft_30k_unfiltered_again
  finetune:
    model_name: ft_30k_unfiltered_again
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30k_agai_old/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_symmetrized
  logging:
    wandb_project: ft_30k_unfiltered_again

  finetune:
    model_name: ft_30k_unfiltered_again
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30k_agai_old/composition.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: composition
  logging:
    wandb_project: ft_30k_unfiltered_again

  finetune:
    model_name: ft_30k_unfiltered_again
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 512
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/composition_pt_30k_wes/checkpoint-1000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30k_agai_old/crystal_llm.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: crystal_llm_rep
  logging:
    wandb_project: ft_30k_unfiltered_again

  finetune:
    model_name: ft_30k_unfiltered_again
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30k_agai_old/slice.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: slice
  logging:
    wandb_project: ft_30k_unfiltered_again

  finetune:
    model_name: ft_30k_unfiltered_again
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30kpermute/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_p1
  logging:
    wandb_project: permute_30k_7seed
  finetune:
    model_name: pt_30k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30kpermute/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_symmetrized
  logging:
    wandb_project: permute_30k_7seed

  finetune:
    model_name: pt_30k_mb_small
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30kpermute/composition.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: composition
  logging:
    wandb_project: permute_30k_7seed

  finetune:
    model_name: pt_30k_mb_small
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 512
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/composition_pt_30k_wes/checkpoint-1000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30kpermute/crystal_llm.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: crystal_llm_rep
  logging:
    wandb_project: permute_30k_7seed

  finetune:
    model_name: pt_30k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000"
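Each `pretrained_checkpoint` above points at a `checkpoint-NNNN` directory, which follows the Hugging Face Trainer checkpoint convention (the `training_arguments` keys are `transformers` fields as well). As a hedged sketch, not MatText's own loading code (that lives in src/mattext/models/finetune.py), such a directory could be reloaded for fine-tuning roughly like this; the regression-style head and the assumption that tokenizer files were saved alongside the weights are illustrative only:

# Illustrative sketch, not the MatText loader.
from transformers import AutoModelForSequenceClassification, AutoTokenizer

ckpt = "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000"
tokenizer = AutoTokenizer.from_pretrained(ckpt)  # assumes tokenizer files were saved with the checkpoint
model = AutoModelForSequenceClassification.from_pretrained(ckpt, num_labels=1)  # single-output regression head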
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune30kpermute/slice.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: slice
  logging:
    wandb_project: permute_30k_7seed

  finetune:
    model_name: pt_30k_mb_small
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_100k_atoms/atoms.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: atoms
  logging:
    wandb_project: ft_100k_atoms

  finetune:
    model_name: ft_100k_atoms
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_pt_100k_atoms/checkpoint-4000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_100k_atoms/atoms_params.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: atoms_params
  logging:
    wandb_project: ft_100k_atoms_params

  finetune:
    model_name: ft_100k_atoms_params
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_100k_atoms/checkpoint-4000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_300k_atoms/atoms.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: atoms
  logging:
    wandb_project: ft_300k_atoms

  finetune:
    model_name: ft_300k_atoms
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_pt_300k_atoms/checkpoint-14000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_300k_atoms/atoms_params.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: atoms_params
  logging:
    wandb_project: ft_300k_atoms_params

  finetune:
    model_name: ft_300k_atoms_params
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_300k_atoms/checkpoint-14000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_30k_again/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_p1
  logging:
    wandb_project: ft_30k_unfiltered_again
  finetune:
    model_name: ft_30k_unfiltered_again
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_30k_again/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_symmetrized
  logging:
    wandb_project: ft_30k_unfiltered_again

  finetune:
    model_name: ft_30k_unfiltered_again
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_30k_again/composition.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: composition
  logging:
    wandb_project: ft_30k_unfiltered_again

  finetune:
    model_name: ft_30k_unfiltered_again
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 512
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/composition_pt_30k_wes/checkpoint-1000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_30k_again/crystal_llm.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: crystal_llm_rep
  logging:
    wandb_project: ft_30k_unfiltered_again

  finetune:
    model_name: ft_30k_unfiltered_again
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_30k_again/slice.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: slice
  logging:
    wandb_project: ft_30k_unfiltered_again

  finetune:
    model_name: ft_30k_unfiltered_again
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_30k_atoms/atoms.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: atoms
  logging:
    wandb_project: ft_30k_atoms

  finetune:
    model_name: ft_30k_atoms
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_pt_30k_atoms/checkpoint-1000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_30k_atoms/atoms_params.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: atoms_params
  logging:
    wandb_project: ft_30k_atoms_params

  finetune:
    model_name: ft_30k_atoms_params
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_test/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_p1
  finetune:
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_test/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_symmetrized
  finetune:
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_test/composition.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: composition
  finetune:
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 512
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/composition_pt_30k_wes/checkpoint-1000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_test/crystal_llm.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: crystal_llm_rep
  finetune:
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_test/slice.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: slice
  finetune:
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000"
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_zmatrix/zmatrix_100k.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: zmatrix
  logging:
    wandb_project: zmatrix_finetune

  finetune:
    model_name: ft_100k_zmatrix
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_100k_zmatrix/checkpoint-4000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_zmatrix/zmatrix_300k.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: zmatrix
  logging:
    wandb_project: zmatrix_finetune

  finetune:
    model_name: ft_300k_zmatrix
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_300k_zmatrix/checkpoint-14000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/finetune_zmatrix/zmatrix_30k.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: zmatrix
  logging:
    wandb_project: zmatrix_finetune

  finetune:
    model_name: ft_30k_zmatrix
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_30k_zmatrix/checkpoint-1000
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/pretrain100k_atoms/atoms.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  logging:
    wandb_project: pt_100k_atoms

  representation: atoms
  pretrain:
    name: pt_100k_atoms
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      data_root_path: /work/so87pot/material_db/mattext_dataset/100k/atoms
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/pretrain100k_atoms/atoms_params.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  logging:
    wandb_project: pt_100k_atoms

  representation: atoms_params
  pretrain:
    name: pt_100k_atoms
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      data_root_path: /work/so87pot/material_db/mattext_dataset/100k/atoms
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/pretrain2m/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  logging:
    wandb_project: pt_2m_wes

  representation: cif_p1
  pretrain:
    name: pt_2m_wes
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 32
    path:
      data_root_path: /work/so87pot/material_db/mattext_dataset/2m
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/pretrain2m/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  logging:
    wandb_project: pt_2m_wes

  representation: cif_symmetrized
  pretrain:
    name: pt_2m_wes
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 32
    path:
      data_root_path: /work/so87pot/material_db/mattext_dataset/2m
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/pretrain2m/composition.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  logging:
    wandb_project: pt_2m_wes

  representation: composition
  pretrain:
    name: pt_2m_wes
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      data_root_path: /work/so87pot/material_db/mattext_dataset/2m
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/pretrain2m/crystal_llm.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  logging:
    wandb_project: pt_2m_wes

  representation: crystal_llm_rep
  pretrain:
    name: pt_2m_wes
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 128
    path:
      data_root_path: /work/so87pot/material_db/mattext_dataset/2m
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/pretrain2m/slice.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  logging:
    wandb_project: pt_2m_wes

  representation: slice
  pretrain:
    name: pt_2m_wes
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 128
    path:
      data_root_path: /work/so87pot/material_db/mattext_dataset/2m
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/pretrain300k_atoms/atoms.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  logging:
    wandb_project: pt_300k_atoms

  representation: atoms
  pretrain:
    name: pt_300k_atoms
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      data_root_path: /work/so87pot/material_db/mattext_dataset/300k/atoms
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/pretrain300k_atoms/atoms_params.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  logging:
    wandb_project: pt_300k_atoms

  representation: atoms_params
  pretrain:
    name: pt_300k_atoms
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      data_root_path: /work/so87pot/material_db/mattext_dataset/300k/atoms
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/pretrain30k/cifp1.yaml:
--------------------------------------------------------------------------------
# @package _global_
model:
  representation: cif_p1
  pretrain:
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 32
    path:
      data_root_path: /work/so87pot/material_db/mattext_dataset/30k
--------------------------------------------------------------------------------
/conf/archived_experiments/archived/pretrain30k/cifsymmetrized.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | pretrain: 5 | context_length: 1024 6 | training_arguments: 7 | per_device_train_batch_size: 32 8 | path: 9 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 10 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/pretrain30k/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | pretrain: 5 | context_length: 32 6 | training_arguments: 7 | per_device_train_batch_size: 1024 8 | path: 9 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 10 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/pretrain30k/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | pretrain: 5 | context_length: 512 6 | training_arguments: 7 | per_device_train_batch_size: 32 8 | path: 9 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 10 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/pretrain30k/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | pretrain: 5 | context_length: 512 6 | training_arguments: 7 | per_device_train_batch_size: 32 8 | path: 9 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 10 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/pretrain30k_atoms/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_atoms 5 | 6 | representation: atoms 7 | pretrain: 8 | name: pt_30k_atoms 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/pretrain30k_atoms/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_atoms 5 | 6 | representation: atoms_params 7 | pretrain: 8 | name: pt_30k_atoms 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/pretrainzmatrix/zmatrix_100k.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_zmatrix 5 | 6 | representation: zmatrix 7 | pretrain: 8 | name: pt_100k_zmatrix 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k/zmatrix 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/pretrainzmatrix/zmatrix_300k.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_zmatrix 5 | 6 | representation: zmatrix 7 | pretrain: 8 | name: pt_300k_zmatrix 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/300k/zmatrix 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/pretrainzmatrix/zmatrix_30k.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_zmatrix 5 | 6 | representation: zmatrix 7 | pretrain: 8 | name: pt_30k_zmatrix 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k/zmatrix 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/testing_100k/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: ft_100k_mb_small_test 6 | finetune: 7 | model_name: ft_100k_mb_small 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/testing_100k/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: ft_100k_mb_small_test 6 | 7 | finetune: 8 | model_name: ft_100k_mb_small 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/testing_100k/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: ft_100k_mb_small_test 6 | 7 | finetune: 8 | model_name: ft_100k_mb_small 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 512 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/composition_pt_30k_wes/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/testing_100k/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: ft_100k_mb_small_test 6 | 7 | finetune: 8 | model_name: ft_100k_mb_small 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 14 | 
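
A note on how these experiment files compose: each opens with "# @package _global_", so Hydra merges its keys at the root of the composed config rather than under the group's name, which is how they override model.finetune.*, model.logging.*, and model.pretrain.* in the base config. Below is a minimal sketch of loading one of them through Hydra's compose API; the conf/config.yaml entry point and the "+group=option" override are assumptions about how the defaults list is wired, not confirmed project usage.

# Sketch only: compose one archived experiment file via Hydra's compose API.
# Assumes it runs from the repository root and that conf/config.yaml is a
# usable entry point (an assumption, not confirmed by the repo).
from hydra import compose, initialize

with initialize(config_path="conf", version_base=None):
    cfg = compose(
        config_name="config",
        overrides=["+archived_experiments/archived/testing_100k=cifp1"],
    )
    # "# @package _global_" places these keys at the config root:
    print(cfg.model.representation)           # cif_p1
    print(cfg.model.finetune.context_length)  # 1024
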
-------------------------------------------------------------------------------- /conf/archived_experiments/archived/testing_100k/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: ft_100k_mb_small_test 6 | 7 | finetune: 8 | model_name: ft_100k_mb_small 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/testing_30k/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: pt_30k_mb_test 6 | finetune: 7 | model_name: finetune_30k_wes_3 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/testing_30k/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: pt_30k_mb_test 6 | 7 | finetune: 8 | model_name: finetune_30k_wes_3 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/testing_30k/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: pt_30k_mb_test 6 | 7 | finetune: 8 | model_name: finetune_30k_wes_3 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 512 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/composition_pt_30k_wes/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/testing_30k/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: pt_30k_mb_test 6 | 7 | finetune: 8 | model_name: finetune_30k_wes_3 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/archived/testing_30k/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: pt_30k_mb_test 6 | 7 | finetune: 8 | model_name: finetune_30k_wes_3 9 | context_length: 512 10 | training_arguments: 11 | 
per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/config-hydra.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | job: 3 | name: finetune 4 | run: 5 | dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 6 | sweep: 7 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 8 | subdir: ${hydra.job.override_dirname} 9 | 10 | launcher: 11 | _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher 12 | submitit_folder: ${hydra.sweep.dir}/.submitit/%j 13 | timeout_min: 3600 14 | mem_gb: 160 15 | nodes: 1 16 | #gpus_per_task: 1 17 | gres: gpu:1 18 | #gpus_per_node: 2 19 | name: ${hydra.job.name} 20 | partition: 'gpu' 21 | additional_parameters: 22 | nodelist: 'gpu[005-007,013-014]' 23 | tasks_per_node: 1 24 | 25 | 26 | defaults: 27 | - model: none 28 | - override hydra/launcher: submitit_slurm 29 | 30 | runs: 31 | 32 | # - name: pretrain_run 33 | # tasks: [pretrain] 34 | 35 | - name: benchmark_run 36 | tasks: [benchmark] 37 | 38 | # - name: test_run 39 | # tasks: [inference] 40 | -------------------------------------------------------------------------------- /conf/archived_experiments/config-potential.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | job: 3 | name: llama_instruct 4 | run: 5 | dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 6 | sweep: 7 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 8 | subdir: ${hydra.job.override_dirname} 9 | 10 | launcher: 11 | _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher 12 | submitit_folder: ${hydra.sweep.dir}/.submitit/%j 13 | timeout_min: 3600 14 | mem_gb: 160 15 | nodes: 1 16 | #gpus_per_task: 1 17 | gres: gpu:1 18 | #gpus_per_node: 2 19 | name: ${hydra.job.name} 20 | partition: 'gpu' 21 | additional_parameters: 22 | nodelist: 'gpu[005,006,007,013-014]' 23 | tasks_per_node: 1 24 | 25 | 26 | defaults: 27 | - model: none 28 | - override hydra/launcher: submitit_slurm 29 | 30 | 31 | 32 | runs: 33 | 34 | # - name: pretrain_run 35 | # tasks: [pretrain] 36 | 37 | # - name: benchmark_run 38 | # tasks: [benchmark] 39 | 40 | # - name: test_run 41 | # tasks: [inference] 42 | 43 | # - name: qmof_run 44 | # tasks: [qmof] 45 | 46 | # - name: llama_run 47 | # tasks: [llama] 48 | 49 | # - name: llama_sft_run 50 | # tasks: [llama_sft] 51 | 52 | - name: potential_run 53 | tasks: [potential] 54 | 55 | -------------------------------------------------------------------------------- /conf/archived_experiments/config-sft.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | job: 3 | name: llama_instruct 4 | run: 5 | dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 6 | sweep: 7 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 8 | subdir: ${hydra.job.override_dirname} 9 | 10 | launcher: 11 | _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher 12 | submitit_folder: ${hydra.sweep.dir}/.submitit/%j 13 | timeout_min: 3600 14 | mem_gb: 160 15 | nodes: 1 16 | #gpus_per_task: 1 17 | gres: gpu:1 18 | #gpus_per_node: 2 19 | name: ${hydra.job.name} 20 | partition: 'gpu' 21 | additional_parameters: 22 | nodelist: 'gpu[005,007,013-014]' 23 | tasks_per_node: 1 24 | 25 | 26 | defaults: 27 | - model: 
none 28 | - override hydra/launcher: submitit_slurm 29 | 30 | 31 | 32 | runs: 33 | 34 | # - name: pretrain_run 35 | # tasks: [pretrain] 36 | 37 | # - name: benchmark_run 38 | # tasks: [benchmark] 39 | 40 | # - name: test_run 41 | # tasks: [inference] 42 | 43 | # - name: qmof_run 44 | # tasks: [qmof] 45 | 46 | # - name: llama_run 47 | # tasks: [llama] 48 | 49 | - name: llama_sft_run 50 | tasks: [llama_sft] 51 | 52 | # - name: potential_run 53 | # tasks: [potential] 54 | 55 | -------------------------------------------------------------------------------- /conf/archived_experiments/config-smiles.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | job: 3 | name: llama_instruct 4 | run: 5 | dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 6 | sweep: 7 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 8 | subdir: ${hydra.job.override_dirname} 9 | 10 | launcher: 11 | _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher 12 | submitit_folder: ${hydra.sweep.dir}/.submitit/%j 13 | timeout_min: 3600 14 | mem_gb: 160 15 | nodes: 1 16 | #gpus_per_task: 1 17 | gres: gpu:1 18 | #gpus_per_node: 2 19 | name: ${hydra.job.name} 20 | partition: 'gpu' 21 | additional_parameters: 22 | nodelist: 'gpu[005,006,007,013-014]' 23 | tasks_per_node: 1 24 | 25 | 26 | defaults: 27 | - model: none 28 | - override hydra/launcher: submitit_slurm 29 | 30 | 31 | 32 | runs: 33 | 34 | # - name: pretrain_run 35 | # tasks: [pretrain] 36 | 37 | - name: benchmark_run 38 | tasks: [benchmark] 39 | 40 | # - name: test_run 41 | # tasks: [inference] 42 | 43 | # - name: qmof_run 44 | # tasks: [qmof] 45 | 46 | # - name: llama_run 47 | # tasks: [llama] 48 | 49 | # - name: llama_sft_run 50 | # tasks: [llama_sft] 51 | 52 | # - name: potential_run 53 | # tasks: [potential] 54 | 55 | -------------------------------------------------------------------------------- /conf/archived_experiments/config.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | job: 3 | name: llama_sft 4 | run: 5 | dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 6 | sweep: 7 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 8 | subdir: ${hydra.job.override_dirname} 9 | 10 | launcher: 11 | _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher 12 | submitit_folder: ${hydra.sweep.dir}/.submitit/%j 13 | timeout_min: 3600 14 | mem_gb: 160 15 | nodes: 1 16 | #gpus_per_task: 1 17 | gres: gpu:1 18 | #gpus_per_node: 2 19 | name: ${hydra.job.name} 20 | partition: 'gpu' 21 | additional_parameters: 22 | nodelist: 'gpu[005,007,013-014]' 23 | tasks_per_node: 1 24 | 25 | 26 | defaults: 27 | - model: none 28 | - override hydra/launcher: submitit_slurm 29 | 30 | 31 | 32 | runs: 33 | 34 | # - name: pretrain_run 35 | # tasks: [pretrain] 36 | 37 | # - name: benchmark_run 38 | # tasks: [benchmark] 39 | 40 | # - name: test_run 41 | # tasks: [inference] 42 | 43 | # - name: qmof_run 44 | # tasks: [qmof] 45 | 46 | # - name: llama_run 47 | # tasks: [llama] 48 | 49 | - name: llama_sft_run 50 | tasks: [llama_sft] 51 | 52 | # - name: potential_run 53 | # tasks: [potential] 54 | 55 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune2m/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | 
finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune2m/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_p1_pt_30k_rt_2/checkpoint-46000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune2m/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_symmetrized_pt_30k_rt/checkpoint-45000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune2m/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/composition_pt_30k_rt/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune2m/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/crystal_llm_rep_pt_30k_rt/checkpoint-11000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune2m/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/slice_pt_30k_rt/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune2m/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | 
wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | logging: 5 | wandb_project: 30k_ft 6 | 7 | finetune: 8 | model_name: 30k_ft 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: 30k_ft 6 | 7 | finetune: 8 | model_name: 30k_ft 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_p1_pt_30k_rt_2/checkpoint-46000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: 30k_ft 6 | 7 | finetune: 8 | model_name: 30k_ft 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_symmetrized_pt_30k_rt/checkpoint-45000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: 30k_ft 6 | 7 | finetune: 8 | model_name: 30k_ft 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/composition_pt_30k_rt/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: 30k_ft 6 | 7 | finetune: 8 | model_name: 30k_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/crystal_llm_rep_pt_30k_rt/checkpoint-11000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: 
slice 4 | logging: 5 | wandb_project: 30k_ft 6 | 7 | finetune: 8 | model_name: 30k_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/slice_pt_30k_rt/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | wandb_project: 30k_ft 6 | 7 | finetune: 8 | model_name: 30k_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt_2/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms 4 | special_num_token: True 5 | logging: 6 | wandb_project: 30k_ft_rt2 7 | 8 | finetune: 9 | model_name: 30k_ft_rt2 10 | context_length: 32 11 | training_arguments: 12 | per_device_train_batch_size: 1024 13 | path: 14 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/atoms_pt_30k_rt/checkpoint-1000 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt_2/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | special_num_token: True 5 | logging: 6 | wandb_project: 30k_ft_rt2 7 | 8 | finetune: 9 | model_name: 30k_ft_rt2 10 | context_length: 32 11 | training_arguments: 12 | per_device_train_batch_size: 1024 13 | path: 14 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/atoms_params_pt_30k_rt/checkpoint-1000 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt_2/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: 30k_ft_rt2 6 | 7 | finetune: 8 | model_name: 30k_ft_rt2 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_p1_pt_30k_rt_2/checkpoint-46000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt_2/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: 30k_ft_rt2 6 | 7 | finetune: 8 | model_name: 30k_ft_rt2 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_symmetrized_pt_30k_rt/checkpoint-46000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt_2/composition.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: 30k_ft_rt2 6 | 7 | finetune: 8 | model_name: 30k_ft_rt2 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/composition_pt_30k_rt/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt_2/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: 30k_ft_rt2 6 | 7 | finetune: 8 | model_name: 30k_ft_rt2 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/crystal_llm_rep_pt_30k_rt/checkpoint-11000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt_2/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: 30k_ft_rt2 6 | 7 | finetune: 8 | model_name: 30k_ft_rt2 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/slice_pt_30k_rt/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune30k_rt_2/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | special_num_token: True 5 | logging: 6 | wandb_project: 30k_ft_rt2 7 | 8 | finetune: 9 | model_name: 30k_ft_rt2 10 | context_length: 512 11 | training_arguments: 12 | per_device_train_batch_size: 64 13 | path: 14 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/zmatrix_pt_30k_rt/checkpoint-46000 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune_30k_spl_token/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: ft_30k_spl 6 | finetune: 7 | model_name: ft_30k_spl 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_p1_30k_ft/checkpoint-46000 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune_30k_spl_token/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: ft_30k_spl 6 | 7 | finetune: 8 | model_name: ft_30k_spl 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_symmetrized_30k_ft/checkpoint-46000 14 | 
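
Across these folders the context_length tracks the verbosity of each text representation: full CIF strings (cif_p1, cif_symmetrized) get 1024 tokens, the mid-length slice, crystal_llm_rep, and zmatrix strings get 512, and the compact composition and atoms representations fit in 32, with per_device_train_batch_size scaled roughly inversely. A small sketch that tabulates those settings for one folder, assuming the key nesting shown in the files above (PyYAML, run from the repository root):

# Sketch only (not a repo file): tabulate per-representation settings.
import pathlib

import yaml

folder = pathlib.Path("conf/archived_experiments/finetune30k_rt_2")
for path in sorted(folder.glob("*.yaml")):
    model = yaml.safe_load(path.read_text())["model"]
    ft = model["finetune"]
    print(
        f"{model['representation']:>16}"
        f"  ctx={ft['context_length']}"
        f"  batch={ft['training_arguments']['per_device_train_batch_size']}"
    )
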
-------------------------------------------------------------------------------- /conf/archived_experiments/finetune_30k_spl_token/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: ft_30k_spl 6 | 7 | finetune: 8 | model_name: ft_30k_spl 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 512 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/composition_30k_ft/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune_30k_spl_token/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: ft_30k_spl 6 | 7 | finetune: 8 | model_name: ft_30k_spl 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/crystal_llm_rep_30k_ft/checkpoint-11000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/finetune_30k_spl_token/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: ft_30k_spl 6 | 7 | finetune: 8 | model_name: ft_30k_spl 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/slice_30k_ft/checkpoint-11000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_ft/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: lama_2 6 | finetune: 7 | model_name: lama_2 8 | # context_length: 1024 9 | # training_arguments: 10 | # per_device_train_batch_size: 64 11 | # path: 12 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_ft/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: lama_2 6 | 7 | finetune: 8 | model_name: lama_2 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_ft/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: lama_2 6 | 7 | finetune: 8 | model_name: lama_2 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: 
"/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_ft/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | wandb_project: lama_2 6 | 7 | finetune: 8 | model_name: lama_2 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 128 12 | # path: 13 | # pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_30k_zmatrix/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: llama_instruct_2 6 | finetune: 7 | model_name: llama_instruct_2 8 | # context_length: 1024 9 | # training_arguments: 10 | # per_device_train_batch_size: 64 11 | # path: 12 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: llama_instruct_2 6 | finetune: 7 | model_name: llama_instruct_3 8 | # context_length: 1024 9 | # training_arguments: 10 | # per_device_train_batch_size: 64 11 | # path: 12 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: llama_instruct_2 6 | 7 | finetune: 8 | model_name: llama_instruct_2 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: llama_instruct_2 6 | 7 | finetune: 8 | model_name: llama_instruct_2 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | wandb_project: llama_instruct 6 | 7 | finetune: 8 | model_name: llama_instruct 9 | # context_length: 512 10 | # 
training_arguments: 11 | # per_device_train_batch_size: 128 12 | # path: 13 | # pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_30k_zmatrix/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_10/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: llama3_8_gen 6 | finetune: 7 | path: 8 | pretrained_checkpoint: "meta-llama/Meta-Llama-3-8B" 9 | model_name: llama3_8_gen 10 | # context_length: 1024 11 | training_arguments: 12 | per_device_train_batch_size: 16 13 | # path: 14 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_10/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: llama3_8_gen 6 | finetune: 7 | model_name: llama3_8_gen 8 | path: 9 | pretrained_checkpoint: "meta-llama/Meta-Llama-3-8B" 10 | # context_length: 1024 11 | # training_arguments: 12 | # per_device_train_batch_size: 64 13 | # path: 14 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_10/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: llama3_8_gen 6 | 7 | finetune: 8 | model_name: llama3_8_gen 9 | path: 10 | pretrained_checkpoint: "meta-llama/Meta-Llama-3-8B" 11 | # context_length: 512 12 | # training_arguments: 13 | # per_device_train_batch_size: 64 14 | # path: 15 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_10/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: llama3_8_gen 6 | 7 | finetune: 8 | model_name: llama3_8_gen 9 | path: 10 | pretrained_checkpoint: "meta-llama/Meta-Llama-3-8B" 11 | # context_length: 512 12 | # training_arguments: 13 | # per_device_train_batch_size: 64 14 | # path: 15 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000" 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_2/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: llama_collator_2 6 | finetune: 7 | model_name: llama_collator_2 8 | # context_length: 1024 9 | # training_arguments: 10 | # per_device_train_batch_size: 64 11 | # path: 12 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | 
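
The llama_sft_* variants keep most hyperparameters commented out (apparently carried over from the encoder runs above) and rely on defaults; where a checkpoint is set, finetune.path.pretrained_checkpoint takes either a local checkpoint directory or a Hugging Face Hub id, as llama_sft_10 does with "meta-llama/Meta-Llama-3-8B". A sketch of that loading convention via transformers, illustrating the two accepted forms rather than the project's actual loader:

# Sketch only: pretrained_checkpoint as a Hub id or a local directory; the
# transformers auto classes resolve both forms the same way.
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "meta-llama/Meta-Llama-3-8B"  # or a local ".../checkpoint-46000"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)
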
-------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_2/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: llama_collator_2 6 | finetune: 7 | model_name: llama_collator_2 8 | # context_length: 1024 9 | # training_arguments: 10 | # per_device_train_batch_size: 64 11 | # path: 12 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_2/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: llama_collator_2 6 | 7 | finetune: 8 | model_name: llama_collator_2 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_2/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: llama_collator_2 6 | 7 | finetune: 8 | model_name: llama_collator_2 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_2/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | wandb_project: llama_instruct 6 | 7 | finetune: 8 | model_name: llama_instruct 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 128 12 | # path: 13 | # pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_30k_zmatrix/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_3/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: llama_collator_3 6 | finetune: 7 | model_name: llama_collator_3 8 | # context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 16 11 | # path: 12 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_3/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: llama_collator_3 6 | finetune: 7 | model_name: llama_collator_3 8 | # context_length: 1024 9 | # training_arguments: 10 | # per_device_train_batch_size: 64 11 | # path: 12 | # 
pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_3/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: llama_collator_3 6 | 7 | finetune: 8 | model_name: llama_collator_3 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_3/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: llama_collator_3 6 | 7 | finetune: 8 | model_name: llama_collator_3 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_3/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | wandb_project: llama_collator_3 6 | 7 | finetune: 8 | model_name: llama_collator_3 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 128 12 | # path: 13 | # pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_30k_zmatrix/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_4/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: llama_collator_4 6 | finetune: 7 | model_name: llama_collator_4 8 | # context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 16 11 | # path: 12 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_4/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: llama_collator_4 6 | finetune: 7 | model_name: llama_collator_4 8 | # context_length: 1024 9 | # training_arguments: 10 | # per_device_train_batch_size: 64 11 | # path: 12 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_4/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: llama_collator_4 6 | 7 | finetune: 8 | 
model_name: llama_collator_4 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_4/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: llama_collator_4 6 | 7 | finetune: 8 | model_name: llama_collator_4 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_4/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | wandb_project: llama_collator_4 6 | 7 | finetune: 8 | model_name: llama_collator_4 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 128 12 | # path: 13 | # pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_30k_zmatrix/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_5/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: llama_collator_5 6 | finetune: 7 | model_name: llama_collator_5 8 | # context_length: 1024 9 | # training_arguments: 10 | # per_device_train_batch_size: 16 11 | # path: 12 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_5/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: llama_collator_5 6 | finetune: 7 | model_name: llama_collator_5 8 | # context_length: 1024 9 | # training_arguments: 10 | # per_device_train_batch_size: 64 11 | # path: 12 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_5/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: llama_collator_5 6 | 7 | finetune: 8 | model_name: llama_collator_5 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_5/slice.yaml: -------------------------------------------------------------------------------- 1 | 
# @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: llama_collator_5 6 | 7 | finetune: 8 | model_name: llama_collator_5 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 64 12 | # path: 13 | # pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/llama_sft_5/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | wandb_project: llama_collator_5 6 | 7 | finetune: 8 | model_name: llama_collator_5 9 | # context_length: 512 10 | # training_arguments: 11 | # per_device_train_batch_size: 128 12 | # path: 13 | # pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_30k_zmatrix/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential/cryst_0.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0 5 | logging: 6 | wandb_project: potential_lj 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: potential_0 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential/cryst_0_2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0.2 5 | logging: 6 | wandb_project: potential_lj 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: potential_0_2 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential/cryst_0_4.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0.4 5 | logging: 6 | wandb_project: potential_lj 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: potential_0_4 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential/cryst_0_5.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0.5 5 | logging: 6 | wandb_project: potential_lj 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: potential_0_5 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: 
/home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential/cryst_0_6.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0.6 5 | logging: 6 | wandb_project: potential_lj 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: potential_0_6 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential/cryst_0_8.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0.8 5 | logging: 6 | wandb_project: potential_lj 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: potential_0_8 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential/cryst_1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 1 5 | logging: 6 | wandb_project: potential_lj 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: potential_1 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_2/cryst_0.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0 5 | logging: 6 | wandb_project: potential_lj_standard 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: standard_potential_0 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_2/cryst_0_2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0.2 5 | logging: 6 | wandb_project: potential_lj_standard 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: standard_potential_0_2 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_2/cryst_0_4.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0.4 5 | logging: 6 | wandb_project: potential_lj_standard 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: standard_potential_0_4 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_2/cryst_0_5.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0.5 5 | logging: 6 | wandb_project: potential_lj_standard 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: standard_potential_0_5 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_2/cryst_0_6.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0.6 5 | logging: 6 | wandb_project: potential_lj_standard 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: standard_potential_0_6 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_2/cryst_0_8.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 0.8 5 | logging: 6 | wandb_project: potential_lj_standard 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: standard_potential_0_8 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_2/cryst_1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | alpha: 1 5 | logging: 6 | wandb_project: potential_lj_standard 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: standard_potential_1 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/crystal_llm_rep_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_3/cryst_0.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | alpha: 0 5 | logging: 6 | wandb_project: 
potential_lj_composition 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: composition_potential_0 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/composition_pt_2m/checkpoint-12000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_3/cryst_0_2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | alpha: 0.2 5 | logging: 6 | wandb_project: potential_lj_composition 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: composition_potential_0_2 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/composition_pt_2m/checkpoint-12000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_3/cryst_0_4.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | alpha: 0.4 5 | logging: 6 | wandb_project: potential_lj_composition 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: composition_potential_0_4 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/composition_pt_2m/checkpoint-12000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_3/cryst_0_5.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | alpha: 0.5 5 | logging: 6 | wandb_project: potential_lj_composition 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: composition_potential_0_5 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/composition_pt_2m/checkpoint-12000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_3/cryst_0_6.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | alpha: 0.6 5 | logging: 6 | wandb_project: potential_lj_composition 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: composition_potential_0_6 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/composition_pt_2m/checkpoint-12000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_3/cryst_0_8.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | alpha: 0.8 5 | logging: 6 | wandb_project: potential_lj_composition 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: composition_potential_0_8 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | 
pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/composition_pt_2m/checkpoint-12000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_3/cryst_1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | alpha: 1 5 | logging: 6 | wandb_project: potential_lj_composition 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: composition_potential_1 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/composition_pt_2m/checkpoint-12000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_4/cryst_0.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | alpha: 0 5 | logging: 6 | wandb_project: potential_lj_slice 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: slice_potential_0 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/slice_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_4/cryst_0_2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | alpha: 0.2 5 | logging: 6 | wandb_project: potential_lj_slice 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: slice_potential_0_2 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/slice_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_4/cryst_0_4.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | alpha: 0.4 5 | logging: 6 | wandb_project: potential_lj_slice 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: slice_potential_0_4 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/slice_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_4/cryst_0_5.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | alpha: 0.5 5 | logging: 6 | wandb_project: potential_lj_slice 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: slice_potential_0_5 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/slice_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_4/cryst_0_6.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | alpha: 0.6 5 | logging: 6 | wandb_project: potential_lj_slice 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: slice_potential_0_6 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/slice_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_4/cryst_0_8.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | alpha: 0.8 5 | logging: 6 | wandb_project: potential_lj_slice 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: slice_potential_0_8 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/slice_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_4/cryst_1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | alpha: 1 5 | logging: 6 | wandb_project: potential_lj_slice 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: slice_potential_1 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/slice_pt_2m/checkpoint-393000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_5/cryst_0.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | alpha: 0 5 | logging: 6 | wandb_project: potential_lj_zmatrix 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: zmatrix_potential_0 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_300k_zmatrix/checkpoint-85000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_5/cryst_0_2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | alpha: 0.2 5 | logging: 6 | wandb_project: potential_lj_zmatrix 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: zmatrix_potential_0_2 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_300k_zmatrix/checkpoint-85000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_5/cryst_0_4.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | alpha: 0.4 5 | logging: 6 | wandb_project: potential_lj_zmatrix 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: zmatrix_potential_0_4 11 | 
context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_300k_zmatrix/checkpoint-85000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_5/cryst_0_5.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | alpha: 0.5 5 | logging: 6 | wandb_project: potential_lj_zmatrix 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: zmatrix_potential_0_5 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_300k_zmatrix/checkpoint-85000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_5/cryst_0_6.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | alpha: 0.6 5 | logging: 6 | wandb_project: potential_lj_zmatrix 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: zmatrix_potential_0_6 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_300k_zmatrix/checkpoint-85000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_5/cryst_0_8.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | alpha: 0.8 5 | logging: 6 | wandb_project: potential_lj_zmatrix 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: zmatrix_potential_0_8 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_300k_zmatrix/checkpoint-85000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_5/cryst_1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | alpha: 1 5 | logging: 6 | wandb_project: potential_lj_zmatrix 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: zmatrix_potential_1 11 | context_length: 512 12 | training_arguments: 13 | per_device_train_batch_size: 128 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_300k_zmatrix/checkpoint-85000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_6/cryst_0.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | alpha: 0 5 | logging: 6 | wandb_project: potential_lj_cif_p1 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: cif_p1_potential_0 11 | context_length: 1024 12 | training_arguments: 13 | per_device_train_batch_size: 64 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/cif_p1_pt_2m/checkpoint-524000 16 | 
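Note on the potential_* families in this archive: within each group (potential through potential_7) the seven cryst_*.yaml files are identical except for `alpha`, which sweeps {0, 0.2, 0.4, 0.5, 0.6, 0.8, 1}, and the `model_name` derived from it; the representation, wandb project, context length, batch size, and pretrained checkpoint stay fixed per group. A hypothetical generator for such a family is sketched below (it is not part of this repo, and the YAML indentation/nesting is reconstructed by eye from this flattened dump):

```python
# Hypothetical helper (not in this repo): regenerate one alpha-sweep family,
# e.g. conf/archived_experiments/potential_6/, where only `alpha` and the
# model name vary between the seven files. Nesting is inferred from the dump.
from pathlib import Path

ALPHAS = [0, 0.2, 0.4, 0.5, 0.6, 0.8, 1]

TEMPLATE = """\
# @package _global_
model:
  representation: {rep}
  alpha: {alpha}
  logging:
    wandb_project: {project}
  special_num_token: False

finetune:
  model_name: {prefix}_potential_{tag}
  context_length: {ctx}
  training_arguments:
    per_device_train_batch_size: {bs}
  path:
    pretrained_checkpoint: {ckpt}
"""

def write_sweep(out_dir, rep, prefix, project, ctx, bs, ckpt):
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    for alpha in ALPHAS:
        tag = str(alpha).replace(".", "_")  # 0.2 -> "0_2", as in cryst_0_2.yaml
        (out / f"cryst_{tag}.yaml").write_text(
            TEMPLATE.format(rep=rep, alpha=alpha, project=project,
                            prefix=prefix, tag=tag, ctx=ctx, bs=bs, ckpt=ckpt)
        )

write_sweep(
    "conf/archived_experiments/potential_6", "cif_p1", "cif_p1",
    "potential_lj_cif_p1", 1024, 64,
    "/home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/cif_p1_pt_2m/checkpoint-524000",
)
```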
-------------------------------------------------------------------------------- /conf/archived_experiments/potential_6/cryst_0_2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | alpha: 0.2 5 | logging: 6 | wandb_project: potential_lj_cif_p1 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: cif_p1_potential_0_2 11 | context_length: 1024 12 | training_arguments: 13 | per_device_train_batch_size: 64 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/cif_p1_pt_2m/checkpoint-524000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_6/cryst_0_4.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | alpha: 0.4 5 | logging: 6 | wandb_project: potential_lj_cif_p1 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: cif_p1_potential_0_4 11 | context_length: 1024 12 | training_arguments: 13 | per_device_train_batch_size: 64 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/cif_p1_pt_2m/checkpoint-524000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_6/cryst_0_5.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | alpha: 0.5 5 | logging: 6 | wandb_project: potential_lj_cif_p1 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: cif_p1_potential_0_5 11 | context_length: 1024 12 | training_arguments: 13 | per_device_train_batch_size: 64 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/cif_p1_pt_2m/checkpoint-524000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_6/cryst_0_6.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | alpha: 0.6 5 | logging: 6 | wandb_project: potential_lj_cif_p1 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: cif_p1_potential_0_6 11 | context_length: 1024 12 | training_arguments: 13 | per_device_train_batch_size: 64 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/cif_p1_pt_2m/checkpoint-524000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_6/cryst_0_8.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | alpha: 0.8 5 | logging: 6 | wandb_project: potential_lj_cif_p1 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: cif_p1_potential_0_8 11 | context_length: 1024 12 | training_arguments: 13 | per_device_train_batch_size: 64 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/cif_p1_pt_2m/checkpoint-524000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_6/cryst_1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | alpha: 1 5 | logging: 6 | 
wandb_project: potential_lj_cif_p1 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: cif_p1_potential_1 11 | context_length: 1024 12 | training_arguments: 13 | per_device_train_batch_size: 64 14 | path: 15 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_normal/cif_p1_pt_2m/checkpoint-524000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_7/cryst_0.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | alpha: 0 5 | logging: 6 | wandb_project: potential_lj_atoms_params 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: atoms_params_potential_0 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/atoms_params_pt_2m_atoms_params/checkpoint-24000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_7/cryst_0_2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | alpha: 0.2 5 | logging: 6 | wandb_project: potential_lj_atoms_params 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: atoms_params_potential_0_2 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/atoms_params_pt_2m_atoms_params/checkpoint-24000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_7/cryst_0_4.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | alpha: 0.4 5 | logging: 6 | wandb_project: potential_lj_atoms_params 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: atoms_params_potential_0_4 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/atoms_params_pt_2m_atoms_params/checkpoint-24000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_7/cryst_0_5.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | alpha: 0.5 5 | logging: 6 | wandb_project: potential_lj_atoms_params 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: atoms_params_potential_0_5 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/atoms_params_pt_2m_atoms_params/checkpoint-24000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_7/cryst_0_6.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | alpha: 0.6 5 | logging: 6 | wandb_project: potential_lj_atoms_params 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: atoms_params_potential_0_6 11 | context_length: 32 12 | 
training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/atoms_params_pt_2m_atoms_params/checkpoint-24000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_7/cryst_0_8.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | alpha: 0.8 5 | logging: 6 | wandb_project: potential_lj_atoms_params 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: atoms_params_potential_0_8 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/atoms_params_pt_2m_atoms_params/checkpoint-24000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/potential_7/cryst_1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | alpha: 1 5 | logging: 6 | wandb_project: potential_lj_atoms_params 7 | special_num_token: False 8 | 9 | finetune: 10 | model_name: atoms_params_potential_1 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/atoms_params_pt_2m_atoms_params/checkpoint-24000 16 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-rt/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_rt 5 | 6 | representation: atoms 7 | pretrain: 8 | name: pt_30k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-rt/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_rt 5 | 6 | representation: atoms_params 7 | pretrain: 8 | name: pt_30k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-rt/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_rt 5 | 6 | representation: cif_p1 7 | pretrain: 8 | name: pt_30k_rt 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-rt/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_rt 5 | 6 | representation: cif_symmetrized 7 | pretrain: 8 | name: pt_30k_rt 9 | 
context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-rt/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_rt 5 | 6 | representation: composition 7 | pretrain: 8 | name: pt_30k_rt 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-rt/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_rt 5 | 6 | representation: crystal_llm_rep 7 | pretrain: 8 | name: pt_30k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-rt/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_rt 5 | 6 | representation: slice 7 | pretrain: 8 | name: pt_30k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-rt/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_rt 5 | 6 | representation: zmatrix 7 | pretrain: 8 | name: pt_30k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k/zmatrix 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-test/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pretrain-test 5 | 6 | representation: cif_p1 7 | pretrain: 8 | name: pt_30k 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k #--> Change this to the folder containing the 30k dataset's train.json and test.json 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-test/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pretrain-test 5 | 6 | representation: cif_symmetrized 7 | pretrain: 8 | name: pt_30k 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k #--> Change this to the folder containing the 30k dataset's train.json and test.json 14 |
-------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-test/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pretrain-test 5 | 6 | representation: composition 7 | pretrain: 8 | name: pt_30k 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k #--> Change this to the folder containing the 30k dataset's train.json and test.json 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-test/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pretrain-test 5 | 6 | representation: crystal_llm_rep 7 | pretrain: 8 | name: pt_30k 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k #--> Change this to the folder containing the 30k dataset's train.json and test.json 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain-test/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pretrain-test 5 | 6 | representation: slice 7 | pretrain: 8 | name: pt_30k 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k #--> Change this to the folder containing the 30k dataset's train.json and test.json 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain100k_spl_token/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 100k_ft 5 | 6 | representation: atoms_params 7 | pretrain: 8 | name: 100k_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain100k_spl_token/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 100k_ft 5 | 6 | representation: cif_p1 7 | pretrain: 8 | name: 100k_ft 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain100k_spl_token/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 100k_ft 5 | 6 | representation: cif_symmetrized 7 | pretrain: 8 | name: 100k_ft 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | --------------------------------------------------------------------------------
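The `#--> Change this to the folder containing ...` comments above spell out the expected dataset layout: `data_root_path` must point at a directory holding train.json and test.json. Assuming the training code consumes these as ordinary JSON splits (the configs themselves do not show the loader), a quick sanity check with Hugging Face `datasets` would look like this:

```python
# Minimal sketch (an assumption, not code from this repo): verify that a
# data_root_path from the pretrain configs holds loadable train/test splits.
from datasets import load_dataset

data_root = "/work/so87pot/material_db/mattext_dataset/30k"  # path from the configs
ds = load_dataset(
    "json",
    data_files={
        "train": f"{data_root}/train.json",
        "test": f"{data_root}/test.json",
    },
)
print(ds["train"])  # inspect columns, e.g. the text representation per structure
```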
/conf/archived_experiments/pretrain100k_spl_token/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 100k_ft 5 | 6 | representation: composition 7 | pretrain: 8 | name: 100k_ft 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain100k_spl_token/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 100k_ft 5 | 6 | representation: crystal_llm_rep 7 | pretrain: 8 | name: 100k_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain100k_spl_token/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 100k_ft 5 | 6 | representation: slice 7 | pretrain: 8 | name: 100k_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain100k_spl_token/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 100k_ft 5 | 6 | representation: zmatrix 7 | pretrain: 8 | name: 100k_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain30k_spl_token/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | logging: 5 | wandb_project: 30k_ft 6 | 7 | finetune: 8 | model_name: 30k_ft 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain30k_spl_token/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 30k_ft 5 | 6 | representation: cif_p1 7 | pretrain: 8 | name: 30k_ft 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain30k_spl_token/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 30k_ft 5 | 6 | representation: 
cif_symmetrized 7 | pretrain: 8 | name: 30k_ft 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain30k_spl_token/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 30k_ft 5 | 6 | representation: composition 7 | pretrain: 8 | name: 30k_ft 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain30k_spl_token/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 30k_ft 5 | 6 | representation: crystal_llm_rep 7 | pretrain: 8 | name: 30k_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain30k_spl_token/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: 30k_ft 5 | 6 | representation: slice 7 | pretrain: 8 | name: 30k_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain30k_spl_token/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | wandb_project: 30k_ft 6 | 7 | finetune: 8 | model_name: 30k_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | pretrained_checkpoint: /work/so87pot/material_db/mattext_dataset/30k/zmatrix 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_2m/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_2m 5 | 6 | representation: atoms 7 | pretrain: 8 | name: pt_2m 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/2m/newreps 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_2m/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_2m 5 | 6 | representation: atoms_params 7 | pretrain: 8 | name: pt_2m 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/2m/newreps 14 | -------------------------------------------------------------------------------- 
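Every file in this archive opens with the Hydra directive `# @package _global_`, which merges the file's keys at the root of the composed config rather than under its config-group name. A minimal sketch of composing one of these experiment files follows; the entry point, group path, and base config name are assumptions inferred from the conf/ layout, not confirmed by the configs themselves:

```python
# Sketch only: compose the base config plus one archived experiment file.
# The `+group=option` override syntax is standard Hydra; the group path and
# base config name below are assumptions, and the real CLI may differ.
from hydra import compose, initialize
from omegaconf import OmegaConf

with initialize(version_base=None, config_path="conf"):
    cfg = compose(
        config_name="config",  # assumed base config, conf/config.yaml
        overrides=["+archived_experiments/pretrain30k_spl_token=composition"],
    )
    print(OmegaConf.to_yaml(cfg))  # keys land at the root via # @package _global_
```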
/conf/archived_experiments/pretrain_2m/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: zmatrix 5 | 6 | representation: zmatrix 7 | pretrain: 8 | name: zmatrix 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/2m/zmatrix 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_100/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: atoms 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_100/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: atoms_params 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_100/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: cif_p1 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_100/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: cif_symmetrized 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_100/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: composition 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_100/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: crystal_llm_rep 7 | pretrain: 8 | name: pt_100k_rt 9 | 
context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_100/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: slice 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_100/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: zmatrix 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k/zmatrix 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_30/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_rt 5 | 6 | representation: atoms 7 | pretrain: 8 | name: pt_30k_rt 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_30/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_rt 5 | 6 | representation: atoms_params 7 | pretrain: 8 | name: pt_30k_rt 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_30/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: cif_p1 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_30/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: cif_symmetrized 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_30/composition.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: composition 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_30/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: crystal_llm_rep 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_30/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_100k_rt 5 | 6 | representation: slice 7 | pretrain: 8 | name: pt_100k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/100k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_30/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_30k_rt 5 | 6 | representation: zmatrix 7 | pretrain: 8 | name: pt_30k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/30k/zmatrix 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_300/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_300k_rt 5 | 6 | representation: atoms 7 | pretrain: 8 | name: pt_300k_rt 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/300k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_300/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_300k_rt 5 | 6 | representation: atoms_params 7 | pretrain: 8 | name: pt_300k_rt 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/300k/atoms 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_300/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_300k_rt 5 | 6 | representation: cif_p1 7 | pretrain: 8 | name: pt_300k_rt 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 
| path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/300k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_300/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_300k_rt 5 | 6 | representation: cif_symmetrized 7 | pretrain: 8 | name: pt_300k_rt 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/300k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_300/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_300k_rt 5 | 6 | representation: composition 7 | pretrain: 8 | name: pt_300k_rt 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/300k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_300/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_300k_rt 5 | 6 | representation: crystal_llm_rep 7 | pretrain: 8 | name: pt_300k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/300k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_300/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_300k_rt 5 | 6 | representation: slice 7 | pretrain: 8 | name: pt_300k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/300k 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/pretrain_rt_300/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | logging: 4 | wandb_project: pt_300k_rt 5 | 6 | representation: zmatrix 7 | pretrain: 8 | name: pt_300k_rt 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | data_root_path: /work/so87pot/material_db/mattext_dataset/300k/zmatrix 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/qmof_ft/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | logging: 5 | wandb_project: qmof_bg 6 | 7 | finetune: 8 | model_name: qmof_bg 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/qmof_ft/cifp1.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: qmof_bg 6 | finetune: 7 | model_name: qmof_bg 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/qmof_ft/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: qmof_bg 6 | 7 | finetune: 8 | model_name: qmof_bg 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/qmof_ft/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: qmof_bg 6 | 7 | finetune: 8 | model_name: qmof_bg 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 512 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/composition_pt_30k_wes/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/qmof_ft/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: qmof_bg 6 | 7 | finetune: 8 | model_name: qmof_bg 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/qmof_ft/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: qmof_bg 6 | 7 | finetune: 8 | model_name: qmof_bg 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/slice_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/qmof_ft/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | wandb_project: qmof_bg 6 | 7 | finetune: 8 | model_name: qmof_bg 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 128 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/zmatrix_pt_30k_zmatrix/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/santiago/cifp1.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: santiago_30k 6 | finetune: 7 | model_name: ft_30k_santiago 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: /home/so87pot/n0w0f/santiago_ckpt/cif_p1_pt_30k/checkpoint-23000 13 | 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/santiago_100k/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: santiago_100 6 | finetune: 7 | model_name: ft_100k_santiago 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: /home/so87pot/n0w0f/santiago_ckpt/cif_p1_pt_100k/checkpoint-26000 -------------------------------------------------------------------------------- /conf/archived_experiments/santiago_2m/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: santiago_2m 6 | finetune: 7 | model_name: ft_2m_santiago 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: /home/so87pot/n0w0f/santiago_ckpt/cif_p1_pt_2m/checkpoint-524000 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/santiago_2m/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: santiago_2m 6 | 7 | finetune: 8 | model_name: ft_2m_santiago 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_symmetrized_pt_30k_rt/checkpoint-45000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/santiago_2m/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: santiago_2m 6 | 7 | finetune: 8 | model_name: ft_2m_santiago 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/composition_pt_30k_rt/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/santiago_2m/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: santiago_2m 6 | 7 | finetune: 8 | model_name: ft_2m_santiago 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/crystal_llm_rep_pt_30k_rt/checkpoint-11000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/santiago_2m/slice.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slice 4 | logging: 5 | wandb_project: santiago_2m 6 | 7 | finetune: 8 | model_name: ft_2m_santiago 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/slice_pt_30k_rt/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/smiles/slice_100.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: local_env 4 | special_num_token: False 5 | logging: 6 | wandb_project: 100k_ft_smiles 7 | 8 | finetune: 9 | model_name: 100k_ft_smiles 10 | context_length: 512 11 | training_arguments: 12 | per_device_train_batch_size: 64 13 | path: 14 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/smiles/local_env_smiles_100k/checkpoint-38000 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/smiles/slice_2m.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: local_env 4 | special_num_token: False 5 | logging: 6 | wandb_project: 2m_ft_smiles 7 | 8 | finetune: 9 | model_name: 2m_ft_smiles 10 | context_length: 512 11 | training_arguments: 12 | per_device_train_batch_size: 64 13 | path: 14 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/santiago_ckpt_rt/checkpoint-95000 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/smiles/slice_30.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | special_num_token: False 4 | representation: local_env 5 | logging: 6 | wandb_project: 30k_ft_smiles 7 | 8 | finetune: 9 | model_name: 30k_ft_smiles 10 | context_length: 512 11 | training_arguments: 12 | per_device_train_batch_size: 64 13 | path: 14 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/smiles/local_env_smiles_30k/checkpoint-9000 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/smiles/slice_300.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: local_env 4 | special_num_token: False 5 | logging: 6 | wandb_project: 300k_ft_smiles 7 | 8 | finetune: 9 | model_name: 300k_ft_smiles 10 | context_length: 512 11 | training_arguments: 12 | per_device_train_batch_size: 64 13 | path: 14 | pretrained_checkpoint: /home/so87pot/n0w0f/mattext_ckpt/smiles/local_env_smiles_300k/checkpoint-91000 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_perturb/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: perturb_1 6 | finetune: 7 | model_name: finetune_30k_wes_3 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- 
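Each experiment file above opens with `# @package _global_`, which tells Hydra to merge that file's keys (representation, context length, batch size, checkpoint paths) into the root of the composed config rather than nesting them under a group. The sketch below illustrates that composition with Hydra's compose API; the `conf` directory layout, config name, and group path used here are illustrative assumptions, not taken from this repository.

```python
# Minimal sketch of Hydra config composition (assumed layout and names).
from hydra import compose, initialize

with initialize(version_base=None, config_path="conf"):
    cfg = compose(
        config_name="benchmark",  # assumed root config
        overrides=["model=archived_experiments/testing_perturb/cifp1"],  # assumed group path
    )
    # Because the selected file is declared "# @package _global_", its keys
    # land at the root of the composed config:
    print(cfg.model.representation)     # -> cif_p1
    print(cfg.finetune.context_length)  # -> 1024
```

From the command line, the same selection would normally be a group override plus any key overrides, e.g. `model=... finetune.training_arguments.per_device_train_batch_size=32`.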
/conf/archived_experiments/testing_perturb/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: perturb_1 6 | 7 | finetune: 8 | model_name: finetune_30k_wes_3 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_perturb/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: perturb_1 6 | 7 | finetune: 8 | model_name: finetune_30k_wes_3 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_perturb_100/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: perturb_1 6 | finetune: 7 | model_name: ft_100k_mb_small 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: ft_100k_mb_small 13 | 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_perturb_100/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: perturb_1 6 | 7 | 8 | finetune: 9 | model_name: ft_100k_mb_small 10 | context_length: 1024 11 | training_arguments: 12 | per_device_train_batch_size: 64 13 | path: 14 | pretrained_checkpoint: ft_100k_mb_small -------------------------------------------------------------------------------- /conf/archived_experiments/testing_perturb_100/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: perturb_1 6 | 7 | finetune: 8 | model_name: ft_100k_mb_small 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: ft_100k_mb_small 14 | 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_perturb_300/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: perturb_1 6 | finetune: 7 | model_name: ft_300k_mb_small 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: ft_300k_mb_small 13 | 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_perturb_300/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 
model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: perturb_1 6 | 7 | 8 | finetune: 9 | model_name: ft_300k_mb_small 10 | context_length: 1024 11 | training_arguments: 12 | per_device_train_batch_size: 64 13 | path: 14 | pretrained_checkpoint: ft_300k_mb_small -------------------------------------------------------------------------------- /conf/archived_experiments/testing_perturb_300/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: perturb_1 6 | 7 | finetune: 8 | model_name: ft_300k_mb_small 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: ft_300k_mb_small 14 | 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_translate/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: translate_1 6 | finetune: 7 | model_name: finetune_30k_wes_3 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_p1_pt_30k_wes/checkpoint-46000" 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_translate/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: translate_1 6 | 7 | finetune: 8 | model_name: finetune_30k_wes_3 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/cif_symmetrized_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_translate/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: translate_1 6 | 7 | finetune: 8 | model_name: finetune_30k_wes_3 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: "/work/so87pot/mattext/megaloop/checkpoints/checkpoints/crystal_llm_rep_pt_30k_wes/checkpoint-46000" 14 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_translate_100/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: translate_1 6 | finetune: 7 | model_name: ft_100k_mb_small 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: ft_100k_mb_small 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_translate_100/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: translate_1 6 | 7 | 8 
| finetune: 9 | model_name: ft_100k_mb_small 10 | context_length: 1024 11 | training_arguments: 12 | per_device_train_batch_size: 64 13 | path: 14 | pretrained_checkpoint: ft_100k_mb_small -------------------------------------------------------------------------------- /conf/archived_experiments/testing_translate_100/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: translate_1 6 | 7 | finetune: 8 | model_name: ft_100k_mb_small 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: ft_100k_mb_small 14 | 15 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_translate_300/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: translate_1 6 | finetune: 7 | model_name: ft_300k_mb_small 8 | context_length: 1024 9 | training_arguments: 10 | per_device_train_batch_size: 64 11 | path: 12 | pretrained_checkpoint: ft_300k_mb_small 13 | -------------------------------------------------------------------------------- /conf/archived_experiments/testing_translate_300/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: translate_1 6 | 7 | 8 | finetune: 9 | model_name: ft_300k_mb_small 10 | context_length: 1024 11 | training_arguments: 12 | per_device_train_batch_size: 64 13 | path: 14 | pretrained_checkpoint: ft_300k_mb_small -------------------------------------------------------------------------------- /conf/archived_experiments/testing_translate_300/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: translate_1 6 | 7 | finetune: 8 | model_name: ft_300k_mb_small 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: ft_300k_mb_small 14 | 15 | -------------------------------------------------------------------------------- /conf/bandgap.yaml: -------------------------------------------------------------------------------- 1 | 2 | 3 | hydra: 4 | job: 5 | name: bandgap 6 | run: 7 | dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 8 | sweep: 9 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 10 | subdir: ${hydra.job.override_dirname} 11 | 12 | # launcher: 13 | # _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher 14 | # submitit_folder: ${hydra.sweep.dir}/.submitit/%j 15 | # timeout_min: 3600 16 | # mem_gb: 160 17 | # nodes: 1 18 | # #gpus_per_task: 1 19 | # gres: gpu:1 20 | # #gpus_per_node: 2 21 | # name: ${hydra.job.name} 22 | # partition: 'gpu' 23 | # additional_parameters: 24 | # nodelist: 'gpu[008,013-017]' 25 | # tasks_per_node: 1 26 | 27 | defaults: 28 | - model: none 29 | # - override hydra/launcher: submitit_slurm 30 | 31 | runs: 32 | - name: benchmark_run 33 | tasks: [benchmark] -------------------------------------------------------------------------------- /conf/benchmark.yaml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 
hydra: 4 | job: 5 | name: benchmark 6 | run: 7 | dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 8 | sweep: 9 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 10 | subdir: ${hydra.job.override_dirname} 11 | 12 | # launcher: 13 | # _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher 14 | # submitit_folder: ${hydra.sweep.dir}/.submitit/%j 15 | # timeout_min: 3600 16 | # mem_gb: 160 17 | # nodes: 1 18 | # #gpus_per_task: 1 19 | # gres: gpu:1 20 | # #gpus_per_node: 2 21 | # name: ${hydra.job.name} 22 | # partition: 'gpu' 23 | # additional_parameters: 24 | # nodelist: 'gpu[008,013-017]' 25 | # tasks_per_node: 1 26 | 27 | defaults: 28 | - model: none 29 | # - override hydra/launcher: submitit_slurm 30 | 31 | runs: 32 | - name: benchmark_run 33 | tasks: [benchmark] -------------------------------------------------------------------------------- /conf/bg/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atom_sequences 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-atom-seq-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 32 14 | training_arguments: 15 | per_device_train_batch_size: 1024 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-atom-seq-2m 18 | 19 | -------------------------------------------------------------------------------- /conf/bg/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atom_sequences_plusplus 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-atom-seq-plusplus-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 32 14 | training_arguments: 15 | per_device_train_batch_size: 1024 16 | 17 | -------------------------------------------------------------------------------- /conf/bg/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-cifp1-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 1024 14 | training_arguments: 15 | per_device_train_batch_size: 128 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-cifp1-2m -------------------------------------------------------------------------------- /conf/bg/cifpsym.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-cifsymmetrized-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 1024 14 | training_arguments: 15 | per_device_train_batch_size: 64 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-cifsymmetrized-2m -------------------------------------------------------------------------------- /conf/bg/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | dataset: "bandgap" 
5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-composition-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 32 14 | training_arguments: 15 | per_device_train_batch_size: 1024 16 | 17 | -------------------------------------------------------------------------------- /conf/bg/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_text_llm 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: /home/so87pot/n0w0f/structllm_ckpt/alpaca_ckpt/checkpoint-393000 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 256 16 | -------------------------------------------------------------------------------- /conf/bg/local_env.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: local_env 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: /home/so87pot/n0w0f/structllm_ckpt/santiago_ckpt_rt/checkpoint-95000 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 256 16 | path: 17 | pretrained_checkpoint: /home/so87pot/n0w0f/structllm_ckpt/santiago_ckpt_rt/checkpoint-95000 -------------------------------------------------------------------------------- /conf/bg/slices.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slices 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-slices-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 256 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-slices-2m -------------------------------------------------------------------------------- /conf/bg/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-zmatrix-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 256 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-zmatrix-2m -------------------------------------------------------------------------------- /conf/bg2m/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/bg2m/atoms_params.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atoms_params 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/bg2m/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_p1_pt_30k_rt_2/checkpoint-46000 14 | -------------------------------------------------------------------------------- /conf/bg2m/cifsymmetrized.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 1024 10 | training_arguments: 11 | per_device_train_batch_size: 32 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_symmetrized_pt_30k_rt/checkpoint-45000 14 | -------------------------------------------------------------------------------- /conf/bg2m/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 32 10 | training_arguments: 11 | per_device_train_batch_size: 1024 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/composition_pt_30k_rt/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/bg2m/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_llm_rep 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/crystal_llm_rep_pt_30k_rt/checkpoint-11000 14 | -------------------------------------------------------------------------------- /conf/bg2m/local_env.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/bg2m/slice.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 
3 | representation: slice 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/slice_pt_30k_rt/checkpoint-23000 14 | -------------------------------------------------------------------------------- /conf/bg2m/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | logging: 5 | wandb_project: 2m_intel_ft 6 | 7 | finetune: 8 | model_name: 2m_intel_ft 9 | context_length: 512 10 | training_arguments: 11 | per_device_train_batch_size: 64 12 | path: 13 | pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000 14 | -------------------------------------------------------------------------------- /conf/classification.yaml: -------------------------------------------------------------------------------- 1 | 2 | 3 | hydra: 4 | job: 5 | name: is_metal 6 | run: 7 | dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 8 | sweep: 9 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 10 | subdir: ${hydra.job.override_dirname} 11 | 12 | # launcher: 13 | # _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher 14 | # submitit_folder: ${hydra.sweep.dir}/.submitit/%j 15 | # timeout_min: 3600 16 | # mem_gb: 160 17 | # nodes: 1 18 | # #gpus_per_task: 1 19 | # gres: gpu:1 20 | # #gpus_per_node: 2 21 | # name: ${hydra.job.name} 22 | # partition: 'gpu' 23 | # additional_parameters: 24 | # nodelist: 'gpu[008,013-017]' 25 | # tasks_per_node: 1 26 | 27 | defaults: 28 | - model: none 29 | # - override hydra/launcher: submitit_slurm 30 | 31 | runs: 32 | - name: classification_run 33 | tasks: [classification] -------------------------------------------------------------------------------- /conf/form/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atom_sequences 4 | dataset: "form_energy" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-atom-seq-2m 8 | logging: 9 | wandb_project: revision-form 10 | 11 | finetune: 12 | model_name: revision-form 13 | context_length: 32 14 | training_arguments: 15 | per_device_train_batch_size: 1024 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-atom-seq-2m 18 | 19 | -------------------------------------------------------------------------------- /conf/form/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atom_sequences_plusplus 4 | dataset: "form_energy" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-atom-seq-plusplus-2m 8 | logging: 9 | wandb_project: revision-form 10 | 11 | finetune: 12 | model_name: revision-form 13 | context_length: 32 14 | training_arguments: 15 | per_device_train_batch_size: 2048 16 | 17 | -------------------------------------------------------------------------------- /conf/form/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | dataset: "form_energy" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-cifp1-2m 8 | logging: 9 | 
wandb_project: revision-form 10 | 11 | finetune: 12 | model_name: revision-form 13 | context_length: 1024 14 | training_arguments: 15 | per_device_train_batch_size: 64 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-cifp1-2m -------------------------------------------------------------------------------- /conf/form/cifpsym.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | dataset: "form_energy" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-cifsymmetrized-2m 8 | logging: 9 | wandb_project: revision-form 10 | 11 | finetune: 12 | model_name: revision-form 13 | context_length: 1024 14 | training_arguments: 15 | per_device_train_batch_size: 64 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-cifsymmetrized-2m -------------------------------------------------------------------------------- /conf/form/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | dataset: "form_energy" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-composition-2m 8 | logging: 9 | wandb_project: revision-form 10 | 11 | finetune: 12 | model_name: revision-form 13 | context_length: 32 14 | training_arguments: 15 | per_device_train_batch_size: 2048 16 | 17 | -------------------------------------------------------------------------------- /conf/form/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_text_llm 4 | dataset: "form_energy" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: /home/so87pot/n0w0f/structllm_ckpt/alpaca_ckpt/cllm/checkpoint-393000 8 | logging: 9 | wandb_project: revision-form 10 | 11 | finetune: 12 | model_name: revision-form 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 256 16 | -------------------------------------------------------------------------------- /conf/form/local_env.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: local_env 4 | dataset: "form_energy" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: /home/so87pot/n0w0f/structllm_ckpt/alpaca_ckpt/local_env/checkpoint-381000 8 | logging: 9 | wandb_project: revision-form 10 | 11 | finetune: 12 | model_name: revision-form 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 256 16 | path: 17 | pretrained_checkpoint: /home/so87pot/n0w0f/structllm_ckpt/alpaca_ckpt/local_env/checkpoint-381000 -------------------------------------------------------------------------------- /conf/form/slices.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slices 4 | dataset: "form_energy" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-slices-2m 8 | logging: 9 | wandb_project: revision-form 10 | 11 | finetune: 12 | model_name: revision-form 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 128 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-slices-2m -------------------------------------------------------------------------------- /conf/form/zmatrix.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | dataset: "form_energy" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-zmatrix-2m 8 | logging: 9 | wandb_project: revision-form 10 | 11 | finetune: 12 | model_name: revision-form 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 64 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-zmatrix-2m -------------------------------------------------------------------------------- /conf/form_energy.yaml: -------------------------------------------------------------------------------- 1 | 2 | 3 | hydra: 4 | job: 5 | name: formation_energy 6 | run: 7 | dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 8 | sweep: 9 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 10 | subdir: ${hydra.job.override_dirname} 11 | 12 | 13 | defaults: 14 | - model: none 15 | 16 | 17 | runs: 18 | - name: benchmark_run 19 | tasks: [benchmark] -------------------------------------------------------------------------------- /conf/group-test/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | dataset: "gvrh" 5 | special_num_token: False 6 | logging: 7 | wandb_project: test-benchmark 8 | 9 | finetune: 10 | model_name: test-benchmark 11 | context_length: 32 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /work/so87pot/structllm/megaloop2/checkpoints/checkpoints/composition_30k_ft/checkpoint-1000 16 | 17 | -------------------------------------------------------------------------------- /conf/group-test/slices.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slices 4 | dataset: "gvrh" 5 | special_num_token: False 6 | logging: 7 | wandb_project: test-benchmark 8 | 9 | finetune: 10 | model_name: test-benchmark 11 | context_length: 64 12 | training_arguments: 13 | per_device_train_batch_size: 1024 14 | path: 15 | pretrained_checkpoint: /work/so87pot/structllm/megaloop/checkpoints/checkpoints/slice_pretrain_30k_draco/checkpoint-23000 16 | -------------------------------------------------------------------------------- /conf/is_metal/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atom_sequences 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-atom-seq-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 32 14 | training_arguments: 15 | per_device_train_batch_size: 1024 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-atom-seq-2m 18 | 19 | -------------------------------------------------------------------------------- /conf/is_metal/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atom_sequences_plusplus 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-atom-seq-plusplus-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 32 14 | training_arguments: 15 | per_device_train_batch_size: 1024 16 | 
17 | -------------------------------------------------------------------------------- /conf/is_metal/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-cifp1-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 1024 14 | training_arguments: 15 | per_device_train_batch_size: 128 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-cifp1-2m -------------------------------------------------------------------------------- /conf/is_metal/cifpsym.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-cifsymmetrized-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 1024 14 | training_arguments: 15 | per_device_train_batch_size: 64 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-cifsymmetrized-2m -------------------------------------------------------------------------------- /conf/is_metal/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | dataset: "is-metal" 5 | dataset_type: filtered 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-composition-2m 8 | logging: 9 | wandb_project: revision-bg-filtered 10 | 11 | finetune: 12 | model_name: revision-bg-filtered 13 | context_length: 32 14 | training_arguments: 15 | per_device_train_batch_size: 1024 16 | 17 | -------------------------------------------------------------------------------- /conf/is_metal/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_text_llm 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: /home/so87pot/n0w0f/structllm_ckpt/alpaca_ckpt/checkpoint-393000 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 256 16 | -------------------------------------------------------------------------------- /conf/is_metal/local_env.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: local_env 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: /home/so87pot/n0w0f/structllm_ckpt/santiago_ckpt_rt/checkpoint-95000 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 256 16 | path: 17 | pretrained_checkpoint: /home/so87pot/n0w0f/structllm_ckpt/santiago_ckpt_rt/checkpoint-95000 -------------------------------------------------------------------------------- /conf/is_metal/slices.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slices 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-slices-2m 8 | 
logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 256 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-slices-2m -------------------------------------------------------------------------------- /conf/is_metal/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | dataset: "bandgap" 5 | dataset_type: matbench 6 | special_num_token: False 7 | checkpoint: n0w0f/MatText-zmatrix-2m 8 | logging: 9 | wandb_project: revision-bg 10 | 11 | finetune: 12 | model_name: revision-bg 13 | context_length: 512 14 | training_arguments: 15 | per_device_train_batch_size: 256 16 | path: 17 | pretrained_checkpoint: n0w0f/MatText-zmatrix-2m -------------------------------------------------------------------------------- /conf/llama_8b_bg/atoms.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atom_sequences 4 | dataset: "bandgap" 5 | dataset_type: filtered 6 | logging: 7 | wandb_project: llama-7B-ft 8 | 9 | -------------------------------------------------------------------------------- /conf/llama_8b_bg/atoms_params.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: atom_sequences_plusplus 4 | dataset: "bandgap" 5 | dataset_type: filtered 6 | logging: 7 | wandb_project: llama-7B-ft 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /conf/llama_8b_bg/cifp1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_p1 4 | dataset: "bandgap" 5 | dataset_type: filtered 6 | logging: 7 | wandb_project: llama-7B-ft 8 | 9 | -------------------------------------------------------------------------------- /conf/llama_8b_bg/cifpsym.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: cif_symmetrized 4 | dataset: "bandgap" 5 | dataset_type: filtered 6 | logging: 7 | wandb_project: llama-7B-ft 8 | -------------------------------------------------------------------------------- /conf/llama_8b_bg/composition.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: composition 4 | dataset: "bandgap" 5 | dataset_type: filtered 6 | logging: 7 | wandb_project: llama-7B-ft 8 | -------------------------------------------------------------------------------- /conf/llama_8b_bg/crystal_llm.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: crystal_text_llm 4 | dataset: "bandgap" 5 | dataset_type: filtered 6 | logging: 7 | wandb_project: llama-7B-ft -------------------------------------------------------------------------------- /conf/llama_8b_bg/local_env.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: local_env 4 | dataset: "bandgap" 5 | dataset_type: filtered 6 | logging: 7 | wandb_project: llama-7B-ft 8 | -------------------------------------------------------------------------------- /conf/llama_8b_bg/slices.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: slices 4 | dataset: "bandgap" 5 | dataset_type: filtered 6 | logging: 7 | wandb_project: llama-7B-ft 8 | -------------------------------------------------------------------------------- /conf/llama_8b_bg/zmatrix.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | model: 3 | representation: zmatrix 4 | dataset: "bandgap" 5 | dataset_type: filtered 6 | logging: 7 | wandb_project: llama-7B-ft 8 | -------------------------------------------------------------------------------- /conf/llm_sft.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | job: 3 | name: llama_sft 4 | run: 5 | dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 6 | sweep: 7 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 8 | subdir: ${hydra.job.override_dirname} 9 | 10 | 11 | 12 | defaults: 13 | - model: none 14 | 15 | 16 | 17 | runs: 18 | - name: llama_sft_run 19 | tasks: [llama_sft] 20 | 21 | 22 | -------------------------------------------------------------------------------- /conf/pretrain.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | job: 3 | name: llama_sft 4 | run: 5 | dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 6 | sweep: 7 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 8 | subdir: ${hydra.job.override_dirname} 9 | 10 | 11 | 12 | defaults: 13 | - model: none 14 | 15 | 16 | 17 | runs: 18 | 19 | - name: pretrain_run 20 | tasks: [pretrain] 21 | 22 | 23 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # API Documentation 2 | 3 | 4 | ## Text representation 5 | 6 | ### Core class 7 | 8 | ::: mattext.representations.TextRep 9 | heading_level: 3 10 | 11 | 12 | ### Decoding 13 | 14 | ::: mattext.representations.decoder 15 | heading_level: 3 16 | 17 | ### Transformations 18 | 19 | ::: mattext.representations.transformations 20 | heading_level: 3 21 | 22 | ## Tokenizer 23 | 24 | ::: mattext.tokenizer 25 | heading_level: 3 26 | 27 | 28 | ## Models 29 | 30 | ::: mattext.models.benchmark 31 | heading_level: 3 32 | 33 | ::: mattext.models.finetune 34 | heading_level: 3 35 | 36 | ::: mattext.models.llama_sft 37 | heading_level: 3 38 | 39 | ::: mattext.models.llama 40 | heading_level: 3 41 | 42 | ::: mattext.models.potential 43 | heading_level: 3 44 | 45 | ::: mattext.models.predict 46 | heading_level: 3 47 | 48 | ::: mattext.models.pretrain 49 | heading_level: 3 50 | 51 | -------------------------------------------------------------------------------- /docs/getting_started.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 4 | The most recent code and data can be installed directly from GitHub with: 5 | 6 | 7 | ```shell 8 | $ pip install git+https://github.com/lamalab-org/mattext.git 9 | ``` 10 | 11 | To install in development mode, use the following: 12 | 13 | ```shell 14 | $ git clone https://github.com/lamalab-org/mattext.git 15 | $ cd mattext 16 | $ pip install -e . 17 | ``` 18 | 19 | 20 | If you want to use the Local Env representation, you will also need to install OpenBabel, e.g.
using 21 | 22 | ```bash 23 | conda install openbabel -c conda-forge 24 | ``` 25 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # MatText documentation 2 | 3 |
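The API listing in `docs/api.md` above names `mattext.representations.TextRep` as the core class for building text representations. As a minimal quickstart sketch of the intended workflow: the `from_input` and `get_requested_text_reps` calls below are assumed method names inferred from that listing and may differ from the released API, and `example.cif` is a placeholder file.

```python
from pymatgen.core import Structure

from mattext.representations import TextRep  # core class per docs/api.md

# Load a crystal structure from a local CIF file (placeholder name).
structure = Structure.from_file("example.cif")

# Wrap the structure and request a few of the representations that the
# configs above select (method names are assumptions, see note above).
text_rep = TextRep.from_input(structure)
reps = text_rep.get_requested_text_reps(["cif_p1", "composition", "zmatrix"])

print(reps["composition"])
```

The representation keys ("cif_p1", "composition", "zmatrix", and so on) mirror the `representation:` values used throughout the configs above.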