├── README.md ├── continue_training.ipynb ├── data ├── 2Mix │ ├── 160M.txt │ └── 70M.txt ├── 3Mix │ ├── 160M.txt │ ├── 410M.txt │ └── 70M.txt ├── 5Mix │ ├── 160M.txt │ ├── 1B.txt │ ├── 305M.txt │ ├── 410M.txt │ ├── 70M.txt │ ├── mix5mlp.pt │ └── proportions.txt ├── RPJ.predictions.json ├── RedPajama │ ├── 160M.txt │ ├── 1B.txt │ ├── 305M.txt │ ├── 410M.txt │ ├── 70M.txt │ └── proportions.txt └── ct │ └── 70M.txt ├── law.py ├── mix_2_domains.ipynb ├── mix_3_domains.ipynb ├── mix_5_domains.ipynb ├── pipeline ├── find_opt.py ├── get_loss.py ├── mixlaw.py ├── ratios.txt ├── run.sh ├── sizelaw.py ├── steplaw.py ├── utils.py └── valid_weight.json └── predict_vs_observe.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Data Mixing Laws: Optimizing Data Mixture by Predicting Language Modeling Performance 2 | 3 | Code and data for "[Data Mixing Laws: Optimizing Data Mixture by Predicting Language Modeling Performance](https://arxiv.org/abs/2403.16952)" 4 | 5 | ## Data Mixing Laws 6 | 7 | We include the codes to reproduce experiments and figures to discover data mixing laws in 8 | * `mix_2_domains.ipynb`: two training domains, single validation domain 9 | * `mix_3_domains.ipynb`: multiple training domains, single validation domain 10 | * `mix_5_domains.ipynb`: multiple training domains, multiple validation domains 11 | 12 | ## Prediction Pipeline 13 | 14 | Our full prediction pipeline can be reproduced with 15 | ```bash 16 | cd pipeline 17 | bash run.sh 18 | ``` 19 | 20 | ## Citation 21 | ``` 22 | @article{ye2024datamixinglaws, 23 | title={Data Mixing Laws: Optimizing Data Mixtures by Predicting Language Modeling Performance}, 24 | author={Ye, Jiasheng and Liu, Peiju and Sun, Tianxiang and Zhou, Yunhua and Zhan, Jun and Qiu, Xipeng}, 25 | journal={arXiv preprint arXiv:2403.16952}, 26 | year={2024} 27 | } 28 | ``` 29 | -------------------------------------------------------------------------------- /data/2Mix/160M.txt: -------------------------------------------------------------------------------- 1 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1429043047823886 2 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.04642625569332 3 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.0679976119686234 4 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.15625 5 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.0411717959261133 6 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.058101515055668 7 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.0850321039979758 8 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1251719857034412 9 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1725589891194332 10 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1026141826923077 11 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2071063701923077 12 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.3731236563267815 13 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.508066838144963 14 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.4041385135135136 15 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.5321329852579852 16 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.4806223126535625 17 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.394425675675676 18 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.3660309812653564 19 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.427499232186732 20 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.555954391891892 21 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.4502504990786242 22 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_5-0.75-0.25/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.598625614250614 23 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1553268123734817 24 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2774781756072875 25 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.188000142332996 26 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2777588879048583 27 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1713649734311742 28 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.206770306174089 29 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1597865795799596 30 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.24947218180668 31 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.0762252498734817 32 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2316686962297572 33 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.0781922128036436 34 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.101576337930162 35 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.0902984248481782 36 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1197277486082995 37 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1916434716599191 38 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1578848526062753 39 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2184752182439271 40 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3393238391953441 41 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.235082711285425 42 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2315659001771255 43 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3604899418016194 44 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3007377593623481 45 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3226254111842106 46 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2130171432186234 47 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1360367377277327 48 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2268254206730769 49 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2530660899544535 50 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.186007480389676 51 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2341891763663968 52 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1216868041497976 53 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1111423013663968 54 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1066311361336032 55 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2605840397267207 56 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3363111241143724 57 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2452575436487854 58 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3672388980263157 59 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2819260817307692 60 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4930968496963564 61 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.210213973937247 62 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1503352732793521 63 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2578955275809716 64 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.1323815473178138 65 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.167221501771255 66 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.281977479757085 67 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.2277478117321867 68 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.3251545224201475 69 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.1921404714373462 70 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.250139166154791 71 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.1978558814496316 72 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.3360046836609336 73 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.2113789926289926 74 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.297743588759214 75 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.2979067490786242 76 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.32726121007371 77 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.469205889127764 78 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.293578201781327 79 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.406048449017199 80 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.3487983722358723 81 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.312216868857494 82 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.4512150644963144 83 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.406489941646192 84 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.3749280175061425 85 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_2-0.375-0.625/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.2801990555896805 86 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.2580812346437344 87 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.318959229115479 88 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.18176539465602 89 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.1690196944103195 90 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.163433852886978 91 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.3742369855651106 92 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.4798017122235874 93 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_4-0.625-0.375/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.5200927134520885 94 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.330884328931204 95 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.2300224585380835 96 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.388551904176904 97 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.2486947174447174 98 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.236304130835381 99 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.1972128378378377 100 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.213663237100737 101 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.294547566031941 102 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.330855535933661 103 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.4550925214987713 104 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_1-0.25-0.75/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.2370335534398036 105 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.4228923525798525 106 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.2630528255528257 107 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.3672498848280097 108 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.459761785933661 109 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.307331656941032 110 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//160M_base2_3-0.5-0.5/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.2851178593366095 111 | -------------------------------------------------------------------------------- /data/2Mix/70M.txt: -------------------------------------------------------------------------------- 1 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4475028466599191 2 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4690267586032388 3 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4580414189018218 4 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.5494369939271255 5 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4443359375 6 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.481441358805668 7 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.514166086791498 8 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.557593465334008 9 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.576401189271255 10 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.5005495635121457 11 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4647488613360324 12 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.620832806174089 13 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3983188891700404 14 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4904992725202428 15 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3889644483805668 16 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3859675480769231 17 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4968726277834008 18 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4163675037955465 19 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4282898690536436 20 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4566991396761133 21 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.5199266194331984 22 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.5498363170546559 23 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.5239701704545454 24 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.570288505835381 25 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.535751305282555 26 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6092118396805897 27 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.502212261977887 28 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.498733108108108 29 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.602253531941032 30 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.512001881142506 31 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6703489711302213 32 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.535132255835381 33 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.54479710534398 34 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6113425214987713 35 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.553962876228501 36 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6430868012899262 37 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.531590717137592 38 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_1-0.25-0.75/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.628767083845209 39 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.5592127994471743 40 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6000412699631448 41 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.702006871928747 42 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6643360334766584 43 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.5726495316339064 44 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_2-0.375-0.625/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6390653792997543 45 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3959130661690284 46 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2959182059716599 47 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.328846549215587 48 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3889802631578947 49 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2935973873987854 50 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3170427947874495 51 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3648390055668016 52 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3063243294534412 53 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3482097672064777 54 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4469295609817814 55 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4093160899544535 56 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3259801208248987 57 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3565856686487854 58 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2706303770242915 59 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2609655712297572 60 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2567707015435223 61 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.278539742535425 62 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3451377467105263 63 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.2927710652834008 64 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3749110418775303 65 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3099142839068827 66 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.411091298709514 67 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3968026473937247 68 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4356536247469636 69 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3327172159665992 70 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3278245192307692 71 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3401778371710527 72 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3486328125 73 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4161105136639676 74 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4640767332995952 75 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.3660488360323886 76 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.381837914347166 77 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Github.bin 1.4789030870445343 78 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.739605727886978 79 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.668803746928747 80 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.71570753992629 81 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.7486083384520885 82 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.630969748157248 83 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6530107877764126 84 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6268907401719903 85 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.641474393427518 86 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6905088682432434 87 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.759525683353808 88 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_4-0.625-0.375/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.801385902948403 89 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.7085908707002457 90 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.789753531941032 91 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.8346418151105652 92 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.725242820945946 93 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.7032881603194103 94 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.731812423218673 95 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.8148802211302213 96 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.751871544840295 97 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.85620777027027 98 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.770217483108108 99 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_5-0.75-0.25/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.888470324017199 100 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/16000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6435090985872236 101 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/13000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6879222972972974 102 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/27000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.572260826167076 103 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.5669533169533167 104 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/24000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.58225679514742 105 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/22000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.5920416154791153 106 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/14000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.673736947174447 107 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/20000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6117168304668303 108 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/12000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.7229681741400493 109 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/18000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.6260797374078626 110 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/base2//70M_base2_3-0.5-0.5/10000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.domains/pythia-pile.valid.Pile-CC.bin 3.7396681127149876 111 | -------------------------------------------------------------------------------- /data/3Mix/410M.txt: -------------------------------------------------------------------------------- 1 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//30-0.75-0.0-0.25/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.0560930383350202 2 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//2-0.0-0.25-0.75/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 2.509271413208502 3 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//19-0.375-0.125-0.5/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.1778806616902835 4 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//23-0.5-0.0-0.5/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.1187492092611335 5 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//22-0.375-0.5-0.125/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.2026149734311742 6 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//5-0.0-0.625-0.375/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 2.494842405743927 7 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//28-0.625-0.125-0.25/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.08768602131832 8 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//11-0.125-0.625-0.25/30000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.3996216314524292 9 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//22-0.375-0.5-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.1010396239245952 10 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//23-0.5-0.0-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.0315351602036944 11 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//2-0.0-0.25-0.75/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 2.279808878415992 12 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//11-0.125-0.625-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.28365186930668 13 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//5-0.0-0.625-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 2.26472751138664 14 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//30-0.75-0.0-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 0.9652322004681174 15 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//28-0.625-0.125-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.0020430715460527 16 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//19-0.375-0.125-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.0814362190030364 17 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//22-0.375-0.5-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.2697241660276073 18 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//23-0.5-0.0-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.10989263803681 19 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//2-0.0-0.25-0.75/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.0283418807515337 20 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//11-0.125-0.625-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.1964328508435584 21 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//5-0.0-0.625-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.1653338286042945 22 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//28-0.625-0.125-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.2183210314417177 23 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//30-0.75-0.0-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.248880847392638 24 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//19-0.375-0.125-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.09260089148773 25 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//22-0.375-0.5-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.9966951695884148 26 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//2-0.0-0.25-0.75/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.970974061547256 27 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//23-0.5-0.0-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.2394811118521343 28 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//11-0.125-0.625-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.9623532178925305 29 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//5-0.0-0.625-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.965984065358232 30 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//28-0.625-0.125-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.125276891196646 31 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//30-0.75-0.0-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.361797053639482 32 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//19-0.375-0.125-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.0555360375381095 33 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//18-0.375-0.0-0.625/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.0748819822241902 34 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//16-0.25-0.5-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.1750676081730769 35 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//24-0.5-0.125-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.0369601230389676 36 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//14-0.25-0.25-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.1520343734185223 37 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//10-0.125-0.5-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.268539861146255 38 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//31-0.75-0.125-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 0.9682849467042004 39 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//21-0.375-0.375-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.104219382591093 40 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//17-0.25-0.625-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.174045578188259 41 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//18-0.375-0.0-0.625/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.066437404141104 42 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//16-0.25-0.5-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.191545245398773 43 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//24-0.5-0.125-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.145511407208589 44 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//14-0.25-0.25-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.09027990797546 45 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//10-0.125-0.5-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.1448631614263802 46 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//21-0.375-0.375-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.2034317484662576 47 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//31-0.75-0.125-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.326724261886503 48 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//17-0.25-0.625-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.27063482553681 49 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//18-0.375-0.0-0.625/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.2011554997141767 50 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//16-0.25-0.5-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.9786972418064024 51 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//24-0.5-0.125-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.0851187357088414 52 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//14-0.25-0.25-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.0011611566310976 53 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//10-0.125-0.5-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.968745534012957 54 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//21-0.375-0.375-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.0180812928734757 55 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//31-0.75-0.125-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.172437714367378 56 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//17-0.25-0.625-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.9793165253429876 57 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//26-0.5-0.375-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.0533798155996963 58 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//1-0.0-0.125-0.875/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 2.185768281882591 59 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//25-0.5-0.25-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.044514150272014 60 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//29-0.625-0.25-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.010416501929403 61 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//15-0.25-0.375-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.1598577460779351 62 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//9-0.125-0.375-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.2529632939018218 63 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//12-0.25-0.0-0.75/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.1333759014423077 64 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//4-0.0-0.5-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 2.2222925101214575 65 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//1-0.0-0.125-0.875/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.0020525786042946 66 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//26-0.5-0.375-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.292016152223926 67 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//29-0.625-0.25-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.3059887845092026 68 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//25-0.5-0.25-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.2079490989263806 69 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//15-0.25-0.375-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.1366840011503068 70 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//9-0.125-0.375-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.085921922929448 71 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//12-0.25-0.0-0.75/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.02313794095092 72 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//4-0.0-0.5-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.113416650690184 73 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//1-0.0-0.125-0.875/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.0041146627286586 74 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//26-0.5-0.375-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.0464388219321648 75 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//29-0.625-0.25-0.125/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.0929729182545733 76 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//25-0.5-0.25-0.25/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.0574802305640243 77 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//15-0.25-0.375-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.9880252000762195 78 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//9-0.125-0.375-0.5/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.963758515148628 79 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//12-0.25-0.0-0.75/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.1623564929496952 80 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//3-0.0-0.375-0.625/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 2.2137248545040484 81 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//7-0.125-0.125-0.75/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.2367719271887652 82 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//13-0.25-0.125-0.625/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.1408147773279351 83 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//6-0.125-0.0-0.875/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.2276804070723684 84 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//27-0.625-0.0-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 0.9985588784159919 85 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//20-0.375-0.25-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.0931816564397774 86 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//3-0.0-0.375-0.625/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.077565423696319 87 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//7-0.125-0.125-0.75/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.018190423696319 88 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//13-0.25-0.125-0.625/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.0501977089723926 89 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//6-0.125-0.0-0.875/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 2.9952765529141105 90 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//27-0.625-0.0-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.172266823236196 91 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//20-0.375-0.25-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.1391403853527606 92 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//3-0.0-0.375-0.625/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.972791718273628 93 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//7-0.125-0.125-0.75/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.009342844893293 94 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//13-0.25-0.125-0.625/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.0290616663490852 95 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//6-0.125-0.0-0.875/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.1394638433689024 96 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//27-0.625-0.0-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.294091201410061 97 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//20-0.375-0.25-0.375/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.0248576838795733 98 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//0-0.0-0.0-1.0/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 3.3230365953947367 99 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//0-0.0-0.0-1.0/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.0011023773006134 100 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC/4-0.0-0.5-0.5/60000/ /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.9615091463414633 101 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC//0-0.0-0.0-1.0/60000 /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 3.2022154273056405 102 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC/8-0.125-0.25-0.625/60000/ /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Github.bin 1.2467609359185223 103 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC/8-0.125-0.25-0.625/60000/ /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Pile-CC.bin 3.052737969708589 104 | /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/ckpts/Github-Books3-PileCC/8-0.125-0.25-0.625/60000/ /cpfs01/projects-HDD/cfff-173661e84712_HDD/public/jsye/data/pythia-pile.valid.Github-Books-PileCC/pythia-pile.valid.Books3.bin 2.9826347537157014 105 | -------------------------------------------------------------------------------- /data/5Mix/mix5mlp.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yegcjs/mixinglaws/665850190419fc57c57d77d9d3bd8b632f15a66f/data/5Mix/mix5mlp.pt -------------------------------------------------------------------------------- /data/5Mix/proportions.txt: -------------------------------------------------------------------------------- 1 | 0-0-0-0.25-0.75 2 | 0.25-0.09375-0.03125-0.25-0.375 3 | 0.5-0-0-0.125-0.375 4 | 0.0625-0.1875-0-0-0.75 5 | 0.0625-0.09375-0.09375-0-0.75 6 | 0.0625-0-0.0625-0.125-0.75 7 | 0.5-0-0.0625-0.0625-0.375 8 | 0-0.09375-0.03125-0.125-0.75 9 | 0.5-0.1875-0.0625-0.0625-0.1875 10 | 0.125-0-0.0625-0.0625-0.75 11 | 0.5-0.1875-0-0.125-0.1875 12 | 0-0.09375-0.09375-0.0625-0.75 13 | 0.5-0.09375-0.0625-0.25-0.09375 14 | 0.5-0-0.0625-0.25-0.1875 15 | 0.5-0.1875-0.0625-0.25-0 16 | 0.125-0.1875-0.0625-0.25-0.375 17 | 0.5-0.09375-0.03125-0-0.375 18 | 0.5-0.09375-0.09375-0.125-0.1875 19 | 0.25-0.1875-0.0625-0.125-0.375 20 | 0.5-0.1875-0.09375-0.125-0.09375 21 | 0-0.1875-0-0.0625-0.75 22 | 0.125-0.09375-0.03125-0-0.75 23 | 0-0.1875-0.0625-0-0.75 24 | 0.0625-0.09375-0.03125-0.0625-0.75 25 | 0.25-0-0-0-0.75 26 | 0.125-0-0-0.125-0.75 27 | -------------------------------------------------------------------------------- /data/RPJ.predictions.json: -------------------------------------------------------------------------------- 1 | { 2 | "academic": { 3 | "0.0625-0.125-0.28125-0.0625-0.1875-0.125-0.15625": { 4 | "30000": [ 5 | 2.9300174713134766, 6 | 2.9306214830478483 7 | ] 8 | }, 9 | "0.125-0.125-0.28125-0.125-0.09375-0.125-0.125": { 10 | "30000": [ 11 | 2.852001190185547, 12 | 2.851106291584088 13 | ] 14 | }, 15 | "0.25-0.25-0.140625-0.125-0.09375-0.0-0.140625": { 16 | "30000": [ 17 | 2.8338637351989746, 18 | 2.833253335574745 19 | ] 20 | }, 21 | "0.0625-0.5-0.140625-0.0625-0.09375-0.125-0.015625": { 22 | "30000": [ 23 | 2.834183692932129, 24 | 2.8331159200700737 25 | ] 26 | }, 27 | "0.25-0.0625-0.140625-0.25-0.09375-0.0625-0.140625": { 28 | "30000": [ 29 | 2.811591148376465, 30 | 2.8117778082531144 31 | ] 32 | }, 33 | "0.25-0.25-0.0703125-0.125-0.09375-0.125-0.0859375": { 34 | "30000": [ 35 | 2.802999973297119, 36 | 2.8045044636537373 37 | ] 38 | }, 39 | "0.0-0.0625-0.5625-0.125-0.09375-0.125-0.03125": { 40 | "30000": [ 41 | 2.967801094055176, 42 | 2.9683396889156284 43 | ] 44 | }, 45 | "0.25-0.125-0.140625-0.125-0.1875-0.125-0.046875": { 46 | "30000": [ 47 | 2.8218636512756348, 48 | 2.8214353256830407 49 | ] 50 | }, 51 | "0.0625-0.125-0.140625-0.25-0.1875-0.125-0.109375": { 52 | "30000": [ 53 | 2.860473155975342, 54 | 2.86531759847466 55 | ] 56 | }, 57 | "0.0625-0.5-0.140625-0.0-0.1875-0.0-0.109375": { 58 | "30000": [ 59 | 3.1335039138793945, 60 | 3.133612661912514 61 | ] 62 | }, 63 | "0.0625-0.125-0.28125-0.125-0.1875-0.0625-0.15625": { 64 | "30000": [ 65 | 2.9080495834350586, 66 | 2.907173199939836 67 | ] 68 | }, 69 | "0.5-0.0625-0.0-0.125-0.09375-0.0625-0.15625": { 70 | "30000": [ 71 | 2.815553665161133, 72 | 2.814943341679643 73 | ] 74 | }, 75 | "0.0625-0.5-0.0703125-0.125-0.09375-0.125-0.0234375": { 76 | "30000": [ 77 | 2.812465190887451, 78 | 2.8123092219351644 79 | ] 80 | }, 81 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.015625": { 82 | "30000": [ 83 | 2.7962210178375244, 84 | 2.797223100845838 85 | ] 86 | }, 87 | "0.5-0.0625-0.0703125-0.0625-0.09375-0.0625-0.1484375": { 88 | "30000": [ 89 | 2.8454840183258057, 90 | 2.846400432651472 91 | ] 92 | }, 93 | "0.5-0.0625-0.140625-0.0625-0.0-0.125-0.109375": { 94 | "30000": [ 95 | 2.822133779525757, 96 | 2.8226800834336068 97 | ] 98 | }, 99 | "0.0625-0.25-0.0703125-0.25-0.09375-0.125-0.1484375": { 100 | "30000": [ 101 | 2.8294901847839355, 102 | 2.830116004123018 103 | ] 104 | }, 105 | "0.125-0.125-0.140625-0.125-0.1875-0.125-0.171875": { 106 | "30000": [ 107 | 2.88153076171875, 108 | 2.881709858083239 109 | ] 110 | }, 111 | "0.25-0.5-0.0-0.0625-0.09375-0.0625-0.03125": { 112 | "30000": [ 113 | 2.821284770965576, 114 | 2.8209445560234996 115 | ] 116 | }, 117 | "0.0625-0.125-0.28125-0.25-0.1875-0.0625-0.03125": { 118 | "30000": [ 119 | 2.8711047172546387, 120 | 2.8706892452045585 121 | ] 122 | }, 123 | "0.66796875-0.168375-0.046875-0.04296875-0.04296875-0.02734375-0.0234375": { 124 | "100000": [ 125 | 2.4058761596679688, 126 | 2.51987299157701 127 | ] 128 | }, 129 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.01562": { 130 | "100000": [ 131 | 2.349031686782837, 132 | 2.458348171627265 133 | ] 134 | } 135 | }, 136 | "prose": { 137 | "0.0625-0.125-0.28125-0.0625-0.1875-0.125-0.15625": { 138 | "30000": [ 139 | 3.2424473762512207, 140 | 3.2428480429092637 141 | ] 142 | }, 143 | "0.125-0.125-0.28125-0.125-0.09375-0.125-0.125": { 144 | "30000": [ 145 | 3.2767653465270996, 146 | 3.276142875555203 147 | ] 148 | }, 149 | "0.25-0.25-0.140625-0.125-0.09375-0.0-0.140625": { 150 | "30000": [ 151 | 3.22133469581604, 152 | 3.2216635766973347 153 | ] 154 | }, 155 | "0.0625-0.5-0.140625-0.0625-0.09375-0.125-0.015625": { 156 | "30000": [ 157 | 3.2219345569610596, 158 | 3.2205606063610404 159 | ] 160 | }, 161 | "0.25-0.0625-0.140625-0.25-0.09375-0.0625-0.140625": { 162 | "30000": [ 163 | 3.255295753479004, 164 | 3.2551157994923856 165 | ] 166 | }, 167 | "0.25-0.25-0.0703125-0.125-0.09375-0.125-0.0859375": { 168 | "30000": [ 169 | 3.2234678268432617, 170 | 3.2163098330425126 171 | ] 172 | }, 173 | "0.0-0.0625-0.5625-0.125-0.09375-0.125-0.03125": { 174 | "30000": [ 175 | 3.374268054962158, 176 | 3.374692655456853 177 | ] 178 | }, 179 | "0.25-0.125-0.140625-0.125-0.1875-0.125-0.046875": { 180 | "30000": [ 181 | 3.188995838165283, 182 | 3.1898435021414975 183 | ] 184 | }, 185 | "0.0625-0.125-0.140625-0.25-0.1875-0.125-0.109375": { 186 | "30000": [ 187 | 3.243699073791504, 188 | 3.244072463911802 189 | ] 190 | }, 191 | "0.0625-0.5-0.140625-0.0-0.1875-0.0-0.109375": { 192 | "30000": [ 193 | 3.1862070560455322, 194 | 3.187050136817893 195 | ] 196 | }, 197 | "0.0625-0.125-0.28125-0.125-0.1875-0.0625-0.15625": { 198 | "30000": [ 199 | 3.2442617416381836, 200 | 3.2441480607550757 201 | ] 202 | }, 203 | "0.5-0.0625-0.0-0.125-0.09375-0.0625-0.15625": { 204 | "30000": [ 205 | 3.203584671020508, 206 | 3.202796587484137 207 | ] 208 | }, 209 | "0.0625-0.5-0.0703125-0.125-0.09375-0.125-0.0234375": { 210 | "30000": [ 211 | 3.2231409549713135, 212 | 3.2243478842798217 213 | ] 214 | }, 215 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.015625": { 216 | "30000": [ 217 | 3.2525994777679443, 218 | 3.2529544733502536 219 | ] 220 | }, 221 | "0.5-0.0625-0.0703125-0.0625-0.09375-0.0625-0.1484375": { 222 | "30000": [ 223 | 3.2079176902770996, 224 | 3.207906190513959 225 | ] 226 | }, 227 | "0.5-0.0625-0.140625-0.0625-0.0-0.125-0.109375": { 228 | "30000": [ 229 | 3.365699052810669, 230 | 3.3665257177982233 231 | ] 232 | }, 233 | "0.0625-0.25-0.0703125-0.25-0.09375-0.125-0.1484375": { 234 | "30000": [ 235 | 3.271671772003174, 236 | 3.2728240502062182 237 | ] 238 | }, 239 | "0.125-0.125-0.140625-0.125-0.1875-0.125-0.171875": { 240 | "30000": [ 241 | 3.222820997238159, 242 | 3.2253678220177666 243 | ] 244 | }, 245 | "0.25-0.5-0.0-0.0625-0.09375-0.0625-0.03125": { 246 | "30000": [ 247 | 3.177290439605713, 248 | 3.177692239054568 249 | ] 250 | }, 251 | "0.0625-0.125-0.28125-0.25-0.1875-0.0625-0.03125": { 252 | "30000": [ 253 | 3.2534732818603516, 254 | 3.256125822890228 255 | ] 256 | }, 257 | "0.66796875-0.168375-0.046875-0.04296875-0.04296875-0.02734375-0.0234375": { 258 | "100000": [ 259 | 2.8425731658935547, 260 | 2.9100025777284264 261 | ] 262 | }, 263 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.01562": { 264 | "100000": [ 265 | 2.8087639808654785, 266 | 2.9173738895939088 267 | ] 268 | } 269 | }, 270 | "dialogue": { 271 | "0.0625-0.125-0.28125-0.0625-0.1875-0.125-0.15625": { 272 | "30000": [ 273 | 3.436397075653076, 274 | 3.434123347355769 275 | ] 276 | }, 277 | "0.125-0.125-0.28125-0.125-0.09375-0.125-0.125": { 278 | "30000": [ 279 | 3.4162344932556152, 280 | 3.4127516526442307 281 | ] 282 | }, 283 | "0.25-0.25-0.140625-0.125-0.09375-0.0-0.140625": { 284 | "30000": [ 285 | 3.3971638679504395, 286 | 3.3959763746995186 287 | ] 288 | }, 289 | "0.0625-0.5-0.140625-0.0625-0.09375-0.125-0.015625": { 290 | "30000": [ 291 | 3.4383938312530518, 292 | 3.435498046875 293 | ] 294 | }, 295 | "0.25-0.0625-0.140625-0.25-0.09375-0.0625-0.140625": { 296 | "30000": [ 297 | 3.4208216667175293, 298 | 3.418851412259615 299 | ] 300 | }, 301 | "0.25-0.25-0.0703125-0.125-0.09375-0.125-0.0859375": { 302 | "30000": [ 303 | 3.3937296867370605, 304 | 3.3904334435096155 305 | ] 306 | }, 307 | "0.0-0.0625-0.5625-0.125-0.09375-0.125-0.03125": { 308 | "30000": [ 309 | 3.506420850753784, 310 | 3.5056650015024036 311 | ] 312 | }, 313 | "0.25-0.125-0.140625-0.125-0.1875-0.125-0.046875": { 314 | "30000": [ 315 | 3.460205316543579, 316 | 3.458561823918269 317 | ] 318 | }, 319 | "0.0625-0.125-0.140625-0.25-0.1875-0.125-0.109375": { 320 | "30000": [ 321 | 3.4700193405151367, 322 | 3.4648014948918266 323 | ] 324 | }, 325 | "0.0625-0.5-0.140625-0.0-0.1875-0.0-0.109375": { 326 | "30000": [ 327 | 3.471752166748047, 328 | 3.47142333984375 329 | ] 330 | }, 331 | "0.0625-0.125-0.28125-0.125-0.1875-0.0625-0.15625": { 332 | "30000": [ 333 | 3.464780807495117, 334 | 3.4678729717548067 335 | ] 336 | }, 337 | "0.5-0.0625-0.0-0.125-0.09375-0.0625-0.15625": { 338 | "30000": [ 339 | 3.4250645637512207, 340 | 3.423660982572115 341 | ] 342 | }, 343 | "0.0625-0.5-0.0703125-0.125-0.09375-0.125-0.0234375": { 344 | "30000": [ 345 | 3.4527127742767334, 346 | 3.4527108999399037 347 | ] 348 | }, 349 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.015625": { 350 | "30000": [ 351 | 3.471829891204834, 352 | 3.471722881610577 353 | ] 354 | }, 355 | "0.5-0.0625-0.0703125-0.0625-0.09375-0.0625-0.1484375": { 356 | "30000": [ 357 | 3.4239442348480225, 358 | 3.4221172626201923 359 | ] 360 | }, 361 | "0.5-0.0625-0.140625-0.0625-0.0-0.125-0.109375": { 362 | "30000": [ 363 | 3.440342426300049, 364 | 3.439732008713942 365 | ] 366 | }, 367 | "0.0625-0.25-0.0703125-0.25-0.09375-0.125-0.1484375": { 368 | "30000": [ 369 | 3.421232223510742, 370 | 3.4207782451923077 371 | ] 372 | }, 373 | "0.125-0.125-0.140625-0.125-0.1875-0.125-0.171875": { 374 | "30000": [ 375 | 3.439239025115967, 376 | 3.4393160306490382 377 | ] 378 | }, 379 | "0.25-0.5-0.0-0.0625-0.09375-0.0625-0.03125": { 380 | "30000": [ 381 | 3.4357707500457764, 382 | 3.435026667668269 383 | ] 384 | }, 385 | "0.0625-0.125-0.28125-0.25-0.1875-0.0625-0.03125": { 386 | "30000": [ 387 | 3.523632049560547, 388 | 3.522898512620192 389 | ] 390 | }, 391 | "0.66796875-0.168375-0.046875-0.04296875-0.04296875-0.02734375-0.0234375": { 392 | "100000": [ 393 | 3.15234637260437, 394 | 3.198142653245192 395 | ] 396 | }, 397 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.01562": { 398 | "100000": [ 399 | 3.0460586547851562, 400 | 3.15412128155048 401 | ] 402 | } 403 | }, 404 | "symbolic": { 405 | "0.0625-0.125-0.28125-0.0625-0.1875-0.125-0.15625": { 406 | "30000": [ 407 | 1.9899041652679443, 408 | 1.9899926516089108 409 | ] 410 | }, 411 | "0.125-0.125-0.28125-0.125-0.09375-0.125-0.125": { 412 | "30000": [ 413 | 1.9573752880096436, 414 | 1.9578909795276402 415 | ] 416 | }, 417 | "0.25-0.25-0.140625-0.125-0.09375-0.0-0.140625": { 418 | "30000": [ 419 | 2.058415174484253, 420 | 2.0580224770523925 421 | ] 422 | }, 423 | "0.0625-0.5-0.140625-0.0625-0.09375-0.125-0.015625": { 424 | "30000": [ 425 | 2.0230274200439453, 426 | 2.024107073793317 427 | ] 428 | }, 429 | "0.25-0.0625-0.140625-0.25-0.09375-0.0625-0.140625": { 430 | "30000": [ 431 | 2.0357842445373535, 432 | 2.034512460035066 433 | ] 434 | }, 435 | "0.25-0.25-0.0703125-0.125-0.09375-0.125-0.0859375": { 436 | "30000": [ 437 | 2.063053607940674, 438 | 2.0613292530424916 439 | ] 440 | }, 441 | "0.0-0.0625-0.5625-0.125-0.09375-0.125-0.03125": { 442 | "30000": [ 443 | 1.8478426933288574, 444 | 1.8476288546823434 445 | ] 446 | }, 447 | "0.25-0.125-0.140625-0.125-0.1875-0.125-0.046875": { 448 | "30000": [ 449 | 1.9992642402648926, 450 | 1.9993199515264024 451 | ] 452 | }, 453 | "0.0625-0.125-0.140625-0.25-0.1875-0.125-0.109375": { 454 | "30000": [ 455 | 2.011667251586914, 456 | 2.0061156017945545 457 | ] 458 | }, 459 | "0.0625-0.5-0.140625-0.0-0.1875-0.0-0.109375": { 460 | "30000": [ 461 | 2.1013948917388916, 462 | 2.101368315542492 463 | ] 464 | }, 465 | "0.0625-0.125-0.28125-0.125-0.1875-0.0625-0.15625": { 466 | "30000": [ 467 | 2.01318359375, 468 | 2.0139417994533826 469 | ] 470 | }, 471 | "0.5-0.0625-0.0-0.125-0.09375-0.0625-0.15625": { 472 | "30000": [ 473 | 2.319758892059326, 474 | 2.320028877887789 475 | ] 476 | }, 477 | "0.0625-0.5-0.0703125-0.125-0.09375-0.125-0.0234375": { 478 | "30000": [ 479 | 2.0248565673828125, 480 | 2.0225785762169965 481 | ] 482 | }, 483 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.015625": { 484 | "30000": [ 485 | 1.9735321998596191, 486 | 1.972794032332921 487 | ] 488 | }, 489 | "0.5-0.0625-0.0703125-0.0625-0.09375-0.0625-0.1484375": { 490 | "30000": [ 491 | 2.106642246246338, 492 | 2.104820931311881 493 | ] 494 | }, 495 | "0.5-0.0625-0.140625-0.0625-0.0-0.125-0.109375": { 496 | "30000": [ 497 | 2.043154239654541, 498 | 2.0432749329620465 499 | ] 500 | }, 501 | "0.0625-0.25-0.0703125-0.25-0.09375-0.125-0.1484375": { 502 | "30000": [ 503 | 2.075809955596924, 504 | 2.075855217357673 505 | ] 506 | }, 507 | "0.125-0.125-0.140625-0.125-0.1875-0.125-0.171875": { 508 | "30000": [ 509 | 2.0428073406219482, 510 | 2.043242703176568 511 | ] 512 | }, 513 | "0.25-0.5-0.0-0.0625-0.09375-0.0625-0.03125": { 514 | "30000": [ 515 | 2.2741281986236572, 516 | 2.2742448561262374 517 | ] 518 | }, 519 | "0.0625-0.125-0.28125-0.25-0.1875-0.0625-0.03125": { 520 | "30000": [ 521 | 2.005448341369629, 522 | 2.0061534717924916 523 | ] 524 | }, 525 | "0.66796875-0.168375-0.046875-0.04296875-0.04296875-0.02734375-0.0234375": { 526 | "100000": [ 527 | 2.003422498703003, 528 | 1.9781240331064356 529 | ] 530 | }, 531 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.01562": { 532 | "100000": [ 533 | 1.6315585374832153, 534 | 1.8152572581476898 535 | ] 536 | } 537 | }, 538 | "internet": { 539 | "0.0625-0.125-0.28125-0.0625-0.1875-0.125-0.15625": { 540 | "30000": [ 541 | 3.4025323390960693, 542 | 3.4048792694161 543 | ] 544 | }, 545 | "0.125-0.125-0.28125-0.125-0.09375-0.125-0.125": { 546 | "30000": [ 547 | 3.355842113494873, 548 | 3.3560040878330497 549 | ] 550 | }, 551 | "0.25-0.25-0.140625-0.125-0.09375-0.0-0.140625": { 552 | "30000": [ 553 | 3.352553606033325, 554 | 3.3542164912840136 555 | ] 556 | }, 557 | "0.0625-0.5-0.140625-0.0625-0.09375-0.125-0.015625": { 558 | "30000": [ 559 | 3.2986202239990234, 560 | 3.2979639446924605 561 | ] 562 | }, 563 | "0.25-0.0625-0.140625-0.25-0.09375-0.0625-0.140625": { 564 | "30000": [ 565 | 3.3653645515441895, 566 | 3.3649232479450117 567 | ] 568 | }, 569 | "0.25-0.25-0.0703125-0.125-0.09375-0.125-0.0859375": { 570 | "30000": [ 571 | 3.2974839210510254, 572 | 3.2995140438988093 573 | ] 574 | }, 575 | "0.0-0.0625-0.5625-0.125-0.09375-0.125-0.03125": { 576 | "30000": [ 577 | 3.5242409706115723, 578 | 3.525057353670635 579 | ] 580 | }, 581 | "0.25-0.125-0.140625-0.125-0.1875-0.125-0.046875": { 582 | "30000": [ 583 | 3.340324640274048, 584 | 3.340169270833334 585 | ] 586 | }, 587 | "0.0625-0.125-0.140625-0.25-0.1875-0.125-0.109375": { 588 | "30000": [ 589 | 3.407989978790283, 590 | 3.4091492391227325 591 | ] 592 | }, 593 | "0.0625-0.5-0.140625-0.0-0.1875-0.0-0.109375": { 594 | "30000": [ 595 | 3.3758580684661865, 596 | 3.376147627019557 597 | ] 598 | }, 599 | "0.0625-0.125-0.28125-0.125-0.1875-0.0625-0.15625": { 600 | "30000": [ 601 | 3.416383743286133, 602 | 3.4106472992842978 603 | ] 604 | }, 605 | "0.5-0.0625-0.0-0.125-0.09375-0.0625-0.15625": { 606 | "30000": [ 607 | 3.3169758319854736, 608 | 3.316311583227041 609 | ] 610 | }, 611 | "0.0625-0.5-0.0703125-0.125-0.09375-0.125-0.0234375": { 612 | "30000": [ 613 | 3.3090291023254395, 614 | 3.3105883955144555 615 | ] 616 | }, 617 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.015625": { 618 | "30000": [ 619 | 3.3404195308685303, 620 | 3.340882870075113 621 | ] 622 | }, 623 | "0.5-0.0625-0.0703125-0.0625-0.09375-0.0625-0.1484375": { 624 | "30000": [ 625 | 3.320150375366211, 626 | 3.3199487802933674 627 | ] 628 | }, 629 | "0.5-0.0625-0.140625-0.0625-0.0-0.125-0.109375": { 630 | "30000": [ 631 | 3.303866386413574, 632 | 3.3040597098214284 633 | ] 634 | }, 635 | "0.0625-0.25-0.0703125-0.25-0.09375-0.125-0.1484375": { 636 | "30000": [ 637 | 3.356137275695801, 638 | 3.3566944355867347 639 | ] 640 | }, 641 | "0.125-0.125-0.140625-0.125-0.1875-0.125-0.171875": { 642 | "30000": [ 643 | 3.3749442100524902, 644 | 3.3755037822420633 645 | ] 646 | }, 647 | "0.25-0.5-0.0-0.0625-0.09375-0.0625-0.03125": { 648 | "30000": [ 649 | 3.2772610187530518, 650 | 3.276581433354592 651 | ] 652 | }, 653 | "0.0625-0.125-0.28125-0.25-0.1875-0.0625-0.03125": { 654 | "30000": [ 655 | 3.434436798095703, 656 | 3.4339818284084465 657 | ] 658 | }, 659 | "0.66796875-0.168375-0.046875-0.04296875-0.04296875-0.02734375-0.0234375": { 660 | "100000": [ 661 | 2.928129196166992, 662 | 3.000241372590703 663 | ] 664 | }, 665 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.01562": { 666 | "100000": [ 667 | 2.926778554916382, 668 | 3.01924946410856 669 | ] 670 | } 671 | }, 672 | "full": { 673 | "0.0625-0.125-0.28125-0.0625-0.1875-0.125-0.15625": { 674 | "30000": [ 675 | 3.0543947219848633, 676 | 3.054756258740837 677 | ] 678 | }, 679 | "0.125-0.125-0.28125-0.125-0.09375-0.125-0.125": { 680 | "30000": [ 681 | 3.0151305198669434, 682 | 3.0112737019856772 683 | ] 684 | }, 685 | "0.25-0.25-0.140625-0.125-0.09375-0.0-0.140625": { 686 | "30000": [ 687 | 3.007810115814209, 688 | 3.0071662149311584 689 | ] 690 | }, 691 | "0.0625-0.5-0.140625-0.0625-0.09375-0.125-0.015625": { 692 | "30000": [ 693 | 2.9865620136260986, 694 | 2.9858277874228394 695 | ] 696 | }, 697 | "0.25-0.0625-0.140625-0.25-0.09375-0.0625-0.140625": { 698 | "30000": [ 699 | 3.0068554878234863, 700 | 3.00697750515408 701 | ] 702 | }, 703 | "0.25-0.25-0.0703125-0.125-0.09375-0.125-0.0859375": { 704 | "30000": [ 705 | 2.978445053100586, 706 | 2.9780532460153837 707 | ] 708 | }, 709 | "0.0-0.0625-0.5625-0.125-0.09375-0.125-0.03125": { 710 | "30000": [ 711 | 3.114712715148926, 712 | 3.1154866630648392 713 | ] 714 | }, 715 | "0.25-0.125-0.140625-0.125-0.1875-0.125-0.046875": { 716 | "30000": [ 717 | 2.9897141456604004, 718 | 2.9898001588421104 719 | ] 720 | }, 721 | "0.0625-0.125-0.140625-0.25-0.1875-0.125-0.109375": { 722 | "30000": [ 723 | 3.0375113487243652, 724 | 3.037572083649812 725 | ] 726 | }, 727 | "0.0625-0.5-0.140625-0.0-0.1875-0.0-0.109375": { 728 | "30000": [ 729 | 3.119861602783203, 730 | 3.1205399595660928 731 | ] 732 | }, 733 | "0.0625-0.125-0.28125-0.125-0.1875-0.0625-0.15625": { 734 | "30000": [ 735 | 3.0540735721588135, 736 | 3.0534209263177567 737 | ] 738 | }, 739 | "0.5-0.0625-0.0-0.125-0.09375-0.0625-0.15625": { 740 | "30000": [ 741 | 3.0156397819519043, 742 | 3.0171791359230324 743 | ] 744 | }, 745 | "0.0625-0.5-0.0703125-0.125-0.09375-0.125-0.0234375": { 746 | "30000": [ 747 | 2.9877395629882812, 748 | 2.984295833257981 749 | ] 750 | }, 751 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.015625": { 752 | "30000": [ 753 | 2.988614559173584, 754 | 2.9889472678855613 755 | ] 756 | }, 757 | "0.5-0.0625-0.0703125-0.0625-0.09375-0.0625-0.1484375": { 758 | "30000": [ 759 | 3.004990577697754, 760 | 3.0046749350465376 761 | ] 762 | }, 763 | "0.5-0.0625-0.140625-0.0625-0.0-0.125-0.109375": { 764 | "30000": [ 765 | 3.0079407691955566, 766 | 3.0089875680428966 767 | ] 768 | }, 769 | "0.0625-0.25-0.0703125-0.25-0.09375-0.125-0.1484375": { 770 | "30000": [ 771 | 3.017430067062378, 772 | 3.0180458728178046 773 | ] 774 | }, 775 | "0.125-0.125-0.140625-0.125-0.1875-0.125-0.171875": { 776 | "30000": [ 777 | 3.0313010215759277, 778 | 3.0319261904116033 779 | ] 780 | }, 781 | "0.25-0.5-0.0-0.0625-0.09375-0.0625-0.03125": { 782 | "30000": [ 783 | 2.9970803260803223, 784 | 2.9971061989113137 785 | ] 786 | }, 787 | "0.0625-0.125-0.28125-0.25-0.1875-0.0625-0.03125": { 788 | "30000": [ 789 | 3.052419424057007, 790 | 3.052602414731626 791 | ] 792 | }, 793 | "0.66796875-0.168375-0.046875-0.04296875-0.04296875-0.02734375-0.0234375": { 794 | "100000": [ 795 | 2.6502251625061035, 796 | 2.7133224864064913 797 | ] 798 | }, 799 | "0.125-0.25-0.140625-0.25-0.09375-0.125-0.01562": { 800 | "100000": [ 801 | 2.568206548690796, 802 | 2.6787050506215038 803 | ] 804 | } 805 | } 806 | } -------------------------------------------------------------------------------- /data/RedPajama/proportions.txt: -------------------------------------------------------------------------------- 1 | 0.0625-0.125-0.28125-0.0625-0.1875-0.125-0.15625 2 | 0.125-0.125-0.28125-0.125-0.09375-0.125-0.125 3 | 0.25-0.25-0.140625-0.125-0.09375-0.0-0.140625 4 | 0.0625-0.5-0.140625-0.0625-0.09375-0.125-0.015625 5 | 0.25-0.0625-0.140625-0.25-0.09375-0.0625-0.140625 6 | 0.25-0.25-0.0703125-0.125-0.09375-0.125-0.0859375 7 | 0.0-0.0625-0.5625-0.125-0.09375-0.125-0.03125 8 | 0.25-0.125-0.140625-0.125-0.1875-0.125-0.046875 9 | 0.0625-0.125-0.140625-0.25-0.1875-0.125-0.109375 10 | 0.0625-0.5-0.140625-0.0-0.1875-0.0-0.109375 11 | 0.0625-0.125-0.28125-0.125-0.1875-0.0625-0.15625 12 | 0.5-0.0625-0.0-0.125-0.09375-0.0625-0.15625 13 | 0.0625-0.5-0.0703125-0.125-0.09375-0.125-0.0234375 14 | 0.125-0.25-0.140625-0.25-0.09375-0.125-0.015625 15 | 0.5-0.0625-0.0703125-0.0625-0.09375-0.0625-0.1484375 16 | 0.5-0.0625-0.140625-0.0625-0.0-0.125-0.109375 17 | 0.0625-0.25-0.0703125-0.25-0.09375-0.125-0.1484375 18 | 0.125-0.125-0.140625-0.125-0.1875-0.125-0.171875 19 | 0.25-0.5-0.0-0.0625-0.09375-0.0625-0.03125 20 | 0.0625-0.125-0.28125-0.25-0.1875-0.0625-0.03125 -------------------------------------------------------------------------------- /law.py: -------------------------------------------------------------------------------- 1 | # import multiprocessing as mp 2 | import multiprocessing as mp 3 | import torch 4 | from functools import partial 5 | import logging 6 | import numpy as np 7 | from tqdm import tqdm 8 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 9 | logger = logging.getLogger(__name__) 10 | import time 11 | 12 | mp.set_start_method("fork") 13 | 14 | def calculate_r_squared(actuals, predictions): 15 | actuals, predictions = actuals.numpy(), predictions.numpy() 16 | # Calculate the total sum of squares 17 | total_sum_of_squares = np.sum((actuals - np.mean(actuals)) ** 2) 18 | # Calculate the residual sum of squares 19 | residual_sum_of_squares = np.sum((actuals - predictions) ** 2) 20 | # Calculate R-squared 21 | r_squared = 1 - (residual_sum_of_squares / total_sum_of_squares) 22 | return r_squared 23 | 24 | 25 | def fit_scaling_laws(func, valid_split, x, y, max_step, eps, delta, init_param): 26 | param = torch.nn.Parameter(init_param) 27 | x, y = torch.tensor(x).to(param), torch.tensor(y).to(param) 28 | if valid_split == 0: 29 | train_x, eval_x = x, x[:0] 30 | train_y, eval_y = y, y[:0] 31 | else: 32 | train_x, eval_x = x[:-valid_split], x[-valid_split:] 33 | train_y, eval_y = y[:-valid_split], y[-valid_split:] 34 | optimizer = torch.optim.LBFGS([param], lr=0.01, history_size=10, max_iter=20, line_search_fn="strong_wolfe") 35 | # optimizer = torch.optim.AdamW([param], lr=1e-3) 36 | def closure(): 37 | optimizer.zero_grad() 38 | prediction = func(train_x, param) 39 | loss = torch.nn.functional.huber_loss(train_y, prediction, delta=delta, reduction="sum") 40 | loss.backward() 41 | return loss 42 | 43 | min_loss, best_param = 1e10, None 44 | best_step = 0 45 | for _ in range(max_step): 46 | loss = optimizer.step(closure).item() 47 | # prediction = func(train_x, param) 48 | # train_r2 = calculate_r_squared(train_y, prediction) 49 | with torch.no_grad(): 50 | if len(eval_x) > 1: 51 | eval_prediction = func(eval_x, param) 52 | eval_loss = torch.nn.functional.huber_loss(eval_prediction, eval_y, delta=delta).item() 53 | # eval_r2 = calculate_r_squared(eval_y, eval_prediction) 54 | # eval_loss = -eval_r2 55 | elif len(eval_x) == 1: 56 | eval_prediction = func(eval_x, param) 57 | eval_loss = torch.nn.functional.mse_loss(eval_prediction, eval_y).item() 58 | else: 59 | eval_prediction = func(train_x, param) 60 | eval_loss = torch.nn.functional.huber_loss(eval_prediction, train_y, delta=delta).item() 61 | # eval_loss = -calculate_r_squared(train_y, eval_prediction) 62 | # eval_loss = -eval_r2 63 | if eval_loss <= min_loss: # FIXME 64 | min_loss = eval_loss 65 | best_param = param.detach().clone() 66 | best_step = _ 67 | # print(loss) 68 | if np.abs(min_loss - eval_loss) < eps: 69 | assert False 70 | break 71 | return min_loss, best_param, best_step 72 | 73 | 74 | class ScalingLaw: 75 | def __init__(self, func): 76 | self.func = func 77 | self.params = None 78 | 79 | def fit(self, x, y, init_params, max_step=20, eps=0, workers=-1, valid_split=0, delta=0.01): 80 | if workers == -1: 81 | workers = mp.cpu_count() 82 | init_params = [torch.tensor(init_param, dtype=torch.float32) for init_param in init_params] 83 | minloss, optimal_param = 1e10, None 84 | _fit = partial(fit_scaling_laws, self.func, valid_split, x, y, max_step, eps, delta) 85 | if workers != 1: 86 | best_step = 0 87 | with mp.Pool(workers) as p: 88 | for loss, param, step in tqdm(p.imap_unordered(_fit, init_params, chunksize=2), total=len(init_params)): 89 | if loss < minloss: 90 | minloss = loss 91 | optimal_param = param 92 | best_step = step 93 | else: 94 | for init_param in tqdm(init_params): 95 | loss, param, step = _fit(init_param) 96 | # print(param) 97 | if loss < minloss: 98 | minloss = loss 99 | optimal_param = param 100 | self.params = optimal_param.tolist() 101 | print(minloss) 102 | print(optimal_param) 103 | return self.params 104 | 105 | 106 | -------------------------------------------------------------------------------- /pipeline/find_opt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import json 4 | import torch 5 | from tqdm import tqdm 6 | from collections import defaultdict 7 | from utils import * 8 | # from get_loss import steplaws, sizelaws, mixlaws 9 | import matplotlib.pyplot as plt 10 | 11 | GRID_SZ = 1 / 16 12 | max_size = [1, 1, 0.5857, 0.2588, 0.2254, 0.1779, 0.1727] 13 | 14 | def predict_losses(size, step, ratios): 15 | ckpt = torch.load("../data/RedPajama/mixlaws.pt") 16 | law = ckpt[size]["full"][step]["params"] 17 | prediction = law.predict(np.array(ratios)) 18 | return prediction.tolist() 19 | # total_losses, domain_losses = 0, {domain: [] for domain in DOMAINS_2_SUBDOMAINS} 20 | # for domain in DOMAINS_2_SUBDOMAINS: 21 | # loss = batch_get_loss(size, ratios, domain, step) 22 | # total_losses += loss * valid_weight[domain] 23 | # domain_losses[domain] = loss.tolist() 24 | # total_losses = total_losses.tolist() 25 | # return total_losses, domain_losses 26 | 27 | 28 | def dfs(depth, rs): 29 | results = [] 30 | if depth == len(max_size) - 1: 31 | r = 1 - sum(rs) 32 | if 0 <= r <= max_size[depth]: 33 | results.append(rs + [r]) 34 | else: 35 | for s in range(0, 1+int(max_size[depth] / GRID_SZ)): 36 | r = GRID_SZ * int(max_size[depth] / GRID_SZ) * (2**(-s)) 37 | if r < GRID_SZ: 38 | r = 0.0 39 | results += dfs(depth+1, rs+[r]) 40 | return results 41 | 42 | def find_optimal_ratio(size, step, savefig, write_losses=None): 43 | GRID = 256 44 | optimal_ratio, min_loss, min_loss_domain = [], 1000, None 45 | t = int(GRID * (1 - 0.103806741)) 46 | # ratios = [ 47 | # (r1/GRID, r2/GRID, (GRID - r1 - r2 - r4 - r5)/GRID, r4/GRID, r5/GRID) 48 | # for r1 in tqdm(range(int(0.56 * GRID))) 49 | # for r2 in range(int(min(1-r1/GRID, 0.247208695) * GRID)) 50 | # for r4 in range(int(min(1-r1/GRID-r2/GRID, 0.272774883) * GRID)) 51 | # for r5 in range(int(min(1-r1/GRID-r2/GRID-r4/GRID, 0.769600522) * GRID)) 52 | # if (r1 + r2 + r4 + r5 <= GRID) and (r1 + r2 + r4 + r5 >= t) 53 | # ] 54 | ratios = dfs(0, []) 55 | ratios = set(["-".join(map(str, ratio)) for ratio in ratios]) 56 | ratios = [list(map(float, ratio.split('-'))) for ratio in ratios] 57 | ratio_chunks = [ratios[i:i+8192] for i in range(0, len(ratios), 8192)] 58 | all_losses = [] 59 | for ratio_ch in tqdm(ratio_chunks): 60 | loss_ch = predict_losses(size, step, ratio_ch) 61 | min_loss_idx = np.argmin(loss_ch) 62 | if loss_ch[min_loss_idx] < min_loss: 63 | min_loss = loss_ch[min_loss_idx] 64 | optimal_ratio = ratio_ch[min_loss_idx] 65 | # min_loss_domain = {domain: domain_losses[domain][min_loss_idx] for domain in domain_losses} 66 | all_losses += loss_ch 67 | if write_losses is not None: 68 | with open(write_losses, "w") as f: 69 | for ratio, loss in zip(ratios, all_losses): 70 | f.write(f"{ratio}\t{loss}\n") 71 | plt.hist(all_losses, bins=int((max(all_losses)-min(all_losses))/0.01)) 72 | plt.legend() 73 | plt.savefig(savefig) 74 | print("optimal ratio:", optimal_ratio, "\nmin loss:", min_loss, "\n", min_loss_domain) 75 | 76 | if __name__ == '__main__': 77 | fire.Fire(find_optimal_ratio) -------------------------------------------------------------------------------- /pipeline/get_loss.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import json 4 | import torch 5 | from collections import defaultdict 6 | from utils import * 7 | 8 | def load_loss(path, filter=True): 9 | losses = defaultdict(lambda: defaultdict(dict)) # domain -> ratio 10 | with open(path, "r") as f: 11 | lines = set(f.readlines()) 12 | for line in lines: 13 | model, data, loss = line.split() 14 | model = model.strip('/') 15 | step = int(model.split('/')[-1]) 16 | if filter and int(model.split('/')[-2].split('-')[0].split('_')[-1]) > 900: 17 | continue 18 | ratio = "-".join(model.split('/')[-2].split('-')[1:]) 19 | subdomain = data.split('/')[-1].split('.')[-2] 20 | losses[ratio][subdomain][step] = float(loss) 21 | # merge 22 | for domain, subdomains in DOMAINS_2_SUBDOMAINS.items(): 23 | domain_norm = sum(valid_weight[subdomain] for subdomain in subdomains) 24 | for ratio in losses: 25 | for step in losses[ratio][subdomains[0]]: 26 | try: 27 | losses[ratio][domain][step] = sum(losses[ratio][subdomain][step] * valid_weight[subdomain] / domain_norm for subdomain in subdomains) 28 | except: 29 | continue 30 | return losses 31 | 32 | class GetLoss: 33 | def __init__(self, train_data): 34 | homedir = os.path.dirname(os.path.abspath(__file__)) 35 | self.train_data = train_data 36 | self.OBSERVED_LOSSES = { 37 | "70M": load_loss(f"../data/{train_data}/70M.txt"), # filter=False), 38 | "160M": load_loss(f"../data/{train_data}/160M.txt"), # filter=False), 39 | "305M": load_loss(f"../data/{train_data}/305M.txt"), 40 | "410M": load_loss(f"../data/{train_data}/410M.txt") 41 | } # sz, ratio, domain, step 42 | self.steplaws = defaultdict(lambda:defaultdict(lambda:defaultdict)) 43 | if os.path.exists(STEPLAW_FILES[train_data]): 44 | with open(STEPLAW_FILES[train_data], "r") as f: 45 | self.steplaws = json.load(f) # sz, ratio, domain 46 | self.sizelaws = defaultdict(lambda:defaultdict(lambda:defaultdict)) 47 | if os.path.exists(SIZELAW_FILES[train_data]): 48 | with open(SIZELAW_FILES[train_data], "r") as f: 49 | self.sizelaws = json.load(f) # size, step, ratio, domain 50 | self.mixlaws = defaultdict(lambda:defaultdict(lambda:defaultdict)) 51 | if os.path.exists(MIXLAW_FILES[train_data]): 52 | self.mixlaws = torch.load(MIXLAW_FILES[train_data]) 53 | 54 | def load(self, size, filter=True): 55 | self.OBSERVED_LOSSES[size] = load_loss(f"{homedir}/data/{self.train_data}/{size}.txt", filter=filter) 56 | 57 | def get_loss(self, size, ratio, domain, step): 58 | try: 59 | loss = self.OBSERVED_LOSSES[size][ratio][domain][step] 60 | except: 61 | if size in self.steplaws: 62 | # print("from steplaw") 63 | token = BSZ * step 64 | step_param = self.steplaws[size][ratio][domain] 65 | logc, logd0, alpha = step_param["log_c"], step_param["log_d0"], step_param["alpha"] 66 | loss = power_law(token, torch.tensor([logc, logd0, alpha])) 67 | elif ratio in self.sizelaws: 68 | # print("from sizelaw") 69 | size_param = self.sizelaws[ratio][domain][str(step)] 70 | logc, logn0, alpha = size_param["log_c"], size_param["log_n0"], size_param["alpha"] 71 | loss = power_law(MODEL_SIZES[size], torch.tensor([logc, logn0, alpha])) 72 | elif (size in self.mixlaws) and (domain in self.mixlaws[size]): 73 | # print("from mixlaw") 74 | param = self.mixlaws[size][domain][step] 75 | x = np.array([list(map(float, ratio.split('-')))]) 76 | if param["type"] == "linear": 77 | loss = mixture_law_2(x, param["params"]).item() # mixture_law(x, param["params"]) 78 | elif param["type"] == "mlp": 79 | mlp = MixtureMLP(); mlp.load_state_dict(param["params"]) 80 | loss = mlp(torch.tensor(x)).item() 81 | elif param["type"] == "mlp_adaboost": 82 | regr = param["params"] 83 | loss = regr.predict(x).item() 84 | else: 85 | raise NotImplementedError 86 | return loss 87 | 88 | def main(size, step, ratios="ratios/default"): 89 | with open(ratios, "r") as f: 90 | ratios = [line.strip() for line in f] 91 | 92 | for ratio in ratios: 93 | for domain in DOMAINS_2_SUBDOMAINS: 94 | print(size, ratio, domain, step, get_loss(size, ratio, domain, step)) 95 | 96 | 97 | if __name__ == '__main__': 98 | fire.Fire(main) -------------------------------------------------------------------------------- /pipeline/mixlaw.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import json 4 | from tqdm import tqdm 5 | from collections import defaultdict 6 | import multiprocessing as mp 7 | from utils import * 8 | from get_loss import GetLoss 9 | from law import ScalingLaw 10 | 11 | import matplotlib.pylab as plt 12 | import warnings 13 | warnings.filterwarnings('ignore') 14 | 15 | 16 | def fit_regressor(args): 17 | dim, activation, x, y, state = args 18 | # random.seed(seed+seed2) 19 | # np.random.seed(seed+seed2) 20 | # batch_ratios = np.random.choice(ratios, size, replace=False) 21 | # x = np.array([list(map(float, ratio.split('-'))) for ratio in batch_ratios]) 22 | # y = np.array([ 23 | # sum(losses[ratio][domain][STEP] * valid_weight[domain] for domain in DOMAINS_2_SUBDOMAINS) 24 | # for ratio in batch_ratios 25 | # ], dtype=np.float32) 26 | regr = AdaBoostRegressor(MLPEstimator(dim, x.shape[-1], activation), random_state=state, n_estimators=30) 27 | regr.fit(x, y) 28 | return None, regr, args 29 | 30 | def main(size, step, train_data, ratios, target="full", seed=42): 31 | get_loss = GetLoss(train_data) 32 | set_seed(seed) 33 | with open(ratios, "r") as f: 34 | ratios = [line.strip() for line in f] 35 | np.random.shuffle(ratios) 36 | num_mixture = len(ratios[0].split('-')) 37 | 38 | mixlaws = defaultdict(lambda:defaultdict(lambda: defaultdict(dict))) # size, step, domain 39 | if os.path.exists(MIXLAW_FILES[train_data]): 40 | mixlaws.update(torch.load(MIXLAW_FILES[train_data])) 41 | 42 | x, y = [], [] 43 | for ratio in ratios: 44 | if target == "full": 45 | y.append(sum( 46 | valid_weight[domain] * get_loss.get_loss(size, ratio, domain, step) 47 | for domain in DOMAINS_2_SUBDOMAINS 48 | )) 49 | else: 50 | y.append(get_loss.get_loss(size, ratio, target, step)) 51 | x.append(list(map(float, ratio.split('-')))) 52 | 53 | args_list = [] 54 | for i in range(16): 55 | args_list.append( 56 | [30, "exp", np.array(x), np.array(y), seed+i], 57 | ) 58 | min_mae, best_model = 100, None 59 | with mp.Pool(16) as p: 60 | # for args in tqdm(args_list): 61 | # fit_regressor(args_list[0]) 62 | # _, regr, _ = fit_regressor(args) 63 | for _, regr, args in tqdm(p.imap(fit_regressor, args_list)): 64 | prediction = regr.predict(np.array(x)) 65 | mae = np.mean(np.abs(prediction - y)) 66 | print(mae) 67 | if mae < min_mae: 68 | min_mae = mae 69 | best_model = regr 70 | if target not in mixlaws[size]: 71 | mixlaws[size][target] = defaultdict(dict) 72 | mixlaws[size][target][step] = {"type": "mlp_adaboost", "params": best_model} 73 | 74 | torch.save({size: {domain: dict(content) for domain, content in mixlaws[size].items()}for size in mixlaws}, MIXLAW_FILES[train_data]) 75 | 76 | 77 | if __name__ == '__main__': 78 | mp.get_start_method("fork") 79 | fire.Fire(main) -------------------------------------------------------------------------------- /pipeline/ratios.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yegcjs/mixinglaws/665850190419fc57c57d77d9d3bd8b632f15a66f/pipeline/ratios.txt -------------------------------------------------------------------------------- /pipeline/run.sh: -------------------------------------------------------------------------------- 1 | DATA=RedPajama 2 | mkdir ../figs 3 | for size in "70M" "160M" "305M" "410M" 4 | do 5 | python3 steplaw.py --train_data $DATA --savefig ../figs/${DATA}_${size}_steps.png --model_size ${size} --fit_step_range 10000,30000 --tie_alpha valid --ratios ../data/${DATA}/proportions.txt 6 | done 7 | 8 | python sizelaw.py --train_data $DATA --savefig ../figs/${DATA}_sizes.png --fit_sizes 70M,160M,305M,410M --target_size 1B --step 100000 --tie_alpha all --ratios ../data/${DATA}/proportions.txt 9 | python mixlaw.py --size 1B --step 100000 --train_data $DATA --ratios ../data/${DATA}/proportions.txt 10 | 11 | python find_opt.py --size 1B --step 100000 --savefig ../figs/opt.png 12 | -------------------------------------------------------------------------------- /pipeline/sizelaw.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import json 4 | import torch 5 | import numpy as np 6 | from collections import defaultdict 7 | import matplotlib.pyplot as plt 8 | 9 | from get_loss import GetLoss 10 | 11 | from law import ScalingLaw 12 | from utils import * 13 | 14 | import warnings 15 | warnings.filterwarnings('ignore') 16 | 17 | def param_generator_share_alpha(log_c): 18 | for log_N0 in np.linspace(15, 25, 20): 19 | for alpha in np.linspace(0.3, 0.8, 10): 20 | for _ in range(50): 21 | yield [_log_c + (1+np.random.rand()) for _log_c in log_c] + [log_N0 + np.random.rand() - 0.5 for _ in log_c] + [alpha] 22 | 23 | def main(savefig, fit_sizes, target_size, train_data, step, ratios=None, tie_alpha="all", seed=42, variable="size"): 24 | global MODEL_SIZES 25 | MODEL_SIZES = MODEL_SIZES if variable != "flops" else MODEL_FLOPS 26 | # if variable == "flops": 27 | # MODEL_SIZES = MODEL_FLOPS 28 | set_seed(seed) 29 | fit_sizes = fit_sizes.split(',') 30 | # OBSERVED_LOSSES = GetLoss(train_data).OBSERVED_LOSSES 31 | SIZELAW_FILE = SIZELAW_FILES[train_data] 32 | STEPLAW_FILE = STEPLAW_FILES[train_data] 33 | with open(STEPLAW_FILE, "r") as f: 34 | steplaws = json.load(f) 35 | token = step * BSZ 36 | if ratios is not None: 37 | with open(ratios, "r") as f: 38 | ratios = [line.strip() for line in f] 39 | else: 40 | ratios = steplaws[fit_sizes[0]] 41 | 42 | 43 | sizelaws = defaultdict(lambda: defaultdict(lambda:defaultdict(dict))) 44 | if os.path.exists(SIZELAW_FILE): 45 | with open(SIZELAW_FILE, "r") as f: 46 | sizelaws.update(json.load(f)) 47 | 48 | fig, axes = plt.subplots(len(ratios), 2, figsize=(15, 60)) 49 | 50 | if tie_alpha == "all": 51 | i = -1 52 | indices = {(ratio, domain): (i:=i+1) 53 | for ratio in ratios for domain in DOMAINS_2_SUBDOMAINS 54 | } 55 | x = np.array([[MODEL_SIZES[sz] for sz in fit_sizes] for ratio in ratios for domain in DOMAINS_2_SUBDOMAINS]).T 56 | y = np.array([ 57 | [ 58 | power_law(token, (steplaws[sz][ratio][domain]["log_c"],steplaws[sz][ratio][domain]["log_d0"], steplaws[sz][ratio][domain]["alpha"])) for sz in fit_sizes 59 | ] 60 | for ratio in ratios for domain in DOMAINS_2_SUBDOMAINS 61 | ]).T 62 | law = ScalingLaw(power_law_share_alpha) 63 | param = law.fit(x, y, param_generator_share_alpha(np.log(np.min(y, axis=0))), 100, eps=0, valid_split=1, delta=8e-2) 64 | fit_x = torch.stack([ 65 | torch.linspace(np.min(x), MODEL_SIZES[target_size], 1000) 66 | for ratio in ratios for domain in DOMAINS_2_SUBDOMAINS 67 | ], dim=1) 68 | prediction = power_law_share_alpha(torch.tensor(fit_x), torch.tensor(param)) 69 | 70 | for ax_row, ratio in zip(axes, ratios): 71 | for j, domain in enumerate(DOMAINS_2_SUBDOMAINS): 72 | idx = indices[(ratio, domain)] 73 | plot_x = np.array([MODEL_SIZES[sz] for sz in fit_sizes]) # + [MODEL_SIZES[target_size]]) 74 | plot_y = np.array([ 75 | power_law(token, (steplaws[sz][ratio][domain]["log_c"],steplaws[sz][ratio][domain]["log_d0"], steplaws[sz][ratio][domain]["alpha"])) 76 | for sz in fit_sizes # + [target_size] 77 | ]) 78 | ax_row[0].scatter(plot_x, plot_y - np.exp(param[idx]), c=PALLETES[j]) 79 | ax_row[0].plot(fit_x[:, idx], prediction[:, idx] - np.exp(param[idx]), c=PALLETES[j]) 80 | ax_row[1].scatter(plot_x, plot_y, c=PALLETES[j]) 81 | ax_row[1].plot(fit_x[:, idx], prediction[:, idx], c=PALLETES[j]) 82 | sizelaws[ratio][domain][step] = {"log_c": param[idx], "log_n0": param[idx+(len(param)-1)//2], "alpha": param[-1]} 83 | ax_row[0].set_xscale("log"); ax_row[0].set_yscale("log") 84 | ax_row[1].set_xscale("log"); ax_row[1].set_yscale("log") 85 | elif tie_alpha == "valid": 86 | for ax_row, ratio in zip(axes, ratios): 87 | for domain in DOMAINS_2_SUBDOMAINS: 88 | for sz in fit_sizes: 89 | try: 90 | x = steplaws[sz][ratio][domain] 91 | except: 92 | import ipdb;ipdb.set_trace() 93 | 94 | x = np.array([[MODEL_SIZES[sz] for sz in fit_sizes] for domain in DOMAINS_2_SUBDOMAINS]).T 95 | y = np.array([ 96 | [ 97 | power_law(token, (steplaws[sz][ratio][domain]["log_c"],steplaws[sz][ratio][domain]["log_d0"], steplaws[sz][ratio][domain]["alpha"])) for sz in fit_sizes 98 | ] 99 | for domain in DOMAINS_2_SUBDOMAINS 100 | ]).T 101 | law = ScalingLaw(power_law_share_alpha) 102 | param = law.fit(x, y, param_generator_share_alpha(np.log(np.min(y, axis=0))), 10, eps=0, valid_split=0, delta=8e-2) 103 | fit_x = torch.stack([ 104 | torch.linspace(np.min(x), MODEL_SIZES[target_size], 1000) 105 | for domain in DOMAINS_2_SUBDOMAINS 106 | ], dim=1) 107 | prediction = power_law_share_alpha(torch.tensor(fit_x), torch.tensor(param)) 108 | for j, domain in enumerate(DOMAINS_2_SUBDOMAINS): 109 | idx = j 110 | plot_x = np.array([MODEL_SIZES[sz] for sz in fit_sizes]) # + [MODEL_SIZES[target_size]]) 111 | plot_y = np.array([ 112 | power_law(token, (steplaws[sz][ratio][domain]["log_c"],steplaws[sz][ratio][domain]["log_d0"], steplaws[sz][ratio][domain]["alpha"])) 113 | for sz in fit_sizes # + [target_size] 114 | ]) 115 | ax_row[0].scatter(plot_x, plot_y - np.exp(param[idx]), c=PALLETES[j]) 116 | ax_row[0].plot(fit_x[:, idx], prediction[:, idx] - np.exp(param[idx]), c=PALLETES[j]) 117 | ax_row[1].scatter(plot_x, plot_y, c=PALLETES[j]) 118 | ax_row[1].plot(fit_x[:, idx], prediction[:, idx], c=PALLETES[j]) 119 | sizelaws[ratio][domain][step] = {"log_c": param[idx], "log_n0": param[idx+(len(param)-1)//2], "alpha": param[-1]} 120 | ax_row[0].set_xscale("log"); ax_row[0].set_yscale("log") 121 | ax_row[1].set_xscale("log"); ax_row[1].set_yscale("log") 122 | else: 123 | raise NotImplementedError 124 | 125 | fig.savefig(savefig) 126 | with open(SIZELAW_FILE, "w") as f: 127 | json.dump(sizelaws, f, indent=4) 128 | 129 | 130 | 131 | if __name__ == '__main__': 132 | fire.Fire(main) -------------------------------------------------------------------------------- /pipeline/steplaw.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import json 4 | import torch 5 | from collections import defaultdict 6 | import matplotlib.pyplot as plt 7 | 8 | from law import ScalingLaw 9 | from utils import * 10 | from get_loss import GetLoss 11 | 12 | import numpy as np 13 | 14 | 15 | import warnings 16 | warnings.filterwarnings('ignore') 17 | 18 | ### SL related 19 | # XXX: for valid 20 | def param_generator_share_alpha(log_c): 21 | for log_D0 in np.linspace(15, 23, 11): 22 | for alpha in np.linspace(0.65, 0.9, 10): 23 | for _ in range(50): 24 | yield [_log_c + (1+np.random.rand()) for _log_c in log_c] + [log_D0 + np.random.rand() - 0.5 for _ in log_c] + [alpha] 25 | 26 | 27 | def check(ratios, steps, losses): 28 | pass_flag = True 29 | for domain in DOMAINS_2_SUBDOMAINS: 30 | for ratio in ratios: 31 | for step in steps: 32 | try: 33 | t = losses[ratio][domain][step] 34 | except: 35 | pass_flag = False 36 | print("NOT FOUND:", ratio, domain, step) 37 | if not pass_flag: 38 | raise ReferenceError 39 | 40 | def main(savefig, train_data, ratios=None, model_size="70M", fit_step_range=[10000, 30000], tie_alpha="all", seed=42): 41 | """ 42 | params 43 | savefig: path to save the prediction fig 44 | model_size: the model size of the law to fit 45 | fit_step_range: mininum and maximum step used to fit the law 46 | tie_alpha: enum of ["all", "valid", "none"] tie alpha of different scaling law curves. 47 | seed: random seed 48 | """ 49 | OBSERVED_LOSSES = GetLoss(train_data).OBSERVED_LOSSES 50 | STEPLAW_FILE = STEPLAW_FILES[train_data] 51 | set_seed(seed) 52 | steplaws = defaultdict(lambda:defaultdict(lambda:defaultdict(dict))) 53 | if os.path.exists(STEPLAW_FILE): 54 | with open(STEPLAW_FILE, "r") as f: 55 | steplaws.update(json.load(f)) 56 | if ratios is not None: 57 | with open(ratios, "r") as f: 58 | ratios = [line.strip() for line in f] 59 | else: 60 | ratios = list(OBSERVED_LOSSES[model_size]) 61 | 62 | if "0.125-0.5-0.0-0.0625-0.1875-0.0-0.125" in ratios: # FIXME: remove this item in data 63 | ratios.remove("0.125-0.5-0.0-0.0625-0.1875-0.0-0.125") 64 | step_start, step_end = fit_step_range 65 | fit_steps = sorted([ step 66 | for step in OBSERVED_LOSSES[model_size][ratios[-1]][list(DOMAINS_2_SUBDOMAINS.keys())[0]] 67 | if step_start <= int(step) <= step_end 68 | ]) 69 | check(ratios, fit_steps, OBSERVED_LOSSES[model_size]) 70 | 71 | fig, axes = plt.subplots(len(ratios), 2, figsize=(15, len(ratios)*5)) 72 | 73 | # start fitting 74 | if tie_alpha == "all": 75 | i = -1 76 | indices = {(ratio, domain): (i:=i+1) 77 | for ratio in ratios for domain in DOMAINS_2_SUBDOMAINS 78 | } 79 | tokens = BSZ * np.array([fit_steps for ratio in ratios for domain in DOMAINS_2_SUBDOMAINS]).T 80 | loss = np.array([ 81 | [OBSERVED_LOSSES[model_size][ratio][domain][step] for step in fit_steps] 82 | for ratio in ratios for domain in DOMAINS_2_SUBDOMAINS 83 | ]).T 84 | law = ScalingLaw(power_law_share_alpha) 85 | param = law.fit( 86 | tokens, loss, param_generator_share_alpha(np.min(np.log(loss), axis=0)), 87 | 300, eps=0, workers=-1, delta=2e-2, valid_split=3 88 | ) 89 | fit_x = torch.stack([ 90 | torch.linspace(10000 * BSZ, 1000000 * BSZ, 1000) 91 | for ratio in ratios for domain in DOMAINS_2_SUBDOMAINS 92 | ], dim=1) 93 | prediction = power_law_share_alpha(torch.tensor(fit_x), torch.tensor(param)) 94 | 95 | for ax_row, ratio in zip(axes, ratios): 96 | for j, domain in enumerate(DOMAINS_2_SUBDOMAINS): 97 | idx = indices[(ratio, domain)] 98 | plot_x = [step * BSZ for step in OBSERVED_LOSSES[model_size][ratio][domain] if step >= step_start] 99 | plot_y = [loss for step, loss in OBSERVED_LOSSES[model_size][ratio][domain].items() if step >= step_start] 100 | ax_row[0].scatter(plot_x, plot_y - np.exp(param[idx]), c=PALLETES[j], label=domain) 101 | ax_row[0].plot(fit_x[:, idx], prediction[:, idx] - np.exp(param[idx]), c=PALLETES[j]) 102 | ax_row[1].scatter(plot_x, plot_y, c=PALLETES[j]) 103 | ax_row[1].plot(fit_x[:, idx], prediction[:, idx], c=PALLETES[j]) 104 | try: 105 | steplaws[model_size][ratio][domain] = {"log_c": param[idx], "log_d0": param[idx+(len(param)-1)//2], "alpha": param[-1]} 106 | except: 107 | if model_size not in steplaws: 108 | steplaws[model_size] = defaultdict(lambda:defaultdict(dict)) 109 | elif ratio not in steplaws[model_size]: 110 | steplaws[model_size][ratio] = defaultdict(dict) 111 | elif domain not in steplaws[model_size][domain]: 112 | steplaws[model_size][ratio][domain] = {} 113 | steplaws[model_size][ratio][domain] = {"log_c": param[idx], "log_d0": param[idx+(len(param)-1)//2], "alpha": param[-1]} 114 | 115 | ax_row[0].set_xscale("log"); ax_row[0].set_yscale("log");ax_row[0].legend();ax_row[0].set_title(ratio) 116 | ax_row[1].set_xscale("log"); ax_row[1].set_yscale("log") 117 | 118 | elif tie_alpha == "valid": 119 | for ax_row, ratio in zip(axes, ratios): 120 | tokens = BSZ * np.array([fit_steps for domain in DOMAINS_2_SUBDOMAINS]).T 121 | loss = np.array([ 122 | [OBSERVED_LOSSES[model_size][ratio][domain][step] for step in fit_steps] 123 | for domain in DOMAINS_2_SUBDOMAINS 124 | ]).T 125 | law = ScalingLaw(power_law_share_alpha) 126 | param = law.fit( 127 | tokens, loss, param_generator_share_alpha(np.min(np.log(loss), axis=0)), 128 | 20, eps=0, workers=-1, delta=5e-2, valid_split=5 129 | ) 130 | fit_x = torch.stack([ 131 | torch.linspace(10000 * BSZ, 1000000 * BSZ, 1000) 132 | for domain in DOMAINS_2_SUBDOMAINS 133 | ], dim=1) 134 | prediction = power_law_share_alpha(torch.tensor(fit_x), torch.tensor(param)) 135 | 136 | for idx, domain in enumerate(DOMAINS_2_SUBDOMAINS): 137 | plot_x = [step * BSZ for step in OBSERVED_LOSSES[model_size][ratio][domain] if step >= step_start] 138 | plot_y = [loss for step, loss in OBSERVED_LOSSES[model_size][ratio][domain].items() if step >= step_start] 139 | ax_row[0].scatter(plot_x, plot_y - np.exp(param[idx]), c=PALLETES[idx], label=domain) 140 | ax_row[0].plot(fit_x[:, idx], prediction[:, idx] - np.exp(param[idx]), c=PALLETES[idx]) 141 | ax_row[1].scatter(plot_x, plot_y, c=PALLETES[idx]) 142 | ax_row[1].plot(fit_x[:, idx], prediction[:, idx], c=PALLETES[idx]) 143 | try: 144 | steplaws[model_size][ratio][domain] = {"log_c": param[idx], "log_d0": param[idx+(len(param)-1)//2], "alpha": param[-1]} 145 | except: 146 | if model_size not in steplaws: 147 | steplaws[model_size] = defaultdict(lambda:defaultdict(dict)) 148 | elif ratio not in steplaws[model_size]: 149 | steplaws[model_size][ratio] = defaultdict(dict) 150 | elif domain not in steplaws[model_size][domain]: 151 | steplaws[model_size][ratio][domain] = {} 152 | steplaws[model_size][ratio][domain] = {"log_c": param[idx], "log_d0": param[idx+(len(param)-1)//2], "alpha": param[-1]} 153 | 154 | ax_row[0].set_xscale("log"); ax_row[0].set_yscale("log");ax_row[0].legend();ax_row[0].set_title(ratio) 155 | ax_row[1].set_xscale("log"); ax_row[1].set_yscale("log") 156 | elif tie_alpha == "none": 157 | pass 158 | else: 159 | raise NotImplementedError 160 | 161 | fig.savefig(savefig) 162 | with open(STEPLAW_FILE, "w") as f: 163 | json.dump(steplaws, f, indent=4) 164 | 165 | if __name__ == '__main__': 166 | fire.Fire(main) -------------------------------------------------------------------------------- /pipeline/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import numpy as np 8 | from functools import partial 9 | from sklearn.ensemble import AdaBoostRegressor 10 | from sklearn.base import BaseEstimator, RegressorMixin 11 | from tqdm import tqdm 12 | 13 | homedir = os.path.dirname(os.path.abspath(__file__)) 14 | WEIGHT_FILE = "valid_weight.json" 15 | STEPLAW_FILES = { 16 | "RedPajama": f"../data/RedPajama/steplaws.json", 17 | } 18 | SIZELAW_FILES = { 19 | "RedPajama": f"../data/RedPajama/sizelaws.json", 20 | } 21 | MIXLAW_FILES = { 22 | "RedPajama": f"../data/RedPajama/mixlaws.pt", 23 | } 24 | 25 | # DOMAINS_2_SUBDOMAINS = { 26 | # "academic": ["ArXiv", "PubMed_Abstracts", "PhilPapers", "NIH_ExPorter", "FreeLaw", "USPTO_Backgrounds", "PubMed_Central"], 27 | # "prose": ["PG19", "Books3", "BookCorpus2"], 28 | # "dialogue": ["Ubuntu_IRC", "OpenSubtitles", "EuroParl", "Enron_Emails", "HackerNews", "YoutubeSubtitles"], 29 | # "symbolic": ["DM_Mathematics", "Github"], 30 | # "internet": ["Pile-CC", "OpenWebText2", "StackExchange", "Wikipedia_en"], 31 | # } 32 | DOMAINS_2_SUBDOMAINS = { 33 | "subset_0": ["Github", "PubMed_Abstracts", "OpenWebText2", "EuroParl"], 34 | "subset_1": ["Pile-CC", "DM_Mathematics", "Wikipedia_en", "ArXiv", "USPTO_Backgrounds"], 35 | "subset_2": ["HackerNews", "PG19", "PubMed_Central", "Ubuntu_IRC"], 36 | "subset_3": ["PhilPapers", "FreeLaw", "OpenSubtitles", "NIH_ExPorter", "BookCorpus2"], 37 | "subset_4": ["StackExchange", "Books3", "YoutubeSubtitles", "Enron_Emails"] 38 | } 39 | SUBDOMAINS_2_DOMAINS = { 40 | subdomain: domain 41 | for domain, subdomains in DOMAINS_2_SUBDOMAINS.items() for subdomain in subdomains 42 | } 43 | with open(WEIGHT_FILE, "r") as f: 44 | valid_weight = json.load(f) 45 | for domain, subdomains in DOMAINS_2_SUBDOMAINS.items(): 46 | valid_weight[domain] = sum(valid_weight[subdomain] for subdomain in subdomains) 47 | 48 | MODEL_SIZES = { 49 | "70M": 18915328, 50 | "160M": 85056000, 51 | "305M": 201541632, 52 | "410M": 302311424, 53 | "1B": 805736448, 54 | "2.8B": 2517652480 55 | } 56 | # MODEL_SIZES = { 57 | # "70M": 12*6*512*512, 58 | # "160M": 12*12*768*768, 59 | # "305M": 12*16*1024*1024, 60 | # "410M": 12*24*1024*1024, 61 | # "1B": 12*16*2048*2048, 62 | # # "2.8B": 2517652480 63 | # } 64 | MODEL_FLOPS = { 65 | "70M": 6 * (6 * 512 * 512 + 4096 * 512 ), 66 | "160M": 12 * (6 * 768 * 768 + 4096 * 768 ), 67 | "305M": 16 * (6 * 1024 * 1024 + 4096 * 1024 ), 68 | "410M": 24 * (6 * 1024 * 1024 + 4096 * 1024 ), 69 | "1B": 16 * (6 * 2048 * 2048 + 4096 * 2048 ), 70 | "2.8B": 32 * (6 * 2560 * 2560 + 4096 * 2560 ), 71 | } 72 | PALLETES = [ 73 | "#5865f2", "#57f287", "#eb459e", "#ed4245", "#520099", "#2c2f33", 74 | "#4AA2D9", "#D9CE32", "#F2A950", "#F24B4B", "#fd5c63" 75 | ] 76 | BSZ = 4096 * 256 77 | 78 | class ScalingLawWrap: 79 | def __init__(self, func): 80 | self.func = func 81 | 82 | def __call__(self, x, param): 83 | if not isinstance(x, torch.Tensor): 84 | x = torch.tensor(x, dtype=torch.float) 85 | if not isinstance(param, torch.Tensor): 86 | param = torch.tensor(param, dtype=torch.float) 87 | return self.func(x, param) 88 | 89 | # def wrapped_power_law(func, x, param): 90 | # if not isinstance(x, torch.Tensor): 91 | # x = torch.tensor(x) 92 | # if not isinstance(param, torch.Tensor): 93 | # param = torch.tensor(param) 94 | # return func(x, param) 95 | 96 | # def scaling_law_wrap(func): 97 | # return partial(wrapped_power_law, func) 98 | 99 | def power_law_(x, param): 100 | log_c, log_k, alpha = param 101 | return torch.exp(log_c) + torch.exp(alpha * (log_k - torch.log(x))) 102 | power_law = ScalingLawWrap(power_law_) 103 | 104 | def power_law_share_alpha_(x, param): 105 | num_curves = (len(param) - 1) // 2 106 | log_c, log_k, alpha = param[:num_curves], param[num_curves:-1], param[-1] 107 | log_reducible_loss = alpha * (log_k[None, :] - x.log()) 108 | return torch.exp(log_c) + torch.exp(log_reducible_loss) 109 | power_law_share_alpha = ScalingLawWrap(power_law_share_alpha_) 110 | 111 | def mixture_law_(x, param): 112 | log_c, log_k, t = param[0], param[1], param[2:] 113 | return torch.exp(log_c) + torch.exp(log_k + torch.matmul(x, t)) 114 | # def mixture_law_2(x, param): 115 | # c, k, t = param[0], param[1], param[2:] 116 | # return c + k * torch.exp(torch.matmul(x, t)) 117 | mixture_law = ScalingLawWrap(mixture_law_) 118 | 119 | def mixture_law_one_domain(k, x, param): 120 | log_c, log_k, t = param[0], param[1], param[2:] 121 | return torch.exp(log_c) + torch.exp(log_k + x[:, k] * t[k]) 122 | # def mixture_law_2(x, param): 123 | # c, k, t = param[0], param[1], param[2:] 124 | # return c + k * torch.exp(torch.matmul(x, t)) 125 | mixture_law = ScalingLawWrap(mixture_law_) 126 | 127 | 128 | def mixture_law_2_(x, param): 129 | result = 1 130 | c_0, param = param[0], param[1:] 131 | for i in range(len(x[0])): 132 | log_c, log_k, t = param[i*3:(i+1)*3] 133 | result *= torch.exp(log_c) + torch.exp(log_k + t * x[:, i]) 134 | return result + c_0 135 | mixture_law_2 = ScalingLawWrap(mixture_law_2_) 136 | 137 | 138 | def set_seed(seed): 139 | random.seed(seed) 140 | np.random.seed(seed) 141 | torch.manual_seed(seed) 142 | torch.cuda.manual_seed_all(seed) 143 | 144 | class MixtureMLP(nn.Module): 145 | def __init__(self, dim=10, num_mixture=5, activation="exp", bias=None) -> None: 146 | super().__init__() 147 | self.in_linear = nn.Linear(num_mixture, dim) 148 | # for i in range(self.in_linear.weight.data.shape[0]): 149 | # self.in_linear.weight.data[i, i%num_mixture] = -10 * np.abs(self.in_linear.weight.data[i, i%num_mixture]) 150 | self.out_linear = nn.Linear(dim, 1) 151 | self.act = activation 152 | if bias is not None: 153 | self.out_linear.bias = nn.Parameter(torch.tensor([bias], dtype=torch.float)) 154 | # self.dropout = nn.Dropout(0.5) 155 | 156 | def forward(self, x): 157 | x = torch.tensor(x, dtype=torch.float) 158 | h = self.in_linear(x) 159 | if self.act == "exp": 160 | h = torch.exp(h) 161 | elif self.act == "relu": 162 | h = torch.nn.functional.relu(h) 163 | elif self.act == "gelu": 164 | h = torch.nn.functional.gelu(h, approximate="tanh") 165 | elif self.act == "silu": 166 | h = torch.nn.functional.silu(h) 167 | elif self.act == "tanh": 168 | h = torch.nn.functional.tanh(h) 169 | elif self.act == "sigmoid": 170 | h = torch.nn.functional.sigmoid(h) 171 | elif self.act == "softplus": 172 | h = torch.nn.functional.softplus(h) 173 | # return self.out_linear(torch.nn.functional.gelu(self.in_linear(x))) 174 | w = F.softmax(self.out_linear.weight, dim=-1) 175 | # import ipdb;ipdb.set_trace() 176 | return torch.matmul(h, w.T) + self.out_linear.bias 177 | 178 | class MLPEstimator(BaseEstimator, RegressorMixin): 179 | def __init__(self, dim, num_mixture, activation): 180 | # Initialize parameters 181 | self.dim, self.num_mixture, self.activation = dim, num_mixture, activation 182 | self.mlp = MixtureMLP(dim, num_mixture, activation) 183 | self.use_cuda = torch.cuda.is_available() 184 | 185 | def fit(self, X, y): 186 | best_valid_loss, valid_split = 1e10, int(X.shape[0] / 5) 187 | X, y = torch.tensor(X, dtype=torch.float), torch.tensor(y, dtype=torch.float) 188 | self.mlp, X, y = self.mlp.cuda(), X.cuda(), y.cuda() 189 | 190 | optimizer = torch.optim.AdamW(self.mlp.parameters(), lr=0.1, weight_decay=1e-3) 191 | for _ in tqdm(range(5000)): 192 | self.mlp.train() 193 | # loss = optimizer.step(closure) 194 | loss = torch.nn.functional.huber_loss(self.mlp(torch.tensor(X[:-valid_split]).float()).squeeze(), torch.tensor(y[:-valid_split]).float(), delta=0.03) 195 | optimizer.zero_grad() 196 | loss.backward() 197 | optimizer.step() 198 | with torch.no_grad(): 199 | self.mlp.eval() 200 | valid_loss = torch.nn.functional.l1_loss(self.mlp(torch.tensor(X).float()).squeeze(), torch.tensor(y).float()) 201 | # valid_loss = torch.nn.functional.mse_loss(mlp(torch.tensor(x[-5:]).float()).squeeze(), torch.tensor(y[-5:]).float()) 202 | if valid_loss < best_valid_loss: 203 | best_valid_loss = valid_loss 204 | best_param = self.mlp.state_dict() 205 | self.mlp.load_state_dict(best_param) 206 | 207 | @torch.no_grad() 208 | def predict(self, X): 209 | self.mlp.eval() 210 | if self.use_cuda: 211 | self.mlp = self.mlp.cuda() 212 | x = torch.tensor(X, dtype=torch.float, device='cuda') 213 | result = self.mlp(x).squeeze().cpu().numpy() 214 | self.mlp = self.mlp.cpu() 215 | else: 216 | x = torch.tensor(X, dtype=torch.float) 217 | result = self.mlp(x).squeeze().numpy() 218 | return result -------------------------------------------------------------------------------- /pipeline/valid_weight.json: -------------------------------------------------------------------------------- 1 | { 2 | "Github": 0.09529320987654322, "Pile-CC": 0.1570216049382716, "Books3": 0.12654320987654322, "PhilPapers": 0.005015432098765432, 3 | "OpenWebText2": 0.09066358024691358, "PubMed_Abstracts": 0.023919753086419752, "DM_Mathematics": 0.021604938271604937, "PG19": 0.018518518518518517, 4 | "FreeLaw": 0.05439814814814815, "HackerNews": 0.0073302469135802465, "EuroParl": 0.008101851851851851, "OpenSubtitles": 0.015817901234567902, 5 | "PubMed_Central": 0.125, "Wikipedia_en": 0.03742283950617284, "StackExchange": 0.055169753086419755, "BookCorpus2": 0.006944444444444444, 6 | "Enron_Emails": 0.0011574074074074073, "NIH_ExPorter": 0.0019290123456790122, "YoutubeSubtitles": 0.009645061728395061, 7 | "Ubuntu_IRC": 0.008101851851851851, "ArXiv": 0.10185185185185185, "USPTO_Backgrounds": 0.02854938271604938 8 | } 9 | --------------------------------------------------------------------------------