├── README.md ├── analysis_section1.ipynb ├── analysis_section2.ipynb ├── analysis_section3.ipynb ├── create_legends.ipynb ├── images ├── legend.pdf ├── neogpt-legend.pdf ├── section1 │ ├── Decreases-percentage-gutenberg_pg-19.pdf │ ├── Increases-opt_1.3b-FLOPs-gutenberg_pg-19.pdf │ ├── Increases-opt_1.3b-Steps-gutenberg_pg-19.pdf │ ├── Increases-opt_1.3b-ppl-gutenberg_pg-19.pdf │ ├── Increases-opt_1.3b-ppl-gutenberg_pg-19.png │ ├── Increases-percentage-gutenberg_pg-19.pdf │ ├── Stagnates-opt_1.3b-FLOPs-gutenberg_pg-19.pdf │ ├── Stagnates-opt_1.3b-Steps-gutenberg_pg-19.pdf │ ├── Stagnates-opt_1.3b-ppl-gutenberg_pg-19.pdf │ └── Stagnates-percentage-gutenberg_pg-19.pdf ├── section2 │ ├── gpt-neo.png │ └── noise.png ├── section3 │ ├── bigbench__codelinedescription_ppl.pdf │ ├── bigbench__dateunderstanding_ppl.pdf │ ├── bigbench__fantasyreasoning_ppl.pdf │ ├── bigbench__figureofspeechdetection_ppl.pdf │ ├── bigbench__hhhalignment_ppl.pdf │ ├── bigbench__humanorganssenses_ppl.pdf │ ├── bigbench__implicatures_ppl.pdf │ ├── bigbench__implicitrelations_ppl.pdf │ ├── bigbench__intentrecognition_ppl.pdf │ ├── bigbench__misconceptions_ppl.pdf │ ├── bigbench__nonsensewordsgrammar_ppl.pdf │ ├── bigbench__phraserelatedness_ppl.pdf │ ├── bigbench__similaritiesabstraction_ppl.pdf │ ├── bigbench__simpleethicalquestions_ppl.pdf │ ├── bigbench__strangestories_ppl.pdf │ ├── bigbench__swahilienglishproverbs_ppl.pdf │ ├── bigbench__undopermutation_ppl.pdf │ ├── bigbench__whatisthetao_ppl.pdf │ ├── bigbench_all_tasks.pdf │ ├── bigbench_breakthroughness_tasks.pdf │ ├── bigbench_linearity_tasks.pdf │ ├── task_performance.png │ └── what_is_tao_joint.png ├── validation_ppl.pdf ├── validation_ppl_annotated.pdf └── validation_ppl_annotated.png ├── plot_validation_ppl.ipynb └── utils.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/README.md -------------------------------------------------------------------------------- /analysis_section1.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/analysis_section1.ipynb -------------------------------------------------------------------------------- /analysis_section2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/analysis_section2.ipynb -------------------------------------------------------------------------------- /analysis_section3.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/analysis_section3.ipynb -------------------------------------------------------------------------------- /create_legends.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/create_legends.ipynb -------------------------------------------------------------------------------- /images/legend.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/legend.pdf -------------------------------------------------------------------------------- /images/neogpt-legend.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/neogpt-legend.pdf -------------------------------------------------------------------------------- /images/section1/Decreases-percentage-gutenberg_pg-19.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section1/Decreases-percentage-gutenberg_pg-19.pdf -------------------------------------------------------------------------------- /images/section1/Increases-opt_1.3b-FLOPs-gutenberg_pg-19.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section1/Increases-opt_1.3b-FLOPs-gutenberg_pg-19.pdf -------------------------------------------------------------------------------- /images/section1/Increases-opt_1.3b-Steps-gutenberg_pg-19.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section1/Increases-opt_1.3b-Steps-gutenberg_pg-19.pdf -------------------------------------------------------------------------------- /images/section1/Increases-opt_1.3b-ppl-gutenberg_pg-19.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section1/Increases-opt_1.3b-ppl-gutenberg_pg-19.pdf -------------------------------------------------------------------------------- /images/section1/Increases-opt_1.3b-ppl-gutenberg_pg-19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section1/Increases-opt_1.3b-ppl-gutenberg_pg-19.png -------------------------------------------------------------------------------- /images/section1/Increases-percentage-gutenberg_pg-19.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section1/Increases-percentage-gutenberg_pg-19.pdf -------------------------------------------------------------------------------- /images/section1/Stagnates-opt_1.3b-FLOPs-gutenberg_pg-19.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section1/Stagnates-opt_1.3b-FLOPs-gutenberg_pg-19.pdf -------------------------------------------------------------------------------- /images/section1/Stagnates-opt_1.3b-Steps-gutenberg_pg-19.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section1/Stagnates-opt_1.3b-Steps-gutenberg_pg-19.pdf -------------------------------------------------------------------------------- /images/section1/Stagnates-opt_1.3b-ppl-gutenberg_pg-19.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section1/Stagnates-opt_1.3b-ppl-gutenberg_pg-19.pdf -------------------------------------------------------------------------------- /images/section1/Stagnates-percentage-gutenberg_pg-19.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section1/Stagnates-percentage-gutenberg_pg-19.pdf -------------------------------------------------------------------------------- /images/section2/gpt-neo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section2/gpt-neo.png -------------------------------------------------------------------------------- /images/section2/noise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section2/noise.png -------------------------------------------------------------------------------- /images/section3/bigbench__codelinedescription_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__codelinedescription_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__dateunderstanding_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__dateunderstanding_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__fantasyreasoning_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__fantasyreasoning_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__figureofspeechdetection_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__figureofspeechdetection_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__hhhalignment_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__hhhalignment_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__humanorganssenses_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__humanorganssenses_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__implicatures_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__implicatures_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__implicitrelations_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__implicitrelations_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__intentrecognition_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__intentrecognition_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__misconceptions_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__misconceptions_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__nonsensewordsgrammar_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__nonsensewordsgrammar_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__phraserelatedness_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__phraserelatedness_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__similaritiesabstraction_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__similaritiesabstraction_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__simpleethicalquestions_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__simpleethicalquestions_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__strangestories_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__strangestories_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__swahilienglishproverbs_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__swahilienglishproverbs_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__undopermutation_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__undopermutation_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench__whatisthetao_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench__whatisthetao_ppl.pdf -------------------------------------------------------------------------------- /images/section3/bigbench_all_tasks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench_all_tasks.pdf -------------------------------------------------------------------------------- /images/section3/bigbench_breakthroughness_tasks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench_breakthroughness_tasks.pdf -------------------------------------------------------------------------------- /images/section3/bigbench_linearity_tasks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/bigbench_linearity_tasks.pdf -------------------------------------------------------------------------------- /images/section3/task_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/task_performance.png -------------------------------------------------------------------------------- /images/section3/what_is_tao_joint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/section3/what_is_tao_joint.png -------------------------------------------------------------------------------- /images/validation_ppl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/validation_ppl.pdf -------------------------------------------------------------------------------- /images/validation_ppl_annotated.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/validation_ppl_annotated.pdf -------------------------------------------------------------------------------- /images/validation_ppl_annotated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/images/validation_ppl_annotated.png -------------------------------------------------------------------------------- /plot_validation_ppl.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/plot_validation_ppl.ipynb -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiamengzhou/training_trajectory_analysis/HEAD/utils.py --------------------------------------------------------------------------------