├── 1.model.ipynb ├── 1.model_gemma2.ipynb ├── 2.actor.ipynb ├── 3.critic.ipynb ├── 4.rlhf.ipynb ├── 5.test.ipynb ├── README.md ├── dataset ├── eval.json └── train.json ├── model └── save_models_here ├── tokenizer ├── google │ └── gemma-2-2b-it │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer.model │ │ └── tokenizer_config.json └── meta-llama │ └── Meta-Llama-3-8B │ ├── special_tokens_map.json │ ├── tokenizer.json │ └── tokenizer_config.json └── util.py /1.model.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/1.model.ipynb -------------------------------------------------------------------------------- /1.model_gemma2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/1.model_gemma2.ipynb -------------------------------------------------------------------------------- /2.actor.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/2.actor.ipynb -------------------------------------------------------------------------------- /3.critic.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/3.critic.ipynb -------------------------------------------------------------------------------- /4.rlhf.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/4.rlhf.ipynb -------------------------------------------------------------------------------- /5.test.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/5.test.ipynb -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/README.md -------------------------------------------------------------------------------- /dataset/eval.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/dataset/eval.json -------------------------------------------------------------------------------- /dataset/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/dataset/train.json -------------------------------------------------------------------------------- /model/save_models_here: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tokenizer/google/gemma-2-2b-it/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/tokenizer/google/gemma-2-2b-it/special_tokens_map.json -------------------------------------------------------------------------------- /tokenizer/google/gemma-2-2b-it/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/tokenizer/google/gemma-2-2b-it/tokenizer.json -------------------------------------------------------------------------------- /tokenizer/google/gemma-2-2b-it/tokenizer.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/tokenizer/google/gemma-2-2b-it/tokenizer.model -------------------------------------------------------------------------------- /tokenizer/google/gemma-2-2b-it/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/tokenizer/google/gemma-2-2b-it/tokenizer_config.json -------------------------------------------------------------------------------- /tokenizer/meta-llama/Meta-Llama-3-8B/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/tokenizer/meta-llama/Meta-Llama-3-8B/special_tokens_map.json -------------------------------------------------------------------------------- /tokenizer/meta-llama/Meta-Llama-3-8B/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/tokenizer/meta-llama/Meta-Llama-3-8B/tokenizer.json -------------------------------------------------------------------------------- /tokenizer/meta-llama/Meta-Llama-3-8B/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/tokenizer/meta-llama/Meta-Llama-3-8B/tokenizer_config.json -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_RLHF_Llama3/HEAD/util.py --------------------------------------------------------------------------------