├── .github └── .gitkeep ├── .gitignore ├── LICENSE ├── README.md ├── assets └── title.png ├── examples ├── __init__.py ├── data │ ├── gsm8k.jsonl │ ├── gsm8k.nano │ ├── gsm8k_len.json │ └── gsm8k_len.jsonl ├── kernels.py ├── nanoray.py ├── nanosets.py └── nanotron.py ├── nanorlhf ├── __init__.py ├── kernels │ ├── __init__.py │ ├── api.py │ ├── flash_attn │ │ ├── __init__.py │ │ ├── bwd.py │ │ ├── fwd.py │ │ └── ops.py │ ├── flash_attn_varlen │ │ ├── __init__.py │ │ ├── bwd.py │ │ ├── fwd.py │ │ └── ops.py │ ├── patch.py │ ├── rmsnorm │ │ ├── __init__.py │ │ ├── bwd.py │ │ ├── fwd.py │ │ └── ops.py │ ├── rotary │ │ ├── __init__.py │ │ └── ops.py │ └── utils │ │ ├── __init__.py │ │ ├── huggingface.py │ │ └── padding.py ├── nanoray │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── initialization.py │ │ ├── remote.py │ │ └── session.py │ ├── core │ │ ├── __init__.py │ │ ├── actor.py │ │ ├── object_ref.py │ │ ├── object_store.py │ │ ├── placement.py │ │ ├── runtime_env.py │ │ ├── serialization.py │ │ └── task.py │ ├── network │ │ ├── __init__.py │ │ ├── proxy.py │ │ ├── router.py │ │ ├── rpc_client.py │ │ └── rpc_server.py │ ├── runtime │ │ ├── __init__.py │ │ ├── process_pool.py │ │ └── worker.py │ ├── scheduler │ │ ├── __init__.py │ │ ├── node_state.py │ │ ├── policies.py │ │ └── scheduler.py │ └── utils.py ├── nanorlhf │ └── __init__.py ├── nanosets │ ├── __init__.py │ ├── api.py │ ├── core │ │ ├── __init__.py │ │ ├── bitmap.py │ │ └── buffer.py │ ├── dtype │ │ ├── __init__.py │ │ ├── array.py │ │ ├── dtype.py │ │ ├── dtype_inference.py │ │ ├── list_array.py │ │ ├── primitive_array.py │ │ ├── string_array.py │ │ ├── struct_array.py │ │ └── tensor_array.py │ ├── io │ │ ├── __init__.py │ │ ├── ipc.py │ │ └── json_io.py │ ├── table │ │ ├── __init__.py │ │ ├── field.py │ │ ├── record_batch.py │ │ ├── schema.py │ │ └── table.py │ └── utils.py ├── nanotron │ ├── __init__.py │ ├── api.py │ ├── core │ │ ├── __init__.py │ │ ├── dp │ │ │ ├── __init__.py │ │ │ ├── engine.py │ │ │ ├── grad.py │ │ │ └── optim.py │ │ ├── pp │ │ │ ├── __init__.py │ │ │ ├── buffer.py │ │ │ ├── engine.py │ │ │ ├── loss.py │ │ │ └── utils.py │ │ └── tp │ │ │ ├── __init__.py │ │ │ ├── engine.py │ │ │ ├── loss.py │ │ │ ├── modules.py │ │ │ └── ops.py │ ├── distributed │ │ ├── __init__.py │ │ ├── collectives.py │ │ ├── initializers.py │ │ ├── mode.py │ │ ├── mpu.py │ │ ├── p2p.py │ │ └── seed.py │ └── utils │ │ ├── __init__.py │ │ ├── checkpoint.py │ │ ├── huggingface.py │ │ ├── snapshot.py │ │ ├── tracing.py │ │ └── wrapping.py └── nanovllm │ ├── __init__.py │ ├── api.py │ ├── core │ └── __init__.py │ └── utils │ └── __init__.py ├── pyproject.toml ├── requirements.txt ├── scripts ├── prepare_rl_dataset.sh ├── prepare_sft_dataset.sh ├── train_rl.sh └── train_sft.sh └── tests ├── __init__.py ├── kernels ├── __init__.py ├── flash_attn_huggingface_test.py ├── flash_attn_test.py └── flash_attn_varlen_test.py ├── nanoray ├── __init__.py ├── actor_test.py ├── actor_vs_task_test.py ├── basic_test.py ├── pg_test.py ├── policy_test.py ├── rl_like_test.py ├── rpc_client.py └── rpc_server.py ├── nanosets ├── __init__.py ├── api_test.py ├── array_test.py ├── bench_test.py ├── data │ ├── api_test │ │ ├── hf_sample.jsonl │ │ └── ns_roundtrip.jsonl │ ├── bench_test │ │ ├── data.json │ │ ├── data.jsonl │ │ └── table.nano │ ├── ipc_test │ │ └── table.nano │ └── json_test │ │ ├── data.json │ │ └── data.jsonl ├── ipc_test.py └── json_test.py └── nanotron ├── ckpt.py ├── ckpt.sh ├── losses ├── loss_overlay.png ├── loss_tp1_pp1_dp1_stg0.json ├── loss_tp1_pp1_dp4_stg1.json ├── loss_tp1_pp1_dp4_stg2.json ├── loss_tp1_pp1_dp4_stg3.json ├── loss_tp1_pp2_dp2_stg0.json ├── loss_tp1_pp2_dp2_stg1.json ├── loss_tp1_pp4_dp1_stg0.json ├── loss_tp2_pp1_dp2_stg0.json ├── loss_tp2_pp1_dp2_stg1.json ├── loss_tp2_pp1_dp2_stg2.json ├── loss_tp2_pp2_dp1_stg0.json ├── loss_tp2_pp2_dp2_stg1.json └── loss_tp4_pp1_dp1_stg0.json ├── plot.py ├── train.py └── train.sh /.github/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/README.md -------------------------------------------------------------------------------- /assets/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/assets/title.png -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/data/gsm8k.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/examples/data/gsm8k.jsonl -------------------------------------------------------------------------------- /examples/data/gsm8k.nano: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/examples/data/gsm8k.nano -------------------------------------------------------------------------------- /examples/data/gsm8k_len.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/examples/data/gsm8k_len.json -------------------------------------------------------------------------------- /examples/data/gsm8k_len.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/examples/data/gsm8k_len.jsonl -------------------------------------------------------------------------------- /examples/kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/examples/kernels.py -------------------------------------------------------------------------------- /examples/nanoray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/examples/nanoray.py -------------------------------------------------------------------------------- /examples/nanosets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/examples/nanosets.py -------------------------------------------------------------------------------- /examples/nanotron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/examples/nanotron.py -------------------------------------------------------------------------------- /nanorlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/__init__.py -------------------------------------------------------------------------------- /nanorlhf/kernels/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/api.py -------------------------------------------------------------------------------- /nanorlhf/kernels/flash_attn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/kernels/flash_attn/bwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/flash_attn/bwd.py -------------------------------------------------------------------------------- /nanorlhf/kernels/flash_attn/fwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/flash_attn/fwd.py -------------------------------------------------------------------------------- /nanorlhf/kernels/flash_attn/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/flash_attn/ops.py -------------------------------------------------------------------------------- /nanorlhf/kernels/flash_attn_varlen/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/kernels/flash_attn_varlen/bwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/flash_attn_varlen/bwd.py -------------------------------------------------------------------------------- /nanorlhf/kernels/flash_attn_varlen/fwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/flash_attn_varlen/fwd.py -------------------------------------------------------------------------------- /nanorlhf/kernels/flash_attn_varlen/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/flash_attn_varlen/ops.py -------------------------------------------------------------------------------- /nanorlhf/kernels/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/patch.py -------------------------------------------------------------------------------- /nanorlhf/kernels/rmsnorm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/kernels/rmsnorm/bwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/rmsnorm/bwd.py -------------------------------------------------------------------------------- /nanorlhf/kernels/rmsnorm/fwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/rmsnorm/fwd.py -------------------------------------------------------------------------------- /nanorlhf/kernels/rmsnorm/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/rmsnorm/ops.py -------------------------------------------------------------------------------- /nanorlhf/kernels/rotary/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/kernels/rotary/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/rotary/ops.py -------------------------------------------------------------------------------- /nanorlhf/kernels/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/kernels/utils/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/utils/huggingface.py -------------------------------------------------------------------------------- /nanorlhf/kernels/utils/padding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/kernels/utils/padding.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/__init__.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanoray/api/initialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/api/initialization.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/api/remote.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/api/remote.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/api/session.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/api/session.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanoray/core/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/core/actor.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/core/object_ref.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/core/object_ref.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/core/object_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/core/object_store.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/core/placement.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/core/placement.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/core/runtime_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/core/runtime_env.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/core/serialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/core/serialization.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/core/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/core/task.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanoray/network/proxy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/network/proxy.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/network/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/network/router.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/network/rpc_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/network/rpc_client.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/network/rpc_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/network/rpc_server.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/runtime/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanoray/runtime/process_pool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/runtime/process_pool.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/runtime/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/runtime/worker.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanoray/scheduler/node_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/scheduler/node_state.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/scheduler/policies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/scheduler/policies.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/scheduler/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/scheduler/scheduler.py -------------------------------------------------------------------------------- /nanorlhf/nanoray/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanoray/utils.py -------------------------------------------------------------------------------- /nanorlhf/nanorlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanosets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/__init__.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/api.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanosets/core/bitmap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/core/bitmap.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/core/buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/core/buffer.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/dtype/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanosets/dtype/array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/dtype/array.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/dtype/dtype.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/dtype/dtype.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/dtype/dtype_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/dtype/dtype_inference.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/dtype/list_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/dtype/list_array.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/dtype/primitive_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/dtype/primitive_array.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/dtype/string_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/dtype/string_array.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/dtype/struct_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/dtype/struct_array.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/dtype/tensor_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/dtype/tensor_array.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanosets/io/ipc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/io/ipc.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/io/json_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/io/json_io.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/table/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanosets/table/field.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/table/field.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/table/record_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/table/record_batch.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/table/schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/table/schema.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/table/table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/table/table.py -------------------------------------------------------------------------------- /nanorlhf/nanosets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanosets/utils.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/__init__.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/api.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/dp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/dp/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/core/dp/engine.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/dp/grad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/core/dp/grad.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/dp/optim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/core/dp/optim.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/pp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/pp/buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/core/pp/buffer.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/pp/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/core/pp/engine.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/pp/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/core/pp/loss.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/pp/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/core/pp/utils.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/tp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/tp/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/core/tp/engine.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/tp/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/core/tp/loss.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/tp/modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/core/tp/modules.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/core/tp/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/core/tp/ops.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/distributed/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanotron/distributed/collectives.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/distributed/collectives.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/distributed/initializers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/distributed/initializers.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/distributed/mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/distributed/mode.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/distributed/mpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/distributed/mpu.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/distributed/p2p.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/distributed/p2p.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/distributed/seed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/distributed/seed.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanotron/utils/checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/utils/checkpoint.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/utils/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/utils/huggingface.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/utils/snapshot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/utils/snapshot.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/utils/tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/utils/tracing.py -------------------------------------------------------------------------------- /nanorlhf/nanotron/utils/wrapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/nanorlhf/nanotron/utils/wrapping.py -------------------------------------------------------------------------------- /nanorlhf/nanovllm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanovllm/api.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanovllm/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nanorlhf/nanovllm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | triton 3 | cloudpickle 4 | zstandard -------------------------------------------------------------------------------- /scripts/prepare_rl_dataset.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/prepare_sft_dataset.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/train_rl.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/train_sft.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/kernels/flash_attn_huggingface_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/kernels/flash_attn_huggingface_test.py -------------------------------------------------------------------------------- /tests/kernels/flash_attn_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/kernels/flash_attn_test.py -------------------------------------------------------------------------------- /tests/kernels/flash_attn_varlen_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/kernels/flash_attn_varlen_test.py -------------------------------------------------------------------------------- /tests/nanoray/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/nanoray/actor_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanoray/actor_test.py -------------------------------------------------------------------------------- /tests/nanoray/actor_vs_task_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanoray/actor_vs_task_test.py -------------------------------------------------------------------------------- /tests/nanoray/basic_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanoray/basic_test.py -------------------------------------------------------------------------------- /tests/nanoray/pg_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanoray/pg_test.py -------------------------------------------------------------------------------- /tests/nanoray/policy_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanoray/policy_test.py -------------------------------------------------------------------------------- /tests/nanoray/rl_like_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanoray/rl_like_test.py -------------------------------------------------------------------------------- /tests/nanoray/rpc_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanoray/rpc_client.py -------------------------------------------------------------------------------- /tests/nanoray/rpc_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanoray/rpc_server.py -------------------------------------------------------------------------------- /tests/nanosets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/nanosets/api_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/api_test.py -------------------------------------------------------------------------------- /tests/nanosets/array_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/array_test.py -------------------------------------------------------------------------------- /tests/nanosets/bench_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/bench_test.py -------------------------------------------------------------------------------- /tests/nanosets/data/api_test/hf_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/data/api_test/hf_sample.jsonl -------------------------------------------------------------------------------- /tests/nanosets/data/api_test/ns_roundtrip.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/data/api_test/ns_roundtrip.jsonl -------------------------------------------------------------------------------- /tests/nanosets/data/bench_test/data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/data/bench_test/data.json -------------------------------------------------------------------------------- /tests/nanosets/data/bench_test/data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/data/bench_test/data.jsonl -------------------------------------------------------------------------------- /tests/nanosets/data/bench_test/table.nano: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/data/bench_test/table.nano -------------------------------------------------------------------------------- /tests/nanosets/data/ipc_test/table.nano: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/data/ipc_test/table.nano -------------------------------------------------------------------------------- /tests/nanosets/data/json_test/data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/data/json_test/data.json -------------------------------------------------------------------------------- /tests/nanosets/data/json_test/data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/data/json_test/data.jsonl -------------------------------------------------------------------------------- /tests/nanosets/ipc_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/ipc_test.py -------------------------------------------------------------------------------- /tests/nanosets/json_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanosets/json_test.py -------------------------------------------------------------------------------- /tests/nanotron/ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/ckpt.py -------------------------------------------------------------------------------- /tests/nanotron/ckpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/ckpt.sh -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_overlay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_overlay.png -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp1_pp1_dp1_stg0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp1_pp1_dp1_stg0.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp1_pp1_dp4_stg1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp1_pp1_dp4_stg1.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp1_pp1_dp4_stg2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp1_pp1_dp4_stg2.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp1_pp1_dp4_stg3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp1_pp1_dp4_stg3.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp1_pp2_dp2_stg0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp1_pp2_dp2_stg0.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp1_pp2_dp2_stg1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp1_pp2_dp2_stg1.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp1_pp4_dp1_stg0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp1_pp4_dp1_stg0.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp2_pp1_dp2_stg0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp2_pp1_dp2_stg0.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp2_pp1_dp2_stg1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp2_pp1_dp2_stg1.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp2_pp1_dp2_stg2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp2_pp1_dp2_stg2.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp2_pp2_dp1_stg0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp2_pp2_dp1_stg0.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp2_pp2_dp2_stg1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp2_pp2_dp2_stg1.json -------------------------------------------------------------------------------- /tests/nanotron/losses/loss_tp4_pp1_dp1_stg0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/losses/loss_tp4_pp1_dp1_stg0.json -------------------------------------------------------------------------------- /tests/nanotron/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/plot.py -------------------------------------------------------------------------------- /tests/nanotron/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/train.py -------------------------------------------------------------------------------- /tests/nanotron/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyunwoongko/nanoRLHF/HEAD/tests/nanotron/train.sh --------------------------------------------------------------------------------