├── .DS_Store ├── =1.0.0 ├── README.md ├── __init__.py ├── agents ├── .ipynb_checkpoints │ └── rl_agent-checkpoint.py ├── __pycache__ │ └── rl_agent.cpython-312.pyc └── rl_agent.py ├── configs ├── .ipynb_checkpoints │ ├── deepseek-checkpoint.yaml │ └── reasoner-checkpoint.yaml ├── deepseek.yaml └── reasoner.yaml ├── core ├── .DS_Store ├── .ipynb_checkpoints │ ├── model-checkpoint.py │ ├── replay_buffer-checkpoint.py │ ├── rewards-checkpoint.py │ ├── search-checkpoint.py │ └── tree-checkpoint.py ├── __init__.py ├── model.py ├── replay_buffer.py ├── rewards.py ├── search.py └── tree.py ├── data_collected.jsonl ├── deepseek-rl-reasoning ├── .DS_Store ├── .ipynb_checkpoints │ ├── Untitled-checkpoint.ipynb │ ├── base_model-checkpoint.py │ ├── config-checkpoint.py │ ├── data_test-checkpoint.py │ ├── evaluate-checkpoint.py │ ├── prompt_gen-checkpoint.py │ ├── rl_framework-checkpoint.py │ ├── test-checkpoint.py │ └── train-checkpoint.py ├── Untitled.ipynb ├── base_model.py ├── config.py ├── data │ ├── .ipynb_checkpoints │ │ ├── differentiated_prompts_zh-checkpoint.json │ │ ├── reasoning_data_zh0-checkpoint.json │ │ ├── test-checkpoint.json │ │ └── train-checkpoint.json │ ├── differentiated_prompts_zh.json │ ├── reasoning_data_zh0.json │ ├── test.json │ └── train.json ├── data_test.py ├── evaluate.py ├── prompt_gen.py ├── rl_framework.py ├── test.py └── train.py ├── inference_rl.py ├── langGraphPratic0 └── langGraphPratic0 │ ├── deepReseachDemo.ipynb │ ├── langGraph2-人在流程.ipynb │ ├── langGraph基础介绍0.ipynb │ ├── langGraph课程1-带记忆功能.ipynb │ ├── langGraph课程3定制消息格式.ipynb │ ├── langGraph课程4对话版本分支管理.ipynb │ └── langGraph课程5多种Agent模式.ipynb ├── requirements.txt ├── rl_policy.pth ├── run_inference.py ├── tasks ├── .ipynb_checkpoints │ └── math_solver-checkpoint.py ├── __pycache__ │ └── math_solver.cpython-312.pyc └── math_solver.py ├── train_rl.py └── validate_rl.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/.DS_Store -------------------------------------------------------------------------------- /=1.0.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/=1.0.0 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/README.md -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agents/.ipynb_checkpoints/rl_agent-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/agents/.ipynb_checkpoints/rl_agent-checkpoint.py -------------------------------------------------------------------------------- /agents/__pycache__/rl_agent.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/agents/__pycache__/rl_agent.cpython-312.pyc -------------------------------------------------------------------------------- /agents/rl_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/agents/rl_agent.py -------------------------------------------------------------------------------- /configs/.ipynb_checkpoints/deepseek-checkpoint.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/configs/.ipynb_checkpoints/deepseek-checkpoint.yaml -------------------------------------------------------------------------------- /configs/.ipynb_checkpoints/reasoner-checkpoint.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/configs/.ipynb_checkpoints/reasoner-checkpoint.yaml -------------------------------------------------------------------------------- /configs/deepseek.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/configs/deepseek.yaml -------------------------------------------------------------------------------- /configs/reasoner.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/configs/reasoner.yaml -------------------------------------------------------------------------------- /core/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/core/.DS_Store -------------------------------------------------------------------------------- /core/.ipynb_checkpoints/model-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/core/.ipynb_checkpoints/model-checkpoint.py -------------------------------------------------------------------------------- /core/.ipynb_checkpoints/replay_buffer-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/core/.ipynb_checkpoints/replay_buffer-checkpoint.py -------------------------------------------------------------------------------- /core/.ipynb_checkpoints/rewards-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/core/.ipynb_checkpoints/rewards-checkpoint.py -------------------------------------------------------------------------------- /core/.ipynb_checkpoints/search-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/core/.ipynb_checkpoints/search-checkpoint.py -------------------------------------------------------------------------------- /core/.ipynb_checkpoints/tree-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/core/.ipynb_checkpoints/tree-checkpoint.py -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/core/model.py -------------------------------------------------------------------------------- /core/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/core/replay_buffer.py -------------------------------------------------------------------------------- /core/rewards.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/core/rewards.py -------------------------------------------------------------------------------- /core/search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/core/search.py -------------------------------------------------------------------------------- /core/tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/core/tree.py -------------------------------------------------------------------------------- /data_collected.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/data_collected.jsonl -------------------------------------------------------------------------------- /deepseek-rl-reasoning/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/.DS_Store -------------------------------------------------------------------------------- /deepseek-rl-reasoning/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/.ipynb_checkpoints/Untitled-checkpoint.ipynb -------------------------------------------------------------------------------- /deepseek-rl-reasoning/.ipynb_checkpoints/base_model-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/.ipynb_checkpoints/base_model-checkpoint.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/.ipynb_checkpoints/config-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/.ipynb_checkpoints/config-checkpoint.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/.ipynb_checkpoints/data_test-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/.ipynb_checkpoints/data_test-checkpoint.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/.ipynb_checkpoints/evaluate-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/.ipynb_checkpoints/evaluate-checkpoint.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/.ipynb_checkpoints/prompt_gen-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/.ipynb_checkpoints/prompt_gen-checkpoint.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/.ipynb_checkpoints/rl_framework-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/.ipynb_checkpoints/rl_framework-checkpoint.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/.ipynb_checkpoints/test-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/.ipynb_checkpoints/test-checkpoint.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/.ipynb_checkpoints/train-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/.ipynb_checkpoints/train-checkpoint.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/Untitled.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/Untitled.ipynb -------------------------------------------------------------------------------- /deepseek-rl-reasoning/base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/base_model.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/config.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/data/.ipynb_checkpoints/differentiated_prompts_zh-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/data/.ipynb_checkpoints/differentiated_prompts_zh-checkpoint.json -------------------------------------------------------------------------------- /deepseek-rl-reasoning/data/.ipynb_checkpoints/reasoning_data_zh0-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/data/.ipynb_checkpoints/reasoning_data_zh0-checkpoint.json -------------------------------------------------------------------------------- /deepseek-rl-reasoning/data/.ipynb_checkpoints/test-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/data/.ipynb_checkpoints/test-checkpoint.json -------------------------------------------------------------------------------- /deepseek-rl-reasoning/data/.ipynb_checkpoints/train-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/data/.ipynb_checkpoints/train-checkpoint.json -------------------------------------------------------------------------------- /deepseek-rl-reasoning/data/differentiated_prompts_zh.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/data/differentiated_prompts_zh.json -------------------------------------------------------------------------------- /deepseek-rl-reasoning/data/reasoning_data_zh0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/data/reasoning_data_zh0.json -------------------------------------------------------------------------------- /deepseek-rl-reasoning/data/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/data/test.json -------------------------------------------------------------------------------- /deepseek-rl-reasoning/data/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/data/train.json -------------------------------------------------------------------------------- /deepseek-rl-reasoning/data_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/data_test.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/evaluate.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/prompt_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/prompt_gen.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/rl_framework.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/rl_framework.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/test.py -------------------------------------------------------------------------------- /deepseek-rl-reasoning/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/deepseek-rl-reasoning/train.py -------------------------------------------------------------------------------- /inference_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/inference_rl.py -------------------------------------------------------------------------------- /langGraphPratic0/langGraphPratic0/deepReseachDemo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/langGraphPratic0/langGraphPratic0/deepReseachDemo.ipynb -------------------------------------------------------------------------------- /langGraphPratic0/langGraphPratic0/langGraph2-人在流程.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/langGraphPratic0/langGraphPratic0/langGraph2-人在流程.ipynb -------------------------------------------------------------------------------- /langGraphPratic0/langGraphPratic0/langGraph基础介绍0.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/langGraphPratic0/langGraphPratic0/langGraph基础介绍0.ipynb -------------------------------------------------------------------------------- /langGraphPratic0/langGraphPratic0/langGraph课程1-带记忆功能.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/langGraphPratic0/langGraphPratic0/langGraph课程1-带记忆功能.ipynb -------------------------------------------------------------------------------- /langGraphPratic0/langGraphPratic0/langGraph课程3定制消息格式.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/langGraphPratic0/langGraphPratic0/langGraph课程3定制消息格式.ipynb -------------------------------------------------------------------------------- /langGraphPratic0/langGraphPratic0/langGraph课程4对话版本分支管理.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/langGraphPratic0/langGraphPratic0/langGraph课程4对话版本分支管理.ipynb -------------------------------------------------------------------------------- /langGraphPratic0/langGraphPratic0/langGraph课程5多种Agent模式.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/langGraphPratic0/langGraphPratic0/langGraph课程5多种Agent模式.ipynb -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/requirements.txt -------------------------------------------------------------------------------- /rl_policy.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/rl_policy.pth -------------------------------------------------------------------------------- /run_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/run_inference.py -------------------------------------------------------------------------------- /tasks/.ipynb_checkpoints/math_solver-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/tasks/.ipynb_checkpoints/math_solver-checkpoint.py -------------------------------------------------------------------------------- /tasks/__pycache__/math_solver.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/tasks/__pycache__/math_solver.cpython-312.pyc -------------------------------------------------------------------------------- /tasks/math_solver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/tasks/math_solver.py -------------------------------------------------------------------------------- /train_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/train_rl.py -------------------------------------------------------------------------------- /validate_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liangwq/deepSeekRecurrence/HEAD/validate_rl.py --------------------------------------------------------------------------------