├── README.md ├── ReasoningTrack ├── Evaluation │ ├── dutrack.py │ ├── i2d.py │ └── sample0001 │ │ ├── data.json │ │ ├── img1.jpg │ │ └── img2.jpg ├── Reinforcement Learning │ ├── LICENSE │ ├── demo3.py │ ├── environment.yml │ ├── examples │ │ ├── aqwen2_5_vl_3b_fulldataset_grpo.sh │ │ ├── aqwen2_5_vl_3b_tnl2k_grpo.sh │ │ ├── baselines │ │ │ ├── qwen2_5_vl_3b_clevr.sh │ │ │ └── qwen2_5_vl_3b_geoqa8k.sh │ │ ├── bqwen2_5_vl_3b_tnl2k_grpo copy.sh │ │ ├── config.yaml │ │ ├── format_prompt │ │ │ ├── LTracking_format.jinja │ │ │ ├── math_format.jinja │ │ │ └── r1v_format.jinja │ │ ├── qwen0.5b_math_grpo.sh │ │ ├── qwen2_5_7b_math_grpo.sh │ │ ├── qwen2_5_vl_32b_geo3k_grpo.sh │ │ ├── qwen2_5_vl_3b_geo3k_grpo.sh │ │ ├── qwen2_5_vl_7b_geo3k_grpo.sh │ │ ├── qwen2_5_vl_7b_geo3k_reinforce.sh │ │ ├── qwen2_5_vl_7b_geo3k_swanlab.sh │ │ ├── qwen2_5_vl_7b_multi_image.sh │ │ ├── qwen3_4b_math_grpo.sh │ │ ├── reward_function │ │ │ ├── __pycache__ │ │ │ │ ├── math.cpython-310.pyc │ │ │ │ └── track.cpython-310.pyc │ │ │ ├── demo_results │ │ │ │ ├── initial_frame.jpg │ │ │ │ └── tracking_result.jpg │ │ │ ├── mathreward.py │ │ │ ├── r1v.py │ │ │ └── track.py │ │ └── runtime_env.yaml │ ├── run_full.sh │ ├── scripts │ │ └── model_merger.py │ ├── startvllm.sh │ └── verl │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── protocol.cpython-310.pyc │ │ ├── models │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── monkey_patch.cpython-310.pyc │ │ ├── monkey_patch.py │ │ └── transformers │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── flash_attention_utils.cpython-310.pyc │ │ │ └── qwen2_vl.cpython-310.pyc │ │ │ ├── flash_attention_utils.py │ │ │ └── qwen2_vl.py │ │ ├── protocol.py │ │ ├── single_controller │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-310.pyc │ │ ├── base │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── decorator.cpython-310.pyc │ │ │ │ ├── worker.cpython-310.pyc │ │ │ │ └── worker_group.cpython-310.pyc │ │ │ ├── decorator.py │ │ │ ├── register_center │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ └── ray.cpython-310.pyc │ │ │ │ └── ray.py │ │ │ ├── worker.py │ │ │ └── worker_group.py │ │ └── ray │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── base.cpython-310.pyc │ │ │ └── base.py │ │ ├── trainer │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── config.cpython-310.pyc │ │ │ ├── core_algos.cpython-310.pyc │ │ │ ├── data_loader.cpython-310.pyc │ │ │ ├── main.cpython-310.pyc │ │ │ ├── metrics.cpython-310.pyc │ │ │ └── ray_trainer.cpython-310.pyc │ │ ├── config.py │ │ ├── core_algos.py │ │ ├── data_loader.py │ │ ├── main.py │ │ ├── metrics.py │ │ └── ray_trainer.py │ │ ├── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── dataset.cpython-310.pyc │ │ │ ├── flops_counter.cpython-310.pyc │ │ │ ├── fsdp_utils.cpython-310.pyc │ │ │ ├── model_utils.cpython-310.pyc │ │ │ ├── py_functional.cpython-310.pyc │ │ │ ├── seqlen_balancing.cpython-310.pyc │ │ │ ├── tokenizer.cpython-310.pyc │ │ │ ├── torch_dtypes.cpython-310.pyc │ │ │ ├── torch_functional.cpython-310.pyc │ │ │ └── ulysses.cpython-310.pyc │ │ ├── checkpoint │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── checkpoint_manager.cpython-310.pyc │ │ │ │ └── 
fsdp_checkpoint_manager.cpython-310.pyc │ │ │ ├── checkpoint_manager.py │ │ │ └── fsdp_checkpoint_manager.py │ │ ├── dataset.py │ │ ├── flops_counter.py │ │ ├── fsdp_utils.py │ │ ├── logger │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── gen_logger.cpython-310.pyc │ │ │ │ └── logger.cpython-310.pyc │ │ │ ├── gen_logger.py │ │ │ └── logger.py │ │ ├── model_utils.py │ │ ├── py_functional.py │ │ ├── seqlen_balancing.py │ │ ├── tokenizer.py │ │ ├── torch_dtypes.py │ │ ├── torch_functional.py │ │ └── ulysses.py │ │ └── workers │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── config.cpython-310.pyc │ │ └── fsdp_workers.cpython-310.pyc │ │ ├── actor │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── base.cpython-310.pyc │ │ │ ├── config.cpython-310.pyc │ │ │ └── dp_actor.cpython-310.pyc │ │ ├── base.py │ │ ├── config.py │ │ └── dp_actor.py │ │ ├── config.py │ │ ├── critic │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── config.cpython-310.pyc │ │ ├── base.py │ │ ├── config.py │ │ └── dp_critic.py │ │ ├── fsdp_workers.py │ │ ├── reward │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── config.cpython-310.pyc │ │ │ └── function.cpython-310.pyc │ │ ├── config.py │ │ └── function.py │ │ ├── rollout │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── base.cpython-310.pyc │ │ │ ├── config.cpython-310.pyc │ │ │ └── vllm_rollout_spmd.cpython-310.pyc │ │ ├── base.py │ │ ├── config.py │ │ └── vllm_rollout_spmd.py │ │ └── sharding_manager │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── base.cpython-310.pyc │ │ ├── fsdp_ulysses.cpython-310.pyc │ │ └── fsdp_vllm.cpython-310.pyc │ │ ├── base.py │ │ ├── fsdp_ulysses.py │ │ └── fsdp_vllm.py └── Supervise fine-tuning │ ├── sft_dataset_GOT-10k.json │ ├── sft_dataset_OTB.json │ ├── sft_dataset_TNL2K.json │ ├── sft_dataset_TNLLT.json │ ├── sft_dataset_lasot.json │ └── training_args.yaml ├── TNLLT_Evaluation_Toolkit ├── LICENSE ├── run_tracker_performance_evaluation.m ├── sequence_evaluation_config │ ├── all_dataset.txt │ └── testing_set.txt ├── tmp_mat │ └── readme.txt └── utils │ ├── calc_rect_int.m │ ├── calc_seq_err_robust.m │ ├── config_plot_style.m │ ├── config_sequence.m │ ├── config_tracker.m │ ├── eval_tracker.m │ └── plot_draw_save.m ├── figures ├── SRPRNPR.png └── TNLLT_samples.png └── scripts ├── SFT └── transforme_json.py ├── TNL2KLTDataset ├── tnl_lt.py ├── tnl_lt_test_split.txt ├── tnl_lt_train_split.txt ├── tnl_lt_val_split.txt └── tnlltdataset.py ├── TNL2K_JE_json_generation.py ├── check_TNL2K_dataset.m └── text2audio_toolkit.py /ReasoningTrack/Evaluation/sample0001/data.json: -------------------------------------------------------------------------------- 1 | { 2 | "language_description": "the wooden ship on the river", 3 | "img1_groundtruth": "176,64.7031,243,232", 4 | "img2_groundtruth": "130,62.7031,258,225", 5 | "source_folder": "JE_Assian_ship_v01", 6 | "original_img1_name": "00715.png", 7 | "original_img2_name": "01091.png" 8 | } -------------------------------------------------------------------------------- /ReasoningTrack/Evaluation/sample0001/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Evaluation/sample0001/img1.jpg 
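The data.json above pairs a natural-language description of the target with axis-aligned ground-truth boxes for the two frames, each stored as a comma-separated "x,y,w,h" string. A minimal loading sketch for such a sample (the helper name is illustrative, not part of the repository):

import json
from typing import Dict, List, Tuple

def load_sample(sample_dir: str) -> Tuple[str, List[float], List[float]]:
    """Read one evaluation sample and parse its ground-truth boxes.

    Boxes are stored as "x,y,w,h" strings, e.g. "176,64.7031,243,232".
    """
    with open(f"{sample_dir}/data.json") as f:
        meta: Dict[str, str] = json.load(f)

    def parse(box: str) -> List[float]:
        return [float(v) for v in box.split(",")]

    return meta["language_description"], parse(meta["img1_groundtruth"]), parse(meta["img2_groundtruth"])

if __name__ == "__main__":
    text, box1, box2 = load_sample("ReasoningTrack/Evaluation/sample0001")
    print(text)  # the wooden ship on the river
    print(box1)  # [176.0, 64.7031, 243.0, 232.0]
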
-------------------------------------------------------------------------------- /ReasoningTrack/Evaluation/sample0001/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Evaluation/sample0001/img2.jpg -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/demo3.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import cv2 3 | import argparse 4 | import numpy as np 5 | # from datasets import load_dataset 6 | import os 7 | from PIL import Image 8 | 9 | def track_and_visualize(image1: np.ndarray, image2: np.ndarray, init_bbox: list, init_text_description: str, output_dir: str = "./demo_results"): 10 | 11 | # breakpoint() 12 | 13 | try: 14 | if isinstance(image1, Image.Image): 15 | image1 = np.array(image1) 16 | if isinstance(image2, Image.Image): 17 | image2 = np.array(image2) 18 | 19 | parameter_name = "dutrack_256_full" 20 | param_module = importlib.import_module(f'lib.test.parameter.dutrack') 21 | params = param_module.parameters(parameter_name, None) 22 | params.debug = False 23 | 24 | tracker_class = importlib.import_module(f'lib.test.tracker.dutrack').get_tracker_class() 25 | # breakpoint() 26 | tracker = tracker_class(params) 27 | 28 | init_info = { 29 | 'init_bbox': init_bbox, 30 | 'init_text_description': init_text_description 31 | } 32 | 33 | out = tracker.initialize(image1, init_info) 34 | 35 | info = {} 36 | out = tracker.track(image2, info) 37 | 38 | return True, out 39 | 40 | except Exception as e: 41 | breakpoint() 42 | print(f"Error during tracking and visualization: {str(e)}") 43 | return False, None 44 | 45 | def batch_track_and_visualize(img1s: list, img2s: list, init_bboxes: list, init_text_descriptions: list): 46 | 47 | try: 48 | 49 | parameter_name = "dutrack_256_full" 50 | param_module = importlib.import_module(f'lib.test.parameter.dutrack') 51 | params = param_module.parameters(parameter_name, None) 52 | params.debug = False 53 | 54 | tracker_class = importlib.import_module(f'lib.test.tracker.dutrack').get_tracker_class() 55 | tracker = tracker_class(params) 56 | 57 | results = [] 58 | for image1, image2, init_bbox, init_text_description in zip(img1s, img2s, init_bboxes, init_text_descriptions): 59 | if isinstance(image1, Image.Image): 60 | if image1.mode == 'RGBA': 61 | image1 = image1.convert('RGB') 62 | image2 = image2.convert('RGB') 63 | if image1.mode == 'L': 64 | image1 = image1.convert('RGB') 65 | image2 = image2.convert('RGB') 66 | image1 = np.array(image1) 67 | if len(image1.shape) == 2: 68 | breakpoint() 69 | if isinstance(image2, Image.Image): 70 | image2 = np.array(image2) 71 | 72 | 73 | init_info = { 74 | 'init_bbox': init_bbox, 75 | 'init_text_description': init_text_description 76 | } 77 | 78 | out = tracker.initialize(image1, init_info) 79 | 80 | 81 | info = {} 82 | out = tracker.track(image2, info) 83 | 84 | results.append(out) 85 | 86 | return True, results 87 | 88 | except Exception as e: 89 | # breakpoint() 90 | print(f"Error during batch tracking and visualization: {str(e)}") 91 | return False, None 92 | 93 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/aqwen2_5_vl_3b_fulldataset_grpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 
set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH="/rydata/jinliye/RL/vltracking/EasyR1/LongTimeTracking/LLaMA-Factory/saves/Qwen2.5-VL-3B-Instruct/full/train_2025-05-22-08-42-07-fullfull/checkpoint-372" # replace it with your local file path 8 | # MODEL_PATH="/rydata/wengchaoliu/qwen2.5vl-3b/" 9 | CUDA_VISIBLE_DEVICES=7 nohup python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | data.train_files=Jinliye/RLFullDataset@train \ 12 | data.val_files=Jinliye/RLFullDataset@test \ 13 | data.rollout_batch_size=256 \ 14 | worker.actor.model.model_path=${MODEL_PATH} \ 15 | worker.rollout.tensor_parallel_size=1 \ 16 | worker.actor.optim.strategy=adamw_bf16\ 17 | worker.rollout.limit_images=2 \ 18 | trainer.experiment_name=qwen2_5_vl_3b_tnllt_grpo \ 19 | trainer.n_gpus_per_node=1 \ 20 | trainer.logger=['console','tensorboard']\ 21 | 22 | # trainer.save_checkpoint_path= "/rydata/jinliye/RL/vltracking/EasyR1/checkpoint/TNLLT" \ 23 | # data.format_prompt= ./examples/format_prompt/LTracking_format.jinja \ 24 | # trainer.total_epochs = 1 \ 25 | # > /rydata/jinliye/RL/vltracking/EasyR1/log/training.log 2>&1 26 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/aqwen2_5_vl_3b_tnl2k_grpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH="/rydata/jinliye/RL/vltracking/EasyR1/LongTimeTracking/LLaMA-Factory/saves/Qwen2.5-VL-3B-Instruct/full/train_2025-05-09-13-12-15/checkpoint-250" # replace it with your local file path 8 | # MODEL_PATH="/rydata/wengchaoliu/qwen2.5vl-3b/" 9 | CUDA_VISIBLE_DEVICES=2 nohup python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | data.train_files=Jinliye/TNL2KLTRLDataset2@train \ 12 | data.val_files=Jinliye/TNL2KLTRLDataset2@test \ 13 | data.rollout_batch_size=256 \ 14 | worker.actor.model.model_path=${MODEL_PATH} \ 15 | worker.rollout.tensor_parallel_size=1 \ 16 | worker.actor.optim.strategy=adamw_bf16\ 17 | worker.rollout.limit_images=2 \ 18 | trainer.experiment_name=qwen2_5_vl_3b_tnllt_grpo \ 19 | trainer.n_gpus_per_node=1 \ 20 | trainer.logger=['console','tensorboard']\ 21 | 22 | # trainer.save_checkpoint_path= "/rydata/jinliye/RL/vltracking/EasyR1/checkpoint/TNLLT" \ 23 | # data.format_prompt= ./examples/format_prompt/LTracking_format.jinja \ 24 | # trainer.total_epochs = 1 \ 25 | # > /rydata/jinliye/RL/vltracking/EasyR1/log/training.log 2>&1 26 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/baselines/qwen2_5_vl_3b_clevr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path 8 | 9 | python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | data.train_files=BUAADreamer/clevr_count_70k@train \ 12 | data.val_files=BUAADreamer/clevr_count_70k@test \ 13 | data.format_prompt=./examples/format_prompt/r1v_format.jinja \ 14 | worker.actor.model.model_path=${MODEL_PATH} \ 15 | worker.rollout.tensor_parallel_size=1 \ 16 | worker.reward.reward_type=sequential \ 17 | worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \ 18 | trainer.experiment_name=qwen2_5_vl_3b_clevr \ 19 | trainer.n_gpus_per_node=2 20 | 
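For reference, demo3.py above exposes the tracker behind two helpers: track_and_visualize initializes DUTrack on frame 1 with a box and a text description and then tracks frame 2, while batch_track_and_visualize reuses a single tracker instance over lists of frame pairs. A hedged usage sketch (the image paths, box values, and the "target_bbox" output key are assumptions for illustration; it also assumes the lib.test.* DUTrack packages imported by demo3.py are on the path):

import numpy as np
from PIL import Image

from demo3 import track_and_visualize  # helper defined in demo3.py above

# Two consecutive frames plus an initial [x, y, w, h] box and description
# (values taken from the evaluation sample shown earlier).
frame1 = np.array(Image.open("sample0001/img1.jpg").convert("RGB"))
frame2 = np.array(Image.open("sample0001/img2.jpg").convert("RGB"))
init_bbox = [176, 64.7031, 243, 232]
description = "the wooden ship on the river"

ok, out = track_and_visualize(frame1, frame2, init_bbox, description)
if ok:
    # The tracker's output dict is expected to hold the frame-2 prediction,
    # e.g. under a "target_bbox" key in the lib.test tracker convention.
    print(out.get("target_bbox"))
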
-------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/baselines/qwen2_5_vl_3b_geoqa8k.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct # replace it with your local file path 8 | 9 | python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | data.train_files=leonardPKU/GEOQA_8K_R1V@train \ 12 | data.val_files=leonardPKU/GEOQA_8K_R1V@test \ 13 | data.format_prompt=./examples/format_prompt/r1v_format.jinja \ 14 | worker.actor.model.model_path=${MODEL_PATH} \ 15 | worker.rollout.tensor_parallel_size=1 \ 16 | worker.reward.reward_type=sequential \ 17 | worker.reward.reward_function=./examples/reward_function/r1v.py:compute_score \ 18 | trainer.experiment_name=qwen2_5_vl_3b_geoqa8k \ 19 | trainer.n_gpus_per_node=8 20 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/bqwen2_5_vl_3b_tnl2k_grpo copy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH="/rydata/jinliye/RL/vltracking/EasyR1/LongTimeTracking/LLaMA-Factory/saves/Qwen2.5-VL-3B-Instruct/full/train_2025-05-09-13-12-15/checkpoint-100/" # replace it with your local file path 8 | # MODEL_PATH="/rydata/wengchaoliu/qwen2.5vl-3b/" 9 | 10 | LOG_FILE="/rydata/jinliye/RL/vltracking/EasyR1/log/$(date +'%Y-%m-%d_%H-%M-%S')_logfile.log" # 添加日期前缀 11 | 12 | CUDA_VISIBLE_DEVICES=2 nohup python3 -m verl.trainer.main \ 13 | config=examples/config.yaml \ 14 | data.train_files=Jinliye/TNL2KLTRLDataset2@train \ 15 | data.val_files=Jinliye/TNL2KLTRLDataset2@test \ 16 | data.rollout_batch_size=256 \ 17 | worker.actor.model.model_path=${MODEL_PATH} \ 18 | worker.rollout.tensor_parallel_size=1 \ 19 | worker.actor.optim.strategy=adamw_bf16 \ 20 | worker.rollout.limit_images=2 \ 21 | trainer.experiment_name=qwen2_5_vl_3b_tnllt_grpo \ 22 | trainer.n_gpus_per_node=1 \ 23 | trainer.logger=['console'] \ 24 | > "$LOG_FILE" 2>&1 & 25 | 26 | mv nohup.out ${LOG_FILE} 27 | # trainer.save_checkpoint_path= "/rydata/jinliye/RL/vltracking/EasyR1/checkpoint/TNLLT" \ 28 | # data.format_prompt= ./examples/format_prompt/LTracking_format.jinja \ 29 | # trainer.total_epochs = 1 \ 30 | # > /rydata/jinliye/RL/vltracking/EasyR1/log/training.log 2>&1 31 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/config.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | train_files: hiyouga/math12k@train 3 | val_files: hiyouga/math12k@test 4 | prompt_key: problem 5 | answer_key: answer 6 | image_key: images 7 | max_prompt_length: 4096 8 | max_response_length: 2048 9 | rollout_batch_size: 512 10 | # rollout_batch_size: 64 11 | val_batch_size: 1024 12 | format_prompt: ./examples/format_prompt/LTracking_format.jinja 13 | override_chat_template: null 14 | shuffle: true 15 | seed: 1 16 | max_pixels: 262144 #4194304 17 | min_pixels: 59536 #262144 18 | filter_overlong_prompts: true 19 | 20 | algorithm: 21 | adv_estimator: grpo 22 | disable_kl: false 23 | use_kl_loss: true 24 | kl_penalty: low_var_kl 25 | kl_coef: 1.0e-2 26 | 27 | worker: 28 | actor: 29 | global_batch_size: 128 30 | micro_batch_size_per_device_for_update: 4 31 | 
micro_batch_size_per_device_for_experience: 16 32 | max_grad_norm: 1.0 33 | padding_free: true 34 | ulysses_sequence_parallel_size: 1 35 | model: 36 | model_path: Qwen/Qwen2.5-7B-Instruct 37 | enable_gradient_checkpointing: true 38 | trust_remote_code: false 39 | freeze_vision_tower: false 40 | optim: 41 | lr: 1.0e-6 42 | weight_decay: 1.0e-2 43 | strategy: adamw_bf16 # {adamw, adamw_bf16} 44 | lr_warmup_ratio: 0.0 45 | fsdp: 46 | enable_full_shard: true 47 | enable_cpu_offload: false 48 | enable_rank0_init: true 49 | torch_dtype: bf16 50 | offload: 51 | offload_params: true # true: more CPU memory; false: more GPU memory 52 | offload_optimizer: true # true: more CPU memory; false: more GPU memory 53 | 54 | rollout: 55 | n: 5 56 | temperature: 1.0 57 | top_p: 0.99 58 | gpu_memory_utilization: 0.6 #0.6 59 | enforce_eager: true 60 | enable_chunked_prefill: false 61 | tensor_parallel_size: 2 62 | limit_images: 0 63 | val_override_config: 64 | temperature: 0.5 65 | n: 1 66 | 67 | ref: 68 | fsdp: 69 | enable_full_shard: true 70 | enable_cpu_offload: true # true: more CPU memory; false: more GPU memory 71 | enable_rank0_init: true 72 | offload: 73 | offload_params: false 74 | 75 | reward: 76 | reward_type: batch 77 | reward_function: ./examples/reward_function/track.py:compute_score_batch 78 | 79 | trainer: 80 | total_epochs: 2 81 | max_steps: null 82 | project_name: easy_r1 83 | experiment_name: qwen2_5_7b_math_grpo 84 | logger: ["console", "wandb"] 85 | nnodes: 1 86 | n_gpus_per_node: 8 87 | val_freq: 5 # -1 to disable 88 | val_before_train: true 89 | val_only: false 90 | val_generations_to_log: 3 91 | save_freq: 10 # -1 to disable 92 | save_limit: 4 # -1 to disable 93 | save_checkpoint_path: /wangx_nas/JLY/Code/LongTimeTracking/RLModels/easyr1/FULL_ioubf16_sft372_removeioureward 94 | load_checkpoint_path: 95 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/format_prompt/LTracking_format.jinja: -------------------------------------------------------------------------------- 1 | {{ content | trim }} You are a visual tracking assistant that strictly analyzes only visual elements (ignore all text in images). Given an initial description and two consecutive frames (Frame 1 and Frame 2), first verify if the target object in Frame 1 matches the description, then determine if the description needs updating based on visual changes between frames (like position, shape, or color). Always respond in the exact format: [your reasoning process]yes/no[updated or original description for Frame 2] -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/format_prompt/math_format.jinja: -------------------------------------------------------------------------------- 1 | {{ content | trim }} You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}. 2 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/format_prompt/r1v_format.jinja: -------------------------------------------------------------------------------- 1 | {{ content | trim }} A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.
The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer> 2 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/qwen0.5b_math_grpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH="Qwen/Qwen2.5-0.5B-Instruct" # replace it with your local file path 8 | 9 | CUDA_VISIBLE_DEVICES=4 python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | data.train_files=xiaodongguaAIGC/X-R1-750@train \ 12 | data.val_files=xiaodongguaAIGC/X-R1-750@test \ 13 | worker.actor.model.model_path=${MODEL_PATH} \ 14 | worker.rollout.tensor_parallel_size=1 \ 15 | worker.actor.optim.strategy=adamw_bf16\ 16 | trainer.experiment_name=qwen2_5_vl_3b_geo_grpo \ 17 | trainer.n_gpus_per_node=1\ 18 | # trainer.logger=['console']\ 19 | # > /rydata/jinliye/RL/vltracking/EasyR1/log/training.log 2>&1 20 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/qwen2_5_7b_math_grpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH=Qwen/Qwen2.5-7B-Instruct # replace it with your local file path 8 | 9 | python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | worker.actor.model.model_path=${MODEL_PATH} 12 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/qwen2_5_vl_32b_geo3k_grpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH=Qwen/Qwen2.5-VL-32B-Instruct # replace it with your local file path 8 | 9 | python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | data.train_files=hiyouga/geometry3k@train \ 12 | data.val_files=hiyouga/geometry3k@test \ 13 | worker.actor.model.model_path=${MODEL_PATH} \ 14 | worker.actor.micro_batch_size_per_device_for_update=1 \ 15 | worker.actor.micro_batch_size_per_device_for_experience=8 \ 16 | worker.actor.fsdp.torch_dtype=bf16 \ 17 | worker.actor.optim.strategy=adamw_bf16 \ 18 | worker.rollout.tensor_parallel_size=8 \ 19 | trainer.experiment_name=qwen2_5_vl_32b_geo_grpo \ 20 | trainer.n_gpus_per_node=8 21 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/qwen2_5_vl_3b_geo3k_grpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH="/rydata/wengchaoliu/qwen2.5vl-3b/" # replace it with your local file path 8 | 9 | CUDA_VISIBLE_DEVICES=4 python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | data.train_files=hiyouga/geometry3k@train \ 12 | data.val_files=hiyouga/geometry3k@test \ 13 | worker.actor.model.model_path=${MODEL_PATH} \ 14 | worker.rollout.tensor_parallel_size=1 \ 15 | worker.actor.optim.strategy=adamw_bf16 \ 16 | trainer.experiment_name=qwen2_5_vl_3b_geo_grpo \ 17 | trainer.n_gpus_per_node=1 \ 18 | # trainer.logger=['console']\ 19 | # > /rydata/jinliye/RL/vltracking/EasyR1/log/training.log 2>&1 20 | --------------------------------------------------------------------------------
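Each launch script above layers dotted key=value overrides (for example data.rollout_batch_size=256 or worker.rollout.limit_images=2) on top of examples/config.yaml. A small sketch of how overrides in this style can be merged over the YAML using OmegaConf; it is illustrative and not the trainer's actual entry point:

from omegaconf import OmegaConf

# Base config plus dotted overrides, mirroring the shell invocations
# "python3 -m verl.trainer.main config=examples/config.yaml key=value ...".
base = OmegaConf.load("examples/config.yaml")
overrides = OmegaConf.from_dotlist([
    "data.rollout_batch_size=256",
    "worker.rollout.limit_images=2",
    "trainer.n_gpus_per_node=1",
])
config = OmegaConf.merge(base, overrides)

print(config.worker.rollout.limit_images)  # 2 (from the override)
print(config.data.max_prompt_length)       # 4096 (from the YAML)
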
/ReasoningTrack/Reinforcement Learning/examples/qwen2_5_vl_7b_geo3k_grpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path 8 | 9 | python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | data.train_files=hiyouga/geometry3k@train \ 12 | data.val_files=hiyouga/geometry3k@test \ 13 | worker.actor.model.model_path=${MODEL_PATH} \ 14 | trainer.experiment_name=qwen2_5_vl_7b_geo_grpo \ 15 | trainer.n_gpus_per_node=8 16 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/qwen2_5_vl_7b_geo3k_reinforce.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path 8 | 9 | python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | data.train_files=hiyouga/geometry3k@train \ 12 | data.val_files=hiyouga/geometry3k@test \ 13 | worker.actor.model.model_path=${MODEL_PATH} \ 14 | algorithm.adv_estimator=reinforce_plus_plus \ 15 | algorithm.use_kl_loss=false \ 16 | algorithm.kl_penalty=kl \ 17 | algorithm.kl_coef=1.0e-3 \ 18 | trainer.experiment_name=qwen2_5_vl_7b_geo_reinforce_pp \ 19 | trainer.n_gpus_per_node=8 20 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/qwen2_5_vl_7b_geo3k_swanlab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path 8 | 9 | python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | data.train_files=hiyouga/geometry3k@train \ 12 | data.val_files=hiyouga/geometry3k@test \ 13 | worker.actor.model.model_path=${MODEL_PATH} \ 14 | trainer.experiment_name=qwen2_5_vl_7b_geo_grpo \ 15 | trainer.logger=['console','swanlab'] \ 16 | trainer.n_gpus_per_node=8 17 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/qwen2_5_vl_7b_multi_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # REMINDER: this script uses test data split and should ONLY be used for debugging. DO NOT use for training. 
3 | 4 | set -x 5 | 6 | export PYTHONUNBUFFERED=1 7 | 8 | # MODEL_PATH=Qwen/Qwen2.5-VL-7B-Instruct # replace it with your local file path 9 | MODEL_PATH="/rydata/jinliye/RL/vltracking/EasyR1/LongTimeTracking/LLaMA-Factory/saves/Qwen2.5-VL-3B-Instruct/full/train_2025-05-09-13-12-15/checkpoint-100/" # replace it with your local file path 10 | 11 | CUDA_VISIBLE_DEVICES=0,3 python3 -m verl.trainer.main \ 12 | config=examples/config.yaml \ 13 | data.train_files=hiyouga/journeybench-multi-image-vqa@train \ 14 | data.val_files=hiyouga/journeybench-multi-image-vqa@test \ 15 | data.rollout_batch_size=256 \ 16 | worker.actor.model.model_path=/rydata/jinliye/RL/vltracking/EasyR1/LongTimeTracking/LLaMA-Factory/saves/Qwen2.5-VL-3B-Instruct/full/train_2025-05-09-13-12-15/checkpoint-100/ \ 17 | worker.rollout.limit_images=2 \ 18 | trainer.experiment_name=qwen2_5_vl_7b_multi_image \ 19 | trainer.n_gpus_per_node=2 20 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/qwen3_4b_math_grpo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | export PYTHONUNBUFFERED=1 6 | 7 | MODEL_PATH=Qwen/Qwen3-4B # replace it with your local file path 8 | 9 | python3 -m verl.trainer.main \ 10 | config=examples/config.yaml \ 11 | data.max_response_length=4096 \ 12 | worker.actor.model.model_path=${MODEL_PATH} \ 13 | trainer.experiment_name=qwen3_4b_math_grpo 14 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/reward_function/__pycache__/math.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/examples/reward_function/__pycache__/math.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/reward_function/__pycache__/track.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/examples/reward_function/__pycache__/track.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/reward_function/demo_results/initial_frame.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/examples/reward_function/demo_results/initial_frame.jpg -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/reward_function/demo_results/tracking_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/examples/reward_function/demo_results/tracking_result.jpg -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/reward_function/mathreward.py: -------------------------------------------------------------------------------- 
1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import re 16 | from typing import Dict, List 17 | 18 | from mathruler.grader import extract_boxed_content, grade_answer 19 | 20 | 21 | def format_reward(predict: str) -> float: 22 | pattern = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL) 23 | format_match = re.fullmatch(pattern, predict) 24 | return 1.0 if format_match else 0.0 25 | 26 | 27 | def accuracy_reward(predict: str, ground_truth: str) -> float: 28 | answer = extract_boxed_content(predict) 29 | return 1.0 if grade_answer(answer, ground_truth) else 0.0 30 | 31 | 32 | def compute_score(predicts: List[str], ground_truths: List[str], levellist: List[str], format_weight: float = 0.1) -> List[Dict[str, float]]: 33 | scores = [] 34 | for predict, ground_truth in zip(predicts, ground_truths): 35 | # breakpoint() 36 | predict = re.sub(r"\s*(<|>|/)\s*", r"\1", predict) # handle qwen2.5vl-32b format 37 | format_score = format_reward(predict) 38 | accuracy_score = accuracy_reward(predict, ground_truth) 39 | scores.append( 40 | { 41 | "overall": (1 - format_weight) * accuracy_score + format_weight * format_score, 42 | "format": format_score, 43 | "accuracy": accuracy_score, 44 | } 45 | ) 46 | 47 | return scores 48 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/reward_function/r1v.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | import re 16 | from typing import Dict 17 | 18 | from mathruler.grader import grade_answer 19 | 20 | 21 | def format_reward(predict: str) -> float: 22 | pattern = re.compile(r"<think>.*?</think>\s*<answer>.*?</answer>", re.DOTALL) 23 | format_match = re.fullmatch(pattern, predict) 24 | return 1.0 if format_match else 0.0 25 | 26 | 27 | def accuracy_reward(predict: str, ground_truth: str) -> float: 28 | try: 29 | content_match = re.search(r"<answer>(.*?)</answer>", predict) 30 | given_answer = content_match.group(1).strip() if content_match else predict.strip() 31 | if grade_answer(given_answer, ground_truth.strip()): 32 | return 1.0 33 | 34 | except Exception: 35 | pass 36 | 37 | return 0.0 38 | 39 | 40 | def compute_score(predict: str, ground_truth: str, format_weight: float = 0.5) -> Dict[str, float]: 41 | format_score = format_reward(predict) 42 | accuracy_score = accuracy_reward(predict, ground_truth) 43 | return { 44 | "overall": (1 - format_weight) * accuracy_score + format_weight * format_score, 45 | "format": format_score, 46 | "accuracy": accuracy_score, 47 | } 48 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/examples/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TOKENIZERS_PARALLELISM: "true" 5 | NCCL_DEBUG: "WARN" 6 | VLLM_LOGGING_LEVEL: "WARN" 7 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 8 | PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False" 9 | PYTHONUNBUFFERED: "1" 10 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/run_full.sh: -------------------------------------------------------------------------------- 1 | LOG_FILE="/rydata/jinliye/RL/vltracking/EasyR1/log/$(date +'%Y-%m-%d_%H-%M-%S')_logfile_fulldataset_removeioureward.log" # add a date prefix to the log file name 2 | 3 | bash examples/aqwen2_5_vl_3b_fulldataset_grpo.sh > ${LOG_FILE} 2>&1 & 4 | 5 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/startvllm.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=2 vllm serve /wangx_nas/JLY/Code/LongTimeTracking/RLModels/easyr1/TNLLT_ioubf16/global_step_90/actor/huggingface \ 2 | --port 8000 \ 3 | --host 0.0.0.0 \ 4 | --dtype bfloat16 \ 5 | --limit-mm-per-prompt image=5,video=5 \ 6 | --gpu-memory-utilization 0.8 7 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | import os 16 | 17 | from .utils.py_functional import is_package_available 18 | 19 | 20 | if is_package_available("modelscope"): 21 | from modelscope.utils.hf_util import patch_hub # type: ignore 22 | 23 | 24 | __version__ = "0.3.1.dev0" 25 | 26 | 27 | if os.getenv("USE_MODELSCOPE_HUB", "0").lower() in ["true", "y", "1"]: 28 | # Patch hub to download models from modelscope to speed up. 29 | if not is_package_available("modelscope"): 30 | raise ImportError("You are using the modelscope hub, please install modelscope by `pip install modelscope`.") 31 | 32 | patch_hub() 33 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/__pycache__/protocol.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/__pycache__/protocol.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/models/__pycache__/monkey_patch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/models/__pycache__/monkey_patch.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/models/monkey_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS 17 | 18 | from .transformers.flash_attention_utils import flash_attention_forward 19 | from .transformers.qwen2_vl import qwen2_vl_attn_forward 20 | 21 | 22 | def apply_ulysses_patch(model_type: str) -> None: 23 | if model_type in ("llama", "gemma", "gemma2", "mistral", "qwen2", "qwen3", "qwen3_moe"): 24 | ALL_ATTENTION_FUNCTIONS["flash_attention_2"] = flash_attention_forward 25 | elif model_type in ("qwen2_vl", "qwen2_5_vl"): 26 | from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLFlashAttention2 27 | from transformers.models.qwen2_vl.modeling_qwen2_vl import Qwen2VLFlashAttention2 28 | 29 | Qwen2VLFlashAttention2.forward = qwen2_vl_attn_forward 30 | Qwen2_5_VLFlashAttention2.forward = qwen2_vl_attn_forward 31 | else: 32 | raise NotImplementedError(f"Model architecture {model_type} is not supported yet.") 33 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/models/transformers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/models/transformers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/models/transformers/__pycache__/flash_attention_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/models/transformers/__pycache__/flash_attention_utils.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/models/transformers/__pycache__/qwen2_vl.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/models/transformers/__pycache__/qwen2_vl.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/single_controller/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .worker import Worker 16 | from .worker_group import ClassWithInitArgs, ResourcePool, WorkerGroup 17 | 18 | 19 | __all__ = ["ClassWithInitArgs", "ResourcePool", "Worker", "WorkerGroup"] 20 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/base/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/single_controller/base/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/base/__pycache__/decorator.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/single_controller/base/__pycache__/decorator.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/base/__pycache__/worker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/single_controller/base/__pycache__/worker.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import ray 16 | 17 | 18 | @ray.remote 19 | class WorkerGroupRegisterCenter: 20 | def __init__(self, rank_zero_info): 21 | self.rank_zero_info = rank_zero_info 22 | 23 | def get_rank_zero_info(self): 24 | return self.rank_zero_info 25 | 26 | 27 | def create_worker_group_register_center(name, info): 28 | return WorkerGroupRegisterCenter.options(name=name).remote(info) 29 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/base/worker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | the class for Worker 16 | """ 17 | 18 | import os 19 | import socket 20 | from dataclasses import dataclass 21 | from typing import Tuple 22 | 23 | import ray 24 | import torch 25 | 26 | from .decorator import Dispatch, Execute, register 27 | from .register_center.ray import create_worker_group_register_center 28 | 29 | 30 | @dataclass 31 | class DistRankInfo: 32 | tp_rank: int 33 | dp_rank: int 34 | pp_rank: int 35 | 36 | 37 | @dataclass 38 | class DistGlobalInfo: 39 | tp_size: int 40 | dp_size: int 41 | pp_size: int 42 | 43 | 44 | class WorkerHelper: 45 | def _get_node_ip(self) -> str: 46 | host_ipv4 = os.getenv("MY_HOST_IP", None) 47 | host_ipv6 = os.getenv("MY_HOST_IPV6", None) 48 | host_ip_by_env = host_ipv4 or host_ipv6 49 | host_ip_by_sdk = ray._private.services.get_node_ip_address() 50 | 51 | host_ip = host_ip_by_env or host_ip_by_sdk 52 | return host_ip 53 | 54 | def _get_free_port(self) -> int: 55 | with socket.socket() as sock: 56 | sock.bind(("", 0)) 57 | return sock.getsockname()[1] 58 | 59 | def get_availale_master_addr_port(self) -> Tuple[str, str]: 60 | return self._get_node_ip(), str(self._get_free_port()) 61 | 62 | def _get_pid(self): 63 | return 64 | 65 | 66 | class WorkerMeta: 67 | keys = [ 68 | "WORLD_SIZE", 69 | "RANK", 70 | "LOCAL_WORLD_SIZE", 71 | "LOCAL_RANK", 72 | "MASTER_ADDR", 73 | "MASTER_PORT", 74 | "CUDA_VISIBLE_DEVICES", 75 | ] 76 | 77 | def __init__(self, store) -> None: 78 | self._store = store 79 | 80 | def to_dict(self): 81 | return {f"_{key.lower()}": self._store.get(f"_{key.lower()}", None) for key in WorkerMeta.keys} 82 | 83 | 84 | # we assume that in each WorkerGroup, there is a Master Worker 85 | class Worker(WorkerHelper): 86 | """A (distributed) worker.""" 87 | 88 | _world_size: int 89 | _rank: int 90 | _local_world_size: int 91 | _local_rank: int 92 | _master_addr: str 93 | _master_port: str 94 | _cuda_visible_devices: str 95 | 96 | def __new__(cls, *args, **kwargs): 97 | instance = super().__new__(cls) 98 | 99 | # note that here we use int to distinguish 100 | disable_worker_init = int(os.getenv("DISABLE_WORKER_INIT", 0)) 101 | if disable_worker_init: 102 | return instance 103 | 104 | rank = os.getenv("RANK", None) 105 | worker_group_prefix = os.getenv("WG_PREFIX", None) 106 | 107 | # when decorator @ray.remote applies, __new__ will be called while we don't want to apply _configure_before_init 108 | if None not in [rank, worker_group_prefix] and "ActorClass(" not in cls.__name__: 109 | instance._configure_before_init(f"{worker_group_prefix}_register_center", int(rank)) 110 | 111 | return instance 112 | 113 | def _configure_before_init(self, register_center_name: str, rank: int): 114 | assert isinstance(rank, int), f"rank must be int, instead of {type(rank)}" 115 | 116 | if rank == 0: 117 | master_addr, master_port = self.get_availale_master_addr_port() 118 | rank_zero_info = { 119 | "MASTER_ADDR": master_addr, 120 | "MASTER_PORT": master_port, 121 | } 122 | self.register_center = create_worker_group_register_center(name=register_center_name, info=rank_zero_info) 123 | os.environ.update(rank_zero_info) 124 | 125 | def __init__(self, cuda_visible_devices=None) -> None: 126 | # construct a meta from envrionment variable. 
Note that the import must be inside the class because it is executed remotely 127 | world_size = int(os.getenv("WORLD_SIZE")) 128 | rank = int(os.getenv("RANK")) 129 | self._rank = rank 130 | self._world_size = world_size 131 | 132 | if "AMD" in torch.cuda.get_device_name(): 133 | os.environ["CUDA_VISIBLE_DEVICES"] = os.getenv("ROCR_VISIBLE_DEVICES") 134 | os.environ["LOCAL_RANK"] = os.getenv("RAY_LOCAL_RANK") 135 | cuda_visible_devices = os.getenv("LOCAL_RANK", "0") 136 | torch.cuda.set_device(int(cuda_visible_devices)) 137 | 138 | master_addr = os.getenv("MASTER_ADDR") 139 | master_port = os.getenv("MASTER_PORT") 140 | 141 | local_world_size = int(os.getenv("LOCAL_WORLD_SIZE", "1")) 142 | local_rank = int(os.getenv("LOCAL_RANK", "0")) 143 | 144 | store = { 145 | "_world_size": world_size, 146 | "_rank": rank, 147 | "_local_world_size": local_world_size, 148 | "_local_rank": local_rank, 149 | "_master_addr": master_addr, 150 | "_master_port": master_port, 151 | } 152 | if cuda_visible_devices is not None: 153 | store["_cuda_visible_devices"] = cuda_visible_devices 154 | 155 | meta = WorkerMeta(store=store) 156 | self._configure_with_meta(meta=meta) 157 | 158 | def _configure_with_meta(self, meta: WorkerMeta): 159 | """ 160 | This function should only be called inside by WorkerGroup 161 | """ 162 | assert isinstance(meta, WorkerMeta) 163 | self.__dict__.update(meta.to_dict()) # this is hacky 164 | # print(f"__dict__: {self.__dict__}") 165 | for key in WorkerMeta.keys: 166 | val = self.__dict__.get(f"_{key.lower()}", None) 167 | if val is not None: 168 | # print(f"set {key} to {val}") 169 | os.environ[key] = str(val) 170 | 171 | os.environ["REDIS_STORE_SERVER_HOST"] = ( 172 | str(self._master_addr).replace("[", "").replace("]", "") if self._master_addr else "" 173 | ) 174 | 175 | def get_master_addr_port(self): 176 | return self._master_addr, self._master_port 177 | 178 | def get_cuda_visible_devices(self): 179 | cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES", "not set") 180 | return cuda_visible_devices 181 | 182 | def print_rank0(self, *args, **kwargs): 183 | if self.rank == 0: 184 | print(*args, **kwargs) 185 | 186 | @property 187 | def world_size(self): 188 | return self._world_size 189 | 190 | @property 191 | def rank(self): 192 | return self._rank 193 | 194 | @register(dispatch_mode=Dispatch.DP_COMPUTE_PROTO_WITH_FUNC) 195 | def execute_with_func_generator(self, func, *args, **kwargs): 196 | ret_proto = func(self, *args, **kwargs) 197 | return ret_proto 198 | 199 | @register(dispatch_mode=Dispatch.ALL_TO_ALL, execute_mode=Execute.RANK_ZERO) 200 | def execute_func_rank_zero(self, func, *args, **kwargs): 201 | result = func(*args, **kwargs) 202 | return result 203 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import RayClassWithInitArgs, RayResourcePool, RayWorkerGroup, create_colocated_worker_cls 16 | 17 | 18 | __all__ = ["RayClassWithInitArgs", "RayResourcePool", "RayWorkerGroup", "create_colocated_worker_cls"] 19 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/single_controller/ray/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/single_controller/ray/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/core_algos.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/core_algos.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/data_loader.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/data_loader.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/main.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/main.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/metrics.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/metrics.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/ray_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/trainer/__pycache__/ray_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | PPO config 16 | """ 17 | 18 | import os 19 | from dataclasses import asdict, dataclass, field, fields, is_dataclass 20 | from typing import Optional, Tuple 21 | 22 | from ..workers.config import WorkerConfig 23 | 24 | 25 | def recursive_post_init(dataclass_obj): 26 | if hasattr(dataclass_obj, "post_init"): 27 | dataclass_obj.post_init() 28 | 29 | for attr in fields(dataclass_obj): 30 | if is_dataclass(getattr(dataclass_obj, attr.name)): 31 | recursive_post_init(getattr(dataclass_obj, attr.name)) 32 | 33 | 34 | @dataclass 35 | class DataConfig: 36 | train_files: str = "" 37 | val_files: str = "" 38 | prompt_key: str = "prompt" 39 | answer_key: str = "answer" 40 | image_key: str = "images" 41 | max_prompt_length: int = 512 42 | max_response_length: int = 512 43 | rollout_batch_size: int = 512 44 | val_batch_size: int = -1 45 | format_prompt: Optional[str] = None 46 | override_chat_template: Optional[str] = None 47 | shuffle: bool = True 48 | seed: int = 1 49 | max_pixels: int = 4194304 50 | min_pixels: int = 262144 51 | filter_overlong_prompts: bool = True 52 | 53 | def post_init(self): 54 | if self.format_prompt is not None: 55 | if os.path.exists(self.format_prompt): # ray job uses absolute path 56 | self.format_prompt = os.path.abspath(self.format_prompt) 57 | else: 58 | self.format_prompt = None 59 | 60 | 61 | @dataclass 62 | class AlgorithmConfig: 63 | gamma: float = 1.0 64 | lam: float = 1.0 65 | adv_estimator: str = "grpo" 66 | disable_kl: bool = False 67 | use_kl_loss: bool = False 68 | kl_penalty: str = "kl" 69 | kl_coef: float = 1e-3 70 | kl_type: str = "fixed" 71 | kl_horizon: float = 0.0 72 | kl_target: float = 0.0 73 | 74 | 75 | @dataclass 76 | class TrainerConfig: 77 | total_epochs: int = 10 78 | max_steps: Optional[int] = None 79 | project_name: str = "easy_r1" 80 | experiment_name: str = "demo" 81 | logger: Tuple[str] = ("console", "wandb") 82 | nnodes: int = 1 83 | n_gpus_per_node: int = 8 84 | critic_warmup: int = 0 85 | val_freq: int = -1 86 | val_before_train: bool = True 87 | val_only: bool = False 88 | val_generations_to_log: int = 0 89 | save_freq: int = -1 90 | save_limit: int = -1 91 | save_checkpoint_path: Optional[str] = None 92 | load_checkpoint_path: Optional[str] = None 93 | 94 | def post_init(self): 95 | if self.save_checkpoint_path is None: 96 | self.save_checkpoint_path = os.path.join("checkpoints", self.project_name, self.experiment_name) 97 | 98 | self.save_checkpoint_path = os.path.abspath(self.save_checkpoint_path) # ray job uses absolute path 99 | if self.load_checkpoint_path is not None: 100 | self.load_checkpoint_path = os.path.abspath(self.load_checkpoint_path) 101 | 102 | 103 | @dataclass 104 | class PPOConfig: 105 | data: DataConfig = field(default_factory=DataConfig) 106 | worker: WorkerConfig = field(default_factory=WorkerConfig) 107 | algorithm: AlgorithmConfig = field(default_factory=AlgorithmConfig) 108 | trainer: TrainerConfig = field(default_factory=TrainerConfig) 109 | 110 | def post_init(self): 111 | self.worker.rollout.prompt_length = self.data.max_prompt_length 112 | 
self.worker.rollout.response_length = self.data.max_response_length 113 | self.worker.rollout.trust_remote_code = self.worker.actor.model.trust_remote_code 114 | self.worker.actor.disable_kl = self.algorithm.disable_kl 115 | self.worker.actor.use_kl_loss = self.algorithm.use_kl_loss 116 | self.worker.actor.kl_penalty = self.algorithm.kl_penalty 117 | self.worker.actor.kl_coef = self.algorithm.kl_coef 118 | 119 | def deep_post_init(self): 120 | recursive_post_init(self) 121 | 122 | def to_dict(self): 123 | return asdict(self) 124 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/data_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional 16 | 17 | import torch 18 | from torch.utils.data import RandomSampler, SequentialSampler 19 | from torchdata.stateful_dataloader import StatefulDataLoader 20 | from transformers import PreTrainedTokenizer, ProcessorMixin 21 | 22 | from ..utils.dataset import RLHFDataset, collate_fn 23 | from .config import DataConfig 24 | 25 | 26 | def create_dataloader(config: DataConfig, tokenizer: PreTrainedTokenizer, processor: Optional[ProcessorMixin]) -> None: 27 | train_dataset = RLHFDataset( 28 | data_path=config.train_files, 29 | tokenizer=tokenizer, 30 | processor=processor, 31 | prompt_key=config.prompt_key, 32 | answer_key=config.answer_key, 33 | image_key=config.image_key, 34 | max_prompt_length=config.max_prompt_length, 35 | truncation="right", 36 | format_prompt=config.format_prompt, 37 | min_pixels=config.min_pixels, 38 | max_pixels=config.max_pixels, 39 | filter_overlong_prompts=config.filter_overlong_prompts, 40 | ) 41 | # breakpoint() 42 | # use sampler for better ckpt resume 43 | if config.shuffle: 44 | train_dataloader_generator = torch.Generator() 45 | train_dataloader_generator.manual_seed(config.seed) 46 | sampler = RandomSampler(data_source=train_dataset, generator=train_dataloader_generator) 47 | else: 48 | sampler = SequentialSampler(data_source=train_dataset) 49 | 50 | train_dataloader = StatefulDataLoader( 51 | dataset=train_dataset, 52 | batch_size=config.rollout_batch_size, 53 | sampler=sampler, 54 | num_workers=8, 55 | collate_fn=collate_fn, 56 | pin_memory=False, 57 | drop_last=True, 58 | ) 59 | 60 | val_dataset = RLHFDataset( 61 | data_path=config.val_files, 62 | tokenizer=tokenizer, 63 | processor=processor, 64 | prompt_key=config.prompt_key, 65 | answer_key=config.answer_key, 66 | image_key=config.image_key, 67 | max_prompt_length=config.max_prompt_length, 68 | truncation="right", 69 | format_prompt=config.format_prompt, 70 | min_pixels=config.min_pixels, 71 | max_pixels=config.max_pixels, 72 | filter_overlong_prompts=config.filter_overlong_prompts, 73 | ) 74 | val_dataloader = StatefulDataLoader( 75 | dataset=val_dataset, 76 | 
batch_size=len(val_dataset) if config.val_batch_size == -1 else config.val_batch_size, 77 | shuffle=False, 78 | num_workers=8, 79 | collate_fn=collate_fn, 80 | pin_memory=False, 81 | drop_last=False, 82 | ) 83 | 84 | assert len(train_dataloader) >= 1 85 | assert len(val_dataloader) >= 1 86 | print(f"Size of train dataloader: {len(train_dataloader)}") 87 | print(f"Size of val dataloader: {len(val_dataloader)}") 88 | return train_dataloader, val_dataloader 89 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import json 16 | 17 | import ray 18 | # from ray import debug 19 | from omegaconf import OmegaConf 20 | 21 | from ..single_controller.ray import RayWorkerGroup 22 | from ..utils.tokenizer import get_processor, get_tokenizer 23 | from ..workers.fsdp_workers import FSDPWorker 24 | from ..workers.reward import BatchFunctionRewardManager, SequentialFunctionRewardManager 25 | from .config import PPOConfig 26 | from .data_loader import create_dataloader 27 | from .ray_trainer import RayPPOTrainer, ResourcePoolManager, Role 28 | 29 | # import debugpy 30 | # debugpy.listen(("localhost", 5678)) 31 | # print("Waiting for debugger attach, press 'F5' to continue") 32 | # debugpy.wait_for_client() 33 | 34 | # please make sure main_task is not scheduled on head 35 | @ray.remote(num_cpus=1) 36 | class Runner: 37 | """A runner for RL training.""" 38 | 39 | def run(self, config: PPOConfig): 40 | # print config 41 | # breakpoint() 42 | print(json.dumps(config.to_dict(), indent=2)) 43 | 44 | # instantiate tokenizer 45 | tokenizer = get_tokenizer( 46 | config.worker.actor.model.model_path, 47 | override_chat_template=config.data.override_chat_template, 48 | trust_remote_code=config.worker.actor.model.trust_remote_code, 49 | use_fast=True, 50 | ) 51 | processor = get_processor( 52 | config.worker.actor.model.model_path, 53 | override_chat_template=config.data.override_chat_template, 54 | trust_remote_code=config.worker.actor.model.trust_remote_code, 55 | use_fast=True, 56 | ) 57 | 58 | # define worker classes 59 | ray_worker_group_cls = RayWorkerGroup 60 | role_worker_mapping = { 61 | Role.ActorRollout: ray.remote(FSDPWorker), 62 | Role.Critic: ray.remote(FSDPWorker), 63 | Role.RefPolicy: ray.remote(FSDPWorker), 64 | } 65 | global_pool_id = "global_pool" 66 | resource_pool_spec = { 67 | global_pool_id: [config.trainer.n_gpus_per_node] * config.trainer.nnodes, 68 | } 69 | mapping = { 70 | Role.ActorRollout: global_pool_id, 71 | Role.Critic: global_pool_id, 72 | Role.RefPolicy: global_pool_id, 73 | } 74 | resource_pool_manager = ResourcePoolManager(resource_pool_spec=resource_pool_spec, mapping=mapping) 75 | 76 | if config.worker.reward.reward_type == "sequential": 77 | RewardManager = 
SequentialFunctionRewardManager 78 | elif config.worker.reward.reward_type == "batch": 79 | RewardManager = BatchFunctionRewardManager 80 | else: 81 | raise NotImplementedError(f"Unknown reward type {config.worker.reward.reward_type}.") 82 | 83 | RemoteRewardManager = ray.remote(RewardManager).options(num_cpus=config.worker.reward.num_cpus) 84 | reward_fn = RemoteRewardManager.remote(config.worker.reward, tokenizer) 85 | val_reward_fn = RemoteRewardManager.remote(config.worker.reward, tokenizer) 86 | 87 | train_dataloader, val_dataloader = create_dataloader(config.data, tokenizer, processor) 88 | 89 | trainer = RayPPOTrainer( 90 | config=config, 91 | tokenizer=tokenizer, 92 | processor=processor, 93 | train_dataloader=train_dataloader, 94 | val_dataloader=val_dataloader, 95 | role_worker_mapping=role_worker_mapping, 96 | resource_pool_manager=resource_pool_manager, 97 | ray_worker_group_cls=ray_worker_group_cls, 98 | reward_fn=reward_fn, 99 | val_reward_fn=val_reward_fn, 100 | ) 101 | trainer.init_workers() 102 | trainer.fit() 103 | 104 | 105 | def main(): 106 | cli_args = OmegaConf.from_cli() 107 | default_config = OmegaConf.structured(PPOConfig()) 108 | 109 | if hasattr(cli_args, "config"): 110 | config_path = cli_args.pop("config", None) 111 | file_config = OmegaConf.load(config_path) 112 | default_config = OmegaConf.merge(default_config, file_config) 113 | 114 | ppo_config = OmegaConf.merge(default_config, cli_args) 115 | ppo_config: PPOConfig = OmegaConf.to_object(ppo_config) 116 | ppo_config.deep_post_init() 117 | 118 | if not ray.is_initialized(): 119 | runtime_env = { 120 | "env_vars": { 121 | "TOKENIZERS_PARALLELISM": "true", 122 | "NCCL_DEBUG": "WARN", 123 | "VLLM_LOGGING_LEVEL": "WARN", 124 | "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", 125 | "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:False", 126 | "PYTHONUNBUFFERED": "1", 127 | "RAY_DEBUG": "1", 128 | } 129 | } 130 | ray.init(runtime_env=runtime_env,_temp_dir="/wangx_nas/JLY/raylog") 131 | # debug.init() 132 | 133 | runner = Runner.remote() 134 | ray.get(runner.run.remote(ppo_config)) 135 | 136 | 137 | if __name__ == "__main__": 138 | main() 139 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/trainer/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
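# Aggregation convention used below (illustrative values, not part of this file): reduce_metrics
# takes the per-step lists of gathered values and averages each key, e.g.
#     reduce_metrics({"actor/kl": [0.10, 0.30], "critic/score/mean": [0.5, 0.7]})
#     # -> {"actor/kl": 0.2, "critic/score/mean": 0.6}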
14 | 15 | from typing import Any, Dict, List 16 | 17 | import numpy as np 18 | import torch 19 | 20 | from ..protocol import DataProto 21 | 22 | 23 | def reduce_metrics(metrics: Dict[str, List[Any]]) -> Dict[str, Any]: 24 | return {key: np.mean(value) for key, value in metrics.items()} 25 | 26 | 27 | def compute_data_metrics(batch: DataProto, use_critic: bool = False) -> Dict[str, Any]: 28 | sequence_score = batch.batch["token_level_scores"].sum(-1) 29 | sequence_reward = batch.batch["token_level_rewards"].sum(-1) 30 | 31 | advantages = batch.batch["advantages"] 32 | returns = batch.batch["returns"] 33 | 34 | max_response_length = batch.batch["responses"].size(-1) 35 | 36 | prompt_mask = batch.batch["attention_mask"][:, :-max_response_length].bool() 37 | response_mask = batch.batch["attention_mask"][:, -max_response_length:].bool() 38 | 39 | max_prompt_length = prompt_mask.size(-1) 40 | prompt_length = prompt_mask.sum(-1).float() 41 | response_length = response_mask.sum(-1).float() 42 | 43 | valid_adv = torch.masked_select(advantages, response_mask) 44 | valid_returns = torch.masked_select(returns, response_mask) 45 | 46 | if use_critic: 47 | values = batch.batch["values"] 48 | valid_values = torch.masked_select(values, response_mask) 49 | return_diff_var = torch.var(valid_returns - valid_values) 50 | return_var = torch.var(valid_returns) 51 | 52 | metrics = { 53 | # score 54 | "critic/score/mean": torch.mean(sequence_score).detach().item(), 55 | "critic/score/max": torch.max(sequence_score).detach().item(), 56 | "critic/score/min": torch.min(sequence_score).detach().item(), 57 | # reward 58 | "critic/rewards/mean": torch.mean(sequence_reward).detach().item(), 59 | "critic/rewards/max": torch.max(sequence_reward).detach().item(), 60 | "critic/rewards/min": torch.min(sequence_reward).detach().item(), 61 | # adv 62 | "critic/advantages/mean": torch.mean(valid_adv).detach().item(), 63 | "critic/advantages/max": torch.max(valid_adv).detach().item(), 64 | "critic/advantages/min": torch.min(valid_adv).detach().item(), 65 | # returns 66 | "critic/returns/mean": torch.mean(valid_returns).detach().item(), 67 | "critic/returns/max": torch.max(valid_returns).detach().item(), 68 | "critic/returns/min": torch.min(valid_returns).detach().item(), 69 | **( 70 | { 71 | # values 72 | "critic/values/mean": torch.mean(valid_values).detach().item(), 73 | "critic/values/max": torch.max(valid_values).detach().item(), 74 | "critic/values/min": torch.min(valid_values).detach().item(), 75 | # vf explained var 76 | "critic/vf_explained_var": (1.0 - return_diff_var / (return_var + 1e-5)).detach().item(), 77 | } 78 | if use_critic 79 | else {} 80 | ), 81 | # response length 82 | "response_length/mean": torch.mean(response_length).detach().item(), 83 | "response_length/max": torch.max(response_length).detach().item(), 84 | "response_length/min": torch.min(response_length).detach().item(), 85 | "response_length/clip_ratio": torch.mean(torch.eq(response_length, max_response_length).float()) 86 | .detach() 87 | .item(), 88 | # prompt length 89 | "prompt_length/mean": torch.mean(prompt_length).detach().item(), 90 | "prompt_length/max": torch.max(prompt_length).detach().item(), 91 | "prompt_length/min": torch.min(prompt_length).detach().item(), 92 | "prompt_length/clip_ratio": torch.mean(torch.eq(prompt_length, max_prompt_length).float()).detach().item(), 93 | } 94 | return metrics 95 | 96 | 97 | def compute_timing_metrics(batch: DataProto, timing_raw: Dict[str, float]) -> Dict[str, Any]: 98 | num_response_tokens = 
torch.sum(batch.batch["response_mask"]).item() 99 | num_overall_tokens = sum(batch.meta_info["global_token_num"]) 100 | num_tokens_of_section = { 101 | **dict.fromkeys(["gen", "reward"], num_response_tokens), 102 | **dict.fromkeys(["ref", "old", "values", "adv", "update_critic", "update_actor"], num_overall_tokens), 103 | } 104 | return { 105 | **{f"timing_s/{name}": value for name, value in timing_raw.items()}, 106 | **{ 107 | f"timing_per_token_ms/{name}": timing_raw[name] * 1000 / num_tokens_of_section[name] 108 | for name in set(num_tokens_of_section.keys()) & set(timing_raw.keys()) 109 | }, 110 | } 111 | 112 | 113 | def compute_throughout_metrics(batch: DataProto, timing_raw: Dict[str, float], num_gpus: int) -> Dict[str, Any]: 114 | total_num_tokens = sum(batch.meta_info["global_token_num"]) 115 | time = timing_raw["step"] 116 | return { 117 | "perf/total_num_tokens": total_num_tokens, 118 | "perf/time_per_step": time, 119 | "perf/throughput": total_num_tokens / (time * num_gpus), 120 | } 121 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/dataset.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/flops_counter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/flops_counter.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/fsdp_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/fsdp_utils.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/model_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/model_utils.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/py_functional.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/py_functional.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/tokenizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/tokenizer.cpython-310.pyc 
-------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/torch_dtypes.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/torch_dtypes.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/torch_functional.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/torch_functional.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/ulysses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/__pycache__/ulysses.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
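# Behaviour sketch for the re-exported remove_obsolete_ckpt (illustrative paths, not part of
# this file): with save_limit=3 and existing global_step_100/200/300 under the save directory,
#     remove_obsolete_ckpt("checkpoints/easy_r1/demo", global_step=400, save_limit=3)
# deletes global_step_100 and keeps the two most recent older checkpoints
# (global_step_200, global_step_300) alongside the newly written global_step_400.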
14 | 15 | from .checkpoint_manager import CHECKPOINT_TRACKER, remove_obsolete_ckpt 16 | 17 | 18 | __all__ = ["CHECKPOINT_TRACKER", "remove_obsolete_ckpt"] 19 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/checkpoint/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/checkpoint/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/checkpoint/__pycache__/checkpoint_manager.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/checkpoint/__pycache__/checkpoint_manager.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/checkpoint/__pycache__/fsdp_checkpoint_manager.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/checkpoint/__pycache__/fsdp_checkpoint_manager.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | import random 17 | import re 18 | import shutil 19 | import tempfile 20 | from abc import ABC, abstractmethod 21 | from typing import Any, Dict, Optional, Union 22 | 23 | import numpy as np 24 | import torch 25 | import torch.distributed as dist 26 | from filelock import FileLock 27 | from torch.distributed.fsdp import FullyShardedDataParallel as FSDP 28 | from transformers import PreTrainedTokenizer, ProcessorMixin 29 | 30 | 31 | CHECKPOINT_TRACKER = "latest_global_step.txt" 32 | 33 | 34 | class BaseCheckpointManager(ABC): 35 | """ 36 | A checkpoint manager that saves and loads 37 | - model 38 | - optimizer 39 | - lr_scheduler 40 | - extra_states 41 | in a SPMD way. 
42 | 43 | We save 44 | - sharded model states and optimizer states 45 | - full lr_scheduler states 46 | - huggingface tokenizer and config for ckpt merge 47 | """ 48 | 49 | def __init__( 50 | self, 51 | model: FSDP, 52 | optimizer: torch.optim.Optimizer, 53 | lr_scheduler: torch.optim.lr_scheduler.LRScheduler, 54 | processing_class: Union[PreTrainedTokenizer, ProcessorMixin], 55 | ): 56 | self.model = model 57 | self.optimizer = optimizer 58 | self.lr_scheduler = lr_scheduler 59 | self.processing_class = processing_class 60 | 61 | assert isinstance(self.model, FSDP) 62 | self.rank = dist.get_rank() 63 | self.world_size = dist.get_world_size() 64 | 65 | @abstractmethod 66 | def load_checkpoint(self, *args, **kwargs): 67 | raise NotImplementedError 68 | 69 | @abstractmethod 70 | def save_checkpoint(self, *args, **kwargs): 71 | raise NotImplementedError 72 | 73 | @staticmethod 74 | def local_mkdir(path: str) -> str: 75 | if not os.path.isabs(path): 76 | working_dir = os.getcwd() 77 | path = os.path.join(working_dir, path) 78 | 79 | # Using hash value of path as lock file name to avoid long file name 80 | lock_filename = f"ckpt_{hash(path) & 0xFFFFFFFF:08x}.lock" 81 | lock_path = os.path.join(tempfile.gettempdir(), lock_filename) 82 | 83 | try: 84 | with FileLock(lock_path, timeout=60): 85 | os.makedirs(path, exist_ok=True) 86 | except Exception as e: 87 | print(f"Warning: Failed to acquire lock for {path}: {e}") 88 | os.makedirs(path, exist_ok=True) # even if the lock is not acquired, try to create the directory 89 | 90 | return path 91 | 92 | @staticmethod 93 | def get_rng_state() -> Dict[str, Any]: 94 | rng_state = { 95 | "cpu": torch.get_rng_state(), 96 | "cuda": torch.cuda.get_rng_state(), 97 | "numpy": np.random.get_state(), 98 | "random": random.getstate(), 99 | } 100 | return rng_state 101 | 102 | @staticmethod 103 | def load_rng_state(rng_state: Dict[str, Any]): 104 | torch.set_rng_state(rng_state["cpu"]) 105 | torch.cuda.set_rng_state(rng_state["cuda"]) 106 | np.random.set_state(rng_state["numpy"]) 107 | random.setstate(rng_state["random"]) 108 | 109 | 110 | def find_latest_ckpt_path(path: Optional[str] = None, directory_format: str = "global_step_{}") -> Optional[str]: 111 | if path is None: 112 | return None 113 | 114 | tracker_file = get_checkpoint_tracker_filename(path) 115 | if not os.path.exists(tracker_file): 116 | print("Checkpoint tracker file does not exist: %s", tracker_file) 117 | return None 118 | 119 | with open(tracker_file, "rb") as f: 120 | iteration = int(f.read().decode()) 121 | 122 | ckpt_path = os.path.join(path, directory_format.format(iteration)) 123 | if not os.path.exists(ckpt_path): 124 | print("Checkpoint does not exist: %s", ckpt_path) 125 | return None 126 | 127 | print("Found checkpoint: %s", ckpt_path) 128 | return ckpt_path 129 | 130 | 131 | def get_checkpoint_tracker_filename(root_path: str) -> str: 132 | """ 133 | Tracker file rescords the latest chckpoint during training to restart from. 134 | """ 135 | return os.path.join(root_path, CHECKPOINT_TRACKER) 136 | 137 | 138 | def remove_obsolete_ckpt(path: str, global_step: int, save_limit: int = -1, directory_format: str = "global_step_{}"): 139 | """ 140 | Remove the obsolete checkpoints that exceed the save_limit. 
141 | """ 142 | if save_limit <= 0: 143 | return 144 | 145 | if not os.path.exists(path): 146 | return 147 | 148 | pattern = re.escape(directory_format).replace(r"\{\}", r"(\d+)") 149 | ckpt_folders = [] 150 | for folder in os.listdir(path): 151 | if match := re.match(pattern, folder): 152 | step = int(match.group(1)) 153 | if step < global_step: 154 | ckpt_folders.append((step, folder)) 155 | 156 | ckpt_folders.sort(reverse=True) 157 | for _, folder in ckpt_folders[save_limit - 1 :]: 158 | folder_path = os.path.join(path, folder) 159 | shutil.rmtree(folder_path, ignore_errors=True) 160 | print(f"Removed obsolete checkpoint: {folder_path}") 161 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/checkpoint/fsdp_checkpoint_manager.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | from typing import Optional, Union 17 | 18 | import torch 19 | import torch.distributed as dist 20 | from torch.distributed.checkpoint.state_dict import StateDictOptions, get_state_dict, set_state_dict 21 | from torch.distributed.fsdp import FullyShardedDataParallel as FSDP 22 | from transformers import PreTrainedModel, PreTrainedTokenizer, ProcessorMixin 23 | 24 | from .checkpoint_manager import BaseCheckpointManager 25 | 26 | 27 | class FSDPCheckpointManager(BaseCheckpointManager): 28 | """ 29 | A checkpoint manager that saves and loads 30 | - model 31 | - optimizer 32 | - lr_scheduler 33 | - extra_states 34 | in a SPMD way. 
35 | 36 | We save 37 | - sharded model states and optimizer states 38 | - full lr_scheduler states 39 | - huggingface tokenizer and config for ckpt merge 40 | """ 41 | 42 | def __init__( 43 | self, 44 | model: FSDP, 45 | optimizer: torch.optim.Optimizer, 46 | lr_scheduler: torch.optim.lr_scheduler.LRScheduler, 47 | processing_class: Union[PreTrainedTokenizer, ProcessorMixin], 48 | ): 49 | super().__init__(model, optimizer, lr_scheduler, processing_class) 50 | 51 | def load_checkpoint(self, path: Optional[str] = None): 52 | if path is None: 53 | return 54 | 55 | # every rank download its own checkpoint 56 | model_path = os.path.join(path, f"model_world_size_{self.world_size}_rank_{self.rank}.pt") 57 | optim_path = os.path.join(path, f"optim_world_size_{self.world_size}_rank_{self.rank}.pt") 58 | extra_path = os.path.join(path, f"extra_state_world_size_{self.world_size}_rank_{self.rank}.pt") 59 | print(f"[rank-{self.rank}]: Loading model from {os.path.abspath(model_path)}.") 60 | print(f"[rank-{self.rank}]: Loading optimizer from {os.path.abspath(optim_path)}.") 61 | print(f"[rank-{self.rank}]: Loading extra_state from {os.path.abspath(extra_path)}.") 62 | model_state_dict = torch.load(model_path, weights_only=False) 63 | optim_state_dict = torch.load(optim_path, weights_only=False) 64 | extra_state_dict = torch.load(extra_path, weights_only=False) 65 | 66 | state_dict_options = StateDictOptions(cpu_offload=True) 67 | set_state_dict( 68 | model=self.model, 69 | optimizers=self.optimizer, 70 | model_state_dict=model_state_dict, 71 | optim_state_dict=optim_state_dict, 72 | options=state_dict_options, 73 | ) 74 | self.lr_scheduler.load_state_dict(extra_state_dict["lr_scheduler"]) 75 | 76 | # recover random state 77 | if "rng" in extra_state_dict: 78 | self.load_rng_state(extra_state_dict["rng"]) 79 | 80 | def save_checkpoint(self, path: str): 81 | path = self.local_mkdir(path) 82 | dist.barrier() 83 | 84 | # every rank will save its own model and optim shard 85 | state_dict_options = StateDictOptions(cpu_offload=True) 86 | model_state_dict, optim_state_dict = get_state_dict(self.model, self.optimizer, options=state_dict_options) 87 | extra_state_dict = { 88 | "lr_scheduler": self.lr_scheduler.state_dict(), 89 | "rng": self.get_rng_state(), 90 | } 91 | model_path = os.path.join(path, f"model_world_size_{self.world_size}_rank_{self.rank}.pt") 92 | optim_path = os.path.join(path, f"optim_world_size_{self.world_size}_rank_{self.rank}.pt") 93 | extra_path = os.path.join(path, f"extra_state_world_size_{self.world_size}_rank_{self.rank}.pt") 94 | 95 | print(f"[rank-{self.rank}]: Saving model to {os.path.abspath(model_path)}.") 96 | print(f"[rank-{self.rank}]: Saving optimizer to {os.path.abspath(optim_path)}.") 97 | print(f"[rank-{self.rank}]: Saving extra_state to {os.path.abspath(extra_path)}.") 98 | torch.save(model_state_dict, model_path) 99 | torch.save(optim_state_dict, optim_path) 100 | torch.save(extra_state_dict, extra_path) 101 | 102 | # wait for everyone to dump to local 103 | dist.barrier() 104 | 105 | if self.rank == 0: 106 | hf_path = os.path.join(path, "huggingface") 107 | os.makedirs(hf_path, exist_ok=True) 108 | assert isinstance(self.model._fsdp_wrapped_module, PreTrainedModel) 109 | self.model._fsdp_wrapped_module.config.save_pretrained(hf_path) 110 | self.model._fsdp_wrapped_module.generation_config.save_pretrained(hf_path) 111 | self.processing_class.save_pretrained(hf_path) 112 | 113 | dist.barrier() 114 | 
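# Usage sketch for the manager above (illustrative names, assuming an FSDP-wrapped model,
# its optimizer, LR scheduler, and tokenizer are already constructed):
#
#     ckpt_manager = FSDPCheckpointManager(
#         model=fsdp_model, optimizer=optimizer, lr_scheduler=lr_scheduler, processing_class=tokenizer
#     )
#     ckpt_manager.save_checkpoint("checkpoints/easy_r1/demo/global_step_100")
#     # every rank writes its own model/optim/extra shards named *_world_size_{W}_rank_{r}.pt,
#     # and rank 0 additionally dumps the huggingface config/tokenizer for later checkpoint merging
#     ckpt_manager.load_checkpoint("checkpoints/easy_r1/demo/global_step_100")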
-------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/flops_counter.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import TYPE_CHECKING, List, Tuple 16 | 17 | import torch 18 | 19 | 20 | if TYPE_CHECKING: 21 | from transformers.models.llama.configuration_llama import LlamaConfig 22 | 23 | 24 | VALID_MODLE_TYPE = {"llama", "qwen2", "qwen2_vl", "qwen2_5_vl", "qwen3"} 25 | 26 | 27 | def get_device_flops(unit: str = "T") -> float: 28 | def unit_convert(number: float, level: str): 29 | units = ["B", "K", "M", "G", "T", "P"] 30 | if number <= 0: 31 | return number 32 | 33 | ptr = 0 34 | while ptr < len(units) and units[ptr] != level: 35 | number /= 1000 36 | ptr += 1 37 | 38 | return number 39 | 40 | device_name = torch.cuda.get_device_name() 41 | flops = float("inf") # INF flops for unkown gpu type 42 | if "H100" in device_name or "H800" in device_name: 43 | flops = 989e12 44 | elif "A100" in device_name or "A800" in device_name: 45 | flops = 312e12 46 | elif "L40" in device_name: 47 | flops = 181.05e12 48 | elif "L20" in device_name: 49 | flops = 119.5e12 50 | elif "H20" in device_name: 51 | flops = 148e12 52 | elif "910B" in device_name: 53 | flops = 354e12 54 | flops_unit = unit_convert(flops, unit) 55 | return flops_unit 56 | 57 | 58 | class FlopsCounter: 59 | """ 60 | Used to count mfu during training loop 61 | 62 | Example: 63 | flops_counter = FlopsCounter(config) 64 | flops_achieved, flops_promised = flops_counter.estimate_flops(tokens_list, delta_time) 65 | """ 66 | 67 | def __init__(self, config: "LlamaConfig"): 68 | if config.model_type not in VALID_MODLE_TYPE: 69 | print(f"Only support {VALID_MODLE_TYPE}, but got {config.model_type}. 
MFU will always be zero.") 70 | 71 | self.estimate_func = { 72 | "llama": self._estimate_llama_flops, 73 | "qwen2": self._estimate_llama_flops, 74 | "qwen2_vl": self._estimate_llama_flops, 75 | "qwen2_5_vl": self._estimate_llama_flops, 76 | } 77 | self.config = config 78 | 79 | def _estimate_unknown_flops(self, tokens_sum: int, batch_seqlens: List[int], delta_time: float) -> float: 80 | return 0 81 | 82 | def _estimate_llama_flops(self, tokens_sum: int, batch_seqlens: List[int], delta_time: float) -> float: 83 | hidden_size = self.config.hidden_size 84 | vocab_size = self.config.vocab_size 85 | num_hidden_layers = self.config.num_hidden_layers 86 | num_key_value_heads = self.config.num_key_value_heads 87 | num_attention_heads = self.config.num_attention_heads 88 | intermediate_size = self.config.intermediate_size 89 | 90 | head_dim = hidden_size // num_attention_heads 91 | q_size = num_attention_heads * head_dim 92 | k_size = num_key_value_heads * head_dim 93 | v_size = num_key_value_heads * head_dim 94 | 95 | # non-attn per layer parm 96 | # Qwen2/LLama use SwiGelu, gate, having up and down linear layer in mlp 97 | mlp_N = hidden_size * intermediate_size * 3 98 | attn_linear_N = hidden_size * (q_size + k_size + v_size + num_attention_heads * head_dim) 99 | emd_and_lm_head_N = vocab_size * hidden_size * 2 100 | # non-attn all_layer parm 101 | dense_N = (mlp_N + attn_linear_N) * num_hidden_layers + emd_and_lm_head_N 102 | # non-attn all_layer & all_token fwd & bwd flops 103 | dense_N_flops = 6 * dense_N * tokens_sum 104 | 105 | # attn all_layer & all_token fwd & bwd flops 106 | seqlen_square_sum = 0 107 | for seqlen in batch_seqlens: 108 | seqlen_square_sum += seqlen * seqlen 109 | 110 | attn_qkv_flops = 12 * seqlen_square_sum * head_dim * num_attention_heads * num_hidden_layers 111 | 112 | # all_layer & all_token fwd & bwd flops 113 | flops_all_token = dense_N_flops + attn_qkv_flops 114 | flops_achieved = flops_all_token * (1.0 / delta_time) / 1e12 115 | return flops_achieved 116 | 117 | def estimate_flops(self, batch_seqlens: List[int], delta_time: float) -> Tuple[float, float]: 118 | """ 119 | Estimate the FLOPS based on the number of valid tokens in the current batch and the time taken. 120 | 121 | Args: 122 | batch_seqlens (List[int]): A list where each element represents the number of valid tokens in the current batch. 123 | delta_time (float): The time taken to process the batch, in seconds. 124 | 125 | Returns: 126 | estimated_flops (float): The estimated FLOPS based on the input tokens and time. 127 | promised_flops (float): The expected FLOPS of the current device. 128 | """ 129 | tokens_sum = sum(batch_seqlens) 130 | func = self.estimate_func.get(self.config.model_type, self._estimate_unknown_flops) 131 | estimated_flops = func(tokens_sum, batch_seqlens, delta_time) 132 | promised_flops = get_device_flops() 133 | return estimated_flops, promised_flops 134 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/fsdp_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import gc 16 | from collections import defaultdict 17 | from functools import partial 18 | from typing import Callable, Union 19 | 20 | import torch 21 | from torch import nn 22 | from torch.distributed.fsdp import FullyShardedDataParallel as FSDP 23 | from torch.distributed.fsdp._runtime_utils import _lazy_init 24 | from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy 25 | from torch.optim import Optimizer 26 | from transformers import PreTrainedModel 27 | from transformers.trainer_pt_utils import get_module_class_from_name 28 | 29 | 30 | def get_init_fn(model: nn.Module, device: Union[str, torch.device]) -> Callable[[nn.Module], None]: 31 | param_occurrence = defaultdict(int) 32 | for _, param in model.named_parameters(remove_duplicate=False): 33 | param_occurrence[param] += 1 34 | 35 | duplicated_params = {param for param in param_occurrence.keys() if param_occurrence[param] > 1} 36 | materialized_params = {} 37 | 38 | def init_fn(module: nn.Module): 39 | for name, param in module.named_parameters(recurse=False): 40 | if param in duplicated_params: 41 | module._parameters[name] = materialized_params.setdefault( 42 | param, nn.Parameter(torch.empty_like(param.data, device=device), requires_grad=param.requires_grad) 43 | ) 44 | else: 45 | module._parameters[name] = nn.Parameter( 46 | torch.empty_like(param.data, device=device), requires_grad=param.requires_grad 47 | ) 48 | 49 | return init_fn 50 | 51 | 52 | def get_fsdp_wrap_policy(model: PreTrainedModel): 53 | """Get FSDP wrap policy for the model. 
54 | 55 | Args: 56 | module: The module to get wrap policy for 57 | """ 58 | transformer_cls_to_wrap = set() 59 | for module in model._no_split_modules: 60 | transformer_cls = get_module_class_from_name(model, module) 61 | if transformer_cls is None: 62 | raise Exception(f"Cannot find {module} in pretrained model.") 63 | else: 64 | transformer_cls_to_wrap.add(transformer_cls) 65 | 66 | return partial(transformer_auto_wrap_policy, transformer_layer_cls=transformer_cls_to_wrap) 67 | 68 | 69 | @torch.no_grad() 70 | def offload_fsdp_model(model: FSDP, empty_cache: bool = True): 71 | # lazy init FSDP model 72 | _lazy_init(model, model) 73 | assert model._is_root, "Only support root model offloading to CPU" 74 | for handle in model._all_handles: 75 | if handle._offload_params: 76 | continue 77 | 78 | flat_param = handle.flat_param 79 | assert ( 80 | flat_param.data.data_ptr() == flat_param._local_shard.data_ptr() 81 | and id(flat_param.data) != id(flat_param._local_shard) 82 | and flat_param.data.size() == flat_param._local_shard.size() 83 | ) 84 | handle.flat_param_to("cpu", non_blocking=True) 85 | # the following still keeps id(._local_shard) != id(.data) 86 | flat_param._local_shard = flat_param.data 87 | assert id(flat_param._local_shard) != id(flat_param.data) 88 | 89 | if empty_cache: 90 | torch.cuda.empty_cache() 91 | 92 | 93 | @torch.no_grad() 94 | def load_fsdp_model(model: FSDP, empty_cache: bool = True): 95 | # lazy init FSDP model 96 | _lazy_init(model, model) 97 | assert model._is_root, "Only support root model loading to GPU" 98 | for handle in model._all_handles: 99 | if handle._offload_params: 100 | continue 101 | 102 | flat_param = handle.flat_param 103 | handle.flat_param_to("cuda", non_blocking=True) 104 | # the following still keeps id(._local_shard) != id(.data) 105 | flat_param._local_shard = flat_param.data 106 | 107 | if empty_cache: 108 | gc.collect() 109 | 110 | 111 | @torch.no_grad() 112 | def offload_fsdp_optimizer(optimizer: Optimizer, empty_cache: bool = True): 113 | if not optimizer.state: 114 | return 115 | 116 | for param_group in optimizer.param_groups: 117 | for param in param_group["params"]: 118 | state = optimizer.state[param] 119 | for key, value in state.items(): 120 | if isinstance(value, torch.Tensor): 121 | state[key] = value.to("cpu", non_blocking=True) 122 | 123 | if empty_cache: 124 | torch.cuda.empty_cache() 125 | 126 | 127 | @torch.no_grad() 128 | def load_fsdp_optimizer(optimizer: Optimizer, empty_cache: bool = True): 129 | if not optimizer.state: 130 | return 131 | 132 | for param_group in optimizer.param_groups: 133 | for param in param_group["params"]: 134 | state = optimizer.state[param] 135 | for key, value in state.items(): 136 | if isinstance(value, torch.Tensor): 137 | state[key] = value.to("cuda", non_blocking=True) 138 | 139 | if empty_cache: 140 | gc.collect() 141 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .logger import Tracker 17 | 18 | 19 | __all__ = ["Tracker"] 20 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/logger/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/logger/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/logger/__pycache__/gen_logger.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/logger/__pycache__/gen_logger.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/logger/__pycache__/logger.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/utils/logger/__pycache__/logger.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/logger/gen_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from abc import ABC, abstractmethod 17 | from dataclasses import dataclass 18 | from typing import List, Tuple 19 | 20 | from ..py_functional import is_package_available 21 | 22 | 23 | if is_package_available("wandb"): 24 | import wandb # type: ignore 25 | 26 | 27 | if is_package_available("swanlab"): 28 | import swanlab # type: ignore 29 | 30 | 31 | @dataclass 32 | class GenerationLogger(ABC): 33 | @abstractmethod 34 | def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None: ... 
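# A minimal sketch of an additional backend implementing the interface above (hypothetical
# JsonlGenerationLogger, not part of this module; samples are (prompt, output, label, score)
# tuples, as in the loggers below):
#
#     @dataclass
#     class JsonlGenerationLogger(GenerationLogger):
#         path: str = "val_generations.jsonl"
#
#         def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None:
#             import json
#             with open(self.path, "a") as f:
#                 for prompt, output, label, score in samples:
#                     f.write(json.dumps({"step": step, "prompt": prompt, "output": output,
#                                         "label": label, "score": score}) + "\n")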
35 | 36 | 37 | @dataclass 38 | class ConsoleGenerationLogger(GenerationLogger): 39 | def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None: 40 | for inp, out, lab, score in samples: 41 | print(f"[prompt] {inp}\n[output] {out}\n[ground_truth] {lab}\n[score] {score}\n") 42 | 43 | 44 | @dataclass 45 | class WandbGenerationLogger(GenerationLogger): 46 | def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None: 47 | # Create column names for all samples 48 | columns = ["step"] + sum( 49 | [[f"input_{i + 1}", f"output_{i + 1}", f"label_{i + 1}", f"score_{i + 1}"] for i in range(len(samples))], 50 | [], 51 | ) 52 | 53 | if not hasattr(self, "validation_table"): 54 | # Initialize the table on first call 55 | self.validation_table = wandb.Table(columns=columns) 56 | 57 | # Create a new table with same columns and existing data 58 | # Workaround for https://github.com/wandb/wandb/issues/2981#issuecomment-1997445737 59 | new_table = wandb.Table(columns=columns, data=self.validation_table.data) 60 | 61 | # Add new row with all data 62 | row_data = [step] 63 | for sample in samples: 64 | row_data.extend(sample) 65 | 66 | new_table.add_data(*row_data) 67 | wandb.log({"val/generations": new_table}, step=step) 68 | self.validation_table = new_table 69 | 70 | 71 | @dataclass 72 | class SwanlabGenerationLogger(GenerationLogger): 73 | def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None: 74 | swanlab_text_list = [] 75 | for i, sample in enumerate(samples): 76 | row_text = "\n\n---\n\n".join( 77 | (f"input: {sample[0]}", f"output: {sample[1]}", f"label: {sample[2]}", f"score: {sample[3]}") 78 | ) 79 | swanlab_text_list.append(swanlab.Text(row_text, caption=f"sample {i + 1}")) 80 | 81 | swanlab.log({"val/generations": swanlab_text_list}, step=step) 82 | 83 | 84 | GEN_LOGGERS = { 85 | "console": ConsoleGenerationLogger, 86 | "wandb": WandbGenerationLogger, 87 | "swanlab": SwanlabGenerationLogger, 88 | } 89 | 90 | 91 | @dataclass 92 | class AggregateGenerationsLogger: 93 | def __init__(self, loggers: List[str]): 94 | self.loggers: List[GenerationLogger] = [] 95 | 96 | for logger in loggers: 97 | if logger in GEN_LOGGERS: 98 | self.loggers.append(GEN_LOGGERS[logger]()) 99 | 100 | def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None: 101 | for logger in self.loggers: 102 | logger.log(samples, step) 103 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/logger/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
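As a quick illustration of the generation loggers defined above (a sketch, not repo code): each sample is a (prompt, output, ground truth, score) tuple, and AggregateGenerationsLogger fans it out to whichever requested backends it knows about; the wandb and swanlab variants additionally need an active run in those tools.

    gen_logger = AggregateGenerationsLogger(["console"])
    samples = [("describe the target", "predicted box: 10,20,110,220", "10,22,112,218", 0.9)]  # illustrative values
    gen_logger.log(samples, step=100)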
14 | """ 15 | A unified tracking interface that supports logging data to different backend 16 | """ 17 | 18 | import os 19 | from abc import ABC, abstractmethod 20 | from typing import Any, Dict, List, Optional, Tuple, Union 21 | 22 | from ..py_functional import convert_dict_to_str, flatten_dict, is_package_available, unflatten_dict 23 | from .gen_logger import AggregateGenerationsLogger 24 | 25 | 26 | if is_package_available("mlflow"): 27 | import mlflow # type: ignore 28 | 29 | 30 | if is_package_available("tensorboard"): 31 | from torch.utils.tensorboard import SummaryWriter 32 | 33 | 34 | if is_package_available("wandb"): 35 | import wandb # type: ignore 36 | 37 | 38 | if is_package_available("swanlab"): 39 | import swanlab # type: ignore 40 | 41 | 42 | class Logger(ABC): 43 | @abstractmethod 44 | def __init__(self, config: Dict[str, Any]) -> None: ... 45 | 46 | @abstractmethod 47 | def log(self, data: Dict[str, Any], step: int) -> None: ... 48 | 49 | def finish(self) -> None: 50 | pass 51 | 52 | 53 | class ConsoleLogger(Logger): 54 | def __init__(self, config: Dict[str, Any]) -> None: 55 | print("Config\n" + convert_dict_to_str(config)) 56 | 57 | def log(self, data: Dict[str, Any], step: int) -> None: 58 | print(f"Step {step}\n" + convert_dict_to_str(unflatten_dict(data))) 59 | 60 | 61 | class MlflowLogger(Logger): 62 | def __init__(self, config: Dict[str, Any]) -> None: 63 | mlflow.start_run(run_name=config["trainer"]["experiment_name"]) 64 | mlflow.log_params(flatten_dict(config)) 65 | 66 | def log(self, data: Dict[str, Any], step: int) -> None: 67 | mlflow.log_metrics(metrics=data, step=step) 68 | 69 | 70 | class SwanlabLogger(Logger): 71 | def __init__(self, config: Dict[str, Any]) -> None: 72 | swanlab_key = os.getenv("SWANLAB_API_KEY") 73 | swanlab_dir = os.getenv("SWANLAB_DIR", "swanlab_log") 74 | swanlab_mode = os.getenv("SWANLAB_MODE", "cloud") 75 | if swanlab_key: 76 | swanlab.login(swanlab_key) 77 | 78 | swanlab.init( 79 | project=config["trainer"]["project_name"], 80 | experiment_name=config["trainer"]["experiment_name"], 81 | config={"UPPERFRAMEWORK": "EasyR1", "FRAMEWORK": "veRL", **config}, 82 | logdir=swanlab_dir, 83 | mode=swanlab_mode, 84 | ) 85 | 86 | def log(self, data: Dict[str, Any], step: int) -> None: 87 | swanlab.log(data=data, step=step) 88 | 89 | def finish(self) -> None: 90 | swanlab.finish() 91 | 92 | 93 | class TensorBoardLogger(Logger): 94 | def __init__(self, config: Dict[str, Any]) -> None: 95 | tensorboard_dir = os.getenv("TENSORBOARD_DIR", "tensorboard_log") 96 | os.makedirs(tensorboard_dir, exist_ok=True) 97 | print(f"Saving tensorboard log to {tensorboard_dir}.") 98 | self.writer = SummaryWriter(tensorboard_dir) 99 | # self.writer.add_hparams(hparam_dict=flatten_dict(config), metric_dict={"placeholder": 0}) 100 | 101 | def log(self, data: Dict[str, Any], step: int) -> None: 102 | for key, value in data.items(): 103 | self.writer.add_scalar(key, value, step) 104 | 105 | def finish(self): 106 | self.writer.close() 107 | 108 | 109 | class WandbLogger(Logger): 110 | def __init__(self, config: Dict[str, Any]) -> None: 111 | wandb.init( 112 | project=config["trainer"]["project_name"], 113 | name=config["trainer"]["experiment_name"], 114 | config=config, 115 | ) 116 | 117 | def log(self, data: Dict[str, Any], step: int) -> None: 118 | wandb.log(data=data, step=step) 119 | 120 | def finish(self) -> None: 121 | wandb.finish() 122 | 123 | 124 | LOGGERS = { 125 | "console": ConsoleLogger, 126 | "mlflow": MlflowLogger, 127 | "swanlab": SwanlabLogger, 128 | 
"tensorboard": TensorBoardLogger, 129 | "wandb": WandbLogger, 130 | } 131 | 132 | 133 | class Tracker: 134 | def __init__(self, loggers: Union[str, List[str]] = "console", config: Optional[Dict[str, Any]] = None): 135 | if isinstance(loggers, str): 136 | loggers = [loggers] 137 | 138 | self.loggers: List[Logger] = [] 139 | for logger in loggers: 140 | if logger not in LOGGERS: 141 | raise ValueError(f"{logger} is not supported.") 142 | 143 | self.loggers.append(LOGGERS[logger](config)) 144 | 145 | self.gen_logger = AggregateGenerationsLogger(loggers) 146 | 147 | def log(self, data: Dict[str, Any], step: int) -> None: 148 | for logger in self.loggers: 149 | logger.log(data=data, step=step) 150 | 151 | def log_generation(self, samples: List[Tuple[str, str, str, float]], step: int) -> None: 152 | self.gen_logger.log(samples, step) 153 | 154 | def __del__(self): 155 | for logger in self.loggers: 156 | logger.finish() 157 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/model_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | Utilities to create common models 16 | """ 17 | 18 | from functools import lru_cache 19 | from typing import Optional, Tuple 20 | 21 | import torch 22 | import torch.distributed as dist 23 | from torch import nn 24 | 25 | 26 | @lru_cache 27 | def is_rank0() -> int: 28 | return (not dist.is_initialized()) or (dist.get_rank() == 0) 29 | 30 | 31 | def print_gpu_memory_usage(prefix: str = "GPU memory usage") -> None: 32 | """Report the current GPU VRAM usage.""" 33 | if is_rank0(): 34 | free_mem, total_mem = torch.cuda.mem_get_info() 35 | print(f"{prefix}: {(total_mem - free_mem) / (1024**3):.2f} GB / {total_mem / (1024**3):.2f} GB.") 36 | 37 | 38 | def _get_model_size(model: nn.Module, scale: str = "auto") -> Tuple[float, str]: 39 | """Compute the model size.""" 40 | n_params = sum(p.numel() for p in model.parameters()) 41 | 42 | if scale == "auto": 43 | if n_params > 1e9: 44 | scale = "B" 45 | elif n_params > 1e6: 46 | scale = "M" 47 | elif n_params > 1e3: 48 | scale = "K" 49 | else: 50 | scale = "" 51 | 52 | if scale == "B": 53 | n_params = n_params / 1e9 54 | elif scale == "M": 55 | n_params = n_params / 1e6 56 | elif scale == "K": 57 | n_params = n_params / 1e3 58 | elif scale == "": 59 | pass 60 | else: 61 | raise NotImplementedError(f"Unknown scale {scale}.") 62 | 63 | return n_params, scale 64 | 65 | 66 | def print_model_size(model: nn.Module, name: Optional[str] = None) -> None: 67 | """Print the model size.""" 68 | if is_rank0(): 69 | n_params, scale = _get_model_size(model, scale="auto") 70 | if name is None: 71 | name = model.__class__.__name__ 72 | 73 | print(f"{name} contains {n_params:.2f}{scale} parameters.") 74 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/py_functional.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Contain small python utility functions 16 | """ 17 | 18 | import importlib.util 19 | import re 20 | from contextlib import contextmanager 21 | from functools import lru_cache 22 | from typing import Any, Dict, List, Union 23 | 24 | import numpy as np 25 | import yaml 26 | from codetiming import Timer 27 | from yaml import Dumper 28 | 29 | 30 | def is_sci_notation(number: float) -> bool: 31 | pattern = re.compile(r"^[+-]?\d+(\.\d*)?[eE][+-]?\d+$") 32 | return bool(pattern.match(str(number))) 33 | 34 | 35 | def float_representer(dumper: Dumper, number: Union[float, np.float32, np.float64]): 36 | if is_sci_notation(number): 37 | value = str(number) 38 | if "." 
not in value and "e" in value: 39 | value = value.replace("e", ".0e", 1) 40 | else: 41 | value = str(round(number, 3)) 42 | 43 | return dumper.represent_scalar("tag:yaml.org,2002:float", value) 44 | 45 | 46 | yaml.add_representer(float, float_representer) 47 | yaml.add_representer(np.float32, float_representer) 48 | yaml.add_representer(np.float64, float_representer) 49 | 50 | 51 | @lru_cache 52 | def is_package_available(name: str) -> bool: 53 | return importlib.util.find_spec(name) is not None 54 | 55 | 56 | def union_two_dict(dict1: Dict[str, Any], dict2: Dict[str, Any]) -> Dict[str, Any]: 57 | """Union two dict. Will throw an error if there is an item not the same object with the same key.""" 58 | for key in dict2.keys(): 59 | if key in dict1: 60 | assert dict1[key] == dict2[key], f"{key} in dict1 and dict2 are not the same object" 61 | 62 | dict1[key] = dict2[key] 63 | 64 | return dict1 65 | 66 | 67 | def append_to_dict(data: Dict[str, List[Any]], new_data: Dict[str, Any]) -> None: 68 | """Append dict to a dict of list.""" 69 | for key, val in new_data.items(): 70 | if key not in data: 71 | data[key] = [] 72 | 73 | data[key].append(val) 74 | 75 | 76 | def unflatten_dict(data: Dict[str, Any], sep: str = "/") -> Dict[str, Any]: 77 | unflattened = {} 78 | for key, value in data.items(): 79 | pieces = key.split(sep) 80 | pointer = unflattened 81 | for piece in pieces[:-1]: 82 | if piece not in pointer: 83 | pointer[piece] = {} 84 | 85 | pointer = pointer[piece] 86 | 87 | pointer[pieces[-1]] = value 88 | 89 | return unflattened 90 | 91 | 92 | def flatten_dict(data: Dict[str, Any], parent_key: str = "", sep: str = "/") -> Dict[str, Any]: 93 | flattened = {} 94 | for key, value in data.items(): 95 | new_key = parent_key + sep + key if parent_key else key 96 | if isinstance(value, dict): 97 | flattened.update(flatten_dict(value, new_key, sep=sep)) 98 | else: 99 | flattened[new_key] = value 100 | 101 | return flattened 102 | 103 | 104 | def convert_dict_to_str(data: Dict[str, Any]) -> str: 105 | return yaml.dump(data, indent=2) 106 | 107 | 108 | @contextmanager 109 | def timer(name: str, timing_raw: Dict[str, float]): 110 | with Timer(name=name, logger=None) as timer: 111 | yield 112 | 113 | timing_raw[name] = timer.last 114 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
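A small worked example for the dictionary helpers above: flatten_dict joins nested keys with the "/" separator and unflatten_dict reverses it, which is how nested configs become flat, metric-style keys and back.

    nested = {"trainer": {"project_name": "demo", "lr": 1e-6}, "seed": 1}
    flat = flatten_dict(nested)             # {"trainer/project_name": "demo", "trainer/lr": 1e-06, "seed": 1}
    assert unflatten_dict(flat) == nested   # the round trip restores the original nesting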
14 | """Utils for tokenization.""" 15 | 16 | from typing import Optional 17 | 18 | from transformers import AutoProcessor, AutoTokenizer, PreTrainedTokenizer, ProcessorMixin 19 | 20 | 21 | def get_tokenizer(model_path: str, override_chat_template: Optional[str] = None, **kwargs) -> PreTrainedTokenizer: 22 | """Create a huggingface pretrained tokenizer.""" 23 | tokenizer = AutoTokenizer.from_pretrained(model_path, **kwargs) 24 | if override_chat_template is not None: 25 | tokenizer.chat_template = override_chat_template 26 | 27 | if tokenizer.bos_token == "<bos>" and tokenizer.eos_token == "<eos>": 28 | # the EOS token in gemma2 & gemma3 is ambiguous, which may worsen RL performance. 29 | # https://huggingface.co/google/gemma-2-2b-it/commit/17a01657f5c87135bcdd0ec7abb4b2dece04408a 30 | print("Found gemma model. Set eos_token and eos_token_id to <end_of_turn> and 107.") 31 | tokenizer.eos_token = "<end_of_turn>" 32 | 33 | if tokenizer.pad_token_id is None: 34 | print("Pad token is None. Set it to eos_token.") 35 | tokenizer.pad_token = tokenizer.eos_token 36 | 37 | return tokenizer 38 | 39 | 40 | def get_processor(model_path: str, override_chat_template: Optional[str] = None, **kwargs) -> Optional[ProcessorMixin]: 41 | """Create a huggingface pretrained processor.""" 42 | processor = AutoProcessor.from_pretrained(model_path, **kwargs) 43 | if override_chat_template is not None: 44 | processor.chat_template = override_chat_template 45 | 46 | # Avoid loading the tokenizer, see: 47 | # https://github.com/huggingface/transformers/blob/v4.49.0/src/transformers/models/auto/processing_auto.py#L344 48 | if processor is not None and "Processor" not in processor.__class__.__name__: 49 | processor = None 50 | 51 | return processor 52 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
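A minimal usage sketch for the get_tokenizer/get_processor helpers above, pointed at the Qwen2.5-VL checkpoint used elsewhere in this repo; for text-only models whose AutoProcessor resolves to a plain tokenizer, get_processor returns None.

    tokenizer = get_tokenizer("Qwen/Qwen2.5-VL-3B-Instruct")
    processor = get_processor("Qwen/Qwen2.5-VL-3B-Instruct")  # a multimodal *Processor class, so it is kept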
14 | 15 | import torch 16 | 17 | 18 | HALF_LIST = ["fp16", "float16"] 19 | FLOAT_LIST = ["fp32", "float32"] 20 | BFLOAT_LIST = ["bf16", "bfloat16"] 21 | 22 | 23 | class PrecisionType: 24 | """Type of precision used.""" 25 | 26 | @staticmethod 27 | def is_fp16(precision: str) -> bool: 28 | return precision in HALF_LIST 29 | 30 | @staticmethod 31 | def is_fp32(precision: str) -> bool: 32 | return precision in FLOAT_LIST 33 | 34 | @staticmethod 35 | def is_bf16(precision: str) -> bool: 36 | return precision in BFLOAT_LIST 37 | 38 | @staticmethod 39 | def to_dtype(precision: str) -> torch.dtype: 40 | if precision in HALF_LIST: 41 | return torch.float16 42 | elif precision in FLOAT_LIST: 43 | return torch.float32 44 | elif precision in BFLOAT_LIST: 45 | return torch.bfloat16 46 | else: 47 | raise RuntimeError(f"Unexpected precision: {precision}") 48 | 49 | @staticmethod 50 | def to_str(precision: torch.dtype) -> str: 51 | if precision == torch.float16: 52 | return "float16" 53 | elif precision == torch.float32: 54 | return "float32" 55 | elif precision == torch.bfloat16: 56 | return "bfloat16" 57 | else: 58 | raise RuntimeError(f"Unexpected precision: {precision}") 59 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
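A quick sanity example (not from the repo) for the PrecisionType helper above, mapping the string dtypes that the FSDP and rollout configs use ("bf16", "fp32", ...) to torch dtypes and back:

    assert PrecisionType.to_dtype("bf16") is torch.bfloat16
    assert PrecisionType.to_str(torch.float16) == "float16"
    assert PrecisionType.is_fp32("float32") and not PrecisionType.is_bf16("fp16")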
14 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/__pycache__/fsdp_workers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/__pycache__/fsdp_workers.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .config import ActorConfig, FSDPConfig, ModelConfig, OptimConfig, RefConfig 16 | 17 | 18 | __all__ = [ 19 | "ActorConfig", 20 | "FSDPConfig", 21 | "ModelConfig", 22 | "OptimConfig", 23 | "RefConfig", 24 | ] 25 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/actor/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/actor/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/actor/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/actor/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/actor/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/actor/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/actor/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | The base class for Actor 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | from typing import Any, Dict 20 | 21 | import torch 22 | 23 | from ...protocol import DataProto 24 | from .config import ActorConfig 25 | 26 | 27 | __all__ = ["BasePPOActor"] 28 | 29 | 30 | class BasePPOActor(ABC): 31 | def __init__(self, config: ActorConfig): 32 | """The base class for PPO actor 33 | 34 | Args: 35 | config (ActorConfig): a config passed to the PPOActor. 36 | """ 37 | self.config = config 38 | 39 | @abstractmethod 40 | def compute_log_prob(self, data: DataProto) -> torch.Tensor: 41 | """Compute logits given a batch of data. 
42 | 43 | Args: 44 | data (DataProto): a batch of data represented by DataProto. It must contain key ```input_ids```, 45 | ```attention_mask``` and ```position_ids```. 46 | 47 | Returns: 48 | DataProto: a DataProto containing the key ```log_probs``` 49 | """ 50 | pass 51 | 52 | @abstractmethod 53 | def update_policy(self, data: DataProto) -> Dict[str, Any]: 54 | """Update the policy with an iterator of DataProto 55 | 56 | Args: 57 | data (DataProto): an iterator over the DataProto that returns by 58 | ```make_minibatch_iterator``` 59 | 60 | Returns: 61 | Dict: a dictionary contains anything. Typically, it contains the statistics during updating the model 62 | such as ```loss```, ```grad_norm```, etc,. 63 | """ 64 | pass 65 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/actor/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Actor config 16 | """ 17 | 18 | import os 19 | from dataclasses import dataclass, field 20 | from typing import Any, Dict, Optional, Tuple 21 | 22 | 23 | @dataclass 24 | class ModelConfig: 25 | model_path: Optional[str] = None 26 | tokenizer_path: Optional[str] = None 27 | override_config: Dict[str, Any] = field(default_factory=dict) 28 | enable_gradient_checkpointing: bool = True 29 | trust_remote_code: bool = True 30 | freeze_vision_tower: bool = False 31 | 32 | def post_init(self): 33 | if self.tokenizer_path is None: 34 | self.tokenizer_path = self.model_path 35 | 36 | if self.model_path is not None and os.path.exists(self.model_path): # ray job uses absolute path 37 | self.model_path = os.path.abspath(self.model_path) 38 | 39 | if self.tokenizer_path is not None and os.path.exists(self.tokenizer_path): 40 | self.tokenizer_path = os.path.abspath(self.tokenizer_path) 41 | 42 | 43 | @dataclass 44 | class OptimConfig: 45 | lr: float = 1e-6 46 | betas: Tuple[float, float] = (0.9, 0.999) 47 | weight_decay: float = 1e-2 48 | strategy: str = "adamw" 49 | lr_warmup_ratio: float = 0.0 50 | min_lr_ratio: Optional[float] = None 51 | warmup_style: str = "constant" 52 | """auto keys""" 53 | training_steps: int = field(default=-1, init=False) 54 | 55 | 56 | @dataclass 57 | class FSDPConfig: 58 | enable_full_shard: bool = True 59 | enable_cpu_offload: bool = False 60 | enable_rank0_init: bool = False 61 | use_orig_params: bool = False 62 | torch_dtype: Optional[str] = None 63 | fsdp_size: int = -1 64 | mp_param_dtype: str = "bf16" 65 | mp_reduce_dtype: str = "fp32" 66 | mp_buffer_dtype: str = "fp32" 67 | 68 | 69 | @dataclass 70 | class OffloadConfig: 71 | offload_params: bool = False 72 | offload_optimizer: bool = False 73 | 74 | 75 | @dataclass 76 | class ActorConfig: 77 | strategy: str = "fsdp" 78 | global_batch_size: int = 256 79 | micro_batch_size_per_device_for_update: int = 4 80 | 
micro_batch_size_per_device_for_experience: int = 16 81 | max_grad_norm: float = 1.0 82 | clip_ratio_low: float = 0.2 83 | clip_ratio_high: float = 0.3 84 | clip_ratio_dual: float = 3.0 85 | ppo_epochs: int = 1 86 | padding_free: bool = False 87 | ulysses_sequence_parallel_size: int = 1 88 | use_torch_compile: bool = True 89 | model: ModelConfig = field(default_factory=ModelConfig) 90 | optim: OptimConfig = field(default_factory=OptimConfig) 91 | fsdp: FSDPConfig = field(default_factory=FSDPConfig) 92 | offload: OffloadConfig = field(default_factory=OffloadConfig) 93 | """auto keys""" 94 | global_batch_size_per_device: int = field(default=-1, init=False) 95 | disable_kl: bool = field(default=False, init=False) 96 | use_kl_loss: bool = field(default=False, init=False) 97 | kl_penalty: str = field(default="kl", init=False) 98 | kl_coef: float = field(default=0.0, init=False) 99 | 100 | 101 | @dataclass 102 | class RefConfig: 103 | strategy: str = "fsdp" 104 | fsdp: FSDPConfig = field(default_factory=FSDPConfig) 105 | offload: OffloadConfig = field(default_factory=OffloadConfig) 106 | """auto keys""" 107 | micro_batch_size_per_device_for_experience: int = field(default=-1, init=False) 108 | padding_free: bool = field(default=False, init=False) 109 | ulysses_sequence_parallel_size: int = field(default=1, init=False) 110 | use_torch_compile: bool = field(default=True, init=False) 111 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
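An illustrative way to build the actor-side configs defined above (values are placeholders, not the repo's run-script settings); ModelConfig.post_init() lets tokenizer_path fall back to model_path and resolves existing local paths to absolute ones.

    model_cfg = ModelConfig(model_path="Qwen/Qwen2.5-VL-3B-Instruct", freeze_vision_tower=True)
    model_cfg.post_init()  # tokenizer_path now equals model_path
    actor_cfg = ActorConfig(
        global_batch_size=128,
        micro_batch_size_per_device_for_update=2,
        model=model_cfg,
        optim=OptimConfig(lr=1e-6, weight_decay=1e-2),
        fsdp=FSDPConfig(torch_dtype="bf16"),
    )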
14 | """ 15 | ActorRolloutRef config 16 | """ 17 | 18 | from dataclasses import dataclass, field 19 | 20 | from .actor import ActorConfig, FSDPConfig, ModelConfig, OptimConfig, RefConfig 21 | from .critic import CriticConfig 22 | from .reward import RewardConfig 23 | from .rollout import RolloutConfig 24 | 25 | 26 | __all__ = [ 27 | "ActorConfig", 28 | "CriticConfig", 29 | "FSDPConfig", 30 | "ModelConfig", 31 | "OptimConfig", 32 | "RefConfig", 33 | "RewardConfig", 34 | "RolloutConfig", 35 | "WorkerConfig", 36 | ] 37 | 38 | 39 | @dataclass 40 | class WorkerConfig: 41 | hybrid_engine: bool = True 42 | actor: ActorConfig = field(default_factory=ActorConfig) 43 | critic: CriticConfig = field(default_factory=CriticConfig) 44 | ref: RefConfig = field(default_factory=RefConfig) 45 | reward: RewardConfig = field(default_factory=RewardConfig) 46 | rollout: RolloutConfig = field(default_factory=RolloutConfig) 47 | 48 | def post_init(self): 49 | self.ref.micro_batch_size_per_device_for_experience = self.actor.micro_batch_size_per_device_for_experience 50 | self.ref.padding_free = self.actor.padding_free 51 | self.ref.ulysses_sequence_parallel_size = self.actor.ulysses_sequence_parallel_size 52 | self.ref.use_torch_compile = self.actor.use_torch_compile 53 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .config import CriticConfig 16 | 17 | 18 | __all__ = ["CriticConfig"] 19 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/critic/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/critic/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/critic/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/critic/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/critic/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Base class for Critic 16 | """ 17 | 18 | from abc import ABC, abstractmethod 19 | from typing import Any, Dict 20 | 21 | import torch 22 | 23 | from ...protocol import DataProto 24 | from .config import CriticConfig 25 | 26 | 27 | __all__ = ["BasePPOCritic"] 28 | 29 | 30 | class BasePPOCritic(ABC): 31 | def __init__(self, config: CriticConfig): 32 | self.config = config 33 | 34 | @abstractmethod 35 | def compute_values(self, data: DataProto) -> torch.Tensor: 36 | """Compute values""" 37 | pass 38 | 39 | @abstractmethod 40 | def update_critic(self, data: DataProto) -> Dict[str, Any]: 41 | """Update the critic""" 42 | pass 43 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/critic/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Critic config 16 | """ 17 | 18 | from dataclasses import dataclass, field 19 | 20 | from ..actor.config import FSDPConfig, ModelConfig, OffloadConfig, OptimConfig 21 | 22 | 23 | @dataclass 24 | class CriticConfig: 25 | strategy: str = "fsdp" 26 | global_batch_size: int = 256 27 | micro_batch_size_per_device_for_update: int = 4 28 | micro_batch_size_per_device_for_experience: int = 16 29 | max_grad_norm: float = 1.0 30 | cliprange_value: float = 0.5 31 | ppo_epochs: int = 1 32 | padding_free: bool = False 33 | ulysses_sequence_parallel_size: int = 1 34 | model: ModelConfig = field(default_factory=ModelConfig) 35 | optim: OptimConfig = field(default_factory=OptimConfig) 36 | fsdp: FSDPConfig = field(default_factory=FSDPConfig) 37 | offload: OffloadConfig = field(default_factory=OffloadConfig) 38 | """auto keys""" 39 | global_batch_size_per_device: int = field(default=-1, init=False) 40 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/reward/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 PRIME team and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
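For completeness, a sketch of the CriticConfig above, which reuses the actor's ModelConfig/OptimConfig building blocks; GRPO-style runs typically do not use a value model, so this is illustrative only and every value is a placeholder.

    critic_cfg = CriticConfig(
        global_batch_size=128,
        cliprange_value=0.5,
        model=ModelConfig(model_path="Qwen/Qwen2.5-VL-3B-Instruct"),
        optim=OptimConfig(lr=1e-5),
    )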
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .config import RewardConfig 16 | from .function import BatchFunctionRewardManager, FunctionRewardManager, SequentialFunctionRewardManager 17 | 18 | 19 | __all__ = ["BatchFunctionRewardManager", "FunctionRewardManager", "RewardConfig", "SequentialFunctionRewardManager"] 20 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/reward/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/reward/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/reward/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/reward/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/reward/__pycache__/function.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/reward/__pycache__/function.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/reward/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | Reward config 16 | """ 17 | 18 | import os 19 | from dataclasses import dataclass, field 20 | from typing import Optional 21 | 22 | 23 | @dataclass 24 | class RewardConfig: 25 | reward_type: str = "batch" 26 | reward_function: Optional[str] = None 27 | reward_function_kwargs: dict = field(default_factory=dict) 28 | skip_special_tokens: bool = True 29 | num_cpus: int = 1 30 | """auto keys""" 31 | reward_function_name: Optional[str] = field(default=None, init=False) 32 | 33 | def post_init(self): 34 | if self.reward_function is not None: # support custom reward function, e.g., ./math.py:main 35 | if ":" not in self.reward_function: 36 | self.reward_function_name = "main" 37 | else: 38 | self.reward_function, self.reward_function_name = self.reward_function.rsplit(":", maxsplit=1) 39 | 40 | if os.path.exists(self.reward_function): # ray job uses absolute path 41 | self.reward_function = os.path.abspath(self.reward_function) 42 | else: 43 | self.reward_function = None 44 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/reward/function.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
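An example of the file:function convention that RewardConfig.post_init parses; the path points at the reward script under examples/reward_function/, while the function name compute_score is an assumption made here for illustration.

    cfg = RewardConfig(reward_type="batch", reward_function="examples/reward_function/track.py:compute_score")
    cfg.post_init()
    # cfg.reward_function_name == "compute_score"
    # cfg.reward_function becomes the absolute path to track.py if that file exists, otherwise None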
14 | 15 | import importlib.util 16 | import os 17 | import sys 18 | from abc import ABC, abstractmethod 19 | from collections import defaultdict 20 | from functools import partial 21 | from typing import Callable, Dict, List, Optional, Tuple, TypedDict 22 | 23 | import torch 24 | from transformers import PreTrainedTokenizer 25 | 26 | from ...protocol import DataProto 27 | from .config import RewardConfig 28 | from PIL import Image 29 | # scores.append( 30 | # { 31 | # "overall": weight[0] * format_score + weight[1] * format2_score + weight[2] * iou_score + weight[3] * yn_score , 32 | # "format": format_score, 33 | # "format_strict": format2_score, 34 | # "iou":iou_score, 35 | # "yn":yn_score, 36 | # } 37 | class RewardScore(TypedDict): 38 | overall: float 39 | format: Optional[float] 40 | format_strict: Optional[float] 41 | iou: Optional[float] 42 | yn: Optional[float] 43 | 44 | SequentialRewardFunction = Callable[[str, str], RewardScore] 45 | 46 | BatchRewardFunction = Callable[[List[str], List[str], List[str], List[str], List[Image.Image],List[Image.Image],List[str]], List[RewardScore]] 47 | # BatchRewardFunction = Callable[[List[str], List[str]], List[RewardScore]] 48 | 49 | class FunctionRewardManager(ABC): 50 | """Reward manager for rule-based reward.""" 51 | 52 | def __init__(self, config: RewardConfig, tokenizer: PreTrainedTokenizer): 53 | if config.reward_function is None: 54 | raise ValueError("Reward function is not provided.") 55 | 56 | if not os.path.exists(config.reward_function): 57 | raise FileNotFoundError(f"Reward function file {config.reward_function} not found.") 58 | 59 | spec = importlib.util.spec_from_file_location("custom_reward_fn", config.reward_function) 60 | module = importlib.util.module_from_spec(spec) 61 | try: 62 | sys.modules["custom_reward_fn"] = module 63 | spec.loader.exec_module(module) 64 | except Exception as e: 65 | raise RuntimeError(f"Failed to load reward function: {e}") 66 | 67 | if not hasattr(module, config.reward_function_name): 68 | raise AttributeError(f"Module {module} does not have function {config.reward_function_name}.") 69 | 70 | reward_fn = getattr(module, config.reward_function_name) 71 | print(f"Using reward function `{config.reward_function_name}` from `{config.reward_function}`.") 72 | self.reward_fn = partial(reward_fn, **config.reward_function_kwargs) 73 | self.config = config 74 | self.tokenizer = tokenizer 75 | 76 | @abstractmethod 77 | def compute_reward(self, data: DataProto) -> Tuple[torch.Tensor, Dict[str, List[float]]]: 78 | """Compute reward for a batch of data.""" 79 | ... 
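A toy SequentialRewardFunction that matches the RewardScore schema above, purely for illustration; the project's actual reward (examples/reward_function/track.py) scores answer format and box IoU rather than doing an exact-match check.

    def toy_reward(response: str, ground_truth: str) -> RewardScore:
        overall = 1.0 if ground_truth.strip() in response else 0.0  # stand-in for real IoU/format scoring
        return {"overall": overall, "format": None, "format_strict": None, "iou": overall, "yn": None}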
80 | 81 | 82 | class SequentialFunctionRewardManager(FunctionRewardManager): 83 | reward_fn: SequentialRewardFunction 84 | 85 | def compute_reward(self, data: DataProto) -> Tuple[torch.Tensor, Dict[str, List[float]]]: 86 | reward_tensor = torch.zeros_like(data.batch["responses"], dtype=torch.float32) 87 | reward_metrics = defaultdict(list) 88 | response_ids = data.batch["responses"] 89 | response_length = data.batch["response_mask"].sum(dim=-1) 90 | for i in range(len(data)): 91 | valid_response_ids = response_ids[i][: response_length[i]] 92 | response_str = self.tokenizer.decode( 93 | valid_response_ids, skip_special_tokens=self.config.skip_special_tokens 94 | ) 95 | ground_truth = data.non_tensor_batch["ground_truth"][i] 96 | 97 | score = self.reward_fn(response_str, ground_truth) 98 | reward_tensor[i, response_length[i] - 1] = score["overall"] 99 | for key, value in score.items(): 100 | reward_metrics[key].append(value) 101 | 102 | return reward_tensor, reward_metrics 103 | 104 | 105 | class BatchFunctionRewardManager(FunctionRewardManager): 106 | reward_fn: BatchRewardFunction 107 | # BatchRewardFunction = Callable[[List[str], List[str], List[str], List[str], List[Image.Image],List[str]], List[RewardScore]] 108 | def compute_reward(self, data: DataProto) -> Tuple[torch.Tensor, Dict[str, List[float]]]: 109 | # breakpoint() 110 | # {'problem': array([' DataProto: 26 | """Generate sequences""" 27 | pass 28 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/rollout/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """ 15 | Rollout config 16 | """ 17 | 18 | from dataclasses import asdict, dataclass, field 19 | from typing import Any, Dict, Optional 20 | 21 | 22 | @dataclass 23 | class RolloutConfig: 24 | name: str = "vllm" 25 | n: int = 1 26 | temperature: float = 1.0 27 | top_p: float = 1.0 28 | top_k: int = -1 29 | seed: int = 1 30 | limit_images: int = 0 31 | dtype: str = "bf16" 32 | gpu_memory_utilization: float = 0.6 33 | ignore_eos: bool = False 34 | # enforce_eager: bool = False 35 | enforce_eager: bool = False 36 | enable_chunked_prefill: bool = False # only for v0 engine 37 | tensor_parallel_size: int = 2 38 | max_model_len: Optional[int] = None 39 | max_num_batched_tokens: int = 8192 40 | disable_log_stats: bool = True 41 | val_override_config: Dict[str, Any] = field(default_factory=dict) 42 | """auto keys""" 43 | prompt_length: int = field(default=-1, init=False) 44 | response_length: int = field(default=-1, init=False) 45 | trust_remote_code: bool = field(default=False, init=False) 46 | 47 | def to_dict(self): 48 | return asdict(self) 49 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
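A short sketch (illustrative values) of a GRPO-style rollout config built from the dataclass above: n controls how many responses are sampled per prompt, and to_dict() gives a plain dict that can be merged with val_override_config for validation-time sampling.

    cfg = RolloutConfig(n=5, temperature=1.0, top_p=0.99, gpu_memory_utilization=0.6, tensor_parallel_size=2)
    val_sampling = {**cfg.to_dict(), **cfg.val_override_config}  # validation overrides win on key clashes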
14 | 15 | 16 | from .base import BaseShardingManager 17 | from .fsdp_ulysses import FSDPUlyssesShardingManager 18 | from .fsdp_vllm import FSDPVLLMShardingManager 19 | 20 | 21 | __all__ = ["BaseShardingManager", "FSDPUlyssesShardingManager", "FSDPVLLMShardingManager"] 22 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
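The sharding managers exported above are used as context managers around generation; a hedged sketch of the calling pattern (variable names are hypothetical), where the concrete FSDPVLLMShardingManager defined later syncs FSDP weights into vLLM on enter and puts the engine to sleep on exit:

    with sharding_manager:                                    # e.g. an FSDPVLLMShardingManager instance
        batch = sharding_manager.preprocess_data(batch)       # all-gather so every tp rank sees the same input
        output = rollout.generate_sequences(batch)            # hypothetical rollout worker call
        output = sharding_manager.postprocess_data(output)    # keep only this tp rank's chunk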
14 | """ 15 | Sharding manager to implement HybridEngine 16 | """ 17 | 18 | from ...protocol import DataProto 19 | 20 | 21 | class BaseShardingManager: 22 | def __enter__(self): 23 | pass 24 | 25 | def __exit__(self, exc_type, exc_value, traceback): 26 | pass 27 | 28 | def preprocess_data(self, data: DataProto) -> DataProto: 29 | return data 30 | 31 | def postprocess_data(self, data: DataProto) -> DataProto: 32 | return data 33 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Contains a resharding manager that binds weights from FSDP zero3 to XPerfGPT 16 | """ 17 | 18 | from torch.distributed.device_mesh import DeviceMesh 19 | 20 | from ...protocol import DataProto, all_gather_data_proto 21 | from ...utils.ulysses import get_ulysses_sequence_parallel_group, set_ulysses_sequence_parallel_group 22 | from .base import BaseShardingManager 23 | 24 | 25 | class FSDPUlyssesShardingManager(BaseShardingManager): 26 | """ 27 | Sharding manager to support data resharding when using FSDP + Ulysses 28 | """ 29 | 30 | def __init__(self, device_mesh: DeviceMesh): 31 | super().__init__() 32 | self.device_mesh = device_mesh 33 | 34 | def __enter__(self): 35 | if self.device_mesh is not None: 36 | self.prev_sp_group = get_ulysses_sequence_parallel_group() 37 | set_ulysses_sequence_parallel_group(self.device_mesh["sp"].get_group()) 38 | 39 | def __exit__(self, exc_type, exc_value, traceback): 40 | if self.device_mesh is not None: 41 | set_ulysses_sequence_parallel_group(self.prev_sp_group) 42 | 43 | def preprocess_data(self, data: DataProto) -> DataProto: 44 | """ 45 | AllGather data from sp region 46 | This is because the data is first sharded along the FSDP dimension as we utilize the DP_COMPUTE 47 | In Ulysses, we need to make sure the same data is used across a SP group 48 | """ 49 | if self.device_mesh is not None: 50 | sp_size = self.device_mesh["sp"].size() 51 | sp_group = self.device_mesh["sp"].get_group() 52 | all_gather_data_proto(data, size=sp_size, group=sp_group) 53 | 54 | return data 55 | 56 | def postprocess_data(self, data: DataProto) -> DataProto: 57 | """ 58 | Split the data to follow FSDP partition 59 | """ 60 | if self.device_mesh is not None: 61 | sp_size = self.device_mesh["sp"].size() 62 | sp_rank = self.device_mesh["sp"].get_local_rank() 63 | data = data.chunk(chunks=sp_size)[sp_rank] 64 | 65 | return data 66 | -------------------------------------------------------------------------------- /ReasoningTrack/Reinforcement Learning/verl/workers/sharding_manager/fsdp_vllm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Bytedance Ltd. 
and/or its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import inspect 16 | from typing import Dict, Iterable, Tuple, Union 17 | 18 | import torch 19 | import torch.distributed as dist 20 | from torch.distributed._tensor import DTensor 21 | from torch.distributed.checkpoint.state_dict import get_model_state_dict 22 | from torch.distributed.device_mesh import DeviceMesh 23 | from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel as FSDP 24 | from vllm import LLM 25 | from vllm.distributed import parallel_state as vllm_ps 26 | 27 | from ...protocol import DataProto, all_gather_data_proto 28 | from ...utils.model_utils import print_gpu_memory_usage 29 | from .base import BaseShardingManager 30 | 31 | 32 | class FSDPVLLMShardingManager(BaseShardingManager): 33 | def __init__( 34 | self, 35 | module: FSDP, 36 | inference_engine: LLM, 37 | device_mesh: DeviceMesh, 38 | ): 39 | self.module = module 40 | self.inference_engine = inference_engine 41 | self.device_mesh = device_mesh 42 | 43 | self.world_size = dist.get_world_size() 44 | self.tp_size = vllm_ps.get_tensor_model_parallel_world_size() 45 | self.tp_rank = vllm_ps.get_tensor_model_parallel_rank() 46 | self.tp_group = vllm_ps.get_tensor_model_parallel_group().device_group 47 | 48 | # Record freed bytes to estimate memory usage correctly 49 | # https://github.com/vllm-project/vllm/pull/11743#issuecomment-2754338119 50 | self.freed_bytes = 0 51 | 52 | # Note that torch_random_states may be different on each dp rank 53 | self.torch_random_states = torch.cuda.get_rng_state() 54 | # get a random rng states 55 | gen_dp_rank = self.device_mesh["dp"].get_local_rank() 56 | torch.cuda.manual_seed(gen_dp_rank + 1000) # make sure all tp ranks have the same random states 57 | self.gen_random_states = torch.cuda.get_rng_state() 58 | torch.cuda.set_rng_state(self.torch_random_states) 59 | 60 | def _make_weight_iterator( 61 | self, actor_weights: Dict[str, Union[torch.Tensor, DTensor]] 62 | ) -> Iterable[Tuple[str, torch.Tensor]]: 63 | for name, tensor in actor_weights.items(): 64 | yield name, tensor.full_tensor() if self.world_size != 1 else tensor 65 | 66 | def __enter__(self): 67 | # NOTE: Basically, we only need `torch.cuda.empty_cache()` before vllm wake_up and 68 | # after vllm sleep, since vllm has its own caching memory allocator CuMemAllocator. 69 | # Out of vllm scope, we should avoid empty cache to let pytorch using caching memory 70 | # to speed up memory allocations. 
71 | # 72 | # pytorch: https://pytorch.org/docs/stable/notes/cuda.html#memory-management 73 | # vllm: https://github.com/vllm-project/vllm/blob/v0.7.3/vllm/device_allocator/cumem.py#L103 74 | torch.cuda.empty_cache() 75 | print_gpu_memory_usage("Before state_dict() in sharding manager") 76 | actor_weights = get_model_state_dict(self.module) 77 | print_gpu_memory_usage("After state_dict() in sharding manager") 78 | 79 | if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: 80 | self.inference_engine.wake_up(tags=["weights"]) 81 | else: 82 | self.inference_engine.wake_up() 83 | 84 | model = self.inference_engine.llm_engine.model_executor.driver_worker.worker.model_runner.model 85 | model.load_weights(self._make_weight_iterator(actor_weights)) 86 | print_gpu_memory_usage("After sync model weights in sharding manager") 87 | 88 | del actor_weights 89 | torch.cuda.empty_cache() 90 | 91 | if "tags" in inspect.signature(self.inference_engine.wake_up).parameters: 92 | self.inference_engine.wake_up(tags=["kv_cache"]) 93 | 94 | print_gpu_memory_usage("After del state_dict and empty_cache in sharding manager") 95 | # important: need to manually set the random states of each tp to be identical. 96 | if self.device_mesh is not None: 97 | self.torch_random_states = torch.cuda.get_rng_state() 98 | torch.cuda.set_rng_state(self.gen_random_states) 99 | 100 | def __exit__(self, exc_type, exc_value, traceback): 101 | print_gpu_memory_usage("Before vllm offload in sharding manager") 102 | free_bytes_before_sleep = torch.cuda.mem_get_info()[0] 103 | self.inference_engine.sleep(level=1) 104 | free_bytes_after_sleep = torch.cuda.mem_get_info()[0] 105 | self.freed_bytes = free_bytes_after_sleep - free_bytes_before_sleep 106 | print_gpu_memory_usage("After vllm offload in sharding manager") 107 | 108 | self.module.train() 109 | torch.cuda.empty_cache() # add empty cache after each compute 110 | 111 | # restore random states 112 | if self.device_mesh is not None: 113 | self.gen_random_states = torch.cuda.get_rng_state() 114 | torch.cuda.set_rng_state(self.torch_random_states) 115 | 116 | def preprocess_data(self, data: DataProto) -> DataProto: 117 | """All gather across tp group to make each rank has identical input.""" 118 | all_gather_data_proto(data, size=self.tp_size, group=self.tp_group) 119 | return data 120 | 121 | def postprocess_data(self, data: DataProto) -> DataProto: 122 | """Get chunk data of this tp rank since we do all gather in preprocess.""" 123 | if self.tp_size > 1: 124 | data = data.chunk(chunks=self.tp_size)[self.tp_rank] 125 | 126 | return data 127 | -------------------------------------------------------------------------------- /ReasoningTrack/Supervise fine-tuning/training_args.yaml: -------------------------------------------------------------------------------- 1 | bf16: true 2 | cutoff_len: 4096 3 | dataset: sft_TNLLT,sft_OTB,sft_got10k,sft_TNL2K,sft_lasot 4 | dataset_dir: data 5 | ddp_timeout: 180000000 6 | do_train: true 7 | finetuning_type: full 8 | flash_attn: auto 9 | gradient_accumulation_steps: 8 10 | include_num_input_tokens_seen: true 11 | learning_rate: 5.0e-05 12 | logging_steps: 5 13 | lr_scheduler_type: cosine 14 | max_grad_norm: 1.0 15 | max_samples: 100000 16 | model_name_or_path: Qwen/Qwen2.5-VL-3B-Instruct 17 | num_train_epochs: 3.0 18 | optim: adamw_torch 19 | output_dir: saves/Qwen2.5-VL-3B-Instruct/full/train_2025-05-22-08-42-07-fullfull 20 | packing: false 21 | per_device_train_batch_size: 1 22 | plot_loss: true 23 | preprocessing_num_workers: 16 
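# Note: with per_device_train_batch_size: 1 and gradient_accumulation_steps: 8, the effective
# batch size is 8 samples per device per optimizer step.
# The keys in this file match the LLaMA-Factory training-argument schema; assuming LLaMA-Factory
# is the intended launcher (not stated explicitly here), a typical run would be:
#   llamafactory-cli train training_args.yaml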
24 | report_to: none 25 | save_steps: 150 26 | stage: sft 27 | template: qwen2_vl 28 | trust_remote_code: true 29 | warmup_steps: 0 30 | -------------------------------------------------------------------------------- /TNLLT_Evaluation_Toolkit/run_tracker_performance_evaluation.m: -------------------------------------------------------------------------------- 1 | % tracker performance evaluation tool for our benchmark 2 | % 07/19/2018 3 | 4 | clc; clear; close all; 5 | 6 | addpath('./utils/'); 7 | addpath('./sequence_evaluation_config/'); 8 | 9 | tmp_mat_path = './tmp_mat/'; % path to save temporary results 10 | path_anno = './annos/'; % path to annotations 11 | path_att = './annos/att/'; % path to attribute 12 | rp_all = './tracking_results/'; % path to tracking results 13 | save_fig_path = './res_fig/'; % path to result figures 14 | save_fig_suf = 'png'; % suffix of figures, 'png' or 'eps' 15 | 16 | 17 | att_name = {'Camera Motion', 'Rotation', 'Deformation', ... 18 | 'Full Occlusion', 'Illumination Variation', 'Out-of-View', 'Partial Occlusion', ... 19 | 'Viewpoint change', 'Scale Variation', 'Background Clutter', 'Motion Blur', ... 20 | 'Aspect Ration Change', 'Low Resolution', 'FM','AS'}; 21 | att_fig_name = {'CM', 'ROT', 'DEF', 'FOC', 'IV', 'OV', 'POC', ... 22 | 'VC', 'SV', 'BC', 'MB', 'ARC', 'LR', 'FM','AS'}; 23 | 24 | % 'test_set' --- evaluation with training subset 25 | evaluation_dataset_type = 'test_set'; 26 | 27 | % 1 for PR; 2 for NOR; 3 for SR 28 | flag = 3; 29 | 30 | if flag == 1 31 | norm_dst = false; 32 | elseif flag == 2 33 | norm_dst = true; 34 | else 35 | norm_dst = true; 36 | end 37 | 38 | trackers = config_tracker(); 39 | sequences = config_sequence(evaluation_dataset_type); 40 | plot_style = config_plot_style(); 41 | 42 | num_seq = numel(sequences); 43 | 44 | num_tracker = numel(trackers); 45 | 46 | % load tracker info 47 | name_tracker_all = cell(num_tracker, 1); 48 | for i = 1:num_tracker 49 | name_tracker_all{i} = trackers{i}.name; 50 | end 51 | 52 | % load sequence info 53 | name_seq_all = cell(num_seq, 1); 54 | for i = 1:num_seq 55 | name_seq_all{i} = sequences{i}; 56 | seq_att = dlmread(fullfile(path_att, [sequences{i} '.txt'])); 57 | if i == 1 58 | att_all = zeros(num_seq, numel(seq_att)); 59 | end 60 | att_all(i, :) = seq_att; 61 | end 62 | 63 | % parameters for evaluation 64 | metric_type_set = {'error', 'overlap'}; 65 | eval_type = 'OPE'; 66 | if flag == 1 || flag == 2 67 | ranking_type = 'threshold'; 68 | else 69 | ranking_type = 'AUC'; 70 | end 71 | % ranking_type = 'AUC'; 72 | % ranking_type = 'threshold'; % change it to 'AUC' for success plots 73 | rank_num = 50; 74 | 75 | threshold_set_error = 0:50; 76 | if norm_dst 77 | threshold_set_error = threshold_set_error / 100; 78 | end 79 | threshold_set_overlap = 0:0.05:1; 80 | 81 | for i = 1:numel(metric_type_set) 82 | % error (for distance plots) or overlap (for success plots) 83 | metric_type = metric_type_set{i}; 84 | switch metric_type 85 | case 'error' 86 | threshold_set = threshold_set_error; 87 | rank_idx = 21; 88 | x_label_name = 'Location error threshold'; 89 | y_label_name = 'Precision'; 90 | case 'overlap' 91 | threshold_set = threshold_set_overlap; 92 | rank_idx = 11; 93 | x_label_name = 'Overlap threshold'; 94 | y_label_name = 'Success rate'; 95 | end 96 | 97 | if flag == 3 98 | if strcmp(metric_type, 'error') && strcmp(ranking_type, 'AUC') % for ranking_type = 'AUC' 99 | continue; 100 | end 101 | else 102 | if strcmp(metric_type, 'overlap') && strcmp(ranking_type, 'threshold') % for 
ranking_type = 'threshold' 103 | continue; 104 | end 105 | end 106 | 107 | % if strcmp(metric_type, 'error') && strcmp(ranking_type, 'AUC') % for ranking_type = 'AUC' 108 | % if strcmp(metric_type, 'overlap') && strcmp(ranking_type, 'threshold') % for ranking_type = 'threshold' 109 | % continue; 110 | % end 111 | 112 | t_num = numel(threshold_set); 113 | 114 | % we only use OPE for evaluation 115 | plot_type = [metric_type '_' eval_type]; 116 | 117 | switch metric_type 118 | case 'error' 119 | title_name = ['Precision plots of ' eval_type]; 120 | if norm_dst 121 | title_name = ['Normalized ' title_name]; 122 | end 123 | 124 | if strcmp(evaluation_dataset_type, 'all') 125 | title_name = [title_name ' on TNLLT']; 126 | else 127 | title_name = [title_name ' on TNLLT Testing Set']; 128 | end 129 | case 'overlap' 130 | title_name = ['Success plots of ' eval_type]; 131 | 132 | if strcmp(evaluation_dataset_type, 'all') 133 | title_name = [title_name ' on TNLLT']; 134 | else 135 | title_name = [title_name ' on TNLLT Testing Set']; 136 | end 137 | end 138 | 139 | dataName = [tmp_mat_path 'aveSuccessRatePlot_' num2str(num_tracker) ... 140 | 'alg_' plot_type '.mat']; 141 | 142 | % evaluate tracker performance 143 | if ~exist(dataName, 'file') || true 144 | eval_tracker(sequences, trackers, eval_type, name_tracker_all, ... 145 | tmp_mat_path, path_anno, rp_all, norm_dst); 146 | end 147 | 148 | % plot performance 149 | load(dataName); 150 | num_tracker = size(ave_success_rate_plot, 1); 151 | 152 | if rank_num > num_tracker || rank_num <0 153 | rank_num = num_tracker; 154 | end 155 | 156 | fig_name= [plot_type '_' ranking_type]; 157 | idx_seq_set = 1:numel(sequences); 158 | 159 | % draw and save the overall performance plot 160 | plot_draw_save(num_tracker, plot_style, ave_success_rate_plot, ... 161 | idx_seq_set, rank_num, ranking_type, rank_idx, ... 162 | name_tracker_all, threshold_set, title_name, ... 163 | x_label_name, y_label_name, fig_name, save_fig_path, ... 164 | save_fig_suf); 165 | 166 | % draw and save the per-attribute performance plot 167 | % att_trld = 0; 168 | % att_num = size(att_all, 2); 169 | % for att_idx = 1:att_num % for each attribute 170 | % idx_seq_set = find(att_all(:, att_idx) > att_trld); 171 | % if length(idx_seq_set) < 2 172 | % continue; 173 | % end 174 | % disp([att_name{att_idx} ' ' num2str(length(idx_seq_set))]); 175 | % 176 | % fig_name = [att_fig_name{att_idx} '_' plot_type '_' ranking_type]; 177 | % title_name = ['Plots of ' eval_type ': ' att_name{att_idx} ' (' num2str(length(idx_seq_set)) ')']; 178 | % 179 | % switch metric_type 180 | % case 'overlap' 181 | % title_name = ['Success plots of ' eval_type ' - ' att_name{att_idx} ' (' num2str(length(idx_seq_set)) ')']; 182 | % case 'error' 183 | % title_name = ['Precision plots of ' eval_type ' - ' att_name{att_idx} ' (' num2str(length(idx_seq_set)) ')']; 184 | % if norm_dst 185 | % title_name = ['Normalized ' title_name]; 186 | % end 187 | % end 188 | % 189 | % plot_draw_save(num_tracker, plot_style, ave_success_rate_plot, ... 190 | % idx_seq_set, rank_num, ranking_type, rank_idx, ... 191 | % name_tracker_all, threshold_set, title_name, ... 192 | % x_label_name, y_label_name, fig_name, save_fig_path, ... 
193 | % save_fig_suf); 194 | % end 195 | 196 | end -------------------------------------------------------------------------------- /TNLLT_Evaluation_Toolkit/sequence_evaluation_config/all_dataset.txt: -------------------------------------------------------------------------------- 1 | JE_Weapon_ChangeGUN_video_Z07 2 | JE_CrashCar_video_10 3 | JE_Weapon_ChangeGUN_video_Z11 4 | JE_GuMuLiYing_video_04 5 | JE_BatMan_video_01 6 | JE_SoliderHead_video_C13 7 | JE_GirlHead_video_14 8 | JE_MaoXianRen_video_11 9 | JE_KongqueKaiping_video_02 10 | JE_Transformation_video_04 11 | JE_Horse_03 12 | JE_SoliderHead_video_C05 13 | JE_ShipNight_video_02 14 | JE_ClothChange_video_01 15 | JE_SoliderHead_video_C07 16 | JE_Horizontalbar_08 17 | JE_SpaceShip_video_09 18 | JE_ClothChange_video_12 19 | JE_MaoXianRen_video_09 20 | JE_BasketballPlayer_video_002 21 | JE_PersonFly_video_01 22 | JE_Transform_video_Q09 23 | JE_RaceCar_01 24 | JE_FaceChange_video_A06 25 | JE_Weapon_HandGUN_video_01 26 | JE_SportGirl_video_07 27 | JE_GodWar_video_04 28 | JE_SoliderHead_video_C04 29 | JE_BianSeLong_video_02 30 | JE_Ship_video_X02 31 | JE_Muma_24 32 | JE_Game_WalkingDead_video_ZZ02 33 | JE_ChangeWeapon_02 34 | JE_SoliderHead_video_C08 35 | JE_Gumuliying_video_12 36 | JE_interlaken_00_b_01 37 | JE_Sking_04 38 | JE_littleAnimal_video_06 39 | JE_robot_x03 40 | JE_BF5_Weapon_video_Z01 41 | JE_BianLian_video_06 42 | JE_BaseBall_video_02 43 | JE_Sking_02 44 | JE_GuMuLiYing_video_10 45 | JE_Horizontalbar_09 46 | JE_GirlHead_video_01 47 | JE_BluceLi_video_13 48 | JE_CarNight_video_X02 49 | JE_Hulk_video_x1 50 | JE_Wukong_video_p707 51 | JE_SoliderHead_video_C03 52 | JE_FaceChange_video_A07 53 | JE_RaceCar_06 54 | JE_GameGirl_video_C03 55 | JE_Chase_video_X09 56 | JE_Gumuliying_video_14 57 | JE_Game_person_video_01 58 | JE_SoliderHead_video_C01 59 | JE_Game_GodWar_video_Z01 60 | JE_ClothChange_video_03 61 | JE_CrashCar_video_03 62 | JE_Transform_video_Q07 63 | JE_SportMotor_video_C07 64 | JE_Game_TempRUN_video_X01 65 | JE_FaceChange_video_A02 66 | JE_GirlHead_video_02 67 | JE_Ship_video_X01 68 | JE_FaceChange_video_A08 69 | JE_SoliderHead_video_C02 70 | JE_SoliderHead_video_C09 71 | JE_SportMotor_video_C06 72 | JE_Transform_video_Q13 73 | JE_GirlHead_video_03 74 | JE_CrashCar_video_19 75 | JE_Chase_video_01 76 | JE_Glass_video_X02 77 | JE_Gumuliying_video_15 78 | JE_GuMuLiYing_video_02 79 | JE_RaceCar_02 80 | JE_MaoXianRen_video_05 81 | JE_SoliderHead_video_C12_01 82 | JE_Shadow_01 83 | JE_ShipNight_video_03 84 | JE_BatMan_video_18 85 | JE_Muma_14 86 | JE_Glass_video_X01 87 | JE_ManAssian_video_C02 88 | JE_Muma_21 89 | JE_interlaken_00_d_01 90 | JE_Horizontalbar_06 91 | JE_Gumuliying_video_aa10 92 | JE_Assian_ship_v01 93 | JE_SpaceShip_video_10 94 | JE_ClothChange_video_14 95 | JE_Shadow_02 96 | JE_GodWar_video_03 97 | JE_Wukong_video_p708 98 | JE_GuMuLiYing_video_A01 99 | JE_Weapon_ChangeGUN_video_Zz10 100 | JE_CrashCar_video_13 101 | JE_Chase_video_X07 102 | JE_SoliderHead_video_C02_01 103 | JE_Cartoon_YellowPeople_video_01 104 | JE_SoliderHead_video_C14 105 | JE_SoliderHead_video_C12 106 | JE_SoliderHead_video_C06 107 | JE_Horse_09 108 | JE_SoliderHead_video_C15 109 | JE_Muma_18 110 | JE_Cartoon_YellowPeople_video_02 111 | JE_BianSeLong_video_08 112 | JE_Horse_08 113 | JE_BatMan_video_01_1 114 | JE_Weapon_ChangeGUN_video_Z09 115 | JE_Chase_video_06 116 | JE_Horizontalbar_10 117 | JE_CarNight_video_X05 118 | JE_Sking_08 119 | JE_BallThrow_03 120 | JE_FlySkate_05 121 | JE_Sking_06 122 | JE_GameGirl_video_C08 123 | 
JE_Transform_video_Q10 124 | JE_IceSkate_02 125 | JE_CarNight_video_X03 126 | JE_SpaceShip_video_05 127 | JE_PlayDog_video_02 128 | JE_BianSeLong_video_01 129 | JE_BluceLi_video_06 130 | JE_MaoXianRen_video_07 131 | JE_GuMuLiYing_video_09 132 | JE_BianLian_video_05 133 | JE_ManAssian_video_C03 134 | JE_GameGirl_video_C05 135 | JE_Girl_03 136 | JE_Chase_video_02 137 | JE_MaoXianRen_video_03 138 | JE_CarNight_video_X04 139 | JE_Swimming_video_04 140 | JE_BianSeLong_video_07 141 | JE_ClothChange_video_13 142 | JE_WalkingDead_video_X05 143 | JE_Assian_v02 144 | JE_SoliderHead_video_C01_01 145 | JE_GuMuLiYing_video_05 146 | JE_Chase_video_05 147 | JE_BaseBall_video_04 148 | JE_Horizontalbar_11 149 | JE_Chase_video_X06 150 | JE_Transform_video_Q11 151 | JE_GameGirl_video_C01 152 | JE_HeiBao_video_13 153 | JE_Chase_video_X01 154 | JE_Game_WalkingDead_video_ZZ01 155 | JE_BluceLi_video_15 156 | JE_Transformation_video_06 157 | JE_MaoXianRen_video_01 158 | JE_Transform_video_Q05 159 | JE_BaseBall_video_04_1 160 | JE_Gumuliying_video_13 161 | JE_SoliderHead_video_C11 162 | JE_SpaceShip_video_01 163 | JE_PlayDog_video_10 164 | JE_HeiBao_video_02 165 | JE_SpiderMan_video_p710 166 | JE_Transform_video_Q01 167 | JE_BallThrow_01 168 | JE_Transform_video_Q03 169 | JE_IronMan_transform_02 170 | JE_BianSeLong_video_05 171 | JE_FaceChange_video_A01 172 | JE_Transform_video_Q08 173 | JE_RaceCar_03 174 | JE_Horse_12 175 | JE_interlaken_01_a_01 176 | JE_interlaken_00_e_01 177 | JE_Horizontalbar_05 178 | JE_Transform_video_Q12 179 | JE_Muma_01 180 | JE_ClothChange_video_11 181 | JE_SplinterCell_head_video_01 182 | JE_BoyHead_video_01 183 | JE_WalkingDead_video_X02 184 | JE_Horizontalbar_03 185 | JE_Guangjian_01 186 | JE_GuMuLiYing_video_03 187 | JE_SportGirl_video_C01 188 | JE_Chase_video_04 189 | JE_BianLian_video_07 190 | JE_CrashCar_video_20 191 | JE_Muma_19 192 | JE_Wukong_video_p706 193 | JE_ChangeWeapon_01 194 | JE_Hulk_video_05 195 | JE_interlaken_01_a_02 196 | JE_BluceLi_video_06_1 197 | JE_SoliderHead_video_C11_01 198 | JE_BasketballPlayer_video_002_1 199 | JE_BaseBall_video_02_1 200 | JE_interlaken_00_f_02 -------------------------------------------------------------------------------- /TNLLT_Evaluation_Toolkit/sequence_evaluation_config/testing_set.txt: -------------------------------------------------------------------------------- 1 | JE_Weapon_ChangeGUN_video_Z07 2 | JE_CrashCar_video_10 3 | JE_Weapon_ChangeGUN_video_Z11 4 | JE_GuMuLiYing_video_04 5 | JE_BatMan_video_01 6 | JE_SoliderHead_video_C13 7 | JE_GirlHead_video_14 8 | JE_MaoXianRen_video_11 9 | JE_KongqueKaiping_video_02 10 | JE_Transformation_video_04 11 | JE_Horse_03 12 | JE_SoliderHead_video_C05 13 | JE_ShipNight_video_02 14 | JE_ClothChange_video_01 15 | JE_SoliderHead_video_C07 16 | JE_Horizontalbar_08 17 | JE_SpaceShip_video_09 18 | JE_ClothChange_video_12 19 | JE_MaoXianRen_video_09 20 | JE_BasketballPlayer_video_002 21 | JE_PersonFly_video_01 22 | JE_Transform_video_Q09 23 | JE_RaceCar_01 24 | JE_FaceChange_video_A06 25 | JE_Weapon_HandGUN_video_01 26 | JE_SportGirl_video_07 27 | JE_GodWar_video_04 28 | JE_SoliderHead_video_C04 29 | JE_BianSeLong_video_02 30 | JE_Ship_video_X02 31 | JE_Muma_24 32 | JE_Game_WalkingDead_video_ZZ02 33 | JE_ChangeWeapon_02 34 | JE_SoliderHead_video_C08 35 | JE_Gumuliying_video_12 36 | JE_interlaken_00_b_01 37 | JE_Sking_04 38 | JE_littleAnimal_video_06 39 | JE_robot_x03 40 | JE_BF5_Weapon_video_Z01 41 | JE_BianLian_video_06 42 | JE_BaseBall_video_02 43 | JE_Sking_02 44 | JE_GuMuLiYing_video_10 45 | 
JE_Horizontalbar_09 46 | JE_GirlHead_video_01 47 | JE_BluceLi_video_13 48 | JE_CarNight_video_X02 49 | JE_Hulk_video_x1 50 | JE_Wukong_video_p707 -------------------------------------------------------------------------------- /TNLLT_Evaluation_Toolkit/tmp_mat/readme.txt: -------------------------------------------------------------------------------- 1 | Save evaluation mat files -------------------------------------------------------------------------------- /TNLLT_Evaluation_Toolkit/utils/calc_rect_int.m: -------------------------------------------------------------------------------- 1 | function overlap = calc_rect_int(A, B) 2 | % Calculate overlap of two rectangles 3 | leftA = A(:,1); 4 | bottomA = A(:,2); 5 | rightA = leftA + A(:,3) - 1; 6 | topA = bottomA + A(:,4) - 1; 7 | 8 | leftB = B(:,1); 9 | bottomB = B(:,2); 10 | rightB = leftB + B(:,3) - 1; 11 | topB = bottomB + B(:,4) - 1; 12 | 13 | tmp = (max(0, min(rightA, rightB) - max(leftA, leftB)+1 )) .* (max(0, min(topA, topB) - max(bottomA, bottomB)+1 )); 14 | areaA = A(:,3) .* A(:,4); 15 | areaB = B(:,3) .* B(:,4); 16 | overlap = tmp./(areaA+areaB-tmp); 17 | end -------------------------------------------------------------------------------- /TNLLT_Evaluation_Toolkit/utils/calc_seq_err_robust.m: -------------------------------------------------------------------------------- 1 | function [errCoverage, err_center] = calc_seq_err_robust(results, rect_anno, absent_anno, norm_dst) 2 | % calculate center distance error and overlap 3 | % seq_length = results.len; 4 | seq_length = size(rect_anno, 1); 5 | 6 | % %%%%%%%%% for monkey-17, we only keep the first 2260 frames (1-2260), and 7 | % we need to cut off for the tracking results 8 | if size(results, 1) ~= size(rect_anno, 1) 9 | results = results(1:size(rect_anno, 1), :); 10 | end 11 | 12 | % handle the invalided tracking results (NAN, negative and even complex ones) 13 | for i = 2:seq_length 14 | r = results(i,:); 15 | r_anno = rect_anno(i,:); 16 | 17 | if (sum(isnan(r)) | ~isreal(r) | r(3)<=0 | r(4)<=0) & (~isnan(r_anno)) 18 | results(i,:) = results(i-1,:); 19 | end 20 | end 21 | 22 | rect_mat = results; 23 | rect_mat(1,:) = rect_anno(1,:); % ignore the result in the first frame 24 | 25 | % before evaluation, remove the frames where the target is absent 26 | absent_idx = absent_anno == 0; 27 | rect_mat(absent_idx, :) = []; 28 | rect_anno(absent_idx, :) = []; 29 | 30 | % center position 31 | center_GT = [rect_anno(:,1)+(rect_anno(:,3)-1)/2 ... 32 | rect_anno(:,2)+(rect_anno(:,4)-1)/2]; 33 | 34 | center = [rect_mat(:,1)+(rect_mat(:,3)-1)/2 ... 
35 | rect_mat(:,2)+(rect_mat(:,4)-1)/2]; 36 | 37 | % the new seq_length, since we remove the absent frames 38 | new_seq_length = size(rect_anno, 1); 39 | 40 | % % computer center distance 41 | if norm_dst 42 | center(:, 1) = center(:, 1)./rect_anno(:, 3); 43 | center(:, 2) = center(:, 2)./rect_anno(:, 4); 44 | center_GT(:, 1) = center_GT(:, 1)./rect_anno(:, 3); 45 | center_GT(:, 2) = center_GT(:, 2)./rect_anno(:, 4); 46 | end 47 | err_center = sqrt(sum(((center(1:new_seq_length,:)-center_GT(1:new_seq_length,:)).^2),2)); 48 | 49 | index = rect_anno > 0; 50 | idx = (sum(index, 2)==4); 51 | 52 | % calculate overlap 53 | tmp = calc_rect_int(rect_mat(idx,:), rect_anno(idx,:)); 54 | 55 | errCoverage = -ones(length(idx),1); 56 | errCoverage(idx) = tmp; 57 | err_center(~idx) = -1; 58 | end 59 | -------------------------------------------------------------------------------- /TNLLT_Evaluation_Toolkit/utils/config_plot_style.m: -------------------------------------------------------------------------------- 1 | function plot_styles = config_plot_style() 2 | % config plot styles for different trackers 3 | plot_styles ={struct('color', [1,0,0], 'lineStyle', '-'), ... 4 | struct('color', [0,1,0], 'lineStyle', '-'), ... 5 | struct('color', [0,0,1], 'lineStyle', '-'), ... 6 | struct('color', [0,0,0], 'lineStyle', '-'), ...% struct('color',[1,1,0],'lineStyle','-'),...%yellow 7 | struct('color', [1,0,1], 'lineStyle', '-'), ...%pink 8 | struct('color', [0,1,1], 'lineStyle', '-'), ... 9 | struct('color', [0.5,0.5,0.5], 'lineStyle', '-'), ...%gray-25% 10 | struct('color', [136,0,21]/255, 'lineStyle', '-'), ...%dark red 11 | struct('color', [255,127,39]/255, 'lineStyle', '-'), ...%orange 12 | struct('color', [0,162,232]/255, 'lineStyle', '-'), ...%Turquoise 13 | struct('color', [163,73,164]/255, 'lineStyle', '-'), ...%purple %%%%%%%%%%%%%%%%%%%% 14 | struct('color', [191,144,0]/255, 'lineStyle', '-'), ... 15 | struct('color', [1,0,0], 'lineStyle', '--'), ... 16 | struct('color', [0,1,0], 'lineStyle', '--'), ... 17 | struct('color', [0,0,1], 'lineStyle', '--'), ... 18 | struct('color', [0,0,0], 'lineStyle', '--'), ...% struct('color',[1,1,0],'lineStyle','--'),...%yellow 19 | struct('color', [1,0,1], 'lineStyle', '--'), ...%pink 20 | struct('color', [0,1,1], 'lineStyle', '--'), ... 21 | struct('color', [0.5,0.5,0.5], 'lineStyle', '--'), ...%gray-25% 22 | struct('color', [136,0,21]/255, 'lineStyle', '--'), ...%dark red 23 | struct('color', [255,127,39]/255, 'lineStyle', '--'), ...%orange 24 | struct('color', [0,162,232]/255, 'lineStyle', '--'), ...%Turquoise 25 | struct('color', [163,73,164]/255, 'lineStyle', '--'), ...%purple %%%%%%%%%%%%%%%%%%% 26 | struct('color', [191,144,0]/255, 'lineStyle', '--'), ... 27 | struct('color', [1,0,0], 'lineStyle', '-.'), ... 28 | struct('color', [0,1,0], 'lineStyle', '-.'), ... 29 | struct('color', [0,0,1], 'lineStyle', '-.'), ... 30 | struct('color', [0,0,0], 'lineStyle', '-.'), ...% struct('color',[1,1,0],'lineStyle',':'),...%yellow 31 | struct('color', [1,0,1], 'lineStyle', '-.'), ...%pink 32 | struct('color', [0,1,1], 'lineStyle', '-.'), ... 33 | struct('color', [0.5,0.5,0.5], 'lineStyle', '-.'), ...%gray-25% 34 | struct('color', [136,0,21]/255, 'lineStyle', '-.'), ...%dark red 35 | struct('color', [255,127,39]/255, 'lineStyle', '-.'), ...%orange 36 | struct('color', [0,162,232]/255, 'lineStyle', '-.'), ...%Turquoise 37 | struct('color', [163,73,164]/255, 'lineStyle', '-.'), ...%purple 38 | struct('color', [191,144,0]/255, 'lineStyle', '-.'), ... 
39 | }; 40 | end -------------------------------------------------------------------------------- /TNLLT_Evaluation_Toolkit/utils/config_sequence.m: -------------------------------------------------------------------------------- 1 | function sequences = config_sequence(type) 2 | % config sequences for evaluation 3 | % the configuration files are placed in ./sequence_evaluation_config/; 4 | switch type 5 | case 'test_set' 6 | dataset_name = 'testing_set.txt'; 7 | case 'all' 8 | dataset_name = 'all_dataset.txt'; 9 | otherwise 10 | error('Error in evaluation dataset type! Either ''testing_set'' or ''all''.') 11 | end 12 | 13 | % check if the file exists 14 | if ~exist(dataset_name, 'file') 15 | error('%s is not found!', dataset_name); 16 | end 17 | 18 | % load evaluation sequences 19 | fid = fopen(dataset_name, 'r'); 20 | i = 0; 21 | sequences = cell(100000, 1); 22 | while ~feof(fid) 23 | i = i + 1; 24 | sequences{i, 1} = fgetl(fid); 25 | end 26 | sequences(i+1:end) = []; 27 | fclose(fid); 28 | end -------------------------------------------------------------------------------- /TNLLT_Evaluation_Toolkit/utils/config_tracker.m: -------------------------------------------------------------------------------- 1 | function trackers = config_tracker() 2 | % config trackers to be evaluated 3 | 4 | trackers = { 5 | struct('name', 'ReasoningTrack', 'publish', 'ahaha2') ... 6 | struct('name', 'dutrack', 'publish', 'ahaha2') ... 7 | struct('name', 'uvltrackNLBB', 'publish', 'ahaha2') ... 8 | struct('name', 'uvltrackNL', 'publish', 'ahaha2') ... 9 | struct('name', 'uvltrackBB', 'publish', 'ahaha2') ... 10 | struct('name', 'sutrack', 'publish', 'ahaha2') ... 11 | struct('name', 'romtrack', 'publish', 'ahaha2') ... 12 | struct('name', 'ostrack', 'publish', 'ahaha2') ... 13 | struct('name', 'odtrack', 'publish', 'ahaha2') ... 14 | struct('name', 'mmtrack', 'publish', 'ahaha2') ... 15 | struct('name', 'mixformer', 'publish', 'ahaha2') ... 16 | struct('name', 'lmtrack', 'publish', 'ahaha2') ... 17 | struct('name', 'jointNLTrack', 'publish', 'ahaha2') ... 18 | struct('name', 'jointNL', 'publish', 'ahaha2') ... 19 | struct('name', 'evptrack', 'publish', 'ahaha2') ... 20 | struct('name', 'ctvlt', 'publish', 'ahaha2') ... 21 | struct('name', 'citetrack', 'publish', 'ahaha2') ... 22 | struct('name', 'aqatrack', 'publish', 'ahaha2') ... 23 | struct('name', 'allinone', 'publish', 'ahaha2') ... 24 | struct('name', 'aiatrack', 'publish', 'ahaha2') ... 25 | struct('name', 'grm', 'publish', 'ahaha2') ... 
26 | 27 | }; 28 | 29 | end -------------------------------------------------------------------------------- /TNLLT_Evaluation_Toolkit/utils/eval_tracker.m: -------------------------------------------------------------------------------- 1 | function eval_tracker(seqs, trackers, eval_type, name_tracker_all, tmp_mat_path, path_anno, rp_all, norm_dst) 2 | % evaluate each tracker 3 | num_tracker = numel(trackers); 4 | 5 | threshold_set_overlap = 0:0.05:1; 6 | threshold_set_error = 0:50; 7 | if norm_dst 8 | threshold_set_error = threshold_set_error / 100; 9 | end 10 | 11 | for i = 1:numel(seqs) % for each sequence 12 | s = seqs{i}; % name of sequence 13 | 14 | % load GT and the absent flags 15 | anno = dlmread([path_anno s '.txt']); 16 | absent_anno = dlmread([path_anno 'absent/' s '.txt']); 17 | 18 | for k = 1:num_tracker % evaluate each tracker 19 | t = trackers{k}; % name of tracker 20 | 21 | % load tracking result 22 | res = dlmread([rp_all t.name '_tracking_result/' s '.txt']); 23 | fprintf(['evaluating ' t.name ' on ' s ' ...\n']); 24 | 25 | success_num_overlap = zeros(1, numel(threshold_set_overlap)); 26 | success_num_err = zeros(1, numel(threshold_set_error)); 27 | 28 | if isempty(res) 29 | break; 30 | end 31 | 32 | [err_coverage, err_center] = calc_seq_err_robust(res, anno, absent_anno, norm_dst); 33 | 34 | for t_idx = 1:numel(threshold_set_overlap) 35 | success_num_overlap(1, t_idx) = sum(err_coverage > threshold_set_overlap(t_idx)); 36 | end 37 | 38 | for t_idx = 1:length(threshold_set_error) 39 | success_num_err(1, t_idx) = sum(err_center <= threshold_set_error(t_idx)); 40 | end 41 | 42 | len_all = size(anno, 1); % number of frames in the sequence 43 | 44 | ave_success_rate_plot(k, i, :) = success_num_overlap/(len_all + eps); 45 | ave_success_rate_plot_err(k, i, :) = success_num_err/(len_all + eps); 46 | end 47 | end 48 | 49 | % save results 50 | if ~exist(tmp_mat_path, 'dir') 51 | mkdir(tmp_mat_path); 52 | end 53 | 54 | dataName1 = [tmp_mat_path 'aveSuccessRatePlot_' num2str(num_tracker) 'alg_overlap_' eval_type '.mat']; 55 | save(dataName1, 'ave_success_rate_plot', 'name_tracker_all'); 56 | 57 | dataName2 = [tmp_mat_path 'aveSuccessRatePlot_' num2str(num_tracker) 'alg_error_' eval_type '.mat']; 58 | ave_success_rate_plot = ave_success_rate_plot_err; 59 | save(dataName2, 'ave_success_rate_plot', 'name_tracker_all'); 60 | 61 | end 62 | -------------------------------------------------------------------------------- /TNLLT_Evaluation_Toolkit/utils/plot_draw_save.m: -------------------------------------------------------------------------------- 1 | function plot_draw_save(num_tracker, plot_style, ave_success_rate_plot, idx_seq_set, rank_num, ... 2 | ranking_type, rank_idx, name_tracker_all, threshold_set, title_name, ... 
3 | x_label_name, y_label_name, fig_name, save_fig_path, save_fig_suf) 4 | % plot and save curves 5 | perf = zeros(1, num_tracker); 6 | for i=1:num_tracker 7 | %each row is the sr plot of one sequence 8 | tmp = ave_success_rate_plot(i, idx_seq_set, :); 9 | aa = reshape(tmp, [numel(idx_seq_set), size(ave_success_rate_plot, 3)]); 10 | aa = aa(sum(aa,2)>eps, :); 11 | bb = mean(aa); 12 | switch ranking_type 13 | case 'AUC' 14 | perf(i) = mean(bb); 15 | case 'threshold' 16 | perf(i) = bb(rank_idx); 17 | end 18 | end 19 | 20 | [~, index_sort] = sort(perf,'descend'); 21 | 22 | i = 1; 23 | 24 | % plot settings 25 | font_size = 16; 26 | % font_size = 14; 27 | % font_size_legend = 12; % for overall plot 28 | font_size_legend = 14; % for attribute-based plot 29 | axex_font_size = 14; 30 | 31 | tmp_figure = figure; 32 | set(gcf, 'unit', 'normalized', 'position', [0.2,0.2,0.45,0.6]); % for overall plot 33 | % set(gcf, 'unit', 'normalized', 'position', [0.01,0.01,0.45,0.88]); % for attribute-based plot 34 | 35 | tmp_axes = axes('Parent', tmp_figure, 'FontSize', axex_font_size); 36 | for k = index_sort(1:rank_num) 37 | 38 | tmp = ave_success_rate_plot(k, idx_seq_set, :); 39 | aa = reshape(tmp, [numel(idx_seq_set), size(ave_success_rate_plot, 3)]); 40 | aa = aa(sum(aa,2)>eps, :); 41 | bb = mean(aa); 42 | 43 | switch ranking_type 44 | case 'AUC' 45 | score = mean(bb); 46 | tmp = sprintf('%.3f', score); 47 | case 'threshold' 48 | score = bb(rank_idx); 49 | tmp = sprintf('%.3f', score); 50 | end 51 | 52 | tmpName{i} = ['[' tmp '] ' name_tracker_all{k}]; 53 | plot(threshold_set, bb, 'color', plot_style{i}.color, 'lineStyle', plot_style{i}.lineStyle,'lineWidth', 4,'Parent', tmp_axes); 54 | hold on 55 | grid on; 56 | if k == index_sort(1) 57 | set(gca,'GridLineStyle', ':', 'GridColor', 'k', 'GridAlpha', 1, 'LineWidth', 1.2); 58 | end 59 | i = i + 1; 60 | end 61 | 62 | if strcmp(ranking_type, 'threshold') 63 | legend_position = 'Northwest'; % 'Southeast' or 'Southeastoutside' 64 | else 65 | legend_position = 'Northwest'; % 'Southwest' or 'Southwestoutside' 66 | end 67 | 68 | legend(tmpName, 'Interpreter', 'none', 'fontsize', font_size_legend, 'Location', legend_position); 69 | title(title_name, 'fontsize', font_size); 70 | xlabel(x_label_name, 'fontsize', font_size); 71 | ylabel(y_label_name, 'fontsize', font_size); 72 | 73 | hold off 74 | 75 | % save result figures 76 | if ~exist(save_fig_path, 'dir') 77 | mkdir(save_fig_path); 78 | end 79 | if strcmp(save_fig_suf, 'eps') 80 | print('-depsc', [save_fig_path fig_name]); 81 | else 82 | saveas(gcf, [save_fig_path fig_name], 'png'); 83 | end 84 | 85 | end -------------------------------------------------------------------------------- /figures/SRPRNPR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/figures/SRPRNPR.png -------------------------------------------------------------------------------- /figures/TNLLT_samples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/figures/TNLLT_samples.png -------------------------------------------------------------------------------- /scripts/SFT/transforme_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | from pathlib import Path 4 | 5 | def rewrite_images_prefix(json_file: str, 
new_prefix: str): 6 | """ 7 | json_file : str the path of the JSON file to be modified 8 | new_prefix: str the new prefix that will replace /your_root_path 9 | """ 10 | old_prefix = "/your_root_path" 11 | 12 | with open(json_file, "r", encoding="utf-8") as f: 13 | data = json.load(f) 14 | for item in data: 15 | if "images" in item and isinstance(item["images"], list): 16 | new_images = [] 17 | for path in item["images"]: 18 | if isinstance(path, str) and path.startswith(old_prefix): 19 | new_images.append(str(Path(new_prefix) / Path(path).relative_to(old_prefix))) 20 | else: 21 | new_images.append(path) 22 | item["images"] = new_images 23 | 24 | with open(json_file, "w", encoding="utf-8") as f: 25 | json.dump(data, f, ensure_ascii=False, indent=2) 26 | 27 | if __name__ == "__main__": 28 | # how to use:python script.py data.json /new/prefix 29 | rewrite_images_prefix(sys.argv[1], sys.argv[2]) -------------------------------------------------------------------------------- /scripts/TNL2KLTDataset/tnl_lt_test_split.txt: -------------------------------------------------------------------------------- 1 | JE_Weapon_ChangeGUN_video_Z07 2 | JE_CrashCar_video_10 3 | JE_Weapon_ChangeGUN_video_Z11 4 | JE_GuMuLiYing_video_04 5 | JE_BatMan_video_01 6 | JE_SoliderHead_video_C13 7 | JE_GirlHead_video_14 8 | JE_MaoXianRen_video_11 9 | JE_KongqueKaiping_video_02 10 | JE_Transformation_video_04 11 | JE_Horse_03 12 | JE_SoliderHead_video_C05 13 | JE_ShipNight_video_02 14 | JE_ClothChange_video_01 15 | JE_SoliderHead_video_C07 16 | JE_Horizontalbar_08 17 | JE_SpaceShip_video_09 18 | JE_ClothChange_video_12 19 | JE_MaoXianRen_video_09 20 | JE_BasketballPlayer_video_002 21 | JE_PersonFly_video_01 22 | JE_Transform_video_Q09 23 | JE_RaceCar_01 24 | JE_FaceChange_video_A06 25 | JE_Weapon_HandGUN_video_01 26 | JE_SportGirl_video_07 27 | JE_GodWar_video_04 28 | JE_SoliderHead_video_C04 29 | JE_BianSeLong_video_02 30 | JE_Ship_video_X02 31 | JE_Muma_24 32 | JE_Game_WalkingDead_video_ZZ02 33 | JE_ChangeWeapon_02 34 | JE_SoliderHead_video_C08 35 | JE_Gumuliying_video_12 36 | JE_interlaken_00_b_01 37 | JE_Sking_04 38 | JE_littleAnimal_video_06 39 | JE_robot_x03 40 | JE_BF5_Weapon_video_Z01 41 | JE_BianLian_video_06 42 | JE_BaseBall_video_02 43 | JE_Sking_02 44 | JE_GuMuLiYing_video_10 45 | JE_Horizontalbar_09 46 | JE_GirlHead_video_01 47 | JE_BluceLi_video_13 48 | JE_CarNight_video_X02 49 | JE_Hulk_video_x1 50 | JE_Wukong_video_p707 -------------------------------------------------------------------------------- /scripts/TNL2KLTDataset/tnl_lt_train_split.txt: -------------------------------------------------------------------------------- 1 | JE_SoliderHead_video_C03 2 | JE_FaceChange_video_A07 3 | JE_RaceCar_06 4 | JE_GameGirl_video_C03 5 | JE_Chase_video_X09 6 | JE_Gumuliying_video_14 7 | JE_Game_person_video_01 8 | JE_SoliderHead_video_C01 9 | JE_Game_GodWar_video_Z01 10 | JE_ClothChange_video_03 11 | JE_CrashCar_video_03 12 | JE_Transform_video_Q07 13 | JE_SportMotor_video_C07 14 | JE_Game_TempRUN_video_X01 15 | JE_FaceChange_video_A02 16 | JE_GirlHead_video_02 17 | JE_Ship_video_X01 18 | JE_FaceChange_video_A08 19 | JE_SoliderHead_video_C02 20 | JE_SoliderHead_video_C09 21 | JE_SportMotor_video_C06 22 | JE_Transform_video_Q13 23 | JE_GirlHead_video_03 24 | JE_CrashCar_video_19 25 | JE_Chase_video_01 26 | JE_Glass_video_X02 27 | JE_Gumuliying_video_15 28 | JE_GuMuLiYing_video_02 29 | JE_RaceCar_02 30 | JE_MaoXianRen_video_05 31 | JE_SoliderHead_video_C12_01 32 | JE_Shadow_01 33 | JE_ShipNight_video_03 34 | 
JE_BatMan_video_18 35 | JE_Muma_14 36 | JE_Glass_video_X01 37 | JE_ManAssian_video_C02 38 | JE_Muma_21 39 | JE_interlaken_00_d_01 40 | JE_Horizontalbar_06 41 | JE_Gumuliying_video_aa10 42 | JE_Assian_ship_v01 43 | JE_SpaceShip_video_10 44 | JE_ClothChange_video_14 45 | JE_Shadow_02 46 | JE_GodWar_video_03 47 | JE_Wukong_video_p708 48 | JE_GuMuLiYing_video_A01 49 | JE_Weapon_ChangeGUN_video_Zz10 50 | JE_CrashCar_video_13 51 | JE_Chase_video_X07 52 | JE_SoliderHead_video_C02_01 53 | JE_Cartoon_YellowPeople_video_01 54 | JE_SoliderHead_video_C14 55 | JE_SoliderHead_video_C12 56 | JE_SoliderHead_video_C06 57 | JE_Horse_09 58 | JE_SoliderHead_video_C15 59 | JE_Muma_18 60 | JE_Cartoon_YellowPeople_video_02 61 | JE_BianSeLong_video_08 62 | JE_Horse_08 63 | JE_BatMan_video_01_1 64 | JE_Weapon_ChangeGUN_video_Z09 65 | JE_Chase_video_06 66 | JE_Horizontalbar_10 67 | JE_CarNight_video_X05 68 | JE_Sking_08 69 | JE_BallThrow_03 70 | JE_FlySkate_05 71 | JE_Sking_06 72 | JE_GameGirl_video_C08 73 | JE_Transform_video_Q10 74 | JE_IceSkate_02 75 | JE_CarNight_video_X03 76 | JE_SpaceShip_video_05 77 | JE_PlayDog_video_02 78 | JE_BianSeLong_video_01 79 | JE_BluceLi_video_06 80 | JE_MaoXianRen_video_07 81 | JE_GuMuLiYing_video_09 82 | JE_BianLian_video_05 83 | JE_ManAssian_video_C03 84 | JE_GameGirl_video_C05 85 | JE_Girl_03 86 | JE_Chase_video_02 87 | JE_MaoXianRen_video_03 88 | JE_CarNight_video_X04 89 | JE_Swimming_video_04 90 | JE_BianSeLong_video_07 91 | JE_ClothChange_video_13 92 | JE_WalkingDead_video_X05 93 | JE_Assian_v02 94 | JE_SoliderHead_video_C01_01 95 | JE_GuMuLiYing_video_05 96 | JE_Chase_video_05 97 | JE_BaseBall_video_04 98 | JE_Horizontalbar_11 99 | JE_Chase_video_X06 100 | JE_Transform_video_Q11 101 | JE_GameGirl_video_C01 102 | JE_HeiBao_video_13 103 | JE_Chase_video_X01 104 | JE_Game_WalkingDead_video_ZZ01 105 | JE_BluceLi_video_15 106 | JE_Transformation_video_06 107 | JE_MaoXianRen_video_01 108 | JE_Transform_video_Q05 109 | JE_BaseBall_video_04_1 110 | JE_Gumuliying_video_13 111 | JE_SoliderHead_video_C11 112 | JE_SpaceShip_video_01 113 | JE_PlayDog_video_10 114 | JE_HeiBao_video_02 115 | JE_SpiderMan_video_p710 116 | JE_Transform_video_Q01 117 | JE_BallThrow_01 118 | JE_Transform_video_Q03 119 | JE_IronMan_transform_02 120 | JE_BianSeLong_video_05 121 | JE_FaceChange_video_A01 122 | JE_Transform_video_Q08 123 | JE_RaceCar_03 124 | JE_Horse_12 125 | JE_interlaken_01_a_01 126 | JE_interlaken_00_e_01 127 | JE_Horizontalbar_05 128 | JE_Transform_video_Q12 129 | JE_Muma_01 130 | JE_ClothChange_video_11 131 | JE_SplinterCell_head_video_01 132 | JE_BoyHead_video_01 133 | JE_WalkingDead_video_X02 134 | JE_Horizontalbar_03 135 | JE_Guangjian_01 136 | JE_GuMuLiYing_video_03 137 | JE_SportGirl_video_C01 138 | JE_Chase_video_04 139 | JE_BianLian_video_07 140 | JE_CrashCar_video_20 141 | JE_Muma_19 142 | JE_Wukong_video_p706 143 | JE_ChangeWeapon_01 144 | JE_Hulk_video_05 145 | JE_interlaken_01_a_02 146 | JE_BluceLi_video_06_1 147 | JE_SoliderHead_video_C11_01 148 | JE_BasketballPlayer_video_002_1 149 | JE_BaseBall_video_02_1 150 | JE_interlaken_00_f_02 -------------------------------------------------------------------------------- /scripts/TNL2KLTDataset/tnl_lt_val_split.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Event-AHU/Open_VLTrack/2b83cc192df52045435dd933b89cf89ec8f7c5f2/scripts/TNL2KLTDataset/tnl_lt_val_split.txt -------------------------------------------------------------------------------- 
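The test and train splits above contain 50 and 150 sequence names respectively, which together match the 200 entries of all_dataset.txt in the TNLLT evaluation toolkit. A minimal sanity check over the split files, a sketch that assumes it is run from scripts/TNL2KLTDataset/ so the relative file names resolve, could look like this:

    def read_split(path):
        # Read one sequence name per line, dropping blank lines.
        with open(path, "r", encoding="utf-8") as f:
            return [line.strip() for line in f if line.strip()]

    test_seqs = read_split("tnl_lt_test_split.txt")
    train_seqs = read_split("tnl_lt_train_split.txt")

    print(len(test_seqs), len(train_seqs))  # expected: 50 150
    assert not set(test_seqs) & set(train_seqs), "test/train splits overlap"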
/scripts/TNL2KLTDataset/tnlltdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | from lib.test.utils.load_text import load_text 4 | import os 5 | 6 | 7 | class TNLLTDataset(BaseDataset): 8 | def __init__(self): 9 | super().__init__() 10 | self.base_path = self.env_settings.tnllt_path 11 | self.sequence_list = self._get_sequence_list() 12 | self.clean_list = self.sequence_list 13 | 14 | 15 | def get_sequence_list(self): 16 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 17 | 18 | def _construct_sequence(self, sequence_name): 19 | # class_name = sequence_name.split('-')[0] 20 | anno_path = '{}/{}/groundtruth.txt'.format(self.base_path, sequence_name) 21 | 22 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64) 23 | 24 | absent_label_label_path = '{}/{}/absent_label.txt'.format(self.base_path, sequence_name) 25 | 26 | # NOTE: pandas backed seems super super slow for loading occlusion/oov masks 27 | # full_occlusion = load_text(str(occlusion_label_path), delimiter=',', dtype=np.float64, backend='numpy') 28 | with open(str(absent_label_label_path), 'r') as file: 29 | lines = file.read().splitlines() 30 | absent_label = np.array([list(map(float, line.split())) for line in lines], dtype=np.float64) 31 | 32 | # out_of_view_label_path = '{}/{}/{}/out_of_view.txt'.format(self.base_path, class_name, sequence_name) 33 | # out_of_view = load_text(str(out_of_view_label_path), delimiter=',', dtype=np.float64, backend='numpy') 34 | 35 | target_visible = absent_label 36 | 37 | frames_path = '{}/{}/imgs'.format(self.base_path, sequence_name) 38 | 39 | frames_list = ['{}/{:05d}.png'.format(frames_path, frame_number) for frame_number in range(1, ground_truth_rect.shape[0] + 1)] 40 | 41 | target_class = sequence_name 42 | 43 | language_file = os.path.join(self.base_path, sequence_name,"language.txt") 44 | 45 | with open(language_file, 'r') as f: 46 | language = f.readlines()[0].rstrip() 47 | 48 | return Sequence(sequence_name, frames_list, 'tnllt', ground_truth_rect.reshape(-1, 4), 49 | object_class=target_class, target_visible=target_visible,language=language) 50 | 51 | def __len__(self): 52 | return len(self.sequence_list) 53 | 54 | def _get_sequence_list(self): 55 | with open('/your_root_path/tnl_lt_test_split.txt', 'r') as file: 56 | sequence_list = [line.strip() for line in file.readlines()] 57 | 58 | return sequence_list 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /scripts/TNL2K_JE_json_generation.py: -------------------------------------------------------------------------------- 1 | ############################################################################################ 2 | #### Fot LaSOT dataset 3 | ############################################################################################ 4 | import os 5 | import numpy as np 6 | import json 7 | import pdb 8 | 9 | LaSOT_path = "/home/wangxiao/Downloads/projects/TNL2K_TPAMI/TNL2K_JE_dataset/" 10 | video_files = os.listdir(LaSOT_path) 11 | video_files = np.sort(video_files) 12 | 13 | ## use this class to avoid some array or other format issues in json. 
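## (numpy integers, floats and ndarrays are not JSON-serializable by default; the encoder
## below converts them to plain Python int, float and list values before dumping.)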
14 | class NumpyEncoder(json.JSONEncoder): 15 | """ Special json encoder for numpy types """ 16 | def default(self, obj): 17 | if isinstance(obj, (np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64)): 18 | return int(obj) 19 | elif isinstance(obj, (np.float_, np.float16, np.float32, np.float64)): 20 | return float(obj) 21 | elif isinstance(obj, (np.ndarray,)): #### This is the fix 22 | return obj.tolist() 23 | return json.JSONEncoder.default(self, obj) 24 | 25 | 26 | dict = {} 27 | 28 | for idx in range(len(video_files)): 29 | # for idx in range(5): ## for test this code work or not. 30 | video_name = video_files[idx] 31 | img_path = LaSOT_path + video_name + '/imgs/' 32 | gt_path = LaSOT_path + video_name + '/groundtruth.txt' 33 | 34 | print("==>> video Name: ", video_name, " current-index/total: ", idx, "/", len(video_files), ", please wait ... ") 35 | 36 | img_files = sorted([p for p in os.listdir(img_path) if os.path.splitext(p)[1] == '.jpg']) 37 | 38 | if len(img_files) == 0: 39 | img_files = sorted([p for p in os.listdir(img_path) if os.path.splitext(p)[1] == '.png']) 40 | 41 | # pdb.set_trace() 42 | 43 | gt_files = np.loadtxt(gt_path, delimiter=',') 44 | init_rect = gt_files[0] 45 | init_rect_first = init_rect.tolist() 46 | 47 | # pdb.set_trace() 48 | img_names_list = [] 49 | gt_files_list = [] 50 | 51 | #### for each image and ground truth 52 | for img_idx in range(len(img_files)): 53 | img_names = video_name + '/imgs/' + img_files[img_idx] 54 | img_names_list.append(img_names) 55 | gt_files_list.append(gt_files[img_idx]) 56 | 57 | # pdb.set_trace() 58 | 59 | #### collect and save into one dict. 60 | dict_collect = {'video_dir': video_name, 'init_rect': init_rect_first, 'img_names': img_names_list, 'gt_rect': gt_files_list} 61 | dict[video_name] = dict_collect 62 | 63 | 64 | dumped = json.dumps(dict, cls=NumpyEncoder) 65 | with open('TNL2K_JE_test.json', 'w+') as f: 66 | json.dump(dumped, f) 67 | 68 | print("==>> Done !") 69 | 70 | file = open('TNL2K_JE_test.json','r',encoding='utf-8') 71 | benchmark_info = json.load(file) 72 | print(benchmark_info) -------------------------------------------------------------------------------- /scripts/check_TNL2K_dataset.m: -------------------------------------------------------------------------------- 1 | %% 2 | clc; clear all; close all; warning off; 3 | path = '/media/wangxiao/44907FD2907FC946/dataset/TNL2K_dataset/TNL2KJE/'; 4 | files =dir(path); 5 | files = files(3:end); 6 | 7 | for vid =1:size(files, 1) 8 | vid 9 | videoName = files(vid).name; 10 | imgFiles = dir([path videoName '/imgs/']); 11 | imgFiles = imgFiles(3:end); 12 | 13 | for imgID =1:size(imgFiles, 1) 14 | imageName = imgFiles(imgID).name; 15 | 16 | try 17 | image = imread([path videoName '/imgs/' imageName]); 18 | catch 19 | disp(['==>> bad case: ', [videoName '/imgs/' imageName]]); 20 | end 21 | 22 | 23 | end 24 | 25 | end 26 | 27 | -------------------------------------------------------------------------------- /scripts/text2audio_toolkit.py: -------------------------------------------------------------------------------- 1 | #################################################################################################### 2 | #### for sinlge video processing (example only) 3 | #################################################################################################### 4 | 5 | import pyttsx3 6 | engine = pyttsx3.init() 7 | 8 | 9 | """ RATE""" 10 | rate = engine.getProperty('rate') # getting details of 
current speaking rate 11 | # print (rate) #printing current voice rate 12 | engine.setProperty('rate', 130) # setting up new voice rate 13 | 14 | 15 | """VOLUME""" 16 | volume = engine.getProperty('volume') #getting to know current volume level (min=0 and max=1) 17 | # print (volume) #printing current volume level 18 | engine.setProperty('volume',1.0) # setting up volume level between 0 and 1 19 | 20 | """VOICE""" 21 | voices = engine.getProperty('voices') #getting details of current voice 22 | #engine.setProperty('voice', voices[0].id) #changing index, changes voices. o for male 23 | engine.setProperty('voice', voices[1].id) #changing index, changes voices. 1 for female 24 | 25 | # engine.say("I will speak this text") 26 | engine.save_to_file('I will speak this text, today is the final day of 2021, see you in the 2022.', 'C:\\Users\\wangx\\OneDrive\\文档\\test.mp3') 27 | engine.runAndWait() 28 | 29 | 30 | 31 | 32 | 33 | 34 | #################################################################################################### 35 | #### for dataset-level processing (running on Windows system, visual stuido code) 36 | #################################################################################################### 37 | 38 | import pyttsx3 39 | import os 40 | import pdb 41 | 42 | engine = pyttsx3.init() 43 | 44 | 45 | path = "D:\\TNL2K_JE_longterm_videos\\external\\" 46 | files = os.listdir(path) 47 | 48 | for video in range(len(files)): 49 | videoName = files[video] 50 | print("==>> processing the video: ", videoName) 51 | 52 | fid = open(path + videoName + "\\language.txt", 'r') 53 | text_input = fid.readline() 54 | 55 | #### rate of the generated audio file 56 | rate_value = 190 ## 80, 100, 125, 150, 170, 190 57 | 58 | if rate_value == 80: 59 | audio_savePath = path + videoName + "\\" + videoName + "_audio_woman_rate080.mp3" 60 | else: 61 | audio_savePath = path + videoName + "\\" + videoName + "_audio_woman_rate" + str(rate_value) + ".mp3" 62 | 63 | """ RATE""" 64 | rate = engine.getProperty('rate') # getting details of current speaking rate 65 | # print (rate) #printing current voice rate 66 | engine.setProperty('rate', rate_value) # setting up new voice rate 67 | 68 | 69 | """VOLUME""" 70 | volume = engine.getProperty('volume') #getting to know current volume level (min=0 and max=1) 71 | # print (volume) #printing current volume level 72 | engine.setProperty('volume',1.0) # setting up volume level between 0 and 1 73 | 74 | """VOICE""" 75 | voices = engine.getProperty('voices') #getting details of current voice 76 | #engine.setProperty('voice', voices[0].id) #changing index, changes voices. o for male 77 | engine.setProperty('voice', voices[1].id) #changing index, changes voices. 1 for female 78 | 79 | # engine.say("I will speak this text") 80 | engine.save_to_file(text_input, audio_savePath) 81 | engine.runAndWait() 82 | 83 | fid.close() 84 | 85 | 86 | 87 | 88 | 89 | --------------------------------------------------------------------------------
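A practical note on text2audio_toolkit.py: the hard-coded voice index (voices[1]) depends on which TTS voices are installed, and the available voices differ across Windows, macOS and Linux. A short sketch, assuming only that pyttsx3 is installed, to list the installed voices before choosing one:

    import pyttsx3

    engine = pyttsx3.init()
    for idx, voice in enumerate(engine.getProperty('voices')):
        # Print each installed voice so a suitable voices[idx].id can be passed to setProperty('voice', ...).
        print(idx, voice.id, voice.name)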