├── .gitignore
├── LICENSE
├── README.md
├── assets
│   └── images
│       ├── cua_evaluation_results.png
│       ├── main_pic.png
│       ├── video2action.png
│       └── video_collection_pipeline.png
├── raw_data
│   └── S8Kbt1xKRcs
│       ├── S8Kbt1xKRcs.mp4
│       └── S8Kbt1xKRcs_transcript.json
├── requirements.txt
├── video2action.py
├── video2action
│   ├── README.md
│   ├── __init__.py
│   ├── config.py
│   ├── pipeline.py
│   ├── stages
│   │   ├── __init__.py
│   │   ├── action_clipper.py
│   │   ├── action_identifier.py
│   │   ├── action_validator.py
│   │   ├── inner_monologue_generator.py
│   │   ├── keyframe_detector.py
│   │   ├── trajectory_builder.py
│   │   ├── trajectory_exporter.py
│   │   └── video_splitter.py
│   └── utils
│       ├── __init__.py
│       ├── data_utils.py
│       └── qwen_vl_utils.py
├── video_preprocess.py
└── video_preprocess
    ├── README.md
    ├── __init__.py
    ├── config.py
    ├── cursor_detector.py
    └── pipeline.py


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/.gitignore


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/README.md


--------------------------------------------------------------------------------
/assets/images/cua_evaluation_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/assets/images/cua_evaluation_results.png


--------------------------------------------------------------------------------
/assets/images/main_pic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/assets/images/main_pic.png


--------------------------------------------------------------------------------
/assets/images/video2action.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/assets/images/video2action.png


--------------------------------------------------------------------------------
/assets/images/video_collection_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/assets/images/video_collection_pipeline.png


--------------------------------------------------------------------------------
/raw_data/S8Kbt1xKRcs/S8Kbt1xKRcs.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/raw_data/S8Kbt1xKRcs/S8Kbt1xKRcs.mp4


--------------------------------------------------------------------------------
/raw_data/S8Kbt1xKRcs/S8Kbt1xKRcs_transcript.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/raw_data/S8Kbt1xKRcs/S8Kbt1xKRcs_transcript.json


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/requirements.txt


--------------------------------------------------------------------------------
/video2action.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action.py


--------------------------------------------------------------------------------
/video2action/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/README.md


--------------------------------------------------------------------------------
/video2action/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/__init__.py


--------------------------------------------------------------------------------
/video2action/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/config.py


--------------------------------------------------------------------------------
/video2action/pipeline.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/pipeline.py


--------------------------------------------------------------------------------
/video2action/stages/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/stages/__init__.py


--------------------------------------------------------------------------------
/video2action/stages/action_clipper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/stages/action_clipper.py


--------------------------------------------------------------------------------
/video2action/stages/action_identifier.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/stages/action_identifier.py


--------------------------------------------------------------------------------
/video2action/stages/action_validator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/stages/action_validator.py


--------------------------------------------------------------------------------
/video2action/stages/inner_monologue_generator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/stages/inner_monologue_generator.py


--------------------------------------------------------------------------------
/video2action/stages/keyframe_detector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/stages/keyframe_detector.py


--------------------------------------------------------------------------------
/video2action/stages/trajectory_builder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/stages/trajectory_builder.py


--------------------------------------------------------------------------------
/video2action/stages/trajectory_exporter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/stages/trajectory_exporter.py


--------------------------------------------------------------------------------
/video2action/stages/video_splitter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/stages/video_splitter.py


--------------------------------------------------------------------------------
/video2action/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/utils/__init__.py


--------------------------------------------------------------------------------
/video2action/utils/data_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/utils/data_utils.py


--------------------------------------------------------------------------------
/video2action/utils/qwen_vl_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video2action/utils/qwen_vl_utils.py


--------------------------------------------------------------------------------
/video_preprocess.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video_preprocess.py


--------------------------------------------------------------------------------
/video_preprocess/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video_preprocess/README.md


--------------------------------------------------------------------------------
/video_preprocess/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video_preprocess/__init__.py


--------------------------------------------------------------------------------
/video_preprocess/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video_preprocess/config.py


--------------------------------------------------------------------------------
/video_preprocess/cursor_detector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video_preprocess/cursor_detector.py


--------------------------------------------------------------------------------
/video_preprocess/pipeline.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xlang-ai/VideoAgentTrek/HEAD/video_preprocess/pipeline.py


--------------------------------------------------------------------------------