├── README.md
├── data
    ├── count.json
    ├── direction.json
    ├── rotation.json
    ├── shape&trend.json
    ├── velocity&frequency.json
    └── visual_cues.json
├── misc
    ├── ball_rotation_frames.png
    ├── dropping_reversed_frames.png
    ├── earth_moon_frames.png
    ├── figure1.png
    ├── frame_information_parity.png
    ├── frame_order_sensitivity.png
    ├── human_gesture_frames.png
    ├── main_results.png
    ├── multi_frame_gain1.png
    ├── multi_frame_gain2.png
    └── synthetic_human_frames.png
└── src
    ├── config.json
    ├── evaluate.py
    ├── generate_lib
        ├── __init__.py
        ├── claude.py
        ├── constant.py
        ├── construct_prompt.py
        ├── gemini.py
        ├── gpt.py
        ├── internvideo.py
        ├── internvl.py
        ├── llava_next.py
        ├── phi.py
        ├── qwen.py
        ├── reka.py
        ├── utils.py
        ├── video_ccam.py
        ├── video_llama.py
        ├── video_llava.py
        └── vila.py
    ├── get_categorized_score.py
    └── parse_result.py


/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/README.md


--------------------------------------------------------------------------------
/data/count.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/count.json


--------------------------------------------------------------------------------
/data/direction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/direction.json


--------------------------------------------------------------------------------
/data/rotation.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/rotation.json


--------------------------------------------------------------------------------
/data/shape&trend.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/shape&trend.json


--------------------------------------------------------------------------------
/data/velocity&frequency.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/velocity&frequency.json


--------------------------------------------------------------------------------
/data/visual_cues.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/visual_cues.json


--------------------------------------------------------------------------------
/misc/ball_rotation_frames.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/ball_rotation_frames.png


--------------------------------------------------------------------------------
/misc/dropping_reversed_frames.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/dropping_reversed_frames.png


--------------------------------------------------------------------------------
/misc/earth_moon_frames.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/earth_moon_frames.png


--------------------------------------------------------------------------------
/misc/figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/figure1.png


--------------------------------------------------------------------------------
/misc/frame_information_parity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/frame_information_parity.png


--------------------------------------------------------------------------------
/misc/frame_order_sensitivity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/frame_order_sensitivity.png


--------------------------------------------------------------------------------
/misc/human_gesture_frames.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/human_gesture_frames.png


--------------------------------------------------------------------------------
/misc/main_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/main_results.png


--------------------------------------------------------------------------------
/misc/multi_frame_gain1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/multi_frame_gain1.png


--------------------------------------------------------------------------------
/misc/multi_frame_gain2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/multi_frame_gain2.png


--------------------------------------------------------------------------------
/misc/synthetic_human_frames.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/synthetic_human_frames.png


--------------------------------------------------------------------------------
/src/config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/config.json


--------------------------------------------------------------------------------
/src/evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/evaluate.py


--------------------------------------------------------------------------------
/src/generate_lib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/generate_lib/claude.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/claude.py


--------------------------------------------------------------------------------
/src/generate_lib/constant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/constant.py


--------------------------------------------------------------------------------
/src/generate_lib/construct_prompt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/construct_prompt.py


--------------------------------------------------------------------------------
/src/generate_lib/gemini.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/gemini.py


--------------------------------------------------------------------------------
/src/generate_lib/gpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/gpt.py


--------------------------------------------------------------------------------
/src/generate_lib/internvideo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/internvideo.py


--------------------------------------------------------------------------------
/src/generate_lib/internvl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/internvl.py


--------------------------------------------------------------------------------
/src/generate_lib/llava_next.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/llava_next.py


--------------------------------------------------------------------------------
/src/generate_lib/phi.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/phi.py


--------------------------------------------------------------------------------
/src/generate_lib/qwen.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/qwen.py


--------------------------------------------------------------------------------
/src/generate_lib/reka.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/reka.py


--------------------------------------------------------------------------------
/src/generate_lib/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/utils.py


--------------------------------------------------------------------------------
/src/generate_lib/video_ccam.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/video_ccam.py


--------------------------------------------------------------------------------
/src/generate_lib/video_llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/video_llama.py


--------------------------------------------------------------------------------
/src/generate_lib/video_llava.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/video_llava.py


--------------------------------------------------------------------------------
/src/generate_lib/vila.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/vila.py


--------------------------------------------------------------------------------
/src/get_categorized_score.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/get_categorized_score.py


--------------------------------------------------------------------------------
/src/parse_result.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/parse_result.py


--------------------------------------------------------------------------------