├── README.md ├── data ├── count.json ├── direction.json ├── rotation.json ├── shape&trend.json ├── velocity&frequency.json └── visual_cues.json ├── misc ├── ball_rotation_frames.png ├── dropping_reversed_frames.png ├── earth_moon_frames.png ├── figure1.png ├── frame_information_parity.png ├── frame_order_sensitivity.png ├── human_gesture_frames.png ├── main_results.png ├── multi_frame_gain1.png ├── multi_frame_gain2.png └── synthetic_human_frames.png └── src ├── config.json ├── evaluate.py ├── generate_lib ├── __init__.py ├── claude.py ├── constant.py ├── construct_prompt.py ├── gemini.py ├── gpt.py ├── internvideo.py ├── internvl.py ├── llava_next.py ├── phi.py ├── qwen.py ├── reka.py ├── utils.py ├── video_ccam.py ├── video_llama.py ├── video_llava.py └── vila.py ├── get_categorized_score.py └── parse_result.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/README.md -------------------------------------------------------------------------------- /data/count.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/count.json -------------------------------------------------------------------------------- /data/direction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/direction.json -------------------------------------------------------------------------------- /data/rotation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/rotation.json -------------------------------------------------------------------------------- /data/shape&trend.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/shape&trend.json -------------------------------------------------------------------------------- /data/velocity&frequency.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/velocity&frequency.json -------------------------------------------------------------------------------- /data/visual_cues.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/data/visual_cues.json -------------------------------------------------------------------------------- /misc/ball_rotation_frames.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/ball_rotation_frames.png -------------------------------------------------------------------------------- /misc/dropping_reversed_frames.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/dropping_reversed_frames.png -------------------------------------------------------------------------------- /misc/earth_moon_frames.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/earth_moon_frames.png -------------------------------------------------------------------------------- /misc/figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/figure1.png -------------------------------------------------------------------------------- /misc/frame_information_parity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/frame_information_parity.png -------------------------------------------------------------------------------- /misc/frame_order_sensitivity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/frame_order_sensitivity.png -------------------------------------------------------------------------------- /misc/human_gesture_frames.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/human_gesture_frames.png -------------------------------------------------------------------------------- /misc/main_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/main_results.png -------------------------------------------------------------------------------- /misc/multi_frame_gain1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/multi_frame_gain1.png -------------------------------------------------------------------------------- /misc/multi_frame_gain2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/multi_frame_gain2.png -------------------------------------------------------------------------------- /misc/synthetic_human_frames.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/misc/synthetic_human_frames.png -------------------------------------------------------------------------------- /src/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/config.json -------------------------------------------------------------------------------- /src/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/evaluate.py -------------------------------------------------------------------------------- /src/generate_lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/generate_lib/claude.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/claude.py -------------------------------------------------------------------------------- /src/generate_lib/constant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/constant.py -------------------------------------------------------------------------------- /src/generate_lib/construct_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/construct_prompt.py -------------------------------------------------------------------------------- /src/generate_lib/gemini.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/gemini.py -------------------------------------------------------------------------------- /src/generate_lib/gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/gpt.py -------------------------------------------------------------------------------- /src/generate_lib/internvideo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/internvideo.py -------------------------------------------------------------------------------- /src/generate_lib/internvl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/internvl.py -------------------------------------------------------------------------------- /src/generate_lib/llava_next.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/llava_next.py -------------------------------------------------------------------------------- /src/generate_lib/phi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/phi.py -------------------------------------------------------------------------------- /src/generate_lib/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/qwen.py -------------------------------------------------------------------------------- /src/generate_lib/reka.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/reka.py -------------------------------------------------------------------------------- /src/generate_lib/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/utils.py -------------------------------------------------------------------------------- /src/generate_lib/video_ccam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/video_ccam.py -------------------------------------------------------------------------------- /src/generate_lib/video_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/video_llama.py -------------------------------------------------------------------------------- /src/generate_lib/video_llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/video_llava.py -------------------------------------------------------------------------------- /src/generate_lib/vila.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/generate_lib/vila.py -------------------------------------------------------------------------------- /src/get_categorized_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/get_categorized_score.py -------------------------------------------------------------------------------- /src/parse_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yale-nlp/TOMATO/HEAD/src/parse_result.py --------------------------------------------------------------------------------