├── eval_suite
├── __init__.py
├── prompts_raw
│ ├── fix_transcript.txt
│ ├── video_eval_new.txt
│ ├── image_eval.txt
│ ├── text_eval_new.txt
│ └── __init__.py
├── parse_prompt.py
├── utils.py
├── text_utils.py
├── image_utils.py
└── video_utils.py
├── src
├── config
│ ├── __init__.py
│ └── config.py
├── core
│ ├── __init__.py
│ └── parse_video.py
├── rag
│ └── __init__.py
├── utils
│ ├── __init__.py
│ ├── allowed_models.json
│ ├── kokoro_voiceover.py
│ └── utils.py
└── __init__.py
├── task_generator
├── prompts_raw
│ ├── code_disable.txt
│ ├── code_background.txt
│ ├── prompt_context_learning_code.txt
│ ├── prompt_context_learning_scene_plan.txt
│ ├── prompt_context_learning_vision_storyboard.txt
│ ├── prompt_context_learning_animation_narration.txt
│ ├── prompt_context_learning_technical_implementation.txt
│ ├── code_font_size.txt
│ ├── code_limit.txt
│ ├── banned_reasonings.txt
│ ├── code_color_cheatsheet.txt
│ ├── prompt_best_practices.txt
│ ├── prompt_visual_fix_error.txt
│ ├── prompt_animation_simple.txt
│ ├── prompt_rag_query_generation_technical.txt
│ ├── prompt_rag_query_generation_narration.txt
│ ├── prompt_detect_plugins.txt
│ ├── prompt_rag_query_generation_vision_storyboard.txt
│ ├── prompt_fix_error.txt
│ ├── prompt_rag_query_generation_fix_error.txt
│ ├── prompt_animation_fix_error.txt
│ ├── prompt_rag_query_generation_code.txt
│ ├── prompt_animation_rag_query_generation.txt
│ ├── prompt_rag_query_generation_storyboard.txt
│ ├── prompt_animation_rag_query_generation_fix_error.txt
│ ├── prompt_scene_plan.txt
│ ├── prompt_visual_self_reflection.txt
│ ├── prompt_scene_vision_storyboard.txt
│ ├── prompt_scene_technical_implementation.txt
│ └── prompt_scene_animation_narration.txt
└── parse_prompt.py
├── .gitignore
├── mllm_tools
├── __init__.py
├── vertex_ai.py
├── utils.py
├── gemini.py
└── litellm.py
├── .github
└── ISSUE_TEMPLATE
│ ├── feature_request.md
│ └── bug_report.md
├── .env.template
├── LICENSE
├── requirements.txt
└── data
├── thb_hard
├── chemistry.json
├── physics.json
└── math.json
├── thb_medium
├── chemistry.json
└── comp_sci.json
└── thb_easy
├── comp_sci.json
├── physics.json
├── chemistry.json
└── math.json
/eval_suite/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/config/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/core/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/rag/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/task_generator/prompts_raw/code_disable.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/__pycache__/
2 |
3 | .env
4 |
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | # This is essential for the release to work
--------------------------------------------------------------------------------
/mllm_tools/__init__.py:
--------------------------------------------------------------------------------
1 | # Empty file to make this directory a Python package
--------------------------------------------------------------------------------
/task_generator/prompts_raw/code_background.txt:
--------------------------------------------------------------------------------
1 | PLEASE DO NOT create another colored background Rectangle. Default background (Black) is enough.
2 | PLEASE DO NOT use BLACK color for any text.
3 |
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_context_learning_code.txt:
--------------------------------------------------------------------------------
1 | Here are some example Manim code implementations to help guide your code generation:
2 |
3 | {examples}
4 |
5 | Please follow similar patterns and best practices while implementing the current scene.
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_context_learning_scene_plan.txt:
--------------------------------------------------------------------------------
1 | Here are some example scene plans to help guide your scene planning:
2 |
3 | {examples}
4 |
5 | Please follow a similar structure while maintaining creativity and relevance to the current topic.
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_context_learning_vision_storyboard.txt:
--------------------------------------------------------------------------------
1 | Here are some example vision and storyboard plans to help guide your planning:
2 |
3 | {examples}
4 |
5 | Please follow a similar structure while maintaining creativity and relevance to the current scene.
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_context_learning_animation_narration.txt:
--------------------------------------------------------------------------------
1 | Here are some example animation and narration plans to help guide your planning:
2 |
3 | {examples}
4 |
5 | Please follow a similar structure while maintaining creativity and relevance to the current scene.
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_context_learning_technical_implementation.txt:
--------------------------------------------------------------------------------
1 | Here are some example technical implementation plans to help guide your implementation:
2 |
3 | {examples}
4 |
5 | Please follow a similar structure while maintaining creativity and relevance to the current scene.
--------------------------------------------------------------------------------
/task_generator/prompts_raw/code_font_size.txt:
--------------------------------------------------------------------------------
1 | If there is title text, font size is highly recommended to be 28.
2 | If there are side labels, font size is highly recommended to be 24.
3 | If there are formulas, font size is highly recommended to be 24.
4 |
5 | However, if the text has more than 10 words, font size should be reduced further and multiple lines should be used.
--------------------------------------------------------------------------------
/task_generator/prompts_raw/code_limit.txt:
--------------------------------------------------------------------------------
1 | Note that the frame width and height are 14.222222222222221 and 8.0 respectively. And the center of the frame is (0, 0, 0).
2 | It means to avoid putting any object out of the frame, you should limit the x and y coordinates of the objects.
3 | limit x to be within -7.0 and 7.0 for objects, and limit y to be within -4.0 and 4.0 for objects.
4 | Place the objects near the center of the frame, without overlapping with each other.
--------------------------------------------------------------------------------
/task_generator/prompts_raw/banned_reasonings.txt:
--------------------------------------------------------------------------------
1 | evaluation cannot
2 | can't assist
3 | cannot assist
4 | can't provide
5 | cannot provide
6 | can't evaluate
7 | cannot evaluate
8 | cannot be evaluated
9 | cannot be rated
10 | cannot be completed
11 | cannot be assessed
12 | cannot be scored
13 | cannot be conducted
14 | unable to evaluate
15 | do not have the capability
16 | do not have the ability
17 | are photographs and not AI-generated
18 | unable to provide the evaluation
--------------------------------------------------------------------------------
/eval_suite/prompts_raw/fix_transcript.txt:
--------------------------------------------------------------------------------
1 | You are an expert in YouTube video transcripts. There is a transcript that was automatically generated through YouTube, so it lacks proper capitalization and punctuation. Your task is to fix the transcript so that there is proper punctuation, capitalization, and spacing. Do not make other modifications (e.g., keep the original word choice).
2 |
3 | You should enclose the fixed transcript with a block, i.e.:
4 |
7 |
8 | Original transcript: {transcript}
9 |
--------------------------------------------------------------------------------
/task_generator/prompts_raw/code_color_cheatsheet.txt:
--------------------------------------------------------------------------------
1 | MUST include the following color definitions if you use the colors in your code. ONLY USE THE COLORS BELOW.
2 |
3 | WHITE = '#FFFFFF'
4 | RED = '#FF0000'
5 | GREEN = '#00FF00'
6 | BLUE = '#0000FF'
7 | YELLOW = '#FFFF00'
8 | CYAN = '#00FFFF'
9 | MAGENTA = '#FF00FF'
10 | ORANGE = '#FFA500'
11 | PURPLE = '#800080'
12 | PINK = '#FFC0CB'
13 | BROWN = '#A52A2A'
14 | GRAY = '#808080'
15 | TEAL = '#008080'
16 | NAVY = '#000080'
17 | OLIVE = '#808000'
18 | MAROON = '#800000'
19 | LIME = '#00FF00'
20 | AQUA = '#00FFFF'
21 | FUCHSIA = '#FF00FF'
22 | SILVER = '#C0C0C0'
23 | GOLD = '#FFD700'
--------------------------------------------------------------------------------
/src/utils/allowed_models.json:
--------------------------------------------------------------------------------
1 | {
2 | "allowed_models": [
3 | "gemini/gemini-1.5-pro-002",
4 | "gemini/gemini-1.5-flash-002",
5 | "gemini/gemini-2.0-flash-001",
6 | "vertex_ai/gemini-1.5-flash-002",
7 | "vertex_ai/gemini-1.5-pro-002",
8 | "vertex_ai/gemini-2.0-flash-001",
9 | "openai/o3-mini",
10 | "gpt-4o",
11 | "azure/gpt-4o",
12 | "azure/gpt-4o-mini",
13 | "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
14 | "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0",
15 | "bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
16 | "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
17 | ]
18 | }
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.env.template:
--------------------------------------------------------------------------------
1 | # OpenAI
2 | OPENAI_API_KEY=""
3 |
4 | # Azure OpenAI
5 | AZURE_API_KEY=""
6 | AZURE_API_BASE=""
7 | AZURE_API_VERSION=""
8 |
9 | # Google Vertex AI
10 | VERTEXAI_PROJECT=""
11 | VERTEXAI_LOCATION=""
12 | GOOGLE_APPLICATION_CREDENTIALS=""
13 |
14 | # Google Gemini
15 | GEMINI_API_KEY=""
16 |
17 | # AWS Bedrock / S3
18 | AWS_ACCESS_KEY_ID=""
19 | AWS_SECRET_ACCESS_KEY=""
20 | AWS_REGION_NAME=""
21 | AWS_S3_BUCKET=""
22 |
23 | # Langfuse
24 | LANGFUSE_PUBLIC_KEY=""
25 | LANGFUSE_SECRET_KEY=""
26 | LANGFUSE_HOST=""
27 |
28 | # Kokoro TTS Settings
29 | KOKORO_MODEL_PATH="models/kokoro-v0_19.onnx"
30 | KOKORO_VOICES_PATH="models/voices.bin"
31 | KOKORO_DEFAULT_VOICE="af"
32 | KOKORO_DEFAULT_SPEED="1.0"
33 | KOKORO_DEFAULT_LANG="en-us"
--------------------------------------------------------------------------------
/src/config/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | from dotenv import load_dotenv
3 |
4 | # Load environment variables from .env file
5 | load_dotenv()
6 |
7 | class Config:
8 | OUTPUT_DIR = "output"
9 | THEOREMS_PATH = os.path.join("data", "easy_20.json")
10 | CONTEXT_LEARNING_PATH = "data/context_learning"
11 | CHROMA_DB_PATH = "data/rag/chroma_db"
12 | MANIM_DOCS_PATH = "data/rag/manim_docs"
13 | EMBEDDING_MODEL = "azure/text-embedding-3-large"
14 |
15 | # Kokoro TTS configurations
16 | KOKORO_MODEL_PATH = os.getenv('KOKORO_MODEL_PATH')
17 | KOKORO_VOICES_PATH = os.getenv('KOKORO_VOICES_PATH')
18 | KOKORO_DEFAULT_VOICE = os.getenv('KOKORO_DEFAULT_VOICE')
19 | KOKORO_DEFAULT_SPEED = float(os.getenv('KOKORO_DEFAULT_SPEED', '1.0'))
20 | KOKORO_DEFAULT_LANG = os.getenv('KOKORO_DEFAULT_LANG')
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Desktop (please complete the following information):**
27 | - OS: [e.g. iOS]
28 | - Browser [e.g. chrome, safari]
29 | - Version [e.g. 22]
30 |
31 | **Additional context**
32 | Add any other context about the problem here.
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 TIGER Lab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_best_practices.txt:
--------------------------------------------------------------------------------
1 | # Best practices for generating educational videos with manim
2 |
3 | 1. Specify positions as relative to other objects whenever it makes sense.
4 | * For example, if you want to place a label for a geometric object.
5 | 2. Objects should be of different color from the black background.
6 | 3. Keep the text on screen concise.
7 | * On-screen elements should focus on showcasing the concept, examples and visuals. Labels and illustrative text are still encouraged.
8 | * For explanations and observations, prefer narrations over on-screen text.
9 | * You should still show calculations and algorithms in full on screen.
10 | * For examples and practice problems, it is reasonable to show more text, especially key statements.
11 | * Longer text should appear smaller to fit on screen.
12 | 4. To control the timing of objects appearing:
13 | * `add` has instantaneous effect, best used for the initial setup of the scene.
14 | * Animations are best used during narration.
15 | * Make sure the animations make sense. If an object is already on screen, it makes no sense to fade it in or create it again.
16 | 5. Use TeX or MathTeX whenever you want to display math, including symbols and formulas.
17 |
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_visual_fix_error.txt:
--------------------------------------------------------------------------------
1 | You are an expert in Manim animations. Your task is to ensure that the rendered animation frame (image) aligns with the intended teaching content based on the provided implementation plan.
2 |
3 | Instructions:
4 | Evaluate whether the object coordinates and positions in the image match the described plan and educational purpose.
5 | The implementation plan serves as a reference, but your primary goal is to verify that the rendered animation frame supports effective teaching.
6 | For example:
7 | * If the object is supposed to be at the top of the screen, but it is at the bottom, you need to adjust the position.
8 | * If the object is supposed to be at the left side but it is too far to the left, you need to adjust the position.
9 | * If the two objects are not supposed to overlap but they do overlap, you need to adjust their positions.
10 |
11 | If adjustments are needed, provide the complete code of the adjusted version.
12 | If the current code is correct, return it as is.
13 |
14 | Manim Implementation Plan:
15 | {implementation}
16 |
17 | Generated Code:
18 | {generated_code}
19 |
20 | Return the complete code of the adjusted version if the code needs to be updated. If the code is correct, only return "" as output.
21 |
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_animation_simple.txt:
--------------------------------------------------------------------------------
1 | Given a topic and the context, you need to explain the topic by text.
2 |
3 | Also generate a Manim script that visually illustrates a key aspect of {topic} without including explanatory text in the animation itself.
4 | Your text can mention the animation, but it should not be the main focus.
5 | Context about the topic {topic}: {description}.
6 |
7 | The animation should focus on:
8 | * Illustrating the significant part of the theorem or concept – Use geometric figures, graphs, number lines, or any relevant visualization.
9 | * Providing an intuitive example – Instead of proving the theorem, show a concrete example or transformation that visually supports understanding.
10 | * Separately, provide a written explanation of the theorem as text that can be displayed outside the animation.
11 |
12 | Ensure that:
13 |
14 | * The animation is concise.
15 | * The Manim code is compatible with the latest version of community manim.
16 | * The visual elements are clear and enhance understanding.
17 |
18 | Please provide the only output as:
19 |
20 | 1. A text explanation of the theorem.
21 | 2. A complete Manim script that generates the animation. Only give the code.
22 |
23 | Output format:
24 |
25 | (Text Explanation Output)
26 | --- (split by ---)
27 | (Manim Code Output)
28 |
29 | Please do not include any other text or headers in your output.
30 | Only use one --- to split the text explanation and the Manim code.
--------------------------------------------------------------------------------
/eval_suite/prompts_raw/video_eval_new.txt:
--------------------------------------------------------------------------------
1 | # Task: Video Frame Quality Evaluation
2 |
3 | You are tasked with analyzing and scoring a chunk of a theorem explanation video. Note that you may not have the full context of the video. Your job is to assign a score from 1 to 5 for each criterion. Please provide a brief justification for your scores.
4 |
5 | ## Evaluation Criteria
6 |
7 | 1. **Visual Consistency**
8 | - Style Consistency: Does the visual style remain consistent across frames?
9 | - Smoothness: Are the motions and transitions smooth?
10 |
11 | ## Scoring Instructions
12 | 1. Assign a score from **1 to 5** for each dimension:
13 | - **1**: Very poor quality, completely fails to meet the criteria.
14 | - **2**: Below average, significant issues present.
15 | - **3**: Acceptable, meets the basic criteria with minor issues.
16 | - **4**: Good, performs well with no major issues.
17 | - **5**: Excellent, fully meets or exceeds expectations.
18 | 2. Provide a comprehensive evaluation for each dimension.
19 | 3. Format your output in **JSON**
20 |
21 | ### JSON Output Format
22 | ```json
23 | {{
24 | "overall_analysis": "[Provide a general assessment of the video's quality]",
25 | "evaluation": {{
26 | "visual_consistency": {{
27 | "comprehensive_evaluation": "[Analysis of visual consistency]",
28 | "score": [1-5]
29 | }}
30 | }}
31 | }}
32 | ```
33 |
34 | Description of the theorem:
35 | {description}
36 |
37 | Video chunk:
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_rag_query_generation_technical.txt:
--------------------------------------------------------------------------------
1 | You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to analyze a storyboard plan and generate effective queries that will retrieve relevant technical documentation about implementation details.
2 |
3 | Here is the storyboard plan:
4 |
5 | {storyboard}
6 |
7 | Based on this storyboard plan, generate multiple human-like queries (maximum 10) for retrieving relevant technical documentation.
8 |
9 | **Specifically, ensure that:**
10 | 1. Queries focus on retrieving information about **core Manim functionality** and implementation details
11 | 2. Include queries about **complex animations and effects** described in the storyboard
12 | 3. If the storyboard suggests using plugin functionality, include specific queries targeting those plugins' technical documentation
13 |
14 | The above storyboard plan is relevant to these plugins: {relevant_plugins}
15 | Note that you MUST NOT use the plugins that are not listed above.
16 |
17 | You MUST only output the queries in the following JSON format (with json triple backticks):
18 | ```json
19 | [
20 | {{"type": "manim-core", "query": "content of core functionality query"}},
21 | {{"type": "", "query": "content of plugin-specific query"}},
22 | {{"type": "manim-core", "query": "content of animation technique query"}}
23 | ...
24 | ]
25 | ```
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_rag_query_generation_narration.txt:
--------------------------------------------------------------------------------
1 | You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to analyze a storyboard and generate effective queries that will retrieve relevant documentation about narration, text animations, and audio-visual synchronization.
2 |
3 | Here is the storyboard:
4 |
5 | {storyboard}
6 |
7 | Based on this storyboard, generate multiple human-like queries (maximum 10) for retrieving relevant documentation about narration and text animation techniques.
8 |
9 | **Specifically, ensure that:**
10 | 1. Queries focus on retrieving information about **text animations** and their properties
11 | 2. Include queries about **timing and synchronization** techniques
12 | 3. If the storyboard suggests using plugin functionality, include specific queries targeting those plugins' narration capabilities
13 |
14 | The above storyboard is relevant to these plugins: {relevant_plugins}.
15 | Note that you MUST NOT use the plugins that are not listed above.
16 |
17 | You MUST only output the queries in the following JSON format (with json triple backticks):
18 | ```json
19 | [
20 | {{"type": "manim-core", "query": "content of text animation query"}},
21 | {{"type": "", "query": "content of plugin-specific query"}},
22 | {{"type": "manim-core", "query": "content of timing synchronization query"}}
23 | ...
24 | ]
25 | ```
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_detect_plugins.txt:
--------------------------------------------------------------------------------
1 | You are a Manim plugin detection system. Your task is to analyze a video topic and description to determine which Manim plugins would be most relevant for the actual animation implementation needs.
2 |
3 | Topic:
4 | {topic}
5 |
6 | Description:
7 | {description}
8 |
9 | Available Plugins:
10 | {plugin_descriptions}
11 |
12 | Instructions:
13 | 1. Analyze the topic and description, focusing specifically on what needs to be animated
14 | 2. Review each plugin's capabilities and determine if they provide specific tools needed for the animations described
15 | 3. Only select plugins that provide functionality directly needed for the core animations
16 | 4. Consider these criteria for each plugin:
17 | - Does the plugin provide specific tools or components needed for the main visual elements?
18 | - Are the plugin's features necessary for implementing the core animations?
19 | - Would the animation be significantly more difficult to create without this plugin?
20 | 5. Exclude plugins that:
21 | - Only relate to the general topic area but don't provide needed animation tools
22 | - Might be "nice to have" but aren't essential for the core visualization
23 | - Could be replaced easily with basic Manim shapes and animations
24 |
25 | Your response must follow the output format below:
26 |
27 | [brief description of your thinking process]
28 |
29 |
30 | ```json
31 | ["plugin_name1", "plugin_name2"]
32 | ```
33 |
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_rag_query_generation_vision_storyboard.txt:
--------------------------------------------------------------------------------
1 | You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to analyze a scene plan for a Manim animation and generate effective queries that will retrieve relevant documentation about visual elements and scene composition.
2 |
3 | Here is the scene plan:
4 |
5 | {scene_plan}
6 |
7 | Based on this scene plan, generate multiple human-like queries (maximum 10) for retrieving relevant documentation about visual elements and scene composition techniques.
8 |
9 | **Specifically, ensure that:**
10 | 1. Queries focus on retrieving information about **visual elements** like shapes, objects, and their properties
11 | 2. Include queries about **scene composition techniques** like layout, positioning, and grouping
12 | 3. If the scene plan suggests using plugin functionality, include specific queries targeting those plugins' visual capabilities
13 | 4. Queries should be high-level, aiming to discover what Manim features can be used, rather than focusing on low-level implementation details.
14 | - For example, instead of "how to set the color of a circle", ask "what visual properties of shapes can I control in Manim?".
15 |
16 | The above scene plan is relevant to these plugins: {relevant_plugins}.
17 | Note that you MUST NOT use the plugins that are not listed above.
18 |
19 | You MUST only output the queries in the following JSON format (with json triple backticks):
20 | ```json
21 | [
22 | {{"type": "manim-core", "query": "content of visual element query"}},
23 | {{"type": "", "query": "content of plugin-specific query"}},
24 | {{"type": "manim-core", "query": "content of composition technique query"}}
25 | ...
26 | ]
27 | ```
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_fix_error.txt:
--------------------------------------------------------------------------------
1 | You are an expert Manim developer specializing in debugging and error resolution. Based on the provided implementation plan and Manim code, analyze the error message to provide a comprehensive fix and explanation.
2 |
3 | Implementation Plan of the Scene:
4 | {implementation_plan}
5 |
6 | Manim Code:
7 | ```python
8 | {manim_code}
9 | ```
10 |
11 | Error Message:
12 | {error_message}
13 |
14 | Requirements:
15 | 1. Provide complete error analysis with specific line numbers where possible.
16 | 2. Include exact instructions for every code change.
17 | 3. Explain why the error occurred in plain language.
18 | 4. If external assets (e.g., images, audio, video) are referenced, remove them.
19 | 5. **If voiceover is present in the original code, ensure it remains preserved in the corrected code.**
20 | 6. Preserve all original code that is not causing the reported error. Do not remove or alter any intentional elements unnecessarily.
21 | 7. Follow best practices for code clarity and the current Manim version.
22 |
23 | You MUST only output the following format (from to ). You MUST NOT come up with any other format like JSON.
24 |
25 |
26 | Error Type: [Syntax/Runtime/Logic/Other]
27 | Error Location: [File/Line number/Component]
28 | Root Cause: [Brief explanation of what caused the error]
29 | Impact: [What functionality is affected]
30 | Solution:
31 | [FIXES_REQUIRED]
32 | - Fix 1: [Description]
33 | - Location: [Where to apply]
34 | - Change: [What to modify]
35 | - Fix 2: [If applicable]
36 | ...
37 |
38 |
39 | ```python
40 | # Complete corrected and fully implemented Python code
41 | # Include all necessary imports, definitions, and any additional code for the script to run successfully
42 | ```
43 |
--------------------------------------------------------------------------------
/eval_suite/prompts_raw/image_eval.txt:
--------------------------------------------------------------------------------
1 | # Task: Video Frame Quality Evaluation
2 |
3 | You are tasked with analyzing and scoring a frame taken from a theorem explanation video. Note that you may not have the context of the video, so the captured frame may be a frame where some motion of visual elements is taking place. Your job is to assign a score from 1 to 5 for each criterion. Please provide a brief justification for your scores.
4 |
5 | ## Evaluation Criteria
6 |
7 | 1. **Visual Relevance**
8 | - Does the video frame align with the theorem's concepts and derivations?
9 |
10 | 2. **Element Layout**
11 |    - Placement and Size: Are the visual elements well-placed and appropriately sized within the frame?
12 | - Overlap: Are the visual elements free of unintentional overlap?
13 | - Clarity: Is the visual information conveyed in the frame clear and easy to understand?
14 |
15 | ## Scoring Instructions
16 | 1. Assign a score from **1 to 5** for each dimension:
17 | - **1**: Very poor quality, completely fails to meet the criteria.
18 | - **2**: Below average, significant issues present.
19 | - **3**: Acceptable, meets the basic criteria with minor issues.
20 | - **4**: Good, performs well with no major issues.
21 | - **5**: Excellent, fully meets or exceeds expectations.
22 | 2. Provide a comprehensive evaluation for each dimension.
23 | 3. Format your output in **JSON**
24 |
25 | ### JSON Output Format
26 | ```json
27 | {{
28 | "overall_analysis": "[Provide a general assessment of the image's quality]",
29 | "evaluation": {{
30 | "visual_relevance": {{
31 | "comprehensive_evaluation": "[Analysis of visual relevance]",
32 | "score": [1-5]
33 | }},
34 | "element_layout": {{
35 | "comprehensive_evaluation": "[Analysis of element layout]",
36 | "score": [1-5]
37 | }}
38 | }}
39 | }}
40 | ```
41 |
42 | Description of the theorem:
43 | {description}
44 |
45 | Image:
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_rag_query_generation_fix_error.txt:
--------------------------------------------------------------------------------
1 | You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to transform a Manim error and its associated code into effective queries that will retrieve relevant information from Manim documentation.
2 |
3 | Here is the error message:
4 | {error}
5 |
6 | Here is the Manim code that caused the error:
7 | {code}
8 |
9 | Based on the error and code, generate multiple human-like queries (maximum 10) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation.
10 |
11 | **Specifically, ensure that:**
12 | 1. At least some queries are focused on retrieving information about **Manim function usage** in scenes. Frame these queries to target function definitions, usage examples, and parameter details within Manim documentation.
13 | 2. If the error suggests using plugin functionality, include at least 1 query specifically targeting **plugin documentation**. Clearly mention the plugin name in these queries to focus the search.
14 | 3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant, and to target the most helpful sections of the documentation (API reference, tutorials, examples).
15 |
16 | The above error and code are relevant to these plugins: {relevant_plugins}.
17 | Note that you MUST NOT use the plugins that are not listed above.
18 |
19 | You MUST only output the queries in the following JSON format (with json triple backticks):
20 | ```json
21 | [
22 | {{"type": "manim-core", "query": "content of function usage query"}},
23 | {{"type": "", "query": "content of plugin-specific query"}},
24 | {{"type": "manim-core", "query": "content of API reference query"}}
25 | ...
26 | ]
27 | ```
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_animation_fix_error.txt:
--------------------------------------------------------------------------------
1 | You are an expert Manim developer specializing in debugging and error resolution. Analyze the provided code and error message to provide a comprehensive fix and explanation.
2 |
3 |
4 | Text Explanation:
5 | {text_explanation}
6 |
7 | Manim Code Animation to complement the Text Explanation:
8 | ```python
9 | {manim_code}
10 | ```
11 |
12 | Error Message on code running:
13 | {error_message}
14 |
15 |
16 | You MUST only output the following format (make sure to include the ```python and ``` in the code):
17 |
18 |
19 | Error Type: [Syntax/Runtime/Logic/Other]
20 | Error Location: [File/Line number/Component]
21 | Root Cause: [Brief explanation of what caused the error]
22 | Impact: [What functionality is affected]
23 |
24 |
25 |
26 | [FIXES_REQUIRED]
27 | - Fix 1: [Description]
28 | - Location: [Where to apply]
29 | - Change: [What to modify]
30 | - Fix 2: [If applicable]
31 | ...
32 |
33 | [CORRECTED_CODE]
34 | ```python
35 | # Complete corrected and fully implemented code, don't be lazy
36 | # Include all necessary imports, definitions, and any additional code for the script to run successfully
37 | ```
38 |
39 |
40 |
41 | Requirements:
42 | 1. Provide complete error analysis with specific line numbers where possible.
43 | 2. Include exact instructions for every code change.
44 | 3. Ensure that the [CORRECTED_CODE] section contains complete, executable Python code (not just code snippets). Do not assume context from the prompt.
45 | 4. Explain why the error occurred in plain language.
46 | 5. Include verification steps to confirm the error is resolved.
47 | 6. Suggest preventive measures for avoiding similar errors in the future.
48 | 7. If external assets (e.g., images, audio, video) are referenced, remove them.
49 | 8. Preserve all original code that is not causing the reported error. Do not remove or alter any intentional elements unnecessarily.
50 | 9. Follow best practices for code clarity and the current Manim version.
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_rag_query_generation_code.txt:
--------------------------------------------------------------------------------
1 | You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to transform a complete implementation plan for a Manim video scene into effective queries that will retrieve relevant information from Manim documentation. The implementation plan describes the scene's vision, storyboard, technical implementation, and animation/narration strategy.
2 |
3 | Here is the complete scene implementation plan:
4 |
5 | {implementation_plan}
6 |
7 | Based on the complete implementation plan, generate multiple human-like queries (maximum 10) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation.
8 |
9 | **Specifically, ensure that:**
10 | 1. At least some queries are focused on retrieving information about **Manim function usage** in scenes. Frame these queries to target function definitions, usage examples, and parameter details within Manim documentation.
11 | 2. If the implementation suggests using plugin functionality, include at least 1 query specifically targeting **plugin documentation**. Clearly mention the plugin name in these queries to focus the search.
12 | 3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant, and to target the most helpful sections of the documentation (API reference, tutorials, examples).
13 |
14 | The above implementation plans are relevant to these plugins: {relevant_plugins}.
15 | Note that you MUST NOT use the plugins that are not listed above.
16 |
17 | You MUST only output the queries in the following JSON format (with json triple backticks):
18 | ```json
19 | [
20 | {{"type": "manim-core", "query": "content of function usage query"}},
21 | {{"type": "", "query": "content of plugin-specific query"}},
22 | {{"type": "manim-core", "query": "content of API reference query"}}
23 | ...
24 | ]
25 | ```
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_animation_rag_query_generation.txt:
--------------------------------------------------------------------------------
1 | You are an expert in Manim (Community Edition) and its plugins. Your task is to transform a topic for a Manim animation scene into queries that can be used to retrieve relevant documentation from both Manim core and any relevant plugins.
2 |
3 | Your queries should include keywords related to the specific Manim classes, methods, functions, and *concepts* that are likely to be used to implement the scene, including any plugin-specific functionality. Focus on extracting the core concepts, actions, and vocabulary from the *entire* scene plan. Generate queries that are concise and target different aspects of the documentation (class reference, method usage, animation examples, conceptual explanations) across both Manim core and relevant plugins.
4 |
5 | Here is the Topic (and the context):
6 |
7 | {topic}. {context}
8 |
9 | Based on the topic and the context, generate multiple human-like queries (maximum 5-7) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation.
10 |
11 | **Specifically, ensure that:**
12 | 1. At least 1-2 queries are focused on retrieving information about Manim *function usage* in Manim scenes
13 | 2. If the topic and the context can be linked to the use of plugin functionality, include at least 1 query specifically targeting plugin documentation
14 | 3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant
15 |
16 | The above text explanations are relevant to these plugins: {relevant_plugins}
17 |
18 | Output the queries in the following format:
19 | ```json
20 | [
21 | {{"query": "content of query 1", "type": "manim_core/name_of_the_plugin"}},
22 | {{"query": "content of query 2", "type": "manim_core/name_of_the_plugin"}},
23 | {{"query": "content of query 3", "type": "manim_core/name_of_the_plugin"}},
24 | {{"query": "content of query 4", "type": "manim_core/name_of_the_plugin"}},
25 | {{"query": "content of query 5", "type": "manim_core/name_of_the_plugin"}},
26 | {{"query": "content of query 6", "type": "manim_core/name_of_the_plugin"}},
27 | {{"query": "content of query 7", "type": "manim_core/name_of_the_plugin"}},
28 | ]
29 | ```
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_rag_query_generation_storyboard.txt:
--------------------------------------------------------------------------------
1 | You are an expert in generating search queries specifically for **Manim (Community Edition) documentation** (both core Manim and its plugins). Your task is to transform a storyboard plan for a Manim video scene into effective queries that will retrieve relevant information from Manim documentation. The storyboard plan describes the scene's visual elements and narrative flow.
2 |
3 | Here is the storyboard plan:
4 |
5 | {storyboard}
6 |
7 | Based on the storyboard plan, generate multiple human-like queries (maximum 10) for retrieving relevant documentation. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the implementation.
8 |
9 | **Specifically, ensure that:**
10 | 1. At least some queries are focused on retrieving information about **Manim core functionalities**, like general visual elements or animations. Frame these queries using Manim terminology (classes, methods, concepts).
11 | 2. If the storyboard suggests using specific visual effects or complex animations that might be plugin-related, include at least 1 query specifically targeting **plugin documentation**. Make sure to mention the plugin name if known or suspected.
12 | 3. Queries should be general enough to explore different possibilities within Manim and its plugins based on the storyboard's visual and narrative descriptions, but also specific enough to target Manim documentation effectively.
13 |
14 | The above storyboard might be relevant to these plugins: {relevant_plugins}.
15 | Note that you MUST NOT use the plugins that are not listed above.
16 |
17 | Output the queries in the following format:
18 | ```json
19 | [
20 | {{"query": "content of query 1", "type": "manim_core/{relevant_plugins}"}},
21 | {{"query": "content of query 2", "type": "manim_core/{relevant_plugins}"}},
22 | {{"query": "content of query 3", "type": "manim_core/{relevant_plugins}"}},
23 | {{"query": "content of query 4", "type": "manim_core/{relevant_plugins}"}},
24 | {{"query": "content of query 5", "type": "manim_core/{relevant_plugins}"}},
25 | {{"query": "content of query 6", "type": "manim_core/{relevant_plugins}"}},
26 | {{"query": "content of query 7", "type": "manim_core/{relevant_plugins}"}},
27 | ]
28 | ```
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | annotated-types~=0.7.0
2 | azure-cognitiveservices-speech~=1.41.1
3 | cachetools~=5.5.0
4 | certifi~=2024.8.30
5 | charset-normalizer~=3.4.0
6 | click~=8.1.7
7 | cloup~=3.0.5
8 | Cython~=3.0.11
9 | decorator~=5.1.1
10 | glcontext~=3.0.0
11 | google-ai-generativelanguage~=0.6.10
12 | google-api-core~=2.22.0
13 | google-api-python-client~=2.151.0
14 | google-auth~=2.35.0
15 | google-auth-httplib2~=0.2.0
16 | google-generativeai~=0.8.3
17 | googleapis-common-protos~=1.65.0
18 | grpcio~=1.67.1
19 | grpcio-status~=1.67.1
20 | gTTS~=2.5.3
21 | httplib2~=0.22.0
22 | idna~=3.10
23 | isosurfaces~=0.1.2
24 | manim~=0.18.1
25 | manim-voiceover~=0.3.7
26 | ManimPango~=0.6.0 # sudo apt-get install libsdl-pango-dev if you don't have pangocairo
27 | mapbox_earcut~=1.0.2
28 | markdown-it-py~=3.0.0
29 | mdurl~=0.1.2
30 | moderngl~=5.12.0
31 | multipledispatch~=1.0.0
32 | mutagen~=1.47.0
33 | networkx~=3.4.2
34 | numpy~=2.2.2
35 | pillow
36 | proto-plus~=1.25.0
37 | protobuf~=5.28.3
38 | pyasn1~=0.6.1
39 | pyasn1_modules~=0.4.1
40 | PyAudio~=0.2.14 #required brew install portaudio for mac
41 | pycairo~=1.27.0
42 | pydantic~=2.9.2
43 | pydantic_core~=2.23.4
44 | pydub~=0.25.1
45 | pyglet~=2.0.18
46 | Pygments~=2.18.0
47 | #pyobjc-core~=10.3.1 # only for mac
48 | #pyobjc-framework-Cocoa~=10.3.1 # only for mac
49 | pyparsing~=3.2.0
50 | pyrr~=0.10.3
51 | python-dotenv~=0.21.1
52 | python-slugify~=8.0.4
53 | requests~=2.32.3
54 | rich~=13.9.3
55 | rsa~=4.9
56 | scipy~=1.14.1
57 | screeninfo~=0.8.1
58 | skia-pathops~=0.8.0.post2
59 | sox~=1.5.0
60 | srt~=3.5.3
61 | svgelements~=1.9.6
62 | text-unidecode~=1.3
63 | tqdm~=4.66.5
64 | typing_extensions~=4.12.2
65 | uritemplate~=4.1.1
66 | urllib3~=2.2.3
67 | watchdog~=5.0.3
68 | inquirer
69 | openai~=1.61.0
70 | tiktoken~=0.8.0
71 | timm
72 | sentencepiece
73 | transformers
74 | litellm~=1.60.5
75 | pysrt
76 | moviepy~=2.1.2
77 | yt-dlp
78 | imageio_ffmpeg~=0.5.1
79 | langchain~=0.3.14
80 | langchain_community~=0.3.14
81 | SpeechRecognition~=3.14.1
82 | boto3~=1.36.9
83 | manim-physics~=0.4.0
84 | manim-ml~=0.0.24
85 | manim-chemistry~=0.4.4
86 | manim-dsa~=0.2.0
87 | manim-circuit~=0.0.3
88 | langfuse~=2.58.1
89 | chromadb~=0.6.3
90 | google-cloud-aiplatform~=1.79.0
91 | cairosvg
92 | pylatexenc~=2.10
93 | ffmpeg-python~=0.2.0
94 | kokoro-onnx[gpu] # if you have a GPU, otherwise kokoro-onnx
95 | soundfile~=0.13.1
96 | krippendorff~=0.8.1
97 | statsmodels~=0.14.4
98 | opencv-python~=4.11.0
--------------------------------------------------------------------------------
/eval_suite/prompts_raw/text_eval_new.txt:
--------------------------------------------------------------------------------
1 | You are a specialist in evaluating theorem explanation videos, known for giving clear and objective feedback. You will be given the transcript of a video. Your task is to evaluate and score the content of the video in several dimensions.
2 |
3 | ### Task Objective
4 | 1. Perform an overall analysis of the video.
5 | * Identify the topic of the video.
6 | * Note your general thoughts and impression of the video, and any findings and observations.
7 | 2. Conduct a comprehensive evaluation and score each criterion in the given dimensions.
8 | * Analyze how well or poorly the video meets each criterion.
9 | * Assign a score from **1 to 5** for each dimension:
10 | - **1**: Very poor quality, completely fails to meet the criteria.
11 | - **2**: Below average, significant issues present.
12 | - **3**: Acceptable, meets the basic criteria with minor issues.
13 | - **4**: Good, performs well with no major issues.
14 | - **5**: Excellent, fully meets or exceeds expectations.
15 | 3. Output the results in the specified JSON format.
16 |
17 | ### Evaluation Criteria
18 | 1. **Accuracy and Depth**
19 | - Does the narration explain the theorem accurately?
20 | - Does the video provide intuitive and/or rigorous explanations for why the theorem holds?
21 | 2. **Logical Flow**
22 | - Does the video follow a clear and logical structure?
23 | - Does the video present a coherent buildup of ideas?
24 |
25 | ### Notes
26 | * You do not have access to the visual portion of the video as you are given only the textual portion. Do not reference or commentate on the visuals as they will be evaluated separately - just assume that there are reasonable visuals (e.g., geometric objects, graphs of functions, and calculations) to accompany the narration.
27 | * The evaluation criteria are intended to be independent of each other. Do not restate the same violation in multiple criteria; only consider it in the most relevant criterion.
28 |
29 | ### Output Format
30 | ```json
31 | {{
32 | "overall_analysis": "[Overall analysis]",
33 | "evaluation": {{
34 | "accuracy_and_depth": {{
35 | "comprehensive_evaluation": "[Analysis of accuracy and depth]",
36 | "score": [1-5]
37 | }},
38 | "logical_flow": {{
39 | "comprehensive_evaluation": "[Analysis of logical flow]",
40 | "score": [1-5]
41 | }}
42 | }}
43 | }}
44 | ```
45 |
46 | The transcript of the video is as follows:
47 | {transcript}
48 |
--------------------------------------------------------------------------------
/eval_suite/parse_prompt.py:
--------------------------------------------------------------------------------
1 | import os
2 | from tqdm import tqdm
3 |
4 |
def call_parse_prompt():
    """
    Locate the prompts_raw directory and generate its __init__.py of prompt texts.

    Looks for prompts_raw next to this file first; if absent, walks up the
    parent directories (stopping at the filesystem root) until one is found.
    Delegates the actual file generation to create_python_file_with_texts().
    """
    here = os.path.dirname(os.path.abspath(__file__))
    folder_path = os.path.join(here, "prompts_raw")

    if not os.path.exists(folder_path):
        # Climb toward the filesystem root looking for a prompts_raw folder.
        search_dir = here
        while search_dir != os.path.dirname(search_dir):  # dirname(root) == root
            search_dir = os.path.dirname(search_dir)
            candidate = os.path.join(search_dir, "prompts_raw")
            if os.path.exists(candidate):
                folder_path = candidate
                break

    output_file = os.path.join(folder_path, "__init__.py")
    create_python_file_with_texts(folder_path, output_file)
28 |
29 |
def create_python_file_with_texts(folder_path, output_file):
    """
    Creates a Python file containing prompt texts from .txt files.

    Args:
        folder_path (str): Path to directory containing prompt .txt files
        output_file (str): Path where the output __init__.py file will be created

    The function reads all .txt files under the given folder (recursively),
    converts their contents into Python variables, and writes them to the output
    file. Variable names are derived from file paths with special characters
    replaced.
    """
    with open(output_file, 'w', encoding='utf-8') as out_file:
        out_file.write("# This file is generated automatically through parse_prompt.py\n\n")
        # Keep the directory component (root) from os.walk: files found in
        # subdirectories would otherwise be joined to folder_path directly,
        # producing paths that do not exist.
        txt_files = [
            os.path.join(root, file)
            for root, _, files in os.walk(folder_path)
            for file in files
            if file.endswith(".txt")
        ]
        for file_path in tqdm(txt_files, desc="Processing files"):
            var_name = "_" + file_path.replace(folder_path, "").replace(os.sep, "_").replace(".txt", "").strip("_")
            with open(file_path, 'r', encoding='utf-8') as f:
                # Escape triple quotes so the generated string literal stays
                # valid. The previous replace('"""', '\"\"\"') was a no-op,
                # since \" in a regular string is just ".
                content = f.read().replace('"""', '\\"\\"\\"')
            out_file.write(f'{var_name} = """{content}"""\n\n')
51 |
52 |
if __name__ == "__main__":
    # Allow running this module directly to regenerate prompts_raw/__init__.py.
    call_parse_prompt()
--------------------------------------------------------------------------------
/task_generator/parse_prompt.py:
--------------------------------------------------------------------------------
1 | import os
2 | from tqdm import tqdm
3 |
4 |
def call_parse_prompt():
    """
    Find the prompts_raw directory and generate an __init__.py of prompt texts.

    Checks the directory containing this file first; when prompts_raw is not
    there, ascends through parent directories until the filesystem root.
    Generation itself is handled by create_python_file_with_texts().
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    folder_path = os.path.join(module_dir, "prompts_raw")

    if not os.path.exists(folder_path):
        # Ascend the directory tree; dirname(root) == root terminates the loop.
        probe = module_dir
        while probe != os.path.dirname(probe):
            probe = os.path.dirname(probe)
            candidate = os.path.join(probe, "prompts_raw")
            if os.path.exists(candidate):
                folder_path = candidate
                break

    create_python_file_with_texts(folder_path, os.path.join(folder_path, "__init__.py"))
28 |
29 |
def create_python_file_with_texts(folder_path: str, output_file: str) -> None:
    """
    Generate a Python file containing prompt texts from .txt files.

    Args:
        folder_path (str): Path to directory containing prompt .txt files
        output_file (str): Path where the generated Python file will be saved

    The function reads all .txt files under the given folder (recursively),
    converts their contents into Python variables, and writes them to the
    output file. Variable names are derived from file paths with special
    characters replaced.
    """
    with open(output_file, 'w', encoding='utf-8') as out_file:
        out_file.write("# This file is generated automatically through parse_prompt.py\n\n")
        # os.walk can yield files from subdirectories; joining only with
        # folder_path (as before) produced broken paths for those, so keep
        # each file's own root directory.
        txt_files = [
            os.path.join(root, file)
            for root, _, files in os.walk(folder_path)
            for file in files
            if file.endswith(".txt")
        ]
        for file_path in tqdm(txt_files, desc="Processing files"):
            var_name = "_" + file_path.replace(folder_path, "").replace(os.sep, "_").replace(".txt", "").strip("_")
            with open(file_path, 'r', encoding='utf-8') as f:
                # Escape embedded triple quotes; the old replace('"""', '\"\"\"')
                # was a no-op and would have emitted an invalid string literal.
                content = f.read().replace('"""', '\\"\\"\\"')
            out_file.write(f'{var_name} = """{content}"""\n\n')
51 |
52 |
if __name__ == "__main__":
    # Allow running this module directly to regenerate prompts_raw/__init__.py.
    call_parse_prompt()
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_animation_rag_query_generation_fix_error.txt:
--------------------------------------------------------------------------------
1 | You are an expert in Manim (Community Edition) and its plugins. Your task is to transform a complete implementation plan for a Manim animation scene into queries that can be used to retrieve relevant documentation from both Manim core and any relevant plugins. The implementation plan will describe the scene's vision, technical implementation, and animation strategy.
2 |
3 | Here is the Text Explanation (Implementation Plan) as the context:
4 |
5 | {text_explanation}
6 |
7 | The error message will describe a problem encountered while running Manim code. Your queries should include keywords related to the specific Manim classes, methods, functions, and *concepts* that are likely related to the error, including any plugin-specific functionality. Focus on extracting the core concepts, actions, and vocabulary from the error message itself and the code snippet that produced the error. Generate queries that are concise and target different aspects of the documentation (class reference, method usage, animation examples, conceptual explanations) across both Manim core and relevant plugins.
8 |
9 | Here is the error message and the code snippet:
10 |
11 | **Error Message:**
12 | {error}
13 |
14 | **Code Snippet:**
15 | {code}
16 |
17 | Based on the error message and the code snippet, generate multiple human-like queries (maximum 5-7) for retrieving relevant documentation to fix this error. Please ensure that the search targets are different so that the RAG can retrieve a diverse set of documents covering various aspects of the error and its potential solutions.
18 |
19 | **Specifically, ensure that:**
20 | 1. At least 1-2 queries are focused on retrieving information about Manim *function or class usage* that might be causing the error.
21 | 2. If the error message or code suggests the use of plugin functionality, include at least 1 query specifically targeting plugin documentation related to the error.
22 | 3. Queries should be specific enough to distinguish between core Manim and plugin functionality when relevant.
23 |
24 | Output the queries in the following format:
25 | [
26 | {{"query": "content of query 1", "type": "manim_core/name_of_the_plugin"}},
27 | {{"query": "content of query 2", "type": "manim_core/name_of_the_plugin"}},
28 | {{"query": "content of query 3", "type": "manim_core/name_of_the_plugin"}},
29 | {{"query": "content of query 4", "type": "manim_core/name_of_the_plugin"}},
30 | {{"query": "content of query 5", "type": "manim_core/name_of_the_plugin"}},
31 | {{"query": "content of query 6", "type": "manim_core/name_of_the_plugin"}},
32 | {{"query": "content of query 7", "type": "manim_core/name_of_the_plugin"}},
33 | ]
--------------------------------------------------------------------------------
/eval_suite/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 | from math import prod
4 | from typing import List
5 |
def extract_json(response: str) -> dict:
    """
    Extract JSON content from a string response.

    Args:
        response (str): String containing JSON content, possibly within
            ```json ... ``` or plain ``` ... ``` code fences.

    Returns:
        dict: Extracted and parsed JSON content.

    Raises:
        ValueError: If no valid JSON content could be extracted.
    """
    try:
        return json.loads(response)
    except json.JSONDecodeError:
        pass

    # Fall back to fenced content. \s* tolerates trailing spaces after the
    # opening fence and a missing/extra newline, which the previous exact-\n
    # pattern rejected even though such output is otherwise valid.
    match = re.search(r'```json\s*(.*?)\s*```', response, re.DOTALL)
    if not match:
        # No ```json fence; try a plain ``` fence.
        match = re.search(r'```\s*(.*?)\s*```', response, re.DOTALL)
    if not match:
        raise ValueError("Failed to extract valid JSON content")

    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError as e:
        # Surface the documented exception type (JSONDecodeError is a
        # ValueError subclass, so existing callers are unaffected).
        raise ValueError("Failed to extract valid JSON content") from e
34 |
35 |
def convert_score_fields(data: dict) -> dict:
    """
    Convert score fields in a dictionary to integers recursively.

    Args:
        data (dict): Dictionary containing score fields to convert.

    Returns:
        dict: New dictionary with score fields converted to integers.

    Raises:
        ValueError: If a score value cannot be converted to an integer.
    """
    converted_data = {}
    for key, value in data.items():
        if key == "score":
            if isinstance(value, bool):
                # bool is an int subclass; a True/False score is malformed
                # and previously slipped through the int branch silently.
                raise ValueError(f"Invalid score value: {value!r}")
            if isinstance(value, int):
                converted_data[key] = value
            elif isinstance(value, float) and value.is_integer():
                # Models may emit whole-number floats (e.g. 4.0) for scores.
                converted_data[key] = int(value)
            elif isinstance(value, str) and value.strip().isdigit():
                # Tolerate surrounding whitespace in string scores.
                converted_data[key] = int(value.strip())
            else:
                raise ValueError(f"Invalid score value: {value!r}")
        elif isinstance(value, dict):
            converted_data[key] = convert_score_fields(value)
        else:
            converted_data[key] = value
    return converted_data
64 |
65 |
def calculate_geometric_mean(scores: List[int]) -> float:
    """
    Calculate the geometric mean of a list of scores.

    Args:
        scores (List[int]): List of integer scores; may contain None entries,
            which are ignored.

    Returns:
        float: Geometric mean of the non-None scores. Returns 0.0 when the
            list is empty or contains only None values.
    """
    valid_scores = [score for score in scores if score is not None]
    if not valid_scores:
        return 0.0
    # nth root of the product, where n is the number of usable scores.
    return prod(valid_scores) ** (1.0 / len(valid_scores))
82 |
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_scene_plan.txt:
--------------------------------------------------------------------------------
1 | You are an expert in educational video production, instructional design, and {topic}. Please design a high-quality video to provide in-depth explanation on {topic}.
2 |
3 | **Video Overview:**
4 |
5 | Topic: {topic}
6 | Description: {description}
7 |
8 | **Scene Breakdown:**
9 |
10 | Plan individual scenes. For each scene please provide the following:
11 |
12 | * **Scene Title:** Short, descriptive title (2-5 words).
13 | * **Scene Purpose:** Learning objective of this scene. How does it connect to previous scenes?
14 | * **Scene Description:** Detailed description of scene content.
15 | *   **Scene Layout:** Describe the spatial layout concept in detail. Consider safe area margins and minimum spacing between objects.
16 |
17 | Please generate the scene plan for the video in the following format:
18 |
19 | ```xml
20 |
21 |
22 | Scene Title: [Title]
23 | Scene Purpose: [Learning objective, connection to previous scene]
24 | Scene Description: [Brief content description]
25 | Scene Layout: [Spatial layout concept, consider safe area and spacing]
26 |
27 |
28 |
29 | ...
30 |
31 | ...
32 |
33 | ```
34 |
35 | **Spatial Constraints:**
36 | * **Safe area margins:** 0.5 units on all sides from the scene edges. *All objects must be positioned within these margins.*
37 | * **Minimum spacing:** 0.3 units between any two Manim objects (measured edge to edge). *Ensure adequate spacing to prevent overlaps and maintain visual clarity.*
38 |
39 | Requirements:
40 | 1. Scenes must build progressively, starting from foundational concepts and advancing to more complex ideas to ensure a logical flow of understanding for the viewer. Each scene should naturally follow from the previous one, creating a cohesive learning narrative. Start with simpler scene layouts and progressively increase complexity in later scenes.
41 | 2. The total number of scenes should be between 3 and 7.
42 | 3. Learning objectives should be distributed evenly across the scenes.
43 | 4. The total video duration must be under 15 minutes.
44 | 5. It is essential to use the exact output format, tags, and headers as specified in the prompt.
45 | 6. Maintain consistent formatting throughout the entire scene plan.
46 | 7.  **No External Assets:** Do not import any external files (images, audio, video). *Use only Manim built-in elements and procedural generation.*
47 | 8. **Focus on in-depth explanation of the theorem. Do not include any promotional elements (like YouTube channel promotion, subscribe messages, or external resources) or quiz sessions. Detailed example questions are acceptable and encouraged.**
48 |
49 | Note: High-level plan. Detailed scene specifications will be generated later, ensuring adherence to safe area margins and minimum spacing. The spatial constraints defined above will be strictly enforced in subsequent planning stages.
--------------------------------------------------------------------------------
/eval_suite/text_utils.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 |
3 | import pysrt
4 |
5 | from mllm_tools.litellm import LiteLLMWrapper
6 | from mllm_tools.gemini import GeminiWrapper
7 | from mllm_tools.utils import _prepare_text_inputs
8 | from eval_suite.prompts_raw import _fix_transcript, _text_eval_new
9 | from eval_suite.utils import extract_json, convert_score_fields
10 |
11 |
def parse_srt_to_text(srt_path) -> str:
    """
    Parse an SRT subtitle file into plain text.

    Args:
        srt_path: Path to the SRT subtitle file.

    Returns:
        str: The subtitle text with consecutive duplicate lines removed and
        ellipses replaced by periods.
    """
    collected = []
    for subtitle in pysrt.open(srt_path):
        subtitle.text = subtitle.text.replace("...", ".")
        for candidate in subtitle.text.splitlines():
            # .srt files often repeat a line across consecutive cues; keep
            # only the first occurrence of each consecutive run.
            if collected and collected[-1] == candidate:
                continue
            collected.append(candidate)
    return "\n".join(collected)
32 |
33 |
def fix_transcript(text_eval_model: Union[LiteLLMWrapper, GeminiWrapper], transcript: str) -> str:
    """
    Fix and clean up a transcript using an LLM model.

    Args:
        text_eval_model: The LLM model wrapper to use for fixing the transcript.
        transcript: The input transcript text to fix.

    Returns:
        str: The fixed and cleaned transcript text. If the model did not wrap
            its answer in the expected tag block, the raw response is returned.
    """
    print("Fixing transcript...")

    prompt = _fix_transcript.format(transcript=transcript)
    response = text_eval_model(_prepare_text_inputs(prompt))
    # The prompt asks the model to enclose the fixed transcript in a tag block.
    # BUG FIX: the original code did `response.split("")[0]`, which raises
    # ValueError("empty separator") on every call. Extract the tagged content
    # when present, otherwise fall back to the whole response.
    # NOTE(review): the tag name is assumed to be <SCRIPT> — confirm against
    # the fix_transcript prompt, whose example block appears stripped here.
    fixed_script = response.split("<SCRIPT>", 1)[-1].split("</SCRIPT>", 1)[0].strip()

    return fixed_script
52 |
53 |
def evaluate_text(text_eval_model: LiteLLMWrapper, transcript: str, retry_limit: int) -> dict:
    """
    Evaluate transcript text using an LLM model with retry logic.

    Args:
        text_eval_model: The LLM model wrapper to use for evaluation.
        transcript: The transcript text to evaluate.
        retry_limit: Maximum number of retry attempts on failure. Must be positive.

    Returns:
        dict: The evaluation results as a JSON object.

    Raises:
        ValueError: If all retry attempts fail, or retry_limit is not positive.
    """
    prompt = _text_eval_new.format(transcript=transcript)
    for attempt in range(retry_limit):
        try:
            evaluation = text_eval_model(_prepare_text_inputs(prompt))
            evaluation_json = extract_json(evaluation)
            return convert_score_fields(evaluation_json)
        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {e.__class__.__name__}: {e}")
            if attempt + 1 == retry_limit:
                # Chain the causing exception (the original used `from None`,
                # which discarded the underlying traceback).
                raise ValueError("Reached maximum retry limit. Evaluation failed.") from e
    # Reached only when retry_limit <= 0; the original silently returned None here.
    raise ValueError("retry_limit must be a positive integer.")
81 |
--------------------------------------------------------------------------------
/mllm_tools/vertex_ai.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import List, Dict, Any, Optional
3 | import vertexai
4 | from vertexai.generative_models import GenerativeModel, Part
5 | from google.auth import default
6 | from google.auth.transport import requests
7 |
8 |
9 | # TODO: check if this is the correct way to use Vertex AI
10 | # TODO: add langfuse support
class VertexAIWrapper:
    """Wrapper for Vertex AI to support Gemini models.

    Reads GOOGLE_CLOUD_PROJECT (required) and GOOGLE_CLOUD_LOCATION (optional,
    defaults to us-central1) from the environment, initializes the Vertex AI
    SDK, and exposes a callable interface that converts message dicts into
    Vertex AI `Part` objects before requesting a completion.
    """

    def __init__(
        self,
        model_name: str = "gemini-1.5-pro",
        temperature: float = 0.7,
        print_cost: bool = False,
        verbose: bool = False,
        use_langfuse: bool = False  # NOTE(review): accepted but never used below — Langfuse support is still a TODO
    ):
        """Initialize the Vertex AI wrapper.

        Args:
            model_name: Name of the model to use (e.g. "gemini-1.5-pro")
            temperature: Temperature for generation between 0 and 1
            print_cost: Whether to print the cost of the completion
            verbose: Whether to print verbose output
            use_langfuse: Whether to enable Langfuse logging

        Raises:
            ValueError: If GOOGLE_CLOUD_PROJECT is not set in the environment.
        """
        self.model_name = model_name
        self.temperature = temperature
        self.print_cost = print_cost
        self.verbose = verbose

        # Initialize Vertex AI from environment configuration.
        project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
        location = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1")
        if not project_id:
            raise ValueError("No GOOGLE_CLOUD_PROJECT found in environment variables")

        vertexai.init(project=project_id, location=location)
        self.model = GenerativeModel(model_name)

    def __call__(self, messages: List[Dict[str, Any]], metadata: Optional[Dict[str, Any]] = None) -> str:
        """Process messages and return completion.

        Args:
            messages: List of message dictionaries containing type and content
            metadata: Optional metadata dictionary to pass to the model

        Returns:
            Generated text response from the model

        Raises:
            ValueError: If message type is not supported
        """
        parts = []

        for msg in messages:
            if msg["type"] == "text":
                parts.append(Part.from_text(msg["content"]))
            elif msg["type"] in ["image", "video"]:
                # Media is assumed to be JPEG or MP4 — TODO confirm other formats.
                mime_type = "video/mp4" if msg["type"] == "video" else "image/jpeg"
                if isinstance(msg["content"], str):
                    # Handle GCS URI
                    # NOTE(review): any string lands in this branch, so a local
                    # file path string would also be passed as a URI — confirm
                    # callers only send GCS URIs here.
                    parts.append(Part.from_uri(
                        msg["content"],
                        mime_type=mime_type
                    ))
                else:
                    # Handle file path or bytes
                    parts.append(Part.from_data(
                        msg["content"],
                        mime_type=mime_type
                    ))
            # NOTE(review): unsupported message types are silently skipped even
            # though the docstring advertises a ValueError — confirm intent.

        response = self.model.generate_content(
            parts,
            generation_config={
                "temperature": self.temperature,
                "top_p": 0.95,
            }
        )

        return response.text
--------------------------------------------------------------------------------
/eval_suite/image_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tempfile
3 |
4 | import numpy as np
5 | from PIL import Image, ImageOps
6 | from moviepy import VideoFileClip
7 |
8 | from eval_suite.prompts_raw import _image_eval
9 | from eval_suite.utils import extract_json, convert_score_fields, calculate_geometric_mean
10 | from mllm_tools.utils import _prepare_text_image_inputs
11 | from src.core.parse_video import image_with_most_non_black_space
12 |
def extract_key_frames(video_path, output_dir, num_chunks):
    """Extract key frames from a video by dividing it into chunks and selecting representative frames.

    Args:
        video_path (str): Path to the input video file
        output_dir (str): Directory where extracted frames will be saved
        num_chunks (int): Number of chunks to divide the video into

    Returns:
        list: List of paths to the extracted key frames (may be fewer than
        num_chunks if the video is short or chunks are empty)
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Sample one frame per second, then close the clip immediately.
    # BUG FIX: the original called clip.close() inside the per-chunk loop and
    # leaked the clip entirely on the empty-video early return.
    clip = VideoFileClip(video_path)
    try:
        frames = list(clip.iter_frames(fps=1))
    finally:
        clip.close()

    total_frames = len(frames)
    if total_frames == 0:
        print("No frames extracted from the video.")
        return []

    # BUG FIX: when total_frames < num_chunks the original computed
    # frames_per_chunk == 0 and crashed with ZeroDivisionError below.
    frames_per_chunk = max(1, total_frames // num_chunks)
    num_chunks = min(num_chunks, (total_frames + frames_per_chunk - 1) // frames_per_chunk)

    key_frames = []

    # Process each chunk of frames
    for i in range(num_chunks):
        start_idx = i * frames_per_chunk
        end_idx = min((i + 1) * frames_per_chunk, total_frames)
        chunk_frames = frames[start_idx:end_idx]

        if not chunk_frames:
            print(f"No frames in chunk {i+1}. Skipping.")
            continue

        # Save the frame with the most non-black area as the chunk's key frame.
        output_path = os.path.join(output_dir, f"key_frame_{i+1}.jpg")
        if image_with_most_non_black_space(chunk_frames, output_path) is not None:
            key_frames.append(output_path)

    return key_frames
61 |
62 |
def evaluate_sampled_images(model, video_path, description="No description provided", num_chunks=10, output_folder=None):
    """Evaluate sampled frames from a video using an image evaluation model.

    Args:
        model: The image evaluation model to use
        video_path (str): Path to the input video file
        description (str, optional): Description of the video content. Defaults to "No description provided"
        num_chunks (int, optional): Number of chunks to divide the video into. Defaults to 10
        output_folder (str, optional): Directory for temporary files. Defaults to None

    Returns:
        dict: Dictionary containing evaluation scores and individual frame assessments with keys:
            - evaluation: Dictionary of averaged scores for each criterion
            - image_chunks: List of individual frame evaluation results
    """
    with tempfile.TemporaryDirectory(dir=output_folder) as temp_dir:
        key_frames = extract_key_frames(video_path, temp_dir, num_chunks)

        prompt = _image_eval.format(description=description)

        # Evaluate each key frame independently.
        responses = []
        for key_frame in key_frames:
            inputs = _prepare_text_image_inputs(prompt, key_frame)
            response = model(inputs)
            response_json = extract_json(response)
            response_json = convert_score_fields(response_json)
            responses.append(response_json)

        # BUG FIX: with no extractable frames the original crashed with
        # IndexError on responses[0]; return an empty result instead.
        if not responses:
            return {"evaluation": {}, "image_chunks": []}

        # Aggregate per-criterion scores across frames with a geometric mean.
        criteria = list(responses[0]["evaluation"].keys())
        scores_dict = {c: [] for c in criteria}
        for response in responses:
            for key, val in response["evaluation"].items():
                scores_dict[key].append(val["score"])

        res_score = {key: {"score": calculate_geometric_mean(scores)}
                     for key, scores in scores_dict.items()}

        return {
            "evaluation": res_score,
            "image_chunks": responses
        }
105 |
--------------------------------------------------------------------------------
/src/utils/kokoro_voiceover.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2025 Xposed73
3 | All rights reserved.
4 | This file is part of the Manim Voiceover project.
5 | """
6 |
7 | import hashlib
8 | import json
9 | import numpy as np
10 | from pathlib import Path
11 | from manim_voiceover.services.base import SpeechService
12 | from kokoro_onnx import Kokoro
13 | from manim_voiceover.helper import remove_bookmarks, wav2mp3
14 | from scipy.io.wavfile import write as write_wav
15 | from src.config.config import Config
16 |
17 |
class KokoroService(SpeechService):
    """Speech service class for kokoro_self (using text_to_speech via Kokoro ONNX)."""

    def __init__(self, engine=None,
                 model_path: str = Config.KOKORO_MODEL_PATH,
                 voices_path: str = Config.KOKORO_VOICES_PATH,
                 voice: str = Config.KOKORO_DEFAULT_VOICE,
                 speed: float = Config.KOKORO_DEFAULT_SPEED,
                 lang: str = Config.KOKORO_DEFAULT_LANG,
                 **kwargs):
        """Initialize the Kokoro speech service.

        Args:
            engine: Callable used to synthesize speech; defaults to the local
                text_to_speech method.
            model_path: Path to the Kokoro ONNX model file.
            voices_path: Path to the Kokoro voices file.
            voice: Name of the voice to synthesize with.
            speed: Speech speed multiplier.
            lang: Language code for synthesis.
            **kwargs: Forwarded to SpeechService.__init__.
        """
        self.kokoro = Kokoro(model_path, voices_path)
        self.voice = voice
        self.speed = speed
        self.lang = lang

        if engine is None:
            engine = self.text_to_speech  # Default to local function

        self.engine = engine
        super().__init__(**kwargs)

    def get_data_hash(self, input_data: dict) -> str:
        """
        Generates a hash based on the input data dictionary.
        The hash is used to create a unique identifier for the input data.

        Parameters:
            input_data (dict): A dictionary of input data (e.g., text, voice, etc.).

        Returns:
            str: The generated hash as a string.
        """
        # Convert the input data dictionary to a JSON string (sorted for consistency)
        data_str = json.dumps(input_data, sort_keys=True)
        # Generate a SHA-256 hash of the JSON string
        return hashlib.sha256(data_str.encode('utf-8')).hexdigest()

    def text_to_speech(self, text, output_file, voice_name, speed, lang):
        """
        Generates speech from text using Kokoro ONNX and saves the audio file.
        Normalizes the audio to make it audible.

        Returns:
            The path of the written .wav file (same as output_file).
        """
        # Generate audio samples using Kokoro
        samples, sample_rate = self.kokoro.create(
            text, voice=voice_name, speed=speed, lang=lang
        )

        # Normalize audio to the range [-1, 1]
        max_val = np.max(np.abs(samples))
        if max_val > 0:
            samples = samples / max_val

        # Convert to 16-bit integer PCM format
        samples = (samples * 32767).astype("int16")

        # Save the normalized audio as a .wav file
        write_wav(output_file, sample_rate, samples)
        print(f"Saved at {output_file}")

        return output_file

    def generate_from_text(self, text: str, cache_dir: str = None, path: str = None) -> dict:
        """Synthesize text to an .mp3 file in the cache, reusing cached results.

        Args:
            text: The text to synthesize.
            cache_dir: Directory for cached audio; defaults to self.cache_dir.
            path: Optional explicit .mp3 filename; defaults to a hash-based name.

        Returns:
            dict: Metadata containing the input text, input data, and the
            generated audio filename (relative to cache_dir).
        """
        if cache_dir is None:
            cache_dir = self.cache_dir

        input_data = {"input_text": text, "service": "kokoro_self", "voice": self.voice, "lang": self.lang}
        cached_result = self.get_cached_result(input_data, cache_dir)
        if cached_result is not None:
            return cached_result

        if path is None:
            audio_path = self.get_data_hash(input_data) + ".mp3"
        else:
            audio_path = path

        # Generate .wav file using the configured engine
        audio_path_wav = str(Path(cache_dir) / audio_path.replace(".mp3", ".wav"))
        self.engine(
            text=text,
            output_file=audio_path_wav,
            voice_name=self.voice,
            speed=self.speed,
            lang=self.lang,
        )

        # Convert .wav to .mp3
        mp3_audio_path = str(Path(cache_dir) / audio_path)
        wav2mp3(audio_path_wav, mp3_audio_path)

        # Remove the intermediate .wav file.
        # BUG FIX: the original called remove_bookmarks(audio_path_wav) here,
        # but that helper rewrites bookmark markup in *text* and does not
        # delete files, so stale .wav files accumulated in the cache.
        Path(audio_path_wav).unlink(missing_ok=True)

        return {
            "input_text": text,
            "input_data": input_data,
            "original_audio": audio_path,
        }
--------------------------------------------------------------------------------
/src/utils/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 | try:
4 | from pylatexenc.latexencode import utf8tolatex, UnicodeToLatexEncoder
5 | except:
6 | print("Warning: Missing pylatexenc, please do pip install pylatexenc")
7 |
8 | def _print_response(response_type: str, theorem_name: str, content: str, separator: str = "=" * 50) -> None:
9 | """Print formatted responses from the video generation process.
10 |
11 | Prints a formatted response with separators and headers for readability.
12 |
13 | Args:
14 | response_type (str): Type of response (e.g., 'Scene Plan', 'Implementation Plan')
15 | theorem_name (str): Name of the theorem being processed
16 | content (str): The content to print
17 | separator (str, optional): Separator string for visual distinction. Defaults to 50 equals signs.
18 |
19 | Returns:
20 | None
21 | """
22 | print(f"\n{separator}")
23 | print(f"{response_type} for {theorem_name}:")
24 | print(f"{separator}\n")
25 | print(content)
26 | print(f"\n{separator}")
27 |
28 | def _extract_code(response_text: str) -> str:
29 | """Extract code blocks from a text response.
30 |
31 | Extracts Python code blocks delimited by ```python markers. If no code blocks are found,
32 | returns the entire response text.
33 |
34 | Args:
35 | response_text (str): The text response containing code blocks
36 |
37 | Returns:
38 | str: The extracted code blocks joined by newlines, or the full response if no blocks found
39 | """
40 | code = ""
41 | code_blocks = re.findall(r'```python\n(.*?)\n```', response_text, re.DOTALL)
42 | if code_blocks:
43 | code = "\n\n".join(code_blocks)
44 | elif "```" not in response_text: # if no code block, return the whole response
45 | code = response_text
46 | return code
47 |
def extract_json(response: str) -> dict:
    """Extract and parse JSON content from a text response.

    Tries direct parsing first, then looks for a ```json fenced block, then a
    generic ``` fenced block.

    Args:
        response (str): The text response containing JSON content

    Returns:
        dict: The parsed JSON content, or an empty list if no parseable
        content is found.

    Note:
        A fenced block that is found but contains invalid JSON will raise
        json.JSONDecodeError to the caller.
    """
    try:
        return json.loads(response)
    except json.JSONDecodeError:
        pass

    # Prefer an explicit ```json fence, then fall back to a generic fence.
    match = (re.search(r'```json\n(.*?)\n```', response, re.DOTALL)
             or re.search(r'```\n(.*?)\n```', response, re.DOTALL))
    if match is None:
        print(f"Warning: Failed to extract valid JSON content from {response}")
        return []
    return json.loads(match.group(1))
80 |
81 | def _fix_unicode_to_latex(text: str, parse_unicode: bool = True) -> str:
82 | """Convert Unicode symbols to LaTeX source code.
83 |
84 | Converts Unicode subscripts and superscripts to LaTeX format, with optional full Unicode parsing.
85 |
86 | Args:
87 | text (str): The text containing Unicode symbols to convert
88 | parse_unicode (bool, optional): Whether to perform full Unicode to LaTeX conversion. Defaults to True.
89 |
90 | Returns:
91 | str: The text with Unicode symbols converted to LaTeX format
92 | """
93 | # Map of unicode subscripts to latex format
94 | subscripts = {
95 | "₀": "_0", "₁": "_1", "₂": "_2", "₃": "_3", "₄": "_4",
96 | "₅": "_5", "₆": "_6", "₇": "_7", "₈": "_8", "₉": "_9",
97 | "₊": "_+", "₋": "_-"
98 | }
99 | # Map of unicode superscripts to latex format
100 | superscripts = {
101 | "⁰": "^0", "¹": "^1", "²": "^2", "³": "^3", "⁴": "^4",
102 | "⁵": "^5", "⁶": "^6", "⁷": "^7", "⁸": "^8", "⁹": "^9",
103 | "⁺": "^+", "⁻": "^-"
104 | }
105 |
106 | for unicode_char, latex_format in {**subscripts, **superscripts}.items():
107 | text = text.replace(unicode_char, latex_format)
108 |
109 | if parse_unicode:
110 | text = utf8tolatex(text)
111 |
112 | return text
113 |
def extract_xml(response: str) -> str:
    """Extract XML content from a text response.

    Extracts XML content between ```xml markers. Returns the full response if
    no XML blocks are found.

    Args:
        response (str): The text response containing XML content

    Returns:
        str: The extracted XML content, or the full response if no XML blocks found
    """
    # BUG FIX: the original used a bare `except:` around `.group(1)`, which
    # also swallowed KeyboardInterrupt/SystemExit; check the match explicitly.
    match = re.search(r'```xml\n(.*?)\n```', response, re.DOTALL)
    if match is None:
        return response
    return match.group(1)
129 |
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_visual_self_reflection.txt:
--------------------------------------------------------------------------------
1 | You are an expert in Manim animations and educational video quality assessment. Your task is to analyze a rendered Manim video and its corresponding audio narration to identify areas for visual and auditory improvement, ensuring alignment with the provided implementation plan and enhancing the video's teaching effectiveness.
2 |
3 | Please analyze the provided Manim video and listen to the accompanying audio narration. Conduct a thorough self-reflection focusing on the following aspects:
4 |
5 | **1. Visual Presentation and Clarity (Automated VLM Analysis & Expert Human-like Judgment):**
6 |
7 | * **Object Overlap:** Does the video exhibit any visual elements (text, shapes, equations, etc.) overlapping in a way that obscures information or makes the animation difficult to understand? If possible, detect regions of significant overlap and highlight them in your reflection.
8 | * **Out-of-Bounds Objects:** Are any objects positioned partially or entirely outside of the visible frame of the video? Identify and report objects that appear to be clipped or outside the frame boundaries.
9 | * **Incorrect Object Positioning:** Based on your understanding of good visual design and the scene's educational purpose, are objects placed in positions that are illogical, distracting, or misaligned with their intended locations or relationships to other elements as described in the implementation plan? Consider:
10 | * **Logical Flow:** Does the spatial arrangement support the intended visual flow and narrative progression of the scene?
11 | * **Alignment and Balance:** Is the scene visually balanced? Are elements aligned in a way that is aesthetically pleasing and contributes to clarity, or does the layout appear haphazard or unbalanced?
12 | * **Proximity and Grouping:** Are related elements positioned close enough to be visually grouped, and are unrelated elements sufficiently separated to avoid visual clutter?
13 | * **General Visual Clarity & Effectiveness:** Consider broader aspects of visual communication. Are there any other issues that detract from the video's clarity, impact, or overall effectiveness? This could include:
14 | * **Visual Clutter:** Is the scene too busy or visually overwhelming at any point? Are there too many elements on screen simultaneously?
15 | * **Poor Spacing/Layout:** Is the spacing between elements inconsistent or inefficient, making the scene feel cramped or unbalanced? Are margins and padding used effectively?
16 | * **Ineffective Use of Color:** Are color choices distracting, clashing, or not contributing to the animation's message? Are colors used consistently and purposefully to highlight key information?
17 | * **Pacing Issues (Visual):** Is the visual animation too fast or too slow in certain sections, hindering comprehension? Are visual transitions smooth and well-timed?
18 | * **Animation Clarity:** Are the animations themselves clear and helpful in conveying the intended information? Do animations effectively guide the viewer's eye and focus attention?
19 |
20 | **2. Narration Quality:**
21 |
22 | * **Narration Clarity and Pacing:** Is the narration clear, concise, and easy to understand? Is the pacing of the narration appropriate for the visual content and the target audience? Does the narration effectively support the visual explanations?
23 | * **Narration Sync with Visuals:** Does the narration effectively synchronize with the on-screen visuals? Use VLM to analyze the video and identify instances where the narration is misaligned with the animations or visual elements it is describing. Report specific timings of misalignment.
24 |
25 | **3. Alignment with Implementation Plan:**
26 |
27 | * **Visual Fidelity:** Does the rendered video accurately reflect the visual elements and spatial arrangements described in the provided Manim Implementation Plan? Identify any deviations.
28 | * **Animation Fidelity:** Do the animations in the video match the animation methods and sequences outlined in the Implementation Plan? Report any discrepancies.
29 |
30 | Manim Implementation Plan:
31 | {implementation}
32 |
33 | Generated Code:
34 | {generated_code}
35 |
36 | Output Format 1:
37 | If any issues are identified in visual presentation, audio quality, narration, or plan alignment, please provide a detailed reflection on the issues and how to improve the video's visual and auditory quality, narration effectiveness, and code correctness. Then, you must return the updated Python code that directly addresses these issues. The code must be complete and executable.
38 |
39 |
40 | [Detailed reflection on visual, auditory, narration, and plan alignment issues and improvement suggestions. Include specific timings for narration/visual sync issues and descriptions of object overlap/out-of-bounds problems if detected by VLM. Be specific about code changes needed for improvement.]
41 |
42 |
43 | [Improved Python Code - Complete and Executable - Directly Addressing Reflection Points]
44 |
45 |
46 | Output Format 2:
47 | If no issues are found and the video and audio are deemed high quality, visually clear, narratively effective, and fully aligned with the implementation plan, please explicitly only return "" as output.
--------------------------------------------------------------------------------
/eval_suite/prompts_raw/__init__.py:
--------------------------------------------------------------------------------
1 | # This file is generated automatically through parse_prompt.py
2 |
# Prompt for scoring a chunk of a theorem explanation video on visual
# consistency; `{description}` is filled via str.format and the video chunk is
# appended after the prompt text. Double braces escape literal JSON braces.
_video_eval_new = """# Task: Video Frame Quality Evaluation

You are tasked with analyzing and scoring a chunk of a theorem explanation video. Note that you may not have the full context of the video. Your job is to assign a score from 1 to 5 for each criterion. Please provide a brief justification for your scores.

## Evaluation Criteria

1. **Visual Consistency**
   - Style Consistency: Does the visual style remain consistent across frames?
   - Smoothness: Are the motions and transitions smooth?

## Scoring Instructions
1. Assign a score from **1 to 5** for each dimension:
   - **1**: Very poor quality, completely fails to meet the criteria.
   - **2**: Below average, significant issues present.
   - **3**: Acceptable, meets the basic criteria with minor issues.
   - **4**: Good, performs well with no major issues.
   - **5**: Excellent, fully meets or exceeds expectations.
2. Provide a comprehensive evaluation for each dimension.
3. Format your output in **JSON**

### JSON Output Format
```json
{{
    "overall_analysis": "[Provide a general assessment of the video's quality]",
    "evaluation": {{
        "visual_consistency": {{
            "comprehensive_evaluation": "[Analysis of visual consistency]",
            "score": [1-5]
        }}
    }}
}}
```

Description of the theorem:
{description}

Video chunk:"""
40 |
# Prompt for scoring a video transcript (accuracy/depth and logical flow);
# `{transcript}` is filled via str.format. Double braces escape literal JSON.
_text_eval_new = """You are a specialist in evaluating theorem explanation videos, known for giving clear and objective feedback. You will be given the transcript of a video. Your task is to evaluate and score the content of the video in several dimensions.

### Task Objective
1. Perform an overall analysis of the video.
    * Identify the topic of the video.
    * Note your general thoughts and impression of the video, and any findings and observations.
2. Conduct a comprehensive evaluation and score each criterion in the given dimensions.
    * Analyze how well or poorly the video meets each criterion.
    * Assign a score from **1 to 5** for each dimension:
        - **1**: Very poor quality, completely fails to meet the criteria.
        - **2**: Below average, significant issues present.
        - **3**: Acceptable, meets the basic criteria with minor issues.
        - **4**: Good, performs well with no major issues.
        - **5**: Excellent, fully meets or exceeds expectations.
3. Output the results in the specified JSON format.

### Evaluation Criteria
1. **Accuracy and Depth**
    - Does the narration explain the theorem accurately?
    - Does the video provide intuitive and/or rigorous explanations for why the theorem holds?
2. **Logical Flow**
    - Does the video follow a clear and logical structure?
    - Does the video present a coherent buildup of ideas?

### Notes
* You do not have access to the visual portion of the video as you are given only the textual portion. Do not reference or commentate on the visuals as they will be evaluated separately - just assume that there are reasonable visuals (e.g., geometric objects, graphs of functions, and calculations) to accompany the narration.
* The evaluation criteria are intended to be independent of each other. Do not restate the same violation in multiple criteria; only consider it in the most relevant criterion.

### Output Format
```json
{{
    "overall_analysis": "[Overall analysis]",
    "evaluation": {{
        "accuracy_and_depth": {{
            "comprehensive_evaluation": "[Analysis of accuracy and depth]",
            "score": [1-5]
        }},
        "logical_flow": {{
            "comprehensive_evaluation": "[Analysis of logical flow]",
            "score": [1-5]
        }}
    }}
}}
```

The transcript of the video is as follows:
{transcript}
"""
89 |
# Prompt for restoring punctuation/capitalization in an auto-generated YouTube
# transcript; `{transcript}` is filled via str.format.
# NOTE(review): the example block markers this prompt refers to ("enclose the
# fixed transcript with a block, i.e.:") appear to have been stripped from the
# text below — confirm against the original fix_transcript.txt.
_fix_transcript = """You are an expert in YouTube video transcripts. There is a transcript that was automatically generated through YouTube, so it lacks proper capitalization and punctuation. Your task is to fix the transcript so that there is proper punctuation, capitalization, and spacing. Do not make other modifications (e.g., keep the original word choice).

You should enclose the fixed transcript with a block, i.e.:


Original transcript: {transcript}
"""
99 |
100 | _image_eval = """# Task: Video Frame Quality Evaluation
101 |
102 | You are tasked with analyzing and scoring a frame taken from a theorem explanation video. Note that you may not have the context of the video, so the captured frame may be a frame where some motion of visual elements is taking place. Your job is to assign a score from 1 to 5 for each criterion. Please provide a brief justification for your scores.
103 |
104 | ## Evaluation Criteria
105 |
106 | 1. **Visual Relevance**
107 | - Does the video frame align with the theorem's concepts and derivations?
108 |
109 | 2. **Element Layout**
110 | - Placemend and Size: Are the visual elements well-placed and appropriately sized within the frame?
111 | - Overlap: Are the visual elements free of unintentional overlap?
112 | - Clarity: Is the visual information conveyed in the frame clear and easy to understand?
113 |
114 | ## Scoring Instructions
115 | 1. Assign a score from **1 to 5** for each dimension:
116 | - **1**: Very poor quality, completely fails to meet the criteria.
117 | - **2**: Below average, significant issues present.
118 | - **3**: Acceptable, meets the basic criteria with minor issues.
119 | - **4**: Good, performs well with no major issues.
120 | - **5**: Excellent, fully meets or exceeds expectations.
121 | 2. Provide a comprehensive evaluation for each dimension.
122 | 3. Format your output in **JSON**
123 |
124 | ### JSON Output Format
125 | ```json
126 | {{
127 | "overall_analysis": "[Provide a general assessment of the image's quality]",
128 | "evaluation": {{
129 | "visual_relevance": {{
130 | "comprehensive_evaluation": "[Analysis of visual relevance]",
131 | "score": [1-5]
132 | }},
133 | "element_layout": {{
134 | "comprehensive_evaluation": "[Analysis of element layout]",
135 | "score": [1-5]
136 | }}
137 | }}
138 | }}
139 | ```
140 |
141 | Description of the theorem:
142 | {description}
143 |
144 | Image:"""
145 |
146 |
--------------------------------------------------------------------------------
/mllm_tools/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Union, List, Dict, Any, Optional
2 | from PIL import Image
3 | import google.generativeai as genai
4 | import tempfile
5 | import os
6 | from .gemini import GeminiWrapper
7 | from .vertex_ai import VertexAIWrapper
8 |
9 |
def _prepare_text_inputs(texts: Union[str, List[str]]) -> List[Dict[str, str]]:
    """
    Converts a list of text strings into the input format for the Agent model.

    Args:
        texts (Union[str, List[str]]): The text string(s) to be processed.
            A bare string is treated as a single-element list, matching the
            other _prepare_* helpers in this module.

    Returns:
        List[Dict[str, str]]: A list of dictionaries formatted for the Agent model.
    """
    inputs = []
    # Add each text string to the inputs
    if isinstance(texts, str):
        texts = [texts]
    for text in texts:
        inputs.append({
            "type": "text",
            "content": text
        })
    return inputs
30 |
def _prepare_text_image_inputs(texts: Union[str, List[str]], images: Union[str, Image.Image, List[Union[str, Image.Image]]]) -> List[Dict[str, str]]:
    """
    Converts text strings and images into the input format for the Agent model.

    Args:
        texts (Union[str, List[str]]): Text string(s) to be processed.
        images (Union[str, Image.Image, List[Union[str, Image.Image]]]): Image file path(s) or PIL Image object(s).
    Returns:
        List[Dict[str, str]]: A list of dictionaries formatted for the Agent model.
    """
    # Normalize bare values to single-element lists.
    text_items = [texts] if isinstance(texts, str) else texts
    image_items = [images] if isinstance(images, (str, Image.Image)) else images

    # Texts first, then images, preserving order within each group.
    inputs = [{"type": "text", "content": entry} for entry in text_items]
    inputs += [{"type": "image", "content": entry} for entry in image_items]
    return inputs
58 |
59 | def _prepare_text_video_inputs(texts: Union[str, List[str]], videos: Union[str, List[str]]) -> List[Dict[str, str]]:
60 | """
61 | Converts text strings and video file paths into the input format for the Agent model.
62 |
63 | Args:
64 | texts (Union[str, List[str]]): Text string(s) to be processed.
65 | videos (Union[str, List[str]]): Video file path(s).
66 | Returns:
67 | List[Dict[str, str]]: A list of dictionaries formatted for the Agent model.
68 | """
69 | inputs = []
70 | # Add each text string to the inputs
71 | if isinstance(texts, str):
72 | texts = [texts]
73 | for text in texts:
74 | inputs.append({
75 | "type": "text",
76 | "content": text
77 | })
78 | # Add each video file path to the inputs
79 | if isinstance(videos, str):
80 | videos = [videos]
81 | for video in videos:
82 | inputs.append({
83 | "type": "video",
84 | "content": video
85 | })
86 | return inputs
87 |
88 | def _prepare_text_audio_inputs(texts: Union[str, List[str]], audios: Union[str, List[str]]) -> List[Dict[str, str]]:
89 | """
90 | Converts text strings and audio file paths into the input format for the Agent model.
91 |
92 | Args:
93 | texts (Union[str, List[str]]): Text string(s) to be processed.
94 | audios (Union[str, List[str]]): Audio file path(s).
95 | Returns:
96 | List[Dict[str, str]]: A list of dictionaries formatted for the Agent model.
97 | """
98 | inputs = []
99 | # Add each text string to the inputs
100 | if isinstance(texts, str):
101 | texts = [texts]
102 | for text in texts:
103 | inputs.append({
104 | "type": "text",
105 | "content": text
106 | })
107 | # Add each audio file path to the inputs
108 | if isinstance(audios, str):
109 | audios = [audios]
110 | for audio in audios:
111 | inputs.append({
112 | "type": "audio",
113 | "content": audio
114 | })
115 | return inputs
116 |
117 | def _extract_code(text: str) -> str:
118 | """Helper to extract code block from model response, support Gemini style and OpenAI style"""
119 | try:
120 | # Find code between ```python and ``` tags
121 | start = text.split("```python\n")[-1]
122 | end = start.split("```")[0]
123 | return end.strip()
124 | except IndexError:
125 | return text
126 |
def _upload_to_gemini(input, mime_type=None):
    """Uploads the given file or PIL image to Gemini.

    See https://ai.google.dev/gemini-api/docs/prompting_with_media

    Args:
        input: A file path (str) or a PIL ``Image`` to upload.
        mime_type: Optional MIME type; defaults to "image/jpeg" when a PIL
            image is given.

    Returns:
        The uploaded Gemini file object.

    Raises:
        ValueError: If ``input`` is neither a file path nor a PIL Image.
    """
    # NOTE: the parameter name shadows the builtin ``input``; kept as-is for
    # backward compatibility with existing keyword callers.
    if isinstance(input, str):
        # Input is a file path
        file = genai.upload_file(input, mime_type=mime_type)
    elif isinstance(input, Image.Image):
        # Input is a PIL image: serialize it to a temporary JPEG first.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
            input.save(tmp_file, format="JPEG")
            tmp_file_path = tmp_file.name
        try:
            file = genai.upload_file(tmp_file_path, mime_type=mime_type or "image/jpeg")
        finally:
            # Always remove the temp file, even when the upload raises
            # (the original leaked the file on upload errors).
            os.remove(tmp_file_path)
    else:
        raise ValueError("Unsupported input type. Must be a file path or PIL Image.")

    #print(f"Uploaded file '{file.display_name}' as: {file.uri}")
    return file
147 |
def get_media_wrapper(model_name: str) -> Optional[Union[GeminiWrapper, VertexAIWrapper]]:
    """Return the media-capable wrapper matching a provider-prefixed model name.

    Args:
        model_name: Model identifier such as ``gemini/<name>`` or
            ``vertex_ai/<name>``.

    Returns:
        A ``GeminiWrapper`` or ``VertexAIWrapper`` built from the bare model
        name (last ``/``-separated segment), or ``None`` when the prefix
        matches neither provider.
    """
    bare_name = model_name.split('/')[-1]
    if model_name.startswith('gemini/'):
        return GeminiWrapper(model_name=bare_name)
    if model_name.startswith('vertex_ai/'):
        return VertexAIWrapper(model_name=bare_name)
    return None
155 |
def prepare_media_messages(prompt: str, media_path: Union[str, Image.Image], model_name: str) -> List[Dict[str, Any]]:
    """Prepare model input messages for a text prompt plus one media item.

    ``.mp4`` paths are passed through as video for Gemini/Vertex models,
    which handle video natively; everything else is treated as an image.

    Args:
        prompt: The text prompt to send alongside the media.
        media_path: Path to an image/video file, or a PIL ``Image`` object.
        model_name: Provider-prefixed model identifier.

    Returns:
        A two-entry message list: the text entry followed by the media entry.
    """
    text_entry = {"type": "text", "content": prompt}
    is_mp4 = isinstance(media_path, str) and media_path.endswith('.mp4')
    supports_video = model_name.startswith('gemini/') or model_name.startswith('vertex_ai/')

    if is_mp4 and supports_video:
        return [text_entry, {"type": "video", "content": media_path}]

    # Images, or non-Gemini/Vertex models: load from disk when a path was
    # given, otherwise use the PIL image directly.
    media = Image.open(media_path) if isinstance(media_path, str) else media_path
    return [text_entry, {"type": "image", "content": media}]
--------------------------------------------------------------------------------
/eval_suite/video_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import tempfile
4 |
5 | from dotenv import load_dotenv
6 |
7 | from mllm_tools.utils import _prepare_text_video_inputs
8 | from eval_suite.prompts_raw import _video_eval_new
9 | from eval_suite.utils import extract_json, convert_score_fields
10 |
11 | load_dotenv()
12 |
13 |
def reduce_video_framerate(input_path, target_fps=1, output_path=None):
    """
    Reduces the frame rate of a video by only keeping frames at the target interval.

    Args:
        input_path (str): Path to the input video
        target_fps (int): Target frames per second (default: 1)
        output_path (str, optional): Path to save the processed video. If None, uses a temporary file.

    Returns:
        str: Path to the processed video. Note the extension may be switched
            to ``.avi`` if only an AVI-capable codec could be initialized.

    Raises:
        ValueError: If input video cannot be opened or has invalid FPS
        RuntimeError: If video writer initialization fails or output video creation fails
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open input video: {input_path}")

    original_fps = cap.get(cv2.CAP_PROP_FPS)
    if original_fps <= 0:
        raise ValueError(f"Invalid FPS ({original_fps}) detected in input video")

    # Keep every `frame_interval`-th frame. Clamp to at least 1 so a
    # target_fps greater than the source FPS cannot produce a zero interval,
    # which would crash the modulo in the copy loop with ZeroDivisionError.
    frame_interval = max(1, int(original_fps / target_fps))

    # Get video properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Use provided output path or create a temporary file. Close the handle
    # immediately: only the path is needed, and keeping it open can block
    # the writer on some platforms (e.g. Windows).
    if output_path is None:
        temp_output = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
        output_path = temp_output.name
        temp_output.close()

    # Ensure output directory exists. A bare filename has an empty dirname,
    # and os.makedirs('') raises, so only create when a directory is present.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Try different codecs in order of preference
    codecs = [
        ('avc1', '.mp4'),  # H.264 codec
        ('mp4v', '.mp4'),  # MP4V codec
        ('XVID', '.avi'),  # XVID codec
        ('MJPG', '.avi'),  # Motion JPEG codec
    ]

    success = False
    for codec, ext in codecs:
        if output_path.endswith('.mp4') and not ext.endswith('.mp4'):
            # If we're switching to AVI format, change the extension
            output_path = output_path[:-4] + ext

        fourcc = cv2.VideoWriter_fourcc(*codec)
        out = cv2.VideoWriter(output_path, fourcc, target_fps, (width, height))

        if out.isOpened():
            success = True
            print(f"Successfully initialized video writer with codec: {codec}")
            break
        else:
            # Release the failed writer and remove any partial file it made.
            out.release()
            if os.path.exists(output_path):
                os.remove(output_path)

    if not success:
        raise RuntimeError("Could not initialize video writer with any available codec")

    frame_count = 0
    frames_written = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Only write frames at the specified interval
        if frame_count % frame_interval == 0:
            out.write(frame)
            frames_written += 1
        frame_count += 1

    cap.release()
    out.release()

    # Verify the output by re-opening it
    verify_cap = cv2.VideoCapture(output_path)
    if not verify_cap.isOpened():
        raise RuntimeError(f"Failed to create output video at {output_path}")

    actual_fps = verify_cap.get(cv2.CAP_PROP_FPS)
    total_frames = verify_cap.get(cv2.CAP_PROP_FRAME_COUNT)
    verify_cap.release()

    if actual_fps <= 0:
        print("Warning: Output video reports invalid FPS. This might be a codec issue.")
        actual_fps = target_fps  # Use target FPS for duration calculation

    print(f"Created video with {frames_written} frames at {actual_fps} FPS")
    print(f"Total duration: {total_frames/actual_fps:.2f} seconds")
    print(f"Video saved to: {output_path}")

    return output_path
115 |
116 |
def evaluate_video_chunk_new(model, video_path, transcript="No transcript provided", description="No description provided",
                             save_processed_video=None, target_fps=None, retry_limit=5):
    """
    Evaluate a single video chunk using a multimodal model.

    Args:
        model: The multimodal model to use for evaluation
        video_path (str): Path to the video file to evaluate
        transcript (str, optional): Video transcript text. Defaults to "No transcript provided"
        description (str, optional): Video description text. Defaults to "No description provided"
        save_processed_video (str, optional): Path to save processed video. If None, uses temporary file
        target_fps (int, optional): Target frames per second for video processing. If None, no processing
        retry_limit (int, optional): Maximum number of retry attempts. Defaults to 5

    Returns:
        dict: Evaluation results as a JSON object with scores converted to integers

    Raises:
        FileNotFoundError: If video file does not exist
        Exception: If evaluation fails after all retry attempts
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")

    # Only process video if target_fps is specified
    if target_fps is not None:
        processed_video_path = reduce_video_framerate(video_path, target_fps=target_fps, output_path=save_processed_video)
        video_to_use = processed_video_path
    else:
        video_to_use = video_path

    # NOTE(review): `transcript` is accepted but never interpolated into the
    # prompt below (only `description` is used) — confirm whether the
    # _video_eval_new template should also receive the transcript.
    prompt = _video_eval_new.format(description=description)
    inputs = _prepare_text_video_inputs(prompt, video_to_use)

    try:
        # Retry the model call: a response is only accepted once it parses as
        # JSON and its score fields convert cleanly; otherwise try again.
        for attempt in range(retry_limit):
            try:
                response = model(inputs)
                response_json = extract_json(response)
                response_json = convert_score_fields(response_json)

                return response_json
            except Exception as e:
                print(f"Attempt {attempt + 1} failed: {e}")
                if attempt + 1 == retry_limit:
                    print("Reached maximum retry limit. Evaluation failed.")
                    # Re-raise the last failure so callers see the real error.
                    raise
    finally:
        # Clean up the temporary processed video if we created one
        # (i.e. we re-encoded the video and the caller did not ask to keep it)
        if target_fps is not None and save_processed_video is None and os.path.exists(processed_video_path):
            os.unlink(processed_video_path)
--------------------------------------------------------------------------------
/mllm_tools/gemini.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict, Any, Union, Optional
2 | import io
3 | import os
4 | import base64
5 | from PIL import Image
6 | import mimetypes
7 | import google.generativeai as genai
8 | import tempfile
9 | import time
10 | from urllib.parse import urlparse
11 | import requests
12 | from io import BytesIO
13 |
class GeminiWrapper:
    """Wrapper for Gemini to support multiple models and logging.

    Handles API-key configuration, safety settings, media upload (local
    files, URLs, and PIL images), and a single ``generate_content`` call per
    invocation.
    """

    def __init__(
        self,
        model_name: str = "gemini-1.5-pro-002",
        temperature: float = 0.7,
        print_cost: bool = False,
        verbose: bool = False,
        use_langfuse: bool = False
    ):
        """
        Initialize the Gemini wrapper

        Args:
            model_name: Name of the model to use
            temperature: Temperature for completion
            print_cost: Whether to print the cost of the completion
            verbose: Whether to print verbose output
            use_langfuse: Whether to enable Langfuse logging

        Raises:
            ValueError: If neither GEMINI_API_KEY nor GOOGLE_API_KEY is set.
        """
        # Accept both bare names ("gemini-1.5-pro-002") and provider-prefixed
        # names ("gemini/gemini-1.5-pro-002"); only the last segment is kept.
        self.model_name = model_name.split('/')[-1] if '/' in model_name else model_name
        self.temperature = temperature
        self.print_cost = print_cost
        self.verbose = verbose
        # Running cost total. Initialized here but not updated anywhere in
        # this class as written.
        self.accumulated_cost = 0
        # NOTE(review): `use_langfuse` is accepted but not stored or used in
        # this class — confirm whether Langfuse wiring was intended here.

        api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("No API_KEY found. Please set the `GEMINI_API_KEY` or `GOOGLE_API_KEY` environment variable.")
        genai.configure(api_key=api_key)

        generation_config = {
            "temperature": self.temperature,
            "top_p": 0.95,
            "response_mime_type": "text/plain",
        }
        # All safety filters are disabled so content is not blocked by the
        # default category thresholds.
        safety_settings = [
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
        ]
        self.model = genai.GenerativeModel(
            model_name=self.model_name,
            safety_settings=safety_settings,
            generation_config=generation_config,
        )

    def _get_mime_type(self, file_path: str) -> str:
        """
        Get the MIME type of a file based on its extension

        Args:
            file_path: Path to the file

        Returns:
            MIME type as a string (e.g., "image/jpeg", "audio/mp3")

        Raises:
            ValueError: If the extension does not map to a known MIME type.
        """
        mime_type, _ = mimetypes.guess_type(file_path)
        if mime_type is None:
            raise ValueError(f"Unsupported file type: {file_path}")
        return mime_type

    def _download_file(self, url: str) -> str:
        """
        Download a file from a URL and save it as a temporary file

        Args:
            url: URL of the file to download

        Returns:
            Path to the temporary file (not deleted by this class)

        Raises:
            ValueError: If the HTTP response status is not 200.
        """
        # NOTE(review): no request timeout is set and the temporary file is
        # never removed by this class — confirm both are acceptable for the
        # calling workflow.
        response = requests.get(url)
        if response.status_code == 200:
            temp_file = tempfile.NamedTemporaryFile(delete=False)
            temp_file.write(response.content)
            temp_file.close()
            return temp_file.name
        else:
            raise ValueError(f"Failed to download file from URL: {url}")

    def _save_image_to_temp(self, image: Image.Image) -> str:
        """
        Save a PIL Image to a temporary file

        Args:
            image: PIL Image object

        Returns:
            Path to the temporary PNG file (not deleted by this class)
        """
        temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
        image.save(temp_file, format="PNG")
        temp_file.close()
        return temp_file.name

    def _upload_to_gemini(self, file_path: str, mime_type: Optional[str] = None):
        """
        Uploads the given file to Gemini.

        Args:
            file_path: Path to the file
            mime_type: MIME type of the file

        Returns:
            Uploaded file object
        """
        return genai.upload_file(file_path, mime_type=mime_type)

    def __call__(self, messages: List[Dict[str, Any]], metadata: Optional[Dict[str, Any]] = None) -> str:
        """
        Process messages and return completion

        Args:
            messages: List of message dictionaries with 'type' and 'content' keys.
                Supported types: "text" (str content) and "image"/"audio"/"video"
                (content may be a PIL Image, an http(s) URL, or a local file path).
            metadata: Optional metadata to pass to Gemini completion
                (unused in this implementation)

        Returns:
            Generated text response, or the stringified prompt feedback when
            the response carries no text (e.g. a blocked prompt)

        Raises:
            ValueError: On unsupported message/content types or a failed upload.
        """
        contents = []
        for msg in messages:
            if msg["type"] == "text":
                contents.append(msg["content"])
            elif msg["type"] in ["image", "audio", "video"]:
                # Resolve the content to a local file path plus a MIME type.
                if isinstance(msg["content"], Image.Image):
                    file_path = self._save_image_to_temp(msg["content"])
                    mime_type = "image/png"
                elif isinstance(msg["content"], str):
                    if msg["content"].startswith("http"):
                        file_path = self._download_file(msg["content"])
                        mime_type = self._get_mime_type(msg["content"])
                    else:
                        file_path = msg["content"]
                        mime_type = self._get_mime_type(file_path)
                else:
                    raise ValueError("Unsupported content type")

                uploaded_file = self._upload_to_gemini(file_path, mime_type)

                # Poll until the file leaves the PROCESSING state (media is
                # processed asynchronously server-side).
                # NOTE(review): this polling loop has no upper bound —
                # confirm a stuck upload cannot hang callers indefinitely.
                while uploaded_file.state.name == "PROCESSING":
                    print('.', end='')
                    time.sleep(3)
                    uploaded_file = genai.get_file(uploaded_file.name)
                if uploaded_file.state.name == "FAILED":
                    raise ValueError(uploaded_file.state.name)
                print("Upload successfully")
                contents.append(uploaded_file)
            else:
                raise ValueError("Unsupported message type")

        response = self.model.generate_content(contents, request_options={"timeout": 600})
        try:
            return response.text
        except Exception as e:
            # response.text raises when the model returned no candidates
            # (e.g. blocked prompt); fall back to the prompt feedback.
            print(e)
            print(response.prompt_feedback)
            return str(response.prompt_feedback)
174 |
if __name__ == "__main__":
    # Intentionally empty: this module exposes GeminiWrapper for import and
    # defines no standalone CLI behavior.
    pass
--------------------------------------------------------------------------------
/data/thb_hard/chemistry.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "theorem": "The Henderson-Hasselbalch Equation",
4 | "description": "The pH of a buffer solution is equal to the pKa of the weak acid plus the logarithm of the ratio of the concentration of the conjugate base to the concentration of the weak acid: pH = pKa + log([A-]/[HA]). It allows for the calculation of buffer solution pH and predicting how pH would change with addition of acid or base",
5 | "difficulty": "Hard",
6 | "remark": "Crucial in understanding buffer solutions and titrations. Used in biochemistry extensively.",
7 | "subfield": "Acid-Base Chemistry"
8 | },
9 | {
10 | "theorem": "Bragg's law",
11 | "description": "Bragg's law in chemistry describes how X-rays reflect off of a crystal surface.",
12 | "difficulty": "Hard",
13 | "remark": "",
14 | "subfield": "Crystallography"
15 | },
16 | {
17 | "theorem": "Debye-Scherrer Equation",
18 | "description": "The Debye-Scherrer equation is used in chemistry to calculate the size of crystalline nanoparticles. It is based on X-ray diffraction (XRD) measurements.",
19 | "difficulty": "Hard",
20 | "remark": "",
21 | "subfield": "Crystallography"
22 | },
23 | {
24 | "theorem": "Hückel's Rule",
25 | "description": "In organic chemistry, Hückel's rule predicts that a planar ring molecule will have aromatic properties if it has 4n + 2 π-electrons, where n is a non-negative integer.",
26 | "difficulty": "Hard",
27 | "remark": "",
28 | "subfield": "Organic Chemistry"
29 | },
30 | {
31 | "theorem": "Hard Acid Soft Base Theory",
32 | "description": "Hard Acid Soft Base Theory (HSAB): This theory works on the principle that soft acid reacts with the soft base while hard acid reacts with the hard base",
33 | "difficulty": "Hard",
34 | "remark": "",
35 | "subfield": "Acid-Base Chemistry"
36 | },
37 | {
38 | "theorem": "Pauli Exclusion Principle",
39 | "description": "Pauli's Exclusion Principle states that no two electrons in the same atom can have identical values for all four of their quantum numbers.",
40 | "difficulty": "Hard",
41 | "remark": "",
42 | "subfield": "Quantum Chemistry"
43 | },
44 | {
45 | "theorem": "Crystal Field Theory",
46 | "description": "Crystal field theory (CFT) describes the breaking of orbital degeneracy in transition metal complexes due to the presence of ligands.",
47 | "difficulty": "Hard",
48 | "remark": "",
49 | "subfield": "Inorganic Chemistry"
50 | },
51 | {
52 | "theorem": "Hohenberg-Kohn theorem",
53 | "description": "The first Hohenberg–Kohn theorem states that 'the ground state of any interacting many particle system with a given fixed inter-particle interaction is a unique functional of the electron density n(r)'.",
54 | "difficulty": "Hard",
55 | "remark": "",
56 | "subfield": "Quantum Chemistry"
57 | },
58 | {
59 | "theorem": "Frost–Ebsworth diagram",
60 | "description": "A Frost diagram or Frost–Ebsworth diagram is a type of graph used by inorganic chemists in electrochemistry to illustrate the relative stability of a number of different oxidation states of a particular substance. The graph illustrates the free energy vs oxidation state of a chemical species.",
61 | "difficulty": "Hard",
62 | "remark": "",
63 | "subfield": "Electrochemistry"
64 | },
65 | {
66 | "theorem": "Coulson-Fischer Theorem",
67 | "description": "In theoretical chemistry and molecular physics, Coulson–Fischer theory provides a quantum mechanical description of the electronic structure of molecules.",
68 | "difficulty": "Hard",
69 | "remark": "",
70 | "subfield": "Quantum Chemistry"
71 | },
72 | {
73 | "theorem": "Franck-Condon Principle",
74 | "description": "The Franck-Condon Principle describes the intensities of vibronic transitions, or the absorption or emission of a photon.",
75 | "difficulty": "Hard",
76 | "remark": "",
77 | "subfield": "Spectroscopy"
78 | },
79 | {
80 | "theorem": "Nernst Equation",
81 | "description": "The Nernst Equation enables the determination of cell potential under non-standard conditions.",
82 | "difficulty": "Hard",
83 | "remark": "",
84 | "subfield": "Electrochemistry"
85 | },
86 | {
87 | "theorem": "Slater's Rules",
88 | "description": "The general principle behind Slater's Rule is that the actual charge felt by an electron is equal to what you'd expect the charge to be from a certain number of protons, but minus a certain amount of charge from other electrons.",
89 | "difficulty": "Hard",
90 | "remark": "",
91 | "subfield": "Quantum Chemistry"
92 | },
93 | {
94 | "theorem": "Langmuir Adsorption Isotherm",
95 | "description": "A continuous monolayer of adsorbate molecules surrounding a homogeneous solid surface is the conceptual basis for this adsorption model.",
96 | "difficulty": "Hard",
97 | "remark": "",
98 | "subfield": "Physical Chemistry"
99 | },
100 | {
101 | "theorem": "Marcus Theory",
102 | "description": "Marcus theory is a theory originally developed by Rudolph A. Marcus, starting in 1956, to explain the rates of electron transfer reactions.",
103 | "difficulty": "Hard",
104 | "remark": "",
105 | "subfield": "Physical Chemistry"
106 | },
107 | {
108 | "theorem": "Eyring Equation",
109 | "description": "The Eyring equation is an equation used in chemical kinetics to describe changes in the rate of a chemical reaction against temperature.",
110 | "difficulty": "Hard",
111 | "remark": "",
112 | "subfield": "Chemical Kinetics"
113 | },
114 | {
115 | "theorem": "Woodward-Hoffmann Rules",
116 | "description": "Robert Burns Woodward and Roald Hoffmann devised these set of rules to explain the stereochemistry of pericyclic reactions based on the orbital symmetry.",
117 | "difficulty": "Hard",
118 | "remark": "",
119 | "subfield": "Organic Chemistry"
120 | },
121 | {
122 | "theorem": "Born-Haber Cycle",
123 | "description": "A Born–Haber cycle applies Hess's law to calculate the lattice enthalpy by comparing the standard enthalpy change of formation of the ionic compound (from the elements) to the enthalpy required to make gaseous ions from the elements. This lattice calculation is complex.",
124 | "difficulty": "Hard",
125 | "remark": "",
126 | "subfield": "Thermodynamics"
127 | },
128 | {
129 | "theorem": "Molecular Orbital Theory",
130 | "description": "In chemistry, molecular orbital theory is a method for describing the electronic structure of molecules using quantum mechanics.",
131 | "difficulty": "Hard",
132 | "remark": "",
133 | "subfield": "Quantum Chemistry"
134 | },
135 | {
136 | "theorem": "Hammond Postulate",
137 | "description": "The postulate, which George Hammond first proposed in 1955, states that if two states, such as a transition state and an unstable intermediate, occur consecutively during a reaction process and have nearly the same energy content, their interconversion will result in only a minor reorganisation of molecular structures.",
138 | "difficulty": "Hard",
139 | "remark": "",
140 | "subfield": "Physical Chemistry"
141 | }
142 | ]
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_scene_vision_storyboard.txt:
--------------------------------------------------------------------------------
1 | You are an expert in educational video production and Manim animation.
2 | **Reminder:** Each scene's vision and storyboard plan is entirely self-contained. There is no dependency on any implementation from previous or subsequent scenes. However, the narration will treat all scenes as part of a single, continuous video.
3 |
4 | Create a scene vision and storyboard plan for Scene {scene_number}, thinking in Manim terms, and strictly adhering to the defined spatial constraints.
5 |
6 | Topic: {topic}
7 | Description: {description}
8 |
9 | Scene Overview:
10 | {scene_outline}
11 |
12 | The following manim plugins are relevant to the scene:
13 | {relevant_plugins}
14 |
15 | **Spatial Constraints (Strictly Enforced):**
16 | * **Safe area margins:** 0.5 units on all sides from the scene edges. *All objects must be positioned within these margins.*
17 | * **Minimum spacing:** 0.3 units between any two Manim objects (measured edge to edge). *Ensure a minimum spacing of 0.3 units to prevent overlaps and maintain visual clarity. This spacing must be maintained between all objects in the scene, including text, shapes, and graphs.*
18 |
19 | **Positioning Requirements:**
20 | 1. Safe area margins (0.5 units).
21 | 2. Minimum spacing between objects (0.3 units).
22 | 3. Relative positioning (`next_to`, `align_to`, `shift`) from `ORIGIN`, margins, or object references. **No absolute coordinates are allowed.** All positioning MUST be relative and clearly specified using reference points and relative positioning methods.
23 | 4. Transition buffers (`Wait` times) between sub-scenes and animation steps for visual clarity and pacing.
24 |
25 | **Diagrams/Sketches (Optional but Recommended for Complex Scenes):**
26 | * For complex scenes, consider including a simple diagram or sketch (even text-based) of the intended layout to visually clarify spatial relationships and ensure adherence to spacing and margin constraints.
27 |
28 | **Focus:**
29 | * Focus on clear visual communication of the scene's learning objective through effective use of Manim objects and animations, while strictly adhering to the defined spatial constraints.
30 | * Provide detailed visual descriptions in Manim terms to guide human implementation.
31 | * Prioritize explanation and visualization of the theorem. Do not include any promotional elements or quiz sessions.
32 | * Minimize text usage - rely primarily on visual elements, mathematical notation, and animations to convey concepts. Use text sparingly and only when necessary for clarity.
33 |
34 | **Common Mistakes:**
35 | * The Triangle class in Manim creates equilateral triangles by default. To create a right-angled triangle, use the Polygon class instead.
36 |
37 | **Manim Plugins:**
38 | * Consider using established Manim plugins if they significantly simplify the implementation or offer visual elements not readily available in core Manim. If a plugin is used, clearly indicate this in the storyboard with a note like "**Plugin Suggestion:** Consider using the `manim-plugin-name` plugin for [brief explanation of benefit]."
39 |
40 | You MUST generate the scene vision and storyboard plan for the scene in the following format (from ```xml to ```):
41 |
42 | ```xml
43 |
44 | [SCENE_VISION]
45 | 1. **Scene Overview**:
46 | - Scene story, key takeaway, video role. *Consider how this scene fits within the overall video narrative.*
47 | - **Visual learning objectives for viewers:** Think about *specific Manim object types* that best represent the learning objective. Example: "Visualize roots as `Dot` objects on an `Axes` graph." Be specific about Manim object classes (e.g., `MathTex`, `Shapes`, `Graphs`, `Axes`, `VGroup`). If a plugin provides a relevant object type, mention it (e.g., "Visualize X using `PluginObject` from `manim-plugin-name`").
48 | - How Manim visuals & animations support learning? Consider `MathTex`, `Shapes`, `Graphs`, `Axes`, `VGroup`. Focus on spatial arrangement and clarity, ensuring adherence to safe area margins and minimum spacing (0.3 units). Consider using `VGroup` to group related formula components for easier animation and spatial control. Example: "Use `VGroup` to group related formula components for easier animation and spatial control, ensuring a minimum spacing of 0.3 units between VGroup and other scene elements." If a plugin offers a more efficient way to achieve a visual effect, mention it.
49 | - Key concepts to emphasize visually using visual hierarchy and spatial arrangement in Manim, while respecting safe area margins and minimum spacing (0.3 units). **Use `MathTex` for mathematical expressions and equations. Use `Tex` for general text, titles, labels, and any non-mathematical text. When mixing text with mathematical symbols in `MathTex`, use the `\\text{{}}` command (e.g., `MathTex(r"\\text{{Area}} = \\pi r^2")`)**
50 |
51 | [STORYBOARD]
52 | 1. **Visual Flow & Pacing (Manim Animation Sequence)**:
53 | - Describe the sequence of Manim visuals and animations (`Text`, `Circle`, `Arrow`, `Create`, `FadeIn`, `Transform`, etc.). Be specific about animation types and their parameters (e.g., `run_time`). If a plugin provides a specific animation type, mention it (e.g., "Use `PluginAnimation` from `manim-plugin-name`").
54 | - Key visual moments: composition and arrangement of Manim elements, ensuring all elements are within safe area margins and maintain a minimum 0.3 unit spacing. Example: "`MathTex` formula center (`.move_to(ORIGIN)`) with `Write` animation, ensuring 0.3 unit spacing from scene edges and other elements."
55 | - Visual transitions between ideas using Manim animations (`Transform`, `Shift`, `FadeOutAndShift`, etc.). Specify transition animations and their timings.
56 | - Scene pacing (pauses, action) and Manim animation timing's role. Use `Wait()` for transition buffers and visual clarity.
57 | - **Sub-scene Breakdown**: Divide the scene into logical sub-scenes, each focusing on a specific step in the explanation or visualization.
58 | - For each sub-scene, start with a **Visual Element**: The primary visual component that drives the explanation (e.g., mathematical notation, diagram, graph). If this element comes from a plugin, clearly state this (e.g., "Visual Element: `PluginObject` from `manim-plugin-name`").
59 | - Detail the **Animation Sequence**: Describe step-by-step the Manim animations and visual elements for each sub-scene. Be specific about:
60 | - **Text Usage Guidelines:**
61 | - **Use `MathTex` *only* for mathematical expressions and equations.**
62 | - **Use `Tex` for all other text, including labels, explanations, and titles.**
63 | - **When mixing text with mathematical symbols in `MathTex`, wrap the text portions in `\\text{{}}`. Example: `MathTex(r"\\text{{Area of circle}} = \\pi r^2")`.**
64 | - Manim object classes (`MathTex`, `Circle`, `Arrow`, `Axes`, `Plot`, `Line`, `VGroup`, etc.), prioritizing mathematical notation and visual elements over text. Include plugin object classes where appropriate.
65 | - Animation types (`Create`, `Write`, `FadeIn`, `Transform`, `FadeOut`, `Circumscribe`, `FocusOn`, etc.) and their parameters (e.g., `run_time`). Include plugin animation types where appropriate.
66 | - Positioning of objects using relative positioning methods (`.next_to()`, `.align_to()`, `.shift()`, `.to_corner()`, `.move_to(ORIGIN)`, etc.) and references to other objects or scene elements. **No absolute coordinates allowed.**
67 | - Color and style specifications (e.g., `color=BLUE`, `stroke_width=2`, `dashed=True`).
68 | - Explicitly mention safe area margins and minimum spacing (0.3 units) for all objects within each sub-scene.
69 |
70 |
71 | ```
--------------------------------------------------------------------------------
/data/thb_hard/physics.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "theorem": "Boltzmann machine",
4 | "description": "It is a statistical physics technique applied in the context of cognitive science. It is also classified as a Markov random field.",
5 | "difficulty": "Hard",
6 | "remark": "",
7 | "subfield": "Statistical Physics"
8 | },
9 | {
10 | "theorem": "Geometric Brownian Motion",
11 | "description": "A geometric Brownian motion (GBM) (also known as exponential Brownian motion) is a continuous-time stochastic process in which the logarithm of the randomly varying quantity follows a Brownian motion (also called a Wiener process) with drift.",
12 | "difficulty": "Hard",
13 | "remark": "",
14 | "subfield": "Statistical Physics"
15 | },
16 | {
17 | "theorem": "Fermat's Principle",
18 | "description": "Fermat's principle states that light travels between two points along the path that requires the least time, as compared to other nearby paths.",
19 | "difficulty": "Hard",
20 | "remark": "",
21 | "subfield": "Optics"
22 | },
23 | {
24 | "theorem": "Huygens's Principle",
25 | "description": "The Huygens–Fresnel principle states that every point on a wavefront is itself the source of spherical wavelets, and the secondary wavelets emanating from different points mutually interfere. The sum of these spherical wavelets forms a new wavefront.",
26 | "difficulty": "Hard",
27 | "remark": "",
28 | "subfield": "Optics"
29 | },
30 | {
31 | "theorem": "Virial Theorem",
32 | "description": "In mechanics, the virial theorem provides a general equation that relates the average over time of the total kinetic energy of a stable system of discrete particles, bound by a conservative force, with that of the total potential energy of the system.",
33 | "difficulty": "Hard",
34 | "remark": "",
35 | "subfield": "Classical Mechanics"
36 | },
37 | {
38 | "theorem": "Poynting Theorem",
39 | "description": "It states that in a given volume, the stored energy changes at a rate given by the work done on the charges within the volume, minus the rate at which energy leaves the volume.",
40 | "difficulty": "Hard",
41 | "remark": "",
42 | "subfield": "Electromagnetism"
43 | },
44 | {
45 | "theorem": "Fresnel transmission equations",
46 | "description": "Fresnel's equations describe the reflection and transmission of electromagnetic waves at an interface.",
47 | "difficulty": "Hard",
48 | "remark": "",
49 | "subfield": "Optics"
50 | },
51 | {
52 | "theorem": "Fourier Heat Conduction Law",
53 |         "description": "Fourier's law states that the rate of heat transfer through a material is proportional to the negative gradient of temperature and to the area, at right angles to that gradient, through which the heat flows.",
54 | "difficulty": "Hard",
55 | "remark": "",
56 | "subfield": "Thermodynamics"
57 | },
58 | {
59 | "theorem": "Ampère's circuital law",
60 |         "description": "Ampere's circuital law states that the line integral of the magnetic field around a closed loop is equal to the permeability of free space times the algebraic sum of the currents passing through the loop.",
61 | "difficulty": "Hard",
62 | "remark": "",
63 | "subfield": "Electromagnetism"
64 | },
65 | {
66 | "theorem": "Malus's Law",
67 | "description": "Malus law states that the intensity of a plane-polarised light that passes through an analyser is directly proportional to the square of the cosine of the angle between the plane of the polariser and the transmission axis of the analyser.",
68 | "difficulty": "Hard",
69 | "remark": "",
70 | "subfield": "Optics"
71 | },
72 | {
73 | "theorem": "Van der Waals Equation",
74 | "description": "The van der Waals equation is a mathematical formula that describes the behavior of real gases. It is an equation of state that relates the pressure, temperature, and molar volume in a fluid.",
75 | "difficulty": "Hard",
76 | "remark": "",
77 | "subfield": "Thermodynamics"
78 | },
79 | {
80 | "theorem": "Rayleigh Criterion",
81 | "description": "The Rayleigh criterion is the generally accepted criterion for the minimum resolvable detail - the imaging process is said to be diffraction-limited when the first diffraction minimum of the image of one source point coincides with the maximum of another.",
82 | "difficulty": "Hard",
83 | "remark": "",
84 | "subfield": "Optics"
85 | },
86 | {
87 | "theorem": "Paschen Curve",
88 | "description": "Paschen's law is an equation that gives the breakdown voltage, that is, the voltage necessary to start a discharge or electric arc, between two electrodes in a gas as a function of pressure and gap length.",
89 | "difficulty": "Hard",
90 | "remark": "",
91 | "subfield": "Electromagnetism"
92 | },
93 | {
94 | "theorem": "Chandrasekhar Limit",
95 | "description": "The Chandrasekhar limit is the maximum mass that a star can have and still be a stable white dwarf.",
96 | "difficulty": "Hard",
97 | "remark": "",
98 | "subfield": "Astrophysics"
99 | },
100 | {
101 | "theorem": "Landau Damping",
102 |         "description": "Landau damping is a phenomenon observed in plasma wherein there is an exponential decay in the oscillations of the number density of electrons in a plasma (also referred to as Langmuir waves) and so stability is achieved in some area of the phase-space.",
103 | "difficulty": "Hard",
104 | "remark": "",
105 | "subfield": "Plasma Physics"
106 | },
107 | {
108 | "theorem": "Schwarzschild radius",
109 | "description": "The Schwarzschild radius is the critical distance from the center of a massive body where the gravitational pull becomes so strong that not even light can escape, defining the boundary of a black hole.",
110 | "difficulty": "Hard",
111 | "remark": "",
112 | "subfield": "Astrophysics"
113 | },
114 | {
115 | "theorem": "Babinet's Principle",
116 | "description": "In physics, Babinet's principle states that the diffraction pattern from an opaque body is identical to that from a hole of the same size and shape except for the overall forward beam intensity.",
117 | "difficulty": "Hard",
118 | "remark": "",
119 | "subfield": "Optics"
120 | },
121 | {
122 | "theorem": "Schrödinger's Cat",
123 | "description": "Schrödinger's cat is a thought experiment in quantum mechanics that illustrates the paradoxical nature of quantum superposition and wave function collapse.",
124 | "difficulty": "Hard",
125 | "remark": "",
126 | "subfield": "Quantum Mechanics"
127 | },
128 | {
129 | "theorem": "Rayleigh Criterion for Resolution",
130 | "description": "For a circular aperture, lens, or mirror, the Rayleigh criterion states that two images are just resolvable when the center of the diffraction pattern of one is directly over the first minimum of the diffraction pattern of the other.",
131 | "difficulty": "Hard",
132 | "remark": "",
133 | "subfield": "Optics"
134 | },
135 | {
136 | "theorem": "Navier-Stokes Equations",
137 | "description": "In fluid mechanics, the Navier-Stokes equations are partial differential equations that express the flow of viscous fluids.",
138 | "difficulty": "Hard",
139 | "remark": "",
140 | "subfield": "Fluid Mechanics"
141 | }
142 | ]
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_scene_technical_implementation.txt:
--------------------------------------------------------------------------------
1 | You are an expert in educational video production and Manim (Community Edition), adept at translating pedagogical narration plans into robust and spatially accurate Manim code.
2 | **Reminder:** This technical implementation plan is fully self-contained. There is no dependency on the implementation from any previous or subsequent scenes.
3 |
4 | Create a detailed technical implementation plan for Scene {scene_number} (Manim code focused), *informed by the provided Manim documentation context*, strictly adhering to defined spatial constraints (safe area margins: 0.5 units, minimum spacing: 0.3 units), and **addressing potential text bounding box overflow issues**.
5 |
6 | Topic: {topic}
7 | Description: {description}
8 |
9 | Scene Overview:
10 | {scene_outline}
11 |
12 | Scene Vision and Storyboard:
13 | {scene_vision_storyboard}
14 |
15 | The following manim plugins are relevant to the scene:
16 | {relevant_plugins}
17 |
18 | **Spatial Constraints (Strictly Enforced):**
19 | * **Safe area margins:** 0.5 units on all sides from the scene edges. All objects must be positioned within these margins.
20 | * **Minimum spacing:** 0.3 units between any two Manim objects (measured edge to edge). This prevents overlaps and maintains visual clarity.
21 |
22 | **Positioning Requirements:**
23 | 1. All positioning MUST be relative (`next_to`, `align_to`, `shift`) from ORIGIN, safe margins, or other objects. **No absolute coordinates are allowed.**
24 | 2. Use transition buffers (`Wait` times) between sub-scenes and animation steps.
25 |
26 | **Diagrams/Sketches (Highly Recommended):**
27 | * Include diagrams/sketches (even text-based) for complex layouts to visualize spatial relationships, improve clarity, and reduce spatial errors.
28 |
29 | **Common Mistakes:**
30 | * The Triangle class in Manim creates equilateral triangles by default. To create a right-angled triangle, use the Polygon class instead.
31 |
32 | **Manim Plugins:**
33 | * You may use established, well-documented Manim plugins if they offer significant advantages in terms of code clarity, efficiency, or functionality not readily available in core Manim.
34 | * **If a plugin is used:**
35 | * Clearly state the plugin name and version (if applicable).
36 | * Provide a brief justification for using the plugin (e.g., "Using `manim-plugin-name` for its advanced graph layout capabilities").
37 | * Ensure all plugin usage adheres to the plugin's documentation.
38 |     * Include a comment in the plan: `### Plugin: [plugin name] - [brief justification for use]`.
39 |
40 | **Focus:**
41 | * Creating *pedagogically sound and spatially correct Manim code*.
42 | * Detailed technical descriptions, referencing Manim documentation.
43 | * Strict adherence to spatial constraints and relative positioning.
44 |
45 | You MUST generate the technical implementation plan for the scene in the following format (from ```xml to ```):
46 |
47 | ```xml
48 |
49 | 0. **Dependencies**:
50 | - **Manim API Version**: Target the latest stable Manim release, using only documented API elements.
51 | - **Allowed Imports**: `manim`, `numpy`, and any explicitly approved and documented Manim plugins. No external assets (e.g., images, audio, or video files) are allowed, but established Manim plugins are permitted.
52 |
53 | 1. **Manim Object Selection & Configuration (Text and Shapes)**:
54 | - Clearly define the Manim objects (e.g., `Tex`, `MathTex`, `Circle`, `Line`, etc.) used to construct the scene. Also include any objects provided by used plugins.
55 | - Specify all key parameters such as text content, font size, color, stroke, or shape dimensions.
56 | - **Text Considerations**:
57 | - **Use `MathTex` for mathematical expressions and equations, ensuring valid LaTeX syntax.** For example: `MathTex("x^2 + y^2 = r^2")`.
58 | - **Use `Tex` for all non-mathematical text, including titles, labels, explanations, and general text.** For example: `Tex("This is a circle")`.
59 | - **If you need to include regular text *within* a `MathTex` environment (e.g., for explanations alongside a formula), use the `\\text{{}}` command.** For example: `MathTex(r"\\text{{Area of circle}} = \\pi r^2")`.
60 | - **Do not use `MathTex` for regular text, as it will result in incorrect spacing and formatting.**
61 | - **LaTeX Packages**: If any `Tex` or `MathTex` objects require LaTeX packages beyond those included in Manim's default template, specify them here. For example: "Requires: `\\usepackage{{amssymb}}`". Create a `TexTemplate` object and add the necessary packages using `add_to_preamble()`.
62 | - **Font Size Recommendations**:
63 | - If there is title text, font size is highly recommended to be 28.
64 | - If there are side labels or formulas, font size is highly recommended to be 24.
65 | - However, if the text has more than 10 words, the font size should be reduced further and multiple lines should be used.
66 | - Confirm all objects begin within the safe area (0.5 units from all edges) and maintain at least 0.3 units spacing to avoid overlaps.
67 |
68 | 2. **VGroup Structure & Hierarchy**:
69 | - Organize related elements into `VGroup`s for efficient spatial and animation management. If a plugin provides a specialized group-like object, consider using it.
70 | - For each `VGroup`, define the parent-child relationships and ensure internal spacing of at least 0.3 units.
71 | - Clearly document the purpose for each grouping (e.g., "formula_group" for mathematical expressions).
72 |
73 | 3. **Spatial Positioning Strategy**:
74 | - Mandate the exclusive use of relative positioning methods (`next_to`, `align_to`, `shift`), based on ORIGIN, safe margins, or other objects.
75 | - For every object, specify:
76 | - The reference object (or safe edge) used for positioning.
77 | - The specific method (and direction/aligned edge) along with a `buff` value (minimum 0.3 units).
78 | - Outline the layout in sequential stages, inserting visual checkpoints to verify that every element continues to respect safe margins and spacing.
79 | - Highlight measures to safeguard text bounding boxes, especially for multi-line text.
80 | - Reference the font size recommendations under "Text Considerations" to ensure appropriate sizing and prevent overflow.
81 |
82 | 4. **Animation Methods & Object Lifecycle Management**:
83 | - Define clear animation sequences using documented methods such as `Create`, `Write`, `FadeIn`, `Transform`, and corresponding removal animations (`FadeOut`, `Uncreate`). Include animation methods from plugins if they are used.
84 | - For each animation, specify parameters like `run_time`, `lag_ratio`, and the use of `Wait()` for transition buffers.
85 | - Ensure every object's appearance and removal is managed to prevent clutter and maintain scene clarity.
86 |
87 | 5. **Code Structure & Reusability**:
88 | - Propose modular functions for creating and animating common objects to promote code reusability.
89 | - Organize the overall code structure into logical sections: dependencies, object definitions, individual layout stages, and the main `construct` method.
90 | - Include inline comments to document the rationale for configuration choices, referencing the Manim Documentation *and the plugin documentation where applicable*.
91 |
92 | ***Mandatory Safety Checks***:
93 | - **Safe Area Enforcement**: All objects, including text bounding boxes, must remain within 0.5 unit margins.
94 | - **Minimum Spacing Validation**: Confirm a minimum of 0.3 units spacing between every pair of objects.
95 | - **Transition Buffers**: Use explicit `Wait()` calls to separate animation steps and sub-scenes.
96 |
97 | ```
98 |
--------------------------------------------------------------------------------
/task_generator/prompts_raw/prompt_scene_animation_narration.txt:
--------------------------------------------------------------------------------
1 | You are an expert in educational video production and Manim animation, skilled in creating engaging and pedagogically effective learning experiences.
2 | **Reminder:** This animation and narration plan is entirely self-contained; there is no dependency on any previous or subsequent scene implementations. However, the narration should flow smoothly as part of a larger, single video.
3 |
4 | Your task is to create a **detailed animation and narration plan for Scene {scene_number}**, ensuring it is not just visually appealing but also serves a clear educational purpose within the overall video topic.
5 |
6 | Remember, the narration should not simply describe what's happening visually, but rather **teach a concept step-by-step**, guiding the viewer to a deeper understanding. Animations should be spatially coherent, contribute to a clear visual flow, and strictly respect safe area margins (0.5 units) and minimum spacing (0.3 units). **Consider the scene number {scene_number} and the overall scene context to ensure smooth transitions and a logical flow within the larger video narrative.**
7 |
8 | Topic: {topic}
9 | Description: {description}
10 |
11 | Scene Overview:
12 | {scene_outline}
13 |
14 | Scene Vision and Storyboard:
15 | {scene_vision_storyboard}
16 |
17 | Technical Implementation Plan:
18 | {technical_implementation_plan}
19 |
20 | The following manim plugins are relevant to the scene:
21 | {relevant_plugins}
22 |
23 | **Spatial Constraints (Strictly Enforced Throughout Animations):**
24 | * **Safe area margins:** 0.5 units. *Maintain objects and VGroups within margins.*
25 | * **Minimum spacing:** 0.3 units. *Ensure minimum spacing between all objects and VGroups.*
26 |
27 | **Animation Timing and Pacing Requirements:**
28 | * Specify `run_time` for all animations.
29 | * Use `Wait()` for transition buffers, specifying durations and **pedagogical purpose**.
30 | * Coordinate animation timings with narration cues for synchronized pedagogical presentation.
31 |
32 | **Visual Flow and Pedagogical Clarity:**
33 | * Ensure animations create a clear and logical visual flow, **optimized for learning and concept understanding.**
34 | * Use animation pacing and transition buffers to visually separate ideas and **enhance pedagogical clarity.**
35 | * Maintain spatial coherence for predictable and understandable animations, strictly adhering to spatial constraints.
36 |
37 | **Diagrams/Sketches (Optional but Highly Recommended for Complex Scenes):**
38 | * For complex animations, include diagrams/sketches to visualize animation flow and object movements. This aids clarity and reduces errors.
39 |
40 | Your plan must demonstrate a strong understanding of pedagogical narration and how animations can be used to effectively teach concepts, while strictly adhering to spatial constraints and timing requirements.
41 |
42 | You MUST generate a **detailed and comprehensive** animation and narration plan for **Scene {scene_number}**, in the following format, similar to the example provided (from ```xml to ```):
43 |
44 | ```xml
45 |
46 |
47 | [ANIMATION_STRATEGY]
48 | 1. **Pedagogical Animation Plan:** Provide a detailed plan for all animations in the scene, explicitly focusing on how each animation contributes to **teaching the core concepts** of this scene.
49 | - **Parent VGroup transitions (if applicable):**
50 | - If VGroups are used, specify transitions (`Shift`, `Transform`, `FadeIn`, `FadeOut`) with `Animation` type, direction, magnitude, target VGroup, and `run_time`.
51 | - **Explain the pedagogical rationale** for each VGroup transition. How does it guide the viewer's attention or contribute to understanding the scene's learning objectives? Ensure spatial coherence and respect for constraints.
52 | - **Element animations within VGroups and for individual Mobjects:**
53 | - Specify animation types (`Create`, `Write`, `FadeIn`, `Transform`, `Circumscribe`, `AnimationGroup`, `Succession`) for elements.
54 | - For each element animation, specify `Animation` type, target object(s), and `run_time`. Detail sequences and timing for `AnimationGroup` or `Succession`.
55 | - **Explain the pedagogical purpose** of each element animation. How does it break down complex information, highlight key details, or improve visual clarity for learning? Ensure spatial coherence and minimum spacing.
56 | - **Coordinate element animations with VGroup transitions:**
57 | - Clearly describe the synchronization between element animations and VGroup transitions (if any).
58 | - Specify relative timing and `run_time` to illustrate coordination.
59 | - **Explain how this animation sequence and coordination creates a pedagogical flow**, guiding the viewer's eye and attention logically through the learning material.
60 |
61 | 2. **Scene Flow - Pedagogical Pacing and Clarity:** Detail the overall flow of the scene, emphasizing pedagogical effectiveness.
62 | - **Overall animation sequence, spatial progression for learning:**
63 | - Describe the complete animation sequence, broken down into pedagogical sub-sections (e.g., "Introducing the Problem", "Step-by-step Solution", "Concept Reinforcement").
64 | - Outline the spatial progression of objects and VGroups, focusing on how it supports the **pedagogical narrative** and concept development.
65 | - Ensure a clear and logical visual flow optimized for learning, respecting spatial constraints.
66 | - **Transition buffers for pedagogical pauses:**
67 | - Specify `Wait()` times between animation sections for visual separation and **learner processing time**.
68 | - For each `Wait()`, specify duration and **explain the pedagogical reason** for this buffer (e.g., "Allow viewers time to process the formula", "Create a pause for reflection before moving to the next concept").
69 | - **Coordinate animation timing with narration for engagement and comprehension:**
70 | - Describe how animation timings are coordinated with the narration script to **maximize viewer engagement and comprehension**.
71 | - Specify animation cues within the narration script and explain how these cues are synchronized with animations to **reinforce learning points** at the optimal moment.
72 |
73 | [NARRATION]
74 | - **Pedagogical Narration Script:**
75 | - Provide the full narration script for Scene {scene_number}.
76 | - **Embed precise animation timing cues** within the narration script (as described before).
77 | - **The script should be written as if delivered by a knowledgeable and engaging lecturer.** It should:
78 | - **Clearly explain concepts step-by-step.**
79 | - **Use analogies and real-world examples to enhance understanding.**
80 | - **Pose questions to encourage active thinking.**
81 | - **Summarize key points and transitions.**
82 | - **Be detailed and knowledge-rich, not just visually descriptive.**
83 |         - **Connect smoothly with the previous and subsequent scenes, acting as a segment within a single, cohesive video.**
84 |         - **Avoid repetitive introductions or conclusions.**
85 | - Consider using phrases like "Building on what we saw in the previous part..." or "Let's now move on to..." to create a sense of continuity.
86 | - Reference the scene number when appropriate (e.g., "Now, let's explore...").
87 | - **Crucially, the narration should seamlessly integrate with the animations to create a cohesive and effective learning experience.**
88 | - **Narration Sync - Pedagogical Alignment:**
89 | - Detail the synchronization strategy between narration and animations, emphasizing **pedagogical alignment**.
90 | - Explain how narration timing is aligned with animation start/end times to **guide viewer attention to key learning elements precisely when they animate.**
91 | - Emphasize how narration cues and animation timings work together to **create a synchronized audiovisual presentation that maximizes learning and retention.**
92 |
93 |
94 | ```
95 |
--------------------------------------------------------------------------------
/data/thb_medium/chemistry.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "theorem": "Le Chatelier's Principle",
4 | "description": "When a system at equilibrium is subjected to a change in condition (such as temperature, pressure, or concentration), the system will shift in a direction that relieves the stress and a new equilibrium will be established. This principle helps predict how equilibrium will shift in response to external changes.",
5 | "difficulty": "Medium",
6 | "remark": "Essential for understanding chemical equilibrium and its practical applications in industrial processes.",
7 | "subfield": "Chemical Equilibrium"
8 | },
9 | {
10 | "theorem": "The Pauli Exclusion Principle",
11 | "description": "No two electrons in the same atom can have the same set of four quantum numbers (n, l, ml, ms). This limits the number of electrons that can occupy an orbital, which is max two electrons, with opposite spins (+1/2 and -1/2). This explains electronic configuration in atoms.",
12 | "difficulty": "Medium",
13 | "remark": "Essential for understanding electronic structure and the basis for chemical bonding.",
14 | "subfield": "Quantum Chemistry"
15 | },
16 | {
17 | "theorem": "Raoult's Law",
18 | "description": "The partial vapor pressure of a component in an ideal solution is equal to the vapor pressure of the pure component multiplied by its mole fraction in the solution: P_A = P_A* X_A. This helps to predict vapor pressure of ideal solutions and is a basis for colligative properties",
19 | "difficulty": "Medium",
20 | "remark": "Describes vapor pressure of solutions, useful in understanding boiling point elevation and freezing point depression.",
21 | "subfield": "Physical Chemistry"
22 | },
23 | {
24 | "theorem": "Beer-Lambert Law",
25 | "description": "The absorbance of a solution is directly proportional to the concentration of the analyte and the path length of the light beam through the solution: A = \u03b5bc, where \u03b5 is molar absorptivity, b is path length, and c is the concentration. Useful in analytical chemistry for determining the concentration of a substance by measuring the light it absorbs.",
26 | "difficulty": "Medium",
27 | "remark": "Important in spectrophotometry for quantitative analysis of solutions.",
28 | "subfield": "Analytical Chemistry"
29 | },
30 | {
31 | "theorem": "Phase diagram",
32 | "description": "Phase diagram is a graphical representation of the physical states of a substance under different conditions of temperature and pressure.",
33 | "difficulty": "Medium",
34 | "remark": "Useful in understanding the phase transitions of substances.",
35 | "subfield": "Physical Chemistry"
36 | },
37 | {
38 | "theorem": "Boyle's Law",
39 |         "description": "Boyle's law states that, at constant temperature, the pressure of a fixed amount of an ideal gas is inversely proportional to its volume.",
40 | "difficulty": "Medium",
41 | "remark": "",
42 | "subfield": "Physical Chemistry"
43 | },
44 | {
45 | "theorem": "Graham's Law of Effusion",
46 | "description": "Graham's law of effusion was formulated by Scottish physical chemist Thomas Graham in 1848. Graham found experimentally that the rate of effusion of a gas is inversely proportional to the square root of the molar mass of its particles.",
47 | "difficulty": "Medium",
48 | "remark": "",
49 | "subfield": "Physical Chemistry"
50 | },
51 | {
52 | "theorem": "Arrhenius Equation",
53 | "description": "In physical chemistry, the Arrhenius equation is a formula for the temperature dependence of reaction rates.",
54 | "difficulty": "Medium",
55 | "remark": "",
56 | "subfield": "Chemical Kinetics"
57 | },
58 | {
59 | "theorem": "Henry's law",
60 |         "description": "Henry's law states the proportional relationship between the concentration of dissolved gas in a solution and the partial pressure of the gas in contact with the solution.",
61 | "difficulty": "Medium",
62 | "remark": "",
63 | "subfield": "Physical Chemistry"
64 | },
65 | {
66 | "theorem": "Lewis Acid-Base Theory",
67 | "description": "In the Lewis theory of acid-base reactions, bases donate pairs of electrons and acids accept pairs of electrons.",
68 | "difficulty": "Medium",
69 | "remark": "",
70 | "subfield": "Acid-Base Chemistry"
71 | },
72 | {
73 | "theorem": "Clausius-Clapeyron Equation",
74 |         "description": "The Clausius-Clapeyron equation relates the vapor pressure of a substance to temperature, allowing us to estimate the vapor pressure at one temperature from its known value at another.",
75 | "difficulty": "Medium",
76 | "remark": "",
77 | "subfield": "Thermodynamics"
78 | },
79 | {
80 | "theorem": "Michaelis-Menten Kinetics",
81 | "description": "In biochemistry, Michaelis–Menten kinetics, named after Leonor Michaelis and Maud Menten, is the simplest case of enzyme kinetics, applied to enzyme-catalysed reactions of one substrate and one product.",
82 | "difficulty": "Medium",
83 | "remark": "",
84 | "subfield": "Chemical Kinetics"
85 | },
86 | {
87 | "theorem": "Gibbs Free Energy Equation",
88 | "description": "The change in free energy, ΔG, is equal to the sum of the enthalpy plus the product of the temperature and entropy of the system.",
89 | "difficulty": "Medium",
90 | "remark": "",
91 | "subfield": "Thermodynamics"
92 | },
93 | {
94 | "theorem": "Transition State Theory",
95 | "description": "In chemistry, transition state theory (TST) explains the reaction rates of elementary chemical reactions.",
96 | "difficulty": "Medium",
97 | "remark": "",
98 | "subfield": "Chemical Kinetics"
99 | },
100 | {
101 | "theorem": "Koopman's Theorem",
102 | "description": "Koopmans' theorem states that the first ionization energy of a molecule is equal to the negative of the energy of the highest occupied molecular orbital (HOMO).",
103 | "difficulty": "Medium",
104 | "remark": "",
105 | "subfield": "Quantum Chemistry"
106 | },
107 | {
108 | "theorem": "Recrystallization",
109 | "description": "Recrystallization, also known as fractional crystallization, is a procedure for purifying an impure compound in a solvent.",
110 | "difficulty": "Medium",
111 | "remark": "",
112 | "subfield": "Analytical Chemistry"
113 | },
114 | {
115 | "theorem": "Electrogravimetry",
116 | "description": "Electrogravimetry is a method used to separate and quantify ions of a substance, usually a metal. In this process, the analyte solution is electrolyzed.",
117 | "difficulty": "Medium",
118 | "remark": "",
119 | "subfield": "Analytical Chemistry"
120 | },
121 | {
122 | "theorem": "Kjeldahl Method",
123 | "description": "The Kjeldahl method is a laboratory technique used to measure the amount of nitrogen in a sample. ",
124 | "difficulty": "Medium",
125 | "remark": "",
126 | "subfield": "Analytical Chemistry"
127 | },
128 | {
129 | "theorem": "Liquid-Liquid Extraction",
130 | "description": "Liquid–liquid extraction, also known as solvent extraction and partitioning, is a method to separate compounds or metal complexes, based on their relative solubilities in two different immiscible liquids, usually water (polar) and an organic solvent (non-polar).",
131 | "difficulty": "Medium",
132 | "remark": "",
133 | "subfield": "Analytical Chemistry"
134 | },
135 | {
136 | "theorem": "Reflux",
137 | "description": "Reflux is a laboratory technique where a reaction mixture is heated to boil and the vapors are condensed back into the reaction flask, allowing continuous heating without loss of volatile components.",
138 | "difficulty": "Medium",
139 | "remark": "",
140 | "subfield": "Laboratory Techniques"
141 | }
142 | ]
--------------------------------------------------------------------------------
/data/thb_easy/comp_sci.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "theorem": "The Pigeonhole Principle",
4 | "description": "If you have more pigeons than pigeonholes, then at least one pigeonhole must contain more than one pigeon. More formally, if *n* items are put into *m* containers, with *n > m*, then at least one container must contain more than one item.",
5 | "difficulty": "Easy",
6 | "remark": "A fundamental principle in combinatorics with surprising applications in various areas of computer science, like proving existence in hashing or data compression. Simple to understand, powerful in use.",
7 | "subfield": "Discrete Mathematics"
8 | },
9 | {
10 | "theorem": "De Morgan's Laws",
11 | "description": "De Morgan's Laws provide a way to simplify or transform logical statements involving AND, OR, and NOT. Specifically: 1) NOT (A AND B) is equivalent to (NOT A) OR (NOT B). 2) NOT (A OR B) is equivalent to (NOT A) AND (NOT B).",
12 | "difficulty": "Easy",
13 | "remark": "Crucial for boolean algebra and digital logic design. Helps with simplifying complex logic expressions and is widely used in programming.",
14 | "subfield": "Boolean Algebra"
15 | },
16 | {
17 | "theorem": "The Time Complexity of Linear Search",
18 | "description": "In the worst-case scenario, searching for an element in an unsorted array using linear search requires O(n) time, where 'n' is the number of elements in the array. This is because the algorithm may need to examine every element in the array to find or conclude the non-existence of the target.",
19 | "difficulty": "Easy",
20 | "remark": "A foundational concept in algorithm analysis. Illustrates how the running time of an algorithm scales with the input size.",
21 | "subfield": "Algorithm Analysis"
22 | },
23 | {
24 | "theorem": "The Properties of a Binary Tree",
25 |         "description": "For a binary tree: 1) The maximum number of nodes at level *l* is 2^l (where the root is at level 0). 2) The total number of nodes in a perfect binary tree of height *h* is 2^(h+1) - 1.",
26 | "difficulty": "Easy",
27 | "remark": "Fundamental for understanding and analyzing tree data structures. Used in many algorithmic designs.",
28 | "subfield": "Data Structures"
29 | },
30 | {
31 | "theorem": "The Triangle Inequality Theorem",
32 | "description": "The triangle inequality states that for any three points A, B, and C in a metric space (e.g., the Euclidean plane), the sum of the lengths of any two sides of a triangle must be greater than or equal to the length of the third side. |AB| + |BC| >= |AC|",
33 | "difficulty": "Easy",
"remark": "Often used in graph algorithms (e.g. proving properties of shortest paths). The principle is used as the basis of many distance metrics.",
35 | "subfield": "Computational Geometry"
36 | },
37 | {
38 | "theorem": "Hamming distance",
39 | "description": "In information theory, the Hamming distance between two strings or vectors of equal length is the number of positions at which the corresponding symbols are different.",
40 | "difficulty": "Easy",
41 | "remark": "",
42 | "subfield": "Information Theory"
43 | },
44 | {
45 | "theorem": "Big O notation",
"description": "Big O notation is the most common type of asymptotic notation in computer science; it describes an upper bound on a function's growth rate and is used to measure worst-case complexity.",
47 | "difficulty": "Easy",
48 | "remark": "",
49 | "subfield": "Algorithm Analysis"
50 | },
51 | {
52 | "theorem": "Deadlock",
53 | "description": "A deadlock is a situation where two or more processes are blocked waiting for each other to release resources, resulting in a circular wait condition.",
54 | "difficulty": "Easy",
55 | "remark": "",
56 | "subfield": "Operating Systems"
57 | },
58 | {
59 | "theorem": "Bubble Sort",
60 | "description": "Bubble sort is a simple sorting algorithm that repeatedly steps through the list, compares adjacent elements and swaps them if they are in the wrong order.",
61 | "difficulty": "Easy",
62 | "remark": "",
63 | "subfield": "Algorithms"
64 | },
65 | {
66 | "theorem": "Karnaugh Map",
67 | "description": "A Karnaugh map (K-map) is a graphical method for simplifying Boolean algebra expressions.",
68 | "difficulty": "Easy",
69 | "remark": "",
70 | "subfield": "Digital Logic Design"
71 | },
72 | {
73 | "theorem": "Hash table",
74 | "description": "A hash table uses a hash function to compute an index, also called a hash code, into an array of buckets or slots, from which the desired value can be found.",
75 | "difficulty": "Easy",
76 | "remark": "",
77 | "subfield": "Data Structures"
78 | },
79 | {
80 | "theorem": "Linked list",
"description": "A linked list is a data structure that does not necessarily store elements next to each other and instead works by maintaining, for each element, a link to the next element in the list.",
82 | "difficulty": "Easy",
83 | "remark": "",
84 | "subfield": "Data Structures"
85 | },
86 | {
87 | "theorem": "Chain Code",
88 | "description": "A chain code is a lossless compression based image segmentation method for binary images based upon tracing image contours. The basic principle of chain coding, like other contour codings, is to separately encode each connected component, or blob in the image.",
89 | "difficulty": "Easy",
90 | "remark": "",
91 | "subfield": "Image Processing"
92 | },
93 | {
94 | "theorem": "Signal-to-noise ratio",
95 | "description": "The signal-to-noise ratio (SNR) is a measure of the ratio between the power of a signal and the power of background noise.",
96 | "difficulty": "Easy",
97 | "remark": "",
98 | "subfield": "Signal Processing"
99 | },
100 | {
101 | "theorem": "Run-length encoding",
102 | "description": "Run-length encoding (RLE) is a form of data compression that encodes consecutive data elements by a single data value and count, rather than by the original data values.",
103 | "difficulty": "Easy",
104 | "remark": "",
105 | "subfield": "Data Compression"
106 | },
107 | {
108 | "theorem": "Elbow method",
109 | "description": "The elbow method is a graphical method for finding the optimal K value in a k-means clustering algorithm.",
110 | "difficulty": "Easy",
111 | "remark": "",
112 | "subfield": "Machine Learning"
113 | },
114 | {
115 | "theorem": "Huffman coding",
116 | "description": "In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression.",
117 | "difficulty": "Easy",
118 | "remark": "",
119 | "subfield": "Data Compression"
120 | },
121 | {
122 | "theorem": "Paging",
123 | "description": "Paging is a memory management technique used in operating systems to manage virtual memory. It involves dividing the virtual address space into fixed-size blocks called pages, and storing these pages in a secondary storage device called a paging file.",
124 | "difficulty": "Easy",
125 | "remark": "",
126 | "subfield": "Operating Systems"
127 | },
128 | {
129 | "theorem": "OSI model",
130 | "description": "The Open Systems Interconnection (OSI) model is a conceptual framework that describes how data is sent over a network.",
131 | "difficulty": "Easy",
132 | "remark": "",
133 | "subfield": "Computer Networks"
134 | },
135 | {
"theorem": "IEEE Conversion",
137 | "description": "The IEEE-754 standard describes floating-point formats, a way to represent real numbers in hardware.",
138 | "difficulty": "Easy",
139 | "remark": "",
140 | "subfield": "Computer Architecture"
141 | }
142 | ]
--------------------------------------------------------------------------------
/mllm_tools/litellm.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 | from typing import List, Dict, Any, Union, Optional
4 | import io
5 | import os
6 | import base64
7 | from PIL import Image
8 | import mimetypes
9 | import litellm
10 | from litellm import completion, completion_cost
11 | from dotenv import load_dotenv
12 |
13 | load_dotenv()
14 |
class LiteLLMWrapper:
    """Wrapper for LiteLLM to support multiple models and logging."""

    def __init__(
        self,
        model_name: str = "gpt-4-vision-preview",
        temperature: float = 0.7,
        print_cost: bool = False,
        verbose: bool = False,
        use_langfuse: bool = True,
    ):
        """
        Initialize the LiteLLM wrapper

        Args:
            model_name: Name of the model to use (e.g. "azure/gpt-4", "vertex_ai/gemini-pro")
            temperature: Temperature for completion
            print_cost: Whether to print the accumulated cost of completions
            verbose: Whether to enable LiteLLM debug logging
            use_langfuse: Whether to enable Langfuse logging
        """
        self.model_name = model_name
        self.temperature = temperature
        self.print_cost = print_cost
        self.verbose = verbose
        # Running total of completion costs across all calls (USD).
        self.accumulated_cost = 0

        if self.verbose:
            os.environ['LITELLM_LOG'] = 'DEBUG'

        # Set langfuse callback only if enabled
        if use_langfuse:
            litellm.success_callback = ["langfuse"]
            litellm.failure_callback = ["langfuse"]

    def _encode_file(self, file_path: Union[str, Image.Image]) -> str:
        """
        Encode local file or PIL Image to base64 string

        Args:
            file_path: Path to local file or PIL Image object

        Returns:
            Base64 encoded file string
        """
        if isinstance(file_path, Image.Image):
            # In-memory PIL image: serialize to PNG before encoding.
            buffered = io.BytesIO()
            file_path.save(buffered, format="PNG")
            return base64.b64encode(buffered.getvalue()).decode("utf-8")
        else:
            with open(file_path, "rb") as file:
                return base64.b64encode(file.read()).decode("utf-8")

    def _get_mime_type(self, file_path: str) -> str:
        """
        Get the MIME type of a file based on its extension

        Args:
            file_path: Path to the file

        Returns:
            MIME type as a string (e.g., "image/jpeg", "audio/mp3")

        Raises:
            ValueError: If the MIME type cannot be guessed from the extension.
        """
        mime_type, _ = mimetypes.guess_type(file_path)
        if mime_type is None:
            raise ValueError(f"Unsupported file type: {file_path}")
        return mime_type

    def __call__(self, messages: List[Dict[str, Any]], metadata: Optional[Dict[str, Any]] = None) -> str:
        """
        Process messages and return completion

        Args:
            messages: List of message dictionaries with 'type' and 'content' keys
            metadata: Optional metadata to pass to litellm completion, e.g. for Langfuse tracking

        Returns:
            Generated text response, or the exception text if the call failed.
        """
        if metadata is None:
            print("No metadata provided, using empty metadata")
            metadata = {}
        metadata["trace_name"] = f"litellm-completion-{self.model_name}"
        # Convert messages to LiteLLM format
        formatted_messages = []
        for msg in messages:
            if msg["type"] == "text":
                formatted_messages.append({
                    "role": "user",
                    "content": [{"type": "text", "text": msg["content"]}]
                })
            elif msg["type"] in ["image", "audio", "video"]:
                # Check if content is a local file path or PIL Image
                if isinstance(msg["content"], Image.Image) or os.path.isfile(msg["content"]):
                    try:
                        if isinstance(msg["content"], Image.Image):
                            # _encode_file serializes PIL images as PNG.
                            mime_type = "image/png"
                        else:
                            mime_type = self._get_mime_type(msg["content"])
                        base64_data = self._encode_file(msg["content"])
                        data_url = f"data:{mime_type};base64,{base64_data}"
                    except ValueError as e:
                        # Skip files whose MIME type cannot be determined.
                        print(f"Error processing file {msg['content']}: {e}")
                        continue
                else:
                    # Already a remote URL or data URL; pass through unchanged.
                    data_url = msg["content"]

                # Append the formatted message based on the model
                if "gemini" in self.model_name:
                    # NOTE(review): audio/video are also sent under "image_url"
                    # here (original behavior) — confirm against litellm docs.
                    formatted_messages.append({
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": data_url
                            }
                        ]
                    })
                elif "gpt" in self.model_name:
                    # GPT and other models expect a different format
                    if msg["type"] == "image":
                        # Default format for images in GPT
                        formatted_messages.append({
                            "role": "user",
                            "content": [
                                {
                                    "type": "image_url",
                                    f"{msg['type']}_url": {
                                        "url": data_url,
                                        "detail": "high"
                                    }
                                }
                            ]
                        })
                    else:
                        raise ValueError("For GPT, only text and image inferencing are supported")
                else:
                    raise ValueError("Only support Gemini and Gpt for Multimodal capability now")

        try:
            # OpenAI o-series reasoning models reject a temperature setting and
            # take a reasoning_effort instead. Build the kwargs locally so the
            # wrapper's state is not permanently mutated between calls
            # (bug fix: the original overwrote self.temperature forever).
            is_o_series = bool(
                re.match(r"^o\d+.*$", self.model_name)
                or re.match(r"^openai/o.*$", self.model_name)
            )
            completion_kwargs: Dict[str, Any] = {
                "model": self.model_name,
                "messages": formatted_messages,
                "metadata": metadata,
                "max_retries": 99,
            }
            if is_o_series:
                completion_kwargs["temperature"] = None
                completion_kwargs["reasoning_effort"] = "medium"
            else:
                completion_kwargs["temperature"] = self.temperature
            response = completion(**completion_kwargs)

            if self.print_cost:
                # pass your response from completion to completion_cost
                cost = completion_cost(completion_response=response)
                self.accumulated_cost += cost
                print(f"Accumulated Cost: ${self.accumulated_cost:.10f}")

            content = response.choices[0].message.content
            if content is None:
                print(f"Got null response from model. Full response: {response}")
            return content

        except Exception as e:
            print(f"Error in model completion: {e}")
            return str(e)
191 |
192 | if __name__ == "__main__":
193 | pass
--------------------------------------------------------------------------------
/data/thb_easy/physics.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "theorem": "Ohm's Law",
4 | "description": "The voltage (V) across a conductor is directly proportional to the current (I) flowing through it, given the resistance (R) remains constant. The formula is V = IR. This law holds for many materials, particularly metals, and components like resistors.",
5 | "difficulty": "Easy",
6 | "remark": "A cornerstone of circuit analysis. While it is an approximation, it's incredibly useful in solving basic circuit problems. The 'resistance' is a macroscopic property representing the ease of electron movement.",
7 | "subfield": "Electricity and Circuits"
8 | },
9 | {
10 | "theorem": "Newton's First Law of Motion",
11 | "description": "a body at rest remains at rest, or, if in motion, remains in motion at a constant velocity unless acted on by a net external force; also known as the law of inertia",
12 | "difficulty": "Easy",
13 | "remark": "This law is fundamental to understanding the relationship between force and motion. It establishes that forces cause acceleration which changes velocity. Applicable for solving motion problems where force and mass are known.",
14 | "subfield": "Classical Mechanics"
15 | },
16 | {
17 | "theorem": "Newton's Second Law of Motion",
18 | "description": "The net force (F_net) acting on an object is equal to the mass (m) of the object multiplied by its acceleration (a). F_net = ma. This law is fundamental to understanding the relationship between force and motion.",
19 | "difficulty": "Easy",
20 | "remark": "This is one of the most important laws in classical mechanics. It establishes that forces cause acceleration which changes velocity. Applicable for solving motion problems where force and mass are known.",
21 | "subfield": "Classical Mechanics"
22 | },
23 | {
24 | "theorem": "Hooke's law",
25 | "description": "In physics, Hooke's law is an empirical law which states that the force needed to extend or compress a spring by some distance scales linearly with respect to that distance.",
26 | "difficulty": "Easy",
27 | "remark": "This law is fundamental to understanding the relationship between force and motion. It establishes that forces cause acceleration which changes velocity. Applicable for solving motion problems where force and mass are known.",
28 | "subfield": "Classical Mechanics"
29 | },
30 | {
31 | "theorem": "Gravitational Force",
32 | "description": "In physics, gravity is a fundamental interaction primarily observed as mutual attraction between all things that have mass.",
33 | "difficulty": "Easy",
34 | "remark": "",
35 | "subfield": "Classical Mechanics"
36 | },
37 | {
38 | "theorem": "Centrifugal force",
39 | "description": "Centrifugal force is a fictitious force in Newtonian mechanics that appears to act on all objects when viewed in a rotating frame of reference. It appears to be directed radially away from the axis of rotation of the frame.",
40 | "difficulty": "Easy",
41 | "remark": "",
42 | "subfield": "Classical Mechanics"
43 | },
44 | {
45 | "theorem": "Kinetic energy",
"description": "In physics, the kinetic energy of an object is the form of energy that it possesses due to its motion. In classical mechanics, the kinetic energy of a non-rotating object of mass m traveling at a speed v is (1/2)mv^2.",
47 | "difficulty": "Easy",
48 | "remark": "",
49 | "subfield": "Classical Mechanics"
50 | },
51 | {
52 | "theorem": "Torque",
53 | "description": "Torque is a measure of the force that can cause an object to rotate about an axis. Just as force is what causes an object to accelerate in linear kinematics, torque is what causes an object to acquire angular acceleration. Torque is a vector quantity.",
54 | "difficulty": "Easy",
55 | "remark": "",
56 | "subfield": "Classical Mechanics"
57 | },
58 | {
59 | "theorem": "Right-hand rule",
60 | "description": "The right hand rule is a hand mnemonic used in physics to identify the direction of axes or parameters that point in three dimensions.",
61 | "difficulty": "Easy",
62 | "remark": "",
63 | "subfield": "Electromagnetism"
64 | },
65 | {
66 | "theorem": "Snell's Law",
67 | "description": "Relates the angles of incidence and refraction of light when passing between two different media. It states that n₁sin(θ₁) = n₂sin(θ₂), where n₁ and n₂ are the refractive indices of the two media, and θ₁ and θ₂ are the angles of incidence and refraction, respectively.",
68 | "difficulty": "Easy",
69 | "remark": "This theorem is fundamental to understanding how light bends when it travels through different materials, essential for studying optics (lenses, prisms). Its application involves using trigonometry.",
70 | "subfield": "Optics"
71 | },
72 | {
73 | "theorem": "The Ideal Gas Law",
74 | "description": "Relates the pressure (P), volume (V), temperature (T), and the number of moles (n) of an ideal gas: PV = nRT, where R is the ideal gas constant. It serves as a good approximation for the behavior of real gases under certain conditions.",
75 | "difficulty": "Easy",
76 | "remark": "Connects macroscopic gas properties and allows calculations involving gas behavior under varied conditions. Applicable for thermodynamics problems and understanding gas pressure, volume and temperature relationship.",
77 | "subfield": "Thermodynamics"
78 | },
79 | {
80 | "theorem": "Pascal's Principle",
81 | "description": "Pascal's law is a principle in fluid mechanics given by Blaise Pascal that states that a pressure change at any point in a confined incompressible fluid is transmitted throughout the fluid such that the same change occurs everywhere.",
82 | "difficulty": "Easy",
83 | "remark": "",
84 | "subfield": "Fluid Mechanics"
85 | },
86 | {
87 | "theorem": "Avogadro's number",
"description": "Avogadro's number (approximately 6.022 x 10^23) is the number of constituent particles in one mole of a substance; it allows the concept of the mole to be used to convert between mass and number of particles.",
89 | "difficulty": "Easy",
90 | "remark": "",
91 | "subfield": "Thermodynamics"
92 | },
93 | {
94 | "theorem": "Dalton's law of partial pressures",
95 | "description": "Dalton's law of partial pressures states that the total pressure of a mixture of gases is the sum of the partial pressures of its components.",
96 | "difficulty": "Easy",
97 | "remark": "",
98 | "subfield": "Thermodynamics"
99 | },
100 | {
101 | "theorem": "PV diagram",
102 | "description": "a graph of pressure vs. volume",
103 | "difficulty": "Easy",
104 | "remark": "",
105 | "subfield": "Thermodynamics"
106 | },
107 | {
108 | "theorem": "Color wavelengths",
109 | "description": "The wavelength of a color is the range of nanometers (nm) at which it appears in the visible light spectrum.",
110 | "difficulty": "Easy",
111 | "remark": "",
112 | "subfield": "Optics"
113 | },
114 | {
115 | "theorem": "Ultrasound",
116 | "description": "Ultrasound refers to sound waves with frequencies higher than the audible range for humans.",
117 | "difficulty": "Easy",
118 | "remark": "",
119 | "subfield": "Waves and Sound"
120 | },
121 | {
122 | "theorem": "Coulomb's law",
123 | "description": "Coulomb's inverse-square law, or simply Coulomb's law, is an experimental law of physics that calculates the amount of force between two electrically charged particles at rest. This electric force is conventionally called the electrostatic force or Coulomb force.",
124 | "difficulty": "Easy",
125 | "remark": "",
126 | "subfield": "Electromagnetism"
127 | },
128 | {
129 | "theorem": "Kirchhoff's voltage law",
130 | "description": "The sum of all the voltages around a loop is equal to zero.",
131 | "difficulty": "Easy",
132 | "remark": "",
133 | "subfield": "Electricity and Circuits"
134 | },
135 | {
136 | "theorem": "Thévenin's theorem",
137 | "description": "Thévenin's theorem states that any linear circuit containing several voltage sources and resistors can be simplified to a Thévenin-equivalent circuit with a single voltage source and resistance connected in series with a load.",
138 | "difficulty": "Easy",
139 | "remark": "",
140 | "subfield": "Electricity and Circuits"
141 | }
142 | ]
--------------------------------------------------------------------------------
/src/core/parse_video.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pysrt
3 | from moviepy import VideoFileClip
4 | import shutil
5 | from PIL import Image, ImageOps
6 | import numpy as np
7 | import speech_recognition as sr
8 |
def get_images_from_video(video_path, fps=0.2):
    """Extract frames from a video file at specified FPS.

    Args:
        video_path (str): Path to the video file.
        fps (float, optional): Frames per second to extract. Defaults to 0.2.

    Returns:
        list: List of frames as numpy arrays.
    """
    clip = VideoFileClip(video_path)
    try:
        # Materialize the frames before closing the clip. The original
        # returned the lazy iterator without ever closing the clip, which
        # leaked the underlying reader and contradicted the documented
        # "list" return type.
        return list(clip.iter_frames(fps=fps))
    finally:
        clip.close()
22 |
def image_with_most_non_black_space(images, output_path, return_type="path"):
    """Select the image with the largest non-black area and save it.

    Args:
        images (list): List of image file paths, PIL Image objects, or numpy arrays.
        output_path (str): Path where the output image should be saved.
        return_type (str, optional): "path" to return the saved path, anything
            else to return the PIL Image. Defaults to "path".

    Returns:
        Union[str, PIL.Image, None]: Path to saved image, PIL Image object,
        or None if no valid image was found.
    """
    best_image = None
    best_area = 0

    for candidate in images:
        try:
            # Normalize every supported input kind to a PIL Image.
            if isinstance(candidate, str):
                current = Image.open(candidate)
            elif isinstance(candidate, Image.Image):
                current = candidate
            elif isinstance(candidate, np.ndarray):
                current = Image.fromarray(candidate)
            else:
                print(f"Unsupported type: {type(candidate)}. Skipping.")
                continue

            # Pixels brighter than 10 count as "non-black"; the small
            # threshold tolerates slight variations in near-black regions.
            gray_array = np.array(ImageOps.grayscale(current))
            non_black_area = np.sum(gray_array > 10)

            if non_black_area > best_area:
                best_area = non_black_area
                best_image = current

        except Exception as e:
            print(f"Warning: Unable to process image {candidate}: {e}")

    if best_image is not None:
        best_image.save(output_path)
        print(f"Saved image with most non-black space to {output_path}")
        return output_path if return_type == "path" else best_image

    # Nothing could be processed.
    return best_image
75 |
def parse_srt_to_text(output_dir, topic_name):
    """Convert SRT subtitle file to plain text.

    Reads ``<topic>_combined.srt`` from the topic folder and writes the
    concatenated cue text to ``<topic>_combined.txt``.

    Args:
        output_dir (str): Directory containing the topic folders.
        topic_name (str): Name of the topic/video.
    """
    topic_name = topic_name.replace(" ", "_").lower()
    srt_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.srt")
    txt_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.txt")
    subs = pysrt.open(srt_path)

    # Normalize ellipses to periods and join the cues with single spaces.
    # A join avoids the quadratic string concatenation of the original and
    # no longer mutates the parsed subtitle objects as a side effect.
    full_text = " ".join(sub.text.replace("...", ".") for sub in subs)

    with open(txt_path, 'w') as f:
        f.write(full_text.strip())
94 |
def parse_srt_and_extract_frames(output_dir, topic_name):
    """Extract frames from video at subtitle timestamps and save with corresponding text.

    Groups consecutive subtitle cues until one ends with a period, then saves
    one video frame per group (keyed by the group's FIRST cue index) plus the
    group's concatenated text, and writes an index of all pairs to pairs.json
    inside an ``extract_images`` folder.

    Args:
        output_dir (str): Directory containing the topic folders.
        topic_name (str): Name of the topic/video.
    """
    topic_name = topic_name.replace(" ", "_").lower()
    video_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.mp4")
    srt_path = os.path.join(output_dir, topic_name, f"{topic_name}_combined.srt")
    subs = pysrt.open(srt_path)

    # Recreate the extract_images folder from scratch so stale frames from a
    # previous run never mix with the new output.
    images_dir = os.path.join(output_dir, topic_name, "extract_images")
    if os.path.exists(images_dir):
        shutil.rmtree(images_dir)
    os.makedirs(images_dir)

    # Load the video file
    video = VideoFileClip(video_path)

    # Dictionary to store image-text pairs (serialized to pairs.json below)
    pairs = {}

    i = 0
    while i < len(subs):
        sub = subs[i]
        text = sub.text
        sub_indexes = [sub.index]

        # Concatenate with following cues until the accumulated text ends in
        # a period (a sentence boundary) or we run out of cues.
        while i < len(subs) - 1 and not text.strip().endswith('.'):
            i += 1
            next_sub = subs[i]
            text += " " + next_sub.text
            sub_indexes.append(next_sub.index)

        # NOTE(review): `sub` is still the FIRST cue of the group here, so
        # this is the end time of the group's first cue, not the last one as
        # the original comment claimed — confirm whether that is intended.
        end_time = sub.end.to_time()
        # Convert end time to seconds
        end_time_seconds = end_time.hour * 3600 + end_time.minute * 60 + end_time.second + end_time.microsecond / 1e6

        # Save the frame as an image in extract_images folder, named after
        # the first cue index of the group.
        frame_path = os.path.join(images_dir, f"{sub.index}.jpg")
        video.save_frame(frame_path, t=end_time_seconds)

        # Save the subtitle text to a txt file
        text_path = os.path.join(images_dir, f"{sub.index}.txt")
        with open(text_path, 'w') as f:
            f.write(text)

        # Add pair to dictionary; paths are relative to images_dir.
        pairs[str(sub.index)] = {
            "image_path": f"{sub.index}.jpg",
            "text": text,
            "text_path": f"{sub.index}.txt",
            "srt_index": sub_indexes,
        }

        i += 1

    # Save pairs to json file
    import json
    json_path = os.path.join(images_dir, "pairs.json")
    with open(json_path, 'w') as f:
        json.dump(pairs, f, indent=4)

    # Close the video file
    video.close()
164 |
def extract_trasnscript(video_path):
    """Extract transcript from video audio using Google Speech Recognition.

    Args:
        video_path (str): Path to the video file.

    Returns:
        str: Transcribed text from the video audio.

    Raises:
        FileNotFoundError: If video file does not exist.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")

    clip = VideoFileClip(video_path)

    # write the video's audio track to a temporary audio file
    audio_path = os.path.join(os.path.dirname(video_path), "audio.wav")
    try:
        clip.audio.write_audiofile(audio_path)

        # extract the transcript from the audio file
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio)
    finally:
        # Release the video reader (bug fix: the clip was never closed in the
        # original) and clean up the temporary audio file.
        clip.close()
        if os.path.exists(audio_path):
            os.remove(audio_path)
196 |
if __name__ == "__main__":
    import argparse

    def process_all_topics(output_folder):
        """Process all topic folders in the output directory.

        Args:
            output_folder (str): Directory containing the topic folders.
        """
        # Only immediate subdirectories are treated as topics.
        subdirs = [
            entry for entry in os.listdir(output_folder)
            if os.path.isdir(os.path.join(output_folder, entry))
        ]

        for topic in subdirs:
            print(f"\nProcessing topic: {topic}")
            try:
                parse_srt_to_text(output_folder, topic)
                parse_srt_and_extract_frames(output_folder, topic)
            except Exception as e:
                # A failure in one topic should not stop the others.
                print(f"Error processing {topic}: {str(e)}")
                continue

    # Command-line entry point.
    parser = argparse.ArgumentParser(
        description='Process video files and extract frames with subtitles')
    parser.add_argument('--output_dir', type=str, default="output",
                        help='Directory containing the topic folders')
    args = parser.parse_args()

    # Process topics using provided output directory
    process_all_topics(args.output_dir)
--------------------------------------------------------------------------------
/data/thb_easy/chemistry.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "theorem": "The Aufbau Principle",
4 | "description": "Electrons fill atomic orbitals in order of increasing energy levels. This means the lowest energy orbitals are filled first, followed by higher energy orbitals. This helps in predicting electronic configuration and understanding the properties of elements.",
5 | "difficulty": "Easy",
6 | "remark": "Fundamental principle for building the electron configurations of atoms and understanding the periodic table.",
7 | "subfield": "Atomic Structure"
8 | },
9 | {
10 | "theorem": "The Law of Conservation of Mass",
11 | "description": "In a closed system, the total mass of the reactants is equal to the total mass of the products. This implies that matter is neither created nor destroyed during a chemical reaction, only transformed. This principle is fundamental for understanding stoichiometry.",
12 | "difficulty": "Easy",
13 | "remark": "A cornerstone of chemistry, this principle allows us to balance chemical equations and make quantitative predictions.",
14 | "subfield": "Chemical Reactions and Stoichiometry"
15 | },
16 | {
17 | "theorem": "The Octet Rule",
18 | "description": "Atoms tend to gain, lose, or share electrons in order to achieve a full outer shell of eight electrons (or two in the case of hydrogen and some other exceptions). This explains the bonding behaviour of most main group elements, guiding the formations of compounds.",
19 | "difficulty": "Easy",
20 | "remark": "Simple and powerful rule to understand the formations of chemical bonds and predict molecules' structures.",
21 | "subfield": "Chemical Bonding"
22 | },
23 | {
24 | "theorem": "Alkali metals",
25 | "description": "The alkali metals consist of the chemical elements lithium (Li), sodium (Na), potassium (K), rubidium (Rb), caesium (Cs), and francium (Fr).",
26 | "difficulty": "Easy",
27 | "remark": "",
28 | "subfield": "Periodic Table and Elements"
29 | },
30 | {
31 | "theorem": "Distillation",
32 | "description": "In chemistry, Distillation is among the most useful methods available to chemists for separating the parts of a liquid. A process that relies on a cycle of heating, vaporization, condensing and cooling. A liquid of a lower boiling point will vaporize before a liquid of higher boiling point.",
33 | "difficulty": "Easy",
34 | "remark": "",
35 | "subfield": "Separation Techniques"
36 | },
37 | {
38 | "theorem": "Crystallization",
39 | "description": "In chemistry, Crystallization, or crystallisation, is the process of atoms or molecules arranging into a well-defined, rigid crystal lattice in order to minimize their energetic state. The smallest entity of a crystal lattice is called a unit cell, which can accept atoms or molecules to grow a macroscopic crystal.",
40 | "difficulty": "Easy",
41 | "remark": "",
42 | "subfield": "Solid State Chemistry"
43 | },
44 | {
45 | "theorem": "Titration",
46 | "description": "Titration is a common laboratory method of quantitative chemical analysis to determine the concentration of an identified analyte. A reagent, termed the titrant or titrator, is prepared as a standard solution of known concentration and volume.",
47 | "difficulty": "Easy",
48 | "remark": "",
49 | "subfield": "Analytical Chemistry"
50 | },
51 | {
52 | "theorem": "Ionic Compound",
53 | "description": "An ionic compound is a chemical compound composed of ions. Ionic compounds are formed by the electrostatic attraction between positively charged cations and negatively charged anions.",
54 | "difficulty": "Easy",
55 | "remark": "",
56 | "subfield": "Chemical Bonding"
57 | },
58 | {
59 | "theorem": "Noble gas",
60 | "description": "The noble gases are so named because they rarely react with other elements. Helium, neon, argon, krypton, xenon and radon atoms all have a full outer valence shell of electrons, which makes them quite unreactive.",
61 | "difficulty": "Easy",
62 | "remark": "",
63 | "subfield": "Periodic Table and Elements"
64 | },
65 | {
66 | "theorem": "Transition Metal",
67 | "description": "Transition metal, any of various chemical elements that have valence electrons—i.e., electrons that can participate in the formation of chemical bonds—in two shells instead of only one.",
68 | "difficulty": "Easy",
69 | "remark": "",
70 | "subfield": "Periodic Table and Elements"
71 | },
72 | {
73 | "theorem": "Balance Chemical Equation",
74 | "description": "A balanced equation is an equation for a chemical reaction in which the number of atoms for each element in the reaction and the total charge are the same for both the reactants and the products.",
75 | "difficulty": "Easy",
76 | "remark": "",
77 | "subfield": "Chemical Reactions and Stoichiometry"
78 | },
79 | {
80 | "theorem": "Combustion analysis",
81 | "description": "Combustion analysis is a method used in both organic chemistry and analytical chemistry to determine the elemental composition (more precisely empirical formula) of a pure organic compound by combusting the sample under conditions where the resulting combustion products can be quantitatively analyzed.",
82 | "difficulty": "Easy",
83 | "remark": "",
84 | "subfield": "Analytical Chemistry"
85 | },
86 | {
87 | "theorem": "Oxidation",
88 | "description": "In chemistry, the oxidation state, or oxidation number, is the hypothetical charge of an atom if all of its bonds to other atoms were fully ionic. It describes the degree of oxidation of an atom in a chemical compound. Conceptually, the oxidation state may be positive, negative or zero.",
89 | "difficulty": "Easy",
90 | "remark": "",
91 | "subfield": "Redox Chemistry"
92 | },
93 | {
94 | "theorem": "First law of thermodynamics",
95 | "description": "The first law of thermodynamics is a formulation of the law of conservation of energy in the context of thermodynamic processes. The law distinguishes two principal forms of energy transfer, heat and thermodynamic work, that modify a thermodynamic system containing a constant amount of matter.",
96 | "difficulty": "Easy",
97 | "remark": "",
98 | "subfield": "Thermodynamics"
99 | },
100 | {
101 | "theorem": "Hess's Law",
102 | "description": "The enthalpy change of a reaction is independent of the path taken from reactants to products. This allows the calculation of enthalpy changes for reactions that cannot be easily measured directly by using a series of reactions with known enthalpy changes. The overall enthalpy change is the sum of enthalpy changes of individual steps.",
103 | "difficulty": "Easy",
104 | "remark": "Useful for calculating enthalpy changes of complex reactions. It's based on the state function of enthalpy.",
105 | "subfield": "Thermodynamics"
106 | },
107 | {
108 | "theorem": "The Ideal Gas Law",
109 | "description": "The product of the pressure and volume of an ideal gas is proportional to the product of the amount of gas and its absolute temperature: PV = nRT. This law describes the behavior of ideal gases and helps predict their volume, pressure, temperature, or amount under given conditions.",
110 | "difficulty": "Easy",
111 | "remark": "Ideal for understanding the behaviour of gases, often used in stoichiometry related to gases. Assumes no intermolecular forces or particle volume.",
112 | "subfield": "Gas Laws"
113 | },
114 | {
115 | "theorem": "Charles's Law",
116 | "description": "Charles's law (also known as the law of volumes) is an experimental gas law that describes how gases tend to expand when heated.",
117 | "difficulty": "Easy",
118 | "remark": "",
119 | "subfield": "Gas Laws"
120 | },
121 | {
122 | "theorem": "Gay-Lussac's Law",
123 | "description": "Gay-Lussac's law usually refers to Joseph-Louis Gay-Lussac's law of combining volumes of gases, discovered in 1808 and published in 1809.",
124 | "difficulty": "Easy",
125 | "remark": "",
126 | "subfield": "Gas Laws"
127 | },
128 | {
129 | "theorem": "pH Scale Definition",
130 | "description": "pH is a measure of the hydrogen ion concentration in a solution.",
131 | "difficulty": "Easy",
132 | "remark": "",
133 | "subfield": "Acid-Base Chemistry"
134 | },
135 | {
136 | "theorem": "Van't Hoff Equation",
137 | "description": "The Van 't Hoff equation relates the change in the equilibrium constant of a chemical reaction to the change in temperature, given the standard enthalpy change of the process.",
138 | "difficulty": "Easy",
139 | "remark": "",
140 | "subfield": "Chemical Kinetics"
141 | }
142 | ]
--------------------------------------------------------------------------------
/data/thb_medium/comp_sci.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "theorem": "The Halting Problem (Undecidability)",
4 | "description": "There is no general algorithm (or program) that can determine, for any arbitrary computer program and its input, whether the program will eventually halt (stop) or run forever.",
5 | "difficulty": "Medium",
6 | "remark": "A core concept in theoretical computer science. Introduces the idea of limits of computation. Understanding the proof (usually using diagonalization) is key to grasp the concept. Usually taught in discrete math or Theory of Computation.",
7 | "subfield": "Theory of Computation"
8 | },
9 | {
10 | "theorem": "The Time Complexity of Binary Search",
11 | "description": "In the worst case, searching for an element in a sorted array using binary search requires O(log n) time, where n is the number of elements in the array. This efficiency arises from repeatedly dividing the search interval in half.",
12 | "difficulty": "Medium",
13 | "remark": "Highlights the power of divide-and-conquer algorithms. Illustrates why sorted data structures are often essential. Requires understanding of logarithms",
14 | "subfield": "Algorithms"
15 | },
16 | {
17 | "theorem": "The Correctness of Simple Sorting Algorithm (e.g. Bubble Sort)",
18 | "description": "Bubble sort repeatedly compares adjacent elements and swaps them if they are in the wrong order. We can formally prove that after n-1 passes, the array will be sorted. Proving it involves demonstrating that the largest element is 'bubbled' to the end of the array in each pass, by using loop invariants.",
19 | "difficulty": "Medium",
20 | "remark": "Demonstrates how to formally analyze simple algorithms for their correctness and requires some understanding of loop invariants. Useful for introduction to proofs in algorithm.",
21 | "subfield": "Algorithms"
22 | },
23 | {
24 | "theorem": "The Church-Turing Thesis",
25 | "description": "All models of computation that we know can compute what is Turing computable. In other words, if an effective method (algorithm) for solving a problem exists at all, then a Turing machine can also compute a solution, and vice versa.",
26 | "difficulty": "Medium",
27 | "remark": "A fundamental principle in theoretical computer science. It defines the limit of computability. It links different computational models to a single class. Requires an understanding of the Turing Machine.",
28 | "subfield": "Theory of Computation"
29 | },
30 | {
31 | "theorem": "The Relationship between Recursion and Induction",
32 | "description": "Recursive functions can be proven correct and analyzed with mathematical induction. The base case of induction matches the base case in the recursive function. The induction step corresponds to the recursive step.",
33 | "difficulty": "Medium",
34 | "remark": "Connects two key concepts in Computer Science. Illustrates how induction can be used to prove correctness of recursive algorithms and mathematical induction can be used to define recursive functions. Important for formal analysis.",
35 | "subfield": "Programming Fundamentals"
36 | },
37 | {
38 | "theorem": "Chroma Subsampling",
39 | "description": "Chroma subsampling is a technique used in digital image processing to reduce the amount of data required to represent an image. It involves reducing the number of color channels or samples per pixel in an image, typically by using fewer bits for chroma (color) information.",
40 | "difficulty": "Medium",
41 | "remark": "",
42 | "subfield": "Image Processing"
43 | },
44 | {
45 | "theorem": "Median filtering",
46 | "description": "Median filtering is a non-linear digital filtering technique that is used to remove noise from an image or signal. It works by replacing each pixel with the median value of the pixels in its neighborhood.",
47 | "difficulty": "Medium",
48 | "remark": "",
49 | "subfield": "Image Processing"
50 | },
51 | {
52 | "theorem": "Shannon Lower bound",
53 | "description": "The Shannon Lower Bound refers to a theoretical limit in information theory that represents the minimum entropy or information required to encode a random source. It is tied to the Shannon Entropy, which quantifies the average information content of a random variable.",
54 | "difficulty": "Medium",
55 | "remark": "",
56 | "subfield": "Information Theory"
57 | },
58 | {
59 | "theorem": "Dijkstra's algorithm",
60 | "description": "Dijkstra's algorithm maintains a priority queue of vertices in the graph, ordered by distance from the start, and repeatedly selects the next shortest path to an unvisited part of the graph.",
61 | "difficulty": "Medium",
62 | "remark": "",
63 | "subfield": "Graph Theory"
64 | },
65 | {
66 | "theorem": "K-means clustering",
67 | "description": "K-means clustering is a method of clustering that partitions the dataset into K clusters, where each cluster is represented by its centroid or center point.",
68 | "difficulty": "Medium",
69 | "remark": "",
70 | "subfield": "Machine Learning"
71 | },
72 | {
73 | "theorem": "K-nearest neighbors",
74 | "description": "K-nearest neighbors (KNN) is a simple and effective classification algorithm that works by finding the K closest data points in the training set to a new data point and then assigning the class label based on the majority class of these neighbors.",
75 | "difficulty": "Medium",
76 | "remark": "",
77 | "subfield": "Machine Learning"
78 | },
79 | {
80 | "theorem": "Gradient descent",
81 | "description": "Common optimization algorithm used in machine learning to minimize a loss function.",
82 | "difficulty": "Medium",
83 | "remark": "",
84 | "subfield": "Machine Learning"
85 | },
86 | {
87 | "theorem": "Markov Decision Processes",
88 | "description": "A Markov decision process (MDP) refers to a stochastic decision-making process that uses a mathematical framework to model the decision-making of a dynamic system.",
89 | "difficulty": "Medium",
90 | "remark": "",
91 | "subfield": "Machine Learning"
92 | },
93 | {
94 | "theorem": "ALOHA network",
95 | "description": "ALOHA is basically a multiple access protocol which describes how all the terminals can access a medium without interfering at all with one another or even colliding. It operates at the data-link layer.",
96 | "difficulty": "Medium",
97 | "remark": "",
98 | "subfield": "Computer Networks"
99 | },
100 | {
101 | "theorem": "Discrete Cosine Transform",
102 | "description": "A discrete cosine transform (DCT) expresses a finite sequence of data points in terms of a sum of cosine functions oscillating at different frequencies.",
103 | "difficulty": "Medium",
104 | "remark": "",
105 | "subfield": "Digital Signal Processing"
106 | },
107 | {
108 | "theorem": "Master Theorem",
109 | "description": "The master theorem is used in calculating the time complexity of recurrence relations (divide and conquer algorithms) in a simple and quick way.",
110 | "difficulty": "Medium",
111 | "remark": "",
112 | "subfield": "Algorithms"
113 | },
114 | {
115 | "theorem": "Fast Fourier Transform",
116 | "description": "A fast Fourier transform (FFT) is an algorithm that computes the Discrete Fourier Transform (DFT) of a sequence, or its inverse (IDFT).",
117 | "difficulty": "Medium",
118 | "remark": "",
119 | "subfield": "Digital Signal Processing"
120 | },
121 | {
122 | "theorem": "SR latch",
123 | "description": "S-R latches i.e., Set-Reset latches are the simplest form of latches and are implemented using two inputs: S (Set) and R (Reset).",
124 | "difficulty": "Medium",
125 | "remark": "",
126 | "subfield": "Digital Logic"
127 | },
128 | {
129 | "theorem": "TCP Reno",
130 | "description": "TCP Reno is a classic congestion control algorithm that was introduced in the early 1990s. It uses a mechanism called additive increase multiplicative decrease (AIMD) to adjust the TCP window size, which is the amount of data that can be sent without waiting for an acknowledgment.",
131 | "difficulty": "Medium",
132 | "remark": "",
133 | "subfield": "Computer Networks"
134 | },
135 | {
136 | "theorem": "Chord P2P Network and finger table",
137 | "description": "Chord addresses the peer addressability, peer findability, and message routability challenges by organizing all peers in the P2P network into a single virtual ring.",
138 | "difficulty": "Medium",
139 | "remark": "",
140 | "subfield": "Computer Networks"
141 | }
142 | ]
--------------------------------------------------------------------------------
/data/thb_hard/math.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "theorem": "Taylor's theorem",
4 | "description": "Taylor's theorem gives an approximation of a k-times differentiable function around a given point by a polynomial of degree k, called the k-th order Taylor polynomial.",
5 | "difficulty": "Hard",
6 | "remark": "",
7 | "subfield": "Calculus"
8 | },
9 | {
10 | "theorem": "Simpson's rule",
11 | "description": "In numerical integration, Simpson's rules are several approximations for definite integrals, named after Thomas Simpson.",
12 | "difficulty": "Hard",
13 | "remark": "",
14 | "subfield": "Numerical Analysis"
15 | },
16 | {
17 | "theorem": "Velocity vector",
18 | "description": "Velocity is the speed in combination with the direction of motion of an object.",
19 | "difficulty": "Hard",
20 | "remark": "",
21 | "subfield": "Vector Calculus"
22 | },
23 | {
24 | "theorem": "Double Riemann sum",
25 | "description": "A double Riemann sum is a mathematical method used to approximate the value of a double integral over a two-dimensional region.",
26 | "difficulty": "Hard",
27 | "remark": "",
28 | "subfield": "Multivariable Calculus"
29 | },
30 | {
31 | "theorem": "Fubini's theorem",
32 | "description": "Fubini's Theorem is a fundamental result in calculus that allows the evaluation of a double integral as an iterated integral, provided certain conditions are met. It simplifies the computation of double integrals over a rectangular or general region by breaking them into two single integrals.",
33 | "difficulty": "Hard",
34 | "remark": "",
35 | "subfield": "Multivariable Calculus"
36 | },
37 | {
38 | "theorem": "Jacobian matrix and determinant",
39 | "description": "In vector calculus, the Jacobian matrix of a vector-valued function of several variables is the matrix of all its first-order partial derivatives.",
40 | "difficulty": "Hard",
41 | "remark": "",
42 | "subfield": "Vector Calculus"
43 | },
44 | {
45 | "theorem": "Green's theorem",
46 | "description": "Green's theorem relates a line integral around a simple closed curve C to a double integral over the plane region D bounded by C.",
47 | "difficulty": "Hard",
48 | "remark": "",
49 | "subfield": "Vector Calculus"
50 | },
51 | {
52 | "theorem": "Stokes' theorem",
53 | "description": "Stokes' theorem relates the surface integral of the curl of a vector field over a surface S to a line integral of the field around the boundary curve C of the surface S.",
54 | "difficulty": "Hard",
55 | "remark": "",
56 | "subfield": "Vector Calculus"
57 | },
58 | {
59 | "theorem": "Burnside's Lemma",
60 | "description": "Burnside's Lemma, also known as the Cauchy-Frobenius Lemma or the Orbit-Counting Theorem, is a fundamental result in combinatorics that deals with counting the number of distinct elements in a set under the action of a group. It is particularly useful in counting problems involving symmetries and permutations.\n\nThe lemma is named after the British mathematician William Burnside, who contributed significantly to the development of group theory.\n\nStatement of Burnside's Lemma:\n\nLet G be a finite group that acts on a finite set X. Then the number of distinct orbits of X under the action of G is given by:\n\n(1/|G|) * \u03a3 |Fix(g)|\n\nwhere |G| is the order of the group (i.e., the number of elements in G), the sum is taken over all elements g in G, and |Fix(g)| is the number of elements in X that are fixed by the action of g (i.e., the number of elements x in X such that g(x) = x).\n\nIn simpler terms, Burnside's Lemma states that the number of distinct orbits (or equivalence classes) in a set under the action of a group can be found by averaging the number of fixed points of each group element.\n\nBurnside's Lemma is often used in combinatorial problems where we need to count the number of distinct configurations of an object, taking into account its symmetries. By applying the lemma, we can avoid overcounting configurations that are equivalent under a given symmetry operation.",
61 | "difficulty": "Hard",
62 | "remark": "",
63 | "subfield": "Group Theory"
64 | },
65 | {
66 | "theorem": "Lah Number",
67 | "description": "In mathematics, the (signed and unsigned) Lah numbers are coefficients expressing rising factorials in terms of falling factorials and vice versa.",
68 | "difficulty": "Hard",
69 | "remark": "",
70 | "subfield": "Combinatorics"
71 | },
72 | {
73 | "theorem": "Ramsey's theorem",
74 | "description": "Ramsey's theorem essentially states that if a structure (such as a graph or a set of numbers) is large enough, then some kind of order or regularity will always emerge, no matter how it is arranged or colored.",
75 | "difficulty": "Hard",
76 | "remark": "",
77 | "subfield": "Combinatorics"
78 | },
79 | {
80 | "theorem": "Schwarz Lemma theorem",
81 | "description": "Schwarz Lemma is a fundamental result in complex analysis that provides a bound on the behavior of holomorphic functions (i.e., complex-differentiable functions) in the unit disk. It is named after the German mathematician Hermann Schwarz.\n\nStatement of Schwarz Lemma:\n\nLet f be a holomorphic function on the open unit disk D = {z \u2208 \u2102 : |z| < 1} such that f(0) = 0 and |f(z)| \u2264 1 for all z \u2208 D. Then, for all z \u2208 D, the following inequalities hold:\n\n1. |f(z)| \u2264 |z|\n2. |f'(0)| \u2264 1\n\nMoreover, if equality holds for some z \u2260 0 (i.e., |f(z)| = |z|) or |f'(0)| = 1, then f is a rotation, i.e., f(z) = e^(i\u03b8)z for some real \u03b8.\n\nThe Schwarz Lemma has several important consequences and generalizations in complex analysis, such as the Riemann Mapping Theorem and the Pick's Lemma. It is a powerful tool for understanding the behavior of holomorphic functions in the unit disk and provides a way to compare the size of their derivatives at the origin.",
82 | "difficulty": "Hard",
83 | "remark": "",
84 | "subfield": "Complex Analysis"
85 | },
86 | {
87 | "theorem": "Cauchy Riemann Theorem",
88 | "description": "The Cauchy-Riemann Theorem is a fundamental result in complex analysis, a branch of mathematics that studies functions of complex variables. It provides necessary and sufficient conditions for a complex function to be holomorphic (complex differentiable) in a given domain.",
89 | "difficulty": "Hard",
90 | "remark": "",
91 | "subfield": "Complex Analysis"
92 | },
93 | {
94 | "theorem": "Morera's Theorem",
95 | "description": "Morera's theorem, named after Giacinto Morera, gives an important criterion for proving that a function is holomorphic.",
96 | "difficulty": "Hard",
97 | "remark": "",
98 | "subfield": "Complex Analysis"
99 | },
100 | {
101 | "theorem": "Catalan-Mingantu Number",
102 | "description": "The Catalan numbers are a sequence of natural numbers that occur in various counting problems, often involving recursively defined objects. ",
103 | "difficulty": "Hard",
104 | "remark": "",
105 | "subfield": "Combinatorics"
106 | },
107 | {
108 | "theorem": "Liouville's theorem",
109 | "description": "In complex analysis, Liouville's theorem states that every bounded entire function (a function that is holomorphic on the whole complex plane and bounded in absolute value) must be constant.",
110 | "difficulty": "Hard",
111 | "remark": "",
112 | "subfield": "Complex Analysis"
113 | },
114 | {
115 | "theorem": "Derangement Formula",
116 | "description": "In combinatorial mathematics, a derangement is a permutation of the elements of a set in which no element appears in its original position.",
117 | "difficulty": "Hard",
118 | "remark": "",
119 | "subfield": "Combinatorics"
120 | },
121 | {
122 | "theorem": "Delian problem",
123 | "description": "Doubling the cube, also known as the Delian problem, is an ancient geometric problem. Given the edge of a cube, the problem requires the construction of the edge of a second cube whose volume is double that of the first.",
124 | "difficulty": "Hard",
125 | "remark": "",
126 | "subfield": "Geometry"
127 | },
128 | {
129 | "theorem": "Polya's Enumeration Theorem",
130 | "description": "Pólya's Enumeration Theorem, also known as Pólya's Counting Theorem, is a powerful result in combinatorics used to count distinct arrangements or configurations of objects that are invariant under a group of symmetries.",
131 | "difficulty": "Hard",
132 | "remark": "",
133 | "subfield": "Combinatorics"
134 | },
135 | {
136 | "theorem": "Cauchy's theorem",
137 | "description": "Cauchy's Theorem is a fundamental result in group theory, a branch of abstract algebra. It provides a condition under which a finite group contains an element of a specific order. It is named after the French mathematician Augustin-Louis Cauchy.",
138 | "difficulty": "Hard",
139 | "remark": "",
140 | "subfield": "Group Theory"
141 | }
142 | ]
--------------------------------------------------------------------------------
/data/thb_easy/math.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "theorem": "The Pythagorean Theorem",
4 | "description": "In a right-angled triangle, the square of the length of the hypotenuse (the side opposite the right angle) is equal to the sum of the squares of the lengths of the other two sides. If a and b are the lengths of the legs and c is the length of the hypotenuse, then a\u00b2 + b\u00b2 = c\u00b2.",
5 | "difficulty": "Easy",
6 | "remark": "Fundamental theorem in geometry; widely used in various fields.",
7 | "subfield": "Geometry"
8 | },
9 | {
10 | "theorem": "Properties of Kites",
11 | "description": "A kite is a quadrilateral with two pairs of adjacent, congruent sides. In geometry, kites have several unique properties that distinguish them from other quadrilaterals. Here are some of the key properties of kites:\n\n1. Two pairs of adjacent sides are congruent: In a kite, there are two distinct pairs of adjacent sides that have equal length. This means that if one pair of sides has a length of 'a', the other pair will also have a length of 'a', and if the other pair has a length of 'b', the first pair will also have a length of 'b'.\n\n2. Diagonals are perpendicular: The diagonals of a kite intersect at a 90-degree angle, meaning they are perpendicular to each other.\n\n3. One diagonal is bisected: In a kite, one of the diagonals is bisected by the other diagonal, meaning it is divided into two equal parts. This property is true for the diagonal connecting the vertices between the congruent sides.\n\n4. One pair of opposite angles is congruent: In a kite, the angles between the congruent sides (the angles formed by the two pairs of equal sides) are congruent, meaning they have the same degree measure.\n\n5. Area: The area of a kite can be calculated using the lengths of its diagonals. If 'd1' and 'd2' are the lengths of the diagonals, the area of the kite is given by the formula: Area = (1/2) * d1 * d2.\n\n6. Circumscribed circle: A kite can have a circumscribed circle only if it is a rhombus (all sides are congruent) or a square (all sides and angles are congruent).\n\n7. Inscribed circle: A kite can have an inscribed circle only if it is a square (all sides and angles are congruent).\n\nThese properties make kites an interesting and unique type of quadrilateral in geometry.",
12 | "difficulty": "Easy",
13 | "remark": "Properties of kites are useful for solving geometry problems involving kites.",
14 | "subfield": "Geometry"
15 | },
16 | {
17 | "theorem": "Euler's formula",
18 | "description": "Euler's formula is a fundamental equation in complex analysis that establishes a deep connection between trigonometry and complex exponentials. It is named after the Swiss mathematician Leonhard Euler. The formula is given by:\n\ne^(ix) = cos(x) + i*sin(x)\n\nwhere e is the base of the natural logarithm (approximately 2.71828), i is the imaginary unit (i^2 = -1), x is a real number, and cos(x) and sin(x) are the trigonometric functions cosine and sine, respectively.\n\nEuler's formula demonstrates that complex exponentials can be expressed in terms of trigonometric functions, and vice versa. This relationship is particularly useful in various fields of mathematics, physics, and engineering, as it simplifies calculations involving complex numbers and trigonometric functions.\n\nOne of the most famous consequences of Euler's formula is Euler's identity, which is obtained by setting x = \u03c0 in the formula:\n\ne^(i\u03c0) + 1 = 0\n\nEuler's identity is considered one of the most beautiful equations in mathematics, as it combines five fundamental constants (e, i, \u03c0, 1, and 0) in a simple and elegant relationship.",
19 | "difficulty": "Easy",
20 | "remark": "Euler's formula is widely used in various fields, including engineering, physics, and computer science.",
21 | "subfield": "Complex Analysis"
22 | },
23 | {
24 | "theorem": "Laws of Exponents",
25 | "description": "The laws of exponents simplify the multiplication and division operations.",
26 | "difficulty": "Easy",
27 | "remark": "",
28 | "subfield": "Algebra"
29 | },
30 | {
31 | "theorem": "One-to-one function",
32 | "description": "A function for which each value of the output is associated with a unique input value.",
33 | "difficulty": "Easy",
34 | "remark": "",
35 | "subfield": "Functions"
36 | },
37 | {
38 | "theorem": "Inverse function",
39 | "description": "For any one-to-one function f(x), the inverse is a function f^(-1)(x) such that f^(-1)(f(x))=x for all x in the domain of f; this also implies that f(f^(-1)(x))=x for all x in the domain of f^(-1)",
40 | "difficulty": "Easy",
41 | "remark": "",
42 | "subfield": "Functions"
43 | },
44 | {
45 | "theorem": "Remainder theorem",
46 | "description": "The remainder theorem states that when a polynomial p(x) is divided by a linear polynomial (x - a), then the remainder is equal to p(a).",
47 | "difficulty": "Easy",
48 | "remark": "",
49 | "subfield": "Algebra"
50 | },
51 | {
52 | "theorem": "Rational Zero Theorem",
53 | "description": "The rational root theorem, also known as the rational zero theorem (or rational zero test), is used to determine the possible rational roots of a polynomial function with integer coefficients.",
54 | "difficulty": "Easy",
55 | "remark": "",
56 | "subfield": "Algebra"
57 | },
58 | {
59 | "theorem": "Product-to-sum formula",
60 | "description": "The product-to-sum formulas are trigonometric identities that rewrite products of sines and cosines as sums or differences of trigonometric functions.",
61 | "difficulty": "Easy",
62 | "remark": "",
63 | "subfield": "Geometry"
64 | },
65 | {
66 | "theorem": "Heron's formula",
67 | "description": "Heron's formula is a formula that is used to find the area of a triangle when the lengths of all three sides are known.",
68 | "difficulty": "Easy",
69 | "remark": "",
70 | "subfield": "Geometry"
71 | },
72 | {
73 | "theorem": "De Moivre's Theorem",
74 | "description": "Formula used to find the nth power or nth roots of a complex number; states that, for a positive integer n, z^n is found by raising the modulus to the nth power and multiplying the angles by n",
75 | "difficulty": "Easy",
76 | "remark": "",
77 | "subfield": "Complex Analysis"
78 | },
79 | {
80 | "theorem": "Cramer's Rule",
81 | "description": "A method for solving systems of equations that have the same number of equations as variables using determinants.",
82 | "difficulty": "Easy",
83 | "remark": "",
84 | "subfield": "Algebra"
85 | },
86 | {
87 | "theorem": "Angle of rotation",
88 | "description": "An angle of rotation is the measure of the amount that a figure is rotated about a fixed point called a point of rotation.",
89 | "difficulty": "Easy",
90 | "remark": "",
91 | "subfield": "Geometry"
92 | },
93 | {
94 | "theorem": "Similar Triangles Theorem",
95 | "description": "Two triangles are similar if their corresponding angles are equal and their corresponding sides are proportional.",
96 | "difficulty": "Easy",
97 | "remark": "",
98 | "subfield": "Geometry"
99 | },
100 | {
101 | "theorem": "Congruent Triangles Theorem",
102 | "description": "Two triangles are congruent if they satisfy any of these criteria: SSS (Side-Side-Side), SAS (Side-Angle-Side), ASA (Angle-Side-Angle), AAS (Angle-Angle-Side), or HL (Hypotenuse-Leg) for right triangles.",
103 | "difficulty": "Easy",
104 | "remark": "",
105 | "subfield": "Geometry"
106 | },
107 | {
108 | "theorem": "Geometric Sequence",
109 | "description": "For a geometric sequence with the first term a, common ratio r, and n terms, the sum is: S_n = a * (1 - r^n) / (1 - r) for r != 1",
110 | "difficulty": "Easy",
111 | "remark": "",
112 | "subfield": "Sequences and Series"
113 | },
114 | {
115 | "theorem": "Arithmetic Sequence",
116 | "description": "For an arithmetic sequence with the first term a, common difference d, and n terms, the sum is: S_n = (n/2) * (2a + (n-1)d)",
117 | "difficulty": "Easy",
118 | "remark": "",
119 | "subfield": "Sequences and Series"
120 | },
121 | {
122 | "theorem": "Permutation",
123 | "description": "The term permutation refers to a mathematical calculation of the number of ways a particular set can be arranged.",
124 | "difficulty": "Easy",
125 | "remark": "",
126 | "subfield": "Combinatorics"
127 | },
128 | {
129 | "theorem": "Directrix",
130 | "description": "A line perpendicular to the axis of symmetry of a parabola; a line such that the ratio of the distance between the points on the conic and the focus to the distance to the directrix is constant.",
131 | "difficulty": "Easy",
132 | "remark": "",
133 | "subfield": "Conic Sections"
134 | },
135 | {
136 | "theorem": "Eccentricity",
137 | "description": "The eccentricity of a conic section is a non-negative real number that uniquely characterizes its shape.",
138 | "difficulty": "Easy",
139 | "remark": "",
140 | "subfield": "Conic Sections"
141 | }
142 | ]
--------------------------------------------------------------------------------