├── .github
│   └── workflows
│       ├── jekyll-gh-pages.yml
│       └── pypi-publish.yml
├── .gitignore
├── README.md
├── gsm-infinite
│   ├── __init__.py
│   ├── app.py
│   ├── config.sh
│   ├── data
│   │   ├── realistic
│   │   │   ├── DependencyGraph.py
│   │   │   ├── Igsm
│   │   │   │   └── readme.txt
│   │   │   ├── StructureGraphThree.py
│   │   │   ├── __init__.py
│   │   │   ├── datagenerationworker.py
│   │   │   ├── forward_generator.py
│   │   │   ├── reverse_generator.py
│   │   │   ├── simple_dummy_namesone.py
│   │   │   ├── simple_dummy_namesthree.py
│   │   │   ├── simple_dummy_namestwo.py
│   │   │   ├── simple_dummy_text.py
│   │   │   ├── simple_names_three.py
│   │   │   ├── solver.py
│   │   │   └── test_generate3.sh
│   │   └── symbolic
│   │       ├── __init__.py
│   │       ├── generate_payload.py
│   │       ├── generate_symbolic.py
│   │       ├── generate_symbolic.sh
│   │       └── utils.py
│   ├── pred
│   │   ├── __init__.py
│   │   ├── eval_realistic.py
│   │   ├── eval_symbolic.py
│   │   ├── model_handler.py
│   │   ├── no_rag_pipeline.py
│   │   └── pred.py
│   ├── preprocess.py
│   └── run.sh
├── index.html
├── pyproject.toml
├── requirements.txt
└── static
    ├── Triforce.ttf
    ├── css
    │   ├── bulma-carousel.min.css
    │   ├── bulma-slider.min.css
    │   ├── bulma.css.map.txt
    │   ├── bulma.min.css
    │   ├── fontawesome.all.min.css
    │   └── index.css
    ├── images
    │   ├── DALL·E 2024-09-02 20.52.33 - A cute but more accurate representation of the International Space Station (ISS) floating in space. The ISS should be depicted with recognizable featu.webp
    │   ├── Fast.png
    │   ├── GPU.png
    │   ├── GSMQQ.gif
    │   ├── GSMmouse.svg
    │   ├── GSMmouse2.svg
    │   ├── Hierarchical.png
    │   ├── Hierarchy.png
    │   ├── Idea.png
    │   ├── InternationalSpaceStation.png
    │   ├── Llama.png
    │   ├── Observation.png
    │   ├── Simplerspace.webp
    │   ├── Sys_readme.png
    │   ├── Telescope.png
    │   ├── TriForce.gif
    │   ├── computationalgraph.png
    │   ├── computationgraph.png
    │   ├── computgraph.png
    │   ├── cosmonautllama.png
    │   ├── cra.png
    │   ├── demo.png
    │   ├── evaluationfronfigure.png
    │   ├── facinfinity.webp
    │   ├── forwardre.png
    │   ├── introductin.png
    │   ├── introduction.pdf
    │   ├── introduction.png
    │   ├── iss.webp
    │   ├── lim.png
    │   ├── locality.png
    │   ├── methodsillustration.png
    │   ├── miscellaneous.png
    │   ├── mouseeatingcookie.png
    │   ├── mousehuggingface.png
    │   ├── mousetelescope.png
    │   ├── probwebsite.png
    │   ├── rag.png
    │   ├── rag22.png
    │   ├── repeatedsampling.png
    │   ├── retrieval.png
    │   ├── rockets.png
    │   ├── rockets.webp
    │   ├── simpleiss.png
    │   ├── sirius-symbol.svg
    │   ├── siriuslogo.png
    │   ├── siriuslogo.webp
    │   ├── siriusmoti.png
    │   ├── sparsity_top4096.png
    │   ├── sys.png
    │   ├── triforce.png
    │   ├── twomooone.png
    │   ├── twostars.webp
    │   └── twostarts.webp
    ├── js
    │   ├── bulma-carousel.js
    │   ├── bulma-carousel.min.js
    │   ├── bulma-slider.js
    │   ├── bulma-slider.min.js
    │   ├── fontawesome.all.min.js
    │   └── index.js
    ├── pdfs
    │   └── sample.pdf
    └── videos
        └── TriForce.mp4
/.github/workflows/jekyll-gh-pages.yml:
--------------------------------------------------------------------------------
1 | # Sample workflow for building and deploying a Jekyll site to GitHub Pages
2 | name: Deploy Jekyll with GitHub Pages dependencies preinstalled
3 |
4 | on:
5 |   # Runs on pushes targeting the default branch
6 |   push:
7 |     branches: ["main"]
8 |
9 |   # Allows you to run this workflow manually from the Actions tab
10 |   workflow_dispatch:
11 |
12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
13 | permissions:
14 |   contents: read
15 |   pages: write
16 |   id-token: write
17 |
18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: false 23 | 24 | jobs: 25 | # Build job 26 | build: 27 | runs-on: ubuntu-latest 28 | steps: 29 | - name: Checkout 30 | uses: actions/checkout@v4 31 | - name: Setup Pages 32 | uses: actions/configure-pages@v5 33 | - name: Build with Jekyll 34 | uses: actions/jekyll-build-pages@v1 35 | with: 36 | source: ./ 37 | destination: ./_site 38 | - name: Upload artifact 39 | uses: actions/upload-pages-artifact@v3 40 | 41 | # Deployment job 42 | deploy: 43 | environment: 44 | name: github-pages 45 | url: ${{ steps.deployment.outputs.page_url }} 46 | runs-on: ubuntu-latest 47 | needs: build 48 | steps: 49 | - name: Deploy to GitHub Pages 50 | id: deployment 51 | uses: actions/deploy-pages@v4 52 | -------------------------------------------------------------------------------- /.github/workflows/pypi-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | release-build: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - uses: actions/setup-python@v5 18 | with: 19 | python-version: "3.x" 20 | 21 | - name: Install dependencies 22 | run: pip install build wheel 23 | 24 | - name: Build release distributions 25 | run: python -m build 26 | 27 | - name: Upload distributions 28 | uses: actions/upload-artifact@v4 29 | with: 30 | name: release-dists 31 | path: dist/ 32 | 33 | pypi-publish: 34 | runs-on: ubuntu-latest 35 | needs: release-build 36 | permissions: 37 | id-token: write # Required for trusted publishing 38 | environment: 39 | name: pypi 40 | url: https://pypi.org/project/gsm-infinite/${{ github.event.release.name }} # Update with your project name 41 | 42 | steps: 43 | - name: Retrieve release distributions 44 | uses: actions/download-artifact@v4 45 | with: 46 | name: release-dists 47 | path: dist/ 48 | 49 | - name: Publish release distributions to PyPI 50 | uses: pypa/gh-action-pypi-publish@release/v1 51 | with: 52 | packages-dir: dist/ 53 | password: ${{ secrets.PYPI_API_TOKEN }} # Use a secret for your PyPI API token -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .DS_Store 3 | realistic/.DS_Store 4 | symbolic/.DS_Store 5 | static/.DS_Store 6 | datasets/ 7 | results/ 8 | dist/ 9 | *.egg-info -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

GSM-Infinite: How Do Your LLMs Behave over Infinitely
Increasing Context Length and Reasoning Complexity? 3 |

4 |
5 | GSM-Infinite is a reasoning benchmark that is completely synthetic, with no LLMs in the loop, and capable of generating problems whose context length and reasoning complexity scale infinitely. Inspired by Physics of Language Models 2.1, we map abstract grade-school-level math problems onto computational graphs, and through graph manipulation and graph-language mapping we generate LLM-readable (and human-readable) problems.
6 |
7 |
8 | Yang Zhou*1, 9 | Hongyi Liu*1, 10 | Zhuoming Chen1, 11 | Yuandong Tian2, 12 | Beidi Chen1, 13 |
14 |
15 | *Equal Contributions, order decided by a coin flip 16 |
17 |
18 | 1Carnegie Mellon University 19 | 2Meta AI 20 |
21 | 22 |
23 | [Paper] | [Blog] | [🤗Leaderboards at huggingface] | [Datasets] 24 |
25 | 26 |

Limitations of Existing Long-context Benchmarks

27 |
28 | 29 |
RAG can robustly solve most of today's popular long-context benchmarks
30 |
31 | In this paper, we first point out the insufficiencies of existing long-context LLM evaluation, highlighting:
    33 |
1. 34 | Lack of reasoning complexity: Most tasks rely on text retrieval, text summarization, or QA.
  2. 36 |
3. 37 | Lack of context length: Some tasks are inherently short-context but are bloated into long-context tasks by injecting semantically irrelevant noise.
  4. 39 |
5. 40 | Lack of scalability: Admittedly, tasks with high reasoning complexity and high information density exist, but they require enormous human effort to gather, deduplicate, and verify. The result is a lack of scalability in quantity, which makes such benchmarks hard to adopt widely in the community.
  6. 42 |
43 | The first two points are studied further in the figure above. These tasks are not ones that only long-context LLMs can solve: we show that RAG systems are robust and perform on par with long-context LLMs. Moreover, given how efficient RAG systems are to build and run, RAG is more favorable in practice on these tasks. This leaves us with the following problem to solve.

45 | Problem Statement: How can we develop a benchmark that contains sufficient problems at every fine-grained level of reasoning difficulty, from easy retrieval tasks to infinitely hard challenges, while providing infinitely customizable context length with high information density? 46 |

47 | 48 |

GSM-Infinite

49 |

50 | We present GSM-Infinite, a benchmark whose test examples are completely synthesized and can therefore be scaled up infinitely in both context length and reasoning complexity.
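To make the synthesis idea concrete, here is a minimal, hypothetical sketch (not the repo's actual generator, which lives under `gsm-infinite/data/`): a problem is a dependency graph of quantities evaluated with modular arithmetic, so the ground-truth answer is known no matter how many operations or how much context we add. All names below (`build_chain`, `q0`, ...) are illustrative only.

```python
import random

def build_chain(num_ops, mod=23, seed=0):
    """Toy generator: a chain of `num_ops` modular operations with a known answer."""
    rng = random.Random(seed)
    values = [rng.randrange(mod)]
    lines = [f"Quantity q0 equals {values[0]}."]
    for i in range(1, num_ops + 1):
        src = rng.randrange(i)           # depend on any earlier quantity
        k = rng.randrange(1, mod)
        if rng.random() < 0.5:
            values.append((values[src] + k) % mod)
            lines.append(f"Quantity q{i} equals q{src} plus {k}, modulo {mod}.")
        else:
            values.append((values[src] * k) % mod)
            lines.append(f"Quantity q{i} equals q{src} times {k}, modulo {mod}.")
    return " ".join(lines) + f" What is q{num_ops}?", values[-1]

problem, answer = build_chain(num_ops=5)
print(problem)
print("Answer:", answer)
```

Because the graph is evaluated programmatically, both the operation count and the amount of (dense) surrounding context can be dialed up arbitrarily without any human verification.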

52 | 53 |

54 | Importantly, the generated context has high information density, as shown by the study in the figure below.

56 |
57 | 58 |
Performance of RAG methods on GSM-Infinite.
59 |
60 | Panels (a) and (b) show that the retriever, all-mpnet-base-v2, cannot differentiate the close noise we generate from the essential blocks, as it comfortably does for the vt task in RULER. Panels (c) and (d) show that the retriever's performance is much lower than that of long-context LLMs on both the Medium and Hard subsets of GSM-Infinite, indicating that these tasks are solvable only by long-context LLMs.
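To illustrate the kind of check behind panels (a) and (b), here is a minimal sketch assuming the `sentence-transformers` package; the question and chunk texts are invented in the style of the dataset, not taken from it.

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-mpnet-base-v2")
question = "What is the number of adult wolves in Pine Ridge?"
chunks = [
    # essential block: actually defines the asked-for quantity
    "The number of adult wolves in Pine Ridge equals the number of adult foxes in Cedar Valley.",
    # close noise: nearly identical surface form, different entities
    "The number of adult wolves in Maple Creek equals 3 plus the number of adult deer in Pine Ridge.",
]
scores = util.cos_sim(model.encode(question), model.encode(chunks))[0]
for chunk, score in zip(chunks, scores):
    print(f"{score.item():.3f}  {chunk}")
```

When the similarity scores of essential blocks and close noise are this entangled, top-k retrieval cannot isolate the relevant facts, which is exactly what makes the context high in information density.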

Leaderboards

63 | Here we provide both the Zero Noise and the Long-context leaderboards. The leaderboards are updated from time to time, so please check our Hugging Face Space for the latest models and results.
64 |
65 | First, we evaluated 18 models on GSM-Infinite Zero Noise. The results are as follows.
66 | | Models | Symbolic | Medium | Hard | 1st<50% op on Hard | 1st<10% op on Hard | Avg. Acc op≤30 on Hard | Average↑ |
67 | |-----------------------|----------|---------|---------|------------|------------|----------------|-----------|
68 | | **DeepSeek-R1** | 7280.0 | 9750.85 | 8573.8 | 100 | >130 | 0.9427 | 8534.88 |
69 | | **GPT-o3-mini** | 6690.0 | 8335.66 | 5769.96 | 70 | 110 | 0.9423 | 6931.88 |
70 | | **GPT-o1-mini** | 5060.0 | 6054.91 | 3738.43 | 50 | 90 | 0.8397 | 4951.11 |
71 | | **DeepSeek-V3** | 4310.0 | 4100.81 | 2407.86 | 24 | 55 | 0.6669 | 3606.22 |
72 | | **QwQ-32B-preview** | 3530.0 | 3205.75 | 1846.19 | 21 | 50 | 0.5403 | 2860.65 |
73 | | **Gemini-1.5-Pro-002**| 2547.0 | 3659.59 | 2318.28 | 26 | 45 | 0.6924 | 2841.62 |
74 | | **Claude-3.5-Sonnet** | 2161.0 | 3281.8 | 2115.79 | 26 | 40 | 0.6758 | 2519.53 |
75 | | **Mistral-Large** | 2332.5 | 2879.92 | 2310.49 | 25 | 40 | 0.6645 | 2507.64 |
76 | | **Qwen2.5-72B-Instruct** | 2048.0 | 2496.81 | 2016.38 | 21 | 40 | 0.5433 | 2187.06 |
77 | | **GPT-4o** | 2379.0 | 2457.37 | 1451.54 | 18 | 30 | 0.5064 | 2095.97 |
78 | | **Gemini-1.5-Flash-002** | 1970.0 | 1478.75 | 1274.25 | 19 | 30 | 0.4460 | 1574.33 |
79 | | **Llama3.1-70B-Instruct** | 1769.0 | 1650.25 | 1205.25 | 10 | 30 | 0.4314 | 1541.50 |
80 | | **MiniMax-Text-01** | 1618.5 | 1712.64 | 1178.51 | 14 | 30 | 0.4213 | 1503.22 |
81 | | **GPT-4o-mini** | 1389.0 | 1406.5 | 913.89 | 12 | 22 | 0.3094 | 1236.46 |
82 | | **Claude-3.5-Haiku** | 897.0 | 1053.16 | 784.34 | 10 | 22 | 0.2910 | 911.50 |
83 | | **Qwen2.5-7B-Instruct** | 786.95 | 886.75 | 618.5 | 7 | 16 | 0.2257 | 764.07 |
84 | | **Llama3.1-8B-Instruct** | 462.0 | 786.5 | 606.5 | 6 | 17 | 0.2212 | 618.30 |
85 | | **Jamba-1.5-Large** | 856.0 | 485.13 | 466.4 | 6 | 26 | 0.1828 | 602.51 |
86 |
87 | Second, we evaluated models on the GSM-Infinite long-context tasks.
88 | | Model | 8K | 16K | 32K | Average↑ |
89 | |---------------------------|---------|---------|---------|----------|
90 | | **gemini-1.5-pro-002** | 1182.43 | 896.31 | 812.96 | 963.9 |
91 | | **qwen-2.5-72b-instruct** | 927.33 | 681.53 | 563.65 | 724.17 |
92 | | **mistral-large-2411** | 914.49 | 563.73 | 319.21 | 599.14 |
93 | | **deepseek-v3** | 935.10 | 477.02 | 313.66 | 575.2 |
94 | | **gemini-1.5-flash-002** | 673.88 | 476.72 | 377.38 | 509.3 |
95 | | **llama-3.1-70b-instruct** | 479.00 | 394.50 | 355.5 | 409.67 |
96 | | **minimax-text-01** | 481.32 | 359.56 | 325.95 | 388.94 |
97 | | **gpt-4o-mini** | 401.00 | 337.81 | 275.63 | 338.15 |
98 | | **qwen-2.5-7b-instruct** | 248.00 | 211.50 | 196.17 | 218.56 |
99 | | **llama-3.1-8b-instruct** | 183.67 | 149.50 | 109.45 | 147.54 |
100 |
101 | We present a detailed description of the data generation, along with evaluation findings that uniquely benefit from the design of GSM-Infinite, in the paper. Please make sure to check it out.
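The per-model scores above are AUC-style numbers over accuracy-vs-op curves; a minimal sketch of the computation, mirroring `calculate_auc` in `gsm-infinite/app.py` (trapezoidal rule, scaled by 100). The accuracy values below are made up for illustration.

```python
import numpy as np

ops      = [1,   2,   3,   4,   5]
accuracy = [1.0, 0.9, 0.7, 0.4, 0.2]   # per-op accuracy of a hypothetical model

auc = np.trapz(accuracy, ops) * 100    # trapezoidal integration over op count
print(f"AUC (x100): {auc:.2f}")        # -> 260.00 for this toy series
```

Because the score integrates over the whole difficulty curve, a model that degrades slowly as ops grow is rewarded more than one that is strong only on easy problems.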

Overview of the Code Organization

104 | GSM-Infinite has three subtasks: Symbolic, Medium, and Hard. The classification is mainly about semantic hierarchy; more details are in the paper. Below is a menu of the organization of files and folders.
105 |
106 | - [Symbolic](#symbolic)
107 |   - [Data](#symbolic-data)
108 |   - [Predictions](#symbolic-predictions)
109 | - [Realistic](#realistic)
110 |   - [Data](#realistic-data)
111 |   - [Predictions](#realistic-predictions)
112 |
113 | The main components of the code are the data generation and model evaluation scripts. Since there are some subtle differences between the two, we separate them into two different folders.

Environment Installation

116 |
117 | ```
118 | pip install -r requirements.txt
119 | ```
120 | If you want to serve models locally, please also install a serving platform of your choice (vLLM, SGLang, etc.).
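Once a local backend is up, a quick connectivity check can look like the following sketch (assuming the `openai` Python package; the base URL, key, and model name match the defaults in `gsm-infinite/config.sh`, so adjust them to your setup):

```python
from openai import OpenAI

# Point the client at the locally served OpenAI-compatible backend.
client = OpenAI(base_url="http://127.0.0.1:30000/v1", api_key="EXAMPLE API KEY")
reply = client.chat.completions.create(
    model="Qwen/Qwen2.5-7B-Instruct",
    messages=[{"role": "user", "content": "Say hi."}],
    max_tokens=16,
)
print(reply.choices[0].message.content)
```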

Generation and Evaluation of Symbolic Dataset

123 | We provide a `run.sh` script to sample from and evaluate on the Symbolic dataset. Below is a quick walkthrough:
124 |
125 | 1. **Navigate to the Symbolic directory**
126 |
127 | ```bash
128 | cd symbolic
129 | ```
130 | In this repo, we recommend running evaluations through an API-calling mechanism. Even for open-source models, we advise either deploying the model locally via vLLM/SGLang or using an API provider such as DeepInfra.
131 |
132 | 2. **Edit `config.sh`**
133 | - Set `run_sampling` to `true` if you want to sample new predictions from your model. Set to `false` to skip sampling.
134 | ```bash
135 | run_sampling=true # Set to true to sample from the model
136 | ```
137 | - Set `run_evaluation` to `true` if you want to evaluate existing predictions (this requires an evaluation model, typically a smaller LLM, specified in `EVAL_OPENAI_*` variables). Set to `false` to skip evaluation.
138 | ```bash
139 | run_evaluation=true # Set to true to evaluate existing predictions
140 | ```
141 | - Configure the sampling model details (if `run_sampling=true`):
142 | - `backend_type`: `'openai'`, `'gemini'`, or `'anthropic'`
143 | - `SAMPLER_OPENAI_BASE_URL` and `SAMPLER_OPENAI_API_KEY` (or `GEMINI_API_KEY` or `ANTHROPIC_API_KEY`)
144 | - `model_name`, `dataset_base` (if you want to use custom datasets)
145 | - `num_samples`, `temperature`, `max_tokens`, etc.
146 | - Configure the evaluation model details (if `run_evaluation=true`):
147 | - `EVAL_OPENAI_BASE_URL` and `EVAL_OPENAI_API_KEY` (for an OpenAI-compatible evaluation model)
148 |
149 | 3. **Run the script**
150 | ```bash
151 | bash -x run.sh
152 | ```
153 |
154 | 4. **Check your output**
155 | - New predictions (if sampled) will be saved in the `datasets` folder.
156 | - Evaluation results (if generated) will be in the `results` folder.
157 |
158 | If you want to generate the data yourself, look into the `data` folder and `generate_symbolic.sh`, fill in your dataset settings (name, ops, context length), and then run
159 | ```
160 | bash -x generate_symbolic.sh
161 | ```

Generation and Evaluation of Realistic Dataset

164 |
166 | The Realistic dataset (Medium and Hard subsets) uses a similar process:
167 |
168 | 1. **Navigate to the Realistic directory**
169 |
170 | ```bash
171 | cd realistic
172 | ```
173 |
174 | 2. **Edit `config.sh`**
175 | - Fill in your API keys, backend type, model name, etc.
176 | - Adjust `lengths` and `dataset_suffixes` to control which subsets and context lengths to process.
177 |
178 | - Configure the model details:
179 | - `backend_type`: `'openai'`, `'gemini'`, or `'anthropic'`
180 | - `OPENAI_BASE_URL` and `OPENAI_API_KEY` (or `GEMINI_API_KEY` or `ANTHROPIC_API_KEY`)
181 | - `model_name`, `dataset_base` (if you want to use custom datasets)
182 | - `num_samples`, `temperature`, `max_tokens`, etc.
183 |
184 | 3. **Run the script**
185 | ```bash
186 | bash -x run.sh
187 | ```
188 | This script samples predictions and then automatically evaluates them using `eval_realistic.py`. Note that there is no separate `run_evaluation` flag here; evaluation always follows sampling.
189 |
190 | 4. **Check your output**
191 | - New predictions will be saved in the `datasets` folder.
192 | - Evaluation results will be in the `results` folder.
193 |
194 | If you want to generate the data yourself, look into the `data` folder and `test_generate3.sh`, fill in your dataset settings (ops, context length), and then run
195 | ```
196 | bash -x test_generate3.sh
197 | ```

Citation

200 | If you find our codebase useful, please consider citing it with the following BibTeX entry.
201 |
202 | ```
203 | @misc{zhou2025gsminfinitellmsbehaveinfinitely,
204 |       title={GSM-Infinite: How Do Your LLMs Behave over Infinitely Increasing Context Length and Reasoning Complexity?},
205 |       author={Yang Zhou and Hongyi Liu and Zhuoming Chen and Yuandong Tian and Beidi Chen},
206 |       year={2025},
207 |       eprint={2502.05252},
208 |       archivePrefix={arXiv},
209 |       primaryClass={cs.CL},
210 |       url={https://arxiv.org/abs/2502.05252},
211 | }
212 | ```
213 |
--------------------------------------------------------------------------------
/gsm-infinite/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/gsm-infinite/__init__.py
--------------------------------------------------------------------------------
/gsm-infinite/app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import pandas as pd
3 | import plotly.graph_objects as go
4 | import numpy as np
5 | import uuid
6 | from collections import defaultdict
7 | from scipy import integrate
8 |
9 | # -----------------------------
10 | # 1. App Configuration
11 | # -----------------------------
12 |
13 | # Set the page configuration
14 | st.set_page_config(
15 |     page_title="GSM-Infinite Data Viewer",
16 |     layout="wide",
17 |     initial_sidebar_state="expanded",
18 | )
19 |
20 | # Title of the app
21 | st.title("GSM-Infinite Results Viewer")
22 |
23 | # -----------------------------
24 | # 2. Data Loading and Processing
25 | # -----------------------------
26 |
27 | @st.cache_data
28 | def load_data():
29 |     """Load and process the benchmark results from CSV."""
30 |     try:
31 |         df = pd.read_csv('results/processed_results.csv')
32 |         return df
33 |     except FileNotFoundError:
34 |         st.error("The file 'processed_results.csv' was not found. Please run preprocess.py first.")
35 |         st.stop()
36 |
37 | df = load_data()
38 |
39 | # Initialize session state for storing selected series
40 | if 'selected_series' not in st.session_state:
41 |     st.session_state.selected_series = []
42 |
43 | # -----------------------------
44 | # 3. Series Selection Interface
45 | # -----------------------------
46 |
47 | st.sidebar.header("Add New Series")
48 |
49 | # a. Dataset Selection
50 | datasets = sorted(df['dataset'].unique())
51 | selected_dataset = st.sidebar.selectbox("Select Dataset", options=datasets, key="dataset_selector")
52 |
53 | # b. Model Selection based on Selected Dataset
54 | models = sorted(df[df['dataset'] == selected_dataset]['model'].unique())
55 | selected_model = st.sidebar.selectbox("Select Model", options=models, key="model_selector")
56 |
57 | # c. Length Selection
58 | lengths = sorted(df['length'].unique())
59 | selected_length = st.sidebar.selectbox("Select Length", options=lengths, key="length_selector", index=lengths.index(0) if 0 in lengths else 0)
60 |
61 | # d.
Subset Filters (if available) 62 | has_subset_info = 'template' in df.columns and 'mode' in df.columns 63 | 64 | # Function to calculate area under curve 65 | def calculate_auc(x, y): 66 | """Calculate area under curve using trapezoidal rule and multiply by 100""" 67 | if len(x) < 2: 68 | return 0 69 | # Sort by x values to ensure proper integration 70 | sorted_pairs = sorted(zip(x, y), key=lambda pair: pair[0]) 71 | sorted_x, sorted_y = zip(*sorted_pairs) 72 | auc = np.trapz(sorted_y, sorted_x) * 100 73 | return auc 74 | 75 | if has_subset_info: 76 | # Get available templates and modes for the selected dataset/model/length 77 | filtered_base = df[ 78 | (df['dataset'] == selected_dataset) & 79 | (df['model'] == selected_model) & 80 | (df['length'] == selected_length) 81 | ] 82 | 83 | # Get all available templates and modes 84 | available_templates = [t for t in sorted(filtered_base['template'].unique()) if t != 'default'] 85 | available_modes = [m for m in sorted(filtered_base['mode'].unique()) if m != 'default'] 86 | 87 | # Replace 'default' with 'all' in the options 88 | template_options = ['all'] + available_templates 89 | mode_options = ['all'] + available_modes 90 | 91 | # Multi-select for templates 92 | selected_templates = st.sidebar.multiselect( 93 | "Select Templates", 94 | options=template_options, 95 | default=["all"], 96 | key="template_selector" 97 | ) 98 | 99 | # Multi-select for modes 100 | selected_modes = st.sidebar.multiselect( 101 | "Select Modes", 102 | options=mode_options, 103 | default=["all"], 104 | key="mode_selector" 105 | ) 106 | # Process 'all' selection - convert to actual template/mode values 107 | if 'all' in selected_templates: 108 | selected_templates = available_templates 109 | if 'all' in selected_modes: 110 | selected_modes = available_modes 111 | # Import a color library if you want more sophisticated palettes 112 | import plotly.express as px 113 | # e. Series Color 114 | # Define color palettes 115 | color_palettes = { 116 | "Default": ['blue', 'red', 'green', 'purple', 'orange', 'teal', 'pink', 'brown', 'gray', 'black'], 117 | "Plotly": px.colors.qualitative.Plotly, 118 | "Pastel": px.colors.qualitative.Pastel, 119 | "Dark": px.colors.qualitative.Dark24, 120 | "Light": px.colors.qualitative.Light24 121 | } 122 | 123 | # Select palette first 124 | selected_palette = st.sidebar.selectbox( 125 | "Color Palette", 126 | options=list(color_palettes.keys()), 127 | key="palette_selector" 128 | ) 129 | 130 | # Then select color from that palette 131 | palette_colors = color_palettes[selected_palette] 132 | default_color_index = len(st.session_state.selected_series) % len(palette_colors) 133 | selected_color = st.sidebar.selectbox( 134 | "Series Color", 135 | options=palette_colors, 136 | index=default_color_index, 137 | key="color_selector" 138 | ) 139 | # f. 
Add Series Button 140 | if st.sidebar.button("Add Series to Plot"): 141 | # Create a filter for the selected series 142 | series_filter = { 143 | 'dataset': selected_dataset, 144 | 'model': selected_model, 145 | 'length': selected_length 146 | } 147 | 148 | # Filter the data based on dataset, model, and length 149 | filtered_data = df[ 150 | (df['dataset'] == selected_dataset) & 151 | (df['model'] == selected_model) & 152 | (df['length'] == selected_length) 153 | ] 154 | 155 | if has_subset_info and selected_templates and selected_modes: 156 | # Further filter by selected templates and modes 157 | filtered_data = filtered_data[ 158 | (filtered_data['template'].isin(selected_templates)) & 159 | (filtered_data['mode'].isin(selected_modes)) 160 | ] 161 | 162 | # Group by N and calculate weighted average based on num_examples 163 | grouped_data = filtered_data.groupby('N').apply( 164 | lambda x: np.average(x['accuracy'], weights=x['num_examples']), 165 | include_groups=False # Add this parameter to fix the deprecation warning 166 | ).reset_index() 167 | grouped_data.columns = ['N', 'accuracy'] 168 | 169 | # Sort by N 170 | grouped_data = grouped_data.sort_values('N') 171 | 172 | # Create a label for the series 173 | template_str = ", ".join(selected_templates) if len(selected_templates) < len(available_templates) else f"all templates" 174 | mode_str = ", ".join(selected_modes) if len(selected_modes) < len(available_modes) else f"all modes" 175 | label = f"{selected_dataset}: {selected_model} (len={selected_length}, {template_str}, {mode_str})" 176 | 177 | # Calculate AUC 178 | auc = calculate_auc(grouped_data['N'].tolist(), grouped_data['accuracy'].tolist()) 179 | 180 | # Add to session state 181 | st.session_state.selected_series.append({ 182 | 'id': str(uuid.uuid4()), 183 | 'label': label, 184 | 'filter': { 185 | 'dataset': selected_dataset, 186 | 'model': selected_model, 187 | 'length': selected_length, 188 | 'templates': selected_templates, 189 | 'modes': selected_modes 190 | }, 191 | 'x': grouped_data['N'].tolist(), 192 | 'y': grouped_data['accuracy'].tolist(), 193 | 'color': selected_color, 194 | 'auc': auc 195 | }) 196 | else: 197 | # If no subset info or no templates/modes selected, use all data 198 | filtered_data = filtered_data.sort_values('N') 199 | 200 | # Create a label for the series 201 | label = f"{selected_dataset}: {selected_model} (len={selected_length})" 202 | 203 | # Calculate AUC 204 | auc = calculate_auc(filtered_data['N'].tolist(), filtered_data['accuracy'].tolist()) 205 | 206 | # Add to session state 207 | st.session_state.selected_series.append({ 208 | 'id': str(uuid.uuid4()), 209 | 'label': label, 210 | 'filter': series_filter, 211 | 'x': filtered_data['N'].tolist(), 212 | 'y': filtered_data['accuracy'].tolist(), 213 | 'color': selected_color, 214 | 'auc': auc 215 | }) 216 | 217 | st.sidebar.success(f"Added series: {label}") 218 | 219 | # ----------------------------- 220 | # 4. 
Series Management 221 | # ----------------------------- 222 | 223 | st.header("Selected Series") 224 | 225 | # Display the selected series in a table 226 | if st.session_state.selected_series: 227 | # Create a DataFrame for the selected series 228 | series_df = pd.DataFrame([ 229 | { 230 | 'Series ID': s['id'][:6], # Truncate UUID for display 231 | 'Label': s['label'], 232 | 'Points': len(s['x']), 233 | 'Min Op': min(s['x']) if s['x'] else 'N/A', 234 | 'Max Op': max(s['x']) if s['x'] else 'N/A', 235 | 'Min Accuracy': min(s['y']) if s['y'] else 'N/A', 236 | 'Max Accuracy': max(s['y']) if s['y'] else 'N/A', 237 | 'Avg Accuracy': np.mean(s['y']) if s['y'] else 'N/A', 238 | 'AUC (×100)': round(s['auc'], 2), 239 | 'Color': s['color'] 240 | } 241 | for s in st.session_state.selected_series 242 | ]) 243 | 244 | # Display the table 245 | st.dataframe(series_df, use_container_width=True) 246 | 247 | # Add a button to clear all series 248 | if st.button("Clear All Series"): 249 | st.session_state.selected_series = [] 250 | st.rerun() 251 | 252 | # Add a button to remove selected series 253 | series_to_remove = st.selectbox( 254 | "Select Series to Remove", 255 | options=[s['label'] for s in st.session_state.selected_series], 256 | key="series_to_remove" 257 | ) 258 | 259 | if st.button("Remove Selected Series"): 260 | st.session_state.selected_series = [ 261 | s for s in st.session_state.selected_series if s['label'] != series_to_remove 262 | ] 263 | st.rerun() 264 | else: 265 | st.info("No series selected. Use the sidebar to add series to the plot.") 266 | 267 | # ----------------------------- 268 | # 5. Main Plot 269 | # ----------------------------- 270 | 271 | if st.session_state.selected_series: 272 | st.header("Accuracy vs. Op") 273 | 274 | # Display Options 275 | col1, col2, col3 = st.columns(3) 276 | with col1: 277 | show_average = st.checkbox("Show Average Line", value=True) 278 | with col2: 279 | line_width = st.slider("Line Width", min_value=1, max_value=5, value=2) 280 | with col3: 281 | marker_size = st.slider("Marker Size", min_value=4, max_value=12, value=8) 282 | 283 | # Create the main plot 284 | fig = go.Figure() 285 | 286 | # Add each series to the plot 287 | for series in st.session_state.selected_series: 288 | fig.add_trace(go.Scatter( 289 | x=series['x'], 290 | y=series['y'], 291 | mode='lines+markers', 292 | name=f"{series['label']} (AUC={round(series['auc'], 2)})", 293 | line=dict(width=line_width, color=series['color']), 294 | marker=dict(size=marker_size, color=series['color']) 295 | )) 296 | 297 | # Add average line if requested 298 | if show_average and len(st.session_state.selected_series) > 1: 299 | # Collect all N values 300 | all_n_values = sorted(set(n for series in st.session_state.selected_series for n in series['x'])) 301 | 302 | # Calculate average accuracy for each N 303 | avg_accuracies = [] 304 | for n in all_n_values: 305 | accuracies = [] 306 | for series in st.session_state.selected_series: 307 | if n in series['x']: 308 | idx = series['x'].index(n) 309 | accuracies.append(series['y'][idx]) 310 | 311 | if accuracies: 312 | avg_accuracies.append(np.mean(accuracies)) 313 | else: 314 | avg_accuracies.append(None) 315 | 316 | # Calculate average of individual AUCs instead of AUC of the average line 317 | avg_auc = np.mean([series['auc'] for series in st.session_state.selected_series]) 318 | 319 | 320 | # Add average line to plot 321 | fig.add_trace(go.Scatter( 322 | x=all_n_values, 323 | y=avg_accuracies, 324 | mode='lines+markers', 325 | name=f'Average 
(AUC={round(avg_auc, 2)})', 326 | line=dict(width=line_width+1, dash='dash', color='black'), 327 | marker=dict(size=marker_size+2, color='black') 328 | )) 329 | 330 | # Update layout 331 | fig.update_layout( 332 | title=f"Accuracy vs. Op", 333 | xaxis_title="Op", 334 | yaxis_title="Accuracy", 335 | legend_title="Series", 336 | hovermode='closest', 337 | height=600 338 | ) 339 | 340 | # Display the plot 341 | st.plotly_chart(fig, use_container_width=True) 342 | 343 | # ----------------------------- 344 | # 6. Series Details 345 | # ----------------------------- 346 | 347 | st.header("Series Details") 348 | 349 | # Let user select a series to view details 350 | selected_detail_series = st.selectbox( 351 | "Select Series to View Details", 352 | options=[s['label'] for s in st.session_state.selected_series], 353 | key="detail_series_selector" 354 | ) 355 | 356 | # Find the selected series 357 | detail_series = next((s for s in st.session_state.selected_series if s['label'] == selected_detail_series), None) 358 | 359 | if detail_series: 360 | # Create a DataFrame with the detailed data 361 | detail_df = pd.DataFrame({ 362 | 'Op': detail_series['x'], 363 | 'Accuracy': detail_series['y'] 364 | }) 365 | 366 | # Display the data 367 | st.dataframe(detail_df, use_container_width=True) 368 | 369 | # Add download button for this series 370 | csv = detail_df.to_csv(index=False).encode('utf-8') 371 | st.download_button( 372 | label=f"Download {selected_detail_series} Data", 373 | data=csv, 374 | file_name=f"gsm_infinite_{selected_detail_series.replace(' ', '_')}.csv", 375 | mime='text/csv', 376 | ) 377 | 378 | # ----------------------------- 379 | # 7. Download All Data 380 | # ----------------------------- 381 | 382 | if st.session_state.selected_series: 383 | st.header("Download All Data") 384 | 385 | # Prepare data for all series 386 | all_series_data = [] 387 | for series in st.session_state.selected_series: 388 | for i, (n, acc) in enumerate(zip(series['x'], series['y'])): 389 | all_series_data.append({ 390 | 'Series': series['label'], 391 | 'Op': n, 392 | 'Accuracy': acc, 393 | 'AUC': series['auc'] 394 | }) 395 | 396 | all_data_df = pd.DataFrame(all_series_data) 397 | 398 | # Download button for all data 399 | all_csv = all_data_df.to_csv(index=False).encode('utf-8') 400 | st.download_button( 401 | label="Download All Series Data", 402 | data=all_csv, 403 | file_name="gsm_infinite_all_series_data.csv", 404 | mime='text/csv', 405 | ) -------------------------------------------------------------------------------- /gsm-infinite/config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Configure your API Keys and URLs, Leave it blank if not necessary 4 | # You can use environment variables here as well 5 | # If you choose openai backend compatible backend, please fill SAMPLER_OPENAI_BASE_URL and SAMPLER_OPENAI_API_KEY with openai base url and api key of the backend 6 | backend_type='openai' # can be 'openai', 'gemini' and 'anthropic' 7 | SAMPLER_OPENAI_BASE_URL='http://127.0.0.1:30000/v1' 8 | SAMPLER_OPENAI_API_KEY='EXAMPLE API KEY' 9 | GEMINI_API_KEY='' 10 | ANTHROPIC_API_KEY='' 11 | 12 | # To evaluate symbolic subset, you should first launch an openai compatible backend. 13 | # We use Qwen/Qwen2.5-7B-Instruct as our parser to extract the answer. 
14 | # Fill EVAL_OPENAI_BASE_URL and EVAL_OPENAI_API_KEY with the OpenAI-compatible base URL and API key of that backend
15 | EVAL_OPENAI_BASE_URL='http://127.0.0.1:30000/v1'
16 | EVAL_OPENAI_API_KEY='EXAMPLE API KEY'
17 |
18 | # Control sampling and evaluation (can be set from command line)
19 | run_sampling=true # Set to "true" to run sampling, "false" to skip
20 | run_evaluation=true # Set to "true" to run evaluation, "false" to skip
21 | run_symbolic_evaluation=false # Set to "true" to ONLY run symbolic evaluation
22 | run_realistic_evaluation=false # Set to "true" to ONLY run realistic evaluation
23 |
24 | # Model and Dataset Configuration
25 | model_name='Qwen/Qwen2.5-7B-Instruct' # API model name
26 | dataset_base='InfiniAILab/gsm_infinite' # Base name for the dataset
27 | save_name='qwen-2.5-7b-instruct' # Model name for saving the results
28 |
29 | # Sampling Settings
30 | num_samples=1
31 | temperature_symbolic=1.0 # Temperature for symbolic
32 | temperature_realistic=0.0 # Temperature for realistic
33 | max_tokens=4096
34 |
35 | # Batch size and example limit per op
36 | batch_size=200
37 | limit_symbolic=100 # Limit for symbolic
38 | limit_realistic=200 # Limit for realistic
39 |
40 |
41 | # Lengths to process (can be numbers or strings like '8k')
42 | lengths=(
43 |     "0"
44 |     "8k"
45 |     "16k"
46 |     "32k"
47 | )
48 |
49 | # Dataset suffixes
50 | dataset_suffixes=(
51 |     "symbolic"
52 |     "medium"
53 |     "hard"
54 | )
55 |
56 | # Operation Range Configuration (per length and suffix). If empty, the subset will be skipped.
57 | declare -A ops_config
58 | # Example configurations:
59 | ops_config["0_symbolic"]='{"start": 1, "end": 50, "stride": 1}'
60 | ops_config["8k_symbolic"]='{"start": 1, "end": 30, "stride": 1}'
61 | ops_config["16k_symbolic"]='{"start": 1, "end": 20, "stride": 1}'
62 | ops_config["32k_symbolic"]='{"start": 1, "end": 10, "stride": 1}'
63 |
64 | ops_config["0_medium"]='{"start": 2, "end": 30, "stride": 1}'
65 | ops_config["8k_medium"]='{"start": 2, "end": 30, "stride": 1}'
66 | ops_config["16k_medium"]='{"start": 2, "end": 30, "stride": 1}'
67 | ops_config["32k_medium"]='{"start": 2, "end": 30, "stride": 1}'
68 |
69 | ops_config["0_hard"]='{"start": 2, "end": 30, "stride": 1}'
70 | ops_config["8k_hard"]='{"start": 2, "end": 30, "stride": 1}'
71 | ops_config["16k_hard"]='{"start": 2, "end": 30, "stride": 1}'
72 | ops_config["32k_hard"]='{"start": 2, "end": 30, "stride": 1}'
73 |
74 |
75 | # Filter Configuration (JSON string, only used for realistic)
76 | filter_config='[
77 |     {"percentage": 0.4, "template": "crazy_zootopia", "mode": "normalforward"},
78 |     {"percentage": 0.05, "template": "movie_festival_awards", "mode": "normalforward"},
79 |     {"percentage": 0.05, "template": "teachers_in_school", "mode": "normalforward"},
80 |     {"percentage": 0.4, "template": "crazy_zootopia", "mode": "forwardreverse"},
81 |     {"percentage": 0.05, "template": "movie_festival_awards", "mode": "forwardreverse"},
82 |     {"percentage": 0.05, "template": "teachers_in_school", "mode": "forwardreverse"}
83 | ]'
--------------------------------------------------------------------------------
/gsm-infinite/data/realistic/Igsm/readme.txt:
--------------------------------------------------------------------------------
1 | Executing test_generate3.sh will store the generated data inside this folder.
2 | It is empty by default.
3 |
--------------------------------------------------------------------------------
/gsm-infinite/data/realistic/StructureGraphThree.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import networkx as nx
4 | import matplotlib.pyplot as plt
5 | import pickle
6 | import os
7 |
8 | class Node:
9 |     def __init__(self, id, leveli = None):
10 |         self.id = id
11 |         self.name = None
12 |
13 |         # adjacency is kept as a set
14 |         # (edges from other nodes to this node)
15 |         self.adjacent = set()
16 |         self.leveli = leveli
17 |         self.layercategory = None
18 |
19 |     def __str__(self):
20 |         return self.name
21 |
22 |     def add_adjacent(self, node):
23 |         self.adjacent.add(node)
24 |         node.adjacent.add(self)
25 |
26 | class GraphStructure:
27 |     def __init__(self, d, w0, w1):
28 |         self.d = d
29 |         self.w0 = w0
30 |         self.w1 = w1
31 |         self.e = 0
32 |         self.layers = [[] for _ in range(d)]
33 |         self.l = [self.w0 for _ in range(d)]
34 |         self.layer_names = None
35 |         self.nodes = set()
36 |
37 |     def fill_ite(self):
38 |         # Construct Gs with exactly l_i items on layer i in [d].
39 |         for i in range(self.d):
40 |             for j in range(self.l[i]):
41 |                 newnod = Node(i * self.w1 + j, i)
42 |                 self.nodes.add(newnod)
43 |                 self.layers[i].append(newnod)
44 |
45 |     def construct_first(self):
46 |         # For each item a in each layer i >= 2:
47 |         # randomly select an item b in layer i - 1 and connect (a, b) in Gs. (This creates the e- edges.)
48 |         for i in range(1, self.d):
49 |             for j in range(self.l[i]):
50 |                 nod = self.layers[i][j]
51 |                 idx22 = np.random.randint(0, len(self.layers[i - 1]))
52 |                 node_past_le = self.layers[i - 1][idx22]
53 |                 nod.add_adjacent(node_past_le) # add an undirected edge between nod and node_past_le
54 |                 self.e += 1
55 |
56 |     def checklayerconnectivity(self, levelidx):
57 |         """
58 |         Output the indices of nodes in the layer that are not yet connected.
59 |         """
60 |         listofnodes = []
61 |         for i, node in enumerate(self.layers[levelidx]):
62 |             if len(node.adjacent) == 0:
63 |                 listofnodes.append(i)
64 |         return len(listofnodes), listofnodes
65 |
66 |     def construct_second(self, e):
67 |         # While the number of edges < e:
68 |         # randomly select two items a, b from adjacent layers to create an edge in Gs.
69 | countattempt = 0 70 | while self.e < e: 71 | countattempt += 1 72 | if countattempt > e * 10: 73 | raise ValueError("countattempt > needed") # This is needed to avoid infinite loop 74 | levelidx1 = random.randint(0, (self.d - 1) - 1) 75 | levelidx2 = levelidx1 + 1 76 | 77 | numberofnodes, listofnodes = self.checklayerconnectivity(levelidx1) 78 | if numberofnodes == 0: 79 | idx1 = random.randint(0, self.l[levelidx1] - 1) 80 | else: 81 | idx1 = listofnodes[random.randint(0, numberofnodes - 1)] 82 | idx2 = random.randint(0, self.l[levelidx2] - 1) 83 | 84 | node1 = self.layers[levelidx1][idx1] 85 | node2 = self.layers[levelidx2][idx2] 86 | 87 | appov = True 88 | for node in node2.adjacent: 89 | if node1.id == node.id: 90 | appov = False 91 | break 92 | if not appov: 93 | continue 94 | else: 95 | node2.add_adjacent(node1) 96 | self.e += 1 97 | 98 | def construct_extra(self, e): 99 | if self.d == 3: 100 | picknode = self.layers[2][0] 101 | for node in self.layers[1]: 102 | picknode.add_adjacent(node) 103 | 104 | def attachEnglish(self, hierarchical_categorizations = None, subcategories = None): 105 | # pick from d consecutive categories 106 | if self.d == 2 or self.d == 3: 107 | categorynames = hierarchical_categorizations[0][: self.d] 108 | else: 109 | raise ValueError("d must be 2 or 3") 110 | 111 | assert len(categorynames) == self.d 112 | self.layer_names = categorynames 113 | for node in self.nodes: 114 | node.layercategory = self.layer_names[node.leveli] 115 | 116 | for i in range(self.d): 117 | layer_category = categorynames[i] 118 | subcategoriestwo = subcategories[layer_category] 119 | choices = list(subcategoriestwo.keys()) 120 | subcategory = choices[random.randint(0, len(choices) - 1)] 121 | # pick li items from the subcategory 20 122 | if self.d == 2 or (self.d == 3 and i != 2): 123 | nodenames = random.sample(subcategories[layer_category][subcategory], self.l[i]) 124 | for node in self.layers[i]: 125 | node.name = nodenames.pop() 126 | else: 127 | # nodenames = ["Average Number of Newborn Children"] 128 | nodenames = subcategories[layer_category][subcategory] # it seems that sample doesn't want list of one element 129 | for node in self.layers[i]: 130 | node.name = nodenames[0] 131 | 132 | def draw(self): 133 | structure_graph = nx.Graph() 134 | pos = {} 135 | for i in range(self.d): 136 | for j, node in enumerate(self.layers[i]): 137 | structure_graph.add_node(node.name) 138 | pos[node.name] = (j, -i) 139 | 140 | added_edges = [] 141 | for node in self.nodes: 142 | for adj in node.adjacent: 143 | if (adj.id, node.id) in added_edges: 144 | continue 145 | else: 146 | added_edges.append((node.id, adj.id)) 147 | structure_graph.add_edge(node.name, adj.name, directed = False) 148 | 149 | plt.figure(figsize = (20, 10)) 150 | nx.draw(structure_graph, pos, with_labels = True, node_color = "lightblue", node_size = 5000, font_size = 10) 151 | plt.title("Structure Graph") 152 | plt.axis("off") 153 | 154 | for i in range(self.d): 155 | plt.text(-0.1, -(i + 0.1), self.layer_names[i], fontsize = 8, bbox=dict(facecolor='white', edgecolor='none', alpha=0.7)) 156 | 157 | filename = "structure_graph.png" 158 | plt.savefig(filename) 159 | 160 | def save_graph_structure(graph_structure, filename): 161 | """ 162 | Save the GraphStructure object to a file using pickle. 
163 |
164 |     :param graph_structure: The GraphStructure object to save
165 |     :param filename: The name of the file to save to
166 |     """
167 |     with open(filename, 'wb') as file:
168 |         pickle.dump(graph_structure, file)
169 |     print(f"GraphStructure saved to {filename}")
170 |
171 | def load_graph_structure(filename):
172 |     """
173 |     Load a GraphStructure object from a file.
174 |
175 |     :param filename: The name of the file to load from
176 |     :return: The loaded GraphStructure object
177 |     """
178 |     if not os.path.exists(filename):
179 |         print(f"File {filename} does not exist.")
180 |         return None
181 |
182 |     with open(filename, 'rb') as file:
183 |         graph_structure = pickle.load(file)
184 |     print(f"GraphStructure loaded from {filename}")
185 |     return graph_structure
186 |
--------------------------------------------------------------------------------
/gsm-infinite/data/realistic/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/gsm-infinite/data/realistic/__init__.py
--------------------------------------------------------------------------------
/gsm-infinite/data/realistic/datagenerationworker.py:
--------------------------------------------------------------------------------
1 | from forward_generator import drawAll
2 | from reverse_generator import drawAllEquan
3 |
4 | import json
5 | import hashlib
6 | from tqdm import tqdm
7 | import multiprocessing as mp
8 | import random
9 | import numpy as np
10 | import argparse
11 | import os
12 |
13 | from transformers import AutoTokenizer
14 | from termcolor import colored
15 |
16 | from simple_names_three import message, messagetwo, messagethree
17 |
18 | generator = {
19 |     "normalforward": drawAll,
20 |     "forwardreverse": drawAllEquan,
21 | }
22 |
23 | tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
24 |
25 | def work_function(
26 |     op_set,
27 |     ip_set,
28 |     force,
29 |     mod,
30 |     number_range,
31 |     target_length,
32 |     listoperations,
33 |     contextname,
34 |     d,
35 |     tokenizer,
36 |     nums,
37 |     identifier,
38 | ):
39 |     for mode in ["normalforward", "forwardreverse"]:
40 |         for template in ["crazy_zootopia", "teachers_in_school", "movie_festival_awards"]:
41 |             files = []
42 |             num = nums[template]
43 |
44 |             oplist = listoperations
45 |             for op in oplist:
46 |                 filename = "Igsm/{}/{}/{}/".format(target_length, "medium" if d == 2 else "hard", op)
47 |                 filename += "igsm_op{}_ip{}_force_{}_{}.jsonl".format(op, ip_set, force, identifier)
48 |                 files.append(filename)
49 |                 print(filename)
50 |             # print(num)
51 |             items = [[] for _ in range(len(oplist))]
52 |             lines = 0
53 |
54 |             np.random.seed(identifier)
55 |             random.seed(identifier)
56 |
57 |             while True:
58 |                 try:
59 |                     problem_text, question_text, solution_text, op, id = generator[mode](
60 |                         op_max = op_set,
61 |                         ip_max = ip_set,
62 |                         force = force,
63 |                         number_range = number_range,
64 |                         strictline = op_set,
65 |                         mod = mod,
66 |                         target_length = target_length,
67 |                         template = template,
68 |                         d = d,
69 |                         tokenizer = tokenizer,
70 |                         oplist = oplist,
71 |                     )
72 |                 except:
73 |                     continue
74 |                 found = False
75 |                 for idx, ask_op in enumerate(oplist):
76 |                     if op == ask_op:
77 |                         found = True
78 |                         item = {
79 |                             "problem": problem_text,
80 |                             "question": question_text,
81 |                             "solution": solution_text,
82 |                             "op": op,
83 |                             "id": id,
84 |                             "template": template,
85 |                             "mode": mode,
86 |                             "length": target_length,
87 |                             "d": d,
88 |                         }
89 |                         items[idx].append(item)
90 | break 91 | 92 | lines = np.min([len(items[idx]) for idx in range(len(oplist))]) 93 | maxlines = np.max([len(items[idx]) for idx in range(len(oplist))]) 94 | print(colored("{}({}){} out of {}".format(lines, [len(items[idx]) for idx in range(len(oplist))], maxlines, num), "green"), flush = True) 95 | if lines > num: 96 | break 97 | if found == False: 98 | continue 99 | 100 | for idx, op in enumerate(oplist): 101 | filename = files[idx] 102 | os.makedirs(os.path.dirname(filename), exist_ok=True) 103 | file = open(filename, "a") 104 | file.write("\n".join([json.dumps(item) for item in items[idx]]) + "\n") 105 | file.close() 106 | 107 | def calculate_offset(num, numprocs): 108 | return (num + numprocs - 1) // numprocs 109 | 110 | if __name__ == "__main__": 111 | parser = argparse.ArgumentParser() 112 | parser.add_argument("--numprocs", type = int, default = 1) 113 | parser.add_argument("--opmax", type = int, default = 15) 114 | parser.add_argument("--ipmax", type = int, default = 20) 115 | parser.add_argument("--force", action = "store_true") 116 | parser.add_argument("--total", type = int, default = 1608) 117 | parser.add_argument("--number_range", type = int, default = 23) 118 | parser.add_argument("--mod", type = int, default = 23) 119 | parser.add_argument("--target_length", type = str, default = None) 120 | parser.add_argument("--listoperations", nargs = "+", type = int, default = [4]) 121 | parser.add_argument("--d", type = int, default = 2) 122 | args = parser.parse_args() 123 | print(args) 124 | 125 | numprocs = args.numprocs 126 | processes = [] 127 | op_max = args.opmax 128 | ip_max = args.ipmax 129 | force = args.force 130 | total = args.total 131 | number_range = args.number_range 132 | mod = args.mod 133 | target_length = args.target_length 134 | 135 | num = (total + numprocs - 1) // numprocs 136 | 137 | nums = { 138 | "crazy_zootopia": calculate_offset(args.total, args.numprocs), 139 | "teachers_in_school": calculate_offset((args.total // 2), args.numprocs), 140 | "movie_festival_awards": calculate_offset((args.total // 2), args.numprocs), 141 | } 142 | 143 | for i in range(numprocs): 144 | p = mp.Process(target = work_function, args = (op_max, ip_max, force, mod, number_range, target_length, args.listoperations, args.target_length, args.d, tokenizer, nums, i)) 145 | processes.append(p) 146 | p.start() 147 | 148 | for p in processes: 149 | p.join() 150 | 151 | print("processes joined") 152 | 153 | def merge_files(file1_path, file2_path, file3_path, output_path): 154 | # Open files 155 | with open(file1_path, 'r') as f1, open(file2_path, 'r') as f2, open(file3_path, 'r') as f3, open(output_path, 'w') as output: 156 | # Read lines from each file 157 | file1_lines = f1.readlines() 158 | file2_lines = f2.readlines() 159 | file3_lines = f3.readlines() 160 | 161 | # Indices for each file 162 | index1, index2, index3 = 0, 0, 0 163 | 164 | # Loop until we reach the end of the shortest file 165 | while index1 < len(file1_lines) and index2 < len(file2_lines) and index3 < len(file3_lines): 166 | # Add two lines from file1 167 | if index1 < len(file1_lines) - 1: 168 | output.write(file1_lines[index1]) 169 | output.write(file1_lines[index1 + 1]) 170 | index1 += 2 171 | elif index1 < len(file1_lines): # Case when only one line is left in file1 172 | output.write(file1_lines[index1]) 173 | index1 += 1 174 | 175 | # Add one line from file2 and file3 176 | if index2 < len(file2_lines): 177 | output.write(file2_lines[index2]) 178 | index2 += 1 179 | if index3 < len(file3_lines): 180 | 
output.write(file3_lines[index3]) 181 | index3 += 1 182 | 183 | # Optionally, add remaining lines from file1 if needed 184 | for i in range(index1, len(file1_lines)): 185 | output.write(file1_lines[i]) 186 | 187 | print("Files merged successfully into", output_path) 188 | -------------------------------------------------------------------------------- /gsm-infinite/data/realistic/simple_names_three.py: -------------------------------------------------------------------------------- 1 | hierarchical_categorizations = [ 2 | ["Location", "total number of adult animals", "total number of newborn animal children"], 3 | ] 4 | 5 | subcategories = { 6 | "Location": { 7 | "Wild place": ["Beverly Forest", "Cedar Valley", "Oakridge Riverside", "Pine Ridge", "Maple Creek"], 8 | "Human place": ["South Zoo", "Jefferson Circus", "Mayer Aquarium", "Bundle Ranch", "Hamilton Farm"], 9 | }, 10 | "total number of adult animals": { 11 | "Mammal": ["adult bear", "adult wolf", "adult deer", "adult fox", "adult racoon"], 12 | "Bird": ["adult eagle", "adult parrot", "adult owl", "adult blue jay", "adult crow"], 13 | }, 14 | "total number of newborn animal children": { 15 | "number": ["average number of newborn children"], 16 | }, 17 | } 18 | 19 | messagetwo = "Answer the questions below. \ 20 | Note: the total number of adult animals in one location refers to sum of all types of adult animals ever mentioned for the specific location throughout the problem EXCLUDING their number of newborn children. \ 21 | Important: If the a type of animal is never mentioned for a location, assume its inexistence in that location. \ 22 | Each question is INDEPENDENT of the others." 23 | 24 | message = "Answer the questions below. \ 25 | Note: the total number of adult animals in one location refers to sum of all types of adult animals ever mentioned for the specific location throughout the problem EXCLUDING their number of newborn children. \ 26 | IMPORTANT: if the a type of animal is never mentioned for a location for that specific problem, assume its INEXISTENCE in that location (number in the location = 0). Previous problems animal quantities are not applicable for the present one. \ 27 | The average number of newborn children of the same type of animal might vary across different locations. \ 28 | The total newborn animal children of a location refers to the sum of the TOTAL newborn children (not average newborn children) from all adult animals mentioned for that specific location. \ 29 | Hint: the total newborn children of one type of animal in a location equals the average children per that animal in location times the number of that animal in location. \ 30 | Each question is self-contained INDEPENDENT of the others. Quantities of previous problems is NOT correct for the new problem, so quantities of animals MUST be recalculated for each question! \ 31 | Final answer needs to be presented in the format 'Answer: xxx', where xxx is the number you calculated." 
32 | lengthmessage = 40 33 | 34 | hierarchical_categorizationstwo = [ 35 | ["City", "total number of schools", "total number of teachers from all schools"], 36 | ] 37 | 38 | subcategoriestwo = { 39 | "City": { 40 | "Names without water": ["Evervale City", "Hawkesbury", "Glenfield City", "Westhaven City", "Brightford"], 41 | "Names with water": ["Riverton City", "Clearwater Bay", "Shoreline City", "Oakbridge City", "Ruby Bay"], 42 | }, 43 | "total number of schools": { 44 | # "Common": ["Elementary School", "Public Highschool", "Private Christian Highschool", "Private Middle School", "Technical College"], 45 | # "Specialty": ["Acting School", "Regional Medical School", "Culinarian School", "Regional Law School"], 46 | "Common": ["elementary school", "private middle school", "public highschool", "regional medical school", "culinarian school"], 47 | }, 48 | "total number of teachers from all schools": { 49 | "number": ["average number of teachers"], 50 | }, 51 | } 52 | messagetwo = "Answer the questions below. \ 53 | Note: the total number of schools in one location refers to sum of all types of schools ever mentioned for the specific location throughout the problem. \ 54 | IMPORTANT: if the a type of school is never mentioned for a location for that specific problem, assume its INEXISTENCE in that location (number in the location = 0). Previous problems school quantities are not applicable for the present one. \ 55 | The average number of teachers of the same type of school might vary across different locations. \ 56 | The number of teachers from all schools of a location refers to the sum of the TOTAL teachers (not average number of teachers) from all type of schools mentioned for that specific location. \ 57 | Hint: the number of teachers of one type of school in a location equals the average teacher per that type of schools in location times the number of that type of schools in location. \ 58 | Each question is self-contained INDEPENDENT of the others. Quantities of previous problems is NOT correct for the new problem, so quantities of schools or teachers MUST be recalculated for each question! \ 59 | Final answer needs to be presented in the format 'Answer: xxx', where xxx is the number you calculated." 
60 | lengthmessagetwo = 40 61 | 62 | hierarchical_categorizationsthree = [ 63 | ["Festivals", "total number of movies", "total number of movie nominations"], 64 | ] 65 | 66 | subcategoriesthree = { 67 | "Festivals": { 68 | # "Visionary": ["Morgan Glass", "Luca Verdi", "Emma Steele", "Ravi Kapoor", "Sasha Liu"], 69 | "Random": ["Taylor Movie Festival", "Verdi Movie Festival", "West Sahara Movie Festival", "Northwood Movie Festival", "Golden Banana Movie Festival"], 70 | # "Comedy": ["Mia Lark", "Oscar Bloom", "Nina Wilde", "Felix Grant", "Dana Sparks"], 71 | "French": ["Festival Lumière de Valmont", "Rêves de Belleville", "Cinéma de Montreval", "Festival de Clairmont", "Festival de Saint-Rivage"], 72 | }, 73 | "total number of movies": { 74 | # "Drama": ["Modern Family Drama", "Solemn Period Drama", "Futuristic Sci-Fi Movie", "Mythical Adventure Movie", "Realistic Detective Thriller"], 75 | # "Crime": ["Urban Love Comedy", "Gothic Horror Movie", "Upbeat Fantasy Musical", "Intense Sports Drama", "Calm Road Movie"], 76 | "Common": ["upbeat metropolis comedy", "solemn period drama", "futuristic sci-fi movie", "calm road movie", "intense detective thriller"], 77 | }, 78 | "total number of movie nominations": { 79 | "number": ["average number of nominations"], 80 | } 81 | } 82 | messagethree = "Answer the questions below. \ 83 | Commedy, drama, and thriller are all movies types. \ 84 | Note: the total number of movies in one movie festival refers to sum of all types of movies ever mentioned for the specific movie festival throughout the problem. \ 85 | IMPORTANT: if the a type of movies is never mentioned for a location for that specific problem, assume its INEXISTENCE in that location (number in the location = 0). Previous problems movies quantities are not applicable for the present one. \ 86 | The average number of nominations of the same type of festival might vary across different locations. \ 87 | The number of total movie nominations from all movies of a festival refers to the sum of the TOTAL movie nominations (not average number of nominations) from all types of movies mentioned for that specific location. \ 88 | Hint: the number of total movie nomination of one type of movie in a festival equals the average nomination per that type of movie in festival times the number of that type of movies in festival. \ 89 | Each question is self-contained INDEPENDENT of the others. Quantities of previous problems is NOT correct for the new problem, so quantities of movies or nominations MUST be recalculated for each question! \ 90 | Final answer needs to be presented in the format 'Answer: xxx', where xxx is the number you calculated." 
91 | lengthmessagethree = 55 92 | -------------------------------------------------------------------------------- /gsm-infinite/data/realistic/solver.py: -------------------------------------------------------------------------------- 1 | from sympy import symbols, Eq, solve, sympify 2 | from sympy.core.sympify import SympifyError 3 | from sympy import simplify 4 | from math import sqrt 5 | 6 | def solve_linear_equation(equation, variable): 7 | steps = [] 8 | 9 | # Ensure the equation is in the form lhs = rhs 10 | lhs, rhs = equation.lhs, equation.rhs 11 | steps.append(f"Original equation: {lhs} = {rhs}") 12 | 13 | # Move all terms to the lhs 14 | # For simplicity, assume the equation is linear 15 | # Find the coefficient and constant 16 | coeff = lhs.coeff(variable) 17 | constant = lhs - coeff * variable 18 | 19 | # Step 1: Subtract constant from both sides 20 | step1_lhs = coeff * variable 21 | step1_rhs = rhs - constant 22 | steps.append(f"Step 1: Subtract {constant} from both sides: {coeff}*x = {rhs} - ({constant})") 23 | steps.append(f"Result: {coeff}*x = {step1_rhs}") 24 | 25 | # Step 2: Divide both sides by the coefficient 26 | solution = step1_rhs / coeff 27 | steps.append(f"Step 2: Divide both sides by {coeff}: x = {step1_rhs} / {coeff}") 28 | steps.append(f"Solution: x = {solution}") 29 | 30 | return steps, solution 31 | 32 | def solve_linear_equation_from_string(equation_str, variable_str='x'): 33 | """ 34 | Solves a linear equation for a given variable and provides step-by-step deduction. 35 | 36 | Parameters: 37 | - equation_str (str): The equation as a string, e.g., "2*x + 3 = 7" 38 | - variable_str (str): The variable to solve for, default is 'x' 39 | 40 | Returns: 41 | - steps (list of str): A list of deduction steps 42 | - solution (sympy expression): The solution for the variable 43 | """ 44 | steps = [] 45 | 46 | # Define the variable 47 | variable = symbols(variable_str) 48 | 49 | try: 50 | # Parse the equation string into a SymPy equation 51 | # sympify converts the string into a SymPy expression 52 | # Then, split into lhs and rhs based on the '=' sign 53 | if '=' not in equation_str: 54 | raise ValueError("Equation must contain an '=' sign.") 55 | 56 | lhs_str, rhs_str = equation_str.split('=', 1) 57 | lhs = sympify(lhs_str) 58 | rhs = sympify(rhs_str) 59 | 60 | equation = Eq(lhs, rhs) 61 | steps.append(f"Simplifying the equation: {lhs} = {rhs}") 62 | 63 | # Ensure the equation is linear in the given variable 64 | if equation.lhs.as_poly(variable) is None or equation.rhs.as_poly(variable) is None: 65 | raise ValueError(f"The equation is not linear in {variable_str}.") 66 | 67 | # Move all terms to the lhs 68 | # new_eq = lhs - rhs = 0 69 | new_eq = Eq(lhs - rhs, 0) 70 | steps.append(f"Move all terms to one side: {new_eq.lhs} = {new_eq.rhs}") 71 | 72 | # Find the coefficient and constant term 73 | poly = new_eq.lhs.as_poly(variable) 74 | coeff = poly.coeff_monomial(variable) 75 | constant = poly.as_expr() - coeff * variable 76 | 77 | # Step 2: Isolate the term with the variable 78 | step2_lhs = coeff * variable 79 | step2_rhs = -constant # Because we moved the constant to the other side 80 | steps.append(f"Isolate the term with {variable_str}: {coeff}*{variable_str} = {-constant}") 81 | 82 | # Step 3: Solve for the variable by dividing both sides by the coefficient 83 | solution = step2_rhs / coeff 84 | steps.append(f"Divide both sides by {coeff}: {variable_str} = {step2_rhs} / {coeff}") 85 | steps.append(f"Solution: {variable_str} = {solution}") 86 | 87 | return 
steps, solution 88 | 89 | except SympifyError: 90 | steps.append("Error: The equation string could not be parsed. Please check the syntax.") 91 | return steps, None 92 | except ValueError as ve: 93 | steps.append(f"Error: {ve}") 94 | return steps, None 95 | except Exception as e: 96 | steps.append(f"An unexpected error occurred: {e}") 97 | return steps, None 98 | 99 | def solve_quadratic_step_by_step(equation_str, variable_str='x'): 100 | """ 101 | Solves a quadratic equation step-by-step and returns positive integer solutions. 102 | 103 | Parameters: 104 | - equation_str (str): The quadratic equation as a string, e.g., "x**2 - 5*x + 6 = 0" 105 | - variable_str (str): The variable in the equation, default is 'x' 106 | 107 | Returns: 108 | - steps (list of str): A list containing each step of the solution process. 109 | - solutions (list): A list of positive integer solutions if they exist. 110 | """ 111 | steps = [] 112 | solutions = [] 113 | 114 | try: 115 | # Define the variable 116 | variable = symbols(variable_str) 117 | 118 | # Ensure the equation contains an '=' sign 119 | if '=' not in equation_str: 120 | steps.append("Error: Equation must contain an '=' sign.") 121 | return steps, solutions 122 | 123 | # Split the equation into LHS and RHS 124 | lhs_str, rhs_str = equation_str.split('=', 1) 125 | lhs = sympify(lhs_str) 126 | rhs = sympify(rhs_str) 127 | 128 | # Form the equation 129 | equation = Eq(lhs, rhs) 130 | steps.append(f"Original Equation: {equation}") 131 | 132 | # Move all terms to LHS to get standard form: ax^2 + bx + c = 0 133 | standard_eq = Eq(lhs - rhs, 0) 134 | steps.append(f"Step 1: Move all terms to one side to form the standard quadratic equation:") 135 | steps.append(f" {standard_eq.lhs} = {standard_eq.rhs}") 136 | 137 | # Extract coefficients a, b, c 138 | poly = standard_eq.lhs.as_poly(variable) 139 | if poly is None: 140 | steps.append(f"Error: The equation is not a polynomial in {variable_str}.") 141 | return steps, solutions 142 | 143 | a = poly.coeff_monomial(variable**2) 144 | b = poly.coeff_monomial(variable) 145 | c = poly.coeff_monomial(1) 146 | 147 | steps.append(f"Step 2: Identify the coefficients:") 148 | steps.append(f" a = {a}") 149 | steps.append(f" b = {b}") 150 | steps.append(f" c = {c}") 151 | 152 | # Calculate the discriminant D = b^2 - 4ac 153 | D = b**2 - 4*a*c 154 | steps.append(f"Step 3: Calculate the discriminant (D):") 155 | steps.append(f" D = b^2 - 4ac = ({b})^2 - 4*({a})*({c}) = {D}") 156 | 157 | # Determine the nature of the roots based on the discriminant 158 | if D > 0: 159 | nature = "two distinct real roots" 160 | elif D == 0: 161 | nature = "one real root (a double root)" 162 | else: 163 | nature = "no real roots" 164 | steps.append(f"Step 4: Determine the nature of the roots based on D:") 165 | steps.append(f" Since D = {D}, the equation has {nature}.") 166 | 167 | # If D < 0, no real solutions 168 | if D < 0: 169 | steps.append(f"Conclusion: There are no real roots to this equation.") 170 | return steps, solutions 171 | 172 | # Apply the quadratic formula: x = (-b ± sqrt(D)) / (2a) 173 | steps.append(f"Step 5: Apply the quadratic formula:") 174 | steps.append(f" x = (-b ± sqrt(D)) / (2a)") 175 | steps.append(f" x = (-({b}) ± sqrt({D})) / (2*{a})") 176 | 177 | # Calculate the roots 178 | sqrt_D = sqrt(D) 179 | root1 = simplify((-b + sqrt_D) / (2*a)) 180 | root2 = simplify((-b - sqrt_D) / (2*a)) 181 | steps.append(f" x₁ = ({-b} + sqrt({D})) / (2*{a}) = {root1:.2f}") 182 | steps.append(f" x₂ = ({-b} - sqrt({D})) / (2*{a}) = 
{root2:.2f}")
183 | 
184 |         # Simplify roots
185 |         steps.append(f"Step 6: Simplify the roots:")
186 |         steps.append(f"   x₁ = {root1:.2f}")
187 |         steps.append(f"   x₂ = {root2:.2f}")
188 | 
189 |         # Collect the roots
190 |         roots = [root1, root2]
191 | 
192 |         # Filter for positive integer solutions
193 |         for sol in roots:
194 |             if abs(round(sol) - sol) < 1e-5: # Check if the solution is (numerically) an integer
195 |                 sol = int(round(sol))
196 |                 if sol > 0:
197 |                     solutions.append(sol)
198 | 
199 |         # Conclusion
200 |         if solutions:
201 |             steps.append(f"Conclusion: The positive integer solution(s) is/are {solutions}.")
202 |         else:
203 |             steps.append(f"Conclusion: There are no positive integer solutions.")
204 | 
205 |         return steps, solutions
206 | 
207 |     except SympifyError:
208 |         steps.append("Error: The equation string could not be parsed. Please check the syntax.")
209 |         return steps, solutions
210 |     except Exception as e:
211 |         steps.append(f"An unexpected error occurred: {e}")
212 |         return steps, solutions
213 | 
214 | def check_equation_order(equation_str, variable_str='x'):
215 |     """
216 |     Checks whether the given single-variable equation is first-order or second-order.
217 | 
218 |     Parameters:
219 |     - equation_str (str): The equation as a string, e.g., "2*x + 3 = 7"
220 |     - variable_str (str): The variable to check the order for, default is 'x'
221 | 
222 |     Returns:
223 |     - str: "first-order" if the equation is linear,
224 |            "second-order" if the equation is quadratic,
225 |            or an error message if the input is invalid.
226 |     """
227 |     try:
228 |         # Define the variable
229 |         variable = symbols(variable_str)
230 | 
231 |         # Ensure the equation contains an '=' sign
232 |         if '=' not in equation_str:
233 |             return "Error: Equation must contain an '=' sign."
234 | 
235 |         # Split the equation into LHS and RHS
236 |         lhs_str, rhs_str = equation_str.split('=', 1)
237 | 
238 |         # Parse the LHS and RHS into SymPy expressions
239 |         lhs = sympify(lhs_str)
240 |         rhs = sympify(rhs_str)
241 | 
242 |         # Form the standard equation: lhs - rhs = 0
243 |         standard_eq = lhs - rhs
244 | 
245 |         # Convert the standard equation to a polynomial in the specified variable
246 |         poly = standard_eq.as_poly(variable)
247 | 
248 |         if poly is None:
249 |             return f"Error: The equation is not a polynomial in '{variable_str}'."
250 | 
251 |         # Get the degree of the polynomial
252 |         degree = poly.degree()
253 | 
254 |         if degree == 1:
255 |             return "first-order"
256 |         elif degree == 2:
257 |             return "second-order"
258 |         else:
259 |             return f"Error: The equation is of degree {degree}, which is unsupported."
260 | 
261 |     except SympifyError:
262 |         return "Error: The equation string could not be parsed. Please check the syntax."
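        # (Caveat for callers of this module: sympify evaluates its input via
        # eval under the hood, so equation strings should only come from
        # trusted sources.)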
263 |     except Exception as e:
264 |         return f"An unexpected error occurred: {e}"
265 | 
266 | if __name__ == "__main__":
267 |     # Example usage
268 |     x = symbols('x')
269 |     # equation = "2*x + 3 = 7"
270 |     equation = "2 * (3 + x) - 4 = 5"
271 |     # steps, solution = solve_linear_equation_from_string(equation, 'x')
272 |     # equation = Eq(2*x + 3, 7)
273 |     # steps, solution = solve_linear_equation(equation, x)
274 | 
275 |     print(check_equation_order(equation))
276 |     equation = "x**2 - 5*x + 6 = 0"
277 |     print(check_equation_order(equation))
278 |     steps, solutions = solve_quadratic_step_by_step(equation)
279 |     for step in steps:
280 |         print(step)
281 | 
--------------------------------------------------------------------------------
/gsm-infinite/data/realistic/test_generate3.sh:
--------------------------------------------------------------------------------
1 | modes=(2 3)
2 | for d in "${modes[@]}"
3 | do
4 | total=100 # adjust based on your needs
5 | # breakdown 100 -> 80 zoo 20 teacher-school 20 movies (100 for reverse mode as well)
6 | 
7 | lengths=(zero_context 8k 16k 32k) # adjust if needed
8 | 
9 | for length in "${lengths[@]}"
10 | do
11 | python datagenerationworker.py --numprocs 8 --opmax 30 --total $total --mod -1 --number_range 5 --target_length $length --d $d --force --listoperations 20 19
12 | python datagenerationworker.py --numprocs 8 --opmax 25 --total $total --mod -1 --number_range 5 --target_length $length --d $d --force --listoperations 16 17 18
13 | python datagenerationworker.py --numprocs 8 --opmax 20 --total $total --mod -1 --number_range 5 --target_length $length --d $d --force --listoperations 12 13 14 15
14 | python datagenerationworker.py --numprocs 8 --opmax 15 --total $total --mod -1 --number_range 5 --target_length $length --d $d --force --listoperations 10 11
15 | python datagenerationworker.py --numprocs 8 --opmax 10 --total $total --mod -1 --number_range 5 --target_length $length --d $d --force --listoperations 7 8 9
16 | python datagenerationworker.py --numprocs 16 --opmax 6 --total $total --mod -1 --number_range 5 --target_length $length --d $d --force --listoperations 5 6
17 | python datagenerationworker.py --numprocs 16 --opmax 4 --total $total --mod -1 --number_range 5 --target_length $length --d $d --force --listoperations 4
18 | python datagenerationworker.py --numprocs 16 --opmax 4 --total $total --mod -1 --number_range 5 --target_length $length --d $d --force --listoperations 3
19 | python datagenerationworker.py --numprocs 16 --opmax 3 --total $total --mod -1 --number_range 5 --target_length $length --d $d --force --listoperations 2
20 | done
21 | done
22 | 
--------------------------------------------------------------------------------
/gsm-infinite/data/symbolic/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/gsm-infinite/data/symbolic/__init__.py
--------------------------------------------------------------------------------
/gsm-infinite/data/symbolic/generate_payload.py:
--------------------------------------------------------------------------------
1 | import random
2 | import string
3 | 
4 | def create_forest(rng, nodes, k):
5 |     rng.shuffle(nodes)
6 | 
7 |     N = len(nodes)
8 | 
9 |     edges = []
10 |     parent = [-1] * N
11 | 
12 |     for i in range(k, N):
13 |         parent[i] = rng.randint(0, i-1)
14 |         if (parent[i] != i):
15 |             edges.append((nodes[parent[i]], nodes[i]))
16 | 
17 |     return nodes, edges
18 | 
19 | def 
generate_output(rng, nodes, edges, operations): 20 | node_values = {node: None for node in nodes} 21 | output_list = [] 22 | 23 | roots = set(nodes) - {e[1] for e in edges} 24 | 25 | for root in roots: 26 | val = rng.randint(0, 10) 27 | node_values[root] = val 28 | output_list.append(f'assign {root} = {val}') 29 | 30 | processed = set(roots) 31 | while len(processed) < len(nodes): 32 | for (parent, child), operation in zip(edges, operations): 33 | if node_values[child] is None and parent in processed: 34 | val = node_values[parent] 35 | node_values[child] = val + (1 if operation == ' + 1' else -1 if operation == ' - 1' else 0) 36 | output_list.append(f'assign {child} = {parent}{operation}') 37 | processed.add(child) 38 | 39 | # Sort the output_list based on the numeric part of the variable names 40 | query_value = rng.choice(list(node_values.values())) 41 | query_list = [node for node, value in node_values.items() if value == query_value] 42 | 43 | return output_list, (query_value, query_list), node_values 44 | 45 | 46 | def generate_filler(nodes, edges, operations): 47 | output_list = [] 48 | 49 | roots = set(nodes) - {e[1] for e in edges} 50 | 51 | 52 | processed = set(roots) 53 | for (parent, child), operation in zip(edges, operations): 54 | output_list.append(f'assign {child} = {parent}{operation}') 55 | processed.add(child) 56 | 57 | return output_list 58 | 59 | 60 | 61 | 62 | class FindGraphGenerator: 63 | NUM_VARIABLES = 1000000 64 | MAX_CONSTANT = 9 65 | 66 | def __init__(self, seed): 67 | self.seed = seed 68 | self.rng = random.Random(seed) 69 | self.variables = [f'V{i:06d}' for i in range(self.NUM_VARIABLES)] 70 | 71 | def generate_task(self, op, N, with_solution=False): 72 | variables = self.rng.sample(self.variables, k=N) 73 | k1 = 1 74 | nodes, edges = create_forest(self.rng, variables[:op], k1) 75 | operations = self.rng.choices(['', ' + 1', ' - 1'], k=len(edges)) 76 | output_list, query, node_values = generate_output(self.rng, nodes, edges, operations) 77 | if (N > op): 78 | nodes_filler, edges_filler = create_forest(self.rng, variables[op:], 1) 79 | operations_filler = self.rng.choices(['', ' + 1', ' - 1'], k=len(edges_filler)) 80 | output_list.extend(generate_filler(nodes_filler, edges_filler, operations_filler)) 81 | self.rng.shuffle(output_list) 82 | 83 | if with_solution: 84 | assignment_dict = {} 85 | stack = [] 86 | evaluation_order = [] 87 | solution = "First, there are such direct assignment(s):\n" 88 | for line in output_list: 89 | parts = line.split(" = ") 90 | variable = parts[0].split(" ")[1] 91 | if (variable in node_values): 92 | expression = parts[1] 93 | parts_expression = expression.split(" ") 94 | if len(parts_expression) == 1 and not parts_expression[0].startswith("V"): 95 | stack.append(variable) 96 | evaluation_order.append(variable) 97 | solution += f"{variable} = {parts_expression[0]};\n" 98 | continue 99 | elif len(parts_expression) == 1 and parts_expression[0].startswith("V"): 100 | assignment_dict.setdefault(parts_expression[0], []) 101 | assignment_dict[parts_expression[0]].append((variable, expression, "")) 102 | elif len(parts_expression) == 3: # Check for 3 parts 103 | assignment_dict.setdefault(parts_expression[0], []) 104 | assignment_dict[parts_expression[0]].append((variable, expression, f"{node_values[parts_expression[0]]} {parts_expression[1]} {parts_expression[2]} =")) 105 | solution+="\nFrom these direct assignment(s), we have: \n" 106 | 107 | while stack: 108 | if (stack[-1] in assignment_dict) and (assignment_dict[stack[-1]]): 109 | 
                    variable, expression, evaluated_expression = assignment_dict[stack[-1]].pop()
110 |                     stack.append(variable)
111 |                     evaluation_order.append(variable)
112 |                     solution+=f"{variable} = {expression} = {evaluated_expression}{node_values[variable]};\n"
113 |                 else:
114 |                     solution+=f"No other variables can be derived from {stack[-1]};\n"
115 |                     stack.pop()
116 | 
117 |             solution+=f"\nNow we have done all the calculations. Derived variables are:\n"
118 |             solution_dict = []
119 |             for variable in evaluation_order:
120 |                 solution += f"{variable} = {node_values[variable]};\n"
121 |                 if node_values[variable] == query[0]:
122 |                     solution_dict.append(variable)
123 |             solution += f"\nNow we have finished this problem. ANSWER:\n{', '.join(solution_dict)}\n"
124 | 
125 | 
126 |             return output_list, query, node_values, solution
127 |         else:
128 |             return output_list, query, node_values
129 | 
130 | 
131 | 
132 | 
133 | if __name__ == '__main__':
134 |     # Example Usage:
135 |     N = 12 # Total number of nodes
136 |     op = 6 # Number of operations in the solution
137 |     generator = FindGraphGenerator(43)
138 | 
139 |     output_list, query, node_values, solution = generator.generate_task(op, N, with_solution=True)
140 |     print("Output List:")
141 |     for line in output_list:
142 |         print(line)
143 |     print("Query:", query)
144 |     print("Node Values:", node_values)
145 |     print(solution)
--------------------------------------------------------------------------------
/gsm-infinite/data/symbolic/generate_symbolic.py:
--------------------------------------------------------------------------------
1 | # import generate_answer
2 | import random
3 | import generate_payload
4 | # import generator_calc
5 | # from datasets import Dataset
6 | import json
7 | import os
8 | 
9 | 
10 | def dump_dict_to_json(data, filename):
11 |     """Dumps a Python dictionary to a JSON file, creating the directory if needed.
12 | 
13 |     Args:
14 |         data: The Python dictionary to be dumped.
15 |         filename: The name of the JSON file to be created (e.g., "data/output.json").
16 |     """
17 |     try:
18 |         # Extract the directory path from the filename
19 |         directory = os.path.dirname(filename)
20 | 
21 |         # Create the directory if it doesn't exist
22 |         if directory and not os.path.exists(directory):
23 |             os.makedirs(directory)
24 |             print(f"Created directory: {directory}")
25 | 
26 |         with open(filename, 'w') as f:
27 |             json.dump(data, f, indent=4)
28 |         print(f"Successfully dumped dictionary to {filename}")
29 |     except (TypeError, OSError) as e:
30 |         print(f"Error dumping dictionary to JSON: {e}")
31 | 
32 | def prepare_payload(payload_items):
33 | 
34 |     payloads = [f"{str(item)}."
for item in payload_items] 35 | 36 | new_context = " ".join(payloads) 37 | 38 | return new_context 39 | 40 | def get_payload(length, insert_points=None, op=1, N=1): 41 | output_list, query, value, solution = generator.generate_task(op, N, with_solution=True) 42 | 43 | context = prepare_payload(output_list) 44 | import utils 45 | return f"{utils.get_symbolic_prompt(query[0], context)}", context, f"{utils.get_symbolic_prompt_query(query[0])}", solution, query 46 | 47 | 48 | def get_benchmark_info(length, insert_points, N, op, id, close_rate): 49 | N+= int(length * 828 / 9668 * close_rate) 50 | input, problem, question, solution, query = get_payload(int(length * (1-close_rate)), insert_points, op, N) 51 | answer_q, answer_list = query 52 | # conversation = [{"role": "user", "content": input}, 53 | # {"role": "assistant", "content": solution}] 54 | messages = [{"role": "user", "content": input}] 55 | 56 | return {"problem": problem, "question": question, "solution": solution, "op": op, "n": N, "length": length, "id": id, "d": 1, "answer_q": answer_q, "answer_list": answer_list, "messages": messages} 57 | 58 | # print(get_payload(100, 2)) 59 | if __name__ == '__main__': 60 | import asyncio 61 | from concurrent.futures import ThreadPoolExecutor 62 | import concurrent.futures 63 | import tqdm 64 | import random 65 | random.seed(42) 66 | from datasets import Dataset, DatasetDict 67 | 68 | import argparse 69 | parser = argparse.ArgumentParser( 70 | description="generate dataset with command line arguments." 71 | ) 72 | 73 | parser.add_argument('--dataset-name', type=str, help="The name of the dataset for organizing the folders") 74 | # Required arguments 75 | parser.add_argument( 76 | '--max-op', 77 | type=int, 78 | required=False, 79 | default=40, 80 | help='max op to generate, default 40' 81 | ) 82 | 83 | parser.add_argument( 84 | '--min-op', 85 | type=int, 86 | required=False, 87 | default=1, 88 | help='min op to generate, default 1' 89 | ) 90 | 91 | parser.add_argument( 92 | '--stride', 93 | type=int, 94 | required=False, 95 | default=1, 96 | help='stride size to skip op, default 1', 97 | ) 98 | 99 | parser.add_argument( 100 | '--examples-per-op', 101 | type=int, 102 | required=False, 103 | default=50, 104 | help='examples per op to generate, default 50', 105 | ) 106 | 107 | parser.add_argument( 108 | '--length', 109 | type=str, 110 | default="0", 111 | help='noise context length' 112 | ) 113 | 114 | 115 | args = parser.parse_args() 116 | ddict = {} 117 | length = args.length 118 | if isinstance(length, str): 119 | if length.lower().endswith('k'): 120 | length = int(length[:-1]) * 1000 121 | else: 122 | length = int(length) 123 | seed = 42 124 | generator = generate_payload.FindGraphGenerator(seed) 125 | dataset_list = [] 126 | ops = [] 127 | ids = [] 128 | for op in range(1, args.max_op, args.stride): 129 | ops = [op]*args.examples_per_op 130 | ids = list(range(args.examples_per_op)) 131 | 132 | def generate_examples(ops, ids, length): 133 | for id, op in tqdm.tqdm(zip(ids, ops), total=len(ops), desc=f"Generating examples for length {length}"): # Added tqdm here 134 | insert_points = None 135 | yield get_benchmark_info(length, insert_points, op, op, id, 1.0) 136 | 137 | dataset = Dataset.from_generator(generate_examples, gen_kwargs={"ops": ops, "ids": ids, "length": length}) 138 | if (op < args.min_op): 139 | continue 140 | dataset.push_to_hub(f"{args.dataset_name}_{args.length}", split=f"ops_{op}", private=True) 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 
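# A note on the arithmetic in get_benchmark_info above: with close_rate = 1.0,
# the requested noise budget is realized entirely as extra graph nodes, e.g.
# for length = 8000 the node count N grows by int(8000 * 828 / 9668) = 685
# filler variables. (Reading 828/9668 as an empirically measured
# nodes-per-character ratio is an assumption; the constant is not documented.)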
--------------------------------------------------------------------------------
/gsm-infinite/data/symbolic/generate_symbolic.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | dataset_base_name="HF_USER_NAME/gsm_infinite_symbolic"
4 | 
5 | 
6 | # feel free to adjust the parameters.
7 | python3 generate_symbolic.py --dataset-name $dataset_base_name --length 0 --stride 1 --min-op 1 --max-op 120 --examples-per-op 100
8 | 
9 | python3 generate_symbolic.py --dataset-name $dataset_base_name --length 8k --stride 1 --min-op 1 --max-op 30 --examples-per-op 100
10 | 
11 | python3 generate_symbolic.py --dataset-name $dataset_base_name --length 16k --stride 1 --min-op 1 --max-op 30 --examples-per-op 100
12 | 
13 | python3 generate_symbolic.py --dataset-name $dataset_base_name --length 32k --stride 1 --min-op 1 --max-op 30 --examples-per-op 100
--------------------------------------------------------------------------------
/gsm-infinite/data/symbolic/utils.py:
--------------------------------------------------------------------------------
1 | 
2 | def get_symbolic_prompt(query_value, context):
3 |     prompt = f"\n{context}\n\n\nThe context contains relationships between variables. These relationships are independent mathematical equations that are all satisfied simultaneously.\nUsing only these relationships, determine which variables (if any), among those whose values can be derived, are equal to {query_value}.\nShow your step-by-step reasoning and calculations, and then conclude your final answer in a sentence."
4 |     # prompt = f"{context}\n**Question:**\nCan you tell me which variables are equal to {query_value} in those relationships in '<<<>>>'? These relationships are in no particular order.\nShow your step-by-step reasoning and calculations, and then conclude your final answer in a sentence."
5 |     return prompt
6 | 
7 | def get_symbolic_prompt_query(query_value):
8 |     prompt = f"The context contains relationships between variables. These relationships are independent mathematical equations that are all satisfied simultaneously.\nUsing only these relationships, determine which variables (if any), among those whose values can be derived, are equal to {query_value}.\nShow your step-by-step reasoning and calculations, and then conclude your final answer in a sentence."
9 |     # prompt = f"{context}\n**Question:**\nCan you tell me which variables are equal to {query_value} in those relationships in '<<<>>>'? These relationships are in no particular order.\nShow your step-by-step reasoning and calculations, and then conclude your final answer in a sentence."
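    # Illustrative rendering (hypothetical query_value = 3): the string above
    # asks which derivable variables equal 3, while the paired context from
    # get_symbolic_prompt supplies lines such as "assign V000123 = 3." and
    # "assign V000456 = V000123 + 1." for the model to chain through.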
10 |     return prompt
--------------------------------------------------------------------------------
/gsm-infinite/pred/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/gsm-infinite/pred/__init__.py
--------------------------------------------------------------------------------
/gsm-infinite/pred/eval_realistic.py:
--------------------------------------------------------------------------------
1 | import re
2 | 
3 | def preprocess_line(line):
4 |     replies = line["replies"]
5 |     return replies
6 |     # value = line["answer_q"]
7 |     # return [get_prompt(reply, value) for reply in replies]
8 | 
9 | def is_integer(s):
10 |     try:
11 |         int(s)
12 |         return True
13 |     except ValueError:
14 |         return False
15 | 
16 | def criteriaoutput(generatedtext, inputexample):
17 |     correctedanswers = 0
18 |     totalanswers = 0
19 |     # parsing the answer key
20 |     for i in range(len(generatedtext)):
21 |         totalanswers += 1
22 |         idx_answer_start = inputexample["solution"].find("Answer: ")
23 |         idx_answer_end = inputexample["solution"].find(".", idx_answer_start)
24 |         answer_text = inputexample["solution"][idx_answer_start + len("Answer: ") : idx_answer_end]
25 |         answer_text = int(answer_text)
26 | 
27 |         generatedtext[i] = re.sub('.\x08', 'b', generatedtext[i])
28 |         generatedtext[i] = generatedtext[i].lower()
29 |         # if args.verbose and args.local_rank == 0:
30 |         #     print(colored(inputexample["solution"], "yellow"), flush = True)
31 |         #     print(colored(generatedtext[i], "cyan"), flush = True)
32 | 
33 |         idx_generated_begin = -1
34 |         idx_generated_conclude = -1
35 |         keywords = ["answer: ", "solution: ", "oxed{", "**answer:** ", "**answer: ", "final answer: answer: ", "\nanswer: ", r"\text{answer: } ", "is ", "answer: "] # updated
36 |         keywordsend = [".", ".", "}", ".", "**", ".", ".", None, ".", "\n"]
37 |         cnt = 0
38 | 
39 |         while not (idx_generated_begin != -1 and idx_generated_conclude != -1) and cnt < len(keywords):
40 |             if keywords[cnt] in ["oxed{", "is "]:
41 |                 idx_generated_begin = generatedtext[i].rfind(keywords[cnt]) # relies on generation being cut off by stop tokens before any follow-up question is produced
42 |             else:
43 |                 idx_generated_begin = generatedtext[i].find(keywords[cnt])
44 |             if idx_generated_begin != -1:
45 |                 if keywordsend[cnt] is None:
46 |                     idx_generated_conclude = idx_generated_begin + len(keywords[cnt])
47 |                     while idx_generated_conclude < len(generatedtext[i]) and generatedtext[i][idx_generated_conclude].isdigit():
48 |                         idx_generated_conclude += 1
49 |                 else:
50 |                     idx_generated_conclude = generatedtext[i].find(keywordsend[cnt], idx_generated_begin + len(keywords[cnt]))
51 |                 if idx_generated_conclude == -1:
52 |                     idx_generated_conclude = len(generatedtext[i])
53 |             cnt += 1
54 |             if not is_integer(generatedtext[i][idx_generated_begin + len(keywords[cnt - 1]) : idx_generated_conclude]):
55 |                 idx_generated_begin = -1
56 |                 idx_generated_conclude = -1
57 |                 continue # if not this line, it will exit the loop
58 | 
59 |         if idx_generated_begin == -1:
60 |             # if args.local_rank == 0:
61 |             #     print(colored("Answer not found", "red"), flush = True)
62 |             correctedanswers += 0
63 |             continue
64 |         else:
65 |             try:
66 |                 answergenerated_text = int(generatedtext[i][idx_generated_begin + len(keywords[cnt - 1]) : idx_generated_conclude])
67 |             except:
68 |                 # if args.local_rank == 0:
69 |                 #     print(colored("Answer not found", "red"), flush = True)
70 |                 correctedanswers += 0
71 |                 continue
72 |             # if args.local_rank == 0:
73 |             #     if answergenerated_text == answer_text:
74 |             #         print(colored("Answer {} expected {}".format(answergenerated_text, answer_text), "green"), flush = True)
75 |             #     else:
76 |             #         print(colored("Answer {} expected {}".format(answergenerated_text, answer_text), "red"), flush = True)
77 |             correctedanswers += int(answergenerated_text == answer_text)
78 |     return correctedanswers, totalanswers
79 | 
80 | 
81 | 
82 | def postprocess_line(line, extractions):
83 |     corrected, total = criteriaoutput(extractions, line)
84 |     line["correct_num"] = corrected
85 |     line["reply_answers"] = [""] * total
86 | 
87 |     return line
88 | 
89 | if __name__ == '__main__':
90 |     from concurrent.futures import ThreadPoolExecutor
91 |     import concurrent.futures
92 |     import tqdm
93 |     import argparse
94 |     import json
95 |     parser = argparse.ArgumentParser(
96 |         description="Eval with command line arguments."
97 |     )
98 |     parser.add_argument('--save-name', type=str, help="The name of the file saved for organizing the folders", default="base")
99 |     parser.add_argument('--save-dataset', type=str, help="Save dataset name", default="base")
100 |     # parser.add_argument('--dataset-name', type=str, help="The name of the dataset for organizing the folders")
101 |     parser.add_argument(
102 |         '--num-samples',
103 |         type=int,
104 |         default=1,
105 |         help='Number of samples to generate.'
106 |     )
107 | 
108 |     parser.add_argument(
109 |         '--length',
110 |         type=str,
111 |         default="0",
112 |         help='noise context length'
113 |     )
114 | 
115 |     parser.add_argument(
116 |         '--filter-config',
117 |         type=json.loads,
118 |         help='Filter configuration as a JSON string.'
119 |     )
120 | 
121 |     args = parser.parse_args()
122 | 
123 |     if (args.num_samples == 1):
124 |         sample_nums = [None]
125 |     else:
126 |         sample_nums = [1<
--------------------------------------------------------------------------------
/gsm-infinite/pred/eval_symbolic.py:
--------------------------------------------------------------------------------
1 | import re
2 | 
3 | from model_handler import ModelHandler
4 | from no_rag_pipeline import NoRAGPipeline
5 | 
6 | 
7 | def get_prompt(reply, value):
8 |     if "</think>" in reply:
9 |         reply = reply.split("</think>")[-1].strip()
10 |     return f"""
11 | Carefully read the input text below and extract the variable names that are mentioned as being equal to {value}. If multiple variables are listed, separate them with commas.
12 | 
13 | If the value {value} is not mentioned explicitly, you can safely assume that the listed variables are all equal to {value}. If there are no such variables, just output None.
14 | 
15 | Only output the variable names, not the values or any other text.
16 | 
17 | Examples:
18 | 1. input: "V0, V3, V4", output: "V0, V3, V4"
19 | 2. input: "Variable V2 is equal to {value} in the assignment statement 'assign V2 = V1 - 1.'.", output: "V2"
20 | 3. input: "The answer is: V1, V2.", output: "V1, V2"
21 | 4. input: "There are no variables equal to {value}.", output: "None"
22 | 
23 | Input:
24 | {reply}
25 | 
26 | Output:
27 | """
28 | 
29 | def preprocess_line(line):
30 |     replies = line["replies"]
31 |     value = line["answer_q"]
32 |     return [get_prompt(reply, value) for reply in replies]
33 | 
34 | 
35 | 
36 | 
37 | def postprocess_line(line, extractions):
38 |     correct_counter = 0
39 |     line.pop("replies")
40 |     variable_list = line["answer_list"]
41 |     reply_variables = []
42 |     for extraction in extractions:
43 |         variables = re.findall(r'\bV\d+\b', extraction)
44 |         reply_variables.append(list(variables))
45 |         if set(variable_list) == set(variables):
46 |             correct_counter += 1
47 |     line["correct_num"] = correct_counter
48 |     line["reply_answers"] = reply_variables
49 | 
50 | 
51 |     return line
52 | 
53 | if __name__ == '__main__':
54 |     from concurrent.futures import ThreadPoolExecutor
55 |     import concurrent.futures
56 |     import tqdm
57 |     import argparse
58 |     import json
59 |     parser = argparse.ArgumentParser(
60 |         description="Eval with command line arguments."
61 | ) 62 | parser.add_argument('--save-name', type=str, help="The name of the file saved for organizing the folders", default="base") 63 | # parser.add_argument('--dataset-name', type=str, help="The name of the dataset for organizing the folders") 64 | parser.add_argument( 65 | '--num-samples', 66 | type=int, 67 | default=1, 68 | help='Number of samples to generate.' 69 | ) 70 | 71 | parser.add_argument( 72 | '--length', 73 | type=str, 74 | default="0", 75 | help='noise context length' 76 | ) 77 | 78 | args = parser.parse_args() 79 | model_handler = ModelHandler( 80 | model_name = "Qwen/Qwen2.5-7B-Instruct", 81 | ) 82 | pipeline = NoRAGPipeline( 83 | model_handler = model_handler, 84 | temperature=0.0, 85 | max_tokens=2048 86 | ) 87 | 88 | if (args.num_samples == 1): 89 | sample_nums = [None] 90 | else: 91 | sample_nums = [1< max_retries: 40 | raise Exception( 41 | f"Maximum number of retries ({max_retries}) exceeded." 42 | ) 43 | 44 | # Increment the delay 45 | delay *= exponential_base * (1 + jitter * random.random()) 46 | 47 | # Sleep for the delay 48 | time.sleep(delay) 49 | 50 | 51 | return wrapper 52 | 53 | class ModelHandler: 54 | SUPPORTED_BACKENDS = ['openai', 'vllm', 'sglang', 'gemini', 'anthropic'] 55 | 56 | def __init__(self, model_name: str = "gpt-4o-mini", backend_type: str = "openai"): 57 | if backend_type not in self.SUPPORTED_BACKENDS: 58 | raise ValueError(f"Unsupported backend type: {backend_type}") 59 | 60 | self.model_type = backend_type 61 | self.model_name = model_name 62 | self.client = None 63 | self._initialize_client() 64 | 65 | def _initialize_client(self): 66 | if self.model_type == "openai": 67 | self.api_key = os.getenv('OPENAI_API_KEY') 68 | if not self.api_key: 69 | raise ValueError("OpenAI API key not found in environment") 70 | self.client = OpenAI(api_key=self.api_key) 71 | elif self.model_type == "gemini": 72 | self.api_key = os.getenv('GEMINI_API_KEY') 73 | if not self.api_key: 74 | raise ValueError("GEMINI API key not found in environment") 75 | genai.configure(api_key=self.api_key) 76 | elif self.model_type == "anthropic": 77 | self.api_key = os.getenv('ANTHROPIC_API_KEY') 78 | if not self.api_key: 79 | raise ValueError("ANTHROPIC API key not found in environment") 80 | self.client = anthropic.Anthropic( 81 | api_key=self.api_key, 82 | ) 83 | else: 84 | raise NotImplementedError("not implemented") 85 | 86 | @retry_with_exponential_backoff 87 | def generate_answer( 88 | self, 89 | prompt: Union[str, List[str]], 90 | **kwargs 91 | ) -> Union[str, Dict[str, Any]]: 92 | if self.model_type == "openai": 93 | return self._get_openai_response(prompt, **kwargs) 94 | elif self.model_type == "gemini": 95 | return self._get_gemini_response(prompt, **kwargs) 96 | elif self.model_type == "anthropic": 97 | return self._get_anthropic_response(prompt, **kwargs) 98 | else: 99 | raise ValueError(f"Unsupported backend type: {self.model_type}") 100 | 101 | def _get_openai_response(self, prompt: Union[str, List[str]], 102 | max_tokens: int = 4096, 103 | temperature = None, 104 | **kwargs) -> str: 105 | messages = [ 106 | # {"role": "system", "content": "You are a helpful assistant focused on providing accurate and detailed responses."}, 107 | {"role": "user", "content": prompt} 108 | ] if type(prompt) == str else prompt 109 | response = self.client.chat.completions.create( 110 | model=self.model_name, 111 | messages=messages, 112 | timeout=600.0, # 300 second timeout for individual requests 113 | max_tokens=max_tokens, 114 | temperature=temperature, 115 | **kwargs 116 | 
) 117 | return response.choices[0].message.content.strip() 118 | 119 | 120 | def _get_gemini_response(self, prompt: Union[str, List[str]], 121 | max_tokens: int = 4096, 122 | temperature = None, 123 | **kwargs) -> str: 124 | try: 125 | generation_config = { 126 | "temperature": temperature, 127 | "top_p": 0.95, 128 | "top_k": 40, 129 | "max_output_tokens": max_tokens, 130 | "response_mime_type": "text/plain", 131 | } 132 | 133 | messages = [ 134 | {"role": "user", "content": prompt} 135 | ] if type(prompt) == str else prompt 136 | 137 | def convert_chat_history(openai_messages): 138 | """ 139 | Converts OpenAI-style chat history to Gemini-style chat history. 140 | 141 | Args: 142 | openai_messages: A list of dictionaries representing the OpenAI chat history. 143 | 144 | Returns: 145 | A tuple containing the Gemini-style chat history (list of dictionaries) and the system prompt (string). 146 | """ 147 | gemini_history = [] 148 | system_prompt = None 149 | 150 | role_map = {"assistant": "model", "user": "user"} 151 | 152 | for message in openai_messages[:-1]: 153 | if message["role"] == "system": 154 | system_prompt = message["content"] 155 | else: 156 | gemini_history.append( 157 | { 158 | "role": role_map[message["role"]], 159 | "parts": [message["content"]], 160 | } 161 | ) 162 | user_input = openai_messages[-1]["content"] 163 | 164 | return gemini_history, system_prompt, user_input 165 | 166 | gemini_history, system_prompt, user_input = convert_chat_history(messages) 167 | 168 | model = genai.GenerativeModel( 169 | model_name=self.model_name, 170 | generation_config=generation_config, 171 | system_instruction=system_prompt 172 | ) 173 | 174 | chat_session = model.start_chat( 175 | history=gemini_history 176 | ) 177 | 178 | response = chat_session.send_message(user_input) 179 | return response.text 180 | except Exception as e: 181 | print(f"Error in Gemini response: {str(e)}") 182 | raise 183 | 184 | def _get_anthropic_response(self, prompt: Union[str, List[str]], 185 | max_tokens: int = 4096, 186 | temperature = None, 187 | **kwargs) -> str: 188 | try: 189 | messages = [ 190 | {"role": "user", "content": prompt} 191 | ] if type(prompt) == str else prompt 192 | 193 | response = self.client.messages.create( 194 | model=self.model_name, 195 | messages=messages, 196 | timeout=600.0, # 600 second timeout for individual requests 197 | max_tokens=max_tokens, 198 | temperature=temperature, 199 | **kwargs 200 | ) 201 | return response.content[0].text 202 | except Exception as e: 203 | print(f"Error in Anthropic response: {str(e)}") 204 | raise 205 | 206 | def cleanup(self): 207 | """Clean up resources.""" 208 | if hasattr(self, 'client') and self.client: 209 | if hasattr(self.client, 'close'): 210 | self.client.close() 211 | self.client = None 212 | 213 | def main(): 214 | prompt = "What is the capital of France? Please describe it." 
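    # Note: generate_answer is wrapped by retry_with_exponential_backoff, so a
    # transient API failure below is retried with exponentially growing,
    # jittered delays before an exception is finally raised.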
215 | 216 | # try: 217 | handler = ModelHandler(model_name="gpt-4o-mini", backend_type="openai") 218 | # handler = ModelHandler(model_name="gemini-2.0-flash-exp", backend_type="gemini") 219 | # handler = ModelHandler(model_name="claude-3-5-haiku-20241022", backend_type="anthropic") 220 | ret = handler.generate_answer( 221 | prompt, 222 | max_tokens=4000, 223 | temperature=0.7, 224 | ) 225 | print(f"OpenAI Response: {ret}") 226 | 227 | if __name__ == "__main__": 228 | main() 229 | -------------------------------------------------------------------------------- /gsm-infinite/pred/no_rag_pipeline.py: -------------------------------------------------------------------------------- 1 | """NoRAG pipeline implementation.""" 2 | 3 | from typing import Dict, Any, List, Optional, Tuple, Union 4 | # from ..base.pipeline import BasePipeline 5 | from model_handler import ModelHandler 6 | from concurrent.futures import ThreadPoolExecutor 7 | 8 | class NoRAGPipeline(): 9 | """Pipeline that sends entire context to model without retrieval.""" 10 | 11 | def __init__(self, model_handler: ModelHandler, temperature: float = 0.0, max_tokens=512): 12 | """Initialize pipeline with model handler and temperature. 13 | 14 | Args: 15 | model_handler: ModelHandler instance 16 | temperature: Temperature for model sampling (default 0.0 for consistent outputs) 17 | """ 18 | self.model_handler = model_handler 19 | self.temperature = temperature 20 | self.config = {'max_tokens': max_tokens} # Default configuration 21 | 22 | def process_single(self, query: str, contexts: str) -> str: 23 | """Process a single query with given context. 24 | 25 | Args: 26 | query: Question string 27 | context: Context string 28 | 29 | Returns: 30 | Model response string 31 | """ 32 | response = self.get_llm_response(query) 33 | return response 34 | 35 | def _validate_batch_inputs(self, queries: List[str], contexts: List[str]) -> None: 36 | if len(queries) != len(contexts): 37 | raise ValueError("Number of queries must match number of contexts") 38 | 39 | def process_batch_with_executor(self, queries: List[str], contexts: List[str], max_workers=200) -> List[str]: 40 | self._validate_batch_inputs(queries, contexts) 41 | responses = [] 42 | 43 | # Check if the progress bar is enabled and if the number of queries is greater than 2 44 | show_progress_bar = self.config.get('progress_bar', True) and len(queries) > 2 45 | 46 | with ThreadPoolExecutor(max_workers=max_workers) as executor: 47 | futures = [] 48 | for query, context in zip(queries, contexts): 49 | future = executor.submit(self.process_single, query, context) 50 | futures.append(future) 51 | 52 | if show_progress_bar: 53 | from tqdm import tqdm 54 | for future in tqdm(futures, total=len(queries), desc="Processing queries"): 55 | try: 56 | response = future.result() 57 | responses.append(response) 58 | except Exception as e: 59 | responses.append(f"Error: {str(e)}") 60 | else: 61 | for future in futures: 62 | try: 63 | response = future.result() 64 | responses.append(response) 65 | except Exception as e: 66 | responses.append(f"Error: {str(e)}") 67 | 68 | return responses 69 | 70 | def process_batch(self, queries: List[str], contexts: List[str]=None, max_workers=200) -> List[str]: 71 | """Process a batch of queries with corresponding contexts. 
72 | 
73 |         Args:
74 |             queries: List of question strings
75 |             contexts: List of context strings
76 | 
77 |         Returns:
78 |             List of model responses
79 |         """
80 |         if contexts is None:
81 |             contexts = queries
82 |         self._validate_batch_inputs(queries, contexts)
83 |         return self.process_batch_with_executor(queries, contexts, max_workers)
84 | 
85 |     def get_llm_response(
86 |         self,
87 |         prompt: str,
88 |         max_tokens: Optional[int] = None,
89 |         temperature: Optional[float] = None,
90 |         **kwargs
91 |     ) -> str:
92 |         """Get a response from the LLM.
93 | 
94 |         Args:
95 |             prompt: Input prompt
96 |             max_tokens: Maximum number of tokens to generate
97 |             temperature: Sampling temperature (0.0 for deterministic output)
98 |             **kwargs: Additional arguments to pass to the model handler
99 | 
100 |         Returns:
101 |             Model response string
102 |         """
103 |         try:
104 |             if temperature is None:
105 |                 temperature = self.temperature
106 | 
107 |             if max_tokens is None:
108 |                 max_tokens = self.config.get('max_tokens', 1000)
109 | 
110 |             response = self.model_handler.generate_answer(
111 |                 prompt,
112 |                 temperature=temperature,
113 |                 max_tokens=max_tokens,
114 |                 **kwargs
115 |             )
116 |             return response
117 |         except Exception as e:
118 |             print(f"Error in get_llm_response: {str(e)}")
119 |             raise
120 | 
--------------------------------------------------------------------------------
/gsm-infinite/pred/pred.py:
--------------------------------------------------------------------------------
1 | from model_handler import ModelHandler
2 | from no_rag_pipeline import NoRAGPipeline
3 | 
4 | def dump_dict_to_json(data, filename):
5 |     """Dumps a Python dictionary to a JSON file, creating the directory if needed.
6 | 
7 |     Args:
8 |         data: The Python dictionary to be dumped.
9 |         filename: The name of the JSON file to be created (e.g., "data/output.json").
10 |     """
11 |     import os
12 |     import json
13 |     try:
14 |         # Extract the directory path from the filename
15 |         directory = os.path.dirname(filename)
16 | 
17 |         # Create the directory if it doesn't exist
18 |         if directory and not os.path.exists(directory):
19 |             os.makedirs(directory)
20 |             print(f"Created directory: {directory}")
21 | 
22 |         with open(filename, 'w') as f:
23 |             json.dump(data, f, indent=4)
24 |         print(f"Successfully dumped dictionary to {filename}")
25 |     except (TypeError, OSError) as e:
26 |         print(f"Error dumping dictionary to JSON: {e}")
27 | 
28 | 
29 | # print(get_payload(100, 2))
30 | if __name__ == '__main__':
31 |     from concurrent.futures import ThreadPoolExecutor
32 |     import concurrent.futures
33 |     import tqdm
34 |     from datasets import Dataset, load_dataset, load_from_disk, concatenate_datasets
35 |     import json
36 | 
37 |     # parser = argparse.ArgumentParser(description="Run benchmark tests and organize results")
38 |     # parser.add_argument('--model-name', type=str, help="The name of the model for organizing the folders")
39 |     import argparse
40 |     parser = argparse.ArgumentParser(
41 |         description="Sample with command line arguments."
42 |     )
43 |     parser.add_argument('--save-name', type=str, help="Save model name", default="base")
44 |     parser.add_argument('--save-dataset', type=str, help="Save dataset name", default="base")
45 |     parser.add_argument('--dataset-name', type=str, help="The name of the dataset for organizing the folders")
46 |     # Required arguments
47 |     parser.add_argument(
48 |         '--model-name',
49 |         type=str,
50 |         required=True,
51 |         help='Name of the model to use in api call.'
52 | ) 53 | parser.add_argument( 54 | '--backend-type', 55 | type=str, 56 | default="openai", 57 | help='backend type in [\'openai\', \'anthropic\', \'gemini\']' 58 | ) 59 | parser.add_argument( 60 | '--num-samples', 61 | type=int, 62 | default=1, 63 | help='Number of samples to generate per example.' 64 | ) 65 | 66 | # Optional arguments with default values 67 | parser.add_argument( 68 | '--temperature', 69 | type=float, 70 | default=None, 71 | help='Sampling temperature (default: None).' 72 | ) 73 | 74 | parser.add_argument( 75 | '--max-tokens', 76 | type=int, 77 | default=3072, 78 | help='Maximum number of tokens (default: 3072).' 79 | ) 80 | 81 | parser.add_argument( 82 | '--batch-size', 83 | type=int, 84 | default=200, 85 | help='Batch size (default: 200).' 86 | ) 87 | 88 | parser.add_argument( 89 | '--length', 90 | type=str, 91 | default="0", 92 | help='noise context length' 93 | ) 94 | 95 | parser.add_argument( 96 | '--limit', 97 | type=int, 98 | default=100, 99 | help="max number of examples per op" 100 | ) 101 | 102 | parser.add_argument( 103 | '--filter-config', 104 | type=json.loads, 105 | help='Filter configuration as a JSON string.' 106 | ) 107 | 108 | parser.add_argument( 109 | '--op-range', 110 | type=str, 111 | help='Operating range, can be an integer, or a list of integers separated by commas.' 112 | ) 113 | args = parser.parse_args() 114 | 115 | if args.op_range: 116 | try: 117 | # Attempt to parse as a single integer 118 | args.op_range = [int(args.op_range)] 119 | except ValueError: 120 | # If not a single integer, split by comma and convert to integers 121 | try: 122 | args.op_range = [int(x.strip()) for x in args.op_range.split(',')] 123 | except ValueError: 124 | raise ValueError("Invalid input for --op-range. Please provide an integer or a comma-separated list of integers.") 125 | 126 | 127 | subsets = [f"ops_{x}" for x in args.op_range] 128 | use_full_query = True 129 | 130 | model_handler = ModelHandler( 131 | model_name=args.model_name, 132 | backend_type=args.backend_type 133 | ) 134 | pipeline = NoRAGPipeline( 135 | model_handler = model_handler, 136 | max_tokens=args.max_tokens, 137 | temperature=args.temperature 138 | ) 139 | use_full_query = True 140 | 141 | 142 | # for length in [0, 8000, 16000, 32000, 64000, 128000]: 143 | length = args.length 144 | try: 145 | 146 | # opset = set(args.op_range) 147 | # unprocessed_dataset = unprocessed_dataset.filter(lambda example: example["op"] in opset) 148 | full_dataset = load_dataset(f"{args.dataset_name}_{length}") 149 | filter_config = args.filter_config 150 | if filter_config: 151 | filtered_datasets = [] 152 | for split in subsets: 153 | dataset_split = full_dataset[split] 154 | total_samples = min(args.limit, len(dataset_split)) 155 | filtered_data = [] 156 | for config in filter_config: 157 | num_to_add = int(total_samples * config["percentage"]) 158 | current_filter = {key: value for key, value in config.items() if key not in ["percentage"]} 159 | filtered_subset = dataset_split.filter(lambda example: all(example[key] == value for key, value in current_filter.items())) 160 | filtered_data.extend(filtered_subset.select(range(min(num_to_add, len(filtered_subset))))) 161 | filtered_datasets.append(Dataset.from_list(filtered_data)) 162 | unprocessed_dataset = concatenate_datasets(filtered_datasets) 163 | else: 164 | unprocessed_dataset = concatenate_datasets([full_dataset[split].select(range(min(args.limit, len(full_dataset[split])))) for split in subsets]) 165 | # unprocessed_dataset = 
concatenate_datasets([full_dataset[split].select(range(min(args.limit, len(full_dataset[split])))) for split in subsets]) 166 | # unprocessed_dataset = load_from_disk( 167 | # f"{args.dataset_name}_{length}", 168 | # # data_dir=f"o 169 | # # split=str(length), 170 | # ) 171 | # with open(args.dataset_name, 'r') as f: 172 | # unprocessed_dataset = json.load(f)[str(length)] 173 | # print(unprocessed_dataset) 174 | 175 | len_dataset = len(unprocessed_dataset) 176 | contexts = [] 177 | queries = [] 178 | for i in range(0, len_dataset): 179 | for _ in range(args.num_samples): 180 | queries.append(unprocessed_dataset[i]['messages']) 181 | 182 | replies = pipeline.process_batch(queries=queries, max_workers=args.batch_size) 183 | processed_examples = [] 184 | 185 | for i in range(0, len_dataset): 186 | newline = unprocessed_dataset[i] 187 | newline["replies"] = replies[i*args.num_samples:(i+1)*args.num_samples] 188 | newline.pop("problem", "") 189 | newline.pop("question", "") 190 | newline.pop("messages", "") 191 | processed_examples.append(newline) 192 | 193 | # print(replies[0]) 194 | import os 195 | dir_name = "datasets" 196 | os.makedirs(dir_name, exist_ok=True) # Create directory if it doesn't exist 197 | 198 | dump_dict_to_json(processed_examples, f"{dir_name}/{args.save_dataset}-{args.save_name}_{length}") 199 | except Exception as e: 200 | print(e) 201 | raise 202 | -------------------------------------------------------------------------------- /gsm-infinite/preprocess.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import re 4 | 5 | # Directory containing result files 6 | RESULTS_DIR = 'results/' 7 | 8 | # Initialize an empty list to store data 9 | data = [] 10 | 11 | # Regex pattern to extract dataset and model from filename 12 | filename_pattern = re.compile(r"^result_(?P[A-Za-z0-9_\.]+)_(?P[A-Za-z0-9\-\.]+)\.txt$") 13 | 14 | # Iterate over all files in the results directory 15 | for filename in os.listdir(RESULTS_DIR): 16 | match = filename_pattern.match(filename) 17 | if match: 18 | dataset = match.group("dataset") 19 | model = match.group("model") 20 | filepath = os.path.join(RESULTS_DIR, filename) 21 | 22 | try: 23 | with open(filepath, 'r') as file: 24 | for line in file: 25 | # Parse the line 26 | parts = line.strip().split(',') 27 | length = int(parts[0].split(':')[1].strip()) 28 | 29 | # Check if this is a line with fine-grained statistics 30 | has_subset_info = any('template:' in part for part in parts) or any('mode:' in part for part in parts) 31 | 32 | # Extract operation number (N) 33 | op_part = parts[1].strip() 34 | if op_part.startswith('op:'): 35 | N = int(op_part.split(':')[1].strip()) 36 | elif op_part.startswith('N:'): 37 | N = int(op_part.split(':')[1].strip()) 38 | else: 39 | raise ValueError(f"Unexpected format for operation: {op_part}") 40 | 41 | # Extract accuracy 42 | acc_part = next(part for part in parts if 'acc:' in part) 43 | acc = float(acc_part.split(':')[1].strip()) 44 | 45 | # Initialize entry with common fields 46 | entry = { 47 | 'dataset': dataset, 48 | 'model': model, 49 | 'length': length, 50 | 'N': N, 51 | 'accuracy': acc, 52 | 'has_subset_info': has_subset_info 53 | } 54 | 55 | # Add subset information if available 56 | if has_subset_info: 57 | for part in parts[2:]: # Skip length and op parts 58 | if 'num_examples:' in part: 59 | entry['num_examples'] = int(part.split(':')[1].strip()) 60 | elif 'template:' in part: 61 | entry['template'] = part.split(':')[1].strip() 
62 | elif 'mode:' in part: 63 | entry['mode'] = part.split(':')[1].strip() 64 | 65 | data.append(entry) 66 | 67 | except Exception as e: 68 | print(f"Error processing {filepath}: {e}") 69 | raise 70 | 71 | # Create a DataFrame from the collected data 72 | df = pd.DataFrame(data) 73 | 74 | # Fill NaN values for subset columns 75 | if 'template' in df.columns: 76 | df['template'].fillna('default', inplace=True) 77 | if 'mode' in df.columns: 78 | df['mode'].fillna('default', inplace=True) 79 | if 'num_examples' in df.columns: 80 | df['num_examples'].fillna(-1, inplace=True) 81 | 82 | # Optional: Save the processed data for future use 83 | df.to_csv('results/processed_results.csv', index=False) 84 | 85 | print(f"Processed {len(df)} data points from {len(df['dataset'].unique())} datasets and {len(df['model'].unique())} models.") -------------------------------------------------------------------------------- /gsm-infinite/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Source the configuration file 4 | source config.sh 5 | 6 | # Function to generate a comma-separated string of numbers with a given stride 7 | generate_sequence() { 8 | local start=$1 9 | local end=$2 10 | local stride=$3 11 | numbers=$(seq "$start" "$stride" "$end") 12 | result=$(echo "$numbers" | paste -s -d, -) 13 | echo "$result" 14 | } 15 | 16 | for length in "${lengths[@]}"; do 17 | for suffix in "${dataset_suffixes[@]}"; do 18 | dataset_name="${dataset_base}_${suffix}" 19 | save_dataset="$suffix" 20 | 21 | config_key="${length}_${suffix}" 22 | if [[ -z "${ops_config[$config_key]}" ]]; then 23 | echo "Skipping ${dataset_name} because no ops configuration found for $config_key." 24 | continue # Skip to the next iteration 25 | else 26 | ops_start=$(echo "${ops_config[$config_key]}" | jq -r '.start') 27 | ops_end=$(echo "${ops_config[$config_key]}" | jq -r '.end') 28 | ops_stride=$(echo "${ops_config[$config_key]}" | jq -r '.stride') 29 | ops=$(generate_sequence "$ops_start" "$ops_end" "$ops_stride") 30 | fi 31 | 32 | echo "Running with length: $length, dataset: $dataset_name, save-dataset: $save_dataset" 33 | 34 | if [[ "$run_sampling" == true && ! "$run_symbolic_evaluation" == true && ! 
"$run_realistic_evaluation" == true ]]; then 35 | # Set API keys for sampling 36 | export OPENAI_BASE_URL=$SAMPLER_OPENAI_BASE_URL 37 | export OPENAI_API_KEY=$SAMPLER_OPENAI_API_KEY 38 | 39 | # Set temperature and limit based on suffix 40 | if [[ "$suffix" == "symbolic" ]]; then 41 | temperature=$temperature_symbolic 42 | limit=$limit_symbolic 43 | python3 pred/pred.py \ 44 | --dataset-name "$dataset_name" \ 45 | --model-name "$model_name" \ 46 | --save-dataset "$save_dataset" \ 47 | --save-name "$save_name" \ 48 | --backend-type "$backend_type" \ 49 | --num-samples "$num_samples" \ 50 | --temperature "$temperature" \ 51 | --max-tokens "$max_tokens" \ 52 | --length "$length" \ 53 | --op-range "$ops" \ 54 | --batch-size "$batch_size" \ 55 | --limit "$limit" 56 | else 57 | temperature=$temperature_realistic 58 | limit=$limit_realistic 59 | # filter_arg=$(echo "--filter-config \"$filter_config\"") # Corrected line 60 | python3 pred/pred.py \ 61 | --dataset-name "$dataset_name" \ 62 | --model-name "$model_name" \ 63 | --save-dataset "$save_dataset" \ 64 | --save-name "$save_name" \ 65 | --backend-type "$backend_type" \ 66 | --num-samples "$num_samples" \ 67 | --temperature "$temperature" \ 68 | --max-tokens "$max_tokens" \ 69 | --length "$length" \ 70 | --op-range "$ops" \ 71 | --batch-size "$batch_size" \ 72 | --limit "$limit" \ 73 | --filter-config "$filter_config" 74 | fi 75 | 76 | fi 77 | 78 | if [[ "$run_evaluation" == true || "$run_symbolic_evaluation" == true ]] && [[ "$suffix" == "symbolic" ]]; then 79 | # Set API keys for evaluation 80 | export OPENAI_BASE_URL=$EVAL_OPENAI_BASE_URL 81 | export OPENAI_API_KEY=$EVAL_OPENAI_API_KEY 82 | 83 | python3 pred/eval_symbolic.py \ 84 | --save-name "$save_name" \ 85 | --num-samples "$num_samples" \ 86 | --length "$length" 87 | fi 88 | 89 | 90 | if [[ "$run_evaluation" == true || "$run_realistic_evaluation" == true ]] && [[ "$suffix" != "symbolic" ]]; then 91 | python3 pred/eval_realistic.py \ 92 | --save-dataset "$save_dataset" \ 93 | --save-name "$save_name" \ 94 | --num-samples "$num_samples" \ 95 | --length "$length" \ 96 | --filter-config "$filter_config" # Add filter argument only for medium/hard 97 | fi 98 | done 99 | done -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=61.0", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | [project] 9 | name = "gsm-infinite" # Choose a suitable name 10 | version = "0.1.0" # Start with an initial version 11 | description = "GSM-Infinite Benchmark for LLMs" 12 | readme = "README.md" 13 | requires-python = ">=3.8" # Specify minimum Python version 14 | license = {file = "LICENSE"} 15 | authors = [ 16 | {name = "Yang Zhou", email = "yangzho6@andrew.cmu.edu"}, 17 | {name = "Hongyi Liu", email = "liuhongy21@gmail.com"}, # Update with your information 18 | {name = "Zhuoming Chen", email = "zhuominc@andrew.cmu.edu"}, 19 | {name = "Yuandong Tian", email = "yuandong@meta.com"}, 20 | {name = "Beidi Chen", email = "beidic@andrew.cmu.edu"}, 21 | ] 22 | keywords = ["LLM", "benchmark", "evaluation"] 23 | classifiers = [ 24 | "Programming Language :: Python :: 3", 25 | "Operating System :: OS Independent", 26 | ] 27 | 28 | dependencies = [ 29 | "openai>=1.0.0", 30 | "nltk>=3.8.1", 31 | "numpy>=1.24.0", 32 | "tenacity>=8.2.3", 33 | "datasets>=2.14.0", 34 | "wonderwords>=2.2.0", 35 | "tqdm>=4.65.0", 36 | "pyyaml>=6.0.1", 
37 | "html2text", 38 | "beautifulsoup4", 39 | "spacy", 40 | "tiktoken", 41 | "anthropic", 42 | "google-generativeai", 43 | "termcolor", 44 | "sympy", 45 | "networkx", 46 | "matplotlib", 47 | "pydot", 48 | "argparse" 49 | ] 50 | 51 | [project.urls] 52 | "Homepage" = "https://infini-ai-lab.github.io/gsm_infinite" # Update with your repo URL 53 | "Bug Tracker" = "https://github.com/Infini-AI-Lab/gsm_infinite/issues" # Update with your issue tracker 54 | 55 | [tool.setuptools] 56 | packages = ["gsm-infinite"] # This assumes your main package is named gsm_infinite -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai>=1.0.0 2 | nltk>=3.8.1 3 | numpy>=1.24.0 4 | tenacity>=8.2.3 5 | datasets>=2.14.0 6 | wonderwords>=2.2.0 7 | tqdm>=4.65.0 8 | pyyaml>=6.0.1 9 | html2text 10 | beautifulsoup4 11 | spacy 12 | tiktoken 13 | anthropic 14 | google-generativeai 15 | termcolor 16 | sympy 17 | networkx 18 | matplotlib 19 | pydot 20 | argparse -------------------------------------------------------------------------------- /static/Triforce.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/Triforce.ttf -------------------------------------------------------------------------------- /static/css/bulma-carousel.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.slider{position:relative;width:100%}.slider-container{display:flex;flex-wrap:nowrap;flex-direction:row;overflow:hidden;-webkit-transform:translate3d(0,0,0);transform:translate3d(0,0,0);min-height:100%}.slider-container.is-vertical{flex-direction:column}.slider-container .slider-item{flex:none}.slider-container .slider-item .image.is-covered img{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.slider-container .slider-item .video-container{height:0;padding-bottom:0;padding-top:56.25%;margin:0;position:relative}.slider-container .slider-item .video-container.is-1by1,.slider-container .slider-item .video-container.is-square{padding-top:100%}.slider-container .slider-item .video-container.is-4by3{padding-top:75%}.slider-container .slider-item .video-container.is-21by9{padding-top:42.857143%}.slider-container .slider-item .video-container embed,.slider-container .slider-item .video-container iframe,.slider-container .slider-item .video-container object{position:absolute;top:0;left:0;width:100%!important;height:100%!important}.slider-navigation-next,.slider-navigation-previous{display:flex;justify-content:center;align-items:center;position:absolute;width:42px;height:42px;background:#fff center center no-repeat;background-size:20px 20px;border:1px solid #fff;border-radius:25091983px;box-shadow:0 2px 5px #3232321a;top:50%;margin-top:-20px;left:0;cursor:pointer;transition:opacity .3s,-webkit-transform .3s;transition:transform .3s,opacity .3s;transition:transform .3s,opacity .3s,-webkit-transform 
.3s}.slider-navigation-next:hover,.slider-navigation-previous:hover{-webkit-transform:scale(1.2);transform:scale(1.2)}.slider-navigation-next.is-hidden,.slider-navigation-previous.is-hidden{display:none;opacity:0}.slider-navigation-next svg,.slider-navigation-previous svg{width:25%}.slider-navigation-next{left:auto;right:0;background:#fff center center no-repeat;background-size:20px 20px}.slider-pagination{display:none;justify-content:center;align-items:center;position:absolute;bottom:0;left:0;right:0;padding:.5rem 1rem;text-align:center}.slider-pagination .slider-page{background:#fff;width:10px;height:10px;border-radius:25091983px;display:inline-block;margin:0 3px;box-shadow:0 2px 5px #3232321a;transition:-webkit-transform .3s;transition:transform .3s;transition:transform .3s,-webkit-transform .3s;cursor:pointer}.slider-pagination .slider-page.is-active,.slider-pagination .slider-page:hover{-webkit-transform:scale(1.4);transform:scale(1.4)}@media screen and (min-width:800px){.slider-pagination{display:flex}}.hero.has-carousel{position:relative}.hero.has-carousel+.hero-body,.hero.has-carousel+.hero-footer,.hero.has-carousel+.hero-head{z-index:10;overflow:hidden}.hero.has-carousel .hero-carousel{position:absolute;top:0;left:0;bottom:0;right:0;height:auto;border:none;margin:auto;padding:0;z-index:0}.hero.has-carousel .hero-carousel .slider{width:100%;max-width:100%;overflow:hidden;height:100%!important;max-height:100%;z-index:0}.hero.has-carousel .hero-carousel .slider .has-background{max-height:100%}.hero.has-carousel .hero-carousel .slider .has-background .is-background{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.hero.has-carousel .hero-body{margin:0 3rem;z-index:10} -------------------------------------------------------------------------------- /static/css/bulma-slider.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}input[type=range].slider{-webkit-appearance:none;-moz-appearance:none;appearance:none;margin:1rem 0;background:0 0;touch-action:none}input[type=range].slider.is-fullwidth{display:block;width:100%}input[type=range].slider:focus{outline:0}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{width:100%}input[type=range].slider:not([orient=vertical])::-moz-range-track{width:100%}input[type=range].slider:not([orient=vertical])::-ms-track{width:100%}input[type=range].slider:not([orient=vertical]).has-output+output,input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{width:3rem;background:#4a4a4a;border-radius:4px;padding:.4rem .8rem;font-size:.75rem;line-height:.75rem;text-align:center;text-overflow:ellipsis;white-space:nowrap;color:#fff;overflow:hidden;pointer-events:none;z-index:200}input[type=range].slider:not([orient=vertical]).has-output-tooltip:disabled+output,input[type=range].slider:not([orient=vertical]).has-output:disabled+output{opacity:.5}input[type=range].slider:not([orient=vertical]).has-output{display:inline-block;vertical-align:middle;width:calc(100% - 
(4.2rem))}input[type=range].slider:not([orient=vertical]).has-output+output{display:inline-block;margin-left:.75rem;vertical-align:middle}input[type=range].slider:not([orient=vertical]).has-output-tooltip{display:block}input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{position:absolute;left:0;top:-.1rem}input[type=range].slider[orient=vertical]{-webkit-appearance:slider-vertical;-moz-appearance:slider-vertical;appearance:slider-vertical;-webkit-writing-mode:bt-lr;-ms-writing-mode:bt-lr;writing-mode:bt-lr}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{height:100%}input[type=range].slider[orient=vertical]::-moz-range-track{height:100%}input[type=range].slider[orient=vertical]::-ms-track{height:100%}input[type=range].slider::-webkit-slider-runnable-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-moz-range-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-fill-lower{background:#dbdbdb;border-radius:4px}input[type=range].slider::-ms-fill-upper{background:#dbdbdb;border-radius:4px}input[type=range].slider::-webkit-slider-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-moz-range-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-ms-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none}input[type=range].slider.is-circle::-webkit-slider-thumb{border-radius:290486px}input[type=range].slider.is-circle::-moz-range-thumb{border-radius:290486px}input[type=range].slider.is-circle::-ms-thumb{border-radius:290486px}input[type=range].slider:active::-webkit-slider-thumb{-webkit-transform:scale(1.25);transform:scale(1.25)}input[type=range].slider:active::-moz-range-thumb{transform:scale(1.25)}input[type=range].slider:active::-ms-thumb{transform:scale(1.25)}input[type=range].slider:disabled{opacity:.5;cursor:not-allowed}input[type=range].slider:disabled::-webkit-slider-thumb{cursor:not-allowed;-webkit-transform:scale(1);transform:scale(1)}input[type=range].slider:disabled::-moz-range-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:disabled::-ms-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:not([orient=vertical]){min-height:calc((1rem + 2px) * 
1.25)}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-moz-range-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-ms-track{height:.5rem}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{width:.5rem}input[type=range].slider[orient=vertical]::-moz-range-track{width:.5rem}input[type=range].slider[orient=vertical]::-ms-track{width:.5rem}input[type=range].slider::-webkit-slider-thumb{height:1rem;width:1rem}input[type=range].slider::-moz-range-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{margin-top:0}input[type=range].slider::-webkit-slider-thumb{margin-top:-.25rem}input[type=range].slider[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.25rem}input[type=range].slider.is-small:not([orient=vertical]){min-height:calc((.75rem + 2px) * 1.25)}input[type=range].slider.is-small:not([orient=vertical])::-webkit-slider-runnable-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-moz-range-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-ms-track{height:.375rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-runnable-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-moz-range-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-ms-track{width:.375rem}input[type=range].slider.is-small::-webkit-slider-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-moz-range-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{margin-top:0}input[type=range].slider.is-small::-webkit-slider-thumb{margin-top:-.1875rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.1875rem}input[type=range].slider.is-medium:not([orient=vertical]){min-height:calc((1.25rem + 2px) * 1.25)}input[type=range].slider.is-medium:not([orient=vertical])::-webkit-slider-runnable-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-moz-range-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-ms-track{height:.625rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-runnable-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-moz-range-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-ms-track{width:.625rem}input[type=range].slider.is-medium::-webkit-slider-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-moz-range-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{margin-top:0}input[type=range].slider.is-medium::-webkit-slider-thumb{margin-top:-.3125rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.3125rem}input[type=range].slider.is-large:not([orient=vertical]){min-height:calc((1.5rem + 2px) * 
1.25)}input[type=range].slider.is-large:not([orient=vertical])::-webkit-slider-runnable-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-moz-range-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-ms-track{height:.75rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-runnable-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-moz-range-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-ms-track{width:.75rem}input[type=range].slider.is-large::-webkit-slider-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-moz-range-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{margin-top:0}input[type=range].slider.is-large::-webkit-slider-thumb{margin-top:-.375rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.375rem}input[type=range].slider.is-white::-moz-range-track{background:#fff!important}input[type=range].slider.is-white::-webkit-slider-runnable-track{background:#fff!important}input[type=range].slider.is-white::-ms-track{background:#fff!important}input[type=range].slider.is-white::-ms-fill-lower{background:#fff}input[type=range].slider.is-white::-ms-fill-upper{background:#fff}input[type=range].slider.is-white .has-output-tooltip+output,input[type=range].slider.is-white.has-output+output{background-color:#fff;color:#0a0a0a}input[type=range].slider.is-black::-moz-range-track{background:#0a0a0a!important}input[type=range].slider.is-black::-webkit-slider-runnable-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-fill-lower{background:#0a0a0a}input[type=range].slider.is-black::-ms-fill-upper{background:#0a0a0a}input[type=range].slider.is-black .has-output-tooltip+output,input[type=range].slider.is-black.has-output+output{background-color:#0a0a0a;color:#fff}input[type=range].slider.is-light::-moz-range-track{background:#f5f5f5!important}input[type=range].slider.is-light::-webkit-slider-runnable-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-fill-lower{background:#f5f5f5}input[type=range].slider.is-light::-ms-fill-upper{background:#f5f5f5}input[type=range].slider.is-light .has-output-tooltip+output,input[type=range].slider.is-light.has-output+output{background-color:#f5f5f5;color:#363636}input[type=range].slider.is-dark::-moz-range-track{background:#363636!important}input[type=range].slider.is-dark::-webkit-slider-runnable-track{background:#363636!important}input[type=range].slider.is-dark::-ms-track{background:#363636!important}input[type=range].slider.is-dark::-ms-fill-lower{background:#363636}input[type=range].slider.is-dark::-ms-fill-upper{background:#363636}input[type=range].slider.is-dark 
.has-output-tooltip+output,input[type=range].slider.is-dark.has-output+output{background-color:#363636;color:#f5f5f5}input[type=range].slider.is-primary::-moz-range-track{background:#00d1b2!important}input[type=range].slider.is-primary::-webkit-slider-runnable-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-fill-lower{background:#00d1b2}input[type=range].slider.is-primary::-ms-fill-upper{background:#00d1b2}input[type=range].slider.is-primary .has-output-tooltip+output,input[type=range].slider.is-primary.has-output+output{background-color:#00d1b2;color:#fff}input[type=range].slider.is-link::-moz-range-track{background:#3273dc!important}input[type=range].slider.is-link::-webkit-slider-runnable-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-fill-lower{background:#3273dc}input[type=range].slider.is-link::-ms-fill-upper{background:#3273dc}input[type=range].slider.is-link .has-output-tooltip+output,input[type=range].slider.is-link.has-output+output{background-color:#3273dc;color:#fff}input[type=range].slider.is-info::-moz-range-track{background:#209cee!important}input[type=range].slider.is-info::-webkit-slider-runnable-track{background:#209cee!important}input[type=range].slider.is-info::-ms-track{background:#209cee!important}input[type=range].slider.is-info::-ms-fill-lower{background:#209cee}input[type=range].slider.is-info::-ms-fill-upper{background:#209cee}input[type=range].slider.is-info .has-output-tooltip+output,input[type=range].slider.is-info.has-output+output{background-color:#209cee;color:#fff}input[type=range].slider.is-success::-moz-range-track{background:#23d160!important}input[type=range].slider.is-success::-webkit-slider-runnable-track{background:#23d160!important}input[type=range].slider.is-success::-ms-track{background:#23d160!important}input[type=range].slider.is-success::-ms-fill-lower{background:#23d160}input[type=range].slider.is-success::-ms-fill-upper{background:#23d160}input[type=range].slider.is-success .has-output-tooltip+output,input[type=range].slider.is-success.has-output+output{background-color:#23d160;color:#fff}input[type=range].slider.is-warning::-moz-range-track{background:#ffdd57!important}input[type=range].slider.is-warning::-webkit-slider-runnable-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-fill-lower{background:#ffdd57}input[type=range].slider.is-warning::-ms-fill-upper{background:#ffdd57}input[type=range].slider.is-warning .has-output-tooltip+output,input[type=range].slider.is-warning.has-output+output{background-color:#ffdd57;color:rgba(0,0,0,.7)}input[type=range].slider.is-danger::-moz-range-track{background:#ff3860!important}input[type=range].slider.is-danger::-webkit-slider-runnable-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-fill-lower{background:#ff3860}input[type=range].slider.is-danger::-ms-fill-upper{background:#ff3860}input[type=range].slider.is-danger .has-output-tooltip+output,input[type=range].slider.is-danger.has-output+output{background-color:#ff3860;color:#fff} -------------------------------------------------------------------------------- /static/css/index.css: 
-------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Sans', sans-serif; 3 | } 4 | 5 | 6 | .footer .icon-link { 7 | font-size: 25px; 8 | color: #000; 9 | } 10 | 11 | .link-block a { 12 | margin-top: 5px; 13 | margin-bottom: 5px; 14 | } 15 | 16 | .dnerf { 17 | font-variant: small-caps; 18 | } 19 | 20 | 21 | .teaser .hero-body { 22 | padding-top: 0; 23 | padding-bottom: 3rem; 24 | } 25 | 26 | .teaser { 27 | font-family: 'Google Sans', sans-serif; 28 | } 29 | 30 | 31 | .publication-title { 32 | } 33 | 34 | .publication-banner { 35 | max-height: none; /* 'parent' is not a valid value */ 36 | 37 | } 38 | 39 | .publication-banner video { 40 | position: relative; 41 | left: auto; 42 | top: auto; 43 | transform: none; 44 | object-fit: fill; /* 'fit' is not a valid value */ 45 | } 46 | 47 | .publication-header .hero-body { 48 | } 49 | 50 | .publication-title { 51 | font-family: 'Google Sans', sans-serif; 52 | } 53 | 54 | .publication-authors { 55 | font-family: 'Google Sans', sans-serif; 56 | } 57 | 58 | .publication-venue { 59 | color: #555; 60 | width: fit-content; 61 | font-weight: bold; 62 | } 63 | 64 | .publication-awards { 65 | color: #ff3860; 66 | width: fit-content; 67 | font-weight: bolder; 68 | } 69 | 70 | .publication-authors { 71 | } 72 | 73 | .publication-authors a { 74 | color: hsl(204, 86%, 53%) !important; 75 | } 76 | 77 | .publication-authors a:hover { 78 | text-decoration: underline; 79 | } 80 | 81 | .author-block { 82 | display: inline-block; 83 | } 84 | 85 | .publication-banner img { 86 | } 87 | 88 | .publication-authors { 89 | /*color: #4286f4;*/ 90 | } 91 | 92 | .publication-video { 93 | position: relative; 94 | width: 100%; 95 | height: 0; 96 | padding-bottom: 56.25%; 97 | 98 | overflow: hidden; 99 | border-radius: 10px !important; 100 | } 101 | 102 | .publication-video iframe { 103 | position: absolute; 104 | top: 0; 105 | left: 0; 106 | width: 100%; 107 | height: 100%; 108 | } 109 | 110 | .publication-body img { 111 | } 112 | 113 | .results-carousel { 114 | overflow: hidden; 115 | } 116 | 117 | .results-carousel .item { 118 | margin: 5px; 119 | overflow: hidden; 120 | padding: 20px; 121 | font-size: 0; 122 | } 123 | 124 | .results-carousel video { 125 | margin: 0; 126 | } 127 | 128 | .slider-pagination .slider-page { 129 | background: #000000; 130 | } 131 | 132 | .eql-cntrb { 133 | font-size: smaller; 134 | } 135 | 136 | .sirius-symbol { 137 | /* margin-right: 10px; */ 138 | display: inline-block; 139 | vertical-align: middle; 140 | width: 50px; 141 | height: 50px; 142 | } 143 | 144 | .publication-title { 145 | display: inline-block; 146 | vertical-align: middle; 147 | } 148 | 149 | .footnote { 150 | font-size: 0.6em; /* Makes the font smaller than before */ 151 | color: #666; 152 | margin-top: 1em; 153 | padding-top: 0.5em; 154 | padding-left: 1em; 155 | position: relative; 156 | /* border-top: 1px solid #ccc; */ 157 | line-height: 1.4; /* Improves readability for small text */ 158 | } 159 | 160 | .footnote .asterisk { 161 | position: absolute; 162 | left: 0; 163 | top: 0.5em; 164 | margin-bottom: 5px; 165 | } 166 | 167 | .footnote .asteriss { 168 | position: absolute; 169 | left: 0; 170 | top: 0.5em; 171 | margin-bottom: 5px; 172 | border-top: 1px solid #ccc; 173 | } 174 | 175 | 176 | 177 | 178 | 179 | -------------------------------------------------------------------------------- /static/images/DALL·E 2024-09-02 20.52.33 - A cute but more accurate representation of the International Space Station (ISS) floating in space.
The ISS should be depicted with recognizable featu.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/DALL·E 2024-09-02 20.52.33 - A cute but more accurate representation of the International Space Station (ISS) floating in space. The ISS should be depicted with recognizable featu.webp -------------------------------------------------------------------------------- /static/images/Fast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/Fast.png -------------------------------------------------------------------------------- /static/images/GPU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/GPU.png -------------------------------------------------------------------------------- /static/images/GSMQQ.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/GSMQQ.gif -------------------------------------------------------------------------------- /static/images/GSMmouse.svg: -------------------------------------------------------------------------------- [SVG markup not captured in this dump; extraction left only bare line numbers] -------------------------------------------------------------------------------- /static/images/GSMmouse2.svg: -------------------------------------------------------------------------------- [SVG markup not captured in this dump; extraction left only bare line numbers] -------------------------------------------------------------------------------- /static/images/Hierarchical.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/Hierarchical.png -------------------------------------------------------------------------------- /static/images/Hierarchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/Hierarchy.png -------------------------------------------------------------------------------- /static/images/Idea.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/Idea.png -------------------------------------------------------------------------------- /static/images/InternationalSpaceStation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/InternationalSpaceStation.png
-------------------------------------------------------------------------------- /static/images/Llama.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/Llama.png -------------------------------------------------------------------------------- /static/images/Observation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/Observation.png -------------------------------------------------------------------------------- /static/images/Simplerspace.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/Simplerspace.webp -------------------------------------------------------------------------------- /static/images/Sys_readme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/Sys_readme.png -------------------------------------------------------------------------------- /static/images/Telescope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/Telescope.png -------------------------------------------------------------------------------- /static/images/TriForce.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/TriForce.gif -------------------------------------------------------------------------------- /static/images/computationalgraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/computationalgraph.png -------------------------------------------------------------------------------- /static/images/computationgraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/computationgraph.png -------------------------------------------------------------------------------- /static/images/computgraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/computgraph.png -------------------------------------------------------------------------------- /static/images/cosmonautllama.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/cosmonautllama.png -------------------------------------------------------------------------------- /static/images/cra.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/cra.png -------------------------------------------------------------------------------- /static/images/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/demo.png -------------------------------------------------------------------------------- /static/images/evaluationfronfigure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/evaluationfronfigure.png -------------------------------------------------------------------------------- /static/images/facinfinity.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/facinfinity.webp -------------------------------------------------------------------------------- /static/images/forwardre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/forwardre.png -------------------------------------------------------------------------------- /static/images/introductin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/introductin.png -------------------------------------------------------------------------------- /static/images/introduction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/introduction.pdf -------------------------------------------------------------------------------- /static/images/introduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/introduction.png -------------------------------------------------------------------------------- /static/images/iss.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/iss.webp -------------------------------------------------------------------------------- /static/images/lim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/lim.png -------------------------------------------------------------------------------- /static/images/locality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/locality.png -------------------------------------------------------------------------------- /static/images/methodsillustration.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/methodsillustration.png -------------------------------------------------------------------------------- /static/images/miscellaneous.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/miscellaneous.png -------------------------------------------------------------------------------- /static/images/mouseeatingcookie.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/mouseeatingcookie.png -------------------------------------------------------------------------------- /static/images/mousehuggingface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/mousehuggingface.png -------------------------------------------------------------------------------- /static/images/mousetelescope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/mousetelescope.png -------------------------------------------------------------------------------- /static/images/probwebsite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/probwebsite.png -------------------------------------------------------------------------------- /static/images/rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/rag.png -------------------------------------------------------------------------------- /static/images/rag22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/rag22.png -------------------------------------------------------------------------------- /static/images/repeatedsampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/repeatedsampling.png -------------------------------------------------------------------------------- /static/images/retrieval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/retrieval.png -------------------------------------------------------------------------------- /static/images/rockets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/rockets.png 
-------------------------------------------------------------------------------- /static/images/rockets.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/rockets.webp -------------------------------------------------------------------------------- /static/images/simpleiss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/simpleiss.png -------------------------------------------------------------------------------- /static/images/sirius-symbol.svg: -------------------------------------------------------------------------------- [SVG markup not captured in this dump; extraction left only bare line numbers] -------------------------------------------------------------------------------- /static/images/siriuslogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/siriuslogo.png -------------------------------------------------------------------------------- /static/images/siriuslogo.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/siriuslogo.webp -------------------------------------------------------------------------------- /static/images/siriusmoti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/siriusmoti.png -------------------------------------------------------------------------------- /static/images/sparsity_top4096.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/sparsity_top4096.png -------------------------------------------------------------------------------- /static/images/sys.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/sys.png -------------------------------------------------------------------------------- /static/images/triforce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/triforce.png -------------------------------------------------------------------------------- /static/images/twomooone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/twomooone.png -------------------------------------------------------------------------------- /static/images/twostars.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/twostars.webp --------------------------------------------------------------------------------
/static/images/twostarts.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/images/twostarts.webp -------------------------------------------------------------------------------- /static/js/bulma-slider.js: -------------------------------------------------------------------------------- 1 | (function webpackUniversalModuleDefinition(root, factory) { 2 | if(typeof exports === 'object' && typeof module === 'object') 3 | module.exports = factory(); 4 | else if(typeof define === 'function' && define.amd) 5 | define([], factory); 6 | else if(typeof exports === 'object') 7 | exports["bulmaSlider"] = factory(); 8 | else 9 | root["bulmaSlider"] = factory(); 10 | })(typeof self !== 'undefined' ? self : this, function() { 11 | return /******/ (function(modules) { // webpackBootstrap 12 | /******/ // The module cache 13 | /******/ var installedModules = {}; 14 | /******/ 15 | /******/ // The require function 16 | /******/ function __webpack_require__(moduleId) { 17 | /******/ 18 | /******/ // Check if module is in cache 19 | /******/ if(installedModules[moduleId]) { 20 | /******/ return installedModules[moduleId].exports; 21 | /******/ } 22 | /******/ // Create a new module (and put it into the cache) 23 | /******/ var module = installedModules[moduleId] = { 24 | /******/ i: moduleId, 25 | /******/ l: false, 26 | /******/ exports: {} 27 | /******/ }; 28 | /******/ 29 | /******/ // Execute the module function 30 | /******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__); 31 | /******/ 32 | /******/ // Flag the module as loaded 33 | /******/ module.l = true; 34 | /******/ 35 | /******/ // Return the exports of the module 36 | /******/ return module.exports; 37 | /******/ } 38 | /******/ 39 | /******/ 40 | /******/ // expose the modules object (__webpack_modules__) 41 | /******/ __webpack_require__.m = modules; 42 | /******/ 43 | /******/ // expose the module cache 44 | /******/ __webpack_require__.c = installedModules; 45 | /******/ 46 | /******/ // define getter function for harmony exports 47 | /******/ __webpack_require__.d = function(exports, name, getter) { 48 | /******/ if(!__webpack_require__.o(exports, name)) { 49 | /******/ Object.defineProperty(exports, name, { 50 | /******/ configurable: false, 51 | /******/ enumerable: true, 52 | /******/ get: getter 53 | /******/ }); 54 | /******/ } 55 | /******/ }; 56 | /******/ 57 | /******/ // getDefaultExport function for compatibility with non-harmony modules 58 | /******/ __webpack_require__.n = function(module) { 59 | /******/ var getter = module && module.__esModule ? 
60 | /******/ function getDefault() { return module['default']; } : 61 | /******/ function getModuleExports() { return module; }; 62 | /******/ __webpack_require__.d(getter, 'a', getter); 63 | /******/ return getter; 64 | /******/ }; 65 | /******/ 66 | /******/ // Object.prototype.hasOwnProperty.call 67 | /******/ __webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); }; 68 | /******/ 69 | /******/ // __webpack_public_path__ 70 | /******/ __webpack_require__.p = ""; 71 | /******/ 72 | /******/ // Load entry module and return exports 73 | /******/ return __webpack_require__(__webpack_require__.s = 0); 74 | /******/ }) 75 | /************************************************************************/ 76 | /******/ ([ 77 | /* 0 */ 78 | /***/ (function(module, __webpack_exports__, __webpack_require__) { 79 | 80 | "use strict"; 81 | Object.defineProperty(__webpack_exports__, "__esModule", { value: true }); 82 | /* harmony export (binding) */ __webpack_require__.d(__webpack_exports__, "isString", function() { return isString; }); 83 | /* harmony import */ var __WEBPACK_IMPORTED_MODULE_0__events__ = __webpack_require__(1); 84 | var _extends = Object.assign || function (target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i]; for (var key in source) { if (Object.prototype.hasOwnProperty.call(source, key)) { target[key] = source[key]; } } } return target; }; 85 | 86 | var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); 87 | 88 | var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; 89 | 90 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 91 | 92 | function _possibleConstructorReturn(self, call) { if (!self) { throw new ReferenceError("this hasn't been initialised - super() hasn't been called"); } return call && (typeof call === "object" || typeof call === "function") ? call : self; } 93 | 94 | function _inherits(subClass, superClass) { if (typeof superClass !== "function" && superClass !== null) { throw new TypeError("Super expression must either be null or a function, not " + typeof superClass); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, enumerable: false, writable: true, configurable: true } }); if (superClass) Object.setPrototypeOf ? Object.setPrototypeOf(subClass, superClass) : subClass.__proto__ = superClass; } 95 | 96 | 97 | 98 | var isString = function isString(unknown) { 99 | return typeof unknown === 'string' || !!unknown && (typeof unknown === 'undefined' ? 
'undefined' : _typeof(unknown)) === 'object' && Object.prototype.toString.call(unknown) === '[object String]'; 100 | }; 101 | 102 | var bulmaSlider = function (_EventEmitter) { 103 | _inherits(bulmaSlider, _EventEmitter); 104 | 105 | function bulmaSlider(selector) { 106 | var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; 107 | 108 | _classCallCheck(this, bulmaSlider); 109 | 110 | var _this = _possibleConstructorReturn(this, (bulmaSlider.__proto__ || Object.getPrototypeOf(bulmaSlider)).call(this)); 111 | 112 | _this.element = typeof selector === 'string' ? document.querySelector(selector) : selector; 113 | // An invalid selector or non-DOM node has been provided. 114 | if (!_this.element) { 115 | throw new Error('An invalid selector or non-DOM node has been provided.'); 116 | } 117 | 118 | _this._clickEvents = ['click']; 119 | /// Set default options and merge with instance defined 120 | _this.options = _extends({}, options); 121 | 122 | _this.onSliderInput = _this.onSliderInput.bind(_this); 123 | 124 | _this.init(); 125 | return _this; 126 | } 127 | 128 | /** 129 | * Initiate all DOM element containing selector 130 | * @method 131 | * @return {Array} Array of all slider instances 132 | */ 133 | 134 | 135 | _createClass(bulmaSlider, [{ 136 | key: 'init', 137 | 138 | 139 | /** 140 | * Initiate plugin 141 | * @method init 142 | * @return {void} 143 | */ 144 | value: function init() { 145 | this._id = 'bulmaSlider' + new Date().getTime() + Math.floor(Math.random() * Math.floor(9999)); 146 | this.output = this._findOutputForSlider(); 147 | 148 | this._bindEvents(); 149 | 150 | if (this.output) { 151 | if (this.element.classList.contains('has-output-tooltip')) { 152 | // Get new output position 153 | var newPosition = this._getSliderOutputPosition(); 154 | 155 | // Set output position 156 | this.output.style['left'] = newPosition.position; 157 | } 158 | } 159 | 160 | this.emit('bulmaslider:ready', this.element.value); 161 | } 162 | }, { 163 | key: '_findOutputForSlider', 164 | value: function _findOutputForSlider() { 165 | var _this2 = this; 166 | 167 | var result = null; 168 | var outputs = document.getElementsByTagName('output') || []; 169 | 170 | Array.from(outputs).forEach(function (output) { 171 | if (output.htmlFor == _this2.element.getAttribute('id')) { 172 | result = output; 173 | return true; 174 | } 175 | }); 176 | return result; 177 | } 178 | }, { 179 | key: '_getSliderOutputPosition', 180 | value: function _getSliderOutputPosition() { 181 | // Update output position 182 | var newPlace, minValue; 183 | 184 | var style = window.getComputedStyle(this.element, null); 185 | // Measure width of range input 186 | var sliderWidth = parseInt(style.getPropertyValue('width'), 10); 187 | 188 | // Figure out placement percentage between left and right of input 189 | if (!this.element.getAttribute('min')) { 190 | minValue = 0; 191 | } else { 192 | minValue = this.element.getAttribute('min'); 193 | } 194 | var newPoint = (this.element.value - minValue) / (this.element.getAttribute('max') - minValue); 195 | 196 | // Prevent bubble from going beyond left or right (unsupported browsers) 197 | if (newPoint < 0) { 198 | newPlace = 0; 199 | } else if (newPoint > 1) { 200 | newPlace = sliderWidth; 201 | } else { 202 | newPlace = sliderWidth * newPoint; 203 | } 204 | 205 | return { 206 | 'position': newPlace + 'px' 207 | }; 208 | } 209 | 210 | /** 211 | * Bind all events 212 | * @method _bindEvents 213 | * @return {void} 214 | */ 215 | 216 | }, { 217 | key: 
'_bindEvents', 218 | value: function _bindEvents() { 219 | if (this.output) { 220 | // Add event listener to update output when slider value change 221 | this.element.addEventListener('input', this.onSliderInput, false); 222 | } 223 | } 224 | }, { 225 | key: 'onSliderInput', 226 | value: function onSliderInput(e) { 227 | e.preventDefault(); 228 | 229 | if (this.element.classList.contains('has-output-tooltip')) { 230 | // Get new output position 231 | var newPosition = this._getSliderOutputPosition(); 232 | 233 | // Set output position 234 | this.output.style['left'] = newPosition.position; 235 | } 236 | 237 | // Check for prefix and postfix 238 | var prefix = this.output.hasAttribute('data-prefix') ? this.output.getAttribute('data-prefix') : ''; 239 | var postfix = this.output.hasAttribute('data-postfix') ? this.output.getAttribute('data-postfix') : ''; 240 | 241 | // Update output with slider value 242 | this.output.value = prefix + this.element.value + postfix; 243 | 244 | this.emit('bulmaslider:ready', this.element.value); 245 | } 246 | }], [{ 247 | key: 'attach', 248 | value: function attach() { 249 | var _this3 = this; 250 | 251 | var selector = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 'input[type="range"].slider'; 252 | var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; 253 | 254 | var instances = new Array(); 255 | 256 | var elements = isString(selector) ? document.querySelectorAll(selector) : Array.isArray(selector) ? selector : [selector]; 257 | elements.forEach(function (element) { 258 | if (typeof element[_this3.constructor.name] === 'undefined') { 259 | var instance = new bulmaSlider(element, options); 260 | element[_this3.constructor.name] = instance; 261 | instances.push(instance); 262 | } else { 263 | instances.push(element[_this3.constructor.name]); 264 | } 265 | }); 266 | 267 | return instances; 268 | } 269 | }]); 270 | 271 | return bulmaSlider; 272 | }(__WEBPACK_IMPORTED_MODULE_0__events__["a" /* default */]); 273 | 274 | /* harmony default export */ __webpack_exports__["default"] = (bulmaSlider); 275 | 276 | /***/ }), 277 | /* 1 */ 278 | /***/ (function(module, __webpack_exports__, __webpack_require__) { 279 | 280 | "use strict"; 281 | var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); 282 | 283 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 284 | 285 | var EventEmitter = function () { 286 | function EventEmitter() { 287 | var listeners = arguments.length > 0 && arguments[0] !== undefined ? 
arguments[0] : []; 288 | 289 | _classCallCheck(this, EventEmitter); 290 | 291 | this._listeners = new Map(listeners); 292 | this._middlewares = new Map(); 293 | } 294 | 295 | _createClass(EventEmitter, [{ 296 | key: "listenerCount", 297 | value: function listenerCount(eventName) { 298 | if (!this._listeners.has(eventName)) { 299 | return 0; 300 | } 301 | 302 | var eventListeners = this._listeners.get(eventName); 303 | return eventListeners.length; 304 | } 305 | }, { 306 | key: "removeListeners", 307 | value: function removeListeners() { 308 | var _this = this; 309 | 310 | var eventName = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 311 | var middleware = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false; 312 | 313 | if (eventName !== null) { 314 | if (Array.isArray(eventName)) { 315 | name.forEach(function (e) { 316 | return _this.removeListeners(e, middleware); 317 | }); 318 | } else { 319 | this._listeners.delete(eventName); 320 | 321 | if (middleware) { 322 | this.removeMiddleware(eventName); 323 | } 324 | } 325 | } else { 326 | this._listeners = new Map(); 327 | } 328 | } 329 | }, { 330 | key: "middleware", 331 | value: function middleware(eventName, fn) { 332 | var _this2 = this; 333 | 334 | if (Array.isArray(eventName)) { 335 | name.forEach(function (e) { 336 | return _this2.middleware(e, fn); 337 | }); 338 | } else { 339 | if (!Array.isArray(this._middlewares.get(eventName))) { 340 | this._middlewares.set(eventName, []); 341 | } 342 | 343 | this._middlewares.get(eventName).push(fn); 344 | } 345 | } 346 | }, { 347 | key: "removeMiddleware", 348 | value: function removeMiddleware() { 349 | var _this3 = this; 350 | 351 | var eventName = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 352 | 353 | if (eventName !== null) { 354 | if (Array.isArray(eventName)) { 355 | name.forEach(function (e) { 356 | return _this3.removeMiddleware(e); 357 | }); 358 | } else { 359 | this._middlewares.delete(eventName); 360 | } 361 | } else { 362 | this._middlewares = new Map(); 363 | } 364 | } 365 | }, { 366 | key: "on", 367 | value: function on(name, callback) { 368 | var _this4 = this; 369 | 370 | var once = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false; 371 | 372 | if (Array.isArray(name)) { 373 | name.forEach(function (e) { 374 | return _this4.on(e, callback); 375 | }); 376 | } else { 377 | name = name.toString(); 378 | var split = name.split(/,|, | /); 379 | 380 | if (split.length > 1) { 381 | split.forEach(function (e) { 382 | return _this4.on(e, callback); 383 | }); 384 | } else { 385 | if (!Array.isArray(this._listeners.get(name))) { 386 | this._listeners.set(name, []); 387 | } 388 | 389 | this._listeners.get(name).push({ once: once, callback: callback }); 390 | } 391 | } 392 | } 393 | }, { 394 | key: "once", 395 | value: function once(name, callback) { 396 | this.on(name, callback, true); 397 | } 398 | }, { 399 | key: "emit", 400 | value: function emit(name, data) { 401 | var _this5 = this; 402 | 403 | var silent = arguments.length > 2 && arguments[2] !== undefined ? 
arguments[2] : false; 404 | 405 | name = name.toString(); 406 | var listeners = this._listeners.get(name); 407 | var middlewares = null; 408 | var doneCount = 0; 409 | var execute = silent; 410 | 411 | if (Array.isArray(listeners)) { 412 | listeners.forEach(function (listener, index) { 413 | // Start Middleware checks unless we're doing a silent emit 414 | if (!silent) { 415 | middlewares = _this5._middlewares.get(name); 416 | // Check and execute Middleware 417 | if (Array.isArray(middlewares)) { 418 | middlewares.forEach(function (middleware) { 419 | middleware(data, function () { 420 | var newData = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 421 | 422 | if (newData !== null) { 423 | data = newData; 424 | } 425 | doneCount++; 426 | }, name); 427 | }); 428 | 429 | if (doneCount >= middlewares.length) { 430 | execute = true; 431 | } 432 | } else { 433 | execute = true; 434 | } 435 | } 436 | 437 | // If Middleware checks have been passed, execute 438 | if (execute) { 439 | if (listener.once) { 440 | listeners[index] = null; 441 | } 442 | listener.callback(data); 443 | } 444 | }); 445 | 446 | // Dirty way of removing used Events 447 | while (listeners.indexOf(null) !== -1) { 448 | listeners.splice(listeners.indexOf(null), 1); 449 | } 450 | } 451 | } 452 | }]); 453 | 454 | return EventEmitter; 455 | }(); 456 | 457 | /* harmony default export */ __webpack_exports__["a"] = (EventEmitter); 458 | 459 | /***/ }) 460 | /******/ ])["default"]; 461 | }); -------------------------------------------------------------------------------- /static/js/bulma-slider.min.js: -------------------------------------------------------------------------------- 1 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.bulmaSlider=e():t.bulmaSlider=e()}("undefined"!=typeof self?self:this,function(){return function(n){var r={};function i(t){if(r[t])return r[t].exports;var e=r[t]={i:t,l:!1,exports:{}};return n[t].call(e.exports,e,e.exports,i),e.l=!0,e.exports}return i.m=n,i.c=r,i.d=function(t,e,n){i.o(t,e)||Object.defineProperty(t,e,{configurable:!1,enumerable:!0,get:n})},i.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return i.d(e,"a",e),e},i.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},i.p="",i(i.s=0)}([function(t,e,n){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),n.d(e,"isString",function(){return l});var r=n(1),i=Object.assign||function(t){for(var e=1;e=l.length&&(s=!0)):s=!0),s&&(t.once&&(u[e]=null),t.callback(r))});-1!==u.indexOf(null);)u.splice(u.indexOf(null),1)}}]),e}();e.a=i}]).default}); -------------------------------------------------------------------------------- /static/js/index.js: -------------------------------------------------------------------------------- 1 | window.HELP_IMPROVE_VIDEOJS = false; 2 | 3 | 4 | $(document).ready(function() { 5 | // Check for click events on the navbar burger icon 6 | 7 | var options = { 8 | slidesToScroll: 1, 9 | slidesToShow: 1, 10 | loop: true, 11 | infinite: true, 12 | autoplay: true, 13 | autoplaySpeed: 5000, 14 | } 15 | 16 | // Initialize all div with carousel class 17 | var carousels = bulmaCarousel.attach('.carousel', options); 18 | 19 | bulmaSlider.attach(); 20 | 21 | }) 22 | -------------------------------------------------------------------------------- /static/pdfs/sample.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/pdfs/sample.pdf -------------------------------------------------------------------------------- /static/videos/TriForce.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infini-AI-Lab/gsm_infinite/6a09c9a870b6f218cac748dbbcdf9faca73a0f64/static/videos/TriForce.mp4 --------------------------------------------------------------------------------
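
Quick start (a minimal sketch inferred from the files above, not documented project usage). pyproject.toml and requirements.txt define the dependencies, and gsm-infinite/run.sh drives sampling (pred/pred.py) and evaluation (pred/eval_symbolic.py, pred/eval_realistic.py). The sketch assumes that config.sh supplies the variables run.sh consumes (dataset_name, model_name, backend_type, num_samples, the run_* flags, and possibly the SAMPLER_*/EVAL_* endpoint settings); the exported values below are illustrative placeholders only.

# Hedged quick-start sketch; paths and variable names are taken from the
# files in this dump, but the end-to-end workflow is an assumption.
python3 -m venv .venv && source .venv/bin/activate
pip install -r requirements.txt   # or: pip install -e .

# Placeholder endpoints/keys; run.sh copies these into
# OPENAI_BASE_URL / OPENAI_API_KEY before sampling and evaluation.
export SAMPLER_OPENAI_BASE_URL="https://api.openai.com/v1"
export SAMPLER_OPENAI_API_KEY="sk-placeholder"
export EVAL_OPENAI_BASE_URL="$SAMPLER_OPENAI_BASE_URL"
export EVAL_OPENAI_API_KEY="$SAMPLER_OPENAI_API_KEY"

# run.sh invokes pred/pred.py by relative path, so run it from gsm-infinite/.
# Assumed: config.sh holds the dataset/model settings read by run.sh; edit it first.
cd gsm-infinite
bash run.sh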