├── requirements.txt
├── benchmark_results
├── LICENSE
├── signal_miner.py
└── README.md
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | numpy
3 | numerapi
4 | lightgbm
5 | cloudpickle
6 | scikit-learn
7 | matplotlib
--------------------------------------------------------------------------------
/benchmark_results:
--------------------------------------------------------------------------------
1 | ,colsample_bytree,max_bin,max_depth,num_leaves,min_child_samples,n_estimators,reg_lambda,learning_rate,target,n_jobs,validation_corr,validation_shp,validation_max_dd,test_corr,test_shp,test_max_dd,whole_corr,whole_shp,whole_max_dd,is_benchmark
2 | 0,0.1,5,10,1024,10000,30000,0.0,0.001,target,10,0.041077934863382015,2.4434301759616894,0.01394965999562281,0.032886313452397765,1.6958069920387409,0.03396784091858818,0.03699822826289183,1.9891945305645413,0.03396784091858862,True
3 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Timothy DeLise
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/signal_miner.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import random
4 | import lightgbm as lgb
5 |
6 | # Draw one random configuration by sampling each hyperparameter list
7 | def get_ran_cfg(param_dict):
8 |     return {k: random.choice(v) for k, v in param_dict.items()}
9 |
10 | def get_rdn_cfgs(param_dict, num):
11 | configurations = []
12 | while len(configurations) < num:
13 | cfg = get_ran_cfg(param_dict)
14 |         if (cfg not in configurations) and (cfg['num_leaves'] <= 2 ** cfg['max_depth']):  # skip duplicates and configs where num_leaves exceeds 2**max_depth
15 | configurations.append(cfg)
16 | return configurations
17 |
18 | def get_model(cfg):
19 | model = lgb.LGBMRegressor(
20 | colsample_bytree=cfg['colsample_bytree'],
21 | max_bin=cfg['max_bin'],
22 | max_depth=cfg['max_depth'],
23 | num_leaves=cfg['num_leaves'],
24 | min_child_samples=cfg['min_child_samples'],
25 | n_estimators=cfg['n_estimators'],
26 | reg_lambda=cfg['reg_lambda'],
27 | learning_rate=cfg['learning_rate'],
28 | n_jobs=cfg['n_jobs']
29 | )
30 |
31 | return model
32 |
33 |
34 | def evaluate_completed_configs(data, configurations, mmapped_array, done_splits, all_splits, ns, label='target'):
35 | """
36 | Evaluate completed configurations and return a DataFrame with evaluation metrics.
37 |
38 | Parameters:
39 | data (pd.DataFrame): The dataset with eras, features, and mmap_idx.
40 | configurations (list): List of configuration dictionaries.
41 | mmapped_array (np.memmap): Memory-mapped array for predictions.
42 | done_splits (np.memmap): Memory-mapped array tracking completed splits.
43 | all_splits (list): List of validation-test split indices.
44 | ns (int): Number of splits per configuration.
45 | label (str): Target column name.
46 |
47 | Returns:
48 | pd.DataFrame: DataFrame with evaluation metrics for completed configurations.
49 | """
50 | # Identify completed configurations
51 | done_configs = [k for k in range(len(configurations)) if np.sum(done_splits[k * ns:k * ns + ns]) == ns]
52 |
53 | # Extract validation and test eras
54 | validation_first_date = all_splits[0][1][0]
55 | validation_last_date = all_splits[0][1][-1]
56 | test_first_date = all_splits[-1][1][0]
57 |
58 | eval_validation = data.loc[(data['era'] >= validation_first_date) & (data['era'] <= validation_last_date), [label, 'era', 'mmap_idx']].copy().dropna(subset=[label])
59 | eval_test = data.loc[data['era'] >= test_first_date, [label, 'era', 'mmap_idx']].copy().dropna(subset=[label])
60 | eval_whole = data.loc[data['era'] >= validation_first_date, [label, 'era', 'mmap_idx']].copy().dropna(subset=[label])
61 |
62 | validation_stats = {'corr': [], 'corr_shp': [], 'max_dd': []}
63 | test_stats = {'corr': [], 'corr_shp': [], 'max_dd': []}
64 | whole_stats = {'corr': [], 'corr_shp': [], 'max_dd': []}
65 |
66 | eval_validation_idx = eval_validation['mmap_idx'].values
67 | eval_test_idx = eval_test['mmap_idx'].values
68 | eval_whole_idx = eval_whole['mmap_idx'].values
69 |
70 | # Evaluate each completed configuration
71 | for i in done_configs:
72 | eval_validation['pred'] = mmapped_array[eval_validation_idx, i]
73 |         validation_era_results = eval_validation.groupby('era')[[label, 'pred']].apply(lambda g: g[label].corr(g['pred'])).values  # per-era Pearson correlation
74 |
75 | cumpnl = np.nancumsum(validation_era_results)
76 | cummax = np.maximum.accumulate(cumpnl)
77 | max_dd = np.max(cummax - cumpnl)
78 |
79 | validation_stats['corr'].append(np.nanmean(validation_era_results))
80 | validation_stats['corr_shp'].append(np.nanmean(validation_era_results) / np.nanstd(validation_era_results))
81 | validation_stats['max_dd'].append(max_dd)
82 |
83 | eval_test['pred'] = mmapped_array[eval_test_idx, i]
84 |         test_era_results = eval_test.groupby('era')[[label, 'pred']].apply(lambda g: g[label].corr(g['pred'])).values
85 |
86 | cumpnl = np.nancumsum(test_era_results)
87 | cummax = np.maximum.accumulate(cumpnl)
88 | max_dd = np.max(cummax - cumpnl)
89 |
90 | test_stats['corr'].append(np.nanmean(test_era_results))
91 | test_stats['corr_shp'].append(np.nanmean(test_era_results) / np.nanstd(test_era_results))
92 | test_stats['max_dd'].append(max_dd)
93 |
94 | eval_whole['pred'] = mmapped_array[eval_whole_idx, i]
95 |         whole_era_results = eval_whole.groupby('era')[[label, 'pred']].apply(lambda g: g[label].corr(g['pred'])).values
96 |
97 | cumpnl = np.nancumsum(whole_era_results)
98 | cummax = np.maximum.accumulate(cumpnl)
99 | max_dd = np.max(cummax - cumpnl)
100 |
101 | whole_stats['corr'].append(np.nanmean(whole_era_results))
102 | whole_stats['corr_shp'].append(np.nanmean(whole_era_results) / np.nanstd(whole_era_results))
103 | whole_stats['max_dd'].append(max_dd)
104 |
105 | # Combine results into a DataFrame
106 | config_df = pd.concat([
107 | pd.DataFrame(configurations).iloc[done_configs],
108 | pd.DataFrame(validation_stats, index=done_configs),
109 | pd.DataFrame(test_stats, index=done_configs),
110 | pd.DataFrame(whole_stats, index=done_configs)
111 | ], axis=1)
112 |
113 | config_df.columns = list(config_df.columns[:-9]) + [
114 | 'validation_corr', 'validation_shp', 'validation_max_dd',
115 | 'test_corr', 'test_shp', 'test_max_dd',
116 | 'whole_corr', 'whole_shp', 'whole_max_dd'
117 | ]
118 |
119 | return config_df.sort_values('validation_shp').dropna()
120 |
121 | def compare_to_benchmark(res_df, benchmark_id=0):
122 | """
123 | Compare random configurations to the benchmark.
124 |
125 | Parameters:
126 | res_df (pd.DataFrame): DataFrame with evaluation results for all configurations.
127 | benchmark_id (int): Index of the benchmark configuration in res_df.
128 |
129 | Returns:
130 | pd.DataFrame: Subset of res_df where random configurations outperform the benchmark.
131 | """
132 | # Extract benchmark values
133 | benchmark_corr = res_df.loc[benchmark_id, 'whole_corr']
134 | benchmark_sharpe = res_df.loc[benchmark_id, 'whole_shp']
135 |
136 | # Find configurations that beat the benchmark
137 | outperforming_configs = res_df[
138 | (res_df['whole_corr'] > benchmark_corr) &
139 | (res_df['whole_shp'] > benchmark_sharpe) &
140 | (res_df.index != benchmark_id) # Exclude the benchmark itself
141 | ]
142 |
143 | return outperforming_configs
144 |
145 | def evaluate_and_ensemble(ensemble, configurations, mmapped_array, data, all_splits, feature_cols, get_model, save_name="model"):
146 | """
147 |     Verify that the first ensemble member reproduces its stored last-fold predictions,
148 |     retrain each member on all data, and package the ensemble into a predict function for deployment.
149 |
150 | Parameters:
151 | ensemble (list): List of configuration ids to retrain and export
152 | configurations (list): List of configuration dictionaries.
153 | mmapped_array (np.memmap): Memory-mapped array for predictions.
154 | data (pd.DataFrame): The dataset with eras, features, and mmap_idx.
155 | all_splits (list): List of train-test split indices.
156 | feature_cols (list): List of feature column names.
157 | get_model (function): Function to initialize the model based on configuration.
158 | save_name (str): Name to use for saving the pickle file.
159 |
160 | Returns:
161 | None
162 | """
163 | print(f"Selected ensemble: {ensemble}")
164 |
165 |     # Step 1: Verify the model reproduces the predictions stored for the last fold
166 | train_didxs, test_didxs = all_splits[-1]
167 | k = ensemble[0]
168 |
169 | cfg = configurations[k]
170 | label = cfg['target']
171 | train_rows = (data['era'].isin(np.array(sorted(data['era'].unique()))[train_didxs])) & (~data[label].isna())
172 | test_rows = (data['era'].isin(np.array(sorted(data['era'].unique()))[test_didxs])) & (~data[label].isna())
173 |
174 | model = get_model(cfg)
175 | model.fit(
176 | data.loc[train_rows, feature_cols].values,
177 | data.loc[train_rows, label].values
178 | )
179 |
180 | result_vector = model.predict(data.loc[test_rows, feature_cols].values)
181 |     if not np.isclose(result_vector, mmapped_array[data.loc[test_rows, 'mmap_idx'].values, k], rtol=1e-03, atol=1e-05).all():
182 | print("Model did not replicate last fold performance. Check your implementation.")
183 | return
184 |
185 |     # Step 2: Retrain the ensemble on all data
186 | models = []
187 | for k in ensemble:
188 | cfg = configurations[k]
189 | label = cfg['target']
190 | train_rows = (~data[label].isna())
191 |
192 | model = get_model(cfg)
193 | model.fit(
194 | data.loc[train_rows, feature_cols].values,
195 | data.loc[train_rows, label].values
196 | )
197 |
198 | models.append(model)
199 |
200 |     # Step 3: Define the predict function
201 |     def predict(live_features: pd.DataFrame, live_benchmark_models: pd.DataFrame) -> pd.DataFrame:
202 |         # Rank each model's predictions as percentiles, then average the ranks
203 |         for i, model in enumerate(models):
204 |             raw = model.predict(live_features[feature_cols].values)
205 |             live_features[f'pred_{i}_rank'] = pd.Series(raw, index=live_features.index).rank(pct=True)
206 |         live_predictions = live_features[[f'pred_{i}_rank' for i in range(len(models))]].mean(axis=1)
207 |         return live_predictions.to_frame("prediction")
210 |
211 |     # Step 4: Save the predict function (cloudpickle serializes the closure over `models`)
212 |     import cloudpickle
213 | p = cloudpickle.dumps(predict)
214 | with open(f"predict_{save_name}_full.pkl", "wb") as f:
215 | f.write(p)
216 |
217 | print(f"Predict function saved as predict_{save_name}_full.pkl")
218 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # Signal Miner
5 |
6 | > **Revolutionizing Staking:** Aligning users and the fund through unique models.
7 |
8 | This repository houses code and notebooks to **mine** (systematically search for) machine learning models that aim to beat benchmarks in the [Numerai Classic tournament](https://numer.ai). By automating the process of iteratively training, evaluating, and retaining high-performing models, **Signal Miner** is your quickstart for generating models that can deliver better-than-benchmark performance on historical data.
9 |
10 | ---
11 |
12 | ## Table of Contents
13 | 1. [Background](#background)
14 | 2. [Installation & Setup](#installation--setup)
15 | 3. [Usage Overview](#usage-overview)
16 | 4. [Performance Plot & Randomness](#performance-plot--randomness)
17 | 5. [Contributing](#contributing)
18 | 6. [License](#license)
19 |
20 | ---
21 |
22 | ## Background
23 |
24 | This notebook addresses the **Numerai Classic** data science tournament and aims to **align incentives for generic staking** on the tournament. Ideally, when more people stake, the hedge fund’s meta model improves because it can incorporate a diversity of unique signals. However, under the current setup, generic stakers often rely on **pre-existing models**—either Numerai’s example models or paid models from NumerBay—which limits the potential for fresh, **unique alpha**.
25 |
26 | **Make Staking Great Again:**
27 | The core idea of this project is that **every staker** should be able to contribute unique alpha to Numerai Classic. Why? Because unique alpha:
28 | - Has a better chance of producing **positive MMC (Meta Model Contribution)**.
29 | - Potentially earns higher payouts than staking on widely used example models.
30 | - Doesn’t compromise on performance (all generated models exceed specified benchmark metrics like correlation and Sharpe).
31 |
32 | By automatically **searching for and refining** these distinct models, we **increase** the variety of signals feeding into Numerai’s meta model, benefiting both stakers (via higher potential rewards) and Numerai (via more robust, diversified signals).
33 |
34 | **Signal Miner** extends this idea by creating a pipeline to **search** for robust models in an automated fashion, focusing on:
35 | - **Unique**: Emphasizing uncommon or orthogonal predictions that add new information.
36 | - **Transparent**: Offering clear performance metrics at each mining iteration.
37 | - **Efficient**: Letting your machine handle the computational tasks while you focus on analysis.
38 |
39 | The result is a **win-win**:
40 | - Stakers are happy because they can generate new signals and potentially earn more.
41 | - Numerai’s hedge fund is happy because it gains new, non-redundant alpha from the community.
42 |
43 |
44 | ---
45 |
46 | ## Installation & Setup
47 |
48 | 1. **Clone the repository**:
49 | ```bash
50 | git clone https://github.com/jefferythewind/signal_miner.git
51 | cd signal_miner
52 | ```
53 | 2. **Create (and activate) a virtual environment** (optional but recommended):
54 | ```bash
55 | python -m venv venv
56 | source venv/bin/activate # on Linux or macOS
57 | # or
58 | venv\Scripts\activate # on Windows
59 | ```
60 | 3. **Install required dependencies**:
61 | ```bash
62 | pip install -r requirements.txt
63 | ```
64 | 4. **Install Jupyter (if you want to use the notebook)**:
65 | ```bash
66 | pip install jupyter
67 | ```
68 | **That’s it!** Once done, you’re ready to either run the code directly (e.g., via Python scripts) or explore the Jupyter notebooks.
69 |
70 | ## Usage Overview
71 |
72 | > **Recommended**: See [`Model Miner.ipynb`](Model%20Miner.ipynb) for a complete end-to-end example. It’s best run from top to bottom using **Python 3.10**.
73 |
74 | Below is a high-level summary of how you might use **Signal Miner** in practice:
75 |
76 | 1. **Load your data** as usual (e.g., reading a CSV or Parquet file into a Pandas DataFrame; see the download sketch just after this list).
77 | 2. **Define a benchmark configuration** to compare against (e.g., a standard LightGBM model).
78 | 3. **Create a parameter dictionary** (hyperparameters to be sampled or searched).
79 | 4. **Set up time-series cross-validation** with an embargo or gap (important to avoid leakage in financial data).
80 | 5. **Launch the asynchronous mining process** (which iterates through parameter combinations and evaluates them across the defined cross-validation folds).
81 | 6. **Check progress** periodically and see how many configurations have run.
82 | 7. **Evaluate results** relative to your benchmark on the validation and test folds (e.g., correlation, Sharpe).
83 | 8. **Export or ensemble** any configuration(s) that exceed your benchmark.
84 |
85 | ### Step-by-Step Example
86 |
87 | Below are excerpts from the notebook demonstrating these steps:
88 |
89 | **1. Define the benchmark configuration:**
90 | ```python
91 | benchmark_cfg = {
92 | "colsample_bytree": 0.1,
93 | "max_bin": 5,
94 | "max_depth": 5,
95 | "num_leaves": 2**4 - 1,
96 | "min_child_samples": 20,
97 | "n_estimators": 2000,
98 | "reg_lambda": 0.0,
99 | "learning_rate": 0.01,
100 |     "target": 'target',  # using the first target for simplicity
101 |     "n_jobs": 10         # get_model() in signal_miner.py reads this key too
102 | }
102 | ```
103 |
104 | **2. Create the parameter dictionary to search:**
105 | ```python
106 | param_dict = {
107 | 'colsample_bytree': list(np.linspace(0.001, 1, 100)),
108 | 'reg_lambda': list(np.linspace(0, 100_000, 10000)),
109 | 'learning_rate': list(np.linspace(0.00001, 1.0, 1000, dtype='float')),
110 | 'max_bin': list(np.linspace(2, 5, 4, dtype='int')),
111 | 'max_depth': list(np.linspace(2, 12, 11, dtype='int')),
112 | 'num_leaves': list(np.linspace(2, 24, 15, dtype='int')),
113 | 'min_child_samples': list(np.linspace(1, 250, 250, dtype='int')),
114 | 'n_estimators': list(np.linspace(10, 2000, 1990, dtype='int')),
115 |     'target': targets,  # list of candidate target columns
116 |     'n_jobs': [10]      # fixed thread count; get_model() reads this key from each config
117 | }
117 | ```
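
With the grid defined, candidate configurations can be drawn with `get_rdn_cfgs` from `signal_miner.py`. Prepending the benchmark so it sits at index 0 is one convention (the counts here are illustrative; the `122.0/2002` progress example below implies 1001 configurations × 2 splits):

```python
from signal_miner import get_rdn_cfgs

configurations = [benchmark_cfg] + get_rdn_cfgs(param_dict, 1000)
BENCHMARK_ID = 0  # index of the benchmark configuration in `configurations`
```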
118 |
119 | **3. Set up time-series cross-validation** (with a gap/embargo to avoid leakage across eras):
120 | ```python
121 | from sklearn.model_selection import TimeSeriesSplit
122 |
123 | ns = 2  # number of splits
124 | eras = np.array(sorted(data['era'].unique()))  # unique eras in chronological order
125 | all_splits = list(TimeSeriesSplit(n_splits=ns, max_train_size=100_000_000, gap=12).split(eras))
123 | ```
124 | Here, we use two folds. The first fold acts as “validation” and the second as a “test” set, ensuring no overlap.
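
You can sanity-check which eras land in each fold before mining (an illustrative snippet):

```python
for fold, (train_idx, eval_idx) in enumerate(all_splits):
    print(f"fold {fold}: train eras {eras[train_idx[0]]}..{eras[train_idx[-1]]}, "
          f"eval eras {eras[eval_idx[0]]}..{eras[eval_idx[-1]]}")
```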
125 |
126 | 
127 |
128 | **4. Launch the mining process** (asynchronous job pool) to train multiple configurations:
129 | ```python
130 | start_mining()
131 | ```
132 | This begins training across the folds for each parameter combination. The process runs in the background, so you can continue using the notebook.
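
`start_mining()` is defined in the notebook rather than in `signal_miner.py`. Conceptually, it submits one job per (configuration, fold) pair to a process pool; each job trains a model, writes its fold predictions into the shared `np.memmap` at the rows given by `mmap_idx`, and flips the matching `done_splits` flag. A rough sketch of that pattern (not the notebook's exact code; it assumes the memmaps already exist and a Linux `fork` start method so workers share them):

```python
from multiprocessing import Pool

def run_job(args):
    k, s = args  # configuration index, fold index
    cfg = configurations[k]
    train_didxs, test_didxs = all_splits[s]
    label = cfg['target']
    train_rows = data['era'].isin(eras[train_didxs]) & ~data[label].isna()
    test_rows = data['era'].isin(eras[test_didxs]) & ~data[label].isna()

    model = get_model(cfg)
    model.fit(data.loc[train_rows, feature_cols].values,
              data.loc[train_rows, label].values)

    # Write this fold's predictions into the shared memmap, then mark it done
    rows = data.loc[test_rows, 'mmap_idx'].values
    mmapped_array[rows, k] = model.predict(data.loc[test_rows, feature_cols].values)
    done_splits[k * ns + s] = 1

pool = Pool(processes=4)  # worker count is illustrative

def start_mining():
    jobs = [(k, s) for k in range(len(configurations)) for s in range(ns)]
    return pool.map_async(run_job, jobs)  # returns immediately; mining runs in the background
```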
133 |
134 | **5. Periodically check progress:**
135 | ```python
136 | check_progress()
137 | # Example Output:
138 | # Progress: 122.0/2002 (6.09%)
139 | ```
140 | This lets you know how many configurations have completed.
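
`check_progress()` is also notebook-defined; given the `done_splits` memmap it can be as simple as this sketch:

```python
def check_progress():
    done, total = float(np.sum(done_splits)), len(configurations) * ns
    print(f"Progress: {done}/{total} ({100 * done / total:.2f}%)")
```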
141 |
142 | **6. Evaluate results** once you’ve accumulated sufficient runs:
143 | ```python
144 | res_df = evaluate_completed_configs(
145 | data, configurations, mmapped_array, done_splits, all_splits, ns
146 | )
147 | # Label any benchmark configuration
148 | res_df['is_benchmark'] = (res_df.index == BENCHMARK_ID)
149 |
150 | print("Benchmark Results:")
151 | res_df[res_df['is_benchmark']]
152 | ```
153 | You’ll see metrics such as `validation_corr`, `test_corr`, `whole_corr`, `validation_shp`, etc., alongside your benchmark.
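
Each metric is computed per era inside `evaluate_completed_configs`: `*_corr` is the mean per-era Pearson correlation between predictions and the target, `*_shp` is that mean divided by the per-era standard deviation, and `*_max_dd` is the largest peak-to-trough drop in the cumulative per-era correlation. To eyeball the leaders:

```python
res_df.sort_values('validation_shp', ascending=False).head(10)
```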
154 |
155 | **7. Compare models to the benchmark** to find superior configurations:
156 | ```python
157 | print("Better Than Benchmark Results:")
158 | compare_to_benchmark(res_df)
159 | ```
160 |
161 | **8. Export any top-performing models** for deployment:
162 | ```python
163 | to_export = [res_df.sort_values('whole_shp').iloc[-1].name] # pick the best by Sharpe
164 | evaluate_and_ensemble(
165 | to_export, configurations, mmapped_array, data,
166 | all_splits, feature_cols, get_model, save_name="model"
167 | )
168 | # Example output:
169 | # Predict function saved as predict_model_full.pkl
170 | ```
171 | The above snippet creates an ensemble (even if it’s a single model) and saves a `.pkl` file suitable for future inference or Numerai submission.
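
To sanity-check the exported artifact before uploading, you can round-trip it locally (a sketch; passing an empty DataFrame for `live_benchmark_models` works here because the saved function never reads it):

```python
import cloudpickle
import pandas as pd

with open("predict_model_full.pkl", "rb") as f:
    predict = cloudpickle.loads(f.read())

# Use the most recent historical era as a stand-in for live features
latest = data.loc[data['era'] == data['era'].max(), feature_cols].copy()
print(predict(latest, pd.DataFrame()).head())
```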
172 |
173 | ---
174 |
175 | That’s the **overall usage flow** of **Signal Miner**. For the most up-to-date code and additional detail, please refer to the [**Model Miner** notebook](Model%20Miner.ipynb).
176 |
177 |
178 | ## Performance Plot & Randomness
179 |
180 | Below is a scatter plot illustrating the relationship between **past performance** (cross-validation / in-sample Sharpe) and **future performance** (test-fold / out-of-sample Sharpe):
181 |
182 |
183 |
184 |
185 | > **Key Takeaway**: The best model on historical (validation) data is **not necessarily** the best model for unseen data. There’s inherent randomness in the modeling process, and no amount of backtesting can completely guarantee out-of-sample success.
186 |
187 | In our example plot, each dot represents a model configuration:
188 | - The **x-axis** is the validation Sharpe (past fold).
189 | - The **y-axis** is the test Sharpe (future fold).
190 | - The **benchmark** model is shown as a star, and a best-fit line reveals a strong linear relationship (a sketch to regenerate the plot follows this list).
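
A sketch to regenerate this plot from `res_df` with matplotlib (column names follow `evaluate_completed_configs`):

```python
import matplotlib.pyplot as plt
import numpy as np

bench = res_df['is_benchmark']
plt.scatter(res_df.loc[~bench, 'validation_shp'], res_df.loc[~bench, 'test_shp'],
            alpha=0.5, label='mined configs')
plt.scatter(res_df.loc[bench, 'validation_shp'], res_df.loc[bench, 'test_shp'],
            marker='*', s=300, label='benchmark')

# Best-fit line over all configurations
m, b = np.polyfit(res_df['validation_shp'], res_df['test_shp'], 1)
xs = np.linspace(res_df['validation_shp'].min(), res_df['validation_shp'].max(), 100)
plt.plot(xs, m * xs + b, linestyle='--', label='best fit')

plt.xlabel('Validation Sharpe (past)')
plt.ylabel('Test Sharpe (future)')
plt.legend()
plt.show()
```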
191 |
192 | Some observations:
193 | 1. **Not Perfect**: The top-performing validation model isn’t the top performer on the test set, confirming that overfitting or luck can play a role in “winning” the validation stage.
194 | 2. **Benchmark Surprises**: The benchmark ranks near the top in validation, yet multiple models outperformed it on the test set.
195 | 3. **Encouraging Correlation**: Despite the inevitable randomness, there is a strong positive correlation between past and future performance—**meaning high validation Sharpe often translates to high test Sharpe.**
196 | 4. **What If the Plot Looked Random?**: If, instead, you saw a circular or completely random distribution, that would mean your model selection is mostly noise. In such cases, “chasing” the top validation model yields little to no real out-of-sample edge.
197 |
198 | This dynamic mirrors the transition from **training** to **live deployment**: even the best backtested model might not be the best performer going forward. But a solid positive correlation provides some confidence that better in-sample results can lead to better out-of-sample performance.
199 |
200 | ## Hardware & Resource Considerations
201 |
202 | This project was developed using **Python 3.10** on **Ubuntu Linux** running on an **AMD chipset** with **128 GB of RAM**.
203 |
204 | ### Swap Space: The Secret to Avoiding Memory Errors
205 |
206 | One of the **most crucial** optimizations for running large-scale model mining is **ensuring you have enough swap space**. By default, Linux systems often allocate **far too little swap**, leading to **memory errors** when working with large datasets.
207 |
208 | **Recommendation:** Set your **swap space** to **2X your RAM**.
209 |
210 | In my case, that meant **expanding swap to 256 GB**—a full **1/4 of my 1 TB hard drive**!
211 | Since making this change, **99.99% of my memory errors have disappeared**.
212 |
213 | #### Linux Makes This Easy
214 | Ubuntu allows full control over **swap size**, unlike macOS (which doesn’t let you modify it) or Windows (which, well, let’s not even talk about Windows).
215 |
216 | ### Expanding Swap on Ubuntu
217 |
218 | Run the following commands to **increase swap space** to any desired size (example: **256 GB**).
219 |
220 | ```bash
221 | # Step 1: Turn off existing swap
222 | sudo swapoff -a
223 |
224 | # Step 2: Create a new swap file of desired size (256 GB in this case)
225 | sudo fallocate -l 256G /swapfile
226 |
227 | # Step 3: Set proper permissions
228 | sudo chmod 600 /swapfile
229 |
230 | # Step 4: Format the swap space
231 | sudo mkswap /swapfile
232 |
233 | # Step 5: Enable swap
234 | sudo swapon /swapfile
235 |
236 | # Step 6: Make it permanent (add this line to /etc/fstab)
237 | echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab
238 |
239 | # Verify that swap is active
240 | swapon --show
241 | ```
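
Afterwards, `free -h` should report the new swap total alongside RAM usage:

```bash
free -h
```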
242 |
243 |
244 | ## Contributing
245 |
246 | We welcome contributions! Whether it’s:
247 | - Bug fixes or clarifications
248 | - Additional model-mining techniques
249 | - Expanded plotting and diagnostic tools
250 |
251 | Feel free to open a Pull Request or Issue.
252 |
253 | ## License
254 |
255 | This project is licensed under the [MIT License](LICENSE). You’re free to use and modify this code for your own modeling adventures.
256 |
257 | **Namaste, and happy mining!**
258 |
--------------------------------------------------------------------------------