├── requirements.txt
├── benchmark_results
├── LICENSE
├── signal_miner.py
└── README.md
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | numpy
3 | numerapi
4 | lightgbm
5 | pickle-mixin
6 | scikit-learn
7 | matplotlib
8 | cloudpickle
--------------------------------------------------------------------------------
/benchmark_results:
--------------------------------------------------------------------------------
1 | ,colsample_bytree,max_bin,max_depth,num_leaves,min_child_samples,n_estimators,reg_lambda,learning_rate,target,n_jobs,validation_corr,validation_shp,validation_max_dd,test_corr,test_shp,test_max_dd,whole_corr,whole_shp,whole_max_dd,is_benchmark
2 | 0,0.1,5,10,1024,10000,30000,0.0,0.001,target,10,0.041077934863382015,2.4434301759616894,0.01394965999562281,0.032886313452397765,1.6958069920387409,0.03396784091858818,0.03699822826289183,1.9891945305645413,0.03396784091858862,True
3 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2025 Timothy DeLise
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /signal_miner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import random 4 | import lightgbm as lgb 5 | 6 | # Randomized parameter grid 7 | def get_ran_cfg(param_dict): 8 | return {k: random.sample(v, 1)[0] for k, v in param_dict.items()} 9 | 10 | def get_rdn_cfgs(param_dict, num): 11 | configurations = [] 12 | while len(configurations) < num: 13 | cfg = get_ran_cfg(param_dict) 14 | if (cfg not in configurations) and (cfg['num_leaves'] <= 2 ** cfg['max_depth']): 15 | configurations.append(cfg) 16 | return configurations 17 | 18 | def get_model(cfg): 19 | model = lgb.LGBMRegressor( 20 | colsample_bytree=cfg['colsample_bytree'], 21 | max_bin=cfg['max_bin'], 22 | max_depth=cfg['max_depth'], 23 | num_leaves=cfg['num_leaves'], 24 | min_child_samples=cfg['min_child_samples'], 25 | n_estimators=cfg['n_estimators'], 26 | reg_lambda=cfg['reg_lambda'], 27 | learning_rate=cfg['learning_rate'], 28 | n_jobs=cfg['n_jobs'] 29 | ) 30 | 31 | return model 32 | 33 | 34 | def evaluate_completed_configs(data, configurations, mmapped_array, done_splits, all_splits, ns, label='target'): 35 | """ 36 | Evaluate completed configurations and return a DataFrame with evaluation metrics. 37 | 38 | Parameters: 39 | data (pd.DataFrame): The dataset with eras, features, and mmap_idx. 40 | configurations (list): List of configuration dictionaries. 41 | mmapped_array (np.memmap): Memory-mapped array for predictions. 42 | done_splits (np.memmap): Memory-mapped array tracking completed splits. 43 | all_splits (list): List of validation-test split indices. 44 | ns (int): Number of splits per configuration. 45 | label (str): Target column name. 46 | 47 | Returns: 48 | pd.DataFrame: DataFrame with evaluation metrics for completed configurations. 
49 | """ 50 | # Identify completed configurations 51 | done_configs = [k for k in range(len(configurations)) if np.sum(done_splits[k * ns:k * ns + ns]) == ns] 52 | 53 | # Extract validation and test eras 54 | validation_first_date = all_splits[0][1][0] 55 | validation_last_date = all_splits[0][1][-1] 56 | test_first_date = all_splits[-1][1][0] 57 | 58 | eval_validation = data.loc[(data['era'] >= validation_first_date) & (data['era'] <= validation_last_date), [label, 'era', 'mmap_idx']].copy().dropna(subset=[label]) 59 | eval_test = data.loc[data['era'] >= test_first_date, [label, 'era', 'mmap_idx']].copy().dropna(subset=[label]) 60 | eval_whole = data.loc[data['era'] >= validation_first_date, [label, 'era', 'mmap_idx']].copy().dropna(subset=[label]) 61 | 62 | validation_stats = {'corr': [], 'corr_shp': [], 'max_dd': []} 63 | test_stats = {'corr': [], 'corr_shp': [], 'max_dd': []} 64 | whole_stats = {'corr': [], 'corr_shp': [], 'max_dd': []} 65 | 66 | eval_validation_idx = eval_validation['mmap_idx'].values 67 | eval_test_idx = eval_test['mmap_idx'].values 68 | eval_whole_idx = eval_whole['mmap_idx'].values 69 | 70 | # Evaluate each completed configuration 71 | for i in done_configs: 72 | eval_validation['pred'] = mmapped_array[eval_validation_idx, i] 73 | validation_era_results = eval_validation.groupby('era')[[label, 'pred']].apply(lambda x: x[[label, 'pred']].dropna().corr().iloc[0, 1]).values 74 | 75 | cumpnl = np.nancumsum(validation_era_results) 76 | cummax = np.maximum.accumulate(cumpnl) 77 | max_dd = np.max(cummax - cumpnl) 78 | 79 | validation_stats['corr'].append(np.nanmean(validation_era_results)) 80 | validation_stats['corr_shp'].append(np.nanmean(validation_era_results) / np.nanstd(validation_era_results)) 81 | validation_stats['max_dd'].append(max_dd) 82 | 83 | eval_test['pred'] = mmapped_array[eval_test_idx, i] 84 | test_era_results = eval_test.groupby('era')[[label, 'pred']].apply(lambda x: x[[label, 'pred']].dropna().corr().iloc[0, 1]).values 85 | 86 | cumpnl = np.nancumsum(test_era_results) 87 | cummax = np.maximum.accumulate(cumpnl) 88 | max_dd = np.max(cummax - cumpnl) 89 | 90 | test_stats['corr'].append(np.nanmean(test_era_results)) 91 | test_stats['corr_shp'].append(np.nanmean(test_era_results) / np.nanstd(test_era_results)) 92 | test_stats['max_dd'].append(max_dd) 93 | 94 | eval_whole['pred'] = mmapped_array[eval_whole_idx, i] 95 | whole_era_results = eval_whole.groupby('era')[[label, 'pred']].apply(lambda x: x[[label, 'pred']].dropna().corr().iloc[0, 1]).values 96 | 97 | cumpnl = np.nancumsum(whole_era_results) 98 | cummax = np.maximum.accumulate(cumpnl) 99 | max_dd = np.max(cummax - cumpnl) 100 | 101 | whole_stats['corr'].append(np.nanmean(whole_era_results)) 102 | whole_stats['corr_shp'].append(np.nanmean(whole_era_results) / np.nanstd(whole_era_results)) 103 | whole_stats['max_dd'].append(max_dd) 104 | 105 | # Combine results into a DataFrame 106 | config_df = pd.concat([ 107 | pd.DataFrame(configurations).iloc[done_configs], 108 | pd.DataFrame(validation_stats, index=done_configs), 109 | pd.DataFrame(test_stats, index=done_configs), 110 | pd.DataFrame(whole_stats, index=done_configs) 111 | ], axis=1) 112 | 113 | config_df.columns = list(config_df.columns[:-9]) + [ 114 | 'validation_corr', 'validation_shp', 'validation_max_dd', 115 | 'test_corr', 'test_shp', 'test_max_dd', 116 | 'whole_corr', 'whole_shp', 'whole_max_dd' 117 | ] 118 | 119 | return config_df.sort_values('validation_shp').dropna() 120 | 121 | def compare_to_benchmark(res_df, benchmark_id=0): 122 | 
""" 123 | Compare random configurations to the benchmark. 124 | 125 | Parameters: 126 | res_df (pd.DataFrame): DataFrame with evaluation results for all configurations. 127 | benchmark_id (int): Index of the benchmark configuration in res_df. 128 | 129 | Returns: 130 | pd.DataFrame: Subset of res_df where random configurations outperform the benchmark. 131 | """ 132 | # Extract benchmark values 133 | benchmark_corr = res_df.loc[benchmark_id, 'whole_corr'] 134 | benchmark_sharpe = res_df.loc[benchmark_id, 'whole_shp'] 135 | 136 | # Find configurations that beat the benchmark 137 | outperforming_configs = res_df[ 138 | (res_df['whole_corr'] > benchmark_corr) & 139 | (res_df['whole_shp'] > benchmark_sharpe) & 140 | (res_df.index != benchmark_id) # Exclude the benchmark itself 141 | ] 142 | 143 | return outperforming_configs 144 | 145 | def evaluate_and_ensemble(ensemble, configurations, mmapped_array, data, all_splits, feature_cols, get_model, save_name="model"): 146 | """ 147 | Compare configurations to the benchmark, select the best, retrain it on all data, 148 | and package it into a predict function for deployment. 149 | 150 | Parameters: 151 | ensemble (list): List of configuration ids to retrain and export 152 | configurations (list): List of configuration dictionaries. 153 | mmapped_array (np.memmap): Memory-mapped array for predictions. 154 | data (pd.DataFrame): The dataset with eras, features, and mmap_idx. 155 | all_splits (list): List of train-test split indices. 156 | feature_cols (list): List of feature column names. 157 | get_model (function): Function to initialize the model based on configuration. 158 | save_name (str): Name to use for saving the pickle file. 159 | 160 | Returns: 161 | None 162 | """ 163 | print(f"Selected ensemble: {ensemble}") 164 | 165 | # Step 3: Validate the model replicates last fold performance 166 | train_didxs, test_didxs = all_splits[-1] 167 | k = ensemble[0] 168 | 169 | cfg = configurations[k] 170 | label = cfg['target'] 171 | train_rows = (data['era'].isin(np.array(sorted(data['era'].unique()))[train_didxs])) & (~data[label].isna()) 172 | test_rows = (data['era'].isin(np.array(sorted(data['era'].unique()))[test_didxs])) & (~data[label].isna()) 173 | 174 | model = get_model(cfg) 175 | model.fit( 176 | data.loc[train_rows, feature_cols].values, 177 | data.loc[train_rows, label].values 178 | ) 179 | 180 | result_vector = model.predict(data.loc[test_rows, feature_cols].values) 181 | if not np.isclose(result_vector, mmapped_array[test_rows, k], rtol=1e-03, atol=1e-05).all(): 182 | print("Model did not replicate last fold performance. 
Check your implementation.")
183 |         return
184 | 
185 |     # Step 4: Retrain the ensemble configurations on all available data (no hold-out)
186 |     models = []
187 |     for k in ensemble:
188 |         cfg = configurations[k]
189 |         label = cfg['target']
190 |         train_rows = (~data[label].isna())
191 | 
192 |         model = get_model(cfg)
193 |         model.fit(
194 |             data.loc[train_rows, feature_cols].values,
195 |             data.loc[train_rows, label].values
196 |         )
197 | 
198 |         models.append(model)
199 | 
200 |     # Step 5: Define the predict function (rank each model's predictions, then average the ranks)
201 |     def predict(live_features: pd.DataFrame, live_benchmark_models: pd.DataFrame) -> pd.DataFrame:
202 |         # live_benchmark_models is unused here but kept to match the expected predict signature
203 |         for i, model in enumerate(models):
204 |             live_features['pred_temp'] = model.predict(live_features[feature_cols].values)
205 |             live_features[f'pred_{i}_rank'] = live_features['pred_temp'].rank(pct=True)
206 |         # Ensemble by averaging the per-model percentile ranks
207 |         live_predictions = live_features[[f'pred_{i}_rank' for i in range(len(models))]].mean(axis=1)
208 |         submission = pd.Series(live_predictions, index=live_features.index)
209 |         return submission.to_frame("prediction")
210 | 
211 |     # Step 6: Serialize the predict function (the fitted models are captured in its closure)
212 |     import cloudpickle
213 |     p = cloudpickle.dumps(predict)
214 |     with open(f"predict_{save_name}_full.pkl", "wb") as f:
215 |         f.write(p)
216 | 
217 |     print(f"Predict function saved as predict_{save_name}_full.pkl")
218 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | <!-- banner image: snake_axe_pixel_banner -->
3 | 
4 | # Signal Miner
5 | 
6 | > **Revolutionizing Staking:** Aligning users and the fund through unique models.
7 | 
8 | This repository houses code and notebooks to **mine** (or systematically search for) machine learning models that aim to beat benchmarks for the [Numerai Classic Tournament](https://numer.ai). By automating the process of iteratively training, evaluating, and retaining high-performing models, **Signal Miner** gives you a quick start toward generating models that can potentially beat the benchmark on historical data.
9 | 
10 | ---
11 | 
12 | ## Table of Contents
13 | 1. [Background](#background)
14 | 2. [Installation & Setup](#installation--setup)
15 | 3. [Usage Overview](#usage-overview)
16 | 4. [Performance Plot & Randomness](#performance-plot--randomness)
17 | 5. [Contributing](#contributing)
18 | 6. [License](#license)
19 | 
20 | ---
21 | 
22 | ## Background
23 | 
24 | This notebook addresses the **Numerai Classic** data science tournament and aims to **align incentives for generic staking** in the tournament. Ideally, when more people stake, the hedge fund’s meta model improves because it can incorporate a diversity of unique signals. However, under the current setup, generic stakers often rely on **pre-existing models**—either Numerai’s example models or paid models from NumerBay—which limits the potential for fresh, **unique alpha**.
25 | 
26 | **Make Staking Great Again:**
27 | The core idea of this project is that **every staker** should be able to contribute unique alpha to Numerai Classic. Why? Because unique alpha:
28 | - Has a better chance of producing **positive MMC (Meta Model Contribution)**.
29 | - Potentially earns higher payouts than staking on widely used example models.
30 | - Doesn’t compromise on performance (only models that exceed the specified benchmark metrics, such as correlation and Sharpe, are retained).
31 | 
32 | By automatically **searching for and refining** these distinct models, we **increase** the variety of signals feeding into Numerai’s meta model, benefiting both stakers (via higher potential rewards) and Numerai (via more robust, diversified signals).
33 | 
34 | **Signal Miner** extends this idea by creating a pipeline to **search** for robust models in an automated fashion, focusing on:
35 | - **Unique**: Emphasizing uncommon or orthogonal predictions that add new information.
36 | - **Transparent**: Offering clear performance metrics at each mining iteration.
37 | - **Efficient**: Letting your machine handle the computational tasks while you focus on analysis.
38 | 
39 | The result is a **win-win**:
40 | - Stakers are happy because they can generate new signals and potentially earn more.
41 | - Numerai’s hedge fund is happy because it gains new, non-redundant alpha from the community.
42 | 
43 | 
44 | ---
45 | 
46 | ## Installation & Setup
47 | 
48 | 1. **Clone the repository**:
49 | ```bash
50 | git clone https://github.com/jefferythewind/signal_miner.git
51 | cd signal_miner
52 | ```
53 | 2. **Create (and activate) a virtual environment** (optional but recommended):
54 | ```bash
55 | python -m venv venv
56 | source venv/bin/activate # on Linux or macOS
57 | # or
58 | venv\Scripts\activate # on Windows
59 | ```
60 | 3. **Install required dependencies**:
61 | ```bash
62 | pip install -r requirements.txt
63 | ```
64 | 4. **Install Jupyter (if you want to use the notebook)**:
65 | ```bash
66 | pip install jupyter
67 | ```
68 | **That’s it!** Once done, you’re ready to either run the code directly (e.g., via Python scripts) or explore the Jupyter notebooks.
69 | 
70 | ## Usage Overview
71 | 
72 | > **Recommended**: See [`Model Miner.ipynb`](Model%20Miner.ipynb) for a complete end-to-end example. It’s best run from top to bottom using **Python 3.10**.
73 | 
74 | Below is a high-level summary of how you might use **Signal Miner** in practice:
75 | 
76 | 1. **Load your data** as usual (e.g., reading a CSV or Parquet file into a Pandas DataFrame).
77 | 2. **Define a benchmark configuration** to compare against (e.g., a standard LightGBM model).
78 | 3. **Create a parameter dictionary** (hyperparameters to be sampled or searched).
79 | 4. **Set up time-series cross-validation** with an embargo or gap (important to avoid leakage in financial data).
80 | 5. **Launch the asynchronous mining process** (which iterates through parameter combinations and evaluates them across the defined cross-validation folds).
81 | 6. **Check progress** periodically and see how many configurations have run.
82 | 7. **Evaluate results** relative to your benchmark on the validation and test folds (e.g., correlation, Sharpe).
83 | 8. **Export or ensemble** any configuration(s) that exceed your benchmark.
84 | 
85 | ### Step-by-Step Example
86 | 
87 | Below are excerpts from the notebook demonstrating these steps:
88 | 
89 | **1. Define the benchmark configuration:**
90 | ```python
91 | benchmark_cfg = {
92 |     "colsample_bytree": 0.1,
93 |     "max_bin": 5,
94 |     "max_depth": 5,
95 |     "num_leaves": 2**4 - 1,
96 |     "min_child_samples": 20,
97 |     "n_estimators": 2000,
98 |     "reg_lambda": 0.0,
99 |     "learning_rate": 0.01,
100 |     "target": 'target' # Using the first target for simplicity
101 | }
102 | ```
103 | 
104 | **2.
Create the parameter dictionary to search:** 105 | ```python 106 | param_dict = { 107 | 'colsample_bytree': list(np.linspace(0.001, 1, 100)), 108 | 'reg_lambda': list(np.linspace(0, 100_000, 10000)), 109 | 'learning_rate': list(np.linspace(0.00001, 1.0, 1000, dtype='float')), 110 | 'max_bin': list(np.linspace(2, 5, 4, dtype='int')), 111 | 'max_depth': list(np.linspace(2, 12, 11, dtype='int')), 112 | 'num_leaves': list(np.linspace(2, 24, 15, dtype='int')), 113 | 'min_child_samples': list(np.linspace(1, 250, 250, dtype='int')), 114 | 'n_estimators': list(np.linspace(10, 2000, 1990, dtype='int')), 115 | 'target': targets 116 | } 117 | ``` 118 | 119 | **3. Set up time-series cross-validation** (with a gap/embargo to avoid leakage across eras): 120 | ```python 121 | ns = 2 # number of splits 122 | all_splits = list(TimeSeriesSplit(n_splits=ns, max_train_size=100_000_000, gap=12).split(eras)) 123 | ``` 124 | Here, we use two folds. The first fold acts as “validation” and the second as a “test” set, ensuring no overlap. 125 | 126 | ![output](https://github.com/user-attachments/assets/d12e2f2d-f8da-4f2e-9e50-b03d413e2161) 127 | 128 | **4. Launch the mining process** (asynchronous job pool) to train multiple configurations: 129 | ```python 130 | start_mining() 131 | ``` 132 | This begins training across the folds for each parameter combination. The process runs in the background, so you can continue using the notebook. 133 | 134 | **5. Periodically check progress:** 135 | ```python 136 | check_progress() 137 | # Example Output: 138 | # Progress: 122.0/2002 (6.09%) 139 | ``` 140 | This lets you know how many configurations have completed. 141 | 142 | **6. Evaluate results** once you’ve accumulated sufficient runs: 143 | ```python 144 | res_df = evaluate_completed_configs( 145 | data, configurations, mmapped_array, done_splits, all_splits, ns 146 | ) 147 | # Label any benchmark configuration 148 | res_df['is_benchmark'] = (res_df.index == BENCHMARK_ID) 149 | 150 | print("Benchmark Results:") 151 | res_df[res_df['is_benchmark']] 152 | ``` 153 | You’ll see metrics such as `validation_corr`, `test_corr`, `whole_corr`, `validation_shp`, etc., alongside your benchmark. 154 | 155 | **7. Compare models to the benchmark** to find superior configurations: 156 | ```python 157 | print("Better Than Benchmark Results:") 158 | compare_to_benchmark(res_df) 159 | ``` 160 | 161 | **8. Export any top-performing models** for deployment: 162 | ```python 163 | to_export = [res_df.sort_values('whole_shp').iloc[-1].name] # pick the best by Sharpe 164 | evaluate_and_ensemble( 165 | to_export, configurations, mmapped_array, data, 166 | all_splits, feature_cols, get_model, save_name="model" 167 | ) 168 | # Example output: 169 | # Predict function saved as predict_model_full.pkl 170 | ``` 171 | The above snippet creates an ensemble (even if it’s a single model) and saves a `.pkl` file suitable for future inference or Numerai submission. 172 | 173 | --- 174 | 175 | That’s the **overall usage flow** of **Signal Miner**. For the most up-to-date code and additional detail, please refer to the [**Model Miner** notebook](Model%20Miner.ipynb). 
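
The mining helpers referenced in steps 5 and 6 (`start_mining()`, `check_progress()`) are defined in the notebook rather than in `signal_miner.py`. For orientation before opening the notebook, here is a **minimal, hypothetical sketch** of how such an asynchronous mining loop can be wired up with a worker pool and the memory-mapped arrays used above. The file names, dtypes, and `multiprocessing` approach are illustrative assumptions rather than the notebook's actual implementation, and the sketch presumes the objects used elsewhere in this README (`data`, `configurations`, `all_splits`, `ns`, `feature_cols`, `get_model`) already exist.

```python
import multiprocessing as mp

import numpy as np

# Illustrative file names and dtypes; the notebook may use different ones.
PRED_MMAP_PATH = "predictions.mmap"   # shape: (n_rows, n_configs), one column per configuration
DONE_MMAP_PATH = "done_splits.mmap"   # shape: (n_configs * ns,), one flag per (config, split) pair

_pool = None  # module-level reference so the background workers are not garbage-collected


def run_one_job(k, s, cfg, n_rows, n_configs):
    """Train configuration k on split s and write its test-fold predictions to the memmap."""
    # Each worker opens its own handle to the shared, memory-mapped arrays.
    preds = np.memmap(PRED_MMAP_PATH, dtype='float32', mode='r+', shape=(n_rows, n_configs))
    done = np.memmap(DONE_MMAP_PATH, dtype='float32', mode='r+', shape=(n_configs * ns,))

    eras = np.array(sorted(data['era'].unique()))
    train_didxs, test_didxs = all_splits[s]
    label = cfg['target']

    train_rows = data['era'].isin(eras[train_didxs]) & ~data[label].isna()
    test_rows = data['era'].isin(eras[test_didxs]) & ~data[label].isna()

    model = get_model(cfg)
    model.fit(data.loc[train_rows, feature_cols].values, data.loc[train_rows, label].values)

    # Predictions land in the rows reserved for this configuration's test eras.
    preds[data.loc[test_rows, 'mmap_idx'].values, k] = model.predict(
        data.loc[test_rows, feature_cols].values
    )
    preds.flush()

    done[k * ns + s] = 1.0  # mark this (configuration, split) pair as finished
    done.flush()


def start_mining(n_workers=4):
    """Allocate the shared arrays and queue every (configuration, split) pair on a worker pool."""
    global _pool
    n_rows, n_configs = len(data), len(configurations)
    np.memmap(PRED_MMAP_PATH, dtype='float32', mode='w+', shape=(n_rows, n_configs)).flush()
    np.memmap(DONE_MMAP_PATH, dtype='float32', mode='w+', shape=(n_configs * ns,)).flush()

    # Fork-based multiprocessing (the Linux default) lets workers inherit data, configurations, etc.
    _pool = mp.Pool(n_workers)
    for k, cfg in enumerate(configurations):
        for s in range(ns):
            _pool.apply_async(run_one_job, args=(k, s, cfg, n_rows, n_configs), error_callback=print)
    _pool.close()  # no new jobs will be submitted; queued jobs keep running in the background


def check_progress():
    """Print how many (configuration, split) jobs have finished so far."""
    total = len(configurations) * ns
    done = np.memmap(DONE_MMAP_PATH, dtype='float32', mode='r', shape=(total,))
    print(f"Progress: {done.sum()}/{total} ({100 * done.sum() / total:.2f}%)")
```

The key design point illustrated here is that workers never return large objects: each job writes its test-fold predictions into the shared memmap and flips a done flag, so the notebook process can score partial results at any time with `evaluate_completed_configs`.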
176 | 
177 | 
178 | ## Performance Plot & Randomness
179 | 
180 | Below is a scatter plot, which illustrates the relationship between **past performance** (cross-validation / in-sample Sharpe) and **future performance** (test fold or out-of-sample Sharpe):
181 | 
182 | *(Figure: sharpe_scatter, validation Sharpe vs. test Sharpe for each mined configuration)*
183 | 
184 | 
185 | > **Key Takeaway**: The best model on historical (validation) data is **not necessarily** the best model for unseen data. There’s inherent randomness in the modeling process, and no amount of backtesting can completely guarantee out-of-sample success.
186 | 
187 | In our example plot, each dot represents a model configuration:
188 | - The **x-axis** is the validation Sharpe (past fold).
189 | - The **y-axis** is the test Sharpe (future fold).
190 | - The **benchmark** model is shown as a star, and a best-fit line highlights the strong linear relationship.
191 | 
192 | Some observations:
193 | 1. **Not Perfect**: The top-performing validation model isn’t the top performer on the test set, confirming that overfitting or luck can play a role in “winning” the validation stage.
194 | 2. **Benchmark Surprises**: The benchmark ranks near the top in validation, yet multiple models outperformed it on the test set.
195 | 3. **Encouraging Correlation**: Despite the inevitable randomness, there is a strong positive correlation between past and future performance—**meaning high validation Sharpe often translates to high test Sharpe.**
196 | 4. **What If the Plot Looked Random?**: If, instead, you saw a circular or completely random distribution, that would mean your model selection is mostly noise. In such cases, “chasing” the top validation model yields little to no real out-of-sample edge.
197 | 
198 | This dynamic mirrors the transition from **training** to **live deployment**: even the best backtested model might not be the best performer going forward. But a solid positive correlation provides some confidence that better in-sample results can lead to better out-of-sample performance.
199 | 
200 | ## Hardware & Resource Considerations
201 | 
202 | This project was developed using **Python 3.10** on **Ubuntu Linux** running on an **AMD chipset** with **128 GB of RAM**.
203 | 
204 | ### Swap Space: The Secret to Avoiding Memory Errors
205 | 
206 | One of the **most crucial** optimizations for running large-scale model mining is **ensuring you have enough swap space**. By default, Linux systems often allocate **far too little swap**, leading to **memory errors** when working with large datasets.
207 | 
208 | **Recommendation:** Set your **swap space** to **2X your RAM**.
209 | 
210 | In my case, that meant **expanding swap to 256 GB**—a full **1/4 of my 1 TB hard drive**!
211 | Since making this change, **99.99% of my memory errors have disappeared**.
212 | 
213 | #### Linux Makes This Easy
214 | Ubuntu allows full control over **swap size**, unlike macOS (which doesn’t let you modify it) or Windows (which, well, let’s not even talk about Windows).
215 | 
216 | ### Expanding Swap on Ubuntu
217 | 
218 | Run the following commands to **increase swap space** to any desired size (example: **256 GB**).
219 | 
220 | ```bash
221 | # Step 1: Turn off existing swap
222 | sudo swapoff -a
223 | 
224 | # Step 2: Create a new swap file of desired size (256 GB in this case)
225 | sudo fallocate -l 256G /swapfile
226 | 
227 | # Step 3: Set proper permissions
228 | sudo chmod 600 /swapfile
229 | 
230 | # Step 4: Format the swap space
231 | sudo mkswap /swapfile
232 | 
233 | # Step 5: Enable swap
234 | sudo swapon /swapfile
235 | 
236 | # Step 6: Make it permanent (add this line to /etc/fstab)
237 | echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab
238 | 
239 | # Verify that swap is active
240 | swapon --show
241 | ```
242 | 
243 | 
244 | ## Contributing
245 | 
246 | We welcome contributions! Whether it’s:
247 | - Bug fixes or clarifications
248 | - Additional model-mining techniques
249 | - Expanded plotting and diagnostic tools
250 | 
251 | Feel free to open a Pull Request or Issue.
252 | 
253 | ## License
254 | 
255 | This project is licensed under the [MIT License](LICENSE). You’re free to use and modify this code for your own modeling adventures.
256 | 
257 | **Namaste, and happy mining!**
258 | 
--------------------------------------------------------------------------------